Florian Westphal says:

====================
netfilter: updates for net-next

1-3) IPVS updates from Julian Anastasov to enhance visibility into
     IPVS internal state by exposing hash size, load factor etc and
     allows userspace to tune the load factor used for resizing hash
     tables.

4) reject empty/not nul terminated device names from xt_physdev.
   This isn't a bug fix; existing code doesn't require a c-string.
   But clean this up anyway because conceptually the interface name
   definitely should be a c-string.

5) Switch nfnetlink to skb_mac_header helpers that didn't exist back
   when this code was written.  This gives us additional debug checks
   but is not intended to change functionality.

6) Let the xt ttl/hoplimit match reject unknown operator modes.
   This is a cleanup, the evaluation function simply returns false when
   the mode is out of range.  From Marino Dzalto.

7) xt_socket match should enable defrag after all other checks. This
   bug is harmless; historically, defrag could not be disabled at all
   except by rmmod.

8) remove UDP-Lite conntrack support, from Fernando Fernandez Mancera.

9) Avoid a couple -Wflex-array-member-not-at-end warnings in the old
   xtables 32bit compat code, from Gustavo A. R. Silva.

10) nftables fwd expression should drop packets when their ttl/hl has
    expired.  This is a deferred bug fix; it's not deemed important
    enough for -rc8.
11) Add additional checks before assuming the mac header is an ethernet
    header, from Zhengchuan Liang.

* tag 'nf-next-26-04-10' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
  netfilter: require Ethernet MAC header before using eth_hdr()
  netfilter: nft_fwd_netdev: check ttl/hl before forwarding
  netfilter: x_tables: Avoid a couple -Wflex-array-member-not-at-end warnings
  netfilter: conntrack: remove UDP-Lite conntrack support
  netfilter: xt_socket: enable defrag after all other checks
  netfilter: xt_HL: add pr_fmt and checkentry validation
  netfilter: nfnetlink: prefer skb_mac_header helpers
  netfilter: xt_physdev: reject empty or not-nul terminated device names
  ipvs: add conn_lfactor and svc_lfactor sysctl vars
  ipvs: add ip_vs_status info
  ipvs: show the current conn_tab size to users
====================

Link: https://patch.msgid.link/20260410112352.23599-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski
2026-04-12 09:39:20 -07:00
26 changed files with 399 additions and 231 deletions

View File

@@ -29,6 +29,33 @@ backup_only - BOOLEAN
If set, disable the director function while the server is
in backup mode to avoid packet loops for DR/TUN methods.
conn_lfactor - INTEGER
Possible values: -8 (larger table) .. 8 (smaller table)
Default: -4
Controls the sizing of the connection hash table based on the
load factor (number of connections per table buckets):
2^conn_lfactor = nodes / buckets
As a result, the table grows if load increases and shrinks when
load decreases in the range of 2^8 - 2^conn_tab_bits (module
parameter).
The value is a shift count where negative values select
buckets = (connection hash nodes << -value) while positive
values select buckets = (connection hash nodes >> value). The
negative values reduce the collisions and reduce the time for
lookups but increase the table size. Positive values will
tolerate load above 100% when using a smaller table is
preferred, at the cost of more collisions. If using NAT
connections, consider decreasing the value by one because
they add two nodes in the hash table.
Example:
-4: grow if load goes above 6% (buckets = nodes * 16)
2: grow if load goes above 400% (buckets = nodes / 4)
conn_reuse_mode - INTEGER
1 - default
@@ -219,6 +246,16 @@ secure_tcp - INTEGER
The value definition is the same as that of drop_entry and
drop_packet.
svc_lfactor - INTEGER
Possible values: -8 (larger table) .. 8 (smaller table)
Default: -3
Controls the sizing of the service hash table based on the
load factor (number of services per table buckets). The table
will grow and shrink in the range of 2^4 - 2^20.
See conn_lfactor for explanation.
sync_threshold - vector of 2 INTEGERs: sync_threshold, sync_period
default 3 50

View File

@@ -16,9 +16,6 @@ extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
#ifdef CONFIG_NF_CT_PROTO_SCTP
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp;
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite;
#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre;
#endif

View File

@@ -107,11 +107,6 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state);
int nf_conntrack_udplite_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state);
int nf_conntrack_tcp_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
@@ -139,8 +134,6 @@ void nf_conntrack_icmpv6_init_net(struct net *net);
/* Existing built-in generic protocol */
extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic;
#define MAX_NF_CT_PROTO IPPROTO_UDPLITE
const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto);
/* Generic netlink helpers */

View File

@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ipv6.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/netfilter/x_tables.h>
@@ -21,8 +22,10 @@ eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par)
{
unsigned char eui64[8];
if (!(skb_mac_header(skb) >= skb->head &&
skb_mac_header(skb) + ETH_HLEN <= skb->data)) {
if (!skb->dev || skb->dev->type != ARPHRD_ETHER)
return false;
if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) {
par->hotdrop = true;
return false;
}

View File

@@ -209,17 +209,6 @@ config NF_CT_PROTO_SCTP
If unsure, say Y.
config NF_CT_PROTO_UDPLITE
bool 'UDP-Lite protocol connection tracking support'
depends on NETFILTER_ADVANCED
default y
help
With this option enabled, the layer 3 independent connection
tracking code will be able to do state tracking on UDP-Lite
connections.
If unsure, say Y.
config NF_CONNTRACK_AMANDA
tristate "Amanda backup protocol support"
depends on NETFILTER_ADVANCED

View File

@@ -11,6 +11,7 @@
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/jiffies.h>
@@ -220,8 +221,8 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
return -IPSET_ERR_BITMAP_RANGE;
/* Backward compatibility: we don't check the second flag */
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
if (!skb->dev || skb->dev->type != ARPHRD_ETHER ||
!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN)
return -EINVAL;
e.id = ip_to_id(map, ip);

View File

@@ -11,6 +11,7 @@
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/random.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <net/ip.h>
#include <net/ipv6.h>
@@ -89,8 +90,8 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
if (!skb->dev || skb->dev->type != ARPHRD_ETHER ||
!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN)
return -EINVAL;
if (opt->flags & IPSET_DIM_TWO_SRC)
@@ -205,8 +206,8 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
if (!skb->dev || skb->dev->type != ARPHRD_ETHER ||
!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN)
return -EINVAL;
if (opt->flags & IPSET_DIM_TWO_SRC)

View File

@@ -8,6 +8,7 @@
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <net/netlink.h>
@@ -77,8 +78,8 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
if (!skb->dev || skb->dev->type != ARPHRD_ETHER ||
!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN)
return -EINVAL;
if (opt->flags & IPSET_DIM_ONE_SRC)

View File

@@ -281,6 +281,20 @@ unlock:
mutex_unlock(&ipvs->est_mutex);
}
static int get_conn_tab_size(struct netns_ipvs *ipvs)
{
const struct ip_vs_rht *t;
int size = 0;
rcu_read_lock();
t = rcu_dereference(ipvs->conn_tab);
if (t)
size = t->size;
rcu_read_unlock();
return size;
}
int
ip_vs_use_count_inc(void)
{
@@ -2431,6 +2445,60 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write,
return ret;
}
static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val = *valp;
int ret;
struct ctl_table tmp_table = {
.data = &val,
.maxlen = sizeof(int),
};
ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
if (write && ret >= 0) {
if (val < -8 || val > 8) {
ret = -EINVAL;
} else {
*valp = val;
if (rcu_access_pointer(ipvs->conn_tab))
mod_delayed_work(system_unbound_wq,
&ipvs->conn_resize_work, 0);
}
}
return ret;
}
static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val = *valp;
int ret;
struct ctl_table tmp_table = {
.data = &val,
.maxlen = sizeof(int),
};
ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
if (write && ret >= 0) {
if (val < -8 || val > 8) {
ret = -EINVAL;
} else {
*valp = val;
if (rcu_access_pointer(ipvs->svc_table))
mod_delayed_work(system_unbound_wq,
&ipvs->svc_resize_work, 0);
}
}
return ret;
}
/*
* IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
* Do not change order or insert new entries without
@@ -2619,6 +2687,18 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = ipvs_proc_est_nice,
},
{
.procname = "conn_lfactor",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = ipvs_proc_conn_lfactor,
},
{
.procname = "svc_lfactor",
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = ipvs_proc_svc_lfactor,
},
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -2741,10 +2821,13 @@ static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_net(seq);
struct netns_ipvs *ipvs = net_ipvs(net);
if (v == SEQ_START_TOKEN) {
seq_printf(seq,
"IP Virtual Server version %d.%d.%d (size=%d)\n",
NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
NVERSION(IP_VS_VERSION_CODE), get_conn_tab_size(ipvs));
seq_puts(seq,
"Prot LocalAddress:Port Scheduler Flags\n");
seq_puts(seq,
@@ -2907,6 +2990,144 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
return 0;
}
static int ip_vs_status_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int resched_score = 0;
struct ip_vs_conn_hnode *hn;
struct hlist_bl_head *head;
struct ip_vs_service *svc;
struct ip_vs_rht *t, *pt;
struct hlist_bl_node *e;
int old_gen, new_gen;
u32 counts[8];
u32 bucket;
int count;
u32 sum1;
u32 sum;
int i;
rcu_read_lock();
t = rcu_dereference(ipvs->conn_tab);
seq_printf(seq, "Conns:\t%d\n", atomic_read(&ipvs->conn_count));
seq_printf(seq, "Conn buckets:\t%d (%d bits, lfactor %d)\n",
t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0);
if (!atomic_read(&ipvs->conn_count))
goto after_conns;
old_gen = atomic_read(&ipvs->conn_tab_changes);
repeat_conn:
smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */
memset(counts, 0, sizeof(counts));
ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) {
for (bucket = 0; bucket < t->size; bucket++) {
DECLARE_IP_VS_RHT_WALK_BUCKET_RCU();
count = 0;
resched_score++;
ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
count = 0;
hlist_bl_for_each_entry_rcu(hn, e, head, node)
count++;
}
resched_score += count;
if (resched_score >= 100) {
resched_score = 0;
cond_resched_rcu();
new_gen = atomic_read(&ipvs->conn_tab_changes);
/* New table installed ? */
if (old_gen != new_gen) {
old_gen = new_gen;
goto repeat_conn;
}
}
counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++;
}
}
for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
sum += counts[i];
sum1 = sum - counts[0];
seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n",
counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U));
for (i = 1; i < ARRAY_SIZE(counts); i++) {
if (!counts[i])
continue;
seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n",
i, counts[i],
(unsigned long)counts[i] * 100 / max(sum1, 1U));
}
after_conns:
t = rcu_dereference(ipvs->svc_table);
count = ip_vs_get_num_services(ipvs);
seq_printf(seq, "Services:\t%d\n", count);
seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n",
t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0);
if (!count)
goto after_svc;
old_gen = atomic_read(&ipvs->svc_table_changes);
repeat_svc:
smp_rmb(); /* ipvs->svc_table and svc_table_changes */
memset(counts, 0, sizeof(counts));
ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, pt) {
for (bucket = 0; bucket < t->size; bucket++) {
DECLARE_IP_VS_RHT_WALK_BUCKET_RCU();
count = 0;
resched_score++;
ip_vs_rht_walk_bucket_rcu(t, bucket, head) {
count = 0;
hlist_bl_for_each_entry_rcu(svc, e, head,
s_list)
count++;
}
resched_score += count;
if (resched_score >= 100) {
resched_score = 0;
cond_resched_rcu();
new_gen = atomic_read(&ipvs->svc_table_changes);
/* New table installed ? */
if (old_gen != new_gen) {
old_gen = new_gen;
goto repeat_svc;
}
}
counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++;
}
}
for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++)
sum += counts[i];
sum1 = sum - counts[0];
seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n",
counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U));
for (i = 1; i < ARRAY_SIZE(counts); i++) {
if (!counts[i])
continue;
seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n",
i, counts[i],
(unsigned long)counts[i] * 100 / max(sum1, 1U));
}
after_svc:
seq_printf(seq, "Stats thread slots:\t%d (max %lu)\n",
ipvs->est_kt_count, ipvs->est_max_threads);
seq_printf(seq, "Stats chain max len:\t%d\n", ipvs->est_chain_max);
seq_printf(seq, "Stats thread ests:\t%d\n",
ipvs->est_chain_max * IPVS_EST_CHAIN_FACTOR *
IPVS_EST_NTICKS);
rcu_read_unlock();
return 0;
}
#endif
/*
@@ -3425,7 +3646,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
char buf[64];
sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
NVERSION(IP_VS_VERSION_CODE), get_conn_tab_size(ipvs));
if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
ret = -EFAULT;
goto out;
@@ -3437,8 +3658,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
case IP_VS_SO_GET_INFO:
{
struct ip_vs_getinfo info;
info.version = IP_VS_VERSION_CODE;
info.size = ip_vs_conn_tab_size;
info.size = get_conn_tab_size(ipvs);
info.num_services =
atomic_read(&ipvs->num_services[IP_VS_AF_INET]);
if (copy_to_user(user, &info, sizeof(info)) != 0)
@@ -4447,7 +4669,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
IP_VS_VERSION_CODE) ||
nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
ip_vs_conn_tab_size))
get_conn_tab_size(ipvs)))
goto nla_put_failure;
break;
}
@@ -4697,6 +4919,16 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_est_nice;
if (unpriv)
tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_conn_lfactor;
if (unpriv)
tbl[idx].mode = 0444;
tbl[idx].extra2 = ipvs;
tbl[idx++].data = &ipvs->sysctl_svc_lfactor;
#ifdef CONFIG_IP_VS_DEBUG
/* Global sysctls must be ro in non-init netns */
if (!net_eq(net, &init_net))
@@ -4807,6 +5039,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
ipvs->net->proc_net,
ip_vs_stats_percpu_show, NULL))
goto err_percpu;
if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net,
ip_vs_status_show, NULL))
goto err_status;
#endif
ret = ip_vs_control_net_init_sysctl(ipvs);
@@ -4817,6 +5052,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
err:
#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_status", ipvs->net->proc_net);
err_status:
remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
err_percpu:
@@ -4842,6 +5080,7 @@ void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
ip_vs_control_net_cleanup_sysctl(ipvs);
cancel_delayed_work_sync(&ipvs->est_reload_work);
#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_status", ipvs->net->proc_net);
remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net);
remove_proc_entry("ip_vs_stats", ipvs->net->proc_net);
remove_proc_entry("ip_vs", ipvs->net->proc_net);

View File

@@ -323,9 +323,6 @@ nf_ct_get_tuple(const struct sk_buff *skb,
#endif
case IPPROTO_TCP:
case IPPROTO_UDP:
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
case IPPROTO_UDPLITE:
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
case IPPROTO_SCTP:
#endif
@@ -1987,11 +1984,6 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct,
case IPPROTO_ICMPV6:
return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state);
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
case IPPROTO_UDPLITE:
return nf_conntrack_udplite_packet(ct, skb, dataoff,
ctinfo, state);
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
case IPPROTO_SCTP:
return nf_conntrack_sctp_packet(ct, skb, dataoff,

View File

@@ -103,9 +103,6 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
#ifdef CONFIG_NF_CT_PROTO_SCTP
case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite;
#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE: return &nf_conntrack_l4proto_gre;
#endif

View File

@@ -129,91 +129,6 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
static void udplite_error_log(const struct sk_buff *skb,
const struct nf_hook_state *state,
const char *msg)
{
nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg);
}
static bool udplite_error(struct sk_buff *skb,
unsigned int dataoff,
const struct nf_hook_state *state)
{
unsigned int udplen = skb->len - dataoff;
const struct udphdr *hdr;
struct udphdr _hdr;
unsigned int cscov;
/* Header is too small? */
hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (!hdr) {
udplite_error_log(skb, state, "short packet");
return true;
}
cscov = ntohs(hdr->len);
if (cscov == 0) {
cscov = udplen;
} else if (cscov < sizeof(*hdr) || cscov > udplen) {
udplite_error_log(skb, state, "invalid checksum coverage");
return true;
}
/* UDPLITE mandates checksums */
if (!hdr->check) {
udplite_error_log(skb, state, "checksum missing");
return true;
}
/* Checksum invalid? Ignore. */
if (state->hook == NF_INET_PRE_ROUTING &&
state->net->ct.sysctl_checksum &&
nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP,
state->pf)) {
udplite_error_log(skb, state, "bad checksum");
return true;
}
return false;
}
/* Returns verdict for packet, and may modify conntracktype */
int nf_conntrack_udplite_packet(struct nf_conn *ct,
struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
unsigned int *timeouts;
if (udplite_error(skb, dataoff, state))
return -NF_ACCEPT;
timeouts = nf_ct_timeout_lookup(ct);
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
/* If we've seen traffic both ways, this is some kind of UDP
stream. Extend timeout. */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
nf_ct_refresh_acct(ct, ctinfo, skb,
timeouts[UDP_CT_REPLIED]);
if (unlikely((ct->status & IPS_NAT_CLASH)))
return NF_ACCEPT;
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
} else {
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]);
}
return NF_ACCEPT;
}
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
#include <linux/netfilter/nfnetlink.h>
@@ -299,26 +214,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
};
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
{
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
.nla_policy = nf_ct_port_nla_policy,
#endif
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = udp_timeout_nlattr_to_obj,
.obj_to_nlattr = udp_timeout_obj_to_nlattr,
.nlattr_max = CTA_TIMEOUT_UDP_MAX,
.obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX,
.nla_policy = udp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
};
#endif

View File

@@ -61,7 +61,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
ntohs(tuple->src.u.tcp.port),
ntohs(tuple->dst.u.tcp.port));
break;
case IPPROTO_UDPLITE:
case IPPROTO_UDP:
seq_printf(s, "sport=%hu dport=%hu ",
ntohs(tuple->src.u.udp.port),
@@ -277,7 +276,6 @@ static const char* l4proto_name(u16 proto)
case IPPROTO_UDP: return "udp";
case IPPROTO_GRE: return "gre";
case IPPROTO_SCTP: return "sctp";
case IPPROTO_UDPLITE: return "udplite";
case IPPROTO_ICMPV6: return "icmpv6";
}

View File

@@ -78,7 +78,10 @@ dump_arp_packet(struct nf_log_buf *m,
else
logflags = NF_LOG_DEFAULT_MASK;
if (logflags & NF_LOG_MACDECODE) {
if ((logflags & NF_LOG_MACDECODE) &&
skb->dev && skb->dev->type == ARPHRD_ETHER &&
skb_mac_header_was_set(skb) &&
skb_mac_header_len(skb) >= ETH_HLEN) {
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);
@@ -797,6 +800,9 @@ static void dump_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN)
return;
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);

View File

@@ -68,7 +68,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
fl4->daddr = t->dst.u3.ip;
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
t->dst.protonum == IPPROTO_SCTP)
fl4->fl4_dport = t->dst.u.all;
}
@@ -79,7 +78,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
fl4->saddr = t->src.u3.ip;
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
t->dst.protonum == IPPROTO_SCTP)
fl4->fl4_sport = t->src.u.all;
}
@@ -99,7 +97,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
fl6->daddr = t->dst.u3.in6;
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
t->dst.protonum == IPPROTO_SCTP)
fl6->fl6_dport = t->dst.u.all;
}
@@ -110,7 +107,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
fl6->saddr = t->src.u3.in6;
if (t->dst.protonum == IPPROTO_TCP ||
t->dst.protonum == IPPROTO_UDP ||
t->dst.protonum == IPPROTO_UDPLITE ||
t->dst.protonum == IPPROTO_SCTP)
fl6->fl6_sport = t->src.u.all;
}
@@ -415,7 +411,6 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
case IPPROTO_GRE: /* all fall though */
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_SCTP:
if (maniptype == NF_NAT_MANIP_SRC)
port = tuple->src.u.all;
@@ -612,7 +607,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
goto find_free_id;
#endif
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_TCP:
case IPPROTO_SCTP:
if (maniptype == NF_NAT_MANIP_SRC)

View File

@@ -79,23 +79,6 @@ static bool udp_manip_pkt(struct sk_buff *skb,
return true;
}
static bool udplite_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
struct udphdr *hdr;
if (skb_ensure_writable(skb, hdroff + sizeof(*hdr)))
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
__udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
#endif
return true;
}
static bool
sctp_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
@@ -287,9 +270,6 @@ static bool l4proto_manip_pkt(struct sk_buff *skb,
case IPPROTO_UDP:
return udp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_UDPLITE:
return udplite_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_SCTP:
return sctp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);

View File

@@ -457,7 +457,6 @@ static int cttimeout_default_get(struct sk_buff *skb,
timeouts = nf_tcp_pernet(info->net)->timeouts;
break;
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
timeouts = nf_udp_pernet(info->net)->timeouts;
break;
case IPPROTO_ICMPV6:

View File

@@ -401,7 +401,7 @@ nfulnl_timer(struct timer_list *t)
static u32 nfulnl_get_bridge_size(const struct sk_buff *skb)
{
u32 size = 0;
u32 mac_len, size = 0;
if (!skb_mac_header_was_set(skb))
return 0;
@@ -412,14 +412,17 @@ static u32 nfulnl_get_bridge_size(const struct sk_buff *skb)
size += nla_total_size(sizeof(u16)); /* tag */
}
if (skb->network_header > skb->mac_header)
size += nla_total_size(skb->network_header - skb->mac_header);
mac_len = skb_mac_header_len(skb);
if (mac_len > 0)
size += nla_total_size(mac_len);
return size;
}
static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb)
{
u32 mac_len;
if (!skb_mac_header_was_set(skb))
return 0;
@@ -437,12 +440,10 @@ static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff
nla_nest_end(inst->skb, nest);
}
if (skb->mac_header < skb->network_header) {
int len = (int)(skb->network_header - skb->mac_header);
if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb)))
goto nla_put_failure;
}
mac_len = skb_mac_header_len(skb);
if (mac_len > 0 &&
nla_put(inst->skb, NFULA_L2HDR, mac_len, skb_mac_header(skb)))
goto nla_put_failure;
return 0;

View File

@@ -579,6 +579,7 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry)
{
struct sk_buff *entskb = entry->skb;
u32 nlalen = 0;
u32 mac_len;
if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
return 0;
@@ -587,9 +588,9 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry)
nlalen += nla_total_size(nla_total_size(sizeof(__be16)) +
nla_total_size(sizeof(__be16)));
if (entskb->network_header > entskb->mac_header)
nlalen += nla_total_size((entskb->network_header -
entskb->mac_header));
mac_len = skb_mac_header_len(entskb);
if (mac_len > 0)
nlalen += nla_total_size(mac_len);
return nlalen;
}
@@ -597,6 +598,7 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry)
static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb)
{
struct sk_buff *entskb = entry->skb;
u32 mac_len;
if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
return 0;
@@ -615,12 +617,10 @@ static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb)
nla_nest_end(skb, nest);
}
if (entskb->mac_header < entskb->network_header) {
int len = (int)(entskb->network_header - entskb->mac_header);
if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb)))
goto nla_put_failure;
}
mac_len = skb_mac_header_len(entskb);
if (mac_len > 0 &&
nla_put(skb, NFQA_L2HDR, mac_len, skb_mac_header(entskb)))
goto nla_put_failure;
return 0;
@@ -1004,13 +1004,13 @@ nf_queue_entry_dup(struct nf_queue_entry *e)
static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
{
if (nf_bridge_info_get(skb))
__skb_push(skb, skb->network_header - skb->mac_header);
__skb_push(skb, skb_mac_header_len(skb));
}
static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
{
if (nf_bridge_info_get(skb))
__skb_pull(skb, skb->network_header - skb->mac_header);
__skb_pull(skb, skb_mac_header_len(skb));
}
#else
#define nf_bridge_adjust_skb_data(s) do {} while (0)
@@ -1469,8 +1469,7 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry,
}
if (nfqa[NFQA_L2HDR]) {
int mac_header_len = entry->skb->network_header -
entry->skb->mac_header;
u32 mac_header_len = skb_mac_header_len(entry->skb);
if (mac_header_len != nla_len(nfqa[NFQA_L2HDR]))
return -EINVAL;

View File

@@ -1252,7 +1252,6 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
switch (priv->l4proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
case IPPROTO_UDPLITE:
case IPPROTO_DCCP:
case IPPROTO_SCTP:
break;

View File

@@ -116,6 +116,11 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
goto out;
}
iph = ip_hdr(skb);
if (iph->ttl <= 1) {
verdict = NF_DROP;
goto out;
}
ip_decrease_ttl(iph);
neigh_table = NEIGH_ARP_TABLE;
break;
@@ -132,6 +137,11 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
goto out;
}
ip6h = ipv6_hdr(skb);
if (ip6h->hop_limit <= 1) {
verdict = NF_DROP;
goto out;
}
ip6h->hop_limit--;
neigh_table = NEIGH_ND_TABLE;
break;

View File

@@ -819,13 +819,17 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
/* non-compat version may have padding after verdict */
struct compat_xt_standard_target {
struct compat_xt_entry_target t;
compat_uint_t verdict;
/* Must be last as it ends in a flexible-array member. */
TRAILING_OVERLAP(struct compat_xt_entry_target, t, data,
compat_uint_t verdict;
);
};
struct compat_xt_error_target {
struct compat_xt_entry_target t;
char errorname[XT_FUNCTION_MAXNAMELEN];
/* Must be last as it ends in a flexible-array member. */
TRAILING_OVERLAP(struct compat_xt_entry_target, t, data,
char errorname[XT_FUNCTION_MAXNAMELEN];
);
};
int xt_compat_check_entry_offsets(const void *base, const char *elems,

View File

@@ -6,6 +6,7 @@
* Hop Limit matching module
* (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -22,6 +23,18 @@ MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_ttl");
MODULE_ALIAS("ip6t_hl");
static int ttl_mt_check(const struct xt_mtchk_param *par)
{
const struct ipt_ttl_info *info = par->matchinfo;
if (info->mode > IPT_TTL_GT) {
pr_err("Unknown TTL match mode: %d\n", info->mode);
return -EINVAL;
}
return 0;
}
static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct ipt_ttl_info *info = par->matchinfo;
@@ -41,6 +54,18 @@ static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par)
return false;
}
static int hl_mt6_check(const struct xt_mtchk_param *par)
{
const struct ip6t_hl_info *info = par->matchinfo;
if (info->mode > IP6T_HL_GT) {
pr_err("Unknown Hop Limit match mode: %d\n", info->mode);
return -EINVAL;
}
return 0;
}
static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct ip6t_hl_info *info = par->matchinfo;
@@ -65,6 +90,7 @@ static struct xt_match hl_mt_reg[] __read_mostly = {
.name = "ttl",
.revision = 0,
.family = NFPROTO_IPV4,
.checkentry = ttl_mt_check,
.match = ttl_mt,
.matchsize = sizeof(struct ipt_ttl_info),
.me = THIS_MODULE,
@@ -73,6 +99,7 @@ static struct xt_match hl_mt_reg[] __read_mostly = {
.name = "hl",
.revision = 0,
.family = NFPROTO_IPV6,
.checkentry = hl_mt6_check,
.match = hl_mt6,
.matchsize = sizeof(struct ip6t_hl_info),
.me = THIS_MODULE,

View File

@@ -29,9 +29,7 @@ static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (skb->dev == NULL || skb->dev->type != ARPHRD_ETHER)
return false;
if (skb_mac_header(skb) < skb->head)
return false;
if (skb_mac_header(skb) + ETH_HLEN > skb->data)
if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN)
return false;
ret = ether_addr_equal(eth_hdr(skb)->h_source, info->srcaddr);
ret ^= info->invert;

View File

@@ -107,6 +107,28 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
return -EINVAL;
}
#define X(memb) strnlen(info->memb, sizeof(info->memb)) >= sizeof(info->memb)
if (info->bitmask & XT_PHYSDEV_OP_IN) {
if (info->physindev[0] == '\0')
return -EINVAL;
if (X(physindev))
return -ENAMETOOLONG;
}
if (info->bitmask & XT_PHYSDEV_OP_OUT) {
if (info->physoutdev[0] == '\0')
return -EINVAL;
if (X(physoutdev))
return -ENAMETOOLONG;
}
if (X(in_mask))
return -ENAMETOOLONG;
if (X(out_mask))
return -ENAMETOOLONG;
#undef X
if (!brnf_probed) {
brnf_probed = true;
request_module("br_netfilter");

View File

@@ -168,52 +168,41 @@ static int socket_mt_enable_defrag(struct net *net, int family)
static int socket_mt_v1_check(const struct xt_mtchk_param *par)
{
const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
int err;
err = socket_mt_enable_defrag(par->net, par->family);
if (err)
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V1) {
pr_info_ratelimited("unknown flags 0x%x\n",
info->flags & ~XT_SOCKET_FLAGS_V1);
return -EINVAL;
}
return 0;
return socket_mt_enable_defrag(par->net, par->family);
}
static int socket_mt_v2_check(const struct xt_mtchk_param *par)
{
const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo;
int err;
err = socket_mt_enable_defrag(par->net, par->family);
if (err)
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V2) {
pr_info_ratelimited("unknown flags 0x%x\n",
info->flags & ~XT_SOCKET_FLAGS_V2);
return -EINVAL;
}
return 0;
return socket_mt_enable_defrag(par->net, par->family);
}
static int socket_mt_v3_check(const struct xt_mtchk_param *par)
{
const struct xt_socket_mtinfo3 *info =
(struct xt_socket_mtinfo3 *)par->matchinfo;
int err;
err = socket_mt_enable_defrag(par->net, par->family);
if (err)
return err;
if (info->flags & ~XT_SOCKET_FLAGS_V3) {
pr_info_ratelimited("unknown flags 0x%x\n",
info->flags & ~XT_SOCKET_FLAGS_V3);
return -EINVAL;
}
return 0;
return socket_mt_enable_defrag(par->net, par->family);
}
static void socket_mt_destroy(const struct xt_mtdtor_param *par)