@@ -86,6 +86,8 @@ struct netns_ipv4 {
/* Shall we try to damage output packets if routing dev changes? */
int sysctl_ip_dynaddr;
int sysctl_ip_early_demux;
+ int sysctl_tcp_early_demux;
+ int sysctl_udp_early_demux;
int sysctl_fwmark_reflect;
int sysctl_tcp_fwmark_accept;
@@ -40,6 +40,7 @@
/* This is used to register protocols. */
struct net_protocol {
void (*early_demux)(struct sk_buff *skb);
+ int *early_demux_enabled;
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info);
unsigned int no_policy:1,
@@ -54,7 +55,7 @@ struct net_protocol {
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
void (*early_demux)(struct sk_buff *skb);
-
+ int *early_demux_enabled;
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb,
@@ -1579,7 +1579,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
};
#endif
-static const struct net_protocol tcp_protocol = {
+static struct net_protocol tcp_protocol = {
.early_demux = tcp_v4_early_demux,
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
@@ -1588,7 +1588,7 @@ u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_offset)
.icmp_strict_tag_validation = 1,
};
-static const struct net_protocol udp_protocol = {
+static struct net_protocol udp_protocol = {
.early_demux = udp_v4_early_demux,
.handler = udp_rcv,
.err_handler = udp_err,
@@ -1699,7 +1699,10 @@ static __net_init int inet_init_net(struct net *net)
*/
net->ipv4.sysctl_ip_default_ttl = IPDEFTTL;
net->ipv4.sysctl_ip_dynaddr = 0;
- net->ipv4.sysctl_ip_early_demux = 1;
+ net->ipv4.sysctl_udp_early_demux = 1;
+ net->ipv4.sysctl_tcp_early_demux = 1;
+ tcp_protocol.early_demux_enabled = &net->ipv4.sysctl_tcp_early_demux;
+ udp_protocol.early_demux_enabled = &net->ipv4.sysctl_udp_early_demux;
return 0;
}
@@ -329,7 +329,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
int protocol = iph->protocol;
ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && ipprot->early_demux) {
+ if (ipprot && ipprot->early_demux && *ipprot->early_demux_enabled) {
ipprot->early_demux(skb);
/* must reload iph, skb->head might have changed */
iph = ip_hdr(skb);
@@ -737,6 +737,20 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
.proc_handler = proc_dointvec
},
{
+ .procname = "udp_early_demux",
+ .data = &init_net.ipv4.sysctl_udp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .procname = "tcp_early_demux",
+ .data = &init_net.ipv4.sysctl_tcp_early_demux,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "ip_default_ttl",
.data = &init_net.ipv4.sysctl_ip_default_ttl,
.maxlen = sizeof(int),
@@ -60,7 +60,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && ipprot->early_demux)
+ if (ipprot && ipprot->early_demux && *ipprot->early_demux_enabled)
ipprot->early_demux(skb);
}
if (!skb_valid_dst(skb))
@@ -1926,7 +1926,7 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
-static const struct inet6_protocol tcpv6_protocol = {
+static struct inet6_protocol tcpv6_protocol = {
.early_demux = tcp_v6_early_demux,
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
@@ -1944,6 +1944,7 @@ struct proto tcpv6_prot = {
static int __net_init tcpv6_net_init(struct net *net)
{
+ tcpv6_protocol.early_demux_enabled = &net->ipv4.sysctl_tcp_early_demux;
return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
SOCK_RAW, IPPROTO_TCP, net);
}
Certain system process significant unconnected UDP workload. It would be preferrable to disable UDP early demux for those systems and enable it for TCP only. Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> Suggested-by: Eric Dumazet <edumazet@google.com> --- include/net/netns/ipv4.h | 2 ++ include/net/protocol.h | 3 ++- net/ipv4/af_inet.c | 9 ++++++--- net/ipv4/ip_input.c | 2 +- net/ipv4/sysctl_net_ipv4.c | 14 ++++++++++++++ net/ipv6/ip6_input.c | 2 +- net/ipv6/tcp_ipv6.c | 3 ++- 7 files changed, 28 insertions(+), 7 deletions(-)