Patchwork tcp: make sysctl_tcp_ecn namespace aware

login
register
mail settings
Submitter Hannes Frederic Sowa
Date Jan. 5, 2013, 3:01 p.m.
Message ID <20130105150130.GC4031@order.stressinduktion.org>
Download mbox | patch
Permalink /patch/209682/
State Changes Requested
Delegated to: David Miller
Headers show

Comments

Hannes Frederic Sowa - Jan. 5, 2013, 3:01 p.m.
As per suggestion from Eric Dumazet this patch makes tcp_ecn sysctl aware.
The reason behind this patch is to ease the testing of ecn problems on
the internet and allows applications to tune their own use of ecn.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
 include/net/netns/ipv4.h   |  2 ++
 include/net/tcp.h          |  7 ++++---
 net/ipv4/syncookies.c      |  6 ++++--
 net/ipv4/sysctl_net_ipv4.c | 16 +++++++++-------
 net/ipv4/tcp_input.c       |  2 --
 net/ipv4/tcp_ipv4.c        |  4 +++-
 net/ipv4/tcp_output.c      |  1 +
 net/ipv6/syncookies.c      |  3 ++-
 net/ipv6/tcp_ipv6.c        |  7 +++++--
 9 files changed, 30 insertions(+), 18 deletions(-)
stephen hemminger - Jan. 5, 2013, 7 p.m.
Rather than passing sysctl_tcp_ecn around as a parameter, I think it
would be clearer and more efficient to make the ECN functions namespace
aware.

Instead of:
> @@ -728,7 +728,8 @@ struct tcp_skb_cb {
>   * notifications, we disable TCP ECN negociation.
>   */
>  static inline void
> -TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
> +TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb,
> +		int sysctl_tcp_ecn)
>  {
>  	const struct tcphdr *th = tcp_hdr(skb);
>  

@@ -731,8 +730,9 @@ static inline void
 TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
 {
        const struct tcphdr *th = tcp_hdr(skb);
-
-       if (sysctl_tcp_ecn && th->ece && th->cwr &&
+       
+       if (sock_net(req->sk)->ipv4.sysctl_tcp_ecn && 
+           th->ece && th->cwr &&
            INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield))
                inet_rsk(req)->ecn_ok = 1;
 }
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Frederic Sowa - Jan. 5, 2013, 7:05 p.m.
On Sat, Jan 05, 2013 at 11:00:05AM -0800, Stephen Hemminger wrote:
> Rather than passing sysctl_tcp_ecn around as a parameter, I think it
> would be clearer and more efficient to make the ECN functions namespace
> aware.
> 
> Instead of:
> > @@ -728,7 +728,8 @@ struct tcp_skb_cb {
> >   * notifications, we disable TCP ECN negociation.
> >   */
> >  static inline void
> > -TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
> > +TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb,
> > +		int sysctl_tcp_ecn)
> >  {
> >  	const struct tcphdr *th = tcp_hdr(skb);
> >  
> 
> @@ -731,8 +730,9 @@ static inline void
>  TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
>  {
>         const struct tcphdr *th = tcp_hdr(skb);
> -
> -       if (sysctl_tcp_ecn && th->ece && th->cwr &&
> +       
> +       if (sock_net(req->sk)->ipv4.sysctl_tcp_ecn && 
> +           th->ece && th->cwr &&
>             INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield))
>                 inet_rsk(req)->ecn_ok = 1;

Having a quick look, I think req->sk is not initialized at that moment, but
I'll verify later.

Thanks!

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Frederic Sowa - Jan. 6, 2013, 1:18 a.m.
On Sat, Jan 05, 2013 at 08:05:08PM +0100, Hannes Frederic Sowa wrote:
> On Sat, Jan 05, 2013 at 11:00:05AM -0800, Stephen Hemminger wrote:
> > Rather than passing sysctl_tcp_ecn around as a parameter, I think it
> > would be clearer and more efficient to make the ECN functions namespace
> > aware.
> > 
> > Instead of:
> > > @@ -728,7 +728,8 @@ struct tcp_skb_cb {
> > >   * notifications, we disable TCP ECN negociation.
> > >   */
> > >  static inline void
> > > -TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
> > > +TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb,
> > > +		int sysctl_tcp_ecn)
> > >  {
> > >  	const struct tcphdr *th = tcp_hdr(skb);
> > >  
> > 
> > @@ -731,8 +730,9 @@ static inline void
> >  TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
> >  {
> >         const struct tcphdr *th = tcp_hdr(skb);
> > -
> > -       if (sysctl_tcp_ecn && th->ece && th->cwr &&
> > +       
> > +       if (sock_net(req->sk)->ipv4.sysctl_tcp_ecn && 
> > +           th->ece && th->cwr &&
> >             INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield))
> >                 inet_rsk(req)->ecn_ok = 1;
> 
> Having a quick look, I think req->sk is not initialized at that moment, but
> I'll verify later.

I just verified that req->sk is indeed not initialized at that point. I
had a kernel panic on the first syn packet. So I would propose to keep
the patch as it is.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Jan. 6, 2013, 1:27 a.m.
On Sun, 2013-01-06 at 02:18 +0100, Hannes Frederic Sowa wrote:

> 
> I just verified that req->sk is indeed not initialized at that point. I
> had a kernel panic on the first syn packet. So I would propose to keep
> the patch as it is.
> 

Please pass the "struct net" pointer instead, so that the tcp_ecn
dereference is done only if really needed.

Naming a function parameter, or an automatic variable, sysctl_tcp_ecn is
not very nice.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2ae2b83..9b78862 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -61,6 +61,8 @@  struct netns_ipv4 {
 	int sysctl_icmp_ratemask;
 	int sysctl_icmp_errors_use_inbound_ifaddr;
 
+	int sysctl_tcp_ecn;
+
 	kgid_t sysctl_ping_group_range[2];
 	long sysctl_tcp_mem[3];
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index aed42c7..a808c59 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -266,7 +266,6 @@  extern int sysctl_tcp_abort_on_overflow;
 extern int sysctl_tcp_max_orphans;
 extern int sysctl_tcp_fack;
 extern int sysctl_tcp_reordering;
-extern int sysctl_tcp_ecn;
 extern int sysctl_tcp_dsack;
 extern int sysctl_tcp_wmem[3];
 extern int sysctl_tcp_rmem[3];
@@ -504,7 +503,8 @@  static inline __u32 cookie_v4_init_sequence(struct sock *sk,
 #endif
 
 extern __u32 cookie_init_timestamp(struct request_sock *req);
-extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *);
+extern bool cookie_check_timestamp(struct tcp_options_received *opt,
+				int sysctl_tcp_ecn, bool *ecn_ok);
 
 /* From net/ipv6/syncookies.c */
 extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb);
@@ -728,7 +728,8 @@  struct tcp_skb_cb {
  * notifications, we disable TCP ECN negociation.
  */
 static inline void
-TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb)
+TCP_ECN_create_request(struct request_sock *req, const struct sk_buff *skb,
+		int sysctl_tcp_ecn)
 {
 	const struct tcphdr *th = tcp_hdr(skb);
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b236ef0..f51c3f5 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -232,7 +232,8 @@  static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
  *
  * return false if we decode an option that should not be.
  */
-bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
+bool cookie_check_timestamp(struct tcp_options_received *tcp_opt,
+			int sysctl_tcp_ecn, bool *ecn_ok)
 {
 	/* echoed timestamp, lowest bits contain options */
 	u32 options = tcp_opt->rcv_tsecr & TSMASK;
@@ -279,6 +280,7 @@  struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	__u8 rcv_wscale;
 	bool ecn_ok = false;
 	struct flowi4 fl4;
+	int sysctl_tcp_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -295,7 +297,7 @@  struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
 
-	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
+	if (!cookie_check_timestamp(&tcp_opt, sysctl_tcp_ecn, &ecn_ok))
 		goto out;
 
 	ret = NULL;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d84400b..7716d40 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -538,13 +538,6 @@  static struct ctl_table ipv4_table[] = {
 		.proc_handler	= proc_dointvec
 	},
 	{
-		.procname	= "tcp_ecn",
-		.data		= &sysctl_tcp_ecn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-	{
 		.procname	= "tcp_dsack",
 		.data		= &sysctl_tcp_dsack,
 		.maxlen		= sizeof(int),
@@ -850,6 +843,13 @@  static struct ctl_table ipv4_net_table[] = {
 		.proc_handler	= ipv4_ping_group_range,
 	},
 	{
+		.procname	= "tcp_ecn",
+		.data		= &init_net.ipv4.sysctl_tcp_ecn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+	{
 		.procname	= "tcp_mem",
 		.maxlen		= sizeof(init_net.ipv4.sysctl_tcp_mem),
 		.mode		= 0644,
@@ -882,6 +882,8 @@  static __net_init int ipv4_sysctl_init_net(struct net *net)
 			&net->ipv4.sysctl_icmp_ratemask;
 		table[6].data =
 			&net->ipv4.sysctl_ping_group_range;
+		table[7].data =
+			&net->ipv4.sysctl_tcp_ecn;
 
 		/* Don't export sysctls to unprivileged users */
 		if (net->user_ns != &init_user_ns)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a28e4db..38e1184 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -81,8 +81,6 @@  int sysctl_tcp_sack __read_mostly = 1;
 int sysctl_tcp_fack __read_mostly = 1;
 int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
 EXPORT_SYMBOL(sysctl_tcp_reordering);
-int sysctl_tcp_ecn __read_mostly = 2;
-EXPORT_SYMBOL(sysctl_tcp_ecn);
 int sysctl_tcp_dsack __read_mostly = 1;
 int sysctl_tcp_app_win __read_mostly = 31;
 int sysctl_tcp_adv_win_scale __read_mostly = 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 54139fa..70215e7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1568,7 +1568,8 @@  int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	if (!want_cookie || tmp_opt.tstamp_ok)
-		TCP_ECN_create_request(req, skb);
+		TCP_ECN_create_request(req, skb,
+				sock_net(sk)->ipv4.sysctl_tcp_ecn);
 
 	if (want_cookie) {
 		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
@@ -2888,6 +2889,7 @@  EXPORT_SYMBOL(tcp_prot);
 
 static int __net_init tcp_sk_init(struct net *net)
 {
+	net->ipv4.sysctl_tcp_ecn = 2;
 	return 0;
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5d45159..a407f92 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -312,6 +312,7 @@  static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff
 static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	int sysctl_tcp_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn;
 
 	tp->ecn_flags = 0;
 	if (sysctl_tcp_ecn == 1) {
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 4016197..2a632ed 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -163,6 +163,7 @@  struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	struct dst_entry *dst;
 	__u8 rcv_wscale;
 	bool ecn_ok = false;
+	int sysctl_tcp_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn;
 
 	if (!sysctl_tcp_syncookies || !th->ack || th->rst)
 		goto out;
@@ -179,7 +180,7 @@  struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
 	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
 
-	if (!cookie_check_timestamp(&tcp_opt, &ecn_ok))
+	if (!cookie_check_timestamp(&tcp_opt, sysctl_tcp_ecn, &ecn_ok))
 		goto out;
 
 	ret = NULL;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 93825dd..932d667 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1026,8 +1026,11 @@  static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	treq->rmt_addr = ipv6_hdr(skb)->saddr;
 	treq->loc_addr = ipv6_hdr(skb)->daddr;
-	if (!want_cookie || tmp_opt.tstamp_ok)
-		TCP_ECN_create_request(req, skb);
+	if (!want_cookie || tmp_opt.tstamp_ok) {
+		/* we reuse the ipv4 sysctl for ecn here */
+		TCP_ECN_create_request(req, skb,
+				sock_net(sk)->ipv4.sysctl_tcp_ecn);
+	}
 
 	treq->iif = sk->sk_bound_dev_if;