diff mbox

[2/5] soreuseport: TCP/IPv4 implementation

Message ID alpine.DEB.2.00.1301161259370.1764@cleese
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

Vijay Subramanian Jan. 16, 2013, 9:09 p.m. UTC
> 	 * Unlike other sk lookup places we do not check
> @@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk,
> 		    (!sk->sk_bound_dev_if ||
> 		     !sk2->sk_bound_dev_if ||
> 		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
> -			if (!reuse || !sk2->sk_reuse ||
> -			    sk2->sk_state == TCP_LISTEN) {
> +			if ((!reuse || !sk2->sk_reuse ||
> +			    sk2->sk_state == TCP_LISTEN) &&
> +			    (!reuseport || !sk2->sk_reuseport ||
> +			    (sk2->sk_state != TCP_TIME_WAIT &&
> +			     uid != sock_i_uid(sk2)))) {
> 				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
> 				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
> 				    sk2_rcv_saddr == sk_rcv_saddr(sk))


How about introducing some helper functions to make 
inet_csk_bid_conflict() and inet6_csk_bind_conflict() more readable as in 
patch below? We can add another test for reuseport() for soreuseport 
patches.

udp.c already has ipv4_rcv_saddr_equal() but it seems to call 
ipv6_only_sock() and not inet_v6_ipv6only() which is needed in 
inet{6}_csk_bid_conflict().So I added sk_rcv_saddr_equal().

Also the bind_conflict functions can return bool instead of int (not 
implemented in patch below).


If patch idea below is ok, I will send it officially.

Thanks,
Vijay



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

David Miller Jan. 16, 2013, 9:10 p.m. UTC | #1
From: Vijay Subramanian <subramanian.vijay@gmail.com>
Date: Wed, 16 Jan 2013 13:09:10 -0800 (PST)

> 
> +/* The port cannot be reused if the older socket is in LISTEN state
> or if
> + * either the old or new one does not allow reuse
> + */

Please post patches without them being destroyed by your mailer.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Herbert Jan. 16, 2013, 9:30 p.m. UTC | #2
I'll send updated patches today fixing the issue raised so far.  You
might want to put these on top of those?

On Wed, Jan 16, 2013 at 1:09 PM, Vijay Subramanian
<subramanian.vijay@gmail.com> wrote:
>
>
>>          * Unlike other sk lookup places we do not check
>> @@ -73,8 +75,11 @@ int inet_csk_bind_conflict(const struct sock *sk,
>>                     (!sk->sk_bound_dev_if ||
>>                      !sk2->sk_bound_dev_if ||
>>                      sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
>> -                       if (!reuse || !sk2->sk_reuse ||
>> -                           sk2->sk_state == TCP_LISTEN) {
>> +                       if ((!reuse || !sk2->sk_reuse ||
>> +                           sk2->sk_state == TCP_LISTEN) &&
>> +                           (!reuseport || !sk2->sk_reuseport ||
>> +                           (sk2->sk_state != TCP_TIME_WAIT &&
>> +                            uid != sock_i_uid(sk2)))) {
>>                                 const __be32 sk2_rcv_saddr =
>> sk_rcv_saddr(sk2);
>>                                 if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
>>                                     sk2_rcv_saddr == sk_rcv_saddr(sk))
>
>
>
> How about introducing some helper functions to make inet_csk_bid_conflict()
> and inet6_csk_bind_conflict() more readable as in patch below? We can add
> another test for reuseport() for soreuseport patches.
>
> udp.c already has ipv4_rcv_saddr_equal() but it seems to call
> ipv6_only_sock() and not inet_v6_ipv6only() which is needed in
> inet{6}_csk_bid_conflict().So I added sk_rcv_saddr_equal().
>
> Also the bind_conflict functions can return bool instead of int (not
> implemented in patch below).
>
>
> If patch idea below is ok, I will send it officially.
>
> Thanks,
> Vijay
>
>
> diff --git a/include/net/inet_connection_sock.h
> b/include/net/inet_connection_sock.h
> index 1832927..c15d2eb 100644
> --- a/include/net/inet_connection_sock.h
> +++ b/include/net/inet_connection_sock.h
> @@ -22,6 +22,8 @@
>
>  #include <net/inet_sock.h>
>  #include <net/request_sock.h>
> +#include <net/tcp_states.h>
> +#include <net/inet_timewait_sock.h>
>
>  #define INET_CSK_DEBUG 1
>
> @@ -205,6 +207,37 @@ static inline void inet_csk_clear_xmit_timer(struct
> sock *sk, const int what)
>  #endif
>  }
>
> +/* The port cannot be reused if the older socket is in LISTEN state or if
> + * either the old or new one does not allow reuse
> + */
> +static inline bool sk_reuse_equal(int reuse, const struct sock *sk2)
> +{
> +       return !reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN;
> +}
> +
> +/* The port cannot be reused if both sockets are bound to the same device
> or
> + * if either one is not bound
> + */
> +static inline bool sk_bound_dev_equal(const struct sock *sk,
> +                                     const struct sock *sk2)
> +{
> +       return !sk->sk_bound_dev_if || !sk2->sk_bound_dev_if ||
> +              sk->sk_bound_dev_if == sk2->sk_bound_dev_if;
> +}
> +
> +/* The port cannot be reused if both sockets have the same rcv_saddr
> + * or if either rcv_saddr is NULL
> + */
> +static inline bool sk_rcv_saddr_equal(const struct sock *sk1,
> +                                     const struct sock *sk2)
> +{
> +       __be32 sk1_rcv_saddr = sk_rcv_saddr(sk1);
> +       __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
> +
> +       return !sk2_rcv_saddr || !sk1_rcv_saddr ||
> +              sk2_rcv_saddr == sk1_rcv_saddr;
> +}
> +
>  /*
>   *     Reset the retransmission timer
>   */
> diff --git a/net/ipv4/inet_connection_sock.c
> b/net/ipv4/inet_connection_sock.c
> index d0670f0..375cca3 100644
> --- a/net/ipv4/inet_connection_sock.c
> +++ b/net/ipv4/inet_connection_sock.c
> @@ -68,26 +68,15 @@ int inet_csk_bind_conflict(const struct sock *sk,
>          */
>
>         sk_for_each_bound(sk2, node, &tb->owners) {
> -               if (sk != sk2 &&
> -                   !inet_v6_ipv6only(sk2) &&
> -                   (!sk->sk_bound_dev_if ||
> -                    !sk2->sk_bound_dev_if ||
>
> -                    sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
> -                       if (!reuse || !sk2->sk_reuse ||
> -                           sk2->sk_state == TCP_LISTEN) {
> -                               const __be32 sk2_rcv_saddr =
> sk_rcv_saddr(sk2);
> -                               if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
> -                                   sk2_rcv_saddr == sk_rcv_saddr(sk))
> -                                       break;
> -                       }
> -                       if (!relax && reuse && sk2->sk_reuse &&
> -                           sk2->sk_state != TCP_LISTEN) {
> -                               const __be32 sk2_rcv_saddr =
> sk_rcv_saddr(sk2);
> +               if (sk != sk2 && !inet_v6_ipv6only(sk2) &&
> +                   sk_bound_dev_equal(sk, sk2)) {
> +                       if (sk_reuse_equal(reuse, sk2) &&
> +                           sk_rcv_saddr_equal(sk, sk2))
> +                               break;
>
> -                               if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
> -                                   sk2_rcv_saddr == sk_rcv_saddr(sk))
> -                                       break;
> -                       }
> +                       if (!relax && sk_reuse_equal(reuse, sk2) &&
> +                           sk_rcv_saddr_equal(sk, sk2))
> +                               break;
>                 }
>         }
>         return node != NULL;
> diff --git a/net/ipv6/inet6_connection_sock.c
> b/net/ipv6/inet6_connection_sock.c
> index 3064785..8ebe20d 100644
> --- a/net/ipv6/inet6_connection_sock.c
> +++ b/net/ipv6/inet6_connection_sock.c
> @@ -39,13 +39,9 @@ int inet6_csk_bind_conflict(const struct sock *sk,
>          * vs net namespaces issues.
>          */
>         sk_for_each_bound(sk2, node, &tb->owners) {
> -               if (sk != sk2 &&
> -                   (!sk->sk_bound_dev_if ||
> -                    !sk2->sk_bound_dev_if ||
> -                    sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
> -                   (!sk->sk_reuse || !sk2->sk_reuse ||
> -                    sk2->sk_state == TCP_LISTEN) &&
> -                    ipv6_rcv_saddr_equal(sk, sk2))
> +               if (sk != sk2 && sk_bound_dev_equal(sk, sk2) &&
> +                   sk_reuse_equal(sk->sk_reuse, sk2) &&
> +                   ipv6_rcv_saddr_equal(sk, sk2))
>                         break;
>         }
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vijay Subramanian Jan. 16, 2013, 10:14 p.m. UTC | #3
On 16 January 2013 13:30, Tom Herbert <therbert@google.com> wrote:
> I'll send updated patches today fixing the issue raised so far.  You
> might want to put these on top of those?
>

Apologies for letting the mailer mangle the patch. I think I have fixed it now.
Instead of resending this version, I will wait till your patches are
applied and send a version on top of those.

Thanks,
Vijay
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 1832927..c15d2eb 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -22,6 +22,8 @@ 

  #include <net/inet_sock.h>
  #include <net/request_sock.h>
+#include <net/tcp_states.h>
+#include <net/inet_timewait_sock.h>

  #define INET_CSK_DEBUG 1

@@ -205,6 +207,37 @@  static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
  #endif
  }

+/* The port cannot be reused if the older socket is in LISTEN state or if
+ * either the old or new one does not allow reuse
+ */
+static inline bool sk_reuse_equal(int reuse, const struct sock *sk2)
+{
+	return !reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN;
+}
+
+/* The port cannot be reused if both sockets are bound to the same device or
+ * if either one is not bound
+ */
+static inline bool sk_bound_dev_equal(const struct sock *sk,
+				      const struct sock *sk2)
+{
+	return !sk->sk_bound_dev_if || !sk2->sk_bound_dev_if ||
+	       sk->sk_bound_dev_if == sk2->sk_bound_dev_if;
+}
+
+/* The port cannot be reused if both sockets have the same rcv_saddr
+ * or if either rcv_saddr is NULL
+ */
+static inline bool sk_rcv_saddr_equal(const struct sock *sk1,
+				      const struct sock *sk2)
+{
+	__be32 sk1_rcv_saddr = sk_rcv_saddr(sk1);
+	__be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+
+	return !sk2_rcv_saddr || !sk1_rcv_saddr ||
+	       sk2_rcv_saddr == sk1_rcv_saddr;
+}
+
  /*
   *	Reset the retransmission timer
   */
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index d0670f0..375cca3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -68,26 +68,15 @@  int inet_csk_bind_conflict(const struct sock *sk,
  	 */

  	sk_for_each_bound(sk2, node, &tb->owners) {
-		if (sk != sk2 &&
-		    !inet_v6_ipv6only(sk2) &&
-		    (!sk->sk_bound_dev_if ||
-		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) {
-			if (!reuse || !sk2->sk_reuse ||
-			    sk2->sk_state == TCP_LISTEN) {
-				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
-				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
-				    sk2_rcv_saddr == sk_rcv_saddr(sk))
-					break;
-			}
-			if (!relax && reuse && sk2->sk_reuse &&
-			    sk2->sk_state != TCP_LISTEN) {
-				const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);
+		if (sk != sk2 && !inet_v6_ipv6only(sk2) &&
+		    sk_bound_dev_equal(sk, sk2)) {
+			if (sk_reuse_equal(reuse, sk2) &&
+			    sk_rcv_saddr_equal(sk, sk2))
+				break;

-				if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) ||
-				    sk2_rcv_saddr == sk_rcv_saddr(sk))
-					break;
-			}
+			if (!relax && sk_reuse_equal(reuse, sk2) &&
+			    sk_rcv_saddr_equal(sk, sk2))
+				break;
  		}
  	}
  	return node != NULL;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 3064785..8ebe20d 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -39,13 +39,9 @@  int inet6_csk_bind_conflict(const struct sock *sk,
  	 * vs net namespaces issues.
  	 */
  	sk_for_each_bound(sk2, node, &tb->owners) {
-		if (sk != sk2 &&
-		    (!sk->sk_bound_dev_if ||
-		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
-		    (!sk->sk_reuse || !sk2->sk_reuse ||
-		     sk2->sk_state == TCP_LISTEN) &&
-		     ipv6_rcv_saddr_equal(sk, sk2))
+		if (sk != sk2 && sk_bound_dev_equal(sk, sk2) &&
+		    sk_reuse_equal(sk->sk_reuse, sk2) &&
+		    ipv6_rcv_saddr_equal(sk, sk2))
  			break;
  	}