Unix socket buffer attribution

Submitted by Yannick Koehler on Jan. 25, 2013, 3:32 p.m.

Details

Message ID CAJ4BwwEY6vc3_rxou6PMxAZCdzW0X5WtsRX3SLGkP3g9df209g@mail.gmail.com
State Changes Requested
Delegated to: David Miller
Headers show

Commit Message

Yannick Koehler Jan. 25, 2013, 3:32 p.m.
This patch should fix an issue where unix socket buffer remains
accounted as part of the socket sndbuf (sk_wmem_alloc) instead of
being accounted as part of the receiving socket rcvbuf
(sk_rmem_alloc), leading to a situation where if one of the receiving
socket isn't calling recvfrom() the sending socket can no more send to
any of its listeners, even those which properly behave.  This could
create a DOS situation where the unix socket is reachable by many
users on the same linux machine.

Signed-off-by: Yannick Koehler <yannick@koehler.name>

the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet Jan. 26, 2013, 12:53 a.m.
On Fri, 2013-01-25 at 10:32 -0500, Yannick Koehler wrote:
> This patch should fix an issue where unix socket buffer remains
> accounted as part of the socket sndbuf (sk_wmem_alloc) instead of
> being accounted as part of the receiving socket rcvbuf
> (sk_rmem_alloc), leading to a situation where if one of the receiving
> socket isn't calling recvfrom() the sending socket can no more send to
> any of its listeners, even those which properly behave.  This could
> create a DOS situation where the unix socket is reachable by many
> users on the same linux machine.
> 
> Signed-off-by: Yannick Koehler <yannick@koehler.name>
> 

Your patch is mangled, check Documentation/email-clients.txt

> diff -uprN -X linux-3.6/Documentation/dontdiff
> linux-3.6-vanilla/include/net/af_unix.h
> linux-3.6/include/net/af_unix.h
> --- linux-3.6-vanilla/include/net/af_unix.h 2012-09-30 19:47:46.000000000 -0400
> +++ linux-3.6/include/net/af_unix.h 2013-01-24 15:26:20.000000000 -0500
> @@ -34,6 +34,7 @@ struct unix_skb_parms {
>  #ifdef CONFIG_SECURITY_NETWORK
>   u32 secid; /* Security ID */
>  #endif
> + struct sock *peer; /* Skb's peer sk */
>  };
> 
>  #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
> diff -uprN -X linux-3.6/Documentation/dontdiff
> linux-3.6-vanilla/net/unix/af_unix.c linux-3.6/net/unix/af_unix.c
> --- linux-3.6-vanilla/net/unix/af_unix.c 2012-09-30 19:47:46.000000000 -0400
> +++ linux-3.6/net/unix/af_unix.c 2013-01-24 15:24:57.000000000 -0500
> @@ -1426,6 +1426,35 @@ static void maybe_add_creds(struct sk_bu
>  }
> 
>  /*
> + * Reduce the refcount from sk_wmem_alloc on the peer sk.
> + * Then remove invoke sock_rfree to release the memory
> + * from the current sock sk_rmem_alloc.
> + */
> +static void unix_sock_wrfree(struct sk_buff *skb)
> +{
> + struct sock *sk = UNIXCB(skb).peer;
> +
> + if (sk)
> + sk_free(sk);
> +
> + sock_rfree(skb);

Unfortunately it wont work, sock_rfree() can only be called with socket
lock held.

> +}
> +
> +static inline void unix_set_owner_r(struct sk_buff *skb, struct sock *sk,
> +     struct sock *other)
> +{
> + /* This operation garantee the peer sk isn't freed. */
> + atomic_add(1, &sk->sk_wmem_alloc);
> +
> + skb_orphan(skb);
> + skb->sk = other;
> + skb->destructor = unix_sock_wrfree;
> + atomic_add(skb->truesize, &other->sk_rmem_alloc);
> + sk_mem_charge(other, skb->truesize);
> + UNIXCB(skb).peer = sk;
> +}
> +
> +/*
>   * Send AF_UNIX data.
>   */
> 
> @@ -1579,9 +1607,16 @@ restart:
>   goto restart;
>   }
> 
> + if (atomic_read(&other->sk_rmem_alloc) + skb->truesize >=
> +    (unsigned)other->sk_rcvbuf) {
> + err = -EAGAIN;
> + goto out_unlock;
> + }

This might add a regression on clients expecting to block.

> +
>   if (sock_flag(other, SOCK_RCVTSTAMP))
>   __net_timestamp(skb);
>   maybe_add_creds(skb, sock, other);
> + unix_set_owner_r(skb, sk, other);
>   skb_queue_tail(&other->sk_receive_queue, skb);
>   if (max_level > unix_sk(other)->recursion_level)
>   unix_sk(other)->recursion_level = max_level;
> @@ -1696,7 +1731,14 @@ static int unix_stream_sendmsg(struct ki
>      (other->sk_shutdown & RCV_SHUTDOWN))
>   goto pipe_err_free;
> 
> + if (atomic_read(&other->sk_rmem_alloc) + skb->truesize >=
> +    (unsigned)other->sk_rcvbuf) {
> + err = -EAGAIN;
> + goto pipe_err_free;
> + }
> +

This might add a regression on clients expecting to block.


>   maybe_add_creds(skb, sock, other);
> + unix_set_owner_r(skb, sk, other);
>   skb_queue_tail(&other->sk_receive_queue, skb);
>   if (max_level > unix_sk(other)->recursion_level)
>   unix_sk(other)->recursion_level = max_level;
> @@ -1807,7 +1849,7 @@ static int unix_dgram_recvmsg(struct kio
>   POLLOUT | POLLWRNORM | POLLWRBAND);
> 
>   if (msg->msg_name)
> - unix_copy_addr(msg, skb->sk);
> + unix_copy_addr(msg, UNIXCB(skb).peer);
> 
>   if (size > skb->len - skip)
>   size = skb->len - skip;
> @@ -2007,7 +2049,7 @@ again:
> 
>   /* Copy address just once */
>   if (sunaddr) {
> - unix_copy_addr(msg, skb->sk);
> + unix_copy_addr(msg, UNIXCB(skb).peer);
>   sunaddr = NULL;
>   }


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch hide | download patch | download mbox

diff -uprN -X linux-3.6/Documentation/dontdiff
linux-3.6-vanilla/include/net/af_unix.h
linux-3.6/include/net/af_unix.h
--- linux-3.6-vanilla/include/net/af_unix.h 2012-09-30 19:47:46.000000000 -0400
+++ linux-3.6/include/net/af_unix.h 2013-01-24 15:26:20.000000000 -0500
@@ -34,6 +34,7 @@  struct unix_skb_parms {
 #ifdef CONFIG_SECURITY_NETWORK
  u32 secid; /* Security ID */
 #endif
+ struct sock *peer; /* Skb's peer sk */
 };

 #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
diff -uprN -X linux-3.6/Documentation/dontdiff
linux-3.6-vanilla/net/unix/af_unix.c linux-3.6/net/unix/af_unix.c
--- linux-3.6-vanilla/net/unix/af_unix.c 2012-09-30 19:47:46.000000000 -0400
+++ linux-3.6/net/unix/af_unix.c 2013-01-24 15:24:57.000000000 -0500
@@ -1426,6 +1426,35 @@  static void maybe_add_creds(struct sk_bu
 }

 /*
+ * Reduce the refcount from sk_wmem_alloc on the peer sk.
+ * Then remove invoke sock_rfree to release the memory
+ * from the current sock sk_rmem_alloc.
+ */
+static void unix_sock_wrfree(struct sk_buff *skb)
+{
+ struct sock *sk = UNIXCB(skb).peer;
+
+ if (sk)
+ sk_free(sk);
+
+ sock_rfree(skb);
+}
+
+static inline void unix_set_owner_r(struct sk_buff *skb, struct sock *sk,
+     struct sock *other)
+{
+ /* This operation garantee the peer sk isn't freed. */
+ atomic_add(1, &sk->sk_wmem_alloc);
+
+ skb_orphan(skb);
+ skb->sk = other;
+ skb->destructor = unix_sock_wrfree;
+ atomic_add(skb->truesize, &other->sk_rmem_alloc);
+ sk_mem_charge(other, skb->truesize);
+ UNIXCB(skb).peer = sk;
+}
+
+/*
  * Send AF_UNIX data.
  */

@@ -1579,9 +1607,16 @@  restart:
  goto restart;
  }

+ if (atomic_read(&other->sk_rmem_alloc) + skb->truesize >=
+    (unsigned)other->sk_rcvbuf) {
+ err = -EAGAIN;
+ goto out_unlock;
+ }
+
  if (sock_flag(other, SOCK_RCVTSTAMP))
  __net_timestamp(skb);
  maybe_add_creds(skb, sock, other);
+ unix_set_owner_r(skb, sk, other);
  skb_queue_tail(&other->sk_receive_queue, skb);
  if (max_level > unix_sk(other)->recursion_level)
  unix_sk(other)->recursion_level = max_level;
@@ -1696,7 +1731,14 @@  static int unix_stream_sendmsg(struct ki
     (other->sk_shutdown & RCV_SHUTDOWN))
  goto pipe_err_free;

+ if (atomic_read(&other->sk_rmem_alloc) + skb->truesize >=
+    (unsigned)other->sk_rcvbuf) {
+ err = -EAGAIN;
+ goto pipe_err_free;
+ }
+
  maybe_add_creds(skb, sock, other);
+ unix_set_owner_r(skb, sk, other);
  skb_queue_tail(&other->sk_receive_queue, skb);
  if (max_level > unix_sk(other)->recursion_level)
  unix_sk(other)->recursion_level = max_level;
@@ -1807,7 +1849,7 @@  static int unix_dgram_recvmsg(struct kio
  POLLOUT | POLLWRNORM | POLLWRBAND);

  if (msg->msg_name)
- unix_copy_addr(msg, skb->sk);
+ unix_copy_addr(msg, UNIXCB(skb).peer);

  if (size > skb->len - skip)
  size = skb->len - skip;
@@ -2007,7 +2049,7 @@  again:

  /* Copy address just once */
  if (sunaddr) {
- unix_copy_addr(msg, skb->sk);
+ unix_copy_addr(msg, UNIXCB(skb).peer);
  sunaddr = NULL;
  }
--
To unsubscribe from this list: send the line "unsubscribe netdev" in