Message ID | 1447909413.22599.191.camel@edumazet-glaptop2.roam.corp.google.com |
---|---|
State | Accepted, archived |
Delegated to: | David Miller |
Headers | show |
On 11/19/2015 08:03 AM, Eric Dumazet wrote: > From: Eric Dumazet <edumazet@google.com> > > tcp_send_rcvq() is used for re-injecting data into tcp receive queue. > > Problems : > > - No check against size is performed, allowed user to fool kernel in > attempting very large memory allocations, eventually triggering > OOM when memory is fragmented. Doesn't the tcp_try_rmem_schedule() protect us from doing this "eventually"? I mean first we would be allowed to do it, but then the sock will be charged with the previous allocations and will not add more memory to socket. Nonetheless, Acked-by: Pavel Emelyanov <xemul@parallels.com> > - In case of fault during the copy we do not return correct errno. > > Lets use alloc_skb_with_frags() to cook optimal skbs. > > Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c") > Fixes: c0e88ff0f256 ("tcp: Repair socket queues") > Signed-off-by: Eric Dumazet <edumazet@google.com> > Cc: Pavel Emelyanov <xemul@parallels.com> > --- > net/ipv4/tcp_input.c | 22 +++++++++++++++++++--- > 1 file changed, 19 insertions(+), 3 deletions(-) > > diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c > index fdd88c3803a6..a4a0b6b3bcf2 100644 > --- a/net/ipv4/tcp_input.c > +++ b/net/ipv4/tcp_input.c > @@ -4481,19 +4481,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int > int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) > { > struct sk_buff *skb; > + int err = -ENOMEM; > + int data_len = 0; > bool fragstolen; > > if (size == 0) > return 0; > > - skb = alloc_skb(size, sk->sk_allocation); > + if (size > PAGE_SIZE) { > + int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS); > + > + data_len = npages << PAGE_SHIFT; > + size = data_len + (size & ~PAGE_MASK); > + } > + skb = alloc_skb_with_frags(size - data_len, data_len, > + PAGE_ALLOC_COSTLY_ORDER, > + &err, sk->sk_allocation); > if (!skb) > goto err; > > + skb_put(skb, size - data_len); > + skb->data_len = data_len; > + skb->len = size;
> + > if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) > goto err_free; > > - if (memcpy_from_msg(skb_put(skb, size), msg, size)) > + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); > + if (err) > goto err_free; > > TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; > @@ -4509,7 +4524,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) > err_free: > kfree_skb(skb); > err: > - return -ENOMEM; > + return err; > + > } > > static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) > > > . > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2015-11-19 at 13:51 +0300, Pavel Emelyanov wrote: > On 11/19/2015 08:03 AM, Eric Dumazet wrote: > > From: Eric Dumazet <edumazet@google.com> > > > > tcp_send_rcvq() is used for re-injecting data into tcp receive queue. > > > > Problems : > > > > - No check against size is performed, allowed user to fool kernel in > > attempting very large memory allocations, eventually triggering > > OOM when memory is fragmented. > > Doesn't the tcp_try_rmem_schedule() protect us from doing this "eventually"? > I mean first we would be allowed to do it, but then the sock will be charged > with the previous allocations and will not add more memory to socket. But this tcp_try_rmem_schedule() is done _after_ allocation was attempted. This is too late :( -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2015-11-19 at 04:25 -0800, Eric Dumazet wrote: > On Thu, 2015-11-19 at 13:51 +0300, Pavel Emelyanov wrote: > > On 11/19/2015 08:03 AM, Eric Dumazet wrote: > > > From: Eric Dumazet <edumazet@google.com> > > > > > > tcp_send_rcvq() is used for re-injecting data into tcp receive queue. > > > > > > Problems : > > > > > > - No check against size is performed, allowed user to fool kernel in > > > attempting very large memory allocations, eventually triggering > > > OOM when memory is fragmented. > > > > Doesn't the tcp_try_rmem_schedule() protect us from doing this "eventually"? > > I mean first we would be allowed to do it, but then the sock will be charged > > with the previous allocations and will not add more memory to socket. > > But this tcp_try_rmem_schedule() is done _after_ allocation was attempted. This is too late :( Simple reproducer allocating 2 Mbytes of contiguous kernel memory : #include <sys/types.h> #include <sys/socket.h> #include <netinet/tcp.h> #include <netinet/in.h> #include <arpa/inet.h> #include <errno.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #include <unistd.h> static void fail(const char *str) { perror(str); exit(1); } void usage(int code) { exit(1); } int main(int argc, char *argv[]) { int res, c, repair = 1; int queue = TCP_RECV_QUEUE; int fd = socket(AF_INET, SOCK_STREAM, 0); size_t sz = 2*1024*1024 - 1024; char *buffer; struct sockaddr_in src, dst; while ((c = getopt(argc, argv, "m:")) != -1) { switch (c) { case 'm' : sz = atoi(optarg); break; default : usage(1); } } if (setsockopt(fd, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair)) == -1) fail("TCP_REPAIR"); memset(&src, 0, sizeof(src)); src.sin_family = AF_INET; src.sin_port = htons(8888); src.sin_addr.s_addr = htonl(0x7f000001); if (bind(fd, (struct sockaddr *)&src, sizeof(src)) == -1) fail("bind"); memset(&dst, 0, sizeof(dst)); dst.sin_family = AF_INET; dst.sin_port = htons(9999); dst.sin_addr.s_addr = htonl(0x7f000001); if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) == -1) fail("connect");
if (setsockopt(fd, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)) == -1) fail("TCP_REPAIR_QUEUE"); buffer = malloc(sz); if (!buffer) buffer = malloc(1000); res = send(fd, buffer, sz, 0); printf("send() res=%d errno=%d\n", res, errno); if (res != -1) sleep(1000); return 0; } -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Eric Dumazet <eric.dumazet@gmail.com> Date: Wed, 18 Nov 2015 21:03:33 -0800 > From: Eric Dumazet <edumazet@google.com> > > tcp_send_rcvq() is used for re-injecting data into tcp receive queue. > > Problems : > > - No check against size is performed, allowed user to fool kernel in > attempting very large memory allocations, eventually triggering > OOM when memory is fragmented. > > - In case of fault during the copy we do not return correct errno. > > Lets use alloc_skb_with_frags() to cook optimal skbs. > > Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c") > Fixes: c0e88ff0f256 ("tcp: Repair socket queues") > Signed-off-by: Eric Dumazet <edumazet@google.com> Good catch, applied and queued up for -stable. Thanks! -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd88c3803a6..a4a0b6b3bcf2 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4481,19 +4481,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_buff *skb; + int err = -ENOMEM; + int data_len = 0; bool fragstolen; if (size == 0) return 0; - skb = alloc_skb(size, sk->sk_allocation); + if (size > PAGE_SIZE) { + int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS); + + data_len = npages << PAGE_SHIFT; + size = data_len + (size & ~PAGE_MASK); + } + skb = alloc_skb_with_frags(size - data_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, + &err, sk->sk_allocation); if (!skb) goto err; + skb_put(skb, size - data_len); + skb->data_len = data_len; + skb->len = size; + if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (memcpy_from_msg(skb_put(skb, size), msg, size)) + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); + if (err) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; @@ -4509,7 +4524,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) err_free: kfree_skb(skb); err: - return -ENOMEM; + return err; + } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)