| Submitter | Eric Dumazet |
|---|---|
| Date | Jan. 23, 2009, 1:44 p.m. |
| Message ID | <4979C9B6.3080302@cosmosbay.com> |
| Download | mbox | patch |
| Permalink | /patch/20052/ |
| State | Superseded |
| Delegated to: | David Miller |
| Headers | show |
Comments
At Fri, 23 Jan 2009 14:44:22 +0100, Eric Dumazet wrote: > Here is an updated of patch, that still have an uniform random distribution, > keeping a starting point in the range [low - high]. > > My attempt to avoid a divide failed ;) > > Thank you > > [PATCH] udp: optimize bind(0) if many ports are in use > > commit 9088c5609584684149f3fb5b065aa7f18dcb03ff > (udp: Improve port randomization) introduced a regression for UDP bind() syscall > to null port (getting a random port) in case lot of ports are already in use. > > This is because we do about 28000 scans of very long chains (220 sockets per chain), > with many spin_lock_bh()/spin_unlock_bh() calls. > > Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE) > so that we scan chains at most once. > > Instead of 250 ms per bind() call, we get after patch a time of 2.9 ms > > Based on a report from Vitaly Mayatskikh > > Reported-by: Vitaly Mayatskikh <v.mayatskih@gmail.com> > Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> > --- > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > index cf5ab05..6e27868 100644 > --- a/net/ipv4/udp.c > +++ b/net/ipv4/udp.c > @@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); > atomic_t udp_memory_allocated; > EXPORT_SYMBOL(udp_memory_allocated); > > +#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) > + > static int udp_lib_lport_inuse(struct net *net, __u16 num, > const struct udp_hslot *hslot, > + unsigned long *bitmap, > struct sock *sk, > int (*saddr_comp)(const struct sock *sk1, > const struct sock *sk2)) > @@ -132,12 +135,16 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, > sk_nulls_for_each(sk2, node, &hslot->head) > if (net_eq(sock_net(sk2), net) && > sk2 != sk && > - sk2->sk_hash == num && > + (bitmap || sk2->sk_hash == num) && > (!sk2->sk_reuse || !sk->sk_reuse) && > (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if > || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && > - (*saddr_comp)(sk, sk2)) > - return 1; > + (*saddr_comp)(sk, sk2)) { > + if (bitmap) > + __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, bitmap); > + else > + return 1; > + } > return 0; > } > > @@ -160,32 +167,42 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, > if (!snum) { > int low, high, remaining; > unsigned rand; > - unsigned short first; > + unsigned short first, last; > + DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); > > inet_get_local_port_range(&low, &high); > remaining = (high - low) + 1; > > rand = net_random(); > - snum = first = rand % remaining + low; > - rand |= 1; > - for (;;) { > - hslot = &udptable->hash[udp_hashfn(net, snum)]; > + first = rand % remaining + low; > + rand = (rand | 1) * UDP_HTABLE_SIZE; > + for (last = first + UDP_HTABLE_SIZE; first != last; first++) { > + hslot = &udptable->hash[udp_hashfn(net, first)]; > + bitmap_zero(bitmap, PORTS_PER_CHAIN); > spin_lock_bh(&hslot->lock); > - if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) > - break; > - spin_unlock_bh(&hslot->lock); > + udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, saddr_comp); > + > + snum = first; > + /* > + * PORTS_PER_CHAIN loops, because snum is unsigned short > + * and we add an odd multiple of UDP_HTABLE_SIZE > + */ > do { > - snum = snum + rand; > - } while (snum < low || snum > high); > - if (snum == first) > - goto fail; > + if (low <= snum && snum <= high && > + !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) > + goto found; > + snum += rand; > + } while (snum != first); > + spin_unlock_bh(&hslot->lock); > } > + goto fail; > } else { > hslot = &udptable->hash[udp_hashfn(net, snum)]; > spin_lock_bh(&hslot->lock); > - if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) > + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) > goto fail_unlock; > } > +found: > inet_sk(sk)->num = snum; > sk->sk_hash = snum; > if (sk_unhashed(sk)) { > Both randomness and speed are good. There're several never bound ports in the test, but only few. Thank you for the patch. -- wbr, Vitaly -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Vitaly Mayatskikh a écrit : > Both randomness and speed are good. There're several never bound ports > in the test, but only few. Thank you for the patch. I guess "never bound ports" were already in use by other applications on your machine :) -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
At Fri, 23 Jan 2009 17:05:15 +0100, Eric Dumazet wrote: > > Both randomness and speed are good. There're several never bound ports > > in the test, but only few. Thank you for the patch. > > I guess "never bound ports" were already in use by other applications on your machine :) Possibly, but not very probably :) -- wbr, Vitaly -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Patch
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index cf5ab05..6e27868 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -120,8 +120,11 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); +#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE) + static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, + unsigned long *bitmap, struct sock *sk, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2)) @@ -132,12 +135,16 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num, sk_nulls_for_each(sk2, node, &hslot->head) if (net_eq(sock_net(sk2), net) && sk2 != sk && - sk2->sk_hash == num && + (bitmap || sk2->sk_hash == num) && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2)) - return 1; + (*saddr_comp)(sk, sk2)) { + if (bitmap) + __set_bit(sk2->sk_hash / UDP_HTABLE_SIZE, bitmap); + else + return 1; + } return 0; } @@ -160,32 +167,42 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, if (!snum) { int low, high, remaining; unsigned rand; - unsigned short first; + unsigned short first, last; + DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; rand = net_random(); - snum = first = rand % remaining + low; - rand |= 1; - for (;;) { - hslot = &udptable->hash[udp_hashfn(net, snum)]; + first = rand % remaining + low; + rand = (rand | 1) * UDP_HTABLE_SIZE; + for (last = first + UDP_HTABLE_SIZE; first != last; first++) { + hslot = &udptable->hash[udp_hashfn(net, first)]; + bitmap_zero(bitmap, PORTS_PER_CHAIN); spin_lock_bh(&hslot->lock); - if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) - break; - spin_unlock_bh(&hslot->lock); + udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, saddr_comp); + + snum = first; + /* + * PORTS_PER_CHAIN loops, because snum is unsigned short + * and we add an odd multiple of UDP_HTABLE_SIZE + */ do { - snum = snum + rand; - } while (snum < low || snum > high); - if (snum == first) - goto fail; + if (low <= snum && snum <= high && + !test_bit(snum / UDP_HTABLE_SIZE, bitmap)) + goto found; + snum += rand; + } while (snum != first); + spin_unlock_bh(&hslot->lock); } + goto fail; } else { hslot = &udptable->hash[udp_hashfn(net, snum)]; spin_lock_bh(&hslot->lock); - if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp)) + if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp)) goto fail_unlock; } +found: inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) {