Message ID | 20170826015346.24247-2-colona@arista.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
On Fri, 2017-08-25 at 18:53 -0700, Ivan Delalande wrote: > Report TCP MD5 (RFC2385) signing keys, addresses and address prefixes to > processes with CAP_NET_ADMIN requesting INET_DIAG_INFO. Currently it is > not possible to retrieve these from the kernel once they have been > configured on sockets. ... > +static int inet_diag_put_md5sig(struct sk_buff *skb, > + const struct tcp_md5sig_info *md5sig) > +{ > + const struct tcp_md5sig_key *key; > + struct nlattr *attr; > + struct tcp_md5sig *info; > + int md5sig_count = 0; > + > + hlist_for_each_entry_rcu(key, &md5sig->head, node) > + md5sig_count++; > + > + attr = nla_reserve(skb, INET_DIAG_MD5SIG, > + md5sig_count * sizeof(struct tcp_md5sig)); > + if (!attr) > + return -EMSGSIZE; > + > + info = nla_data(attr); > + hlist_for_each_entry_rcu(key, &md5sig->head, node) { > + inet_diag_md5sig_fill(info, key); > + info++; > + } > + > + return 0; > +} > +#endif Unless I missed something, I am sure I gave a feedback on this function already :/
On Fri, Aug 25, 2017 at 08:41:25PM -0700, Eric Dumazet wrote: > On Fri, 2017-08-25 at 18:53 -0700, Ivan Delalande wrote: > > Report TCP MD5 (RFC2385) signing keys, addresses and address prefixes to > > processes with CAP_NET_ADMIN requesting INET_DIAG_INFO. Currently it is > > not possible to retrieve these from the kernel once they have been > > configured on sockets. > > ... > > > +static int inet_diag_put_md5sig(struct sk_buff *skb, > > + const struct tcp_md5sig_info *md5sig) > > +{ > > + const struct tcp_md5sig_key *key; > > + struct nlattr *attr; > > + struct tcp_md5sig *info; > > + int md5sig_count = 0; > > + > > + hlist_for_each_entry_rcu(key, &md5sig->head, node) > > + md5sig_count++; > > + > > + attr = nla_reserve(skb, INET_DIAG_MD5SIG, > > + md5sig_count * sizeof(struct tcp_md5sig)); > > + if (!attr) > > + return -EMSGSIZE; > > + > > + info = nla_data(attr); > > + hlist_for_each_entry_rcu(key, &md5sig->head, node) { > > + inet_diag_md5sig_fill(info, key); > > + info++; > > + } > > + > > + return 0; > > +} > > +#endif > > Unless I missed something, I am sure I gave a feedback on this function > already :/ Sorry, I probably should have detailed my changes. I tried to address this by locking the whole socket in the caller, tcp_diag_get_aux, just outside of the rcu_read_lock. Would this work here, or do you see a better way? Thanks for your feedback,
On Sat, 2017-08-26 at 07:53 +0200, Ivan Delalande wrote: > > Sorry, I probably should have detailed my changes. I tried to address > this by locking the whole socket in the caller, tcp_diag_get_aux, just > outside of the rcu_read_lock. Would this work here, or do you see a > better way? > locking the socket is problematic. It is already done in tcp_get_info() since linux-4.10 and unfortunately it added unreasonable stall when a socket is flooded with tiny SACK messages (socket backlog is huge) People are now making tcp_rmem and tcp_wmem much bigger to allow BBR flows to reach line rate on very long distance communications. We are working to make tcp_rack_mark_lost() not having O(N) behavior, but it is not done yet. I would stick to RCU, but add sanity checks, so that _if_ the list is different on the second RCU list traversal, you make sure : 1) We do not try to put more data in the reserved space 2) We memset(ptr, 0, remaining) the remaining space if we found less entries in the 2nd loop.
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 678496897a68..f52ff62bfabe 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -143,6 +143,7 @@ enum { INET_DIAG_MARK, INET_DIAG_BBRINFO, INET_DIAG_CLASS_ID, + INET_DIAG_MD5SIG, __INET_DIAG_MAX, }; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index a748c74aa8b7..99c54b765921 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -16,6 +16,7 @@ #include <linux/tcp.h> +#include <net/netlink.h> #include <net/tcp.h> static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, @@ -36,6 +37,103 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, tcp_get_info(sk, info); } +#ifdef CONFIG_TCP_MD5SIG +static void inet_diag_md5sig_fill(struct tcp_md5sig *info, + const struct tcp_md5sig_key *key) +{ + #if IS_ENABLED(CONFIG_IPV6) + if (key->family == AF_INET6) { + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)&info->tcpm_addr; + + memcpy(&sin6->sin6_addr, &key->addr.a6, + sizeof(struct in6_addr)); + } else + #endif + { + struct sockaddr_in *sin = + (struct sockaddr_in *)&info->tcpm_addr; + + memcpy(&sin->sin_addr, &key->addr.a4, sizeof(struct in_addr)); + } + + info->tcpm_addr.ss_family = key->family; + info->tcpm_prefixlen = key->prefixlen; + info->tcpm_keylen = key->keylen; + memcpy(info->tcpm_key, key->key, key->keylen); +} + +static int inet_diag_put_md5sig(struct sk_buff *skb, + const struct tcp_md5sig_info *md5sig) +{ + const struct tcp_md5sig_key *key; + struct nlattr *attr; + struct tcp_md5sig *info; + int md5sig_count = 0; + + hlist_for_each_entry_rcu(key, &md5sig->head, node) + md5sig_count++; + + attr = nla_reserve(skb, INET_DIAG_MD5SIG, + md5sig_count * sizeof(struct tcp_md5sig)); + if (!attr) + return -EMSGSIZE; + + info = nla_data(attr); + hlist_for_each_entry_rcu(key, &md5sig->head, node) { + inet_diag_md5sig_fill(info, key); + info++; + } + + return 0; +} +#endif + +static int tcp_diag_get_aux(struct sock *sk, bool net_admin, + struct sk_buff *skb) +{ +#ifdef CONFIG_TCP_MD5SIG + if (net_admin) { + struct tcp_md5sig_info *md5sig; + int err = 0; + + lock_sock(sk); + rcu_read_lock(); + md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info); + if (md5sig) + err = inet_diag_put_md5sig(skb, md5sig); + rcu_read_unlock(); + release_sock(sk); + if (err < 0) + return err; + } +#endif + + return 0; +} + +static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) +{ + size_t size = 0; + +#ifdef CONFIG_TCP_MD5SIG + if (sk_fullsock(sk)) { + const struct tcp_md5sig_info *md5sig; + const struct tcp_md5sig_key *key; + + rcu_read_lock(); + md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info); + if (md5sig) { + hlist_for_each_entry_rcu(key, &md5sig->head, node) + size += sizeof(struct tcp_md5sig); + } + rcu_read_unlock(); + } +#endif + + return size; +} + static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { @@ -68,13 +166,15 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, #endif static const struct inet_diag_handler tcp_diag_handler = { - .dump = tcp_diag_dump, - .dump_one = tcp_diag_dump_one, - .idiag_get_info = tcp_diag_get_info, - .idiag_type = IPPROTO_TCP, - .idiag_info_size = sizeof(struct tcp_info), + .dump = tcp_diag_dump, + .dump_one = tcp_diag_dump_one, + .idiag_get_info = tcp_diag_get_info, + .idiag_get_aux = tcp_diag_get_aux, + .idiag_get_aux_size = tcp_diag_get_aux_size, + .idiag_type = IPPROTO_TCP, + .idiag_info_size = sizeof(struct tcp_info), #ifdef CONFIG_INET_DIAG_DESTROY - .destroy = tcp_diag_destroy, + .destroy = tcp_diag_destroy, #endif };
Report TCP MD5 (RFC2385) signing keys, addresses and address prefixes to processes with CAP_NET_ADMIN requesting INET_DIAG_INFO. Currently it is not possible to retrieve these from the kernel once they have been configured on sockets. Signed-off-by: Ivan Delalande <colona@arista.com> --- include/uapi/linux/inet_diag.h | 1 + net/ipv4/tcp_diag.c | 112 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 6 deletions(-)