From patchwork Wed Sep 30 08:12:22 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Peter Christensen X-Patchwork-Id: 524170 X-Patchwork-Delegate: davem@davemloft.net Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 2892E140D17 for ; Wed, 30 Sep 2015 18:12:58 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=fail reason="signature verification failed" (2048-bit key; unprotected) header.d=ordbogen.com header.i=@ordbogen.com header.b=lcN0YpkE; dkim-atps=neutral Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1754797AbbI3IMw (ORCPT ); Wed, 30 Sep 2015 04:12:52 -0400 Received: from mail.ordbogen.com ([91.240.88.21]:38330 "EHLO mail.ordbogen.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754515AbbI3IMh (ORCPT ); Wed, 30 Sep 2015 04:12:37 -0400 X-Virus-Scanned: Debian amavisd-new at ordbogen.com Received: from pch.odense.ordbogen.com (odense.ordbogen.com [91.240.88.129]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.ordbogen.com (Postfix) with ESMTPSA id E9C4C7F621; Wed, 30 Sep 2015 10:12:30 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=simple/simple; d=ordbogen.com; s=20150824; t=1443600751; bh=akh3rvYSeIe8efIJgWbO3zai//B6ztL3C4da6Ylbumc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=lcN0YpkEJUKkh0VdUZnKtHNHWJKapHy+QLUsKSnnUY+XH1sF182MZUbZxQL9TxIDe 3EmHQ+sZgw7FK/AJAwaMtpgb5ywgULF2VD4jivfnXtxdpS8tlxe/c2oE+PYCFMsfiy Kcu2tMUOSMNKTDRYdkv2a4dg2nkj8Vyufpc/Jzb+dWNJlGMr2XEWofc5QIMBWlSBC6 Uq3fh5vU9PdYrg+RYypKickfq8b+1yOLAiMgvf0bP/yS5dBAeXX56Ck5NEa3Gw2Hn2 9gkZdYydnzsQ1FFzql8zQIxjMNvnhoNbIiLcCnCZZ1b803AKKZ22ad/l7CUABGY0ss aZ//wq/d06TCA== From: =?UTF-8?q?Peter=20N=C3=B8rlund?= To: netdev@vger.kernel.org Cc: "David S. Miller" , Alexey Kuznetsov , James Morris , Hideaki YOSHIFUJI , Patrick McHardy , =?UTF-8?q?Peter=20N=C3=B8rlund?= Subject: [PATCH v5 net-next 2/2] ipv4: ICMP packet inspection for multipath Date: Wed, 30 Sep 2015 10:12:22 +0200 Message-Id: <1443600742-20516-3-git-send-email-pch@ordbogen.com> X-Mailer: git-send-email 1.7.10.4 In-Reply-To: <1443600742-20516-1-git-send-email-pch@ordbogen.com> References: <1443600742-20516-1-git-send-email-pch@ordbogen.com> MIME-Version: 1.0 Sender: netdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org From: Peter Nørlund ICMP packets are inspected to let them route together with the flow they belong to, minimizing the chance that a problematic path will affect flows on other paths, and so that anycast environments can work with ECMP. Signed-off-by: Peter Nørlund --- include/net/route.h | 11 +++++++++- net/ipv4/icmp.c | 19 ++++++++++++++++- net/ipv4/route.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 80 insertions(+), 9 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index f46af25..7d79c05 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -110,7 +111,15 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); -struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); +struct rtable *__ip_route_output_key_hash(struct net *, struct flowi4 *flp, + int mp_hash); + +static inline struct rtable *__ip_route_output_key(struct net *net, + struct flowi4 *flp) +{ + return __ip_route_output_key_hash(net, flp, -1); +} + struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, struct sock *sk); struct dst_entry *ipv4_blackhole_route(struct net *net, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index e5eb8ac..b3a1620 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -440,6 +440,22 @@ out_unlock: icmp_xmit_unlock(sk); } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* Source and destination is swapped. See ip_multipath_icmp_hash */ +static int icmp_multipath_hash_skb(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + + return fib_multipath_hash(iph->daddr, iph->saddr); +} + +#else + +#define icmp_multipath_hash_skb(skb) (-1) + +#endif + static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, @@ -464,7 +480,8 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev); security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4)); - rt = __ip_route_output_key(net, fl4); + rt = __ip_route_output_key_hash(net, fl4, + icmp_multipath_hash_skb(skb_in)); if (IS_ERR(rt)) return rt; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 64367f3..a2479a4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1646,6 +1646,48 @@ out: return err; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + +/* To make ICMP packets follow the right flow, the multipath hash is + * calculated from the inner IP addresses in reverse order. + */ +static int ip_multipath_icmp_hash(struct sk_buff *skb) +{ + const struct iphdr *outer_iph = ip_hdr(skb); + struct icmphdr _icmph; + const struct icmphdr *icmph; + struct iphdr _inner_iph; + const struct iphdr *inner_iph; + + if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) + goto standard_hash; + + icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), + &_icmph); + if (!icmph) + goto standard_hash; + + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_REDIRECT && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB) { + goto standard_hash; + } + + inner_iph = skb_header_pointer(skb, + outer_iph->ihl * 4 + sizeof(_icmph), + sizeof(_inner_iph), &_inner_iph); + if (!inner_iph) + goto standard_hash; + + return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr); + +standard_hash: + return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr); +} + +#endif /* CONFIG_IP_ROUTE_MULTIPATH */ + static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, const struct flowi4 *fl4, @@ -1656,7 +1698,10 @@ static int ip_mkroute_input(struct sk_buff *skb, if (res->fi && res->fi->fib_nhs > 1) { int h; - h = fib_multipath_hash(saddr, daddr); + if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP)) + h = ip_multipath_icmp_hash(skb); + else + h = fib_multipath_hash(saddr, daddr); fib_select_multipath(res, h); } #endif @@ -2042,7 +2087,8 @@ add: * Major route resolver routine. */ -struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) +struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, + int mp_hash) { struct net_device *dev_out = NULL; __u8 tos = RT_FL_TOS(fl4); @@ -2207,10 +2253,9 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) { - int h; - - h = fib_multipath_hash(fl4->saddr, fl4->daddr); - fib_select_multipath(&res, h); + if (mp_hash < 0) + mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr); + fib_select_multipath(&res, mp_hash); } else #endif @@ -2233,7 +2278,7 @@ out: rcu_read_unlock(); return rth; } -EXPORT_SYMBOL_GPL(__ip_route_output_key); +EXPORT_SYMBOL_GPL(__ip_route_output_key_hash); static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) {