@@ -328,7 +328,7 @@ struct multipath_flow4 {
};
typedef void (*multipath_flow4_func_t)(struct multipath_flow4 *flow,
- void *ctx);
+ enum rt_mp_alg_t algo, void *ctx);
void fib_select_multipath(struct fib_result *res,
multipath_flow4_func_t flow_func,
@@ -28,6 +28,7 @@
#include <net/inetpeer.h>
#include <net/flow.h>
#include <net/inet_sock.h>
+#include <net/ip_fib.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
#include <linux/rcupdate.h>
@@ -110,7 +111,16 @@ struct in_device;
int ip_rt_init(void);
void rt_cache_flush(struct net *net);
void rt_flush_dev(struct net_device *dev);
-struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
+struct rtable *__ip_route_output_key_flow(struct net *, struct flowi4 *flp,
+ multipath_flow4_func_t flow_func,
+ void *ctx);
+
+static inline struct rtable *__ip_route_output_key(struct net *net,
+ struct flowi4 *flp)
+{
+ return __ip_route_output_key_flow(net, flp, NULL, NULL);
+}
+
struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
struct sock *sk);
struct dst_entry *ipv4_blackhole_route(struct net *net,
@@ -1536,7 +1536,7 @@ static int fib_multipath_hash(const struct fib_result *res,
{
struct multipath_flow4 flow;
- flow_func(&flow, ctx);
+ flow_func(&flow, res->fi->fib_mp_alg, ctx);
if (res->fi->fib_mp_alg == RT_MP_ALG_L4_HASH)
return jhash_3words(flow.saddr, flow.daddr, flow.ports, 0) >> 1;
@@ -439,6 +439,38 @@ out_unlock:
icmp_xmit_unlock(sk);
}
+/* Source and destination is swapped. See ip_multipath_flow_skb */
+static void icmp_multipath_flow(struct multipath_flow4 *flow,
+ enum rt_mp_alg_t algo, void *ctx)
+{
+ const struct sk_buff *skb = (const struct sk_buff *)ctx;
+ const struct iphdr *iph = ip_hdr(skb);
+
+ flow->saddr = iph->daddr;
+ flow->daddr = iph->saddr;
+ flow->ports = 0;
+
+ if (algo == RT_MP_ALG_L4_HASH)
+ return;
+
+ if (unlikely(!(iph->frag_off & htons(IP_DF))))
+ return;
+
+ if (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP ||
+ iph->protocol == IPPROTO_SCTP) {
+ __be16 _ports[2];
+ const __be16 *ports;
+
+ ports = skb_header_pointer(skb, iph->ihl * 4, sizeof(_ports),
+ &_ports);
+ if (ports) {
+ flow->sport = ports[1];
+ flow->dport = ports[0];
+ }
+ }
+}
+
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -463,7 +495,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev) ? : skb_in->dev->ifindex;
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
- rt = __ip_route_output_key(net, fl4);
+ rt = __ip_route_output_key_flow(net, fl4, icmp_multipath_flow, skb_in);
if (IS_ERR(rt))
return rt;
@@ -1646,37 +1646,82 @@ out:
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Fill multipath flow key data based on socket buffer */
-static void ip_multipath_flow_skb(struct multipath_flow4 *flow, void *ctx)
+static void ip_multipath_flow_skb(struct multipath_flow4 *flow,
+ enum rt_mp_alg_t algo, void *ctx)
{
const struct sk_buff *skb = (const struct sk_buff *)ctx;
- const struct iphdr *iph;
+ struct icmphdr _icmph;
+ struct iphdr _inner_iph;
+ const struct iphdr *outer_iph;
+ const struct icmphdr *icmph;
+ const struct iphdr *inner_iph;
+ unsigned int offset;
- iph = ip_hdr(skb);
+ outer_iph = ip_hdr(skb);
- flow->saddr = iph->saddr;
- flow->daddr = iph->daddr;
+ flow->saddr = outer_iph->saddr;
+ flow->daddr = outer_iph->daddr;
flow->ports = 0;
- if (unlikely(!(iph->frag_off & htons(IP_DF))))
- return;
+ offset = outer_iph->ihl * 4;
- if (iph->protocol == IPPROTO_TCP ||
- iph->protocol == IPPROTO_UDP ||
- iph->protocol == IPPROTO_SCTP) {
+ if (algo == RT_MP_ALG_L4_HASH) {
__be16 _ports[2];
const __be16 *ports;
- ports = skb_header_pointer(skb, iph->ihl * 4, sizeof(_ports),
+ if (unlikely(!(outer_iph->frag_off & htons(IP_DF))))
+ return;
+
+ if (outer_iph->protocol != IPPROTO_TCP &&
+ outer_iph->protocol != IPPROTO_UDP &&
+ outer_iph->protocol != IPPROTO_SCTP) {
+ return;
+ }
+
+ ports = skb_header_pointer(skb, offset, sizeof(_ports),
&_ports);
if (ports) {
flow->sport = ports[0];
flow->dport = ports[1];
}
+
+ return;
+ }
+
+ if (outer_iph->protocol != IPPROTO_ICMP)
+ return;
+
+ if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
+ return;
+
+ icmph = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
+ if (!icmph)
+ return;
+
+ if (icmph->type != ICMP_DEST_UNREACH &&
+ icmph->type != ICMP_SOURCE_QUENCH &&
+ icmph->type != ICMP_REDIRECT &&
+ icmph->type != ICMP_TIME_EXCEEDED &&
+ icmph->type != ICMP_PARAMETERPROB) {
+ return;
}
+
+ offset += sizeof(_icmph);
+ inner_iph = skb_header_pointer(skb, offset, sizeof(_inner_iph),
+ &_inner_iph);
+ if (!inner_iph)
+ return;
+
+ /* Since the ICMP payload contains a packet sent from the current
+ * recipient, we swap source and destination addresses
+ */
+ flow->saddr = inner_iph->daddr;
+ flow->daddr = inner_iph->saddr;
}
/* Fill multipath flow key data based on flowi4 */
-static void ip_multipath_flow_fl4(struct multipath_flow4 *flow, void *ctx)
+static void ip_multipath_flow_fl4(struct multipath_flow4 *flow,
+ enum rt_mp_alg_t algo, void *ctx)
{
const struct flowi4 *fl4 = (const struct flowi4 *)ctx;
@@ -2086,7 +2131,9 @@ add:
* Major route resolver routine.
*/
-struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
+struct rtable *__ip_route_output_key_flow(struct net *net, struct flowi4 *fl4,
+ multipath_flow4_func_t flow_func,
+ void *ctx)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
@@ -2248,9 +2295,12 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
- fib_select_multipath(&res, ip_multipath_flow_fl4, fl4);
- else
+ if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
+ if (flow_func)
+ fib_select_multipath(&res, flow_func, ctx);
+ else
+ fib_select_multipath(&res, ip_multipath_flow_fl4, fl4);
+ } else
#endif
if (!res.prefixlen &&
res.table->tb_num_default > 1 &&