diff mbox

[net-next,1/2] bonding: modify the old and add new xmit hash functions

Message ID 1380122398-7370-2-git-send-email-nikolay@redhat.com
State Superseded, archived
Delegated to: David Miller
Headers show

Commit Message

Nikolay Aleksandrov Sept. 25, 2013, 3:19 p.m. UTC
This patch adds two new hash policy modes which use skb_flow_dissect:
3 - Encapsulated layer 2+3
4 - Encapsulated layer 3+4
There should be a good improvement for tunnel users in those modes.
It also changes the old hash functions to:
hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
hash ^= (hash >> 16);
hash ^= (hash >> 8);

Where hash will be initialized either to L2 hash, that is
SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted
from the upper layer. Flow's dst and src are also extracted based on the
xmit policy either directly from the buffer or by using skb_flow_dissect,
but in both cases if the protocol is IPv6 then dst and src are obtained by
ipv6_addr_hash() on the real addresses. In case of a non-dissectable
packet, the algorithms fall back to L2 hashing.
The bond_set_mode_ops() function is now obsolete and thus deleted
because it was used only to set the proper hash policy. Also we trim a
pointer from struct bonding because we no longer need to keep the hash
function, now there's only a single hash function - bond_xmit_hash that
works based on bond->params.xmit_policy.

The hash function and skb_flow_dissect were suggested by Eric Dumazet.
The layer names were suggested by Andy Gospodarek, because I suck at
semantics.

Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
---
One line is intentionally left at 82 chars since it's the whole function
and IMO looks better that way.

 drivers/net/bonding/bond_3ad.c   |   2 +-
 drivers/net/bonding/bond_main.c  | 211 +++++++++++++++------------------------
 drivers/net/bonding/bond_sysfs.c |   2 -
 drivers/net/bonding/bonding.h    |   3 +-
 include/uapi/linux/if_bonding.h  |   2 +
 5 files changed, 86 insertions(+), 134 deletions(-)

Comments

Veaceslav Falico Sept. 25, 2013, 4:26 p.m. UTC | #1
On Wed, Sep 25, 2013 at 05:19:57PM +0200, Nikolay Aleksandrov wrote:
>This patch adds two new hash policy modes which use skb_flow_dissect:
>3 - Encapsulated layer 2+3
>4 - Encapsulated layer 3+4
>There should be a good improvement for tunnel users in those modes.
>It also changes the old hash functions to:
>hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
>hash ^= (hash >> 16);
>hash ^= (hash >> 8);
>
>Where hash will be initialized either to L2 hash, that is
>SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted
>from the upper layer. Flow's dst and src are also extracted based on the
>xmit policy either directly from the buffer or by using skb_flow_dissect,
>but in both cases if the protocol is IPv6 then dst and src are obtained by
>ipv6_addr_hash() on the real addresses. In case of a non-dissectable
>packet, the algorithms fall back to L2 hashing.
>The bond_set_mode_ops() function is now obsolete and thus deleted
>because it was used only to set the proper hash policy. Also we trim a
>pointer from struct bonding because we no longer need to keep the hash
>function, now there's only a single hash function - bond_xmit_hash that
>works based on bond->params.xmit_policy.
>
>The hash function and skb_flow_dissect were suggested by Eric Dumazet.
>The layer names were suggested by Andy Gospodarek, because I suck at
>semantics.
>
>Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
>---
>One line is intentionally left at 82 chars since it's the whole function
>and IMO looks better that way.
>
> drivers/net/bonding/bond_3ad.c   |   2 +-
> drivers/net/bonding/bond_main.c  | 211 +++++++++++++++------------------------
> drivers/net/bonding/bond_sysfs.c |   2 -
> drivers/net/bonding/bonding.h    |   3 +-
> include/uapi/linux/if_bonding.h  |   2 +
> 5 files changed, 86 insertions(+), 134 deletions(-)

Nice!!! Though some comments below.

>
>diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>index 0d8f427..b3ab703 100644
>--- a/drivers/net/bonding/bond_3ad.c
>+++ b/drivers/net/bonding/bond_3ad.c
>@@ -2442,7 +2442,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
> 		goto out;
> 	}
>
>-	slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
>+	slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
>
> 	bond_for_each_slave(bond, slave) {
> 		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 55bbb8b..73e416b 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -78,6 +78,7 @@
> #include <net/netns/generic.h>
> #include <net/pkt_sched.h>
> #include <linux/rculist.h>
>+#include <net/flow_keys.h>
> #include "bonding.h"
> #include "bond_3ad.h"
> #include "bond_alb.h"
>@@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on
> module_param(xmit_hash_policy, charp, 0);
> MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "
> 				   "0 for layer 2 (default), 1 for layer 3+4, "
>-				   "2 for layer 2+3");
>+				   "2 for layer 2+3, 3 for encap layer 2+3, "
>+				   "4 for encap layer 3+4");
> module_param(arp_interval, int, 0);
> MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
> module_param_array(arp_ip_target, charp, NULL, 0);
>@@ -217,6 +219,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = {
> {	"layer2",		BOND_XMIT_POLICY_LAYER2},
> {	"layer3+4",		BOND_XMIT_POLICY_LAYER34},
> {	"layer2+3",		BOND_XMIT_POLICY_LAYER23},
>+{	"encap2+3",		BOND_XMIT_POLICY_ENCAP23},
>+{	"encap3+4",		BOND_XMIT_POLICY_ENCAP34},
> {	NULL,			-1},
> };
>
>@@ -3026,99 +3030,99 @@ static struct notifier_block bond_netdev_notifier = {
>
> /*---------------------------- Hashing Policies -----------------------------*/
>
>-/*
>- * Hash for the output device based upon layer 2 data
>- */
>-static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
>+/* L2 hash helper */
>+static inline u32 bond_eth_hash(struct sk_buff *skb)
> {
> 	struct ethhdr *data = (struct ethhdr *)skb->data;
>
> 	if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
>-		return (data->h_dest[5] ^ data->h_source[5]) % count;
>+		return data->h_dest[5] ^ data->h_source[5];
>
> 	return 0;
> }
>
>-/*
>- * Hash for the output device based upon layer 2 and layer 3 data. If
>- * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
>- */
>-static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
>+static void bond_flow_get_ports(struct sk_buff *skb, struct flow_keys *fk,
>+				int offset)
>+{
>+	__be32 *ports;
>+
>+	ports = skb_header_pointer(skb, offset, sizeof(fk->ports), &fk->ports);
>+	if (ports)
>+		fk->ports = *ports;
>+}
>+
>+/* Extract the appropriate headers based on bond's xmit policy */
>+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
>+			      struct flow_keys *fk)
> {
>-	const struct ethhdr *data;
>+	const struct ipv6hdr *iph6;
> 	const struct iphdr *iph;
>-	const struct ipv6hdr *ipv6h;
>-	u32 v6hash;
>-	const __be32 *s, *d;
>+	int noff, proto = -1, poff = -1;

Nitpick: Useless initialization of poff.

>+	bool ret = false;
>+
>+	if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
>+		return skb_flow_dissect(skb, fk);
>
>-	if (skb->protocol == htons(ETH_P_IP) &&
>-	    pskb_network_may_pull(skb, sizeof(*iph))) {
>+	fk->ports = 0;
>+	noff = skb_network_offset(skb);
>+	if (skb->protocol == htons(ETH_P_IP)) {
>+		if (!pskb_may_pull(skb, noff + sizeof(*iph)))
>+			goto out;
> 		iph = ip_hdr(skb);
>-		data = (struct ethhdr *)skb->data;
>-		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
>-			(data->h_dest[5] ^ data->h_source[5])) % count;
>-	} else if (skb->protocol == htons(ETH_P_IPV6) &&
>-		   pskb_network_may_pull(skb, sizeof(*ipv6h))) {
>-		ipv6h = ipv6_hdr(skb);
>-		data = (struct ethhdr *)skb->data;
>-		s = &ipv6h->saddr.s6_addr32[0];
>-		d = &ipv6h->daddr.s6_addr32[0];
>-		v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
>-		v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
>-		return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
>-	}
>-
>-	return bond_xmit_hash_policy_l2(skb, count);
>+		fk->src = iph->saddr;
>+		fk->dst = iph->daddr;
>+		noff += iph->ihl << 2;
>+		if (!ip_is_fragment(iph))
>+			proto = iph->protocol;
>+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
>+		if (!pskb_may_pull(skb, noff + sizeof(*iph6)))
>+			goto out;
>+		iph6 = ipv6_hdr(skb);
>+		fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
>+		fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
>+		noff += sizeof(*iph6);
>+		proto = iph6->nexthdr;
>+	}

else
	return false;

?

Otherwise we might report false-positive for vlans, per example, without
populating the flow keys.

>+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) {
>+		poff = proto_ports_offset(proto);
>+		if (poff >= 0)
>+			bond_flow_get_ports(skb, fk, poff + noff);
>+	}

One idea would be to move the same initialization code from
skb_flow_dissect() to an external function and re-use it here?

>+	ret = true;
>+
>+out:
>+	return ret;
> }
>
>-/*
>- * Hash for the output device based upon layer 3 and layer 4 data. If
>- * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
>- * altogether not IP, fall back on bond_xmit_hash_policy_l2()
>+/**
>+ * bond_xmit_hash - generate a hash value based on the xmit policy
>+ * @bond: bonding device
>+ * @skb: buffer to use for headers
>+ * @count: modulo value
>+ *
>+ * This function will extract the necessary headers from the skb buffer and use
>+ * them to generate a hash based on the xmit_policy set in the bonding device
>+ * which will be reduced modulo count before returning.
>  */
>-static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
>+int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
> {
>-	u32 layer4_xor = 0;
>-	const struct iphdr *iph;
>-	const struct ipv6hdr *ipv6h;
>-	const __be32 *s, *d;
>-	const __be16 *l4 = NULL;
>-	__be16 _l4[2];
>-	int noff = skb_network_offset(skb);
>-	int poff;
>-
>-	if (skb->protocol == htons(ETH_P_IP) &&
>-	    pskb_may_pull(skb, noff + sizeof(*iph))) {
>-		iph = ip_hdr(skb);
>-		poff = proto_ports_offset(iph->protocol);
>+	struct flow_keys flow;
>+	u32 hash;
>
>-		if (!ip_is_fragment(iph) && poff >= 0) {
>-			l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff,
>-						sizeof(_l4), &_l4);
>-			if (l4)
>-				layer4_xor = ntohs(l4[0] ^ l4[1]);
>-		}
>-		return (layer4_xor ^
>-			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
>-	} else if (skb->protocol == htons(ETH_P_IPV6) &&
>-		   pskb_may_pull(skb, noff + sizeof(*ipv6h))) {
>-		ipv6h = ipv6_hdr(skb);
>-		poff = proto_ports_offset(ipv6h->nexthdr);
>-		if (poff >= 0) {
>-			l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
>-						sizeof(_l4), &_l4);
>-			if (l4)
>-				layer4_xor = ntohs(l4[0] ^ l4[1]);
>-		}
>-		s = &ipv6h->saddr.s6_addr32[0];
>-		d = &ipv6h->daddr.s6_addr32[0];
>-		layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
>-		layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
>-			       (layer4_xor >> 8);
>-		return layer4_xor % count;
>-	}
>+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
>+	    !bond_flow_dissect(bond, skb, &flow))
>+		return bond_eth_hash(skb) % count;
>+
>+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
>+	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
>+		hash = bond_eth_hash(skb);
>+	else
>+		hash = (__force u32)flow.ports;
>+	hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
>+	hash ^= (hash >> 16);
>+	hash ^= (hash >> 8);
>
>-	return bond_xmit_hash_policy_l2(skb, count);
>+	return hash % count;
> }
>
> /*-------------------------- Device entry points ----------------------------*/
>@@ -3700,8 +3704,7 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
> 	return NETDEV_TX_OK;
> }
>
>-/*
>- * In bond_xmit_xor() , we determine the output device by using a pre-
>+/* In bond_xmit_xor() , we determine the output device by using a pre-
>  * determined xmit_hash_policy(), If the selected device is not enabled,
>  * find the next active slave.
>  */
>@@ -3709,8 +3712,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
> {
> 	struct bonding *bond = netdev_priv(bond_dev);
>
>-	bond_xmit_slave_id(bond, skb,
>-			   bond->xmit_hash_policy(skb, bond->slave_cnt));
>+	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt));
>
> 	return NETDEV_TX_OK;
> }
>@@ -3746,22 +3748,6 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
>
> /*------------------------- Device initialization ---------------------------*/
>
>-static void bond_set_xmit_hash_policy(struct bonding *bond)
>-{
>-	switch (bond->params.xmit_policy) {
>-	case BOND_XMIT_POLICY_LAYER23:
>-		bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
>-		break;
>-	case BOND_XMIT_POLICY_LAYER34:
>-		bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
>-		break;
>-	case BOND_XMIT_POLICY_LAYER2:
>-	default:
>-		bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
>-		break;
>-	}
>-}
>-
> /*
>  * Lookup the slave that corresponds to a qid
>  */
>@@ -3871,38 +3857,6 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
> 	return ret;
> }
>
>-/*
>- * set bond mode specific net device operations
>- */
>-void bond_set_mode_ops(struct bonding *bond, int mode)
>-{
>-	struct net_device *bond_dev = bond->dev;
>-
>-	switch (mode) {
>-	case BOND_MODE_ROUNDROBIN:
>-		break;
>-	case BOND_MODE_ACTIVEBACKUP:
>-		break;
>-	case BOND_MODE_XOR:
>-		bond_set_xmit_hash_policy(bond);
>-		break;
>-	case BOND_MODE_BROADCAST:
>-		break;
>-	case BOND_MODE_8023AD:
>-		bond_set_xmit_hash_policy(bond);
>-		break;
>-	case BOND_MODE_ALB:
>-		/* FALLTHRU */
>-	case BOND_MODE_TLB:
>-		break;
>-	default:
>-		/* Should never happen, mode already checked */
>-		pr_err("%s: Error: Unknown bonding mode %d\n",
>-		       bond_dev->name, mode);
>-		break;
>-	}
>-}
>-
> static int bond_ethtool_get_settings(struct net_device *bond_dev,
> 				     struct ethtool_cmd *ecmd)
> {
>@@ -4004,7 +3958,6 @@ static void bond_setup(struct net_device *bond_dev)
> 	ether_setup(bond_dev);
> 	bond_dev->netdev_ops = &bond_netdev_ops;
> 	bond_dev->ethtool_ops = &bond_ethtool_ops;
>-	bond_set_mode_ops(bond, bond->params.mode);
>
> 	bond_dev->destructor = bond_destructor;
>
>diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
>index c29b836..dba3b9b 100644
>--- a/drivers/net/bonding/bond_sysfs.c
>+++ b/drivers/net/bonding/bond_sysfs.c
>@@ -352,7 +352,6 @@ static ssize_t bonding_store_mode(struct device *d,
> 	/* don't cache arp_validate between modes */
> 	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
> 	bond->params.mode = new_value;
>-	bond_set_mode_ops(bond, bond->params.mode);
> 	pr_info("%s: setting mode to %s (%d).\n",
> 		bond->dev->name, bond_mode_tbl[new_value].modename,
> 		new_value);
>@@ -392,7 +391,6 @@ static ssize_t bonding_store_xmit_hash(struct device *d,
> 		ret = -EINVAL;
> 	} else {
> 		bond->params.xmit_policy = new_value;
>-		bond_set_mode_ops(bond, bond->params.mode);
> 		pr_info("%s: setting xmit hash policy to %s (%d).\n",
> 			bond->dev->name,
> 			xmit_hashtype_tbl[new_value].modename, new_value);
>diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>index 03cf3fd..4db9ec4 100644
>--- a/drivers/net/bonding/bonding.h
>+++ b/drivers/net/bonding/bonding.h
>@@ -245,7 +245,6 @@ struct bonding {
> 	char     proc_file_name[IFNAMSIZ];
> #endif /* CONFIG_PROC_FS */
> 	struct   list_head bond_list;
>-	int      (*xmit_hash_policy)(struct sk_buff *, int);
> 	u16      rr_tx_counter;
> 	struct   ad_bond_info ad_info;
> 	struct   alb_bond_info alb_info;
>@@ -446,7 +445,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
> void bond_mii_monitor(struct work_struct *);
> void bond_loadbalance_arp_mon(struct work_struct *);
> void bond_activebackup_arp_mon(struct work_struct *);
>-void bond_set_mode_ops(struct bonding *bond, int mode);
>+int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
> int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl);
> void bond_select_active_slave(struct bonding *bond);
> void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
>diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
>index a17edda..9635a62 100644
>--- a/include/uapi/linux/if_bonding.h
>+++ b/include/uapi/linux/if_bonding.h
>@@ -91,6 +91,8 @@
> #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
> #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
> #define BOND_XMIT_POLICY_LAYER23	2 /* layer 2+3 (IP ^ MAC) */
>+#define BOND_XMIT_POLICY_ENCAP23	3 /* encapsulated layer 2+3 */
>+#define BOND_XMIT_POLICY_ENCAP34	4 /* encapsulated layer 3+4 */
>
> typedef struct ifbond {
> 	__s32 bond_mode;
>-- 
>1.8.1.4
>
>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nikolay Aleksandrov Sept. 25, 2013, 4:36 p.m. UTC | #2
On 09/25/2013 06:26 PM, Veaceslav Falico wrote:
> On Wed, Sep 25, 2013 at 05:19:57PM +0200, Nikolay Aleksandrov wrote:
>> This patch adds two new hash policy modes which use skb_flow_dissect:
>> 3 - Encapsulated layer 2+3
>> 4 - Encapsulated layer 3+4
>> There should be a good improvement for tunnel users in those modes.
>> It also changes the old hash functions to:
>> hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
>> hash ^= (hash >> 16);
>> hash ^= (hash >> 8);
>>
>> Where hash will be initialized either to L2 hash, that is
>> SRCMAC[5] XOR DSTMAC[5], or to flow->ports which should be extracted
>> from the upper layer. Flow's dst and src are also extracted based on the
>> xmit policy either directly from the buffer or by using skb_flow_dissect,
>> but in both cases if the protocol is IPv6 then dst and src are obtained by
>> ipv6_addr_hash() on the real addresses. In case of a non-dissectable
>> packet, the algorithms fall back to L2 hashing.
>> The bond_set_mode_ops() function is now obsolete and thus deleted
>> because it was used only to set the proper hash policy. Also we trim a
>> pointer from struct bonding because we no longer need to keep the hash
>> function, now there's only a single hash function - bond_xmit_hash that
>> works based on bond->params.xmit_policy.
>>
>> The hash function and skb_flow_dissect were suggested by Eric Dumazet.
>> The layer names were suggested by Andy Gospodarek, because I suck at
>> semantics.
>>
>> Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
>> ---
>> One line is intentionally left at 82 chars since it's the whole function
>> and IMO looks better that way.
>>
>> drivers/net/bonding/bond_3ad.c   |   2 +-
>> drivers/net/bonding/bond_main.c  | 211
>> +++++++++++++++------------------------
>> drivers/net/bonding/bond_sysfs.c |   2 -
>> drivers/net/bonding/bonding.h    |   3 +-
>> include/uapi/linux/if_bonding.h  |   2 +
>> 5 files changed, 86 insertions(+), 134 deletions(-)
> 
> Nice!!! Though some comments below.
> 
>>
>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>> index 0d8f427..b3ab703 100644
>> --- a/drivers/net/bonding/bond_3ad.c
>> +++ b/drivers/net/bonding/bond_3ad.c
>> @@ -2442,7 +2442,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct
>> net_device *dev)
>>         goto out;
>>     }
>>
>> -    slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
>> +    slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
>>
>>     bond_for_each_slave(bond, slave) {
>>         struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
>> diff --git a/drivers/net/bonding/bond_main.c
>> b/drivers/net/bonding/bond_main.c
>> index 55bbb8b..73e416b 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -78,6 +78,7 @@
>> #include <net/netns/generic.h>
>> #include <net/pkt_sched.h>
>> #include <linux/rculist.h>
>> +#include <net/flow_keys.h>
>> #include "bonding.h"
>> #include "bond_3ad.h"
>> #include "bond_alb.h"
>> @@ -159,7 +160,8 @@ MODULE_PARM_DESC(min_links, "Minimum number of
>> available links before turning on
>> module_param(xmit_hash_policy, charp, 0);
>> MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing
>> method; "
>>                    "0 for layer 2 (default), 1 for layer 3+4, "
>> -                   "2 for layer 2+3");
>> +                   "2 for layer 2+3, 3 for encap layer 2+3, "
>> +                   "4 for encap layer 3+4");
>> module_param(arp_interval, int, 0);
>> MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
>> module_param_array(arp_ip_target, charp, NULL, 0);
>> @@ -217,6 +219,8 @@ const struct bond_parm_tbl xmit_hashtype_tbl[] = {
>> {    "layer2",        BOND_XMIT_POLICY_LAYER2},
>> {    "layer3+4",        BOND_XMIT_POLICY_LAYER34},
>> {    "layer2+3",        BOND_XMIT_POLICY_LAYER23},
>> +{    "encap2+3",        BOND_XMIT_POLICY_ENCAP23},
>> +{    "encap3+4",        BOND_XMIT_POLICY_ENCAP34},
>> {    NULL,            -1},
>> };
>>
>> @@ -3026,99 +3030,99 @@ static struct notifier_block bond_netdev_notifier
>> = {
>>
>> /*---------------------------- Hashing Policies
>> -----------------------------*/
>>
>> -/*
>> - * Hash for the output device based upon layer 2 data
>> - */
>> -static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
>> +/* L2 hash helper */
>> +static inline u32 bond_eth_hash(struct sk_buff *skb)
>> {
>>     struct ethhdr *data = (struct ethhdr *)skb->data;
>>
>>     if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
>> -        return (data->h_dest[5] ^ data->h_source[5]) % count;
>> +        return data->h_dest[5] ^ data->h_source[5];
>>
>>     return 0;
>> }
>>
>> -/*
>> - * Hash for the output device based upon layer 2 and layer 3 data. If
>> - * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
>> - */
>> -static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
>> +static void bond_flow_get_ports(struct sk_buff *skb, struct flow_keys *fk,
>> +                int offset)
>> +{
>> +    __be32 *ports;
>> +
>> +    ports = skb_header_pointer(skb, offset, sizeof(fk->ports), &fk->ports);
>> +    if (ports)
>> +        fk->ports = *ports;
>> +}
>> +
>> +/* Extract the appropriate headers based on bond's xmit policy */
>> +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
>> +                  struct flow_keys *fk)
>> {
>> -    const struct ethhdr *data;
>> +    const struct ipv6hdr *iph6;
>>     const struct iphdr *iph;
>> -    const struct ipv6hdr *ipv6h;
>> -    u32 v6hash;
>> -    const __be32 *s, *d;
>> +    int noff, proto = -1, poff = -1;
> 
> Nitpick: Useless initialization of poff.
> 
Yeah, I saw it after posting and have this in the prepared v2 :-)

>> +    bool ret = false;
>> +
>> +    if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
>> +        return skb_flow_dissect(skb, fk);
>>
>> -    if (skb->protocol == htons(ETH_P_IP) &&
>> -        pskb_network_may_pull(skb, sizeof(*iph))) {
>> +    fk->ports = 0;
>> +    noff = skb_network_offset(skb);
>> +    if (skb->protocol == htons(ETH_P_IP)) {
>> +        if (!pskb_may_pull(skb, noff + sizeof(*iph)))
>> +            goto out;
>>         iph = ip_hdr(skb);
>> -        data = (struct ethhdr *)skb->data;
>> -        return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
>> -            (data->h_dest[5] ^ data->h_source[5])) % count;
>> -    } else if (skb->protocol == htons(ETH_P_IPV6) &&
>> -           pskb_network_may_pull(skb, sizeof(*ipv6h))) {
>> -        ipv6h = ipv6_hdr(skb);
>> -        data = (struct ethhdr *)skb->data;
>> -        s = &ipv6h->saddr.s6_addr32[0];
>> -        d = &ipv6h->daddr.s6_addr32[0];
>> -        v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
>> -        v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
>> -        return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
>> -    }
>> -
>> -    return bond_xmit_hash_policy_l2(skb, count);
>> +        fk->src = iph->saddr;
>> +        fk->dst = iph->daddr;
>> +        noff += iph->ihl << 2;
>> +        if (!ip_is_fragment(iph))
>> +            proto = iph->protocol;
>> +    } else if (skb->protocol == htons(ETH_P_IPV6)) {
>> +        if (!pskb_may_pull(skb, noff + sizeof(*iph6)))
>> +            goto out;
>> +        iph6 = ipv6_hdr(skb);
>> +        fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
>> +        fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
>> +        noff += sizeof(*iph6);
>> +        proto = iph6->nexthdr;
>> +    }
> 
> else
>     return false;
> 
> ?
> 
Hehe, I actually had that in v1 few months ago, I was wondering why I did
it that way...

> Otherwise we might report false-positive for vlans, per example, without
> populating the flow keys.
> 
Right.

>> +    if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) {
>> +        poff = proto_ports_offset(proto);
>> +        if (poff >= 0)
>> +            bond_flow_get_ports(skb, fk, poff + noff);
>> +    }
> 
> One idea would be to move the same initialization code from
> skb_flow_dissect() to an external function and re-use it here?
> 
Yep, I could do an inline skb_flow_get_ports() and re-use it.

>> +    ret = true;
>> +
>> +out:
>> +    return ret;
>> }
>>
>> -/*
>> - * Hash for the output device based upon layer 3 and layer 4 data. If
>> - * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
>> - * altogether not IP, fall back on bond_xmit_hash_policy_l2()
>> +/**
>> + * bond_xmit_hash - generate a hash value based on the xmit policy
>> + * @bond: bonding device
>> + * @skb: buffer to use for headers
>> + * @count: modulo value
>> + *
>> + * This function will extract the necessary headers from the skb buffer
>> and use
>> + * them to generate a hash based on the xmit_policy set in the bonding
>> device
>> + * which will be reduced modulo count before returning.
>>  */
>> -static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
>> +int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
>> {
>> -    u32 layer4_xor = 0;
>> -    const struct iphdr *iph;
>> -    const struct ipv6hdr *ipv6h;
>> -    const __be32 *s, *d;
>> -    const __be16 *l4 = NULL;
>> -    __be16 _l4[2];
>> -    int noff = skb_network_offset(skb);
>> -    int poff;
>> -
>> -    if (skb->protocol == htons(ETH_P_IP) &&
>> -        pskb_may_pull(skb, noff + sizeof(*iph))) {
>> -        iph = ip_hdr(skb);
>> -        poff = proto_ports_offset(iph->protocol);
>> +    struct flow_keys flow;
>> +    u32 hash;
>>
>> -        if (!ip_is_fragment(iph) && poff >= 0) {
>> -            l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff,
>> -                        sizeof(_l4), &_l4);
>> -            if (l4)
>> -                layer4_xor = ntohs(l4[0] ^ l4[1]);
>> -        }
>> -        return (layer4_xor ^
>> -            ((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
>> -    } else if (skb->protocol == htons(ETH_P_IPV6) &&
>> -           pskb_may_pull(skb, noff + sizeof(*ipv6h))) {
>> -        ipv6h = ipv6_hdr(skb);
>> -        poff = proto_ports_offset(ipv6h->nexthdr);
>> -        if (poff >= 0) {
>> -            l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
>> -                        sizeof(_l4), &_l4);
>> -            if (l4)
>> -                layer4_xor = ntohs(l4[0] ^ l4[1]);
>> -        }
>> -        s = &ipv6h->saddr.s6_addr32[0];
>> -        d = &ipv6h->daddr.s6_addr32[0];
>> -        layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
>> -        layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
>> -                   (layer4_xor >> 8);
>> -        return layer4_xor % count;
>> -    }
>> +    if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
>> +        !bond_flow_dissect(bond, skb, &flow))
>> +        return bond_eth_hash(skb) % count;
>> +
>> +    if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
>> +        bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
>> +        hash = bond_eth_hash(skb);
>> +    else
>> +        hash = (__force u32)flow.ports;
>> +    hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
>> +    hash ^= (hash >> 16);
>> +    hash ^= (hash >> 8);
>>
>> -    return bond_xmit_hash_policy_l2(skb, count);
>> +    return hash % count;
>> }
>>
>> /*-------------------------- Device entry points
>> ----------------------------*/
>> @@ -3700,8 +3704,7 @@ static int bond_xmit_activebackup(struct sk_buff
>> *skb, struct net_device *bond_d
>>     return NETDEV_TX_OK;
>> }
>>
>> -/*
>> - * In bond_xmit_xor() , we determine the output device by using a pre-
>> +/* In bond_xmit_xor() , we determine the output device by using a pre-
>>  * determined xmit_hash_policy(), If the selected device is not enabled,
>>  * find the next active slave.
>>  */
>> @@ -3709,8 +3712,7 @@ static int bond_xmit_xor(struct sk_buff *skb,
>> struct net_device *bond_dev)
>> {
>>     struct bonding *bond = netdev_priv(bond_dev);
>>
>> -    bond_xmit_slave_id(bond, skb,
>> -               bond->xmit_hash_policy(skb, bond->slave_cnt));
>> +    bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb,
>> bond->slave_cnt));
>>
>>     return NETDEV_TX_OK;
>> }
>> @@ -3746,22 +3748,6 @@ static int bond_xmit_broadcast(struct sk_buff
>> *skb, struct net_device *bond_dev)
>>
>> /*------------------------- Device initialization
>> ---------------------------*/
>>
>> -static void bond_set_xmit_hash_policy(struct bonding *bond)
>> -{
>> -    switch (bond->params.xmit_policy) {
>> -    case BOND_XMIT_POLICY_LAYER23:
>> -        bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
>> -        break;
>> -    case BOND_XMIT_POLICY_LAYER34:
>> -        bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
>> -        break;
>> -    case BOND_XMIT_POLICY_LAYER2:
>> -    default:
>> -        bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
>> -        break;
>> -    }
>> -}
>> -
>> /*
>>  * Lookup the slave that corresponds to a qid
>>  */
>> @@ -3871,38 +3857,6 @@ static netdev_tx_t bond_start_xmit(struct sk_buff
>> *skb, struct net_device *dev)
>>     return ret;
>> }
>>
>> -/*
>> - * set bond mode specific net device operations
>> - */
>> -void bond_set_mode_ops(struct bonding *bond, int mode)
>> -{
>> -    struct net_device *bond_dev = bond->dev;
>> -
>> -    switch (mode) {
>> -    case BOND_MODE_ROUNDROBIN:
>> -        break;
>> -    case BOND_MODE_ACTIVEBACKUP:
>> -        break;
>> -    case BOND_MODE_XOR:
>> -        bond_set_xmit_hash_policy(bond);
>> -        break;
>> -    case BOND_MODE_BROADCAST:
>> -        break;
>> -    case BOND_MODE_8023AD:
>> -        bond_set_xmit_hash_policy(bond);
>> -        break;
>> -    case BOND_MODE_ALB:
>> -        /* FALLTHRU */
>> -    case BOND_MODE_TLB:
>> -        break;
>> -    default:
>> -        /* Should never happen, mode already checked */
>> -        pr_err("%s: Error: Unknown bonding mode %d\n",
>> -               bond_dev->name, mode);
>> -        break;
>> -    }
>> -}
>> -
>> static int bond_ethtool_get_settings(struct net_device *bond_dev,
>>                      struct ethtool_cmd *ecmd)
>> {
>> @@ -4004,7 +3958,6 @@ static void bond_setup(struct net_device *bond_dev)
>>     ether_setup(bond_dev);
>>     bond_dev->netdev_ops = &bond_netdev_ops;
>>     bond_dev->ethtool_ops = &bond_ethtool_ops;
>> -    bond_set_mode_ops(bond, bond->params.mode);
>>
>>     bond_dev->destructor = bond_destructor;
>>
>> diff --git a/drivers/net/bonding/bond_sysfs.c
>> b/drivers/net/bonding/bond_sysfs.c
>> index c29b836..dba3b9b 100644
>> --- a/drivers/net/bonding/bond_sysfs.c
>> +++ b/drivers/net/bonding/bond_sysfs.c
>> @@ -352,7 +352,6 @@ static ssize_t bonding_store_mode(struct device *d,
>>     /* don't cache arp_validate between modes */
>>     bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
>>     bond->params.mode = new_value;
>> -    bond_set_mode_ops(bond, bond->params.mode);
>>     pr_info("%s: setting mode to %s (%d).\n",
>>         bond->dev->name, bond_mode_tbl[new_value].modename,
>>         new_value);
>> @@ -392,7 +391,6 @@ static ssize_t bonding_store_xmit_hash(struct device *d,
>>         ret = -EINVAL;
>>     } else {
>>         bond->params.xmit_policy = new_value;
>> -        bond_set_mode_ops(bond, bond->params.mode);
>>         pr_info("%s: setting xmit hash policy to %s (%d).\n",
>>             bond->dev->name,
>>             xmit_hashtype_tbl[new_value].modename, new_value);
>> diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
>> index 03cf3fd..4db9ec4 100644
>> --- a/drivers/net/bonding/bonding.h
>> +++ b/drivers/net/bonding/bonding.h
>> @@ -245,7 +245,6 @@ struct bonding {
>>     char     proc_file_name[IFNAMSIZ];
>> #endif /* CONFIG_PROC_FS */
>>     struct   list_head bond_list;
>> -    int      (*xmit_hash_policy)(struct sk_buff *, int);
>>     u16      rr_tx_counter;
>>     struct   ad_bond_info ad_info;
>>     struct   alb_bond_info alb_info;
>> @@ -446,7 +445,7 @@ int bond_release(struct net_device *bond_dev, struct
>> net_device *slave_dev);
>> void bond_mii_monitor(struct work_struct *);
>> void bond_loadbalance_arp_mon(struct work_struct *);
>> void bond_activebackup_arp_mon(struct work_struct *);
>> -void bond_set_mode_ops(struct bonding *bond, int mode);
>> +int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
>> int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl);
>> void bond_select_active_slave(struct bonding *bond);
>> void bond_change_active_slave(struct bonding *bond, struct slave
>> *new_active);
>> diff --git a/include/uapi/linux/if_bonding.h
>> b/include/uapi/linux/if_bonding.h
>> index a17edda..9635a62 100644
>> --- a/include/uapi/linux/if_bonding.h
>> +++ b/include/uapi/linux/if_bonding.h
>> @@ -91,6 +91,8 @@
>> #define BOND_XMIT_POLICY_LAYER2        0 /* layer 2 (MAC only), default */
>> #define BOND_XMIT_POLICY_LAYER34    1 /* layer 3+4 (IP ^ (TCP || UDP)) */
>> #define BOND_XMIT_POLICY_LAYER23    2 /* layer 2+3 (IP ^ MAC) */
>> +#define BOND_XMIT_POLICY_ENCAP23    3 /* encapsulated layer 2+3 */
>> +#define BOND_XMIT_POLICY_ENCAP34    4 /* encapsulated layer 3+4 */
>>
>> typedef struct ifbond {
>>     __s32 bond_mode;
>> -- 
>> 1.8.1.4
>>
>> -- 
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 0d8f427..b3ab703 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2442,7 +2442,7 @@  int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
 		goto out;
 	}
 
-	slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
+	slave_agg_no = bond_xmit_hash(bond, skb, slaves_in_agg);
 
 	bond_for_each_slave(bond, slave) {
 		struct aggregator *agg = SLAVE_AD_INFO(slave).port.aggregator;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 55bbb8b..73e416b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -78,6 +78,7 @@ 
 #include <net/netns/generic.h>
 #include <net/pkt_sched.h>
 #include <linux/rculist.h>
+#include <net/flow_keys.h>
 #include "bonding.h"
 #include "bond_3ad.h"
 #include "bond_alb.h"
@@ -159,7 +160,8 @@  MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on
 module_param(xmit_hash_policy, charp, 0);
 MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "
 				   "0 for layer 2 (default), 1 for layer 3+4, "
-				   "2 for layer 2+3");
+				   "2 for layer 2+3, 3 for encap layer 2+3, "
+				   "4 for encap layer 3+4");
 module_param(arp_interval, int, 0);
 MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
 module_param_array(arp_ip_target, charp, NULL, 0);
@@ -217,6 +219,8 @@  const struct bond_parm_tbl xmit_hashtype_tbl[] = {
 {	"layer2",		BOND_XMIT_POLICY_LAYER2},
 {	"layer3+4",		BOND_XMIT_POLICY_LAYER34},
 {	"layer2+3",		BOND_XMIT_POLICY_LAYER23},
+{	"encap2+3",		BOND_XMIT_POLICY_ENCAP23},
+{	"encap3+4",		BOND_XMIT_POLICY_ENCAP34},
 {	NULL,			-1},
 };
 
@@ -3026,99 +3030,99 @@  static struct notifier_block bond_netdev_notifier = {
 
 /*---------------------------- Hashing Policies -----------------------------*/
 
-/*
- * Hash for the output device based upon layer 2 data
- */
-static int bond_xmit_hash_policy_l2(struct sk_buff *skb, int count)
+/* L2 hash helper */
+static inline u32 bond_eth_hash(struct sk_buff *skb)
 {
 	struct ethhdr *data = (struct ethhdr *)skb->data;
 
 	if (skb_headlen(skb) >= offsetof(struct ethhdr, h_proto))
-		return (data->h_dest[5] ^ data->h_source[5]) % count;
+		return data->h_dest[5] ^ data->h_source[5];
 
 	return 0;
 }
 
-/*
- * Hash for the output device based upon layer 2 and layer 3 data. If
- * the packet is not IP, fall back on bond_xmit_hash_policy_l2()
- */
-static int bond_xmit_hash_policy_l23(struct sk_buff *skb, int count)
+static void bond_flow_get_ports(struct sk_buff *skb, struct flow_keys *fk,
+				int offset)
+{
+	__be32 *ports;
+
+	ports = skb_header_pointer(skb, offset, sizeof(fk->ports), &fk->ports);
+	if (ports)
+		fk->ports = *ports;
+}
+
+/* Extract the appropriate headers based on bond's xmit policy */
+static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
+			      struct flow_keys *fk)
 {
-	const struct ethhdr *data;
+	const struct ipv6hdr *iph6;
 	const struct iphdr *iph;
-	const struct ipv6hdr *ipv6h;
-	u32 v6hash;
-	const __be32 *s, *d;
+	int noff, proto = -1, poff = -1;
+	bool ret = false;
+
+	if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
+		return skb_flow_dissect(skb, fk);
 
-	if (skb->protocol == htons(ETH_P_IP) &&
-	    pskb_network_may_pull(skb, sizeof(*iph))) {
+	fk->ports = 0;
+	noff = skb_network_offset(skb);
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_may_pull(skb, noff + sizeof(*iph)))
+			goto out;
 		iph = ip_hdr(skb);
-		data = (struct ethhdr *)skb->data;
-		return ((ntohl(iph->saddr ^ iph->daddr) & 0xffff) ^
-			(data->h_dest[5] ^ data->h_source[5])) % count;
-	} else if (skb->protocol == htons(ETH_P_IPV6) &&
-		   pskb_network_may_pull(skb, sizeof(*ipv6h))) {
-		ipv6h = ipv6_hdr(skb);
-		data = (struct ethhdr *)skb->data;
-		s = &ipv6h->saddr.s6_addr32[0];
-		d = &ipv6h->daddr.s6_addr32[0];
-		v6hash = (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
-		v6hash ^= (v6hash >> 24) ^ (v6hash >> 16) ^ (v6hash >> 8);
-		return (v6hash ^ data->h_dest[5] ^ data->h_source[5]) % count;
-	}
-
-	return bond_xmit_hash_policy_l2(skb, count);
+		fk->src = iph->saddr;
+		fk->dst = iph->daddr;
+		noff += iph->ihl << 2;
+		if (!ip_is_fragment(iph))
+			proto = iph->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_may_pull(skb, noff + sizeof(*iph6)))
+			goto out;
+		iph6 = ipv6_hdr(skb);
+		fk->src = (__force __be32)ipv6_addr_hash(&iph6->saddr);
+		fk->dst = (__force __be32)ipv6_addr_hash(&iph6->daddr);
+		noff += sizeof(*iph6);
+		proto = iph6->nexthdr;
+	}
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) {
+		poff = proto_ports_offset(proto);
+		if (poff >= 0)
+			bond_flow_get_ports(skb, fk, poff + noff);
+	}
+	ret = true;
+
+out:
+	return ret;
 }
 
-/*
- * Hash for the output device based upon layer 3 and layer 4 data. If
- * the packet is a frag or not TCP or UDP, just use layer 3 data.  If it is
- * altogether not IP, fall back on bond_xmit_hash_policy_l2()
+/**
+ * bond_xmit_hash - generate a hash value based on the xmit policy
+ * @bond: bonding device
+ * @skb: buffer to use for headers
+ * @count: modulo value
+ *
+ * This function will extract the necessary headers from the skb buffer and use
+ * them to generate a hash based on the xmit_policy set in the bonding device
+ * which will be reduced modulo count before returning.
  */
-static int bond_xmit_hash_policy_l34(struct sk_buff *skb, int count)
+int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count)
 {
-	u32 layer4_xor = 0;
-	const struct iphdr *iph;
-	const struct ipv6hdr *ipv6h;
-	const __be32 *s, *d;
-	const __be16 *l4 = NULL;
-	__be16 _l4[2];
-	int noff = skb_network_offset(skb);
-	int poff;
-
-	if (skb->protocol == htons(ETH_P_IP) &&
-	    pskb_may_pull(skb, noff + sizeof(*iph))) {
-		iph = ip_hdr(skb);
-		poff = proto_ports_offset(iph->protocol);
+	struct flow_keys flow;
+	u32 hash;
 
-		if (!ip_is_fragment(iph) && poff >= 0) {
-			l4 = skb_header_pointer(skb, noff + (iph->ihl << 2) + poff,
-						sizeof(_l4), &_l4);
-			if (l4)
-				layer4_xor = ntohs(l4[0] ^ l4[1]);
-		}
-		return (layer4_xor ^
-			((ntohl(iph->saddr ^ iph->daddr)) & 0xffff)) % count;
-	} else if (skb->protocol == htons(ETH_P_IPV6) &&
-		   pskb_may_pull(skb, noff + sizeof(*ipv6h))) {
-		ipv6h = ipv6_hdr(skb);
-		poff = proto_ports_offset(ipv6h->nexthdr);
-		if (poff >= 0) {
-			l4 = skb_header_pointer(skb, noff + sizeof(*ipv6h) + poff,
-						sizeof(_l4), &_l4);
-			if (l4)
-				layer4_xor = ntohs(l4[0] ^ l4[1]);
-		}
-		s = &ipv6h->saddr.s6_addr32[0];
-		d = &ipv6h->daddr.s6_addr32[0];
-		layer4_xor ^= (s[1] ^ d[1]) ^ (s[2] ^ d[2]) ^ (s[3] ^ d[3]);
-		layer4_xor ^= (layer4_xor >> 24) ^ (layer4_xor >> 16) ^
-			       (layer4_xor >> 8);
-		return layer4_xor % count;
-	}
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
+	    !bond_flow_dissect(bond, skb, &flow))
+		return bond_eth_hash(skb) % count;
+
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
+	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
+		hash = bond_eth_hash(skb);
+	else
+		hash = (__force u32)flow.ports;
+	hash ^= (__force u32)flow.dst ^ (__force u32)flow.src;
+	hash ^= (hash >> 16);
+	hash ^= (hash >> 8);
 
-	return bond_xmit_hash_policy_l2(skb, count);
+	return hash % count;
 }
 
 /*-------------------------- Device entry points ----------------------------*/
@@ -3700,8 +3704,7 @@  static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d
 	return NETDEV_TX_OK;
 }
 
-/*
- * In bond_xmit_xor() , we determine the output device by using a pre-
+/* In bond_xmit_xor() , we determine the output device by using a pre-
  * determined xmit_hash_policy(), If the selected device is not enabled,
  * find the next active slave.
  */
@@ -3709,8 +3712,7 @@  static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 
-	bond_xmit_slave_id(bond, skb,
-			   bond->xmit_hash_policy(skb, bond->slave_cnt));
+	bond_xmit_slave_id(bond, skb, bond_xmit_hash(bond, skb, bond->slave_cnt));
 
 	return NETDEV_TX_OK;
 }
@@ -3746,22 +3748,6 @@  static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
 
 /*------------------------- Device initialization ---------------------------*/
 
-static void bond_set_xmit_hash_policy(struct bonding *bond)
-{
-	switch (bond->params.xmit_policy) {
-	case BOND_XMIT_POLICY_LAYER23:
-		bond->xmit_hash_policy = bond_xmit_hash_policy_l23;
-		break;
-	case BOND_XMIT_POLICY_LAYER34:
-		bond->xmit_hash_policy = bond_xmit_hash_policy_l34;
-		break;
-	case BOND_XMIT_POLICY_LAYER2:
-	default:
-		bond->xmit_hash_policy = bond_xmit_hash_policy_l2;
-		break;
-	}
-}
-
 /*
  * Lookup the slave that corresponds to a qid
  */
@@ -3871,38 +3857,6 @@  static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return ret;
 }
 
-/*
- * set bond mode specific net device operations
- */
-void bond_set_mode_ops(struct bonding *bond, int mode)
-{
-	struct net_device *bond_dev = bond->dev;
-
-	switch (mode) {
-	case BOND_MODE_ROUNDROBIN:
-		break;
-	case BOND_MODE_ACTIVEBACKUP:
-		break;
-	case BOND_MODE_XOR:
-		bond_set_xmit_hash_policy(bond);
-		break;
-	case BOND_MODE_BROADCAST:
-		break;
-	case BOND_MODE_8023AD:
-		bond_set_xmit_hash_policy(bond);
-		break;
-	case BOND_MODE_ALB:
-		/* FALLTHRU */
-	case BOND_MODE_TLB:
-		break;
-	default:
-		/* Should never happen, mode already checked */
-		pr_err("%s: Error: Unknown bonding mode %d\n",
-		       bond_dev->name, mode);
-		break;
-	}
-}
-
 static int bond_ethtool_get_settings(struct net_device *bond_dev,
 				     struct ethtool_cmd *ecmd)
 {
@@ -4004,7 +3958,6 @@  static void bond_setup(struct net_device *bond_dev)
 	ether_setup(bond_dev);
 	bond_dev->netdev_ops = &bond_netdev_ops;
 	bond_dev->ethtool_ops = &bond_ethtool_ops;
-	bond_set_mode_ops(bond, bond->params.mode);
 
 	bond_dev->destructor = bond_destructor;
 
diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
index c29b836..dba3b9b 100644
--- a/drivers/net/bonding/bond_sysfs.c
+++ b/drivers/net/bonding/bond_sysfs.c
@@ -352,7 +352,6 @@  static ssize_t bonding_store_mode(struct device *d,
 	/* don't cache arp_validate between modes */
 	bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
 	bond->params.mode = new_value;
-	bond_set_mode_ops(bond, bond->params.mode);
 	pr_info("%s: setting mode to %s (%d).\n",
 		bond->dev->name, bond_mode_tbl[new_value].modename,
 		new_value);
@@ -392,7 +391,6 @@  static ssize_t bonding_store_xmit_hash(struct device *d,
 		ret = -EINVAL;
 	} else {
 		bond->params.xmit_policy = new_value;
-		bond_set_mode_ops(bond, bond->params.mode);
 		pr_info("%s: setting xmit hash policy to %s (%d).\n",
 			bond->dev->name,
 			xmit_hashtype_tbl[new_value].modename, new_value);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 03cf3fd..4db9ec4 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -245,7 +245,6 @@  struct bonding {
 	char     proc_file_name[IFNAMSIZ];
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
-	int      (*xmit_hash_policy)(struct sk_buff *, int);
 	u16      rr_tx_counter;
 	struct   ad_bond_info ad_info;
 	struct   alb_bond_info alb_info;
@@ -446,7 +445,7 @@  int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
 void bond_mii_monitor(struct work_struct *);
 void bond_loadbalance_arp_mon(struct work_struct *);
 void bond_activebackup_arp_mon(struct work_struct *);
-void bond_set_mode_ops(struct bonding *bond, int mode);
+int bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, int count);
 int bond_parse_parm(const char *mode_arg, const struct bond_parm_tbl *tbl);
 void bond_select_active_slave(struct bonding *bond);
 void bond_change_active_slave(struct bonding *bond, struct slave *new_active);
diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
index a17edda..9635a62 100644
--- a/include/uapi/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
@@ -91,6 +91,8 @@ 
 #define BOND_XMIT_POLICY_LAYER2		0 /* layer 2 (MAC only), default */
 #define BOND_XMIT_POLICY_LAYER34	1 /* layer 3+4 (IP ^ (TCP || UDP)) */
 #define BOND_XMIT_POLICY_LAYER23	2 /* layer 2+3 (IP ^ MAC) */
+#define BOND_XMIT_POLICY_ENCAP23	3 /* encapsulated layer 2+3 */
+#define BOND_XMIT_POLICY_ENCAP34	4 /* encapsulated layer 3+4 */
 
 typedef struct ifbond {
 	__s32 bond_mode;