diff mbox

[RFC] bonding: prevent outgoing packets on inactive slaves

Message ID 20090805162429.GD16093@midget.suse.cz
State Rejected, archived
Delegated to: David Miller
Headers show

Commit Message

Jiri Bohac Aug. 5, 2009, 4:24 p.m. UTC
Applications exist that broadcast/multicast packets on all
devices in the system (e.g. avahi-mdns).

When a device is an inactive slave of a bond in active-backup
mode, its MAC address may be set identical to other divecies in
the bond. The broadcast/multicast packets may then confuse
switches to direct packets to the inactive slave, rather than the
active one.

This patch makes sure the TX queues on inactive slaves are
deactivated.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>

Comments

Eric Dumazet Aug. 5, 2009, 4:57 p.m. UTC | #1
Jiri Bohac a écrit :
> Applications exist that broadcast/multicast packets on all
> devices in the system (e.g. avahi-mdns).
> 
> When a device is an inactive slave of a bond in active-backup
> mode, its MAC address may be set identical to other divecies in
> the bond. The broadcast/multicast packets may then confuse
> switches to direct packets to the inactive slave, rather than the
> active one.
> 
> This patch makes sure the TX queues on inactive slaves are
> deactivated.
> 

Wont this break ARP link monitoring ?

> Signed-off-by: Jiri Bohac <jbohac@suse.cz>
> 
> --- a/drivers/net/bonding/bonding.h
> +++ b/drivers/net/bonding/bonding.h
> @@ -20,6 +20,7 @@
>  #include <linux/if_bonding.h>
>  #include <linux/kobject.h>
>  #include <linux/in6.h>
> +#include <net/sch_generic.h>
>  #include "bond_3ad.h"
>  #include "bond_alb.h"
>  
> @@ -291,12 +292,21 @@ static inline void bond_set_slave_inactive_flags(struct slave *slave)
>  	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
>  	if (slave_do_arp_validate(bond, slave))
>  		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
> +
> +	/* prevent outgoing frames on inactive slaves from confusing switches */
> +	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
> +		dev_deactivate_tx(slave->dev);
>  }
>  
>  static inline void bond_set_slave_active_flags(struct slave *slave)
>  {
> +	struct bonding *bond = netdev_priv(slave->dev->master);
> +
>  	slave->state = BOND_STATE_ACTIVE;
>  	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
> +
> +	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
> +		dev_activate_tx(slave->dev);
>  }
>  
>  static inline void bond_set_master_3ad_flags(struct bonding *bond)
> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
> index 964ffa0..eee006a 100644
> --- a/include/net/sch_generic.h
> +++ b/include/net/sch_generic.h
> @@ -296,6 +296,8 @@ extern void dev_init_scheduler(struct net_device *dev);
>  extern void dev_shutdown(struct net_device *dev);
>  extern void dev_activate(struct net_device *dev);
>  extern void dev_deactivate(struct net_device *dev);
> +extern void dev_activate_tx(struct net_device *dev);
> +extern void dev_deactivate_tx(struct net_device *dev);
>  extern void qdisc_reset(struct Qdisc *qdisc);
>  extern void qdisc_destroy(struct Qdisc *qdisc);
>  extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
> index 27d0381..514c73d 100644
> --- a/net/sched/sch_generic.c
> +++ b/net/sched/sch_generic.c
> @@ -628,7 +628,7 @@ static void transition_one_qdisc(struct net_device *dev,
>  	}
>  }
>  
> -void dev_activate(struct net_device *dev)
> +void dev_activate_tx(struct net_device *dev)
>  {
>  	int need_watchdog;
>  
> @@ -647,13 +647,19 @@ void dev_activate(struct net_device *dev)
>  
>  	need_watchdog = 0;
>  	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
> -	transition_one_qdisc(dev, &dev->rx_queue, NULL);
>  
>  	if (need_watchdog) {
>  		dev->trans_start = jiffies;
>  		dev_watchdog_up(dev);
>  	}
>  }
> +EXPORT_SYMBOL(dev_activate_tx);
> +
> +void dev_activate(struct net_device *dev)
> +{
> +	dev_activate_tx(dev);
> +	transition_one_qdisc(dev, &dev->rx_queue, NULL);
> +}
>  
>  static void dev_deactivate_queue(struct net_device *dev,
>  				 struct netdev_queue *dev_queue,
> @@ -703,12 +709,18 @@ static bool some_qdisc_is_busy(struct net_device *dev)
>  	return false;
>  }
>  
> -void dev_deactivate(struct net_device *dev)
> +void dev_deactivate_tx(struct net_device *dev)
>  {
>  	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
> -	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
>  
>  	dev_watchdog_down(dev);
> +}
> +EXPORT_SYMBOL(dev_deactivate_tx);
> +
> +void dev_deactivate(struct net_device *dev)
> +{
> +	dev_deactivate_tx(dev);
> +	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
>  
>  	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
>  	synchronize_rcu();
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jay Vosburgh Aug. 5, 2009, 5:01 p.m. UTC | #2
Jiri Bohac <jbohac@suse.cz> wrote:

>Applications exist that broadcast/multicast packets on all
>devices in the system (e.g. avahi-mdns).
>
>When a device is an inactive slave of a bond in active-backup
>mode, its MAC address may be set identical to other divecies in
>the bond. The broadcast/multicast packets may then confuse
>switches to direct packets to the inactive slave, rather than the
>active one.
>
>This patch makes sure the TX queues on inactive slaves are
>deactivated.
>
>Signed-off-by: Jiri Bohac <jbohac@suse.cz>

	I'd love to include this patch; many times I've tracked down
"bonding" problems to some errant dingus confusing the switch, but I
think this patch will break some things, and therefore has to be a NAK.

	Specifically, I suspect this will break users of some protocols
that intentionally (and legitimately) bind directly to the slave
underneath bonding, LLDP for one.  I'm fairly sure there are such users,
because the inactive slave rx path was changed last year to permit
explicit binds to the inactive slaves to receive packets that normally
would be dropped:

commit 0d7a3681232f545c6a59f77e60f7667673ef0e93
Author: Joe Eykholt <jre@nuovasystems.com>
Date:   Wed Jul 2 18:22:01 2008 -0700

    net/core: Allow certain receives on inactive slave.
    
    Allow a packet_type that specifies the exact device to receive
    even on an inactive bonding slave devices.  This is important for some
    L2 protocols such as LLDP and FCoE.  This can eventually be used
    for the bonding special cases as well.
    
    Signed-off-by: Joe Eykholt <jre@nuovasystems.com>
    Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
    Signed-off-by: Jeff Garzik <jgarzik@redhat.com>

	The fact that they're receiving on the inactive slave suggests
that there may be transmits on the same slave, and a quick read of the
LLDP spec seems to agree.  I'm also unsure of exactly how FCoE operates
in this regard (whether it does anything that will break due to this
patch).

	Anybody have better information about LLDP or FCoE?

	-J

---
	-Jay Vosburgh, IBM Linux Technology Center, fubar@us.ibm.com
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Bohac Aug. 6, 2009, 10:43 a.m. UTC | #3
On Wed, Aug 05, 2009 at 06:57:37PM +0200, Eric Dumazet wrote:
> Jiri Bohac a écrit :
> > This patch makes sure the TX queues on inactive slaves are
> > deactivated.
> > 
> 
> Wont this break ARP link monitoring ?

No, ARP monitoring sends the ARP requests from the active slave.
Eric Dumazet Aug. 6, 2009, 12:28 p.m. UTC | #4
Jiri Bohac a écrit :
> On Wed, Aug 05, 2009 at 06:57:37PM +0200, Eric Dumazet wrote:
>> Jiri Bohac a écrit :
>>> This patch makes sure the TX queues on inactive slaves are
>>> deactivated.
>>>
>> Wont this break ARP link monitoring ?
> 
> No, ARP monitoring sends the ARP requests from the active slave.
> 

Yes, this makes sense now you say it :)

Thanks
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -20,6 +20,7 @@ 
 #include <linux/if_bonding.h>
 #include <linux/kobject.h>
 #include <linux/in6.h>
+#include <net/sch_generic.h>
 #include "bond_3ad.h"
 #include "bond_alb.h"
 
@@ -291,12 +292,21 @@  static inline void bond_set_slave_inactive_flags(struct slave *slave)
 	slave->dev->priv_flags |= IFF_SLAVE_INACTIVE;
 	if (slave_do_arp_validate(bond, slave))
 		slave->dev->priv_flags |= IFF_SLAVE_NEEDARP;
+
+	/* prevent outgoing frames on inactive slaves from confusing switches */
+	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+		dev_deactivate_tx(slave->dev);
 }
 
 static inline void bond_set_slave_active_flags(struct slave *slave)
 {
+	struct bonding *bond = netdev_priv(slave->dev->master);
+
 	slave->state = BOND_STATE_ACTIVE;
 	slave->dev->priv_flags &= ~(IFF_SLAVE_INACTIVE | IFF_SLAVE_NEEDARP);
+
+	if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+		dev_activate_tx(slave->dev);
 }
 
 static inline void bond_set_master_3ad_flags(struct bonding *bond)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 964ffa0..eee006a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -296,6 +296,8 @@  extern void dev_init_scheduler(struct net_device *dev);
 extern void dev_shutdown(struct net_device *dev);
 extern void dev_activate(struct net_device *dev);
 extern void dev_deactivate(struct net_device *dev);
+extern void dev_activate_tx(struct net_device *dev);
+extern void dev_deactivate_tx(struct net_device *dev);
 extern void qdisc_reset(struct Qdisc *qdisc);
 extern void qdisc_destroy(struct Qdisc *qdisc);
 extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 27d0381..514c73d 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -628,7 +628,7 @@  static void transition_one_qdisc(struct net_device *dev,
 	}
 }
 
-void dev_activate(struct net_device *dev)
+void dev_activate_tx(struct net_device *dev)
 {
 	int need_watchdog;
 
@@ -647,13 +647,19 @@  void dev_activate(struct net_device *dev)
 
 	need_watchdog = 0;
 	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
-	transition_one_qdisc(dev, &dev->rx_queue, NULL);
 
 	if (need_watchdog) {
 		dev->trans_start = jiffies;
 		dev_watchdog_up(dev);
 	}
 }
+EXPORT_SYMBOL(dev_activate_tx);
+
+void dev_activate(struct net_device *dev)
+{
+	dev_activate_tx(dev);
+	transition_one_qdisc(dev, &dev->rx_queue, NULL);
+}
 
 static void dev_deactivate_queue(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
@@ -703,12 +709,18 @@  static bool some_qdisc_is_busy(struct net_device *dev)
 	return false;
 }
 
-void dev_deactivate(struct net_device *dev)
+void dev_deactivate_tx(struct net_device *dev)
 {
 	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
-	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
 
 	dev_watchdog_down(dev);
+}
+EXPORT_SYMBOL(dev_deactivate_tx);
+
+void dev_deactivate(struct net_device *dev)
+{
+	dev_deactivate_tx(dev);
+	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
 
 	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
 	synchronize_rcu();