diff mbox

[net-next,V8] net: introduce ethernet teaming device

Message ID 1321085808-6871-1-git-send-email-jpirko@redhat.com
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Jiri Pirko Nov. 12, 2011, 8:16 a.m. UTC
This patch introduces a new network device called team. It is supposed to be
a very fast, simple, userspace-driven alternative to the existing bonding
driver.

Userspace library called libteam with couple of demo apps is available
here:
https://github.com/jpirko/libteam
Note it's still in its diapers atm.

team<->libteam use generic netlink for communication. That and rtnl
are supposed to be the only way to configure team device, no sysfs etc.

Python binding of libteam was recently introduced.
Daemon providing arpmon/miimon active-backup functionality will be
introduced shortly. All what's necessary is already implemented in
kernel team driver.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>

v7->v8:
	- check ndo_vlan_rx_[add/kill]_vid functions before calling
	  them.
	- use dev_kfree_skb_any() instead of dev_kfree_skb()

v6->v7:
	- transmit and receive functions are not checked in hot paths.
	  That also resolves memory leak on transmit when no port is
	  present

v5->v6:
	- changed couple of _rcu calls to non _rcu ones in non-readers

v4->v5:
	- team_change_mtu() uses team->lock while traversing through port
	  list
	- mac address changes are moved completely to jurisdiction of
	  userspace daemon. This way the daemon can do FOM1, FOM2 and
	  possibly other weird things with mac addresses.
	  Only round-robin mode sets up all ports to bond's address when
	  enslaved.
	- Extended Kconfig text

v3->v4:
	- remove redundant synchronize_rcu from __team_change_mode()
	- revert "set and clear of mode_ops happens per pointer, not per
	  byte"
	- extend comment of function __team_change_mode()

v2->v3:
	- team_change_mtu() uses rcu version of list traversal to unwind
	- set and clear of mode_ops happens per pointer, not per byte
	- port hashlist changed to be embedded into team structure
	- error branch in team_port_enter() does cleanup now
	- fixed rtln->rtnl

v1->v2:
	- modes are made as modules. Makes team more modular and
	  extendable.
	- several commenters' nitpicks found on v1 were fixed
	- several other bugs were fixed.
	- note I ignored Eric's comment about roundrobin port selector
	  as Eric's way may be easily implemented as another mode (mode
	  "random") in future.
---
 Documentation/networking/team.txt         |    2 +
 MAINTAINERS                               |    7 +
 drivers/net/Kconfig                       |    2 +
 drivers/net/Makefile                      |    1 +
 drivers/net/team/Kconfig                  |   43 +
 drivers/net/team/Makefile                 |    7 +
 drivers/net/team/team.c                   | 1583 +++++++++++++++++++++++++++++
 drivers/net/team/team_mode_activebackup.c |  137 +++
 drivers/net/team/team_mode_roundrobin.c   |  107 ++
 include/linux/Kbuild                      |    1 +
 include/linux/if.h                        |    1 +
 include/linux/if_team.h                   |  242 +++++
 12 files changed, 2133 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/networking/team.txt
 create mode 100644 drivers/net/team/Kconfig
 create mode 100644 drivers/net/team/Makefile
 create mode 100644 drivers/net/team/team.c
 create mode 100644 drivers/net/team/team_mode_activebackup.c
 create mode 100644 drivers/net/team/team_mode_roundrobin.c
 create mode 100644 include/linux/if_team.h

Comments

David Miller Nov. 13, 2011, 9:09 p.m. UTC | #1
From: Jiri Pirko <jpirko@redhat.com>
Date: Sat, 12 Nov 2011 09:16:48 +0100

> This patch introduces new network device called team. It supposes to be
> very fast, simple, userspace-driven alternative to existing bonding
> driver.
> 
> Userspace library called libteam with couple of demo apps is available
> here:
> https://github.com/jpirko/libteam
> Note it's still in its dipers atm.
> 
> team<->libteam use generic netlink for communication. That and rtnl
> suppose to be the only way to configure team device, no sysfs etc.
> 
> Python binding of libteam was recently introduced.
> Daemon providing arpmon/miimon active-backup functionality will be
> introduced shortly. All what's necessary is already implemented in
> kernel team driver.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>

Applied, thanks for all of your hard work.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Gospodarek Nov. 14, 2011, 5:18 p.m. UTC | #2
On Sat, Nov 12, 2011 at 09:16:48AM +0100, Jiri Pirko wrote:
> This patch introduces new network device called team. It supposes to be
> very fast, simple, userspace-driven alternative to existing bonding
> driver.
> 
> Userspace library called libteam with couple of demo apps is available
> here:
> https://github.com/jpirko/libteam
> Note it's still in its dipers atm.
> 
> team<->libteam use generic netlink for communication. That and rtnl
> suppose to be the only way to configure team device, no sysfs etc.
> 
> Python binding of libteam was recently introduced.
> Daemon providing arpmon/miimon active-backup functionality will be
> introduced shortly. All what's necessary is already implemented in
> kernel team driver.
> 
> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> 
> v7->v8:
> 	- check ndo_ndo_vlan_rx_[add/kill]_vid functions before calling
> 	  them.
> 	- use dev_kfree_skb_any() instead of dev_kfree_skb()
> 
> v6->v7:
> 	- transmit and receive functions are not checked in hot paths.
> 	  That also resolves memory leak on transmit when no port is
> 	  present
> 
> v5->v6:
> 	- changed couple of _rcu calls to non _rcu ones in non-readers
> 
> v4->v5:
> 	- team_change_mtu() uses team->lock while travesing though port
> 	  list
> 	- mac address changes are moved completely to jurisdiction of
> 	  userspace daemon. This way the daemon can do FOM1, FOM2 and
> 	  possibly other weird things with mac addresses.
> 	  Only round-robin mode sets up all ports to bond's address then
> 	  enslaved.
> 	- Extended Kconfig text
> 
> v3->v4:
> 	- remove redundant synchronize_rcu from __team_change_mode()
> 	- revert "set and clear of mode_ops happens per pointer, not per
> 	  byte"
> 	- extend comment of function __team_change_mode()
> 
> v2->v3:
> 	- team_change_mtu() uses rcu version of list traversal to unwind
> 	- set and clear of mode_ops happens per pointer, not per byte
> 	- port hashlist changed to be embedded into team structure
> 	- error branch in team_port_enter() does cleanup now
> 	- fixed rtln->rtnl
> 
> v1->v2:
> 	- modes are made as modules. Makes team more modular and
> 	  extendable.
> 	- several commenters' nitpicks found on v1 were fixed
> 	- several other bugs were fixed.
> 	- note I ignored Eric's comment about roundrobin port selector
> 	  as Eric's way may be easily implemented as another mode (mode
> 	  "random") in future.

You better get ready for v9.

Running the command:

# team_manual_control team0 set mode roundrobin

on a system with team0 running in roundrobin mode produces this:

[ 2127.785321] BUG: unable to handle kernel NULL pointer dereference at           (null)
[ 2127.788079] IP: [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
[ 2127.790847] PGD 13eecf067 PUD 13f758067 PMD 0 
[ 2127.793603] Oops: 0000 [#1] SMP 
[ 2127.796352] CPU 7 
[ 2127.796370] Modules linked in: team_mode_roundrobin(O) team(O) fcoe libfcoe libfc scsi_transport_fc scsi_tgt 8021q garp stp llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables xt_state nf_conntrack snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device i2c_i801 joydev microcode shpchp snd_pcm snd_timer snd soundcore snd_page_alloc bnx2 iTCO_wdt iTCO_vendor_support e1000e uinput firewire_ohci firewire_core crc_itu_t i915 drm_kms_helper drm i2c_algo_bit i2c_core video [last unloaded: nf_defrag_ipv4]
[ 2127.808223] 
[ 2127.811261] Pid: 7085, comm: team_manual_con Tainted: G           O 3.2.0-rc1+ #1 Intel Corporation 2012 Client Platform/LosLunas CRB
[ 2127.814421] RIP: 0010:[<ffffffffa0196edd>]  [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
[ 2127.817597] RSP: 0018:ffff88012ec3d968  EFLAGS: 00010286
[ 2127.820758] RAX: 0000000000000000 RBX: ffff8801397bb600 RCX: ffffffffffffffff
[ 2127.823947] RDX: ffff88013f4ba048 RSI: 0000000000000000 RDI: 0000000000000000
[ 2127.827154] RBP: ffff88012ec3d9c8 R08: ffff88013f4ba048 R09: 0000000000000004
[ 2127.830365] R10: 0000000000001bad R11: 0000000000000000 R12: ffff880143a8b740
[ 2127.833599] R13: ffff880143aca7e8 R14: ffff88013f4ba014 R15: ffff88013f4ba048
[ 2127.836838] FS:  00007fd65cdc8700(0000) GS:ffff88014e2e0000(0000) knlGS:0000000000000000
[ 2127.840102] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2127.843386] CR2: 0000000000000000 CR3: 0000000128531000 CR4: 00000000001406e0
[ 2127.846688] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 2127.849987] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 2127.853278] Process team_manual_con (pid: 7085, threadinfo ffff88012ec3c000, task ffff88013e842e40)
[ 2127.856605] Stack:
[ 2127.859898]  0000000000000000 ffff880143a8b7e8 0000000000000000 ffff88013f4ba01c
[ 2127.863261]  ffffffffa0019140 0000000500000000 0000000000000000 ffff8801397bb600
[ 2127.866623]  ffff88012ec3da58 ffffffffa0197058 ffff880143a8b740 00000000fffffff4
[ 2127.869993] Call Trace:
[ 2127.873344]  [<ffffffffa0197058>] ? team_nl_fill_options_get_changed+0x240/0x240 [team]
[ 2127.876750]  [<ffffffffa0197078>] team_nl_fill_options_get+0x20/0x22 [team]
[ 2127.880152]  [<ffffffffa019763c>] team_nl_send_generic+0x41/0x85 [team]
[ 2127.880156]  [<ffffffffa01976f5>] team_nl_cmd_options_get+0x36/0x3f [team]
[ 2127.880162]  [<ffffffff813fbdce>] genl_rcv_msg+0x1d8/0x203
[ 2127.880165]  [<ffffffff813fbbf6>] ? genl_rcv+0x2d/0x2d
[ 2127.880169]  [<ffffffff813fb7b2>] netlink_rcv_skb+0x42/0x8d
[ 2127.880172]  [<ffffffff813fbbef>] genl_rcv+0x26/0x2d
[ 2127.880174]  [<ffffffff813fb341>] netlink_unicast+0xec/0x156
[ 2127.880178]  [<ffffffff813fb5a6>] netlink_sendmsg+0x1fb/0x233
[ 2127.880182]  [<ffffffff813ca577>] sock_sendmsg+0xe6/0x109
[ 2127.880188]  [<ffffffff81120c76>] ? __mem_cgroup_commit_charge+0x9d/0xa9
[ 2127.880192]  [<ffffffff8112333b>] ? mem_cgroup_charge_common+0xb1/0xc3
[ 2127.880197]  [<ffffffff81043ff5>] ? should_resched+0xe/0x2d
[ 2127.880203]  [<ffffffff814b6208>] ? _cond_resched+0xe/0x22
[ 2127.880206]  [<ffffffff81043ff5>] ? should_resched+0xe/0x2d
[ 2127.880209]  [<ffffffff813d4433>] ? copy_from_user+0x2f/0x31
[ 2127.880212]  [<ffffffff813d481e>] ? verify_iovec+0x52/0xa4
[ 2127.880215]  [<ffffffff813ca85c>] __sys_sendmsg+0x213/0x2ba
[ 2127.880220]  [<ffffffff810fb1eb>] ? handle_mm_fault+0x1c8/0x1db
[ 2127.880224]  [<ffffffff814bab67>] ? do_page_fault+0x30c/0x37e
[ 2127.880228]  [<ffffffff814b7914>] ? _raw_spin_unlock_irqrestore+0x17/0x19
[ 2127.880232]  [<ffffffff81045079>] ? __wake_up+0x44/0x4d
[ 2127.880235]  [<ffffffff813cc459>] sys_sendmsg+0x42/0x60
[ 2127.880239]  [<ffffffff814be042>] system_call_fastpath+0x16/0x1b
[ 2127.880241] Code: e9 24 01 00 00 be 01 00 00 00 48 89 df e8 aa f3 ff ff 48 85 c0 49 89 c7 0f 84 4b 01 00 00 49 8b 75 10 31 c0 48 83 c9 ff 48 89 f7 <f2> ae 48 89 df 89 ca 48 89 f1 be 01 00 00 00 f7 d2 e8 2f 4c 0a 
[ 2127.880263] RIP  [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
[ 2127.880268]  RSP <ffff88012ec3d968>
[ 2127.880269] CR2: 0000000000000000
[ 2127.880287] ---[ end trace 3e104c6acd231d26 ]---

Can you provide a detailed report of the testing you have done on the
team device?  It seems proper testing would have found something like
this.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Gospodarek Nov. 14, 2011, 5:31 p.m. UTC | #3
On Sun, Nov 13, 2011 at 04:09:51PM -0500, David Miller wrote:
> From: Jiri Pirko <jpirko@redhat.com>
> Date: Sat, 12 Nov 2011 09:16:48 +0100
> 
> > This patch introduces new network device called team. It supposes to be
> > very fast, simple, userspace-driven alternative to existing bonding
> > driver.
> > 
> > Userspace library called libteam with couple of demo apps is available
> > here:
> > https://github.com/jpirko/libteam
> > Note it's still in its dipers atm.
> > 
> > team<->libteam use generic netlink for communication. That and rtnl
> > suppose to be the only way to configure team device, no sysfs etc.
> > 
> > Python binding of libteam was recently introduced.
> > Daemon providing arpmon/miimon active-backup functionality will be
> > introduced shortly. All what's necessary is already implemented in
> > kernel team driver.
> > 
> > Signed-off-by: Jiri Pirko <jpirko@redhat.com>
> 
> Applied, thanks for all of your hard work.

I'm a bit surprised by this.  Not only is this new function currently
difficult to setup (it took me over an hour to go from a fresh F16
install to one that had all the necessary libraries and tools to even
configure a team device for the first time), but I was able to cause an
Oops with v8 in only a few minutes of testing.

I have no problem with this functionality as an add-on and possibly
future replacement to some of what currently exists with bonding, but it
seems like what is included in the initial support should:

1. Not panic easily.
2. Have userspace bits in place to actually test all the proposed kernel
code.  (Jiri admits there is no way to verify the active-backup code).
3. Have some known, published test results.

I hope Jiri will reconsider having a separate team tree for the next few
weeks or months until these issues are worked out.  I think the hard
work will pay off and it is close to being ready; it just doesn't seem
like it is right now.

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Nov. 14, 2011, 6:17 p.m. UTC | #4
Le lundi 14 novembre 2011 à 12:31 -0500, Andy Gospodarek a écrit :

> I'm a bit surprised by this.  Not only is this new function currently
> difficult to setup (it took me over an hour to go from a a fresh F16
> install to one that had all the necessary libraries and tools to even
> configure a team device for the first time), but I was able to cause an
> Oops with v8 in only a few minutes of testing.
> 
> I have no problem with this functionality as an add-on and possibly
> future replacement to some of what currently exists with bonding, but it
> seems like what is included in the initial support should should:
> 
> 1. Not panic easily.
> 2. Have userspace bits in place to actually test all the proposed kernel
> code.  (Jiri admits there is no way to verify the active-backup code).
> 3. Have some known, published test results.
> 
> I hope Jiri will reconsider having a separate team tree for the next few
> weeks or months until these issues are worked out.  I think the hard
> work will pay off and it is close to being ready; it just doesn't seem
> like it is right now.
> 

Its marked EXPERIMENTAL, so probably some changes are expected before
production mode :)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rick Jones Nov. 14, 2011, 6:40 p.m. UTC | #5
On 11/12/2011 12:16 AM, Jiri Pirko wrote:
> This patch introduces new network device called team. It supposes to be
> very fast, simple, userspace-driven alternative to existing bonding
> driver.

What is the definition of "very" here - relative to bonding I presume? 
Are there actual performance figures available at this point?  Something 
along the lines of test through both on the same hardware.  I don't have 
HW on which to run myself but would be quite happy to help with say 
netperf command selection to demonstrate the difference between the two.

happy benchmarking,

rick jones
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Nov. 14, 2011, 9:35 p.m. UTC | #6
Mon, Nov 14, 2011 at 06:18:40PM CET, andy@greyhouse.net wrote:
>On Sat, Nov 12, 2011 at 09:16:48AM +0100, Jiri Pirko wrote:
>> This patch introduces new network device called team. It supposes to be
>> very fast, simple, userspace-driven alternative to existing bonding
>> driver.
>> 
>> Userspace library called libteam with couple of demo apps is available
>> here:
>> https://github.com/jpirko/libteam
>> Note it's still in its dipers atm.
>> 
>> team<->libteam use generic netlink for communication. That and rtnl
>> suppose to be the only way to configure team device, no sysfs etc.
>> 
>> Python binding of libteam was recently introduced.
>> Daemon providing arpmon/miimon active-backup functionality will be
>> introduced shortly. All what's necessary is already implemented in
>> kernel team driver.
>> 
>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>> 
>> v7->v8:
>> 	- check ndo_ndo_vlan_rx_[add/kill]_vid functions before calling
>> 	  them.
>> 	- use dev_kfree_skb_any() instead of dev_kfree_skb()
>> 
>> v6->v7:
>> 	- transmit and receive functions are not checked in hot paths.
>> 	  That also resolves memory leak on transmit when no port is
>> 	  present
>> 
>> v5->v6:
>> 	- changed couple of _rcu calls to non _rcu ones in non-readers
>> 
>> v4->v5:
>> 	- team_change_mtu() uses team->lock while travesing though port
>> 	  list
>> 	- mac address changes are moved completely to jurisdiction of
>> 	  userspace daemon. This way the daemon can do FOM1, FOM2 and
>> 	  possibly other weird things with mac addresses.
>> 	  Only round-robin mode sets up all ports to bond's address then
>> 	  enslaved.
>> 	- Extended Kconfig text
>> 
>> v3->v4:
>> 	- remove redundant synchronize_rcu from __team_change_mode()
>> 	- revert "set and clear of mode_ops happens per pointer, not per
>> 	  byte"
>> 	- extend comment of function __team_change_mode()
>> 
>> v2->v3:
>> 	- team_change_mtu() uses rcu version of list traversal to unwind
>> 	- set and clear of mode_ops happens per pointer, not per byte
>> 	- port hashlist changed to be embedded into team structure
>> 	- error branch in team_port_enter() does cleanup now
>> 	- fixed rtln->rtnl
>> 
>> v1->v2:
>> 	- modes are made as modules. Makes team more modular and
>> 	  extendable.
>> 	- several commenters' nitpicks found on v1 were fixed
>> 	- several other bugs were fixed.
>> 	- note I ignored Eric's comment about roundrobin port selector
>> 	  as Eric's way may be easily implemented as another mode (mode
>> 	  "random") in future.
>
>You better get ready for v9.
>
>Running the command:
>
># team_manual_control team0 set mode roundrobin
>
>on a system with team0 running in roundrobin mode produces this:
>
>[ 2127.785321] BUG: unable to handle kernel NULL pointer dereference at           (null)
>[ 2127.788079] IP: [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
>[ 2127.790847] PGD 13eecf067 PUD 13f758067 PMD 0 
>[ 2127.793603] Oops: 0000 [#1] SMP 
>[ 2127.796352] CPU 7 
>[ 2127.796370] Modules linked in: team_mode_roundrobin(O) team(O) fcoe libfcoe libfc scsi_transport_fc scsi_tgt 8021q garp stp llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables xt_state nf_conntrack snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device i2c_i801 joydev microcode shpchp snd_pcm snd_timer snd soundcore snd_page_alloc bnx2 iTCO_wdt iTCO_vendor_support e1000e uinput firewire_ohci firewire_core crc_itu_t i915 drm_kms_helper drm i2c_algo_bit i2c_core video [last unloaded: nf_defrag_ipv4]
>[ 2127.808223] 
>[ 2127.811261] Pid: 7085, comm: team_manual_con Tainted: G           O 3.2.0-rc1+ #1 Intel Corporation 2012 Client Platform/LosLunas CRB
>[ 2127.814421] RIP: 0010:[<ffffffffa0196edd>]  [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
>[ 2127.817597] RSP: 0018:ffff88012ec3d968  EFLAGS: 00010286
>[ 2127.820758] RAX: 0000000000000000 RBX: ffff8801397bb600 RCX: ffffffffffffffff
>[ 2127.823947] RDX: ffff88013f4ba048 RSI: 0000000000000000 RDI: 0000000000000000
>[ 2127.827154] RBP: ffff88012ec3d9c8 R08: ffff88013f4ba048 R09: 0000000000000004
>[ 2127.830365] R10: 0000000000001bad R11: 0000000000000000 R12: ffff880143a8b740
>[ 2127.833599] R13: ffff880143aca7e8 R14: ffff88013f4ba014 R15: ffff88013f4ba048
>[ 2127.836838] FS:  00007fd65cdc8700(0000) GS:ffff88014e2e0000(0000) knlGS:0000000000000000
>[ 2127.840102] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>[ 2127.843386] CR2: 0000000000000000 CR3: 0000000128531000 CR4: 00000000001406e0
>[ 2127.846688] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>[ 2127.849987] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
>[ 2127.853278] Process team_manual_con (pid: 7085, threadinfo ffff88012ec3c000, task ffff88013e842e40)
>[ 2127.856605] Stack:
>[ 2127.859898]  0000000000000000 ffff880143a8b7e8 0000000000000000 ffff88013f4ba01c
>[ 2127.863261]  ffffffffa0019140 0000000500000000 0000000000000000 ffff8801397bb600
>[ 2127.866623]  ffff88012ec3da58 ffffffffa0197058 ffff880143a8b740 00000000fffffff4
>[ 2127.869993] Call Trace:
>[ 2127.873344]  [<ffffffffa0197058>] ? team_nl_fill_options_get_changed+0x240/0x240 [team]
>[ 2127.876750]  [<ffffffffa0197078>] team_nl_fill_options_get+0x20/0x22 [team]
>[ 2127.880152]  [<ffffffffa019763c>] team_nl_send_generic+0x41/0x85 [team]
>[ 2127.880156]  [<ffffffffa01976f5>] team_nl_cmd_options_get+0x36/0x3f [team]
>[ 2127.880162]  [<ffffffff813fbdce>] genl_rcv_msg+0x1d8/0x203
>[ 2127.880165]  [<ffffffff813fbbf6>] ? genl_rcv+0x2d/0x2d
>[ 2127.880169]  [<ffffffff813fb7b2>] netlink_rcv_skb+0x42/0x8d
>[ 2127.880172]  [<ffffffff813fbbef>] genl_rcv+0x26/0x2d
>[ 2127.880174]  [<ffffffff813fb341>] netlink_unicast+0xec/0x156
>[ 2127.880178]  [<ffffffff813fb5a6>] netlink_sendmsg+0x1fb/0x233
>[ 2127.880182]  [<ffffffff813ca577>] sock_sendmsg+0xe6/0x109
>[ 2127.880188]  [<ffffffff81120c76>] ? __mem_cgroup_commit_charge+0x9d/0xa9
>[ 2127.880192]  [<ffffffff8112333b>] ? mem_cgroup_charge_common+0xb1/0xc3
>[ 2127.880197]  [<ffffffff81043ff5>] ? should_resched+0xe/0x2d
>[ 2127.880203]  [<ffffffff814b6208>] ? _cond_resched+0xe/0x22
>[ 2127.880206]  [<ffffffff81043ff5>] ? should_resched+0xe/0x2d
>[ 2127.880209]  [<ffffffff813d4433>] ? copy_from_user+0x2f/0x31
>[ 2127.880212]  [<ffffffff813d481e>] ? verify_iovec+0x52/0xa4
>[ 2127.880215]  [<ffffffff813ca85c>] __sys_sendmsg+0x213/0x2ba
>[ 2127.880220]  [<ffffffff810fb1eb>] ? handle_mm_fault+0x1c8/0x1db
>[ 2127.880224]  [<ffffffff814bab67>] ? do_page_fault+0x30c/0x37e
>[ 2127.880228]  [<ffffffff814b7914>] ? _raw_spin_unlock_irqrestore+0x17/0x19
>[ 2127.880232]  [<ffffffff81045079>] ? __wake_up+0x44/0x4d
>[ 2127.880235]  [<ffffffff813cc459>] sys_sendmsg+0x42/0x60
>[ 2127.880239]  [<ffffffff814be042>] system_call_fastpath+0x16/0x1b
>[ 2127.880241] Code: e9 24 01 00 00 be 01 00 00 00 48 89 df e8 aa f3 ff ff 48 85 c0 49 89 c7 0f 84 4b 01 00 00 49 8b 75 10 31 c0 48 83 c9 ff 48 89 f7 <f2> ae 48 89 df 89 ca 48 89 f1 be 01 00 00 00 f7 d2 e8 2f 4c 0a 
>[ 2127.880263] RIP  [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
>[ 2127.880268]  RSP <ffff88012ec3d968>
>[ 2127.880269] CR2: 0000000000000000
>[ 2127.880287] ---[ end trace 3e104c6acd231d26 ]---
>
>Can you provide a detailed report of the testing you have done on the
>team device?  It seems proper testing would have found something like
>this.

I just encountered the same bug now. Going to investigate this. It did not
happen during my previous testing :( Sorry Andy.

Jirka

>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Nov. 14, 2011, 9:37 p.m. UTC | #7
Mon, Nov 14, 2011 at 07:17:52PM CET, eric.dumazet@gmail.com wrote:
>Le lundi 14 novembre 2011 à 12:31 -0500, Andy Gospodarek a écrit :
>
>> I'm a bit surprised by this.  Not only is this new function currently
>> difficult to setup (it took me over an hour to go from a a fresh F16
>> install to one that had all the necessary libraries and tools to even
>> configure a team device for the first time), but I was able to cause an
>> Oops with v8 in only a few minutes of testing.
>> 
>> I have no problem with this functionality as an add-on and possibly
>> future replacement to some of what currently exists with bonding, but it
>> seems like what is included in the initial support should should:
>> 
>> 1. Not panic easily.
>> 2. Have userspace bits in place to actually test all the proposed kernel
>> code.  (Jiri admits there is no way to verify the active-backup code).
>> 3. Have some known, published test results.
>> 
>> I hope Jiri will reconsider having a separate team tree for the next few
>> weeks or months until these issues are worked out.  I think the hard
>> work will pay off and it is close to being ready; it just doesn't seem
>> like it is right now.
>> 
>
>Its marked EXPERIMENTAL, so probably some changes are expected before
>production mode :)

Exactly. The primary goal was to get this broader audience so people
use this and find bugs, like you did Andy :)
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Nov. 14, 2011, 9:51 p.m. UTC | #8
Mon, Nov 14, 2011 at 07:40:52PM CET, rick.jones2@hp.com wrote:
>On 11/12/2011 12:16 AM, Jiri Pirko wrote:
>>This patch introduces new network device called team. It supposes to be
>>very fast, simple, userspace-driven alternative to existing bonding
>>driver.
>
>What is the definition of "very" here - relative to bonding I
>presume? Are there actual performance figures available at this
>point?  Something along the lines of test through both on the same
>hardware.  I don't have HW on which to run myself but would be quite
>happy to help with say netperf command selection to demonstrate the
>difference between the two.

Rick, I will provide this.

But the simple fact how bond and team path differ suggests the results.

Stay tuned.

Jirka

>
>happy benchmarking,
>
>rick jones
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Gospodarek Nov. 15, 2011, 1:56 a.m. UTC | #9
On Mon, Nov 14, 2011 at 10:40:52AM -0800, Rick Jones wrote:
> On 11/12/2011 12:16 AM, Jiri Pirko wrote:
>> This patch introduces new network device called team. It supposes to be
>> very fast, simple, userspace-driven alternative to existing bonding
>> driver.
>
> What is the definition of "very" here - relative to bonding I presume?  
> Are there actual performance figures available at this point?  Something  
> along the lines of test through both on the same hardware.  I don't have  
> HW on which to run myself but would be quite happy to help with say  
> netperf command selection to demonstrate the difference between the two.
>
> happy benchmarking,
>

On most modern systems I suspect there will be little to no difference
between bonding RX performance and team performance.

If there is any now, I suspect team and bond performance to be similar
by the time team has to account for the corner-cases bonding has already
resolved.  :-)

Benchmarks may prove otherwise, but I've yet to see Jiri produce
anything.  My initial testing doesn't demonstrate any measurable
differences with 1Gbps interfaces on a multi-core, multi-socket system.
 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rick Jones Nov. 15, 2011, 5:22 p.m. UTC | #10
> On most modern systems I suspect there will be little to no difference
> between bonding RX peformance and team performance.
>
> If there is any now, I suspect team and bond performance to be similar
> by the time team has to account for the corner-cases bonding has already
> resolved.  :-)
>
> Benchmarks may prove otherwise, but I've yet to see Jiri produce
> anything.  My initial testing doesn't demonstrate any measurable
> differences with 1Gbps interfaces on a multi-core, multi-socket system.

I wouldn't expect much difference in terms of bandwidth, I was thinking 
the demonstration would be made in the area of service demand (CPU 
consumed per unit work) and perhaps aggregate packets per second.

happy benchmarking,

rick jones
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Nov. 15, 2011, 6:35 p.m. UTC | #11
Le mardi 15 novembre 2011 à 09:22 -0800, Rick Jones a écrit :
> > On most modern systems I suspect there will be little to no difference
> > between bonding RX peformance and team performance.
> >
> > If there is any now, I suspect team and bond performance to be similar
> > by the time team has to account for the corner-cases bonding has already
> > resolved.  :-)
> >
> > Benchmarks may prove otherwise, but I've yet to see Jiri produce
> > anything.  My initial testing doesn't demonstrate any measureable
> > differences with 1Gbps interfaces on a multi-core, multi-socket system.
> 
> I wouldn't expect much difference in terms of bandwidth, I was thinking 
> the demonstration would be made in the area of service demand (CPU 
> consumed per unit work) and perhaps aggregate packets per second.

Well,

bonding is a NETIF_F_LLTX driver, but uses following rwlock in xmit
path :

read_lock(&bond->curr_slave_lock);
...
read_unlock(&bond->curr_slave_lock);

Two atomic operations on a contended cache line.

On a 16 cpu machine, here is some "perf stat" data of such workload :
(each thread doing 10.000.000 atomic_inc(&somesharedvar) )

# perf stat ./atomic 16

 Performance counter stats for './atomic 16':

      48016,104204 task-clock                #   15,566 CPUs utilized          
               555 context-switches          #    0,000 M/sec                  
                15 CPU-migrations            #    0,000 M/sec                  
               175 page-faults               #    0,000 M/sec                  
   121 669 943 013 cycles                    #    2,534 GHz                    
   121 321 455 748 stalled-cycles-frontend   #   99,71% frontend cycles idle   
   103 375 494 290 stalled-cycles-backend    #   84,96% backend  cycles idle   
       611 624 619 instructions              #    0,01  insns per cycle        
                                             #   198,36  stalled cycles per insn
       184 530 032 branches                  #    3,843 M/sec                  
           581 513 branch-misses             #    0,32% of all branches        

       3,084672937 seconds time elapsed

Cost per 'read_lock/read_unlock pair' : at least 616 ns

While on one cpu only :

# perf stat ./atomic 1

 Performance counter stats for './atomic 1':

         83,475050 task-clock                #    0,998 CPUs utilized          
                 3 context-switches          #    0,000 M/sec                  
                 1 CPU-migrations            #    0,000 M/sec                  
               144 page-faults               #    0,002 M/sec                  
       211 508 600 cycles                    #    2,534 GHz                    
       193 502 947 stalled-cycles-frontend   #   91,49% frontend cycles idle   
       124 428 400 stalled-cycles-backend    #   58,83% backend  cycles idle   
        30 870 434 instructions              #    0,15  insns per cycle        
                                             #    6,27  stalled cycles per insn
        10 163 364 branches                  #  121,753 M/sec                  
             9 633 branch-misses             #    0,09% of all branches        

       0,083679928 seconds time elapsed

Cost per 'read_lock/read_unlock pair' : 16 ns


Of course, bonding could be changed to use RCU as well,
if someone feels the need.

But teaming was designed to be RCU ready from the beginning.

exi

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiri Pirko Nov. 16, 2011, 4:30 p.m. UTC | #12
Mon, Nov 14, 2011 at 10:35:12PM CET, jpirko@redhat.com wrote:
>Mon, Nov 14, 2011 at 06:18:40PM CET, andy@greyhouse.net wrote:
>>On Sat, Nov 12, 2011 at 09:16:48AM +0100, Jiri Pirko wrote:
>>> This patch introduces new network device called team. It supposes to be
>>> very fast, simple, userspace-driven alternative to existing bonding
>>> driver.
>>> 
>>> Userspace library called libteam with couple of demo apps is available
>>> here:
>>> https://github.com/jpirko/libteam
>>> Note it's still in its dipers atm.
>>> 
>>> team<->libteam use generic netlink for communication. That and rtnl
>>> suppose to be the only way to configure team device, no sysfs etc.
>>> 
>>> Python binding of libteam was recently introduced.
>>> Daemon providing arpmon/miimon active-backup functionality will be
>>> introduced shortly. All what's necessary is already implemented in
>>> kernel team driver.
>>> 
>>> Signed-off-by: Jiri Pirko <jpirko@redhat.com>
>>> 
>>> v7->v8:
>>> 	- check ndo_ndo_vlan_rx_[add/kill]_vid functions before calling
>>> 	  them.
>>> 	- use dev_kfree_skb_any() instead of dev_kfree_skb()
>>> 
>>> v6->v7:
>>> 	- transmit and receive functions are not checked in hot paths.
>>> 	  That also resolves memory leak on transmit when no port is
>>> 	  present
>>> 
>>> v5->v6:
>>> 	- changed couple of _rcu calls to non _rcu ones in non-readers
>>> 
>>> v4->v5:
>>> 	- team_change_mtu() uses team->lock while travesing though port
>>> 	  list
>>> 	- mac address changes are moved completely to jurisdiction of
>>> 	  userspace daemon. This way the daemon can do FOM1, FOM2 and
>>> 	  possibly other weird things with mac addresses.
>>> 	  Only round-robin mode sets up all ports to bond's address then
>>> 	  enslaved.
>>> 	- Extended Kconfig text
>>> 
>>> v3->v4:
>>> 	- remove redundant synchronize_rcu from __team_change_mode()
>>> 	- revert "set and clear of mode_ops happens per pointer, not per
>>> 	  byte"
>>> 	- extend comment of function __team_change_mode()
>>> 
>>> v2->v3:
>>> 	- team_change_mtu() uses rcu version of list traversal to unwind
>>> 	- set and clear of mode_ops happens per pointer, not per byte
>>> 	- port hashlist changed to be embedded into team structure
>>> 	- error branch in team_port_enter() does cleanup now
>>> 	- fixed rtln->rtnl
>>> 
>>> v1->v2:
>>> 	- modes are made as modules. Makes team more modular and
>>> 	  extendable.
>>> 	- several commenters' nitpicks found on v1 were fixed
>>> 	- several other bugs were fixed.
>>> 	- note I ignored Eric's comment about roundrobin port selector
>>> 	  as Eric's way may be easily implemented as another mode (mode
>>> 	  "random") in future.
>>
>>You better get ready for v9.
>>
>>Running the command:
>>
>># team_manual_control team0 set mode roundrobin
>>
>>on a system with team0 running in roundrobin mode produces this:
>>
>>[ 2127.785321] BUG: unable to handle kernel NULL pointer dereference at           (null)
>>[ 2127.788079] IP: [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
>>[ 2127.790847] PGD 13eecf067 PUD 13f758067 PMD 0 
>>[ 2127.793603] Oops: 0000 [#1] SMP 
>>[ 2127.796352] CPU 7 
>>[ 2127.796370] Modules linked in: team_mode_roundrobin(O) team(O) fcoe libfcoe libfc scsi_transport_fc scsi_tgt 8021q garp stp llc ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables xt_state nf_conntrack snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device i2c_i801 joydev microcode shpchp snd_pcm snd_timer snd soundcore snd_page_alloc bnx2 iTCO_wdt iTCO_vendor_support e1000e uinput firewire_ohci firewire_core crc_itu_t i915 drm_kms_helper drm i2c_algo_bit i2c_core video [last unloaded: nf_defrag_ipv4]
>>[ 2127.808223] 
>>[ 2127.811261] Pid: 7085, comm: team_manual_con Tainted: G           O 3.2.0-rc1+ #1 Intel Corporation 2012 Client Platform/LosLunas CRB
>>[ 2127.814421] RIP: 0010:[<ffffffffa0196edd>]  [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
>>[ 2127.817597] RSP: 0018:ffff88012ec3d968  EFLAGS: 00010286
>>[ 2127.820758] RAX: 0000000000000000 RBX: ffff8801397bb600 RCX: ffffffffffffffff
>>[ 2127.823947] RDX: ffff88013f4ba048 RSI: 0000000000000000 RDI: 0000000000000000
>>[ 2127.827154] RBP: ffff88012ec3d9c8 R08: ffff88013f4ba048 R09: 0000000000000004
>>[ 2127.830365] R10: 0000000000001bad R11: 0000000000000000 R12: ffff880143a8b740
>>[ 2127.833599] R13: ffff880143aca7e8 R14: ffff88013f4ba014 R15: ffff88013f4ba048
>>[ 2127.836838] FS:  00007fd65cdc8700(0000) GS:ffff88014e2e0000(0000) knlGS:0000000000000000
>>[ 2127.840102] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>[ 2127.843386] CR2: 0000000000000000 CR3: 0000000128531000 CR4: 00000000001406e0
>>[ 2127.846688] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>>[ 2127.849987] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
>>[ 2127.853278] Process team_manual_con (pid: 7085, threadinfo ffff88012ec3c000, task ffff88013e842e40)
>>[ 2127.856605] Stack:
>>[ 2127.859898]  0000000000000000 ffff880143a8b7e8 0000000000000000 ffff88013f4ba01c
>>[ 2127.863261]  ffffffffa0019140 0000000500000000 0000000000000000 ffff8801397bb600
>>[ 2127.866623]  ffff88012ec3da58 ffffffffa0197058 ffff880143a8b740 00000000fffffff4
>>[ 2127.869993] Call Trace:
>>[ 2127.873344]  [<ffffffffa0197058>] ? team_nl_fill_options_get_changed+0x240/0x240 [team]
>>[ 2127.876750]  [<ffffffffa0197078>] team_nl_fill_options_get+0x20/0x22 [team]
>>[ 2127.880152]  [<ffffffffa019763c>] team_nl_send_generic+0x41/0x85 [team]
>>[ 2127.880156]  [<ffffffffa01976f5>] team_nl_cmd_options_get+0x36/0x3f [team]
>>[ 2127.880162]  [<ffffffff813fbdce>] genl_rcv_msg+0x1d8/0x203
>>[ 2127.880165]  [<ffffffff813fbbf6>] ? genl_rcv+0x2d/0x2d
>>[ 2127.880169]  [<ffffffff813fb7b2>] netlink_rcv_skb+0x42/0x8d
>>[ 2127.880172]  [<ffffffff813fbbef>] genl_rcv+0x26/0x2d
>>[ 2127.880174]  [<ffffffff813fb341>] netlink_unicast+0xec/0x156
>>[ 2127.880178]  [<ffffffff813fb5a6>] netlink_sendmsg+0x1fb/0x233
>>[ 2127.880182]  [<ffffffff813ca577>] sock_sendmsg+0xe6/0x109
>>[ 2127.880188]  [<ffffffff81120c76>] ? __mem_cgroup_commit_charge+0x9d/0xa9
>>[ 2127.880192]  [<ffffffff8112333b>] ? mem_cgroup_charge_common+0xb1/0xc3
>>[ 2127.880197]  [<ffffffff81043ff5>] ? should_resched+0xe/0x2d
>>[ 2127.880203]  [<ffffffff814b6208>] ? _cond_resched+0xe/0x22
>>[ 2127.880206]  [<ffffffff81043ff5>] ? should_resched+0xe/0x2d
>>[ 2127.880209]  [<ffffffff813d4433>] ? copy_from_user+0x2f/0x31
>>[ 2127.880212]  [<ffffffff813d481e>] ? verify_iovec+0x52/0xa4
>>[ 2127.880215]  [<ffffffff813ca85c>] __sys_sendmsg+0x213/0x2ba
>>[ 2127.880220]  [<ffffffff810fb1eb>] ? handle_mm_fault+0x1c8/0x1db
>>[ 2127.880224]  [<ffffffff814bab67>] ? do_page_fault+0x30c/0x37e
>>[ 2127.880228]  [<ffffffff814b7914>] ? _raw_spin_unlock_irqrestore+0x17/0x19
>>[ 2127.880232]  [<ffffffff81045079>] ? __wake_up+0x44/0x4d
>>[ 2127.880235]  [<ffffffff813cc459>] sys_sendmsg+0x42/0x60
>>[ 2127.880239]  [<ffffffff814be042>] system_call_fastpath+0x16/0x1b
>>[ 2127.880241] Code: e9 24 01 00 00 be 01 00 00 00 48 89 df e8 aa f3 ff ff 48 85 c0 49 89 c7 0f 84 4b 01 00 00 49 8b 75 10 31 c0 48 83 c9 ff 48 89 f7 <f2> ae 48 89 df 89 ca 48 89 f1 be 01 00 00 00 f7 d2 e8 2f 4c 0a 
>>[ 2127.880263] RIP  [<ffffffffa0196edd>] team_nl_fill_options_get_changed+0xc5/0x240 [team]
>>[ 2127.880268]  RSP <ffff88012ec3d968>
>>[ 2127.880269] CR2: 0000000000000000
>>[ 2127.880287] ---[ end trace 3e104c6acd231d26 ]---
>>
>>Can you provide a detailed report of the testing you have done on the
>>team device?  It seems proper testing would have found something like
>>this.
>
>I just encountered the same bug now. Going to investigate this. Did not
>happen during my previous testing :( Sorry Andy.

I believe I found the problem. I'm going to do some more testing, then
I'll post the patch.


>
>Jirka
>
>>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
=?ISO-8859-2?Q?Micha=B3_Miros=B3aw?= Nov. 16, 2011, 11:01 p.m. UTC | #13
2011/11/12 Jiri Pirko <jpirko@redhat.com>:
> This patch introduces new network device called team. It supposes to be
> very fast, simple, userspace-driven alternative to existing bonding
> driver.
[...]
> +#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \
> +                           NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
> +                           NETIF_F_HIGHDMA | NETIF_F_LRO)
> +
> +static void __team_compute_features(struct team *team)
> +{
> +       struct team_port *port;
> +       u32 vlan_features = TEAM_VLAN_FEATURES;
> +       unsigned short max_hard_header_len = ETH_HLEN;
> +
> +       list_for_each_entry(port, &team->port_list, list) {
> +               vlan_features = netdev_increment_features(vlan_features,
> +                                       port->dev->vlan_features,
> +                                       TEAM_VLAN_FEATURES);
> +
> +               if (port->dev->hard_header_len > max_hard_header_len)
> +                       max_hard_header_len = port->dev->hard_header_len;
> +       }
> +
> +       team->dev->vlan_features = vlan_features;
> +       team->dev->hard_header_len = max_hard_header_len;
> +
> +       netdev_change_features(team->dev);
> +}
> +
> +static void team_compute_features(struct team *team)
> +{
> +       spin_lock(&team->lock);
> +       __team_compute_features(team);
> +       spin_unlock(&team->lock);
> +}
[...]

I just noticed that you missed ndo_fix_features in the driver. If you
look at bonding driver it should be easy to see how it should work.

Best Regards,
Michał Mirosław
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/networking/team.txt b/Documentation/networking/team.txt
new file mode 100644
index 0000000..5a01368
--- /dev/null
+++ b/Documentation/networking/team.txt
@@ -0,0 +1,2 @@ 
+Team devices are driven from userspace via libteam library which is here:
+	https://github.com/jpirko/libteam
diff --git a/MAINTAINERS b/MAINTAINERS
index 4808256..8d94169 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6484,6 +6484,13 @@  W:	http://tcp-lp-mod.sourceforge.net/
 S:	Maintained
 F:	net/ipv4/tcp_lp.c
 
+TEAM DRIVER
+M:	Jiri Pirko <jpirko@redhat.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/team/
+F:	include/linux/if_team.h
+
 TEGRA SUPPORT
 M:	Colin Cross <ccross@android.com>
 M:	Olof Johansson <olof@lixom.net>
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 583f66c..b3020be 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -125,6 +125,8 @@  config IFB
 	  'ifb1' etc.
 	  Look at the iproute2 documentation directory for usage etc
 
+source "drivers/net/team/Kconfig"
+
 config MACVLAN
 	tristate "MAC-VLAN support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index fa877cd..4e4ebfe 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -17,6 +17,7 @@  obj-$(CONFIG_NET) += Space.o loopback.o
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 obj-$(CONFIG_PHYLIB) += phy/
 obj-$(CONFIG_RIONET) += rionet.o
+obj-$(CONFIG_NET_TEAM) += team/
 obj-$(CONFIG_TUN) += tun.o
 obj-$(CONFIG_VETH) += veth.o
 obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
diff --git a/drivers/net/team/Kconfig b/drivers/net/team/Kconfig
new file mode 100644
index 0000000..248a144
--- /dev/null
+++ b/drivers/net/team/Kconfig
@@ -0,0 +1,43 @@ 
+menuconfig NET_TEAM
+	tristate "Ethernet team driver support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  This allows one to create virtual interfaces that teams together
+	  multiple ethernet devices.
+
+	  Team devices can be added using the "ip" command from the
+	  iproute2 package:
+
+	  "ip link add link [ address MAC ] [ NAME ] type team"
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called team.
+
+if NET_TEAM
+
+config NET_TEAM_MODE_ROUNDROBIN
+	tristate "Round-robin mode support"
+	depends on NET_TEAM
+	---help---
+	  Basic mode where port used for transmitting packets is selected in
+	  round-robin fashion using packet counter.
+
+	  All added ports are setup to have bond's mac address.
+
+	  To compile this team mode as a module, choose M here: the module
+	  will be called team_mode_roundrobin.
+
+config NET_TEAM_MODE_ACTIVEBACKUP
+	tristate "Active-backup mode support"
+	depends on NET_TEAM
+	---help---
+	  Only one port is active at a time and the rest of ports are used
+	  for backup.
+
+	  Mac addresses of ports are not modified. Userspace is responsible
+	  to do so.
+
+	  To compile this team mode as a module, choose M here: the module
+	  will be called team_mode_activebackup.
+
+endif # NET_TEAM
diff --git a/drivers/net/team/Makefile b/drivers/net/team/Makefile
new file mode 100644
index 0000000..85f2028
--- /dev/null
+++ b/drivers/net/team/Makefile
@@ -0,0 +1,7 @@ 
+#
+# Makefile for the network team driver
+#
+
+obj-$(CONFIG_NET_TEAM) += team.o
+obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o
+obj-$(CONFIG_NET_TEAM_MODE_ACTIVEBACKUP) += team_mode_activebackup.o
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
new file mode 100644
index 0000000..60672bb
--- /dev/null
+++ b/drivers/net/team/team.c
@@ -0,0 +1,1583 @@ 
+/*
+ * net/drivers/team/team.c - Network team device driver
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+#include <linux/errno.h>
+#include <linux/ctype.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/socket.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <net/rtnetlink.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <linux/if_team.h>
+
+#define DRV_NAME "team"
+
+
+/**********
+ * Helpers
+ **********/
+
+#define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT)
+
+static struct team_port *team_port_get_rcu(const struct net_device *dev)
+{
+	struct team_port *port = rcu_dereference(dev->rx_handler_data);
+
+	return team_port_exists(dev) ? port : NULL;
+}
+
+static struct team_port *team_port_get_rtnl(const struct net_device *dev)
+{
+	struct team_port *port = rtnl_dereference(dev->rx_handler_data);
+
+	return team_port_exists(dev) ? port : NULL;
+}
+
+/*
+ * Since the ability to change mac address for open port device is tested in
+ * team_port_add, this function can be called without control of return value
+ */
+static int __set_port_mac(struct net_device *port_dev,
+			  const unsigned char *dev_addr)
+{
+	struct sockaddr addr;
+
+	memcpy(addr.sa_data, dev_addr, ETH_ALEN);
+	addr.sa_family = ARPHRD_ETHER;
+	return dev_set_mac_address(port_dev, &addr);
+}
+
+int team_port_set_orig_mac(struct team_port *port)
+{
+	return __set_port_mac(port->dev, port->orig.dev_addr);
+}
+
+int team_port_set_team_mac(struct team_port *port)
+{
+	return __set_port_mac(port->dev, port->team->dev->dev_addr);
+}
+EXPORT_SYMBOL(team_port_set_team_mac);
+
+
+/*******************
+ * Options handling
+ *******************/
+
+void team_options_register(struct team *team, struct team_option *option,
+			   size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++)
+		list_add_tail(&option->list, &team->option_list);
+}
+EXPORT_SYMBOL(team_options_register);
+
+static void __team_options_change_check(struct team *team,
+					struct team_option *changed_option);
+
+static void __team_options_unregister(struct team *team,
+				      struct team_option *option,
+				      size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++)
+		list_del(&option->list);
+}
+
+void team_options_unregister(struct team *team, struct team_option *option,
+			     size_t option_count)
+{
+	__team_options_unregister(team, option, option_count);
+	__team_options_change_check(team, NULL);
+}
+EXPORT_SYMBOL(team_options_unregister);
+
+static int team_option_get(struct team *team, struct team_option *option,
+			   void *arg)
+{
+	return option->getter(team, arg);
+}
+
+static int team_option_set(struct team *team, struct team_option *option,
+			   void *arg)
+{
+	int err;
+
+	err = option->setter(team, arg);
+	if (err)
+		return err;
+
+	__team_options_change_check(team, option);
+	return err;
+}
+
+/****************
+ * Mode handling
+ ****************/
+
+static LIST_HEAD(mode_list);
+static DEFINE_SPINLOCK(mode_list_lock);
+
+static struct team_mode *__find_mode(const char *kind)
+{
+	struct team_mode *mode;
+
+	list_for_each_entry(mode, &mode_list, list) {
+		if (strcmp(mode->kind, kind) == 0)
+			return mode;
+	}
+	return NULL;
+}
+
+static bool is_good_mode_name(const char *name)
+{
+	while (*name != '\0') {
+		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+			return false;
+		name++;
+	}
+	return true;
+}
+
+int team_mode_register(struct team_mode *mode)
+{
+	int err = 0;
+
+	if (!is_good_mode_name(mode->kind) ||
+	    mode->priv_size > TEAM_MODE_PRIV_SIZE)
+		return -EINVAL;
+	spin_lock(&mode_list_lock);
+	if (__find_mode(mode->kind)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+	list_add_tail(&mode->list, &mode_list);
+unlock:
+	spin_unlock(&mode_list_lock);
+	return err;
+}
+EXPORT_SYMBOL(team_mode_register);
+
+int team_mode_unregister(struct team_mode *mode)
+{
+	spin_lock(&mode_list_lock);
+	list_del_init(&mode->list);
+	spin_unlock(&mode_list_lock);
+	return 0;
+}
+EXPORT_SYMBOL(team_mode_unregister);
+
+static struct team_mode *team_mode_get(const char *kind)
+{
+	struct team_mode *mode;
+
+	spin_lock(&mode_list_lock);
+	mode = __find_mode(kind);
+	if (!mode) {
+		spin_unlock(&mode_list_lock);
+		request_module("team-mode-%s", kind);
+		spin_lock(&mode_list_lock);
+		mode = __find_mode(kind);
+	}
+	if (mode)
+		if (!try_module_get(mode->owner))
+			mode = NULL;
+
+	spin_unlock(&mode_list_lock);
+	return mode;
+}
+
+static void team_mode_put(const struct team_mode *mode)
+{
+	module_put(mode->owner);
+}
+
+static bool team_dummy_transmit(struct team *team, struct sk_buff *skb)
+{
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+rx_handler_result_t team_dummy_receive(struct team *team,
+				       struct team_port *port,
+				       struct sk_buff *skb)
+{
+	return RX_HANDLER_ANOTHER;
+}
+
+static void team_adjust_ops(struct team *team)
+{
+	/*
+	 * To avoid checks in rx/tx skb paths, ensure here that non-null and
+	 * correct ops are always set.
+	 */
+
+	if (list_empty(&team->port_list) ||
+	    !team->mode || !team->mode->ops->transmit)
+		team->ops.transmit = team_dummy_transmit;
+	else
+		team->ops.transmit = team->mode->ops->transmit;
+
+	if (list_empty(&team->port_list) ||
+	    !team->mode || !team->mode->ops->receive)
+		team->ops.receive = team_dummy_receive;
+	else
+		team->ops.receive = team->mode->ops->receive;
+}
+
+/*
+ * We can benefit from the fact that it's ensured no port is present
+ * at the time of mode change. Therefore no packets are in fly so there's no
+ * need to set mode operations in any special way.
+ */
+static int __team_change_mode(struct team *team,
+			      const struct team_mode *new_mode)
+{
+	/* Check if mode was previously set and do cleanup if so */
+	if (team->mode) {
+		void (*exit_op)(struct team *team) = team->ops.exit;
+
+		/* Clear ops area so no callback is called any longer */
+		memset(&team->ops, 0, sizeof(struct team_mode_ops));
+		team_adjust_ops(team);
+
+		if (exit_op)
+			exit_op(team);
+		team_mode_put(team->mode);
+		team->mode = NULL;
+		/* zero private data area */
+		memset(&team->mode_priv, 0,
+		       sizeof(struct team) - offsetof(struct team, mode_priv));
+	}
+
+	if (!new_mode)
+		return 0;
+
+	if (new_mode->ops->init) {
+		int err;
+
+		err = new_mode->ops->init(team);
+		if (err)
+			return err;
+	}
+
+	team->mode = new_mode;
+	memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops));
+	team_adjust_ops(team);
+
+	return 0;
+}
+
+static int team_change_mode(struct team *team, const char *kind)
+{
+	struct team_mode *new_mode;
+	struct net_device *dev = team->dev;
+	int err;
+
+	if (!list_empty(&team->port_list)) {
+		netdev_err(dev, "No ports can be present during mode change\n");
+		return -EBUSY;
+	}
+
+	if (team->mode && strcmp(team->mode->kind, kind) == 0) {
+		netdev_err(dev, "Unable to change to the same mode the team is in\n");
+		return -EINVAL;
+	}
+
+	new_mode = team_mode_get(kind);
+	if (!new_mode) {
+		netdev_err(dev, "Mode \"%s\" not found\n", kind);
+		return -EINVAL;
+	}
+
+	err = __team_change_mode(team, new_mode);
+	if (err) {
+		netdev_err(dev, "Failed to change to mode \"%s\"\n", kind);
+		team_mode_put(new_mode);
+		return err;
+	}
+
+	netdev_info(dev, "Mode changed to \"%s\"\n", kind);
+	return 0;
+}
+
+
+/************************
+ * Rx path frame handler
+ ************************/
+
+/* note: already called with rcu_read_lock */
+static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct team_port *port;
+	struct team *team;
+	rx_handler_result_t res;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		return RX_HANDLER_CONSUMED;
+
+	*pskb = skb;
+
+	port = team_port_get_rcu(skb->dev);
+	team = port->team;
+
+	res = team->ops.receive(team, port, skb);
+	if (res == RX_HANDLER_ANOTHER) {
+		struct team_pcpu_stats *pcpu_stats;
+
+		pcpu_stats = this_cpu_ptr(team->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->rx_packets++;
+		pcpu_stats->rx_bytes += skb->len;
+		if (skb->pkt_type == PACKET_MULTICAST)
+			pcpu_stats->rx_multicast++;
+		u64_stats_update_end(&pcpu_stats->syncp);
+
+		skb->dev = team->dev;
+	} else {
+		this_cpu_inc(team->pcpu_stats->rx_dropped);
+	}
+
+	return res;
+}
+
+
+/****************
+ * Port handling
+ ****************/
+
+static bool team_port_find(const struct team *team,
+			   const struct team_port *port)
+{
+	struct team_port *cur;
+
+	list_for_each_entry(cur, &team->port_list, list)
+		if (cur == port)
+			return true;
+	return false;
+}
+
+/*
+ * Add/delete port to the team port list. Write guarded by rtnl_lock.
+ * Takes care of correct port->index setup (might be racy).
+ */
+static void team_port_list_add_port(struct team *team,
+				    struct team_port *port)
+{
+	port->index = team->port_count++;
+	hlist_add_head_rcu(&port->hlist,
+			   team_port_index_hash(team, port->index));
+	list_add_tail_rcu(&port->list, &team->port_list);
+}
+
+static void __reconstruct_port_hlist(struct team *team, int rm_index)
+{
+	int i;
+	struct team_port *port;
+
+	for (i = rm_index + 1; i < team->port_count; i++) {
+		port = team_get_port_by_index(team, i);
+		hlist_del_rcu(&port->hlist);
+		port->index--;
+		hlist_add_head_rcu(&port->hlist,
+				   team_port_index_hash(team, port->index));
+	}
+}
+
+static void team_port_list_del_port(struct team *team,
+				   struct team_port *port)
+{
+	int rm_index = port->index;
+
+	hlist_del_rcu(&port->hlist);
+	list_del_rcu(&port->list);
+	__reconstruct_port_hlist(team, rm_index);
+	team->port_count--;
+}
+
+#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \
+			    NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+			    NETIF_F_HIGHDMA | NETIF_F_LRO)
+
+static void __team_compute_features(struct team *team)
+{
+	struct team_port *port;
+	u32 vlan_features = TEAM_VLAN_FEATURES;
+	unsigned short max_hard_header_len = ETH_HLEN;
+
+	list_for_each_entry(port, &team->port_list, list) {
+		vlan_features = netdev_increment_features(vlan_features,
+					port->dev->vlan_features,
+					TEAM_VLAN_FEATURES);
+
+		if (port->dev->hard_header_len > max_hard_header_len)
+			max_hard_header_len = port->dev->hard_header_len;
+	}
+
+	team->dev->vlan_features = vlan_features;
+	team->dev->hard_header_len = max_hard_header_len;
+
+	netdev_change_features(team->dev);
+}
+
+static void team_compute_features(struct team *team)
+{
+	spin_lock(&team->lock);
+	__team_compute_features(team);
+	spin_unlock(&team->lock);
+}
+
+/*
+ * Bring a port into team mode: take a reference on the team device,
+ * mark the port device with IFF_TEAM_PORT and run the mode's optional
+ * port_enter hook. On hook failure both steps are rolled back.
+ * Returns 0 or the hook's error code.
+ */
+static int team_port_enter(struct team *team, struct team_port *port)
+{
+	int err = 0;
+
+	dev_hold(team->dev);
+	port->dev->priv_flags |= IFF_TEAM_PORT;
+	if (team->ops.port_enter) {
+		err = team->ops.port_enter(team, port);
+		if (err) {
+			netdev_err(team->dev, "Device %s failed to enter team mode\n",
+				   port->dev->name);
+			goto err_port_enter;
+		}
+	}
+
+	return 0;
+
+err_port_enter:
+	port->dev->priv_flags &= ~IFF_TEAM_PORT;
+	dev_put(team->dev);
+
+	return err;
+}
+
+/* Inverse of team_port_enter(): run the mode's optional port_leave
+ * hook, clear IFF_TEAM_PORT and drop the team device reference.
+ */
+static void team_port_leave(struct team *team, struct team_port *port)
+{
+	if (team->ops.port_leave)
+		team->ops.port_leave(team, port);
+	port->dev->priv_flags &= ~IFF_TEAM_PORT;
+	dev_put(team->dev);
+}
+
+static void __team_port_change_check(struct team_port *port, bool linkup);
+
+/*
+ * Enslave port_dev to the team. Caller holds team->lock (see
+ * team_add_slave). The sequence is: validate the device, save its
+ * original MTU/MAC, enter team mode, open it, set the team as master
+ * and register the rx_handler; only then is it linked into the port
+ * list. Each error label unwinds exactly the steps completed before it.
+ * Returns 0 or a negative errno.
+ */
+static int team_port_add(struct team *team, struct net_device *port_dev)
+{
+	struct net_device *dev = team->dev;
+	struct team_port *port;
+	char *portname = port_dev->name;
+	int err;
+
+	/* Only non-loopback Ethernet devices can be team ports. */
+	if (port_dev->flags & IFF_LOOPBACK ||
+	    port_dev->type != ARPHRD_ETHER) {
+		netdev_err(dev, "Device %s is of an unsupported type\n",
+			   portname);
+		return -EINVAL;
+	}
+
+	if (team_port_exists(port_dev)) {
+		netdev_err(dev, "Device %s is already a port "
+				"of a team device\n", portname);
+		return -EBUSY;
+	}
+
+	if (port_dev->flags & IFF_UP) {
+		netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
+			   portname);
+		return -EBUSY;
+	}
+
+	port = kzalloc(sizeof(struct team_port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	port->dev = port_dev;
+	port->team = team;
+
+	/* Remember the original MTU so team_port_del() can restore it. */
+	port->orig.mtu = port_dev->mtu;
+	err = dev_set_mtu(port_dev, dev->mtu);
+	if (err) {
+		netdev_dbg(dev, "Error %d calling dev_set_mtu\n", err);
+		goto err_set_mtu;
+	}
+
+	/* Remember the original MAC for restoration on removal. */
+	memcpy(port->orig.dev_addr, port_dev->dev_addr, ETH_ALEN);
+
+	err = team_port_enter(team, port);
+	if (err) {
+		netdev_err(dev, "Device %s failed to enter team mode\n",
+			   portname);
+		goto err_port_enter;
+	}
+
+	err = dev_open(port_dev);
+	if (err) {
+		netdev_dbg(dev, "Device %s opening failed\n",
+			   portname);
+		goto err_dev_open;
+	}
+
+	err = netdev_set_master(port_dev, dev);
+	if (err) {
+		netdev_err(dev, "Device %s failed to set master\n", portname);
+		goto err_set_master;
+	}
+
+	err = netdev_rx_handler_register(port_dev, team_handle_frame,
+					 port);
+	if (err) {
+		netdev_err(dev, "Device %s failed to register rx_handler\n",
+			   portname);
+		goto err_handler_register;
+	}
+
+	/* All steps succeeded; make the port visible to readers. */
+	team_port_list_add_port(team, port);
+	team_adjust_ops(team);
+	__team_compute_features(team);
+	__team_port_change_check(port, !!netif_carrier_ok(port_dev));
+
+	netdev_info(dev, "Port device %s added\n", portname);
+
+	return 0;
+
+err_handler_register:
+	netdev_set_master(port_dev, NULL);
+
+err_set_master:
+	dev_close(port_dev);
+
+err_dev_open:
+	team_port_leave(team, port);
+	team_port_set_orig_mac(port);
+
+err_port_enter:
+	dev_set_mtu(port_dev, port->orig.mtu);
+
+err_set_mtu:
+	kfree(port);
+
+	return err;
+}
+
+/*
+ * Release port_dev from the team, undoing team_port_add() in reverse
+ * order. Caller holds team->lock. synchronize_rcu() before kfree()
+ * guarantees no RCU reader still holds a pointer to the port.
+ * Returns 0, or -ENOENT if port_dev is not a port of this team.
+ */
+static int team_port_del(struct team *team, struct net_device *port_dev)
+{
+	struct net_device *dev = team->dev;
+	struct team_port *port;
+	char *portname = port_dev->name;
+
+	port = team_port_get_rtnl(port_dev);
+	if (!port || !team_port_find(team, port)) {
+		netdev_err(dev, "Device %s does not act as a port of this team\n",
+			   portname);
+		return -ENOENT;
+	}
+
+	/* Report link-down for the port before it disappears. */
+	__team_port_change_check(port, false);
+	team_port_list_del_port(team, port);
+	team_adjust_ops(team);
+	netdev_rx_handler_unregister(port_dev);
+	netdev_set_master(port_dev, NULL);
+	dev_close(port_dev);
+	team_port_leave(team, port);
+	team_port_set_orig_mac(port);
+	dev_set_mtu(port_dev, port->orig.mtu);
+	synchronize_rcu();
+	kfree(port);
+	netdev_info(dev, "Port device %s removed\n", portname);
+	__team_compute_features(team);
+
+	return 0;
+}
+
+
+/*****************
+ * Net device ops
+ *****************/
+
+/* Kind string reported while no mode is set. */
+static const char team_no_mode_kind[] = "*NOMODE*";
+
+/* "mode" option getter: report the current mode's kind string. */
+static int team_mode_option_get(struct team *team, void *arg)
+{
+	const char **str = arg;
+
+	*str = team->mode ? team->mode->kind : team_no_mode_kind;
+	return 0;
+}
+
+/* "mode" option setter: switch the team to the named mode. */
+static int team_mode_option_set(struct team *team, void *arg)
+{
+	const char **str = arg;
+
+	return team_change_mode(team, *str);
+}
+
+/* Options every team instance exposes, regardless of mode. */
+static struct team_option team_options[] = {
+	{
+		.name = "mode",
+		.type = TEAM_OPTION_TYPE_STRING,
+		.getter = team_mode_option_get,
+		.setter = team_mode_option_set,
+	},
+};
+
+/*
+ * ndo_init: set up per-team state — lock, per-cpu stats, port hash and
+ * list, mode ops and the generic options. Carrier starts off; it is
+ * raised in team_open().
+ */
+static int team_init(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	int i;
+
+	team->dev = dev;
+	spin_lock_init(&team->lock);
+
+	team->pcpu_stats = alloc_percpu(struct team_pcpu_stats);
+	if (!team->pcpu_stats)
+		return -ENOMEM;
+
+	for (i = 0; i < TEAM_PORT_HASHENTRIES; i++)
+		INIT_HLIST_HEAD(&team->port_hlist[i]);
+	INIT_LIST_HEAD(&team->port_list);
+
+	team_adjust_ops(team);
+
+	INIT_LIST_HEAD(&team->option_list);
+	/* NOTE(review): team_options_register() may fail (allocation);
+	 * its return value is ignored here — confirm this is intended.
+	 */
+	team_options_register(team, team_options, ARRAY_SIZE(team_options));
+	netif_carrier_off(dev);
+
+	return 0;
+}
+
+/*
+ * ndo_uninit: detach every remaining port, drop the current mode and
+ * unregister the generic options, all under team->lock.
+ */
+static void team_uninit(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	struct team_port *tmp;
+
+	spin_lock(&team->lock);
+	/* _safe variant: team_port_del() removes entries while we walk. */
+	list_for_each_entry_safe(port, tmp, &team->port_list, list)
+		team_port_del(team, port->dev);
+
+	__team_change_mode(team, NULL); /* cleanup */
+	__team_options_unregister(team, team_options, ARRAY_SIZE(team_options));
+	spin_unlock(&team->lock);
+}
+
+/* netdev destructor: release per-cpu stats, then the device itself. */
+static void team_destructor(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+
+	free_percpu(team->pcpu_stats);
+	free_netdev(dev);
+}
+
+/* ndo_open: the team device itself has no hardware; just raise carrier. */
+static int team_open(struct net_device *dev)
+{
+	netif_carrier_on(dev);
+	return 0;
+}
+
+/* ndo_stop: mirror of team_open(). */
+static int team_close(struct net_device *dev)
+{
+	netif_carrier_off(dev);
+	return 0;
+}
+
+/*
+ * Transmit path: delegate to the current mode's transmit op and account
+ * the result in per-cpu stats (lockless; u64 counters guarded by syncp,
+ * tx_dropped bumped without it).
+ * note: already called with rcu_read_lock
+ */
+static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	bool tx_success = false;
+	unsigned int len = skb->len;
+
+	/* Read len before transmit; the mode op consumes the skb. */
+	tx_success = team->ops.transmit(team, skb);
+	if (tx_success) {
+		struct team_pcpu_stats *pcpu_stats;
+
+		pcpu_stats = this_cpu_ptr(team->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->tx_packets++;
+		pcpu_stats->tx_bytes += len;
+		u64_stats_update_end(&pcpu_stats->syncp);
+	} else {
+		this_cpu_inc(team->pcpu_stats->tx_dropped);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * ndo_change_rx_flags: propagate promiscuity/allmulti refcount changes
+ * to every port, walking the port list under RCU.
+ */
+static void team_change_rx_flags(struct net_device *dev, int change)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	int inc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		if (change & IFF_PROMISC) {
+			inc = dev->flags & IFF_PROMISC ? 1 : -1;
+			dev_set_promiscuity(port->dev, inc);
+		}
+		if (change & IFF_ALLMULTI) {
+			inc = dev->flags & IFF_ALLMULTI ? 1 : -1;
+			dev_set_allmulti(port->dev, inc);
+		}
+	}
+	rcu_read_unlock();
+}
+
+/* ndo_set_rx_mode: sync the team's uc/mc address lists to all ports. */
+static void team_set_rx_mode(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		dev_uc_sync(port->dev, dev);
+		dev_mc_sync(port->dev, dev);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * ndo_set_mac_address: install the new address on the team device and
+ * let the current mode propagate it to each port via port_change_mac.
+ * Returns 0, or -EADDRNOTAVAIL for a multicast/zero address.
+ */
+static int team_set_mac_address(struct net_device *dev, void *p)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	struct sockaddr *addr = p;
+
+	/* Reject invalid (multicast or all-zero) Ethernet addresses
+	 * before touching the device or any port.
+	 */
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	/* port_change_mac is loop-invariant; check it once. */
+	if (team->ops.port_change_mac) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(port, &team->port_list, list)
+			team->ops.port_change_mac(team, port);
+		rcu_read_unlock();
+	}
+	return 0;
+}
+
+/*
+ * ndo_change_mtu: set new_mtu on every port; if any port refuses,
+ * restore dev->mtu on the ports already changed and return the error.
+ */
+static int team_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	int err;
+
+	/*
+	 * Alhough this is reader, it's guarded by team lock. It's not possible
+	 * to traverse list in reverse under rcu_read_lock
+	 */
+	spin_lock(&team->lock);
+	list_for_each_entry(port, &team->port_list, list) {
+		err = dev_set_mtu(port->dev, new_mtu);
+		if (err) {
+			netdev_err(dev, "Device %s failed to change mtu",
+				   port->dev->name);
+			goto unwind;
+		}
+	}
+	spin_unlock(&team->lock);
+
+	dev->mtu = new_mtu;
+
+	return 0;
+
+unwind:
+	/* Roll back only the ports visited before the failure. */
+	list_for_each_entry_continue_reverse(port, &team->port_list, list)
+		dev_set_mtu(port->dev, dev->mtu);
+	spin_unlock(&team->lock);
+
+	return err;
+}
+
+/*
+ * ndo_get_stats64: fold the per-cpu counters into *stats. The u64
+ * counters are read under the u64_stats seqcount retry loop; the u32
+ * dropped counters are read racily by design (see comment below).
+ */
+static struct rtnl_link_stats64 *
+team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_pcpu_stats *p;
+	u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes;
+	u32 rx_dropped = 0, tx_dropped = 0;
+	unsigned int start;
+	int i;
+
+	for_each_possible_cpu(i) {
+		p = per_cpu_ptr(team->pcpu_stats, i);
+		do {
+			start = u64_stats_fetch_begin_bh(&p->syncp);
+			rx_packets	= p->rx_packets;
+			rx_bytes	= p->rx_bytes;
+			rx_multicast	= p->rx_multicast;
+			tx_packets	= p->tx_packets;
+			tx_bytes	= p->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&p->syncp, start));
+
+		stats->rx_packets	+= rx_packets;
+		stats->rx_bytes		+= rx_bytes;
+		stats->multicast	+= rx_multicast;
+		stats->tx_packets	+= tx_packets;
+		stats->tx_bytes		+= tx_bytes;
+		/*
+		 * rx_dropped & tx_dropped are u32, updated
+		 * without syncp protection.
+		 */
+		rx_dropped	+= p->rx_dropped;
+		tx_dropped	+= p->tx_dropped;
+	}
+	stats->rx_dropped	= rx_dropped;
+	stats->tx_dropped	= tx_dropped;
+	return stats;
+}
+
+/*
+ * ndo_vlan_rx_add_vid: forward VLAN registration to every port that
+ * implements it (v8 change: hooks are checked before being called).
+ */
+static void team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		const struct net_device_ops *ops = port->dev->netdev_ops;
+
+		if (ops->ndo_vlan_rx_add_vid)
+			ops->ndo_vlan_rx_add_vid(port->dev, vid);
+	}
+	rcu_read_unlock();
+}
+
+/* ndo_vlan_rx_kill_vid: mirror of team_vlan_rx_add_vid(). */
+static void team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		const struct net_device_ops *ops = port->dev->netdev_ops;
+
+		if (ops->ndo_vlan_rx_kill_vid)
+			ops->ndo_vlan_rx_kill_vid(port->dev, vid);
+	}
+	rcu_read_unlock();
+}
+
+/* ndo_add_slave: enslave port_dev under team->lock. */
+static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
+{
+	struct team *team = netdev_priv(dev);
+	int err;
+
+	spin_lock(&team->lock);
+	err = team_port_add(team, port_dev);
+	spin_unlock(&team->lock);
+	return err;
+}
+
+/* ndo_del_slave: release port_dev under team->lock. */
+static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
+{
+	struct team *team = netdev_priv(dev);
+	int err;
+
+	spin_lock(&team->lock);
+	err = team_port_del(team, port_dev);
+	spin_unlock(&team->lock);
+	return err;
+}
+
+static const struct net_device_ops team_netdev_ops = {
+	.ndo_init		= team_init,
+	.ndo_uninit		= team_uninit,
+	.ndo_open		= team_open,
+	.ndo_stop		= team_close,
+	.ndo_start_xmit		= team_xmit,
+	.ndo_change_rx_flags	= team_change_rx_flags,
+	.ndo_set_rx_mode	= team_set_rx_mode,
+	.ndo_set_mac_address	= team_set_mac_address,
+	.ndo_change_mtu		= team_change_mtu,
+	.ndo_get_stats64	= team_get_stats64,
+	.ndo_vlan_rx_add_vid	= team_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= team_vlan_rx_kill_vid,
+	.ndo_add_slave		= team_add_slave,
+	.ndo_del_slave		= team_del_slave,
+};
+
+
+/***********************
+ * rt netlink interface
+ ***********************/
+
+/* rtnl_link_ops setup: configure the team netdev as a virtual Ethernet
+ * device with its own ops, destructor and feature set.
+ */
+static void team_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	dev->netdev_ops = &team_netdev_ops;
+	dev->destructor	= team_destructor;
+	dev->tx_queue_len = 0;
+	dev->flags |= IFF_MULTICAST;
+	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+
+	/*
+	 * Indicate we support unicast address filtering. That way core won't
+	 * bring us to promisc mode in case a unicast addr is added.
+	 * Let this up to underlay drivers.
+	 */
+	dev->priv_flags |= IFF_UNICAST_FLT;
+
+	dev->features |= NETIF_F_LLTX;
+	dev->features |= NETIF_F_GRO;
+	dev->hw_features = NETIF_F_HW_VLAN_TX |
+			   NETIF_F_HW_VLAN_RX |
+			   NETIF_F_HW_VLAN_FILTER;
+
+	dev->features |= dev->hw_features;
+}
+
+/*
+ * rtnl newlink handler: assign a random MAC unless the caller supplied
+ * one via IFLA_ADDRESS, then register the device.
+ */
+static int team_newlink(struct net *src_net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[])
+{
+	if (!tb[IFLA_ADDRESS])
+		random_ether_addr(dev->dev_addr);
+
+	return register_netdevice(dev);
+}
+
+/*
+ * rtnl validate handler: if IFLA_ADDRESS is present, it must be a
+ * well-formed, valid (non-multicast, non-zero) Ethernet address.
+ */
+static int team_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+	return 0;
+}
+
+static struct rtnl_link_ops team_link_ops __read_mostly = {
+	.kind		= DRV_NAME,
+	.priv_size	= sizeof(struct team),
+	.setup		= team_setup,
+	.newlink	= team_newlink,
+	.validate	= team_validate,
+};
+
+
+/***********************************
+ * Generic netlink custom interface
+ ***********************************/
+
+static struct genl_family team_nl_family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= TEAM_GENL_NAME,
+	.version	= TEAM_GENL_VERSION,
+	.maxattr	= TEAM_ATTR_MAX,
+	.netnsok	= true,
+};
+
+static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = {
+	[TEAM_ATTR_UNSPEC]			= { .type = NLA_UNSPEC, },
+	[TEAM_ATTR_TEAM_IFINDEX]		= { .type = NLA_U32 },
+	[TEAM_ATTR_LIST_OPTION]			= { .type = NLA_NESTED },
+	[TEAM_ATTR_LIST_PORT]			= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = {
+	[TEAM_ATTR_OPTION_UNSPEC]		= { .type = NLA_UNSPEC, },
+	[TEAM_ATTR_OPTION_NAME] = {
+		.type = NLA_STRING,
+		.len = TEAM_STRING_MAX_LEN,
+	},
+	[TEAM_ATTR_OPTION_CHANGED]		= { .type = NLA_FLAG },
+	[TEAM_ATTR_OPTION_TYPE]			= { .type = NLA_U8 },
+	[TEAM_ATTR_OPTION_DATA] = {
+		.type = NLA_BINARY,
+		.len = TEAM_STRING_MAX_LEN,
+	},
+};
+
+/*
+ * TEAM_CMD_NOOP handler: reply with an empty NOOP message so userspace
+ * can probe the family.
+ */
+static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+			  &team_nl_family, 0, TEAM_CMD_NOOP);
+	/* genlmsg_put() returns NULL on failure, not an ERR_PTR. */
+	if (!hdr) {
+		err = -EMSGSIZE;
+		goto err_msg_put;
+	}
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+
+err_msg_put:
+	nlmsg_free(msg);
+
+	return err;
+}
+
+/*
+ * Netlink cmd functions should be locked by following two functions.
+ * To ensure team_uninit would not be called in between, hold rcu_read_lock
+ * all the time.
+ */
+static struct team *team_nl_team_get(struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	int ifindex;
+	struct net_device *dev;
+	struct team *team;
+
+	if (!info->attrs[TEAM_ATTR_TEAM_IFINDEX])
+		return NULL;
+
+	ifindex = nla_get_u32(info->attrs[TEAM_ATTR_TEAM_IFINDEX]);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
+	/* Identify a team device by its netdev_ops pointer. */
+	if (!dev || dev->netdev_ops != &team_netdev_ops) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	team = netdev_priv(dev);
+	/* Returns with rcu_read_lock and team->lock held. */
+	spin_lock(&team->lock);
+	return team;
+}
+
+/* Release what team_nl_team_get() acquired. */
+static void team_nl_team_put(struct team *team)
+{
+	spin_unlock(&team->lock);
+	rcu_read_unlock();
+}
+
+/*
+ * Allocate a message, let fill_func build it and unicast it back to the
+ * requesting pid. The skb is freed only on fill failure; on success
+ * genlmsg_unicast() consumes it.
+ */
+static int team_nl_send_generic(struct genl_info *info, struct team *team,
+				int (*fill_func)(struct sk_buff *skb,
+						 struct genl_info *info,
+						 int flags, struct team *team))
+{
+	struct sk_buff *skb;
+	int err;
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = fill_func(skb, info, NLM_F_ACK, team);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_unicast(genl_info_net(info), skb, info->snd_pid);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+/*
+ * Build an OPTIONS_GET message listing every registered option (name,
+ * type and current value). changed_option, if non-NULL, gets the
+ * CHANGED flag so listeners can tell what triggered an event.
+ */
+static int team_nl_fill_options_get_changed(struct sk_buff *skb,
+					    u32 pid, u32 seq, int flags,
+					    struct team *team,
+					    struct team_option *changed_option)
+{
+	struct nlattr *option_list;
+	void *hdr;
+	struct team_option *option;
+
+	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags,
+			  TEAM_CMD_OPTIONS_GET);
+	/* genlmsg_put() returns NULL on failure, not an ERR_PTR. */
+	if (!hdr)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex);
+	option_list = nla_nest_start(skb, TEAM_ATTR_LIST_OPTION);
+	if (!option_list)
+		goto nla_put_failure; /* must cancel the started header */
+
+	list_for_each_entry(option, &team->option_list, list) {
+		struct nlattr *option_item;
+		long arg;
+
+		option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION);
+		if (!option_item)
+			goto nla_put_failure;
+		NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_NAME, option->name);
+		if (option == changed_option)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_CHANGED);
+		switch (option->type) {
+		case TEAM_OPTION_TYPE_U32:
+			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_U32);
+			team_option_get(team, option, &arg);
+			NLA_PUT_U32(skb, TEAM_ATTR_OPTION_DATA, arg);
+			break;
+		case TEAM_OPTION_TYPE_STRING:
+			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_STRING);
+			team_option_get(team, option, &arg);
+			NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_DATA,
+				       (char *) arg);
+			break;
+		default:
+			BUG();
+		}
+		nla_nest_end(skb, option_item);
+	}
+
+	nla_nest_end(skb, option_list);
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+/* Adapter matching the team_nl_send_generic() fill_func signature. */
+static int team_nl_fill_options_get(struct sk_buff *skb,
+				    struct genl_info *info, int flags,
+				    struct team *team)
+{
+	return team_nl_fill_options_get_changed(skb, info->snd_pid,
+						info->snd_seq, NLM_F_ACK,
+						team, NULL);
+}
+
+/* TEAM_CMD_OPTIONS_GET handler: reply with the full option list. */
+static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct team *team;
+	int err;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	err = team_nl_send_generic(info, team, team_nl_fill_options_get);
+
+	team_nl_team_put(team);
+
+	return err;
+}
+
+/*
+ * TEAM_CMD_OPTIONS_SET handler: walk the nested option items, match
+ * each by name and type against the registered options and apply the
+ * supplied value. Returns 0, -EINVAL on malformed input, -ENOENT when
+ * no registered option matches, or the setter's error.
+ */
+static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct team *team;
+	int err = 0;
+	int i;
+	struct nlattr *nl_option;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	if (!info->attrs[TEAM_ATTR_LIST_OPTION]) {
+		err = -EINVAL;
+		goto team_put;
+	}
+
+	nla_for_each_nested(nl_option, info->attrs[TEAM_ATTR_LIST_OPTION], i) {
+		struct nlattr *mode_attrs[TEAM_ATTR_OPTION_MAX + 1];
+		enum team_option_type opt_type;
+		struct team_option *option;
+		char *opt_name;
+		bool opt_found = false;
+
+		if (nla_type(nl_option) != TEAM_ATTR_ITEM_OPTION) {
+			err = -EINVAL;
+			goto team_put;
+		}
+		err = nla_parse_nested(mode_attrs, TEAM_ATTR_OPTION_MAX,
+				       nl_option, team_nl_option_policy);
+		if (err)
+			goto team_put;
+		if (!mode_attrs[TEAM_ATTR_OPTION_NAME] ||
+		    !mode_attrs[TEAM_ATTR_OPTION_TYPE] ||
+		    !mode_attrs[TEAM_ATTR_OPTION_DATA]) {
+			err = -EINVAL;
+			goto team_put;
+		}
+		switch (nla_get_u8(mode_attrs[TEAM_ATTR_OPTION_TYPE])) {
+		case NLA_U32:
+			opt_type = TEAM_OPTION_TYPE_U32;
+			break;
+		case NLA_STRING:
+			opt_type = TEAM_OPTION_TYPE_STRING;
+			break;
+		default:
+			/* err still holds 0 from nla_parse_nested here;
+			 * an unknown type must not report success.
+			 */
+			err = -EINVAL;
+			goto team_put;
+		}
+
+		opt_name = nla_data(mode_attrs[TEAM_ATTR_OPTION_NAME]);
+		list_for_each_entry(option, &team->option_list, list) {
+			long arg;
+			struct nlattr *opt_data_attr;
+
+			if (option->type != opt_type ||
+			    strcmp(option->name, opt_name))
+				continue;
+			opt_found = true;
+			opt_data_attr = mode_attrs[TEAM_ATTR_OPTION_DATA];
+			switch (opt_type) {
+			case TEAM_OPTION_TYPE_U32:
+				arg = nla_get_u32(opt_data_attr);
+				break;
+			case TEAM_OPTION_TYPE_STRING:
+				arg = (long) nla_data(opt_data_attr);
+				break;
+			default:
+				BUG();
+			}
+			err = team_option_set(team, option, &arg);
+			if (err)
+				goto team_put;
+		}
+		if (!opt_found) {
+			err = -ENOENT;
+			goto team_put;
+		}
+	}
+
+team_put:
+	team_nl_team_put(team);
+
+	return err;
+}
+
+/*
+ * Build a PORT_LIST_GET message with one nested item per port (ifindex,
+ * link state, speed, duplex). changed_port, if non-NULL, gets the
+ * CHANGED flag so event listeners can tell which port triggered it.
+ */
+static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
+					      u32 pid, u32 seq, int flags,
+					      struct team *team,
+					      struct team_port *changed_port)
+{
+	struct nlattr *port_list;
+	void *hdr;
+	struct team_port *port;
+
+	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags,
+			  TEAM_CMD_PORT_LIST_GET);
+	/* genlmsg_put() returns NULL on failure, not an ERR_PTR. */
+	if (!hdr)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex);
+	port_list = nla_nest_start(skb, TEAM_ATTR_LIST_PORT);
+	if (!port_list)
+		goto nla_put_failure; /* must cancel the started header */
+
+	list_for_each_entry(port, &team->port_list, list) {
+		struct nlattr *port_item;
+
+		port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT);
+		if (!port_item)
+			goto nla_put_failure;
+		NLA_PUT_U32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex);
+		if (port == changed_port)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_CHANGED);
+		if (port->linkup)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_LINKUP);
+		NLA_PUT_U32(skb, TEAM_ATTR_PORT_SPEED, port->speed);
+		NLA_PUT_U8(skb, TEAM_ATTR_PORT_DUPLEX, port->duplex);
+		nla_nest_end(skb, port_item);
+	}
+
+	nla_nest_end(skb, port_list);
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+/* Adapter matching the team_nl_send_generic() fill_func signature. */
+static int team_nl_fill_port_list_get(struct sk_buff *skb,
+				      struct genl_info *info, int flags,
+				      struct team *team)
+{
+	return team_nl_fill_port_list_get_changed(skb, info->snd_pid,
+						  info->snd_seq, NLM_F_ACK,
+						  team, NULL);
+}
+
+/* TEAM_CMD_PORT_LIST_GET handler: reply with the full port list. */
+static int team_nl_cmd_port_list_get(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct team *team;
+	int err;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get);
+
+	team_nl_team_put(team);
+
+	return err;
+}
+
+static struct genl_ops team_nl_ops[] = {
+	{
+		.cmd = TEAM_CMD_NOOP,
+		.doit = team_nl_cmd_noop,
+		.policy = team_nl_policy,
+	},
+	{
+		.cmd = TEAM_CMD_OPTIONS_SET,
+		.doit = team_nl_cmd_options_set,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = TEAM_CMD_OPTIONS_GET,
+		.doit = team_nl_cmd_options_get,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = TEAM_CMD_PORT_LIST_GET,
+		.doit = team_nl_cmd_port_list_get,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
+static struct genl_multicast_group team_change_event_mcgrp = {
+	.name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME,
+};
+
+/*
+ * Multicast an OPTIONS_GET event (with changed_option flagged) to the
+ * change-event group in the team's netns.
+ */
+static int team_nl_send_event_options_get(struct team *team,
+					  struct team_option *changed_option)
+{
+	struct sk_buff *skb;
+	int err;
+	struct net *net = dev_net(team->dev);
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = team_nl_fill_options_get_changed(skb, 0, 0, 0, team,
+					       changed_option);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_multicast_netns(net, skb, 0, team_change_event_mcgrp.id,
+				      GFP_KERNEL);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+/*
+ * Multicast a PORT_LIST_GET event (with the given port flagged as
+ * changed) to the change-event group in the team's netns.
+ */
+static int team_nl_send_event_port_list_get(struct team_port *port)
+{
+	struct sk_buff *skb;
+	int err;
+	struct net *net = dev_net(port->team->dev);
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = team_nl_fill_port_list_get_changed(skb, 0, 0, 0,
+						 port->team, port);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_multicast_netns(net, skb, 0, team_change_event_mcgrp.id,
+				      GFP_KERNEL);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+/*
+ * Register the team genetlink family and its change-event multicast
+ * group; on group registration failure the family is unregistered.
+ */
+static int team_nl_init(void)
+{
+	int err;
+
+	err = genl_register_family_with_ops(&team_nl_family, team_nl_ops,
+					    ARRAY_SIZE(team_nl_ops));
+	if (err)
+		return err;
+
+	err = genl_register_mc_group(&team_nl_family, &team_change_event_mcgrp);
+	if (err)
+		goto err_change_event_grp_reg;
+
+	return 0;
+
+err_change_event_grp_reg:
+	genl_unregister_family(&team_nl_family);
+
+	return err;
+}
+
+/* Unregister the family (the mc group goes with it). */
+static void team_nl_fini(void)
+{
+	genl_unregister_family(&team_nl_family);
+}
+
+
+/******************
+ * Change checkers
+ ******************/
+
+/* Broadcast an options-changed event; failure is only logged. */
+static void __team_options_change_check(struct team *team,
+					struct team_option *changed_option)
+{
+	int err;
+
+	err = team_nl_send_event_options_get(team, changed_option);
+	if (err)
+		netdev_warn(team->dev, "Failed to send options change via netlink\n");
+}
+
+/*
+ * Record a link state transition for a port. On link-up, cache
+ * speed/duplex from ethtool (zeroed if the query fails or on
+ * link-down), then broadcast a port-list event.
+ * rtnl lock is held
+ */
+static void __team_port_change_check(struct team_port *port, bool linkup)
+{
+	int err;
+
+	/* No-op if the state did not actually change. */
+	if (port->linkup == linkup)
+		return;
+
+	port->linkup = linkup;
+	if (linkup) {
+		struct ethtool_cmd ecmd;
+
+		err = __ethtool_get_settings(port->dev, &ecmd);
+		if (!err) {
+			port->speed = ethtool_cmd_speed(&ecmd);
+			port->duplex = ecmd.duplex;
+			goto send_event;
+		}
+	}
+	port->speed = 0;
+	port->duplex = 0;
+
+send_event:
+	err = team_nl_send_event_port_list_get(port);
+	if (err)
+		netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink\n",
+			    port->dev->name);
+
+}
+
+/* Locked wrapper around __team_port_change_check(). */
+static void team_port_change_check(struct team_port *port, bool linkup)
+{
+	struct team *team = port->team;
+
+	spin_lock(&team->lock);
+	__team_port_change_check(port, linkup);
+	spin_unlock(&team->lock);
+}
+
+/************************************
+ * Net device notifier event handler
+ ************************************/
+
+/*
+ * Netdevice notifier: track state changes of enslaved port devices.
+ * Forbids MTU and type changes of ports; unenslaves on unregister.
+ */
+static int team_device_event(struct notifier_block *unused,
+			     unsigned long event, void *ptr)
+{
+	struct net_device *dev = (struct net_device *) ptr;
+	struct team_port *port;
+
+	port = team_port_get_rtnl(dev);
+	if (!port)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (netif_carrier_ok(dev))
+			team_port_change_check(port, true);
+		/* Missing break here fell through to NETDEV_DOWN and
+		 * immediately marked the port link-down again.
+		 */
+		break;
+	case NETDEV_DOWN:
+		team_port_change_check(port, false);
+		break;
+	case NETDEV_CHANGE:
+		if (netif_running(port->dev))
+			team_port_change_check(port,
+					       !!netif_carrier_ok(port->dev));
+		break;
+	case NETDEV_UNREGISTER:
+		team_del_slave(port->team->dev, dev);
+		break;
+	case NETDEV_FEAT_CHANGE:
+		team_compute_features(port->team);
+		break;
+	case NETDEV_CHANGEMTU:
+		/* Forbid to change mtu of underlaying device */
+		return NOTIFY_BAD;
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid to change type of underlaying device */
+		return NOTIFY_BAD;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block team_notifier_block __read_mostly = {
+	.notifier_call = team_device_event,
+};
+
+
+/***********************
+ * Module init and exit
+ ***********************/
+
+/*
+ * Module init: notifier first (cannot fail here), then rtnl link ops,
+ * then the genetlink interface; unwind in reverse on failure.
+ */
+static int __init team_module_init(void)
+{
+	int err;
+
+	register_netdevice_notifier(&team_notifier_block);
+
+	err = rtnl_link_register(&team_link_ops);
+	if (err)
+		goto err_rtnl_reg;
+
+	err = team_nl_init();
+	if (err)
+		goto err_nl_init;
+
+	return 0;
+
+err_nl_init:
+	rtnl_link_unregister(&team_link_ops);
+
+err_rtnl_reg:
+	unregister_netdevice_notifier(&team_notifier_block);
+
+	return err;
+}
+
+/* Module exit: tear down in reverse registration order. */
+static void __exit team_module_exit(void)
+{
+	team_nl_fini();
+	rtnl_link_unregister(&team_link_ops);
+	unregister_netdevice_notifier(&team_notifier_block);
+}
+
+module_init(team_module_init);
+module_exit(team_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Ethernet team device driver");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
new file mode 100644
index 0000000..6fe920c
--- /dev/null
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -0,0 +1,137 @@ 
+/*
+ * net/drivers/team/team_mode_activebackup.c - Active-backup mode for team
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/rtnetlink.h>
+#include <linux/if_team.h>
+
+/* Per-team state of the active-backup mode: the one active port. */
+struct ab_priv {
+	struct team_port __rcu *active_port;
+};
+
+/* Active-backup private data lives inside team->mode_priv. */
+static struct ab_priv *ab_priv(struct team *team)
+{
+	return (struct ab_priv *) &team->mode_priv;
+}
+
+/*
+ * Receive hook: accept traffic only on the active port; frames on a
+ * backup port go back to the port device (RX_HANDLER_EXACT).
+ * Kernel style: function opening brace belongs on its own line.
+ */
+static rx_handler_result_t ab_receive(struct team *team, struct team_port *port,
+				      struct sk_buff *skb)
+{
+	struct team_port *active_port;
+
+	active_port = rcu_dereference(ab_priv(team)->active_port);
+	if (active_port != port)
+		return RX_HANDLER_EXACT;
+	return RX_HANDLER_ANOTHER;
+}
+
+/*
+ * Transmit hook: send every frame through the active port; drop when no
+ * active port is set. Returns true on successful queueing.
+ */
+static bool ab_transmit(struct team *team, struct sk_buff *skb)
+{
+	struct team_port *active_port;
+
+	active_port = rcu_dereference(ab_priv(team)->active_port);
+	if (unlikely(!active_port))
+		goto drop;
+	skb->dev = active_port->dev;
+	if (dev_queue_xmit(skb))
+		return false;
+	return true;
+
+drop:
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+/* Port-leave hook: clear active_port if the leaving port was active. */
+static void ab_port_leave(struct team *team, struct team_port *port)
+{
+	if (ab_priv(team)->active_port == port)
+		rcu_assign_pointer(ab_priv(team)->active_port, NULL);
+}
+
+/* "activeport" getter: ifindex of the active port, 0 if none. */
+static int ab_active_port_get(struct team *team, void *arg)
+{
+	u32 *ifindex = arg;
+
+	*ifindex = 0;
+	if (ab_priv(team)->active_port)
+		*ifindex = ab_priv(team)->active_port->dev->ifindex;
+	return 0;
+}
+
+/* "activeport" setter: select the port matching *ifindex. */
+static int ab_active_port_set(struct team *team, void *arg)
+{
+	u32 *ifindex = arg;
+	struct team_port *port;
+
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		if (port->dev->ifindex == *ifindex) {
+			rcu_assign_pointer(ab_priv(team)->active_port, port);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static struct team_option ab_options[] = {
+	{
+		.name = "activeport",
+		.type = TEAM_OPTION_TYPE_U32,
+		.getter = ab_active_port_get,
+		.setter = ab_active_port_set,
+	},
+};
+
+/* Mode init: expose the "activeport" option. Static like the rest of
+ * this module's helpers; there is no reason to export these symbols.
+ */
+static int ab_init(struct team *team)
+{
+	team_options_register(team, ab_options, ARRAY_SIZE(ab_options));
+	return 0;
+}
+
+/* Mode exit: remove the "activeport" option. */
+static void ab_exit(struct team *team)
+{
+	team_options_unregister(team, ab_options, ARRAY_SIZE(ab_options));
+}
+
+static const struct team_mode_ops ab_mode_ops = {
+	.init			= ab_init,
+	.exit			= ab_exit,
+	.receive		= ab_receive,
+	.transmit		= ab_transmit,
+	.port_leave		= ab_port_leave,
+};
+
+static struct team_mode ab_mode = {
+	.kind		= "activebackup",
+	.owner		= THIS_MODULE,
+	.priv_size	= sizeof(struct ab_priv),
+	.ops		= &ab_mode_ops,
+};
+
+static int __init ab_init_module(void)
+{
+	return team_mode_register(&ab_mode);
+}
+
+static void __exit ab_cleanup_module(void)
+{
+	team_mode_unregister(&ab_mode);
+}
+
+module_init(ab_init_module);
+module_exit(ab_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Active-backup mode for team");
+MODULE_ALIAS("team-mode-activebackup");
diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c
new file mode 100644
index 0000000..a0e8f80
--- /dev/null
+++ b/drivers/net/team/team_mode_roundrobin.c
@@ -0,0 +1,107 @@ 
+/*
+ * drivers/net/team/team_mode_roundrobin.c - Round-robin mode for team
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/if_team.h>
+
+/* Per-team private state for round-robin mode: a running tx counter
+ * used to pick the next egress port.
+ */
+struct rr_priv {
+	unsigned int sent_packets;
+};
+
+/* Round-robin state lives in the fixed team->mode_priv area (see
+ * TEAM_MODE_PRIV_SIZE in if_team.h).
+ */
+static struct rr_priv *rr_priv(struct team *team)
+{
+	return (struct rr_priv *) &team->mode_priv;
+}
+
+/* Starting at @port, return the first port whose link is up, scanning
+ * forward through team->port_list and wrapping around to the list
+ * head.  Returns NULL when no port (including @port itself) has link.
+ * Uses _rcu list traversal, so callers are expected to be inside an
+ * RCU read-side section (the transmit path is).
+ */
+static struct team_port *__get_first_port_up(struct team *team,
+					     struct team_port *port)
+{
+	struct team_port *cur;
+
+	if (port->linkup)
+		return port;
+	cur = port;
+	/* scan from just after @port to the end of the list ... */
+	list_for_each_entry_continue_rcu(cur, &team->port_list, list)
+		if (cur->linkup)
+			return cur;
+	/* ... then wrap and scan from the head back up to @port */
+	list_for_each_entry_rcu(cur, &team->port_list, list) {
+		if (cur == port)
+			break;
+		if (cur->linkup)
+			return cur;
+	}
+	return NULL;
+}
+
+/* Transmit hook: pick the next port in round-robin order (skipping
+ * ports without link) and queue the skb on its netdevice.  Returns
+ * true when the skb was handed off, false when it was dropped or the
+ * lower device refused it.
+ */
+static bool rr_transmit(struct team *team, struct sk_buff *skb)
+{
+	struct team_port *port;
+	int port_count;
+	int port_index;
+
+	/* port_count is zero when no port is enslaved; the modulo below
+	 * would then divide by zero, so drop the packet instead.
+	 */
+	port_count = team->port_count;
+	if (unlikely(!port_count))
+		goto drop;
+	port_index = rr_priv(team)->sent_packets++ % port_count;
+	port = team_get_port_by_index_rcu(team, port_index);
+	port = __get_first_port_up(team, port);
+	if (unlikely(!port))
+		goto drop;
+	skb->dev = port->dev;
+	if (dev_queue_xmit(skb))
+		return false;
+	return true;
+
+drop:
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+/* On enslave, set the port's MAC to the team's address; round-robin
+ * needs every port to carry the bond's MAC (per the v4->v5 changelog,
+ * only this mode does so).  A non-zero return rejects the port.
+ */
+static int rr_port_enter(struct team *team, struct team_port *port)
+{
+	return team_port_set_team_mac(port);
+}
+
+/* Re-sync the port's MAC when the team's address changes. */
+static void rr_port_change_mac(struct team *team, struct team_port *port)
+{
+	team_port_set_team_mac(port);
+}
+
+/* Callback table for round-robin mode; no receive hook -- frames are
+ * accepted by the core's default handling.
+ */
+static const struct team_mode_ops rr_mode_ops = {
+	.transmit		= rr_transmit,
+	.port_enter		= rr_port_enter,
+	.port_change_mac	= rr_port_change_mac,
+};
+
+/* Mode registration record; priv_size reserves room for struct rr_priv
+ * inside team->mode_priv.
+ */
+static struct team_mode rr_mode = {
+	.kind		= "roundrobin",
+	.owner		= THIS_MODULE,
+	.priv_size	= sizeof(struct rr_priv),
+	.ops		= &rr_mode_ops,
+};
+
+/* Module plumbing: register the mode with the team core on load and
+ * unregister it on unload.
+ */
+static int __init rr_init_module(void)
+{
+	return team_mode_register(&rr_mode);
+}
+
+static void __exit rr_cleanup_module(void)
+{
+	team_mode_unregister(&rr_mode);
+}
+
+module_init(rr_init_module);
+module_exit(rr_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Round-robin mode for team");
+/* Alias lets the team core request_module() this mode by kind name. */
+MODULE_ALIAS("team-mode-roundrobin");
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 619b565..0b091b3 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -185,6 +185,7 @@  header-y += if_pppol2tp.h
 header-y += if_pppox.h
 header-y += if_slip.h
 header-y += if_strip.h
+header-y += if_team.h
 header-y += if_tr.h
 header-y += if_tun.h
 header-y += if_tunnel.h
diff --git a/include/linux/if.h b/include/linux/if.h
index db20bd4..06b6ef6 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -79,6 +79,7 @@ 
 #define IFF_TX_SKB_SHARING	0x10000	/* The interface supports sharing
 					 * skbs on transmit */
 #define IFF_UNICAST_FLT	0x20000		/* Supports unicast filtering	*/
+#define IFF_TEAM_PORT	0x40000		/* device used as team port */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
new file mode 100644
index 0000000..14f6388
--- /dev/null
+++ b/include/linux/if_team.h
@@ -0,0 +1,242 @@ 
+/*
+ * include/linux/if_team.h - Network team device driver header
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _LINUX_IF_TEAM_H_
+#define _LINUX_IF_TEAM_H_
+
+#ifdef __KERNEL__
+
+/* Per-cpu statistics for a team device. */
+struct team_pcpu_stats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
+	/* NOTE(review): the dropped counters are plain u32 placed after
+	 * syncp -- presumably read without the u64_stats retry loop;
+	 * confirm against the stats-fetch path in team.c.
+	 */
+	u32			rx_dropped;
+	u32			tx_dropped;
+};
+
+struct team;	/* full definition below */
+
+/* One enslaved netdevice.  Linked both into the per-index hash
+ * (team->port_hlist) and the ordinary port list (team->port_list).
+ */
+struct team_port {
+	struct net_device *dev;
+	struct hlist_node hlist; /* node in hash list */
+	struct list_head list; /* node in ordinary list */
+	struct team *team;
+	int index;	/* key for the port_hlist lookup */
+
+	/*
+	 * A place for storing original values of the device before it
+	 * become a port.
+	 */
+	struct {
+		unsigned char dev_addr[MAX_ADDR_LEN];
+		unsigned int mtu;
+	} orig;
+
+	/* link state, exported to userspace via the TEAM_ATTR_PORT_*
+	 * netlink attributes defined later in this header
+	 */
+	bool linkup;
+	u32 speed;
+	u8 duplex;
+
+	/* NOTE(review): presumably used to defer freeing of this struct
+	 * past an RCU grace period -- confirm the free path in team.c
+	 */
+	struct rcu_head rcu;
+};
+
+/* Operations a team mode implements.  init/exit bracket the mode's
+ * lifetime on a team; receive/transmit are data-path hooks;
+ * port_enter/port_leave run when a port is enslaved/released;
+ * port_change_mac notifies the mode of a team MAC change.  All hooks
+ * are optional (the round-robin mode, e.g., has no receive).
+ */
+struct team_mode_ops {
+	int (*init)(struct team *team);
+	void (*exit)(struct team *team);
+	rx_handler_result_t (*receive)(struct team *team,
+				       struct team_port *port,
+				       struct sk_buff *skb);
+	/* returns true when the skb was handed to a port device */
+	bool (*transmit)(struct team *team, struct sk_buff *skb);
+	/* non-zero return rejects the port being enslaved */
+	int (*port_enter)(struct team *team, struct team_port *port);
+	void (*port_leave)(struct team *team, struct team_port *port);
+	void (*port_change_mac)(struct team *team, struct team_port *port);
+};
+
+/* Value types an option can carry (cf. TEAM_ATTR_OPTION_TYPE below). */
+enum team_option_type {
+	TEAM_OPTION_TYPE_U32,
+	TEAM_OPTION_TYPE_STRING,
+};
+
+/* An option exposed over genetlink by the core or by a mode.  The
+ * getter/setter exchange the value through the opaque arg pointer,
+ * whose real type follows 'type' (u32 *, or a string presumably
+ * bounded by TEAM_STRING_MAX_LEN -- confirm in team.c).
+ */
+struct team_option {
+	struct list_head list;
+	const char *name;
+	enum team_option_type type;
+	int (*getter)(struct team *team, void *arg);
+	int (*setter)(struct team *team, void *arg);
+};
+
+/* A registered team mode (e.g. "roundrobin", "activebackup"). */
+struct team_mode {
+	struct list_head list;
+	const char *kind;	/* name used to select the mode */
+	struct module *owner;
+	size_t priv_size;	/* bytes needed in team->mode_priv */
+	const struct team_mode_ops *ops;
+};
+
+#define TEAM_PORT_HASHBITS 4
+#define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS)
+
+/* Fixed area in struct team reserved for a mode's private data.
+ * NOTE(review): a mode's priv_size presumably must not exceed
+ * TEAM_MODE_PRIV_SIZE -- confirm the check in team.c's mode setup.
+ */
+#define TEAM_MODE_PRIV_LONGS 4
+#define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS)
+
+/* Core state of one team device. */
+struct team {
+	struct net_device *dev; /* associated netdevice */
+	struct team_pcpu_stats __percpu *pcpu_stats;
+
+	spinlock_t lock; /* used for overall locking, e.g. port lists write */
+
+	/*
+	 * port lists with port count
+	 */
+	int port_count;
+	struct hlist_head port_hlist[TEAM_PORT_HASHENTRIES];
+	struct list_head port_list;
+
+	struct list_head option_list;
+
+	/* currently selected mode; ops is a copy of mode->ops so the
+	 * data path can call through team->ops directly
+	 */
+	const struct team_mode *mode;
+	struct team_mode_ops ops;
+	long mode_priv[TEAM_MODE_PRIV_LONGS]; /* mode's private area */
+};
+
+/* Map a port index onto its hash bucket.  The table size is a power
+ * of two, so a simple mask suffices.
+ */
+static inline struct hlist_head *team_port_index_hash(struct team *team,
+						      int port_index)
+{
+	unsigned int bucket = port_index & (TEAM_PORT_HASHENTRIES - 1);
+
+	return &team->port_hlist[bucket];
+}
+
+/* Find the port with the given index, or NULL.  Non-RCU list walk:
+ * presumably for callers holding team->lock (writer side) -- confirm
+ * the call sites in team.c.
+ */
+static inline struct team_port *team_get_port_by_index(struct team *team,
+						       int port_index)
+{
+	struct hlist_node *p;
+	struct team_port *port;
+	struct hlist_head *head = team_port_index_hash(team, port_index);
+
+	hlist_for_each_entry(port, p, head, hlist)
+		if (port->index == port_index)
+			return port;
+	return NULL;
+}
+/* RCU variant of team_get_port_by_index() for data-path readers
+ * (used by the round-robin transmit path); callers must be inside an
+ * RCU read-side section.
+ */
+static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
+							   int port_index)
+{
+	struct hlist_node *p;
+	struct team_port *port;
+	struct hlist_head *head = team_port_index_hash(team, port_index);
+
+	hlist_for_each_entry_rcu(port, p, head, hlist)
+		if (port->index == port_index)
+			return port;
+	return NULL;
+}
+
+/* Team core API exported to mode modules (implemented in team.c). */
+extern int team_port_set_team_mac(struct team_port *port);
+extern void team_options_register(struct team *team,
+				  struct team_option *option,
+				  size_t option_count);
+extern void team_options_unregister(struct team *team,
+				    struct team_option *option,
+				    size_t option_count);
+extern int team_mode_register(struct team_mode *mode);
+extern int team_mode_unregister(struct team_mode *mode);
+
+#endif /* __KERNEL__ */
+
+/* Max length of a TEAM_OPTION_TYPE_STRING value (presumably including
+ * the terminating NUL -- confirm in team.c).
+ */
+#define TEAM_STRING_MAX_LEN 32
+
+/**********************************
+ * NETLINK_GENERIC netlink family.
+ **********************************/
+
+/* Generic netlink commands of the TEAM_GENL_NAME family. */
+enum {
+	TEAM_CMD_NOOP,
+	TEAM_CMD_OPTIONS_SET,
+	TEAM_CMD_OPTIONS_GET,
+	TEAM_CMD_PORT_LIST_GET,
+
+	__TEAM_CMD_MAX,
+	TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1),
+};
+
+/* Top-level attributes of a team genetlink message. */
+enum {
+	TEAM_ATTR_UNSPEC,
+	TEAM_ATTR_TEAM_IFINDEX,		/* u32 */
+	TEAM_ATTR_LIST_OPTION,		/* nest */
+	TEAM_ATTR_LIST_PORT,		/* nest */
+
+	__TEAM_ATTR_MAX,
+	TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1,
+};
+
+/* Nested layout of get/set msg:
+ *
+ *	[TEAM_ATTR_LIST_OPTION]
+ *		[TEAM_ATTR_ITEM_OPTION]
+ *			[TEAM_ATTR_OPTION_*], ...
+ *		[TEAM_ATTR_ITEM_OPTION]
+ *			[TEAM_ATTR_OPTION_*], ...
+ *		...
+ *	[TEAM_ATTR_LIST_PORT]
+ *		[TEAM_ATTR_ITEM_PORT]
+ *			[TEAM_ATTR_PORT_*], ...
+ *		[TEAM_ATTR_ITEM_PORT]
+ *			[TEAM_ATTR_PORT_*], ...
+ *		...
+ */
+
+/* List-item wrapper for options (see the nesting layout above). */
+enum {
+	TEAM_ATTR_ITEM_OPTION_UNSPEC,
+	TEAM_ATTR_ITEM_OPTION,		/* nest */
+
+	__TEAM_ATTR_ITEM_OPTION_MAX,
+	TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1,
+};
+
+/* Attributes describing one option inside TEAM_ATTR_ITEM_OPTION. */
+enum {
+	TEAM_ATTR_OPTION_UNSPEC,
+	TEAM_ATTR_OPTION_NAME,		/* string */
+	TEAM_ATTR_OPTION_CHANGED,	/* flag */
+	TEAM_ATTR_OPTION_TYPE,		/* u8 */
+	TEAM_ATTR_OPTION_DATA,		/* dynamic */
+
+	__TEAM_ATTR_OPTION_MAX,
+	TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1,
+};
+
+/* List-item wrapper for ports (see the nesting layout above). */
+enum {
+	TEAM_ATTR_ITEM_PORT_UNSPEC,
+	TEAM_ATTR_ITEM_PORT,		/* nest */
+
+	__TEAM_ATTR_ITEM_PORT_MAX,
+	TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1,
+};
+
+/* Attributes describing one port inside TEAM_ATTR_ITEM_PORT. */
+enum {
+	TEAM_ATTR_PORT_UNSPEC,
+	TEAM_ATTR_PORT_IFINDEX,		/* u32 */
+	TEAM_ATTR_PORT_CHANGED,		/* flag */
+	TEAM_ATTR_PORT_LINKUP,		/* flag */
+	TEAM_ATTR_PORT_SPEED,		/* u32 */
+	TEAM_ATTR_PORT_DUPLEX,		/* u8 */
+
+	__TEAM_ATTR_PORT_MAX,
+	TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1,
+};
+
+/*
+ * NETLINK_GENERIC related info
+ */
+#define TEAM_GENL_NAME "team"			/* genl family name */
+#define TEAM_GENL_VERSION 0x1
+#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event"
+
+#endif /* _LINUX_IF_TEAM_H_ */