Message ID | 1530034992-28931-7-git-send-email-john.hurley@netronome.com |
---|---|
State | Superseded |
Headers | show |
Series | offload Linux LAG devices to the TC datapath | expand |
On 26/06/2018 20:43, John Hurley wrote: > A LAG slave cannot be added directly to an OvS bridge, nor can a OvS > bridge port be added to a LAG dev. However, LAG masters can be added to > OvS. > > Use TC blocks to indirectly offload slaves when their master is attached > as a linux-netdev to an OvS bridge. In the kernel TC datapath, blocks link > together netdevs in a similar way to LAG devices. For example, if a filter > is added to a block then it is added to all block devices, or if stats are > incremented on 1 device then the stats on the entire block are incremented. > This mimics LAG devices in that if a rule is applied to the LAG master > then it should be applied to all slaves etc. > > Monitor LAG slaves via the netlink socket in netdev-linux and, if their > master is attached to the OvS bridge and has a block id, add the slave's > qdisc to the same block. Similarly, if a slave is freed from a master, > remove the qdisc from the masters block. > > Signed-off-by: John Hurley <john.hurley@netronome.com> > Reviewed-by: Simon Horman <simon.horman@netronome.com> > Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com> > --- > lib/netdev-linux.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 69 insertions(+) > > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 81de3b1..4aec735 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -233,6 +233,18 @@ enum { > VALID_FEATURES = 1 << 7, > }; > > +struct linux_lag_slave { > + uint32_t block_id; > + struct shash_node *node; > +}; > + > +/* Protects 'lag_shash' and the mutable members of struct linux_lag_slave. */ > +static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER; > + > +/* All slaves whose LAG masters are network devices in OvS. */ > +static struct shash lag_shash OVS_GUARDED_BY(lag_mutex) > + = SHASH_INITIALIZER(&lag_shash); > + > /* Traffic control. */ > > /* An instance of a traffic control class. Always associated with a particular > @@ -692,6 +704,57 @@ netdev_linux_kind_is_lag(const char *kind) > } > > static void > +netdev_linux_update_lag(struct rtnetlink_change *change) > + OVS_REQUIRES(lag_mutex) > +{ > + struct linux_lag_slave *lag; hi, I reproduced today another possible segfault where change->slave is a bad pointer. This happened using vxlan interface. 2018-06-27T05:30:15.166Z|00038|netdev_linux|ERR|netdev_linux_update_lag: name vxlan_sys_4789 slave 0x40000000000 both rtnetlink_parse() and the caller netdev_linux_run() didn't zero the change struct. I zero change like the following to verify the fix. netdev_linux_run() ... if (!error) { struct rtnetlink_change change = {0}; if (rtnetlink_parse(&buf, &change)) { struct netdev *netdev_ = NULL; ... I checked the 2 other places calling rtnetlink_parse() 1. netdev_linux_update_via_netlink() struct rtnetlink_change chg; struct rtnetlink_change *change = &chg; There is a problem here as well as chg is not zero. 2. rtnetlink_notifier_create()->rtnetlink_parse_cb() Uses global rtn_change so no problem first time but from a second time we could be with incorrect data. So i reproduced the issue only from 1 out of 3 call places but I think we should zero change inside rtnetlink_parse(). > + > + if (change->slave && netdev_linux_kind_is_lag(change->slave)) { > + lag = shash_find_data(&lag_shash, change->ifname); > + > + if (!lag) { > + struct netdev *master_netdev; > + char master_name[IFNAMSIZ]; > + uint32_t block_id; > + int error = 0; > + > + if_indextoname(change->master_ifindex, master_name); > + master_netdev = netdev_from_name(master_name); > + > + if (is_netdev_linux_class(master_netdev->netdev_class)) { > + block_id = netdev_get_block_id(master_netdev); > + if (!block_id) { > + return; > + } > + > + lag = xmalloc(sizeof *lag); > + lag->block_id = block_id; > + lag->node = shash_add(&lag_shash, change->ifname, lag); > + > + /* LAG master is linux netdev so add slave to same block. */ > + error = tc_add_del_ingress_qdisc(change->if_index, true, > + block_id); > + if (error) { > + VLOG_WARN("failed to bind LAG slave to master's block"); > + shash_delete(&lag_shash, lag->node); > + free(lag); > + } > + } > + } > + } else if (change->master_ifindex == 0) { > + /* Check if this was a lag slave that has been freed. */ > + lag = shash_find_data(&lag_shash, change->ifname); > + > + if (lag) { > + tc_add_del_ingress_qdisc(change->if_index, false, > + lag->block_id); > + shash_delete(&lag_shash, lag->node); > + free(lag); > + } > + } > +} > + > +static void > netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) > { > struct nl_sock *sock; > @@ -734,6 +797,12 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) > netdev_linux_update(netdev, nsid, &change); > ovs_mutex_unlock(&netdev->mutex); > } > + else if (!netdev_ && change.ifname) { > + /* Netdev is not present in OvS but its master could be. */ > + ovs_mutex_lock(&lag_mutex); > + netdev_linux_update_lag(&change); > + ovs_mutex_unlock(&lag_mutex); > + } > netdev_close(netdev_); > } > } else if (error == ENOBUFS) { >
On Wed, Jun 27, 2018 at 6:48 AM, Roi Dayan <roid@mellanox.com> wrote: > > > On 26/06/2018 20:43, John Hurley wrote: >> A LAG slave cannot be added directly to an OvS bridge, nor can a OvS >> bridge port be added to a LAG dev. However, LAG masters can be added to >> OvS. >> >> Use TC blocks to indirectly offload slaves when their master is attached >> as a linux-netdev to an OvS bridge. In the kernel TC datapath, blocks link >> together netdevs in a similar way to LAG devices. For example, if a filter >> is added to a block then it is added to all block devices, or if stats are >> incremented on 1 device then the stats on the entire block are incremented. >> This mimics LAG devices in that if a rule is applied to the LAG master >> then it should be applied to all slaves etc. >> >> Monitor LAG slaves via the netlink socket in netdev-linux and, if their >> master is attached to the OvS bridge and has a block id, add the slave's >> qdisc to the same block. Similarly, if a slave is freed from a master, >> remove the qdisc from the masters block. >> >> Signed-off-by: John Hurley <john.hurley@netronome.com> >> Reviewed-by: Simon Horman <simon.horman@netronome.com> >> Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com> >> --- >> lib/netdev-linux.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 69 insertions(+) >> >> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c >> index 81de3b1..4aec735 100644 >> --- a/lib/netdev-linux.c >> +++ b/lib/netdev-linux.c >> @@ -233,6 +233,18 @@ enum { >> VALID_FEATURES = 1 << 7, >> }; >> >> +struct linux_lag_slave { >> + uint32_t block_id; >> + struct shash_node *node; >> +}; >> + >> +/* Protects 'lag_shash' and the mutable members of struct linux_lag_slave. */ >> +static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER; >> + >> +/* All slaves whose LAG masters are network devices in OvS. */ >> +static struct shash lag_shash OVS_GUARDED_BY(lag_mutex) >> + = SHASH_INITIALIZER(&lag_shash); >> + >> /* Traffic control. */ >> >> /* An instance of a traffic control class. Always associated with a particular >> @@ -692,6 +704,57 @@ netdev_linux_kind_is_lag(const char *kind) >> } >> >> static void >> +netdev_linux_update_lag(struct rtnetlink_change *change) >> + OVS_REQUIRES(lag_mutex) >> +{ >> + struct linux_lag_slave *lag; > > hi, > > I reproduced today another possible segfault where change->slave is a bad pointer. > This happened using vxlan interface. > > 2018-06-27T05:30:15.166Z|00038|netdev_linux|ERR|netdev_linux_update_lag: name vxlan_sys_4789 slave 0x40000000000 > > both rtnetlink_parse() and the caller netdev_linux_run() didn't zero the change struct. > > I zero change like the following to verify the fix. > > netdev_linux_run() > ... > if (!error) { > struct rtnetlink_change change = {0}; > > if (rtnetlink_parse(&buf, &change)) { > struct netdev *netdev_ = NULL; > ... > > I checked the 2 other places calling rtnetlink_parse() > > 1. netdev_linux_update_via_netlink() > struct rtnetlink_change chg; > struct rtnetlink_change *change = &chg; > > There is a problem here as well as chg is not zero. > > 2. rtnetlink_notifier_create()->rtnetlink_parse_cb() > Uses global rtn_change so no problem first time but from a second time we could be > with incorrect data. > > > So i reproduced the issue only from 1 out of 3 call places but I think we should zero > change inside rtnetlink_parse(). > > Hi Roi, Thanks again. This is another good catch and something that didn't show in my tests. Apologies for that. I am able to reproduce the fail. It doesn't cause a segfault for me but I can see it accessing garbage in the slave pointer which could lead to a segfault. I don't necessarily think that setting change to 0 is the appropriate fix here. The reason it is not zero'd in other places is that no element should be touched without being set. If I add a check for 'rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)' to the lag_update in the same way as netdev_linux_update__ then we should be fine. If it is a new link message then rtnetlink_parse() cannot return true unless slave has explicitly been set to a received netlink attr or NULL. The problem was that non 'new link' messages were causing rtnetlink_parse to return true without explicitly setting the slave pointer, and then trying to access that pointer. Sorry I missed this before but thanks for reviewing/reporting. John > >> + >> + if (change->slave && netdev_linux_kind_is_lag(change->slave)) { >> + lag = shash_find_data(&lag_shash, change->ifname); >> + >> + if (!lag) { >> + struct netdev *master_netdev; >> + char master_name[IFNAMSIZ]; >> + uint32_t block_id; >> + int error = 0; >> + >> + if_indextoname(change->master_ifindex, master_name); >> + master_netdev = netdev_from_name(master_name); >> + >> + if (is_netdev_linux_class(master_netdev->netdev_class)) { >> + block_id = netdev_get_block_id(master_netdev); >> + if (!block_id) { >> + return; >> + } >> + >> + lag = xmalloc(sizeof *lag); >> + lag->block_id = block_id; >> + lag->node = shash_add(&lag_shash, change->ifname, lag); >> + >> + /* LAG master is linux netdev so add slave to same block. */ >> + error = tc_add_del_ingress_qdisc(change->if_index, true, >> + block_id); >> + if (error) { >> + VLOG_WARN("failed to bind LAG slave to master's block"); >> + shash_delete(&lag_shash, lag->node); >> + free(lag); >> + } >> + } >> + } >> + } else if (change->master_ifindex == 0) { >> + /* Check if this was a lag slave that has been freed. */ >> + lag = shash_find_data(&lag_shash, change->ifname); >> + >> + if (lag) { >> + tc_add_del_ingress_qdisc(change->if_index, false, >> + lag->block_id); >> + shash_delete(&lag_shash, lag->node); >> + free(lag); >> + } >> + } >> +} >> + >> +static void >> netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) >> { >> struct nl_sock *sock; >> @@ -734,6 +797,12 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) >> netdev_linux_update(netdev, nsid, &change); >> ovs_mutex_unlock(&netdev->mutex); >> } >> + else if (!netdev_ && change.ifname) { >> + /* Netdev is not present in OvS but its master could be. */ >> + ovs_mutex_lock(&lag_mutex); >> + netdev_linux_update_lag(&change); >> + ovs_mutex_unlock(&lag_mutex); >> + } >> netdev_close(netdev_); >> } >> } else if (error == ENOBUFS) { >>
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 81de3b1..4aec735 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -233,6 +233,18 @@ enum { VALID_FEATURES = 1 << 7, }; +struct linux_lag_slave { + uint32_t block_id; + struct shash_node *node; +}; + +/* Protects 'lag_shash' and the mutable members of struct linux_lag_slave. */ +static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER; + +/* All slaves whose LAG masters are network devices in OvS. */ +static struct shash lag_shash OVS_GUARDED_BY(lag_mutex) + = SHASH_INITIALIZER(&lag_shash); + /* Traffic control. */ /* An instance of a traffic control class. Always associated with a particular @@ -692,6 +704,57 @@ netdev_linux_kind_is_lag(const char *kind) } static void +netdev_linux_update_lag(struct rtnetlink_change *change) + OVS_REQUIRES(lag_mutex) +{ + struct linux_lag_slave *lag; + + if (change->slave && netdev_linux_kind_is_lag(change->slave)) { + lag = shash_find_data(&lag_shash, change->ifname); + + if (!lag) { + struct netdev *master_netdev; + char master_name[IFNAMSIZ]; + uint32_t block_id; + int error = 0; + + if_indextoname(change->master_ifindex, master_name); + master_netdev = netdev_from_name(master_name); + + if (is_netdev_linux_class(master_netdev->netdev_class)) { + block_id = netdev_get_block_id(master_netdev); + if (!block_id) { + return; + } + + lag = xmalloc(sizeof *lag); + lag->block_id = block_id; + lag->node = shash_add(&lag_shash, change->ifname, lag); + + /* LAG master is linux netdev so add slave to same block. */ + error = tc_add_del_ingress_qdisc(change->if_index, true, + block_id); + if (error) { + VLOG_WARN("failed to bind LAG slave to master's block"); + shash_delete(&lag_shash, lag->node); + free(lag); + } + } + } + } else if (change->master_ifindex == 0) { + /* Check if this was a lag slave that has been freed. */ + lag = shash_find_data(&lag_shash, change->ifname); + + if (lag) { + tc_add_del_ingress_qdisc(change->if_index, false, + lag->block_id); + shash_delete(&lag_shash, lag->node); + free(lag); + } + } +} + +static void netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) { struct nl_sock *sock; @@ -734,6 +797,12 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) netdev_linux_update(netdev, nsid, &change); ovs_mutex_unlock(&netdev->mutex); } + else if (!netdev_ && change.ifname) { + /* Netdev is not present in OvS but its master could be. */ + ovs_mutex_lock(&lag_mutex); + netdev_linux_update_lag(&change); + ovs_mutex_unlock(&lag_mutex); + } netdev_close(netdev_); } } else if (error == ENOBUFS) {