| Message ID | 20260311060533.52598-6-amorenoz@redhat.com |
|---|---|
| State | New |
| Headers | show |
| Series | netdev-linux: Use event-driven netlink notifications. | expand |
| Context | Check | Description |
|---|---|---|
| ovsrobot/apply-robot | success | apply and check: success |
| ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
On Wed, Mar 11, 2026 at 2:06 AM Adrian Moreno via dev < ovs-dev@openvswitch.org> wrote: > Now that netdev-linux flags are also cached, we need to ensure they are > updated correctly. > > Currently, there are subsystems that rely on link state changes via > rtnetlink notifiers (e.g: iface-notifier, route-table). In parallel, > netdev-linux creates its own netlink socket to listen for link > notifications based on which netdev's states are updated. This creates a > potential race: some subsystems might be notified that a link changed > but the internal netdev representation is still old. > > If we want to both cache netdev internal state (e.g: flags, ether_addr) > effectively we need to use the same callback chain, i.e: we need to use > rtnetlink to update netdev-linux netdevs as well when links change. > > The order of callbacks cannot be guaranteed so it could still be possible > to > have another subsystem be notified before netdev-linux has been given a > chance > to update its state. However, subsystems would likely want to aggregate > reconfigurations and will likely have their own "_run()" function to > operate. > > Therefore, it could be reasonalbe to assume the other subsystems would use > the > callback to set a flag that is then used to trigger reconfigurations in > their > own periodic "_run()" functions (this is exactly the case of the two > existing > users: iface-notifier, route-table). > > This patch makes netdev-linux use rtnetlink notifier for interface updates > and > independent nln notifiers for address changes. > > Signed-off-by: Adrian Moreno <amorenoz@redhat.com> > --- > lib/netdev-linux.c | 214 ++++++++++++++++++++--------------------- > lib/netdev-linux.h | 1 + > lib/netlink-notifier.h | 3 +- > lib/rtnetlink.c | 12 ++- > lib/rtnetlink.h | 1 + > 5 files changed, 118 insertions(+), 113 deletions(-) > > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index e6127240b..97be563f7 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -298,6 +298,10 @@ static struct ovs_mutex lag_mutex = > OVS_MUTEX_INITIALIZER; > static struct shash lag_shash OVS_GUARDED_BY(lag_mutex) > = SHASH_INITIALIZER(&lag_shash); > > +static struct rtnetlink_change netlink_change; > +static struct nln *nln = NULL; > +static struct nln_notifier *notifiers[4] = {NULL, NULL, NULL, NULL}; > + > /* Traffic control. */ > > /* An instance of a traffic control class. Always associated with a > particular > @@ -648,40 +652,6 @@ static void netdev_linux_changed(struct netdev_linux > *netdev, > unsigned int ifi_flags, unsigned int > mask) > OVS_REQUIRES(netdev->mutex); > > -/* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK, > - * RTNLGRP_IPV4_IFADDR and RTNLGRP_IPV6_IFADDR changes, or NULL > - * if no such socket could be created. */ > -static struct nl_sock * > -netdev_linux_notify_sock(void) > -{ > - static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; > - static struct nl_sock *sock; > - unsigned int mcgroups[] = {RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR, > - RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO}; > - > - if (ovsthread_once_start(&once)) { > - int error; > - > - error = nl_sock_create(NETLINK_ROUTE, &sock); > - if (!error) { > - size_t i; > - > - nl_sock_listen_all_nsid(sock, true); > - for (i = 0; i < ARRAY_SIZE(mcgroups); i++) { > - error = nl_sock_join_mcgroup(sock, mcgroups[i]); > - if (error) { > - nl_sock_destroy(sock); > - sock = NULL; > - break; > - } > - } > - } > - ovsthread_once_done(&once); > - } > - > - return sock; > -} > - > static bool > netdev_linux_miimon_enabled(void) > { > @@ -699,7 +669,7 @@ netdev_linux_kind_is_lag(const char *kind) > } > > static void > -netdev_linux_update_lag(struct rtnetlink_change *change) > +netdev_linux_update_lag(const struct rtnetlink_change *change) > OVS_REQUIRES(lag_mutex) > { > struct linux_lag_member *lag; > @@ -763,101 +733,128 @@ netdev_linux_update_lag(struct rtnetlink_change > *change) > } > } > > -void > -netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) > +static void > +netdev_linux_rtnetlink_update(const struct rtnetlink_change *change, > + void *aux OVS_UNUSED) > { > - struct nl_sock *sock; > - int error; > + struct netdev *netdev_ = NULL; > > - if (netdev_linux_miimon_enabled()) { > - netdev_linux_miimon_run(); > - } > + if (change && change->irrelevant) { > + return; > + } else if (!change) { > + struct shash device_shash; > + struct shash_node *node; > > - sock = netdev_linux_notify_sock(); > - if (!sock) { > + shash_init(&device_shash); > + netdev_get_devices(&netdev_linux_class, &device_shash); > + SHASH_FOR_EACH (node, &device_shash) { > + struct netdev_linux *netdev = netdev_linux_cast(netdev_); > + netdev_ = node->data; > + int error; > + > + ovs_mutex_lock(&netdev->mutex); > + error = update_flags_local(netdev); > + netdev_linux_changed(netdev, netdev->ifi_flags, > + error ? 0 : VALID_FLAGS); > + ovs_mutex_unlock(&netdev->mutex); > + > + netdev_close(netdev_); > + } > + shash_destroy(&device_shash); > return; > } > > - do { > - uint64_t buf_stub[4096 / 8]; > - int nsid; > - struct ofpbuf buf; > + if (change->ifname) { > + netdev_ = netdev_from_name(change->ifname); > + } > > - ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); > - error = nl_sock_recv(sock, &buf, &nsid, false); > - if (!error) { > - struct rtnetlink_change change; > + if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) { > + struct netdev_linux *netdev = netdev_linux_cast(netdev_); > > - if (rtnetlink_parse(&buf, &change) && !change.irrelevant) { > - struct netdev *netdev_ = NULL; > - char dev_name[IFNAMSIZ]; > + ovs_mutex_lock(&netdev->mutex); > + netdev_linux_update(netdev, change->nsid, change); > + ovs_mutex_unlock(&netdev->mutex); > + } > > - if (!change.ifname) { > - change.ifname = if_indextoname(change.if_index, > dev_name); > - } > + if (change->ifname && > + rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { > > - if (change.ifname) { > - netdev_ = netdev_from_name(change.ifname); > - } > - if (netdev_ && > is_netdev_linux_class(netdev_->netdev_class)) { > - struct netdev_linux *netdev = > netdev_linux_cast(netdev_); > + /* Need to try updating the LAG information. */ > + ovs_mutex_lock(&lag_mutex); > + netdev_linux_update_lag(change); > + ovs_mutex_unlock(&lag_mutex); > + } > + netdev_close(netdev_); > +} > > - ovs_mutex_lock(&netdev->mutex); > - netdev_linux_update(netdev, nsid, &change); > - ovs_mutex_unlock(&netdev->mutex); > - } > +static void > +netdev_linux_nln_cb(const void *change, void *aux) > +{ > + netdev_linux_rtnetlink_update(change, aux); > +} > > - if (change.ifname && > - rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) { > +static void > +netdev_linux_rtnetlink_config(void) > +{ > + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; > > - /* Need to try updating the LAG information. */ > - ovs_mutex_lock(&lag_mutex); > - netdev_linux_update_lag(&change); > - ovs_mutex_unlock(&lag_mutex); > - } > - netdev_close(netdev_); > - } > - } else if (error == ENOBUFS) { > - struct shash device_shash; > - struct shash_node *node; > + if (ovsthread_once_start(&once)) { > + unsigned int extra_mcgroups[] = {RTNLGRP_IPV4_IFADDR, > + RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO}; > + struct nln_notifier *notifier; > + int i; > > - nl_sock_drain(sock); > + ovs_assert(ARRAY_SIZE(notifiers) == ARRAY_SIZE(extra_mcgroups) + > 1); > This should probably be a BUILD_ASSERT_DECL, but why even have the notifiers array? It doesn't seem to be used? > > - shash_init(&device_shash); > - netdev_get_devices(&netdev_linux_class, &device_shash); > - SHASH_FOR_EACH (node, &device_shash) { > - struct netdev *netdev_ = node->data; > - struct netdev_linux *netdev = netdev_linux_cast(netdev_); > - > - ovs_mutex_lock(&netdev->mutex); > - update_flags_local(netdev); > - netdev_linux_changed(netdev, netdev->ifi_flags, > VALID_FLAGS); > - ovs_mutex_unlock(&netdev->mutex); > - > - netdev_close(netdev_); > - } > - shash_destroy(&device_shash); > - } else if (error != EAGAIN) { > - static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, > 5); > - VLOG_WARN_RL(&rll, "error reading or parsing netlink (%s)", > - ovs_strerror(error)); > + nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb, > + &netlink_change); > netlink_change probably doesn't have to be a global. > + if (!nln) { > + VLOG_ERR("failed to create nln notifier"); > } > - ofpbuf_uninit(&buf); > - } while (!error); > + > + for (i = 0; i < ARRAY_SIZE(extra_mcgroups); i++) { > + notifier = nln_notifier_create(nln, extra_mcgroups[i], > + netdev_linux_nln_cb, NULL); > + if (!notifier) { > + VLOG_ERR("failed to create nln notifier for mcgroup %d", > + extra_mcgroups[i]); > + } > + notifiers[i] = notifier; > + } > + > + notifier = > rtnetlink_notifier_create(netdev_linux_rtnetlink_update, > + NULL); > + if (!notifier) { > + VLOG_ERR("failed to create rtnetlink notifier"); > + } > + notifiers[i] = notifier; > + > + ovsthread_once_done(&once); > + } > +} > + > +void > +netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) > +{ > + rtnetlink_run(); > + if (nln) { > + nln_run(nln); > + } > + if (netdev_linux_miimon_enabled()) { > + netdev_linux_miimon_run(); > This patch changes the order of miimon_run to after netlink socket operations. Was that intentional? > + } > } > > static void > netdev_linux_wait(const struct netdev_class *netdev_class OVS_UNUSED) > { > - struct nl_sock *sock; > - > + rtnetlink_wait(); > + if (nln) { > + nln_wait(nln); > + } > if (netdev_linux_miimon_enabled()) { > netdev_linux_miimon_wait(); > } > - sock = netdev_linux_notify_sock(); > - if (sock) { > - nl_sock_wait(sock, POLLIN); > - } > } > > static void > @@ -900,9 +897,6 @@ netdev_linux_update__(struct netdev_linux *dev, > dev->etheraddr = change->mac; > dev->cache_valid |= VALID_ETHERADDR; > dev->ether_addr_error = 0; > - > - /* The mac addr has been changed, report it now. */ > - rtnetlink_report_link(); > } > > if (change->primary && > netdev_linux_kind_is_lag(change->primary)) { > @@ -968,6 +962,8 @@ netdev_linux_common_construct(struct netdev *netdev_) > return ENAMETOOLONG; > } > > + netdev_linux_rtnetlink_config(); > + > /* The device could be in the same network namespace or in another > one. */ > netnsid_unset(&netdev->netnsid); > ovs_mutex_init(&netdev->mutex); > diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h > index ec19b0ded..81a5634f6 100644 > --- a/lib/netdev-linux.h > +++ b/lib/netdev-linux.h > @@ -25,6 +25,7 @@ > * Linux-specific code. */ > > struct netdev; > +struct rtnetlink_change; > > int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag, > const char *flag_name, bool enable); > diff --git a/lib/netlink-notifier.h b/lib/netlink-notifier.h > index 8b6c32048..d52e5f56c 100644 > --- a/lib/netlink-notifier.h > +++ b/lib/netlink-notifier.h > @@ -1,5 +1,4 @@ > -/* > - * Copyright (c) 2009 Nicira, Inc. > +/* Copyright (c) 2009 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c > index 6b63afa06..4ae050ff0 100644 > --- a/lib/rtnetlink.c > +++ b/lib/rtnetlink.c > @@ -191,7 +191,7 @@ rtnetlink_parse(struct ofpbuf *buf, struct > rtnetlink_change *change) > } > > /* Return RTNLGRP_LINK on success, 0 on parse error. */ > -static int > +int > rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change) > { > bool ret = rtnetlink_parse(buf, change); > @@ -212,12 +212,20 @@ rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, > void *change) > * > * xxx Joins more multicast groups when needed. > * > + * Callbacks might find that netdev-linux netdevs still hold outdated > cached > + * information. If the notification has to trigger some kind of > reconfiguration > + * that requires up-to-date netdev cache, it should do it asynchronously, > for > + * instance by setting a flag in the callback and acting on it during the > + * normal "*_run()" operation. > + * > + * Notifications might come from any network namespace. > + * > * Returns an initialized nln_notifier if successful, NULL otherwise. */ > struct nln_notifier * > rtnetlink_notifier_create(rtnetlink_notify_func *cb, void *aux) > { > if (!nln) { > - nln = nln_create(NETLINK_ROUTE, false, rtnetlink_parse_cb, > + nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb, > &rtn_change); > } > > diff --git a/lib/rtnetlink.h b/lib/rtnetlink.h > index 9b2397b4e..f0b49bf1a 100644 > --- a/lib/rtnetlink.h > +++ b/lib/rtnetlink.h > @@ -76,4 +76,5 @@ void rtnetlink_notifier_destroy(struct nln_notifier *); > void rtnetlink_run(void); > void rtnetlink_wait(void); > void rtnetlink_report_link(void); > +int rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change); > #endif /* rtnetlink.h */ > -- > 2.53.0 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > >
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index e6127240b..97be563f7 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -298,6 +298,10 @@ static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER; static struct shash lag_shash OVS_GUARDED_BY(lag_mutex) = SHASH_INITIALIZER(&lag_shash); +static struct rtnetlink_change netlink_change; +static struct nln *nln = NULL; +static struct nln_notifier *notifiers[4] = {NULL, NULL, NULL, NULL}; + /* Traffic control. */ /* An instance of a traffic control class. Always associated with a particular @@ -648,40 +652,6 @@ static void netdev_linux_changed(struct netdev_linux *netdev, unsigned int ifi_flags, unsigned int mask) OVS_REQUIRES(netdev->mutex); -/* Returns a NETLINK_ROUTE socket listening for RTNLGRP_LINK, - * RTNLGRP_IPV4_IFADDR and RTNLGRP_IPV6_IFADDR changes, or NULL - * if no such socket could be created. */ -static struct nl_sock * -netdev_linux_notify_sock(void) -{ - static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - static struct nl_sock *sock; - unsigned int mcgroups[] = {RTNLGRP_LINK, RTNLGRP_IPV4_IFADDR, - RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO}; - - if (ovsthread_once_start(&once)) { - int error; - - error = nl_sock_create(NETLINK_ROUTE, &sock); - if (!error) { - size_t i; - - nl_sock_listen_all_nsid(sock, true); - for (i = 0; i < ARRAY_SIZE(mcgroups); i++) { - error = nl_sock_join_mcgroup(sock, mcgroups[i]); - if (error) { - nl_sock_destroy(sock); - sock = NULL; - break; - } - } - } - ovsthread_once_done(&once); - } - - return sock; -} - static bool netdev_linux_miimon_enabled(void) { @@ -699,7 +669,7 @@ netdev_linux_kind_is_lag(const char *kind) } static void -netdev_linux_update_lag(struct rtnetlink_change *change) +netdev_linux_update_lag(const struct rtnetlink_change *change) OVS_REQUIRES(lag_mutex) { struct linux_lag_member *lag; @@ -763,101 +733,128 @@ netdev_linux_update_lag(struct rtnetlink_change *change) } } -void -netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) +static void +netdev_linux_rtnetlink_update(const struct rtnetlink_change *change, + void *aux OVS_UNUSED) { - struct nl_sock *sock; - int error; + struct netdev *netdev_ = NULL; - if (netdev_linux_miimon_enabled()) { - netdev_linux_miimon_run(); - } + if (change && change->irrelevant) { + return; + } else if (!change) { + struct shash device_shash; + struct shash_node *node; - sock = netdev_linux_notify_sock(); - if (!sock) { + shash_init(&device_shash); + netdev_get_devices(&netdev_linux_class, &device_shash); + SHASH_FOR_EACH (node, &device_shash) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); + netdev_ = node->data; + int error; + + ovs_mutex_lock(&netdev->mutex); + error = update_flags_local(netdev); + netdev_linux_changed(netdev, netdev->ifi_flags, + error ? 0 : VALID_FLAGS); + ovs_mutex_unlock(&netdev->mutex); + + netdev_close(netdev_); + } + shash_destroy(&device_shash); return; } - do { - uint64_t buf_stub[4096 / 8]; - int nsid; - struct ofpbuf buf; + if (change->ifname) { + netdev_ = netdev_from_name(change->ifname); + } - ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); - error = nl_sock_recv(sock, &buf, &nsid, false); - if (!error) { - struct rtnetlink_change change; + if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) { + struct netdev_linux *netdev = netdev_linux_cast(netdev_); - if (rtnetlink_parse(&buf, &change) && !change.irrelevant) { - struct netdev *netdev_ = NULL; - char dev_name[IFNAMSIZ]; + ovs_mutex_lock(&netdev->mutex); + netdev_linux_update(netdev, change->nsid, change); + ovs_mutex_unlock(&netdev->mutex); + } - if (!change.ifname) { - change.ifname = if_indextoname(change.if_index, dev_name); - } + if (change->ifname && + rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { - if (change.ifname) { - netdev_ = netdev_from_name(change.ifname); - } - if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) { - struct netdev_linux *netdev = netdev_linux_cast(netdev_); + /* Need to try updating the LAG information. */ + ovs_mutex_lock(&lag_mutex); + netdev_linux_update_lag(change); + ovs_mutex_unlock(&lag_mutex); + } + netdev_close(netdev_); +} - ovs_mutex_lock(&netdev->mutex); - netdev_linux_update(netdev, nsid, &change); - ovs_mutex_unlock(&netdev->mutex); - } +static void +netdev_linux_nln_cb(const void *change, void *aux) +{ + netdev_linux_rtnetlink_update(change, aux); +} - if (change.ifname && - rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) { +static void +netdev_linux_rtnetlink_config(void) +{ + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - /* Need to try updating the LAG information. */ - ovs_mutex_lock(&lag_mutex); - netdev_linux_update_lag(&change); - ovs_mutex_unlock(&lag_mutex); - } - netdev_close(netdev_); - } - } else if (error == ENOBUFS) { - struct shash device_shash; - struct shash_node *node; + if (ovsthread_once_start(&once)) { + unsigned int extra_mcgroups[] = {RTNLGRP_IPV4_IFADDR, + RTNLGRP_IPV6_IFADDR, RTNLGRP_IPV6_IFINFO}; + struct nln_notifier *notifier; + int i; - nl_sock_drain(sock); + ovs_assert(ARRAY_SIZE(notifiers) == ARRAY_SIZE(extra_mcgroups) + 1); - shash_init(&device_shash); - netdev_get_devices(&netdev_linux_class, &device_shash); - SHASH_FOR_EACH (node, &device_shash) { - struct netdev *netdev_ = node->data; - struct netdev_linux *netdev = netdev_linux_cast(netdev_); - - ovs_mutex_lock(&netdev->mutex); - update_flags_local(netdev); - netdev_linux_changed(netdev, netdev->ifi_flags, VALID_FLAGS); - ovs_mutex_unlock(&netdev->mutex); - - netdev_close(netdev_); - } - shash_destroy(&device_shash); - } else if (error != EAGAIN) { - static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rll, "error reading or parsing netlink (%s)", - ovs_strerror(error)); + nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb, + &netlink_change); + if (!nln) { + VLOG_ERR("failed to create nln notifier"); } - ofpbuf_uninit(&buf); - } while (!error); + + for (i = 0; i < ARRAY_SIZE(extra_mcgroups); i++) { + notifier = nln_notifier_create(nln, extra_mcgroups[i], + netdev_linux_nln_cb, NULL); + if (!notifier) { + VLOG_ERR("failed to create nln notifier for mcgroup %d", + extra_mcgroups[i]); + } + notifiers[i] = notifier; + } + + notifier = rtnetlink_notifier_create(netdev_linux_rtnetlink_update, + NULL); + if (!notifier) { + VLOG_ERR("failed to create rtnetlink notifier"); + } + notifiers[i] = notifier; + + ovsthread_once_done(&once); + } +} + +void +netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) +{ + rtnetlink_run(); + if (nln) { + nln_run(nln); + } + if (netdev_linux_miimon_enabled()) { + netdev_linux_miimon_run(); + } } static void netdev_linux_wait(const struct netdev_class *netdev_class OVS_UNUSED) { - struct nl_sock *sock; - + rtnetlink_wait(); + if (nln) { + nln_wait(nln); + } if (netdev_linux_miimon_enabled()) { netdev_linux_miimon_wait(); } - sock = netdev_linux_notify_sock(); - if (sock) { - nl_sock_wait(sock, POLLIN); - } } static void @@ -900,9 +897,6 @@ netdev_linux_update__(struct netdev_linux *dev, dev->etheraddr = change->mac; dev->cache_valid |= VALID_ETHERADDR; dev->ether_addr_error = 0; - - /* The mac addr has been changed, report it now. */ - rtnetlink_report_link(); } if (change->primary && netdev_linux_kind_is_lag(change->primary)) { @@ -968,6 +962,8 @@ netdev_linux_common_construct(struct netdev *netdev_) return ENAMETOOLONG; } + netdev_linux_rtnetlink_config(); + /* The device could be in the same network namespace or in another one. */ netnsid_unset(&netdev->netnsid); ovs_mutex_init(&netdev->mutex); diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h index ec19b0ded..81a5634f6 100644 --- a/lib/netdev-linux.h +++ b/lib/netdev-linux.h @@ -25,6 +25,7 @@ * Linux-specific code. */ struct netdev; +struct rtnetlink_change; int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag, const char *flag_name, bool enable); diff --git a/lib/netlink-notifier.h b/lib/netlink-notifier.h index 8b6c32048..d52e5f56c 100644 --- a/lib/netlink-notifier.h +++ b/lib/netlink-notifier.h @@ -1,5 +1,4 @@ -/* - * Copyright (c) 2009 Nicira, Inc. +/* Copyright (c) 2009 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c index 6b63afa06..4ae050ff0 100644 --- a/lib/rtnetlink.c +++ b/lib/rtnetlink.c @@ -191,7 +191,7 @@ rtnetlink_parse(struct ofpbuf *buf, struct rtnetlink_change *change) } /* Return RTNLGRP_LINK on success, 0 on parse error. */ -static int +int rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change) { bool ret = rtnetlink_parse(buf, change); @@ -212,12 +212,20 @@ rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change) * * xxx Joins more multicast groups when needed. * + * Callbacks might find that netdev-linux netdevs still hold outdated cached + * information. If the notification has to trigger some kind of reconfiguration + * that requires up-to-date netdev cache, it should do it asynchronously, for + * instance by setting a flag in the callback and acting on it during the + * normal "*_run()" operation. + * + * Notifications might come from any network namespace. + * * Returns an initialized nln_notifier if successful, NULL otherwise. */ struct nln_notifier * rtnetlink_notifier_create(rtnetlink_notify_func *cb, void *aux) { if (!nln) { - nln = nln_create(NETLINK_ROUTE, false, rtnetlink_parse_cb, + nln = nln_create(NETLINK_ROUTE, true, rtnetlink_parse_cb, &rtn_change); } diff --git a/lib/rtnetlink.h b/lib/rtnetlink.h index 9b2397b4e..f0b49bf1a 100644 --- a/lib/rtnetlink.h +++ b/lib/rtnetlink.h @@ -76,4 +76,5 @@ void rtnetlink_notifier_destroy(struct nln_notifier *); void rtnetlink_run(void); void rtnetlink_wait(void); void rtnetlink_report_link(void); +int rtnetlink_parse_cb(struct ofpbuf *buf, int nsid, void *change); #endif /* rtnetlink.h */
Now that netdev-linux flags are also cached, we need to ensure they are updated correctly. Currently, there are subsystems that rely on link state changes via rtnetlink notifiers (e.g: iface-notifier, route-table). In parallel, netdev-linux creates its own netlink socket to listen for link notifications based on which netdev's states are updated. This creates a potential race: some subsystems might be notified that a link changed but the internal netdev representation is still old. If we want to both cache netdev internal state (e.g: flags, ether_addr) effectively we need to use the same callback chain, i.e: we need to use rtnetlink to update netdev-linux netdevs as well when links change. The order of callbacks cannot be guaranteed so it could still be possible to have another subsystem be notified before netdev-linux has been given a chance to update its state. However, subsystems would likely want to aggregate reconfigurations and will likely have their own "_run()" function to operate. Therefore, it could be reasonalbe to assume the other subsystems would use the callback to set a flag that is then used to trigger reconfigurations in their own periodic "_run()" functions (this is exactly the case of the two existing users: iface-notifier, route-table). This patch makes netdev-linux use rtnetlink notifier for interface updates and independent nln notifiers for address changes. Signed-off-by: Adrian Moreno <amorenoz@redhat.com> --- lib/netdev-linux.c | 214 ++++++++++++++++++++--------------------- lib/netdev-linux.h | 1 + lib/netlink-notifier.h | 3 +- lib/rtnetlink.c | 12 ++- lib/rtnetlink.h | 1 + 5 files changed, 118 insertions(+), 113 deletions(-)