diff mbox

[net-next,v3,10/12] ipv4: fib: Add an API to request a FIB dump

Message ID 1480500546-2544-11-git-send-email-jiri@resnulli.us
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Jiri Pirko Nov. 30, 2016, 10:09 a.m. UTC
From: Ido Schimmel <idosch@mellanox.com>

Commit b90eb7549499 ("fib: introduce FIB notification infrastructure")
introduced a new notification chain to notify listeners (f.e., switchdev
drivers) about addition and deletion of routes.

However, upon registration to the chain the FIB tables can already be
populated, which means potential listeners will have an incomplete view
of the tables.

Solve that by adding an API to request a FIB dump. The dump itself it
done using RCU in order not to starve consumers that need RTNL to make
progress.

The integrity of the dump is ensured by reading the FIB change sequence
counter before and after the dump. This allows us to avoid the
problematic situation in which the dumping process sends a ENTRY_ADD
notification following ENTRY_DEL generated by another process holding
RTNL.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
 include/net/ip_fib.h |   4 ++
 net/ipv4/fib_trie.c  | 138 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)
diff mbox

Patch

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 6c67b93..1388bfc 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -221,6 +221,10 @@  enum fib_event_type {
 	FIB_EVENT_RULE_DEL,
 };
 
+typedef void fib_dump_cb_t(struct notifier_block *nb);
+
+bool fib_notifier_dump(struct notifier_block *nb, struct net *net,
+		       fib_dump_cb_t *cb);
 int register_fib_notifier(struct notifier_block *nb);
 int unregister_fib_notifier(struct notifier_block *nb);
 int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 2891356..d944308 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -84,8 +84,90 @@ 
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
+static unsigned int net_fib_seq(const struct net *net)
+{
+	unsigned int fib_seq;
+
+	rtnl_lock();
+	fib_seq = net->ipv4.fib_seq;
+	rtnl_unlock();
+
+	return fib_seq;
+}
+
 static ATOMIC_NOTIFIER_HEAD(fib_chain);
 
+static int call_fib_notifier(struct notifier_block *nb, struct net *net,
+			     enum fib_event_type event_type,
+			     struct fib_notifier_info *info)
+{
+	info->net = net;
+	return nb->notifier_call(nb, event_type, info);
+}
+
+static void fib_rules_notify(struct net *net, struct notifier_block *nb,
+			     enum fib_event_type event_type)
+{
+#ifdef CONFIG_IP_MULTIPLE_TABLES
+	struct fib_notifier_info info;
+
+	if (net->ipv4.fib_has_custom_rules)
+		call_fib_notifier(nb, net, event_type, &info);
+#endif
+}
+
+static void fib_notify(struct net *net, struct notifier_block *nb,
+		       enum fib_event_type event_type);
+
+static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
+				   enum fib_event_type event_type, u32 dst,
+				   int dst_len, struct fib_info *fi,
+				   u8 tos, u8 type, u32 tb_id, u32 nlflags)
+{
+	struct fib_entry_notifier_info info = {
+		.dst = dst,
+		.dst_len = dst_len,
+		.fi = fi,
+		.tos = tos,
+		.type = type,
+		.tb_id = tb_id,
+		.nlflags = nlflags,
+	};
+	return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static bool fib_dump_is_consistent(struct notifier_block *nb,
+				   const struct net *net, fib_dump_cb_t *cb,
+				   unsigned int fib_seq)
+{
+	if (fib_seq == net_fib_seq(net))
+		return true;
+	if (cb)
+		cb(nb);
+	return false;
+}
+
+bool fib_notifier_dump(struct notifier_block *nb, struct net *net,
+		       fib_dump_cb_t *cb)
+{
+	int retries = 0;
+
+	do {
+		unsigned int fib_seq = net_fib_seq(net);
+
+		rcu_read_lock();
+		fib_rules_notify(net, nb, FIB_EVENT_RULE_ADD);
+		fib_notify(net, nb, FIB_EVENT_ENTRY_ADD);
+		rcu_read_unlock();
+
+		if (fib_dump_is_consistent(nb, net, cb, fib_seq))
+			return true;
+	} while (++retries < net->ipv4.sysctl_fib_dump_max_retries);
+
+	return false;
+}
+EXPORT_SYMBOL(fib_notifier_dump);
+
 int register_fib_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_register(&fib_chain, nb);
@@ -1902,6 +1984,62 @@  int fib_table_flush(struct net *net, struct fib_table *tb)
 	return found;
 }
 
+static void fib_leaf_notify(struct net *net, struct key_vector *l,
+			    struct fib_table *tb, struct notifier_block *nb,
+			    enum fib_event_type event_type)
+{
+	struct fib_alias *fa;
+
+	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
+		struct fib_info *fi = fa->fa_info;
+
+		if (!fi)
+			continue;
+
+		/* local and main table can share the same trie,
+		 * so don't notify twice for the same entry.
+		 */
+		if (tb->tb_id != fa->tb_id)
+			continue;
+
+		call_fib_entry_notifier(nb, net, event_type, l->key,
+					KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
+					fa->fa_type, fa->tb_id, 0);
+	}
+}
+
+static void fib_table_notify(struct net *net, struct fib_table *tb,
+			     struct notifier_block *nb,
+			     enum fib_event_type event_type)
+{
+	struct trie *t = (struct trie *)tb->tb_data;
+	struct key_vector *l, *tp = t->kv;
+	t_key key = 0;
+
+	while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
+		fib_leaf_notify(net, l, tb, nb, event_type);
+
+		key = l->key + 1;
+		/* stop in case of wrap around */
+		if (key < l->key)
+			break;
+	}
+}
+
+static void fib_notify(struct net *net, struct notifier_block *nb,
+		       enum fib_event_type event_type)
+{
+	unsigned int h;
+
+	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+		struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+		struct fib_table *tb;
+
+		hlist_for_each_entry_rcu(tb, head, tb_hlist)
+			fib_table_notify(net, tb, nb, event_type);
+	}
+}
+
 static void __trie_free_rcu(struct rcu_head *head)
 {
 	struct fib_table *tb = container_of(head, struct fib_table, rcu);