diff mbox series

[net-next,v3,01/16] net: sched: protect block state with mutex

Message ID 20190204123301.4223-2-vladbu@mellanox.com
State Changes Requested
Delegated to: David Miller
Headers show
Series Refactor classifier API to work with chain/classifiers without rtnl lock | expand

Commit Message

Vlad Buslov Feb. 4, 2019, 12:32 p.m. UTC
Currently, tcf_block doesn't use any synchronization mechanisms to protect
critical sections that manage lifetime of its chains. block->chain_list and
multiple variables in tcf_chain that control its lifetime assume external
synchronization provided by global rtnl lock. Converting chain reference
counting to atomic reference counters is not possible because cls API uses
multiple counters and flags to control chain lifetime, so all of them must
be synchronized in chain get/put code.

Use single per-block lock to protect block data and manage lifetime of all
chains on the block. Always take block->lock when accessing chain_list.
Chain get and put modify chain lifetime-management data and parent block's
chain_list, so take the lock in these functions. Verify block->lock state
with assertions in functions that expect to be called with the lock taken
and are called from multiple places. Take block->lock when accessing
filter_chain_list.

In order to allow parallel update of rules on single block, move all calls
to classifiers outside of critical sections protected by new block->lock.
Rearrange chain get and put functions code to only access protected chain
data while holding block lock:
- Check if chain was explicitly created inside put function while holding
  block lock. Add additional argument to __tcf_chain_put() to only put
  explicitly created chain.
- Rearrange code to only access chain reference counter and chain action
  reference counter while holding block lock.
- Extract code that requires block->lock from tcf_chain_destroy() into
  standalone tcf_chain_detach() function that is called by
  __tcf_chain_put() in same critical section that changes chain reference
  counters.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---

Changes from V2 to V3:
  - Change block->lock type to mutex.
  - Implement tcf_block_destroy() helper function that destroys
    block->lock mutex before deallocating the block.
  - Revert GFP_KERNEL->GFP_ATOMIC memory allocation flags of tcf_chain
    which is no longer needed after block->lock type change.

 include/net/sch_generic.h |   5 +++
 net/sched/cls_api.c       | 102 ++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 89 insertions(+), 18 deletions(-)

Comments

Jiri Pirko Feb. 4, 2019, 3:23 p.m. UTC | #1
Mon, Feb 04, 2019 at 01:32:46PM CET, vladbu@mellanox.com wrote:
>Currently, tcf_block doesn't use any synchronization mechanisms to protect
>critical sections that manage lifetime of its chains. block->chain_list and
>multiple variables in tcf_chain that control its lifetime assume external
>synchronization provided by global rtnl lock. Converting chain reference
>counting to atomic reference counters is not possible because cls API uses
>multiple counters and flags to control chain lifetime, so all of them must
>be synchronized in chain get/put code.
>
>Use single per-block lock to protect block data and manage lifetime of all
>chains on the block. Always take block->lock when accessing chain_list.
>Chain get and put modify chain lifetime-management data and parent block's
>chain_list, so take the lock in these functions. Verify block->lock state
>with assertions in functions that expect to be called with the lock taken
>and are called from multiple places. Take block->lock when accessing
>filter_chain_list.
>
>In order to allow parallel update of rules on single block, move all calls
>to classifiers outside of critical sections protected by new block->lock.
>Rearrange chain get and put functions code to only access protected chain
>data while holding block lock:
>- Check if chain was explicitly created inside put function while holding
>  block lock. Add additional argument to __tcf_chain_put() to only put
>  explicitly created chain.
>- Rearrange code to only access chain reference counter and chain action
>  reference counter while holding block lock.
>- Extract code that requires block->lock from tcf_chain_destroy() into
>  standalone tcf_chain_destroy() function that is called by
>  __tcf_chain_put() in same critical section that changes chain reference
>  counters.
>
>Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
>---
>
>Changes from V2 to V3:
>  - Change block->lock type to mutex.
>  - Implement tcf_block_destroy() helper function that destroys
>    block->lock mutex before deallocating the block.
>  - Revert GFP_KERNEL->GFP_ATOMIC memory allocation flags of tcf_chain
>    which is no longer needed after block->lock type change.
>
> include/net/sch_generic.h |   5 +++
> net/sched/cls_api.c       | 102 ++++++++++++++++++++++++++++++++++++++--------
> 2 files changed, 89 insertions(+), 18 deletions(-)
>
>diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>index 7a4957599874..31b8ea66a47d 100644
>--- a/include/net/sch_generic.h
>+++ b/include/net/sch_generic.h
>@@ -12,6 +12,7 @@
> #include <linux/list.h>
> #include <linux/refcount.h>
> #include <linux/workqueue.h>
>+#include <linux/mutex.h>
> #include <net/gen_stats.h>
> #include <net/rtnetlink.h>
> 
>@@ -352,6 +353,10 @@ struct tcf_chain {
> };
> 
> struct tcf_block {
>+	/* Lock protects tcf_block and lifetime-management data of chains
>+	 * attached to the block (refcnt, action_refcnt, explicitly_created).
>+	 */
>+	struct mutex lock;
> 	struct list_head chain_list;
> 	u32 index; /* block index for shared blocks */
> 	refcount_t refcnt;
>diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
>index e2b5cb2eb34e..cc416b6a3aa2 100644
>--- a/net/sched/cls_api.c
>+++ b/net/sched/cls_api.c
>@@ -193,6 +193,9 @@ static void tcf_proto_destroy(struct tcf_proto *tp,
> 	kfree_rcu(tp, rcu);
> }
> 
>+#define ASSERT_BLOCK_LOCKED(block)					\
>+	lockdep_assert_held(&(block)->lock)
>+
> struct tcf_filter_chain_list_item {
> 	struct list_head list;
> 	tcf_chain_head_change_t *chain_head_change;
>@@ -204,6 +207,8 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
> {
> 	struct tcf_chain *chain;
> 
>+	ASSERT_BLOCK_LOCKED(block);
>+
> 	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
> 	if (!chain)
> 		return NULL;
>@@ -235,25 +240,51 @@ static void tcf_chain0_head_change(struct tcf_chain *chain,
> 		tcf_chain_head_change_item(item, tp_head);
> }
> 
>-static void tcf_chain_destroy(struct tcf_chain *chain)
>+/* Returns true if block can be safely freed. */
>+
>+static bool tcf_chain_detach(struct tcf_chain *chain)
> {
> 	struct tcf_block *block = chain->block;
> 
>+	ASSERT_BLOCK_LOCKED(block);
>+
> 	list_del(&chain->list);
> 	if (!chain->index)
> 		block->chain0.chain = NULL;
>+
>+	if (list_empty(&block->chain_list) &&
>+	    refcount_read(&block->refcnt) == 0)
>+		return true;
>+
>+	return false;
>+}
>+
>+static void tcf_block_destroy(struct tcf_block *block)
>+{
>+	mutex_destroy(&block->lock);
>+	kfree_rcu(block, rcu);
>+}
>+
>+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
>+{
>+	struct tcf_block *block = chain->block;
>+
> 	kfree(chain);
>-	if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
>-		kfree_rcu(block, rcu);
>+	if (free_block)
>+		tcf_block_destroy(block);
> }
> 
> static void tcf_chain_hold(struct tcf_chain *chain)
> {
>+	ASSERT_BLOCK_LOCKED(chain->block);
>+
> 	++chain->refcnt;
> }
> 
> static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
> {
>+	ASSERT_BLOCK_LOCKED(chain->block);
>+
> 	/* In case all the references are action references, this
> 	 * chain should not be shown to the user.
> 	 */
>@@ -265,6 +296,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
> {
> 	struct tcf_chain *chain;
> 
>+	ASSERT_BLOCK_LOCKED(block);
>+
> 	list_for_each_entry(chain, &block->chain_list, list) {
> 		if (chain->index == chain_index)
> 			return chain;
>@@ -279,31 +312,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
> 					 u32 chain_index, bool create,
> 					 bool by_act)
> {
>-	struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
>+	struct tcf_chain *chain = NULL;
>+	bool is_first_reference;
> 
>+	mutex_lock(&block->lock);
>+	chain = tcf_chain_lookup(block, chain_index);
> 	if (chain) {
> 		tcf_chain_hold(chain);
> 	} else {
> 		if (!create)
>-			return NULL;
>+			goto errout;
> 		chain = tcf_chain_create(block, chain_index);
> 		if (!chain)
>-			return NULL;
>+			goto errout;
> 	}
> 
> 	if (by_act)
> 		++chain->action_refcnt;
>+	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
>+	mutex_unlock(&block->lock);
> 
> 	/* Send notification only in case we got the first
> 	 * non-action reference. Until then, the chain acts only as
> 	 * a placeholder for actions pointing to it and user ought
> 	 * not know about them.
> 	 */
>-	if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
>+	if (is_first_reference && !by_act)
> 		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
> 				RTM_NEWCHAIN, false);
> 
> 	return chain;
>+
>+errout:
>+	mutex_unlock(&block->lock);
>+	return chain;
> }
> 
> static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
>@@ -320,37 +362,59 @@ EXPORT_SYMBOL(tcf_chain_get_by_act);
> 
> static void tc_chain_tmplt_del(struct tcf_chain *chain);
> 
>-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
>+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
>+			    bool explicitly_created)
> {
>+	struct tcf_block *block = chain->block;
>+	bool is_last, free_block = false;
>+	unsigned int refcnt;
>+
>+	mutex_lock(&block->lock);
>+	if (explicitly_created) {
>+		if (!chain->explicitly_created) {
>+			mutex_unlock(&block->lock);
>+			return;
>+		}
>+		chain->explicitly_created = false;

Hmm, I think that you left "chain->explicitly_created = false" at the
original location (tc_ctl_chain()). I think it would be better to do
the chain->explicitly_created management move in a separate patch.


>+	}
>+
> 	if (by_act)
> 		chain->action_refcnt--;
>-	chain->refcnt--;
>+
>+	/* tc_chain_notify_delete can't be called while holding block lock.
>+	 * However, when block is unlocked chain can be changed concurrently, so
>+	 * save these to temporary variables.
>+	 */
>+	refcnt = --chain->refcnt;
>+	is_last = refcnt - chain->action_refcnt == 0;
>+	if (refcnt == 0)
>+		free_block = tcf_chain_detach(chain);
>+	mutex_unlock(&block->lock);
> 
> 	/* The last dropped non-action reference will trigger notification. */
>-	if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
>+	if (is_last && !by_act)
> 		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
> 
>-	if (chain->refcnt == 0) {
>+	if (refcnt == 0) {
> 		tc_chain_tmplt_del(chain);
>-		tcf_chain_destroy(chain);
>+		tcf_chain_destroy(chain, free_block);
> 	}
> }
> 
> static void tcf_chain_put(struct tcf_chain *chain)
> {
>-	__tcf_chain_put(chain, false);
>+	__tcf_chain_put(chain, false, false);
> }
> 
> void tcf_chain_put_by_act(struct tcf_chain *chain)
> {
>-	__tcf_chain_put(chain, true);
>+	__tcf_chain_put(chain, true, false);
> }
> EXPORT_SYMBOL(tcf_chain_put_by_act);
> 
> static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
> {
>-	if (chain->explicitly_created)
>-		tcf_chain_put(chain);
>+	__tcf_chain_put(chain, false, true);
> }
> 
> static void tcf_chain_flush(struct tcf_chain *chain)
>@@ -764,6 +828,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
> 		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
> 		return ERR_PTR(-ENOMEM);
> 	}
>+	mutex_init(&block->lock);
> 	INIT_LIST_HEAD(&block->chain_list);
> 	INIT_LIST_HEAD(&block->cb_list);
> 	INIT_LIST_HEAD(&block->owner_list);
>@@ -827,7 +892,7 @@ static void tcf_block_put_all_chains(struct tcf_block *block)
> static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
> 			    struct tcf_block_ext_info *ei)
> {
>-	if (refcount_dec_and_test(&block->refcnt)) {
>+	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
> 		/* Flushing/putting all chains will cause the block to be
> 		 * deallocated when last chain is freed. However, if chain_list
> 		 * is empty, block has to be manually deallocated. After block
>@@ -836,6 +901,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
> 		 */
> 		bool free_block = list_empty(&block->chain_list);
> 
>+		mutex_unlock(&block->lock);
> 		if (tcf_block_shared(block))
> 			tcf_block_remove(block, block->net);
> 		if (!free_block)
>@@ -845,7 +911,7 @@ static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
> 			tcf_block_offload_unbind(block, q, ei);
> 
> 		if (free_block)
>-			kfree_rcu(block, rcu);
>+			tcf_block_destroy(block);
> 		else
> 			tcf_block_put_all_chains(block);
> 	} else if (q) {
>-- 
>2.13.6
>
diff mbox series

Patch

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7a4957599874..31b8ea66a47d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -12,6 +12,7 @@ 
 #include <linux/list.h>
 #include <linux/refcount.h>
 #include <linux/workqueue.h>
+#include <linux/mutex.h>
 #include <net/gen_stats.h>
 #include <net/rtnetlink.h>
 
@@ -352,6 +353,10 @@  struct tcf_chain {
 };
 
 struct tcf_block {
+	/* Lock protects tcf_block and lifetime-management data of chains
+	 * attached to the block (refcnt, action_refcnt, explicitly_created).
+	 */
+	struct mutex lock;
 	struct list_head chain_list;
 	u32 index; /* block index for shared blocks */
 	refcount_t refcnt;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e2b5cb2eb34e..cc416b6a3aa2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -193,6 +193,9 @@  static void tcf_proto_destroy(struct tcf_proto *tp,
 	kfree_rcu(tp, rcu);
 }
 
+#define ASSERT_BLOCK_LOCKED(block)					\
+	lockdep_assert_held(&(block)->lock)
+
 struct tcf_filter_chain_list_item {
 	struct list_head list;
 	tcf_chain_head_change_t *chain_head_change;
@@ -204,6 +207,8 @@  static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
 {
 	struct tcf_chain *chain;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
 	if (!chain)
 		return NULL;
@@ -235,25 +240,51 @@  static void tcf_chain0_head_change(struct tcf_chain *chain,
 		tcf_chain_head_change_item(item, tp_head);
 }
 
-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
 {
 	struct tcf_block *block = chain->block;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	list_del(&chain->list);
 	if (!chain->index)
 		block->chain0.chain = NULL;
+
+	if (list_empty(&block->chain_list) &&
+	    refcount_read(&block->refcnt) == 0)
+		return true;
+
+	return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+	mutex_destroy(&block->lock);
+	kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+	struct tcf_block *block = chain->block;
+
 	kfree(chain);
-	if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
-		kfree_rcu(block, rcu);
+	if (free_block)
+		tcf_block_destroy(block);
 }
 
 static void tcf_chain_hold(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	++chain->refcnt;
 }
 
 static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
 {
+	ASSERT_BLOCK_LOCKED(chain->block);
+
 	/* In case all the references are action references, this
 	 * chain should not be shown to the user.
 	 */
@@ -265,6 +296,8 @@  static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
 {
 	struct tcf_chain *chain;
 
+	ASSERT_BLOCK_LOCKED(block);
+
 	list_for_each_entry(chain, &block->chain_list, list) {
 		if (chain->index == chain_index)
 			return chain;
@@ -279,31 +312,40 @@  static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
 					 u32 chain_index, bool create,
 					 bool by_act)
 {
-	struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+	struct tcf_chain *chain = NULL;
+	bool is_first_reference;
 
+	mutex_lock(&block->lock);
+	chain = tcf_chain_lookup(block, chain_index);
 	if (chain) {
 		tcf_chain_hold(chain);
 	} else {
 		if (!create)
-			return NULL;
+			goto errout;
 		chain = tcf_chain_create(block, chain_index);
 		if (!chain)
-			return NULL;
+			goto errout;
 	}
 
 	if (by_act)
 		++chain->action_refcnt;
+	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+	mutex_unlock(&block->lock);
 
 	/* Send notification only in case we got the first
 	 * non-action reference. Until then, the chain acts only as
 	 * a placeholder for actions pointing to it and user ought
 	 * not know about them.
 	 */
-	if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+	if (is_first_reference && !by_act)
 		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
 				RTM_NEWCHAIN, false);
 
 	return chain;
+
+errout:
+	mutex_unlock(&block->lock);
+	return chain;
 }
 
 static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -320,37 +362,59 @@  EXPORT_SYMBOL(tcf_chain_get_by_act);
 
 static void tc_chain_tmplt_del(struct tcf_chain *chain);
 
-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+			    bool explicitly_created)
 {
+	struct tcf_block *block = chain->block;
+	bool is_last, free_block = false;
+	unsigned int refcnt;
+
+	mutex_lock(&block->lock);
+	if (explicitly_created) {
+		if (!chain->explicitly_created) {
+			mutex_unlock(&block->lock);
+			return;
+		}
+		chain->explicitly_created = false;
+	}
+
 	if (by_act)
 		chain->action_refcnt--;
-	chain->refcnt--;
+
+	/* tc_chain_notify_delete can't be called while holding block lock.
+	 * However, when block is unlocked chain can be changed concurrently, so
+	 * save these to temporary variables.
+	 */
+	refcnt = --chain->refcnt;
+	is_last = refcnt - chain->action_refcnt == 0;
+	if (refcnt == 0)
+		free_block = tcf_chain_detach(chain);
+	mutex_unlock(&block->lock);
 
 	/* The last dropped non-action reference will trigger notification. */
-	if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
+	if (is_last && !by_act)
 		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
 
-	if (chain->refcnt == 0) {
+	if (refcnt == 0) {
 		tc_chain_tmplt_del(chain);
-		tcf_chain_destroy(chain);
+		tcf_chain_destroy(chain, free_block);
 	}
 }
 
 static void tcf_chain_put(struct tcf_chain *chain)
 {
-	__tcf_chain_put(chain, false);
+	__tcf_chain_put(chain, false, false);
 }
 
 void tcf_chain_put_by_act(struct tcf_chain *chain)
 {
-	__tcf_chain_put(chain, true);
+	__tcf_chain_put(chain, true, false);
 }
 EXPORT_SYMBOL(tcf_chain_put_by_act);
 
 static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
 {
-	if (chain->explicitly_created)
-		tcf_chain_put(chain);
+	__tcf_chain_put(chain, false, true);
 }
 
 static void tcf_chain_flush(struct tcf_chain *chain)
@@ -764,6 +828,7 @@  static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
 		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
 		return ERR_PTR(-ENOMEM);
 	}
+	mutex_init(&block->lock);
 	INIT_LIST_HEAD(&block->chain_list);
 	INIT_LIST_HEAD(&block->cb_list);
 	INIT_LIST_HEAD(&block->owner_list);
@@ -827,7 +892,7 @@  static void tcf_block_put_all_chains(struct tcf_block *block)
 static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 			    struct tcf_block_ext_info *ei)
 {
-	if (refcount_dec_and_test(&block->refcnt)) {
+	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
 		/* Flushing/putting all chains will cause the block to be
 		 * deallocated when last chain is freed. However, if chain_list
 		 * is empty, block has to be manually deallocated. After block
@@ -836,6 +901,7 @@  static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 		 */
 		bool free_block = list_empty(&block->chain_list);
 
+		mutex_unlock(&block->lock);
 		if (tcf_block_shared(block))
 			tcf_block_remove(block, block->net);
 		if (!free_block)
@@ -845,7 +911,7 @@  static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
 			tcf_block_offload_unbind(block, q, ei);
 
 		if (free_block)
-			kfree_rcu(block, rcu);
+			tcf_block_destroy(block);
 		else
 			tcf_block_put_all_chains(block);
 	} else if (q) {