diff mbox series

[net-next,v4,07/12] net: sched: use flow block API

Message ID 20190709205550.3160-8-pablo@netfilter.org
State Accepted
Delegated to: Pablo Neira
Headers show
Series netfilter: add hardware offload infrastructure | expand

Commit Message

Pablo Neira Ayuso July 9, 2019, 8:55 p.m. UTC
This patch adds tcf_block_setup() which uses the flow block API.

This infrastructure takes the flow block callbacks coming from the
driver and register/unregister to/from the cls_api core.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
v4: Remove driver_list handling from the core - Jiri Pirko.

 net/sched/cls_api.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 87 insertions(+), 1 deletion(-)

Comments

Edward Cree Aug. 14, 2019, 4:32 p.m. UTC | #1
On 09/07/2019 21:55, Pablo Neira Ayuso wrote:
> This patch adds tcf_block_setup() which uses the flow block API.
>
> This infrastructure takes the flow block callbacks coming from the
> driver and register/unregister to/from the cls_api core.
>
> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
> ---
> <snip>
> @@ -796,13 +804,20 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
>  				 struct netlink_ext_ack *extack)
>  {
>  	struct tc_block_offload bo = {};
> +	int err;
>  
>  	bo.net = dev_net(dev);
>  	bo.command = command;
>  	bo.binder_type = ei->binder_type;
>  	bo.block = block;
>  	bo.extack = extack;
> -	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
> +	INIT_LIST_HEAD(&bo.cb_list);
> +
> +	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
> +	if (err < 0)
> +		return err;
> +
> +	return tcf_block_setup(block, &bo);
>  }
>  
>  static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
> @@ -1636,6 +1651,77 @@ void tcf_block_cb_unregister(struct tcf_block *block,
>  }
>  EXPORT_SYMBOL(tcf_block_cb_unregister);
>  
> +static int tcf_block_bind(struct tcf_block *block,
> +			  struct flow_block_offload *bo)
> +{
> +	struct flow_block_cb *block_cb, *next;
> +	int err, i = 0;
> +
> +	list_for_each_entry(block_cb, &bo->cb_list, list) {
> +		err = tcf_block_playback_offloads(block, block_cb->cb,
> +						  block_cb->cb_priv, true,
> +						  tcf_block_offload_in_use(block),
> +						  bo->extack);
> +		if (err)
> +			goto err_unroll;
> +
> +		i++;
> +	}
> +	list_splice(&bo->cb_list, &block->cb_list);
> +
> +	return 0;
> +
> +err_unroll:
> +	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
> +		if (i-- > 0) {
> +			list_del(&block_cb->list);
> +			tcf_block_playback_offloads(block, block_cb->cb,
> +						    block_cb->cb_priv, false,
> +						    tcf_block_offload_in_use(block),
> +						    NULL);
> +		}
> +		flow_block_cb_free(block_cb);
> +	}
> +
> +	return err;
> +}
Why has the replay been moved from the function called by the driver
 (__tcf_block_cb_register()) to work done by the driver's caller based on
 what the driver has left on this flow_block_offload.cb_list?  This makes
 it impossible for the driver to (say) unregister a block outside of an
 explicit request from ndo_setup_tc().
In my under-development driver, I have a teardown path called on PCI
 remove, which calls tcf_block_cb_unregister() on all my block bindings
 (of which the driver keeps track), to ensure that no flow rules are still
 in place when unregister_netdev() is called; this is needed because some
 of the driver's state for certain rules involves taking a reference on
 the netdevice (dev_hold()).  Your structural changes here make that
 impossible; is there any reason why they're necessary?

-Ed
Pablo Neira Ayuso Aug. 16, 2019, 1:10 a.m. UTC | #2
On Wed, Aug 14, 2019 at 05:32:34PM +0100, Edward Cree wrote:
> On 09/07/2019 21:55, Pablo Neira Ayuso wrote:
> > This patch adds tcf_block_setup() which uses the flow block API.
> >
> > This infrastructure takes the flow block callbacks coming from the
> > driver and register/unregister to/from the cls_api core.
> >
> > Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
> > ---
> > <snip>
> > @@ -796,13 +804,20 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
> >  				 struct netlink_ext_ack *extack)
> >  {
> >  	struct tc_block_offload bo = {};
> > +	int err;
> >  
> >  	bo.net = dev_net(dev);
> >  	bo.command = command;
> >  	bo.binder_type = ei->binder_type;
> >  	bo.block = block;
> >  	bo.extack = extack;
> > -	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
> > +	INIT_LIST_HEAD(&bo.cb_list);
> > +
> > +	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	return tcf_block_setup(block, &bo);
> >  }
> >  
> >  static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
> > @@ -1636,6 +1651,77 @@ void tcf_block_cb_unregister(struct tcf_block *block,
> >  }
> >  EXPORT_SYMBOL(tcf_block_cb_unregister);
> >  
> > +static int tcf_block_bind(struct tcf_block *block,
> > +			  struct flow_block_offload *bo)
> > +{
> > +	struct flow_block_cb *block_cb, *next;
> > +	int err, i = 0;
> > +
> > +	list_for_each_entry(block_cb, &bo->cb_list, list) {
> > +		err = tcf_block_playback_offloads(block, block_cb->cb,
> > +						  block_cb->cb_priv, true,
> > +						  tcf_block_offload_in_use(block),
> > +						  bo->extack);
> > +		if (err)
> > +			goto err_unroll;
> > +
> > +		i++;
> > +	}
> > +	list_splice(&bo->cb_list, &block->cb_list);
> > +
> > +	return 0;
> > +
> > +err_unroll:
> > +	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
> > +		if (i-- > 0) {
> > +			list_del(&block_cb->list);
> > +			tcf_block_playback_offloads(block, block_cb->cb,
> > +						    block_cb->cb_priv, false,
> > +						    tcf_block_offload_in_use(block),
> > +						    NULL);
> > +		}
> > +		flow_block_cb_free(block_cb);
> > +	}
> > +
> > +	return err;
> > +}
> >
> Why has the replay been moved from the function called by the driver
>  (__tcf_block_cb_register()) to work done by the driver's caller based on
>  what the driver has left on this flow_block_offload.cb_list?  This makes
>  it impossible for the driver to (say) unregister a block outside of an
>  explicit request from ndo_setup_tc().
> In my under-development driver, I have a teardown path called on PCI
>  remove, which calls tcf_block_cb_unregister() on all my block bindings
>  (of which the driver keeps track), to ensure that no flow rules are still
>  in place when unregister_netdev() is called;

It's the subsystem that has to release resources when
unregister_netdev() event happens. At least in netfilter, when the
device is going away, the filtering policy is removed, hence the
FLOW_BLOCK_UNBIND is called to release the blocks and, hence, the
offload resources. I remember tc ingress qdisc works like this too.

>  this is needed because some  of the driver's state for certain
>  rules involves taking a reference on  the netdevice (dev_hold()). 
>  Your structural changes here make that  impossible; is there any
>  reason why they're necessary?

May I have access to your driver code?

This would make it easier for me to understand your requirements, and
to discuss changes with you.
diff mbox series

Patch

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 72761b43ae41..db13944cc823 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -672,6 +672,9 @@  static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
 	kfree(indr_block_cb);
 }
 
+static int tcf_block_setup(struct tcf_block *block,
+			   struct flow_block_offload *bo);
+
 static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
 				  struct tc_indr_block_cb *indr_block_cb,
 				  enum flow_block_command command)
@@ -682,12 +685,14 @@  static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
 		.net		= dev_net(indr_dev->dev),
 		.block		= indr_dev->block,
 	};
+	INIT_LIST_HEAD(&bo.cb_list);
 
 	if (!indr_dev->block)
 		return;
 
 	indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
 			  &bo);
+	tcf_block_setup(indr_dev->block, &bo);
 }
 
 int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
@@ -772,6 +777,7 @@  static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 		.block		= block,
 		.extack		= extack,
 	};
+	INIT_LIST_HEAD(&bo.cb_list);
 
 	indr_dev = tc_indr_block_dev_lookup(dev);
 	if (!indr_dev)
@@ -782,6 +788,8 @@  static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
 	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
 		indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
 				  &bo);
+
+	tcf_block_setup(block, &bo);
 }
 
 static bool tcf_block_offload_in_use(struct tcf_block *block)
@@ -796,13 +804,20 @@  static int tcf_block_offload_cmd(struct tcf_block *block,
 				 struct netlink_ext_ack *extack)
 {
 	struct tc_block_offload bo = {};
+	int err;
 
 	bo.net = dev_net(dev);
 	bo.command = command;
 	bo.binder_type = ei->binder_type;
 	bo.block = block;
 	bo.extack = extack;
-	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	INIT_LIST_HEAD(&bo.cb_list);
+
+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	if (err < 0)
+		return err;
+
+	return tcf_block_setup(block, &bo);
 }
 
 static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
@@ -1636,6 +1651,77 @@  void tcf_block_cb_unregister(struct tcf_block *block,
 }
 EXPORT_SYMBOL(tcf_block_cb_unregister);
 
+static int tcf_block_bind(struct tcf_block *block,
+			  struct flow_block_offload *bo)
+{
+	struct flow_block_cb *block_cb, *next;
+	int err, i = 0;
+
+	list_for_each_entry(block_cb, &bo->cb_list, list) {
+		err = tcf_block_playback_offloads(block, block_cb->cb,
+						  block_cb->cb_priv, true,
+						  tcf_block_offload_in_use(block),
+						  bo->extack);
+		if (err)
+			goto err_unroll;
+
+		i++;
+	}
+	list_splice(&bo->cb_list, &block->cb_list);
+
+	return 0;
+
+err_unroll:
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		if (i-- > 0) {
+			list_del(&block_cb->list);
+			tcf_block_playback_offloads(block, block_cb->cb,
+						    block_cb->cb_priv, false,
+						    tcf_block_offload_in_use(block),
+						    NULL);
+		}
+		flow_block_cb_free(block_cb);
+	}
+
+	return err;
+}
+
+static void tcf_block_unbind(struct tcf_block *block,
+			     struct flow_block_offload *bo)
+{
+	struct flow_block_cb *block_cb, *next;
+
+	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
+		tcf_block_playback_offloads(block, block_cb->cb,
+					    block_cb->cb_priv, false,
+					    tcf_block_offload_in_use(block),
+					    NULL);
+		list_del(&block_cb->list);
+		flow_block_cb_free(block_cb);
+	}
+}
+
+static int tcf_block_setup(struct tcf_block *block,
+			   struct flow_block_offload *bo)
+{
+	int err;
+
+	switch (bo->command) {
+	case FLOW_BLOCK_BIND:
+		err = tcf_block_bind(block, bo);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		err = 0;
+		tcf_block_unbind(block, bo);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
 /* Main classifier routine: scans classifier chain attached
  * to this qdisc, (optionally) tests for protocol and asks
  * specific classifiers.