diff mbox series

[net-next,v3,05/10] net: sched: keep track of offloaded filters and check tc offload feature

Message ID 20171213151038.29665-6-jiri@resnulli.us
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series net: sched: allow qdiscs to share filter block instances | expand

Commit Message

Jiri Pirko Dec. 13, 2017, 3:10 p.m. UTC
From: Jiri Pirko <jiri@mellanox.com>

During block bind, we need to check tc offload feature. If it is
disabled yet still the block contains offloaded filters, forbid the
bind. Also forbid registering a callback for a block that already
contains offloaded filters, as playback is not supported now.
For keeping track of offloaded filters there is a new counter
introduced, along with a couple of helpers called from cls_* code.
These helpers set and clear TCA_CLS_FLAGS_IN_HW flag.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
v2->v3:
- new patch
---
 include/net/sch_generic.h | 17 +++++++++++++++
 net/sched/cls_api.c       | 53 +++++++++++++++++++++++++++++++++++++----------
 net/sched/cls_bpf.c       |  4 +++-
 net/sched/cls_flower.c    |  3 ++-
 net/sched/cls_matchall.c  |  3 ++-
 net/sched/cls_u32.c       | 13 ++++++------
 6 files changed, 73 insertions(+), 20 deletions(-)

Comments

Jakub Kicinski Dec. 14, 2017, 1:05 a.m. UTC | #1
On Wed, 13 Dec 2017 16:10:33 +0100, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> During block bind, we need to check tc offload feature. If it is
> disabled yet still the block contains offloaded filters, forbid the
> bind. Also forbid to register callback for a block that already
> containes offloaded filters, as the play back is not supported now.
> For keeping track of offloaded filters there is a new counter
> introduced, alongside with couple of helpers called from cls_* code.
> These helpers set and clear TCA_CLS_FLAGS_IN_HW flag.
> 
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
> index de9dbcb..ac25142 100644
> --- a/net/sched/cls_api.c
> +++ b/net/sched/cls_api.c
> @@ -266,31 +266,50 @@ void tcf_chain_put(struct tcf_chain *chain)
>  }
>  EXPORT_SYMBOL(tcf_chain_put);
>  
> -static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
> +static bool tcf_block_offload_in_use(struct tcf_block *block)
> +{
> +	return block->offloadcnt;
> +}
> +
> +static void tcf_block_offload_cmd(struct tcf_block *block,
> +				  struct net_device *dev,
>  				  struct tcf_block_ext_info *ei,
>  				  enum tc_block_command command)
>  {
> -	struct net_device *dev = q->dev_queue->dev;
>  	struct tc_block_offload bo = {};
>  
> -	if (!dev->netdev_ops->ndo_setup_tc)
> -		return;
>  	bo.command = command;
>  	bo.binder_type = ei->binder_type;
>  	bo.block = block;
>  	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
>  }
>  
> -static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
> -				   struct tcf_block_ext_info *ei)
> +static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
> +				  struct tcf_block_ext_info *ei)
>  {
> -	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
> +	struct net_device *dev = q->dev_queue->dev;
> +
> +	if (!dev->netdev_ops->ndo_setup_tc)
> +		return 0;
> +
> +	/* If tc offload feature is disabled and the block we try to bind
> +	 * to already has some offloaded filters, forbid to bind.
> +	 */
> +	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
> +		return -EOPNOTSUPP;

I don't understand the tc_can_offload(dev) check here.  The flow is -
on bind if TC offloads are enabled the new port will get a TC_BLOCK_BIND
and request a register, but the register will fail since the block is
offloaded?

But the whole bind operation does not fail, so user will not see an
error.  The block will get bound but port's driver has no way to
register the callback.  I'm sorry if I'm just being slow here..

> +	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
> +	return 0;
>  }
>  
>  static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
>  				     struct tcf_block_ext_info *ei)
>  {
> -	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
> +	struct net_device *dev = q->dev_queue->dev;
> +
> +	if (!dev->netdev_ops->ndo_setup_tc)
> +		return;
> +	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
>  }
>  
>  static int
> @@ -499,10 +518,15 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
>  	if (err)
>  		goto err_chain_head_change_cb_add;
>  
> -	tcf_block_offload_bind(block, q, ei);
> +	err = tcf_block_offload_bind(block, q, ei);
> +	if (err)
> +		goto err_block_offload_bind;

Would it perhaps make more sense to add a TC_BLOCK_JOIN or some such?
IIUC the problem is we don't know whether the driver/callee of the new
port is aware of previous callbacks/filters and we can't replay them.

Obviously registering new callbacks on offloaded blocks is a no-go.
For simple bind to a new port of an ASIC which already knows the rule
set, we just need to ask all callbacks if they know the port and as
long as any of them responds "yes" we are safe to assume the bind is OK.

(Existing drivers will all respond with EOPNOTSUPP to a new unknown command.)

Does that make sense?

>  	*p_block = block;
>  	return 0;
>  
> +err_block_offload_bind:
> +	tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
>  err_chain_head_change_cb_add:
>  	tcf_block_owner_del(block, q, ei->binder_type);
>  err_block_owner_add:
> @@ -630,9 +654,16 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
>  {
>  	struct tcf_block_cb *block_cb;
>  
> +	/* At this point, playback of previous block cb calls is not supported,
> +	 * so forbid to register to block which already has some offloaded
> +	 * filters present.
> +	 */
> +	if (tcf_block_offload_in_use(block))
> +		return ERR_PTR(-EOPNOTSUPP);
> 
>  	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
>  	if (!block_cb)
> -		return NULL;
> +		return ERR_PTR(-ENOMEM);
>  	block_cb->cb = cb;
>  	block_cb->cb_ident = cb_ident;
>  	block_cb->cb_priv = cb_priv;
> @@ -648,7 +679,7 @@ int tcf_block_cb_register(struct tcf_block *block,
>  	struct tcf_block_cb *block_cb;
>  
>  	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
> -	return block_cb ? 0 : -ENOMEM;
> +	return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
>  }
>  EXPORT_SYMBOL(tcf_block_cb_register);
>
Jiri Pirko Dec. 14, 2017, 9:47 a.m. UTC | #2
Thu, Dec 14, 2017 at 02:05:55AM CET, jakub.kicinski@netronome.com wrote:
>On Wed, 13 Dec 2017 16:10:33 +0100, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> During block bind, we need to check tc offload feature. If it is
>> disabled yet still the block contains offloaded filters, forbid the
>> bind. Also forbid to register callback for a block that already
>> containes offloaded filters, as the play back is not supported now.
>> For keeping track of offloaded filters there is a new counter
>> introduced, alongside with couple of helpers called from cls_* code.
>> These helpers set and clear TCA_CLS_FLAGS_IN_HW flag.
>> 
>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>
>> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
>> index de9dbcb..ac25142 100644
>> --- a/net/sched/cls_api.c
>> +++ b/net/sched/cls_api.c
>> @@ -266,31 +266,50 @@ void tcf_chain_put(struct tcf_chain *chain)
>>  }
>>  EXPORT_SYMBOL(tcf_chain_put);
>>  
>> -static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
>> +static bool tcf_block_offload_in_use(struct tcf_block *block)
>> +{
>> +	return block->offloadcnt;
>> +}
>> +
>> +static void tcf_block_offload_cmd(struct tcf_block *block,
>> +				  struct net_device *dev,
>>  				  struct tcf_block_ext_info *ei,
>>  				  enum tc_block_command command)
>>  {
>> -	struct net_device *dev = q->dev_queue->dev;
>>  	struct tc_block_offload bo = {};
>>  
>> -	if (!dev->netdev_ops->ndo_setup_tc)
>> -		return;
>>  	bo.command = command;
>>  	bo.binder_type = ei->binder_type;
>>  	bo.block = block;
>>  	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
>>  }
>>  
>> -static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
>> -				   struct tcf_block_ext_info *ei)
>> +static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
>> +				  struct tcf_block_ext_info *ei)
>>  {
>> -	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
>> +	struct net_device *dev = q->dev_queue->dev;
>> +
>> +	if (!dev->netdev_ops->ndo_setup_tc)
>> +		return 0;
>> +
>> +	/* If tc offload feature is disabled and the block we try to bind
>> +	 * to already has some offloaded filters, forbid to bind.
>> +	 */
>> +	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
>> +		return -EOPNOTSUPP;
>
>I don't understand the tc_can_offload(dev) check here.  The flow is -
>on bind if TC offloads are enabled the new port will get a TC_BLOCK_BIND
>and request a register, but the register will fail since the block is
>offloaded?

The point of this check is to disallow dev with tc offload disabled to
share a block which already holds offloaded filters.

That is similar to disallow disabling tc offload on device that shares a
block which contains offloaded filters.



>
>But the whole bind operation does not fail, so user will not see an
>error.  The block will get bound but port's driver has no way to
>register the callback.  I'm sorry if I'm just being slow here..
>
>> +	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
>> +	return 0;

The thing is that driver which does not support TC_BLOCK_BIND would
return -EOPNOTSUPP here. For those drivers we continue, they just won't
have block cb registered so they won't receive cb calls to offload
filters.


>>  }
>>  
>>  static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
>>  				     struct tcf_block_ext_info *ei)
>>  {
>> -	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
>> +	struct net_device *dev = q->dev_queue->dev;
>> +
>> +	if (!dev->netdev_ops->ndo_setup_tc)
>> +		return;
>> +	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
>>  }
>>  
>>  static int
>> @@ -499,10 +518,15 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
>>  	if (err)
>>  		goto err_chain_head_change_cb_add;
>>  
>> -	tcf_block_offload_bind(block, q, ei);
>> +	err = tcf_block_offload_bind(block, q, ei);
>> +	if (err)
>> +		goto err_block_offload_bind;
>
>Would it perhaps make more sense to add a TC_BLOCK_JOIN or some such?

Why? Just a namechange?


>IIUC the problem is we don't know whether the driver/callee of the new
>port is aware of previous callbacks/filters and we can't replay them.
>
>Obviously registering new callbacks on offloaded blocks is a no-go.
>For simple bind to a new port of an ASIC which already knows the rule
>set, we just need to ask all callbacks if they know the port and as
>long as any of them responds "yes" we are safe to assume the bind is OK.
>
>(Existing drivers will all respond with EOPNOTSUPP to a new unknown command.)
>
>Does that make sense?

Hmm, I understand what you say. I have to think about that a bit more.

Thanks!

>
>>  	*p_block = block;
>>  	return 0;
>>  
>> +err_block_offload_bind:
>> +	tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
>>  err_chain_head_change_cb_add:
>>  	tcf_block_owner_del(block, q, ei->binder_type);
>>  err_block_owner_add:
>> @@ -630,9 +654,16 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
>>  {
>>  	struct tcf_block_cb *block_cb;
>>  
>> +	/* At this point, playback of previous block cb calls is not supported,
>> +	 * so forbid to register to block which already has some offloaded
>> +	 * filters present.
>> +	 */
>> +	if (tcf_block_offload_in_use(block))
>> +		return ERR_PTR(-EOPNOTSUPP);
>> 
>>  	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
>>  	if (!block_cb)
>> -		return NULL;
>> +		return ERR_PTR(-ENOMEM);
>>  	block_cb->cb = cb;
>>  	block_cb->cb_ident = cb_ident;
>>  	block_cb->cb_priv = cb_priv;
>> @@ -648,7 +679,7 @@ int tcf_block_cb_register(struct tcf_block *block,
>>  	struct tcf_block_cb *block_cb;
>>  
>>  	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
>> -	return block_cb ? 0 : -ENOMEM;
>> +	return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
>>  }
>>  EXPORT_SYMBOL(tcf_block_cb_register);
>>
Jiri Pirko Dec. 14, 2017, 1:10 p.m. UTC | #3
Thu, Dec 14, 2017 at 10:47:16AM CET, jiri@resnulli.us wrote:
>Thu, Dec 14, 2017 at 02:05:55AM CET, jakub.kicinski@netronome.com wrote:
>>On Wed, 13 Dec 2017 16:10:33 +0100, Jiri Pirko wrote:
>>> From: Jiri Pirko <jiri@mellanox.com>
>>> 
>>> During block bind, we need to check tc offload feature. If it is
>>> disabled yet still the block contains offloaded filters, forbid the
>>> bind. Also forbid to register callback for a block that already
>>> containes offloaded filters, as the play back is not supported now.
>>> For keeping track of offloaded filters there is a new counter
>>> introduced, alongside with couple of helpers called from cls_* code.
>>> These helpers set and clear TCA_CLS_FLAGS_IN_HW flag.
>>> 
>>> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
>>
>>> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
>>> index de9dbcb..ac25142 100644
>>> --- a/net/sched/cls_api.c
>>> +++ b/net/sched/cls_api.c
>>> @@ -266,31 +266,50 @@ void tcf_chain_put(struct tcf_chain *chain)
>>>  }
>>>  EXPORT_SYMBOL(tcf_chain_put);
>>>  
>>> -static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
>>> +static bool tcf_block_offload_in_use(struct tcf_block *block)
>>> +{
>>> +	return block->offloadcnt;
>>> +}
>>> +
>>> +static void tcf_block_offload_cmd(struct tcf_block *block,
>>> +				  struct net_device *dev,
>>>  				  struct tcf_block_ext_info *ei,
>>>  				  enum tc_block_command command)
>>>  {
>>> -	struct net_device *dev = q->dev_queue->dev;
>>>  	struct tc_block_offload bo = {};
>>>  
>>> -	if (!dev->netdev_ops->ndo_setup_tc)
>>> -		return;
>>>  	bo.command = command;
>>>  	bo.binder_type = ei->binder_type;
>>>  	bo.block = block;
>>>  	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
>>>  }
>>>  
>>> -static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
>>> -				   struct tcf_block_ext_info *ei)
>>> +static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
>>> +				  struct tcf_block_ext_info *ei)
>>>  {
>>> -	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
>>> +	struct net_device *dev = q->dev_queue->dev;
>>> +
>>> +	if (!dev->netdev_ops->ndo_setup_tc)
>>> +		return 0;
>>> +
>>> +	/* If tc offload feature is disabled and the block we try to bind
>>> +	 * to already has some offloaded filters, forbid to bind.
>>> +	 */
>>> +	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
>>> +		return -EOPNOTSUPP;
>>
>>I don't understand the tc_can_offload(dev) check here.  The flow is -
>>on bind if TC offloads are enabled the new port will get a TC_BLOCK_BIND
>>and request a register, but the register will fail since the block is
>>offloaded?
>
>The point of this check is to disallow dev with tc offload disabled to
>share a block which already holds offloaded filters.
>
>That is similar to disallow disabling tc offload on device that shares a
>block which contains offloaded filters.
>
>
>
>>
>>But the whole bind operation does not fail, so user will not see an
>>error.  The block will get bound but port's driver has no way to
>>register the callback.  I'm sorry if I'm just being slow here..
>>
>>> +	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
>>> +	return 0;
>
>The thing is that driver which does not support TC_BLOCK_BIND would
>return -EOPNOTSUPP here. For those drivers we continue, they just won't
>have block cb registered so they won't receive cb calls to offload
>filters.
>
>
>>>  }
>>>  
>>>  static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
>>>  				     struct tcf_block_ext_info *ei)
>>>  {
>>> -	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
>>> +	struct net_device *dev = q->dev_queue->dev;
>>> +
>>> +	if (!dev->netdev_ops->ndo_setup_tc)
>>> +		return;
>>> +	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
>>>  }
>>>  
>>>  static int
>>> @@ -499,10 +518,15 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
>>>  	if (err)
>>>  		goto err_chain_head_change_cb_add;
>>>  
>>> -	tcf_block_offload_bind(block, q, ei);
>>> +	err = tcf_block_offload_bind(block, q, ei);
>>> +	if (err)
>>> +		goto err_block_offload_bind;
>>
>>Would it perhaps make more sense to add a TC_BLOCK_JOIN or some such?
>
>Why? Just a namechange?
>
>
>>IIUC the problem is we don't know whether the driver/callee of the new
>>port is aware of previous callbacks/filters and we can't replay them.

Well, the problem is a bit different.
There are 2 scenarios when we need to fail here:
1) tc offload feature is turned off, there are some filters offloaded in
   the block. That is what I commented above.
2) tc offload feature is turned on, there are some filters offloaded in
   the block but the block is not accounted by the driver. This is
   because of the lack of replay. This is taken care of in the beginning
   of __tcf_block_cb_register function - see below, there is a comment
   there.


>>
>>Obviously registering new callbacks on offloaded blocks is a no-go.
>>For simple bind to a new port of an ASIC which already knows the rule
>>set, we just need to ask all callbacks if they know the port and as
>>long as any of them responds "yes" we are safe to assume the bind is OK.
>>
>>(Existing drivers will all respond with EOPNOTSUPP to a new unknown command.)
>>
>>Does that make sense?
>
>Hmm, I understand what you say. I have to think about that a bit more.

As you see, both cases where we need to bail out are covered. Do you see
any other problem?


>
>Thanks!
>
>>
>>>  	*p_block = block;
>>>  	return 0;
>>>  
>>> +err_block_offload_bind:
>>> +	tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
>>>  err_chain_head_change_cb_add:
>>>  	tcf_block_owner_del(block, q, ei->binder_type);
>>>  err_block_owner_add:
>>> @@ -630,9 +654,16 @@ struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
>>>  {
>>>  	struct tcf_block_cb *block_cb;
>>>  
>>> +	/* At this point, playback of previous block cb calls is not supported,
>>> +	 * so forbid to register to block which already has some offloaded
>>> +	 * filters present.
>>> +	 */
>>> +	if (tcf_block_offload_in_use(block))
>>> +		return ERR_PTR(-EOPNOTSUPP);
>>> 
>>>  	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
>>>  	if (!block_cb)
>>> -		return NULL;
>>> +		return ERR_PTR(-ENOMEM);
>>>  	block_cb->cb = cb;
>>>  	block_cb->cb_ident = cb_ident;
>>>  	block_cb->cb_priv = cb_priv;
>>> @@ -648,7 +679,7 @@ int tcf_block_cb_register(struct tcf_block *block,
>>>  	struct tcf_block_cb *block_cb;
>>>  
>>>  	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
>>> -	return block_cb ? 0 : -ENOMEM;
>>> +	return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
>>>  }
>>>  EXPORT_SYMBOL(tcf_block_cb_register);
>>>
Jakub Kicinski Dec. 14, 2017, 6:49 p.m. UTC | #4
On Thu, 14 Dec 2017 14:10:45 +0100, Jiri Pirko wrote:
> >Why? Just a namechange?
> >
> >  
> >>IIUC the problem is we don't know whether the driver/callee of the new
> >>port is aware of previous callbacks/filters and we can't replay them.  
> 
> Well, the problem is a bit different.
> There are 2 scenarios when we need to fail here:
> 1) tc offload feature is turned off, there are some filters offloaded in
>    the block. That is what I commented above.
> 2) tc offload feature is turned on, there are some filters offloaded in
>    the block but the block is not accounted by the driver. This is
>    because of the lack or replay. This is taken care of in the beginning
>    of __tcf_block_cb_register function - see below, there is a comment
>    there.

Restating in code terms, shouldn't this:

+	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
+	return 0;

return the error like this:

	return tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);

We expect simple drivers to do this:

	case TC_BLOCK_BIND:
		return tcf_block_cb_register(f->block, mycb,
					     priv, priv);

Which will return an error for shared offloaded block, just need to
propagate it.
Jakub Kicinski Dec. 14, 2017, 7:22 p.m. UTC | #5
On Wed, 13 Dec 2017 16:10:33 +0100, Jiri Pirko wrote:
> diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
> index 69d7e9a..9cf61e7 100644
> --- a/net/sched/cls_bpf.c
> +++ b/net/sched/cls_bpf.c
> @@ -170,8 +170,10 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
>  			cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
>  			return err;
>  		} else if (err > 0) {
> -			prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
> +			tcf_block_offload_inc(block, &prog->gen_flags);
>  		}
> +	} else {
> +		tcf_block_offload_dec(block, &prog->gen_flags);
>  	}
>  
>  	if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))

The in_hw reporting also seems broken.

tools/testing/selftests/bpf/test_offload.py catches this.
Jiri Pirko Dec. 15, 2017, 9:09 a.m. UTC | #6
Thu, Dec 14, 2017 at 07:49:41PM CET, jakub.kicinski@netronome.com wrote:
>On Thu, 14 Dec 2017 14:10:45 +0100, Jiri Pirko wrote:
>> >Why? Just a namechange?
>> >
>> >  
>> >>IIUC the problem is we don't know whether the driver/callee of the new
>> >>port is aware of previous callbacks/filters and we can't replay them.  
>> 
>> Well, the problem is a bit different.
>> There are 2 scenarios when we need to fail here:
>> 1) tc offload feature is turned off, there are some filters offloaded in
>>    the block. That is what I commented above.
>> 2) tc offload feature is turned on, there are some filters offloaded in
>>    the block but the block is not accounted by the driver. This is
>>    because of the lack or replay. This is taken care of in the beginning
>>    of __tcf_block_cb_register function - see below, there is a comment
>>    there.
>
>Restating in code terms, shouldn't this:
>
>+	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
>+	return 0;
>
>return the error like this:
>
>	return tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
>
>We expect simple drivers to do this:
>
>	case TC_BLOCK_BIND:
>		return tcf_block_cb_register(f->block, mycb,
>					     priv, priv);
>
>Which will return an error for shared offloaded block, just need to
>propagate it.

Got it. Will do. Thanks!
Jiri Pirko Dec. 15, 2017, 9:09 a.m. UTC | #7
Thu, Dec 14, 2017 at 08:22:43PM CET, jakub.kicinski@netronome.com wrote:
>On Wed, 13 Dec 2017 16:10:33 +0100, Jiri Pirko wrote:
>> diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
>> index 69d7e9a..9cf61e7 100644
>> --- a/net/sched/cls_bpf.c
>> +++ b/net/sched/cls_bpf.c
>> @@ -170,8 +170,10 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
>>  			cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
>>  			return err;
>>  		} else if (err > 0) {
>> -			prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
>> +			tcf_block_offload_inc(block, &prog->gen_flags);
>>  		}
>> +	} else {
>> +		tcf_block_offload_dec(block, &prog->gen_flags);
>>  	}
>>  
>>  	if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
>
>The in_hw reporting also seems broken.
>
>tools/testing/selftests/bpf/test_offload.py catches this.

Will check it. Thanks!
diff mbox series

Patch

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a75bbfa..9c08026 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -283,8 +283,25 @@  struct tcf_block {
 	struct list_head cb_list;
 	struct list_head owner_list;
 	bool keep_dst;
+	unsigned int offloadcnt;
 };
 
+static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
+{
+	if (*flags & TCA_CLS_FLAGS_IN_HW)
+		return;
+	*flags |= TCA_CLS_FLAGS_IN_HW;
+	block->offloadcnt++;
+}
+
+static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
+{
+	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
+		return;
+	*flags &= ~TCA_CLS_FLAGS_IN_HW;
+	block->offloadcnt--;
+}
+
 static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
 {
 	struct qdisc_skb_cb *qcb;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index de9dbcb..ac25142 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -266,31 +266,50 @@  void tcf_chain_put(struct tcf_chain *chain)
 }
 EXPORT_SYMBOL(tcf_chain_put);
 
-static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
+static bool tcf_block_offload_in_use(struct tcf_block *block)
+{
+	return block->offloadcnt;
+}
+
+static void tcf_block_offload_cmd(struct tcf_block *block,
+				  struct net_device *dev,
 				  struct tcf_block_ext_info *ei,
 				  enum tc_block_command command)
 {
-	struct net_device *dev = q->dev_queue->dev;
 	struct tc_block_offload bo = {};
 
-	if (!dev->netdev_ops->ndo_setup_tc)
-		return;
 	bo.command = command;
 	bo.binder_type = ei->binder_type;
 	bo.block = block;
 	dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 }
 
-static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
-				   struct tcf_block_ext_info *ei)
+static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
+				  struct tcf_block_ext_info *ei)
 {
-	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
+	struct net_device *dev = q->dev_queue->dev;
+
+	if (!dev->netdev_ops->ndo_setup_tc)
+		return 0;
+
+	/* If tc offload feature is disabled and the block we try to bind
+	 * to already has some offloaded filters, forbid to bind.
+	 */
+	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
+		return -EOPNOTSUPP;
+
+	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
+	return 0;
 }
 
 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 				     struct tcf_block_ext_info *ei)
 {
-	tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
+	struct net_device *dev = q->dev_queue->dev;
+
+	if (!dev->netdev_ops->ndo_setup_tc)
+		return;
+	tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND);
 }
 
 static int
@@ -499,10 +518,15 @@  int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
 	if (err)
 		goto err_chain_head_change_cb_add;
 
-	tcf_block_offload_bind(block, q, ei);
+	err = tcf_block_offload_bind(block, q, ei);
+	if (err)
+		goto err_block_offload_bind;
+
 	*p_block = block;
 	return 0;
 
+err_block_offload_bind:
+	tcf_chain_head_change_cb_del(tcf_block_chain_zero(block), ei);
 err_chain_head_change_cb_add:
 	tcf_block_owner_del(block, q, ei->binder_type);
 err_block_owner_add:
@@ -630,9 +654,16 @@  struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
 {
 	struct tcf_block_cb *block_cb;
 
+	/* At this point, playback of previous block cb calls is not supported,
+	 * so forbid to register to block which already has some offloaded
+	 * filters present.
+	 */
+	if (tcf_block_offload_in_use(block))
+		return ERR_PTR(-EOPNOTSUPP);
+
 	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
 	if (!block_cb)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	block_cb->cb = cb;
 	block_cb->cb_ident = cb_ident;
 	block_cb->cb_priv = cb_priv;
@@ -648,7 +679,7 @@  int tcf_block_cb_register(struct tcf_block *block,
 	struct tcf_block_cb *block_cb;
 
 	block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
-	return block_cb ? 0 : -ENOMEM;
+	return IS_ERR(block_cb) ? PTR_ERR(block_cb) : 0;
 }
 EXPORT_SYMBOL(tcf_block_cb_register);
 
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 69d7e9a..9cf61e7 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -170,8 +170,10 @@  static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
 			cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
 			return err;
 		} else if (err > 0) {
-			prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+			tcf_block_offload_inc(block, &prog->gen_flags);
 		}
+	} else {
+		tcf_block_offload_dec(block, &prog->gen_flags);
 	}
 
 	if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW))
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 6132a73..f61df19 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -229,6 +229,7 @@  static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
 
 	tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
 			 &cls_flower, false);
+	tcf_block_offload_dec(block, &f->flags);
 }
 
 static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -256,7 +257,7 @@  static int fl_hw_replace_filter(struct tcf_proto *tp,
 		fl_hw_destroy_filter(tp, f);
 		return err;
 	} else if (err > 0) {
-		f->flags |= TCA_CLS_FLAGS_IN_HW;
+		tcf_block_offload_inc(block, &f->flags);
 	}
 
 	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 66d4e00..d0e57c8 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -81,6 +81,7 @@  static void mall_destroy_hw_filter(struct tcf_proto *tp,
 	cls_mall.cookie = cookie;
 
 	tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
+	tcf_block_offload_dec(block, &head->flags);
 }
 
 static int mall_replace_hw_filter(struct tcf_proto *tp,
@@ -103,7 +104,7 @@  static int mall_replace_hw_filter(struct tcf_proto *tp,
 		mall_destroy_hw_filter(tp, head, cookie);
 		return err;
 	} else if (err > 0) {
-		head->flags |= TCA_CLS_FLAGS_IN_HW;
+		tcf_block_offload_inc(block, &head->flags);
 	}
 
 	if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index ac152b4..0a5a2cb 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -530,16 +530,17 @@  static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
 	return 0;
 }
 
-static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
+static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n)
 {
 	struct tcf_block *block = tp->chain->block;
 	struct tc_cls_u32_offload cls_u32 = {};
 
 	tc_cls_common_offload_init(&cls_u32.common, tp);
 	cls_u32.command = TC_CLSU32_DELETE_KNODE;
-	cls_u32.knode.handle = handle;
+	cls_u32.knode.handle = n->handle;
 
 	tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
+	tcf_block_offload_dec(block, &n->flags);
 }
 
 static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
@@ -568,10 +569,10 @@  static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
 
 	err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
 	if (err < 0) {
-		u32_remove_hw_knode(tp, n->handle);
+		u32_remove_hw_knode(tp, n);
 		return err;
 	} else if (err > 0) {
-		n->flags |= TCA_CLS_FLAGS_IN_HW;
+		tcf_block_offload_inc(block, &n->flags);
 	}
 
 	if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW))
@@ -590,7 +591,7 @@  static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
 			RCU_INIT_POINTER(ht->ht[h],
 					 rtnl_dereference(n->next));
 			tcf_unbind_filter(tp, &n->res);
-			u32_remove_hw_knode(tp, n->handle);
+			u32_remove_hw_knode(tp, n);
 			idr_remove_ext(&ht->handle_idr, n->handle);
 			if (tcf_exts_get_net(&n->exts))
 				call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
@@ -683,7 +684,7 @@  static int u32_delete(struct tcf_proto *tp, void *arg, bool *last)
 		goto out;
 
 	if (TC_U32_KEY(ht->handle)) {
-		u32_remove_hw_knode(tp, ht->handle);
+		u32_remove_hw_knode(tp, (struct tc_u_knode *)ht);
 		ret = u32_delete_key(tp, (struct tc_u_knode *)ht);
 		goto out;
 	}