diff mbox series

[net,v2,2/3] xfrm: Add an activate() offload dev op

Message ID 1511891742-84759-2-git-send-email-avivh@mellanox.com
State Awaiting Upstream, archived
Delegated to: David Miller
Headers show
Series [net,v2,1/3] xfrm: Fix xfrm_input() to verify state is valid when (encap_type < 0) | expand

Commit Message

avivh@mellanox.com Nov. 28, 2017, 5:55 p.m. UTC
From: Aviv Heller <avivh@mellanox.com>

Adding the state to the offload device prior to replay init in
xfrm_state_construct() will result in NULL dereference if a matching
ESP packet is received in between.

In order to inhibit driver offload logic from processing the state's
packets prior to the xfrm_state object being completely initialized and
added to the SADBs, a new activate() operation was added to inform the
driver the aforementioned conditions have been met.

Signed-off-by: Aviv Heller <avivh@mellanox.com>
Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
---
v1 -> v2:
	- Separate to state addition and then activation, instead
	  of relocating dev state addition call.
---
 include/linux/netdevice.h |  1 +
 include/net/xfrm.h        | 12 ++++++++++++
 net/xfrm/xfrm_user.c      |  5 +++++
 3 files changed, 18 insertions(+)

Comments

Steffen Klassert Dec. 1, 2017, 7:09 a.m. UTC | #1
On Tue, Nov 28, 2017 at 07:55:41PM +0200, avivh@mellanox.com wrote:
> From: Aviv Heller <avivh@mellanox.com>
> 
> Adding the state to the offload device prior to replay init in
> xfrm_state_construct() will result in NULL dereference if a matching
> ESP packet is received in between.
> 
> In order to inhibit driver offload logic from processing the state's
> packets prior to the xfrm_state object being completely initialized and
> added to the SADBs, a new activate() operation was added to inform the
> driver the aforementioned conditions have been met.

We discussed this already some time ago, and I still think that
we should fix this by setting XFRM_STATE_VALID only after the
state is fully initialized.
Shannon Nelson Dec. 1, 2017, 7:47 p.m. UTC | #2
On 11/28/2017 9:55 AM, avivh@mellanox.com wrote:
> From: Aviv Heller <avivh@mellanox.com>
> 
> Adding the state to the offload device prior to replay init in
> xfrm_state_construct() will result in NULL dereference if a matching
> ESP packet is received in between.
> 
> In order to inhibit driver offload logic from processing the state's
> packets prior to the xfrm_state object being completely initialized and
> added to the SADBs, a new activate() operation was added to inform the
> driver the aforementioned conditions have been met.

Are there also conditions where you would want to temporarily 
deactivate, or pause, the incoming driver offload, followed then by 
another activate?

sln

> 
> Signed-off-by: Aviv Heller <avivh@mellanox.com>
> Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
> ---
> v1 -> v2:
> 	- Separate to state addition and then activation, instead
> 	  of relocating dev state addition call.
> ---
>   include/linux/netdevice.h |  1 +
>   include/net/xfrm.h        | 12 ++++++++++++
>   net/xfrm/xfrm_user.c      |  5 +++++
>   3 files changed, 18 insertions(+)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 2eaac7d..c6ca356 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -819,6 +819,7 @@ struct netdev_xdp {
>   #ifdef CONFIG_XFRM_OFFLOAD
>   struct xfrmdev_ops {
>   	int	(*xdo_dev_state_add) (struct xfrm_state *x);
> +	void	(*xdo_dev_state_activate) (struct xfrm_state *x);
>   	void	(*xdo_dev_state_delete) (struct xfrm_state *x);
>   	void	(*xdo_dev_state_free) (struct xfrm_state *x);
>   	bool	(*xdo_dev_offload_ok) (struct sk_buff *skb,
> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
> index e015e16..324374e 100644
> --- a/include/net/xfrm.h
> +++ b/include/net/xfrm.h
> @@ -1877,6 +1877,14 @@ static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
>   	return false;
>   }
>   
> +static inline void xfrm_dev_state_activate(struct xfrm_state *x)
> +{
> +	struct xfrm_state_offload *xso = &x->xso;
> +
> +	if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_activate)
> +		xso->dev->xfrmdev_ops->xdo_dev_state_activate(x);
> +}
> +
>   static inline void xfrm_dev_state_delete(struct xfrm_state *x)
>   {
>   	struct xfrm_state_offload *xso = &x->xso;
> @@ -1907,6 +1915,10 @@ static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, stru
>   	return 0;
>   }
>   
> +static inline void xfrm_dev_state_activate(struct xfrm_state *x)
> +{
> +}
> +
>   static inline void xfrm_dev_state_delete(struct xfrm_state *x)
>   {
>   }
> diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
> index e44a0fe..d06f579 100644
> --- a/net/xfrm/xfrm_user.c
> +++ b/net/xfrm/xfrm_user.c
> @@ -662,6 +662,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
>   		goto out;
>   	}
>   
> +	spin_lock_bh(&x->lock);
> +	if (x->km.state == XFRM_STATE_VALID)
> +		xfrm_dev_state_activate(x);
> +	spin_unlock_bh(&x->lock);
> +
>   	c.seq = nlh->nlmsg_seq;
>   	c.portid = nlh->nlmsg_pid;
>   	c.event = nlh->nlmsg_type;
>
Shannon Nelson Dec. 2, 2017, 10:11 p.m. UTC | #3
On 12/1/2017 11:47 AM, Shannon Nelson wrote:
> On 11/28/2017 9:55 AM, avivh@mellanox.com wrote:
>> From: Aviv Heller <avivh@mellanox.com>
>>
>> Adding the state to the offload device prior to replay init in
>> xfrm_state_construct() will result in NULL dereference if a matching
>> ESP packet is received in between.
>>
>> In order to inhibit driver offload logic from processing the state's
>> packets prior to the xfrm_state object being completely initialized and
>> added to the SADBs, a new activate() operation was added to inform the
>> driver the aforementioned conditions have been met.
> 
> Are there also conditions where you would want to temporarily 
> deactivate, or pause, the incoming driver offload, followed then by 
> another activate?
> 
> sln

Instead of setting up a half-ready state that needs the activate() 
operation to finish, can we instead just move the xfrm_dev_state_add() 
call to after the xfrm_init_replay()?  Especially since this really only 
makes sense for the inbound, and makes no sense for the outbound path.

sln

> 
>>
>> Signed-off-by: Aviv Heller <avivh@mellanox.com>
>> Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
>> ---
>> v1 -> v2:
>>     - Separate to state addition and then activation, instead
>>       of relocating dev state addition call.
>> ---
>>   include/linux/netdevice.h |  1 +
>>   include/net/xfrm.h        | 12 ++++++++++++
>>   net/xfrm/xfrm_user.c      |  5 +++++
>>   3 files changed, 18 insertions(+)
>>
>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>> index 2eaac7d..c6ca356 100644
>> --- a/include/linux/netdevice.h
>> +++ b/include/linux/netdevice.h
>> @@ -819,6 +819,7 @@ struct netdev_xdp {
>>   #ifdef CONFIG_XFRM_OFFLOAD
>>   struct xfrmdev_ops {
>>       int    (*xdo_dev_state_add) (struct xfrm_state *x);
>> +    void    (*xdo_dev_state_activate) (struct xfrm_state *x);
>>       void    (*xdo_dev_state_delete) (struct xfrm_state *x);
>>       void    (*xdo_dev_state_free) (struct xfrm_state *x);
>>       bool    (*xdo_dev_offload_ok) (struct sk_buff *skb,
>> diff --git a/include/net/xfrm.h b/include/net/xfrm.h
>> index e015e16..324374e 100644
>> --- a/include/net/xfrm.h
>> +++ b/include/net/xfrm.h
>> @@ -1877,6 +1877,14 @@ static inline bool xfrm_dst_offload_ok(struct 
>> dst_entry *dst)
>>       return false;
>>   }
>> +static inline void xfrm_dev_state_activate(struct xfrm_state *x)
>> +{
>> +    struct xfrm_state_offload *xso = &x->xso;
>> +
>> +    if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_activate)
>> +        xso->dev->xfrmdev_ops->xdo_dev_state_activate(x);
>> +}
>> +
>>   static inline void xfrm_dev_state_delete(struct xfrm_state *x)
>>   {
>>       struct xfrm_state_offload *xso = &x->xso;
>> @@ -1907,6 +1915,10 @@ static inline int xfrm_dev_state_add(struct net 
>> *net, struct xfrm_state *x, stru
>>       return 0;
>>   }
>> +static inline void xfrm_dev_state_activate(struct xfrm_state *x)
>> +{
>> +}
>> +
>>   static inline void xfrm_dev_state_delete(struct xfrm_state *x)
>>   {
>>   }
>> diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
>> index e44a0fe..d06f579 100644
>> --- a/net/xfrm/xfrm_user.c
>> +++ b/net/xfrm/xfrm_user.c
>> @@ -662,6 +662,11 @@ static int xfrm_add_sa(struct sk_buff *skb, 
>> struct nlmsghdr *nlh,
>>           goto out;
>>       }
>> +    spin_lock_bh(&x->lock);
>> +    if (x->km.state == XFRM_STATE_VALID)
>> +        xfrm_dev_state_activate(x);
>> +    spin_unlock_bh(&x->lock);
>> +
>>       c.seq = nlh->nlmsg_seq;
>>       c.portid = nlh->nlmsg_pid;
>>       c.event = nlh->nlmsg_type;
>>
Yossi Kuperman Dec. 2, 2017, 10:33 p.m. UTC | #4
>> On 1 Dec 2017, at 9:09, Steffen Klassert <steffen.klassert@secunet.com> wrote:

>> 

>> On Tue, Nov 28, 2017 at 07:55:41PM +0200, avivh@mellanox.com wrote:

>> From: Aviv Heller <avivh@mellanox.com>

>> 

>> Adding the state to the offload device prior to replay init in

>> xfrm_state_construct() will result in NULL dereference if a matching

>> ESP packet is received in between.

>> 

>> In order to inhibit driver offload logic from processing the state's

>> packets prior to the xfrm_state object being completely initialized and

>> added to the SADBs, a new activate() operation was added to inform the

>> driver the aforementioned conditions have been met.

> 

> We discussed this already some time ago, and I still think that

> we should fix this by setting XFRM_STATE_VALID only after the

> state is fully initialized.


An upcoming patch will refactor the if statement (encap_type < 0) in xfrm_input, in order to support crypto offload with GRO disabled. Currently it doesn’t work. This entails yet another check for the validity of the state. Resulting in total of 3 copies: 1) for normal traffic, 2) GRO and 3) crypto offload.

Anyway, IMO it is not right that we (the driver) allow an incoming packet to be delivered while the SA is not yet ready. Rather than checking for an invalid input I prefer to make sure that such a case won’t happen in the first place.

To complete the picture, there is another patch to the driver which simply drop incoming packets that underwent successful decryption and haven’t been activated yet. Active state merely means that the SA is present in the driver’s hash table.

We can make a separate patch to set the state to valid once it is fully initialized, it make sense on its own.

What do you think?
Shannon Nelson Dec. 3, 2017, 12:38 a.m. UTC | #5
On 12/2/2017 2:33 PM, Yossi Kuperman wrote:
> 
> 
>>> On 1 Dec 2017, at 9:09, Steffen Klassert <steffen.klassert@secunet.com> wrote:
>>>
>>> On Tue, Nov 28, 2017 at 07:55:41PM +0200, avivh@mellanox.com wrote:
>>> From: Aviv Heller <avivh@mellanox.com>
>>>
>>> Adding the state to the offload device prior to replay init in
>>> xfrm_state_construct() will result in NULL dereference if a matching
>>> ESP packet is received in between.
>>>
>>> In order to inhibit driver offload logic from processing the state's
>>> packets prior to the xfrm_state object being completely initialized and
>>> added to the SADBs, a new activate() operation was added to inform the
>>> driver the aforementioned conditions have been met.
>>
>> We discussed this already some time ago, and I still think that
>> we should fix this by setting XFRM_STATE_VALID only after the
>> state is fully initialized.
> 
> An upcoming patch will refactor the if statement (encap_type < 0) in xfrm_input, in order to support crypto offload with GRO disabled. Currently it doesn’t work. This entails yet another check for the validity of the state. Resulting in total of 3 copies: 1) for normal traffic, 2) GRO and 3) crypto offload.
> 
> Anyway, IMO it is not right that we (the driver) allow an incoming packet to be delivered while the SA is not yet ready. Rather than checking for an invalid input I prefer to make sure that such a case won’t happen in the first place.
> 
> To complete the picture, there is another patch to the driver which simply drop incoming packets that underwent successful decryption and haven’t been activated yet. Active state merely means that the SA is present in the driver’s hash table.
> 
> We can make a separate patch to set the state to valid once it is fully initialized, it make sense on its own.
> 
> What do you think?
> 

If the SA isn't ready, just don't tell the driver about it.  Please 
don't add yet another state for the driver to track.  This should be as 
simple as possible, and shouldn't be any more complex than the model 
already used by ndo_vlan_rx_add_vid and ndo_vlan_rx_kill_vid.

sln
Boris Pismenny Dec. 3, 2017, 11:28 a.m. UTC | #6
> -----Original Message-----

> From: Yossi Kuperman

> Sent: Sunday, December 03, 2017 00:34

> To: Steffen Klassert <steffen.klassert@secunet.com>

> Cc: Aviv Heller <avivh@mellanox.com>; Herbert Xu

> <herbert@gondor.apana.org.au>; Boris Pismenny <borisp@mellanox.com>;

> Yevgeny Kliteynik <kliteyn@mellanox.com>; netdev@vger.kernel.org

> Subject: Re: [PATCH net v2 2/3] xfrm: Add an activate() offload dev op

> 

> 

> 

> >> On 1 Dec 2017, at 9:09, Steffen Klassert <steffen.klassert@secunet.com>

> wrote:

> >>

> >> On Tue, Nov 28, 2017 at 07:55:41PM +0200, avivh@mellanox.com wrote:

> >> From: Aviv Heller <avivh@mellanox.com>

> >>

> >> Adding the state to the offload device prior to replay init in

> >> xfrm_state_construct() will result in NULL dereference if a matching

> >> ESP packet is received in between.

> >>

> >> In order to inhibit driver offload logic from processing the state's

> >> packets prior to the xfrm_state object being completely initialized and

> >> added to the SADBs, a new activate() operation was added to inform the

> >> driver the aforementioned conditions have been met.

> >

> > We discussed this already some time ago, and I still think that

> > we should fix this by setting XFRM_STATE_VALID only after the

> > state is fully initialized.

> 

> An upcoming patch will refactor the if statement (encap_type < 0) in

> xfrm_input, in order to support crypto offload with GRO disabled. Currently it

> doesn’t work. This entails yet another check for the validity of the state.

> Resulting in total of 3 copies: 1) for normal traffic, 2) GRO and 3) crypto offload.

> 

> Anyway, IMO it is not right that we (the driver) allow an incoming packet to be

> delivered while the SA is not yet ready. Rather than checking for an invalid input

> I prefer to make sure that such a case won’t happen in the first place.

> 

> To complete the picture, there is another patch to the driver which simply drop

> incoming packets that underwent successful decryption and haven’t been

> activated yet. Active state merely means that the SA is present in the driver’s

> hash table.

> 

> We can make a separate patch to set the state to valid once it is fully initialized,

> it make sense on its own.

> 

> What do you think?

> 


I would prefer we do not add additional checks in the data-path.
IMO, the SA provided to the driver should be fully initialized.
Do not insert it to HW before it is activated.
Yossi Kuperman Dec. 3, 2017, 1:59 p.m. UTC | #7
> -----Original Message-----

> From: Boris Pismenny

> Sent: Sunday, December 3, 2017 1:29 PM

> To: Yossi Kuperman <yossiku@mellanox.com>; Steffen Klassert

> <steffen.klassert@secunet.com>

> Cc: Aviv Heller <avivh@mellanox.com>; Herbert Xu

> <herbert@gondor.apana.org.au>; Yevgeny Kliteynik <kliteyn@mellanox.com>;

> netdev@vger.kernel.org

> Subject: RE: [PATCH net v2 2/3] xfrm: Add an activate() offload dev op

> 

> 

> >

> > >> On 1 Dec 2017, at 9:09, Steffen Klassert

> > >> <steffen.klassert@secunet.com>

> > wrote:

> > >>

> > >> On Tue, Nov 28, 2017 at 07:55:41PM +0200, avivh@mellanox.com wrote:

> > >> From: Aviv Heller <avivh@mellanox.com>

> > >>

> > >> Adding the state to the offload device prior to replay init in

> > >> xfrm_state_construct() will result in NULL dereference if a

> > >> matching ESP packet is received in between.

> > >>

> > >> In order to inhibit driver offload logic from processing the

> > >> state's packets prior to the xfrm_state object being completely

> > >> initialized and added to the SADBs, a new activate() operation was

> > >> added to inform the driver the aforementioned conditions have been met.

> > >

> > > We discussed this already some time ago, and I still think that we

> > > should fix this by setting XFRM_STATE_VALID only after the state is

> > > fully initialized.

> >

> > An upcoming patch will refactor the if statement (encap_type < 0) in

> > xfrm_input, in order to support crypto offload with GRO disabled.

> > Currently it doesn’t work. This entails yet another check for the validity of the

> state.

> > Resulting in total of 3 copies: 1) for normal traffic, 2) GRO and 3) crypto

> offload.

> >

> > Anyway, IMO it is not right that we (the driver) allow an incoming

> > packet to be delivered while the SA is not yet ready. Rather than

> > checking for an invalid input I prefer to make sure that such a case won’t

> happen in the first place.

> >

> > To complete the picture, there is another patch to the driver which

> > simply drop incoming packets that underwent successful decryption and

> > haven’t been activated yet. Active state merely means that the SA is

> > present in the driver’s hash table.

> >

> > We can make a separate patch to set the state to valid once it is

> > fully initialized, it make sense on its own.

> >

> > What do you think?

> >

> 

> I would prefer we do not add additional checks in the data-path.

> IMO, the SA provided to the driver should be fully initialized.

> Do not insert it to HW before it is activated.


Nope, we are not adding anything new to the data-path. Currently,
xdo_dev_state_add configures the hardware with the new SA and
then stores a reference to it in a hash table. We suggest here
to split it into two parts: 1) configure the hardware 2) add to the 
hash table (activate). No change in the data-path whatsoever.
Yossi Kuperman Dec. 3, 2017, 9:09 p.m. UTC | #8
> -----Original Message-----

> From: Shannon Nelson [mailto:shannon.nelson@oracle.com]

> Sent: Sunday, December 3, 2017 2:38 AM

> To: Yossi Kuperman <yossiku@mellanox.com>; Steffen Klassert

> <steffen.klassert@secunet.com>

> Cc: Aviv Heller <avivh@mellanox.com>; Herbert Xu

> <herbert@gondor.apana.org.au>; Boris Pismenny <borisp@mellanox.com>;

> Yevgeny Kliteynik <kliteyn@mellanox.com>; netdev@vger.kernel.org

> Subject: Re: [PATCH net v2 2/3] xfrm: Add an activate() offload dev op

> 

> On 12/2/2017 2:33 PM, Yossi Kuperman wrote:

> >

> >

> >>> On 1 Dec 2017, at 9:09, Steffen Klassert <steffen.klassert@secunet.com>

> wrote:

> >>>

> >>> On Tue, Nov 28, 2017 at 07:55:41PM +0200, avivh@mellanox.com wrote:

> >>> From: Aviv Heller <avivh@mellanox.com>

> >>>

> >>> Adding the state to the offload device prior to replay init in

> >>> xfrm_state_construct() will result in NULL dereference if a matching

> >>> ESP packet is received in between.

> >>>

> >>> In order to inhibit driver offload logic from processing the state's

> >>> packets prior to the xfrm_state object being completely initialized

> >>> and added to the SADBs, a new activate() operation was added to

> >>> inform the driver the aforementioned conditions have been met.

> >>

> >> We discussed this already some time ago, and I still think that we

> >> should fix this by setting XFRM_STATE_VALID only after the state is

> >> fully initialized.

> >

> > An upcoming patch will refactor the if statement (encap_type < 0) in

> xfrm_input, in order to support crypto offload with GRO disabled. Currently it

> doesn’t work. This entails yet another check for the validity of the state.

> Resulting in total of 3 copies: 1) for normal traffic, 2) GRO and 3) crypto offload.

> >

> > Anyway, IMO it is not right that we (the driver) allow an incoming packet to be

> delivered while the SA is not yet ready. Rather than checking for an invalid input I

> prefer to make sure that such a case won’t happen in the first place.

> >

> > To complete the picture, there is another patch to the driver which simply drop

> incoming packets that underwent successful decryption and haven’t been

> activated yet. Active state merely means that the SA is present in the driver’s

> hash table.

> >

> > We can make a separate patch to set the state to valid once it is fully

> initialized, it make sense on its own.

> >

> > What do you think?

> >

> 

> If the SA isn't ready, just don't tell the driver about it.  Please don't add yet

> another state for the driver to track.  This should be as simple as possible, and

> shouldn't be any more complex than the model already used by

> ndo_vlan_rx_add_vid and ndo_vlan_rx_kill_vid.


It is not something new, the driver already "tracks" any SA that uses crypto offload, we just
do it in two phases; please see other answers.

Anyway it is symmetric with xdo_dev_state_delete and xdo_dev_state_free, J

> 

> sln
Yossi Kuperman Dec. 3, 2017, 10:16 p.m. UTC | #9
> -----Original Message-----

> From: Shannon Nelson [mailto:shannon.nelson@oracle.com]

> Sent: Sunday, December 3, 2017 12:11 AM

> To: Aviv Heller <avivh@mellanox.com>; Steffen Klassert

> <steffen.klassert@secunet.com>

> Cc: Herbert Xu <herbert@gondor.apana.org.au>; Boris Pismenny

> <borisp@mellanox.com>; Yossi Kuperman <yossiku@mellanox.com>;

> Yevgeny Kliteynik <kliteyn@mellanox.com>; netdev@vger.kernel.org

> Subject: Re: [PATCH net v2 2/3] xfrm: Add an activate() offload dev op

> 

> On 12/1/2017 11:47 AM, Shannon Nelson wrote:

> > On 11/28/2017 9:55 AM, avivh@mellanox.com wrote:

> >> From: Aviv Heller <avivh@mellanox.com>

> >>

> >> Adding the state to the offload device prior to replay init in

> >> xfrm_state_construct() will result in NULL dereference if a matching

> >> ESP packet is received in between.

> >>

> >> In order to inhibit driver offload logic from processing the state's

> >> packets prior to the xfrm_state object being completely initialized

> >> and added to the SADBs, a new activate() operation was added to

> >> inform the driver the aforementioned conditions have been met.

> >

> > Are there also conditions where you would want to temporarily

> > deactivate, or pause, the incoming driver offload, followed then by

> > another activate?

> >


Nope.

> > sln

> 

> Instead of setting up a half-ready state that needs the activate() operation to

> finish, can we instead just move the xfrm_dev_state_add() call to after the

> xfrm_init_replay()?  Especially since this really only makes sense for the

> inbound, and makes no sense for the outbound path.

> 


It might solve the problem this time, but still the state is not fully initialized and it might break 
in the future. Adding an SA to hardware can fail, and if we were to place xfrm_dev_state_add() 
after the SA is fully initialized, we will be forced to roll-back. Let alone the possible things that could
go wrong once an SA is fully active while the hardware is oblivious. The core issue here is that we
can't construct the SA and configure the hardware atomically, hence the two steps. The first step is
to ensure the hardware can be configured properly, if not abort before the SA is initialized, and the
second step is to just "mark" the state as active. Please note that the we don't explicitly mark it, it
is active just by its existence in the hash table, which we already have and maintain.


Out of curiosity, what's wrong with adding yet another callback (not on the critical-path)?

Thanks for your comments.

> sln

> 

> >

> >>

> >> Signed-off-by: Aviv Heller <avivh@mellanox.com>

> >> Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>

> >> ---

> >> v1 -> v2:

> >>     - Separate to state addition and then activation, instead

> >>       of relocating dev state addition call.

> >> ---

> >>   include/linux/netdevice.h |  1 +

> >>   include/net/xfrm.h        | 12 ++++++++++++

> >>   net/xfrm/xfrm_user.c      |  5 +++++

> >>   3 files changed, 18 insertions(+)

> >>

> >> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

> >> index 2eaac7d..c6ca356 100644

> >> --- a/include/linux/netdevice.h

> >> +++ b/include/linux/netdevice.h

> >> @@ -819,6 +819,7 @@ struct netdev_xdp {

> >>   #ifdef CONFIG_XFRM_OFFLOAD

> >>   struct xfrmdev_ops {

> >>       int    (*xdo_dev_state_add) (struct xfrm_state *x);

> >> +    void    (*xdo_dev_state_activate) (struct xfrm_state *x);

> >>       void    (*xdo_dev_state_delete) (struct xfrm_state *x);

> >>       void    (*xdo_dev_state_free) (struct xfrm_state *x);

> >>       bool    (*xdo_dev_offload_ok) (struct sk_buff *skb, diff --git

> >> a/include/net/xfrm.h b/include/net/xfrm.h index e015e16..324374e

> >> 100644

> >> --- a/include/net/xfrm.h

> >> +++ b/include/net/xfrm.h

> >> @@ -1877,6 +1877,14 @@ static inline bool xfrm_dst_offload_ok(struct

> >> dst_entry *dst)

> >>       return false;

> >>   }

> >> +static inline void xfrm_dev_state_activate(struct xfrm_state *x) {

> >> +    struct xfrm_state_offload *xso = &x->xso;

> >> +

> >> +    if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_activate)

> >> +        xso->dev->xfrmdev_ops->xdo_dev_state_activate(x);

> >> +}

> >> +

> >>   static inline void xfrm_dev_state_delete(struct xfrm_state *x)

> >>   {

> >>       struct xfrm_state_offload *xso = &x->xso; @@ -1907,6 +1915,10

> >> @@ static inline int xfrm_dev_state_add(struct net *net, struct

> >> xfrm_state *x, stru

> >>       return 0;

> >>   }

> >> +static inline void xfrm_dev_state_activate(struct xfrm_state *x) { }

> >> +

> >>   static inline void xfrm_dev_state_delete(struct xfrm_state *x)

> >>   {

> >>   }

> >> diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index

> >> e44a0fe..d06f579 100644

> >> --- a/net/xfrm/xfrm_user.c

> >> +++ b/net/xfrm/xfrm_user.c

> >> @@ -662,6 +662,11 @@ static int xfrm_add_sa(struct sk_buff *skb,

> >> struct nlmsghdr *nlh,

> >>           goto out;

> >>       }

> >> +    spin_lock_bh(&x->lock);

> >> +    if (x->km.state == XFRM_STATE_VALID)

> >> +        xfrm_dev_state_activate(x);

> >> +    spin_unlock_bh(&x->lock);

> >> +

> >>       c.seq = nlh->nlmsg_seq;

> >>       c.portid = nlh->nlmsg_pid;

> >>       c.event = nlh->nlmsg_type;

> >>
Shannon Nelson Dec. 4, 2017, 7:40 p.m. UTC | #10
On 12/3/2017 2:16 PM, Yossi Kuperman wrote:
>> -----Original Message-----
>> From: Shannon Nelson [mailto:shannon.nelson@oracle.com]
>> Sent: Sunday, December 3, 2017 12:11 AM
>> To: Aviv Heller <avivh@mellanox.com>; Steffen Klassert
>> <steffen.klassert@secunet.com>
>> Cc: Herbert Xu <herbert@gondor.apana.org.au>; Boris Pismenny
>> <borisp@mellanox.com>; Yossi Kuperman <yossiku@mellanox.com>;
>> Yevgeny Kliteynik <kliteyn@mellanox.com>; netdev@vger.kernel.org
>> Subject: Re: [PATCH net v2 2/3] xfrm: Add an activate() offload dev op
>>
>> On 12/1/2017 11:47 AM, Shannon Nelson wrote:
>>> On 11/28/2017 9:55 AM, avivh@mellanox.com wrote:
>>>> From: Aviv Heller <avivh@mellanox.com>
>>>>
>>>> Adding the state to the offload device prior to replay init in
>>>> xfrm_state_construct() will result in NULL dereference if a matching
>>>> ESP packet is received in between.
>>>>
>>>> In order to inhibit driver offload logic from processing the state's
>>>> packets prior to the xfrm_state object being completely initialized
>>>> and added to the SADBs, a new activate() operation was added to
>>>> inform the driver the aforementioned conditions have been met.
>>>
>>> Are there also conditions where you would want to temporarily
>>> deactivate, or pause, the incoming driver offload, followed then by
>>> another activate?
>>>
> 
> Nope.
> 
>>> sln
>>
>> Instead of setting up a half-ready state that needs the activate() operation to
>> finish, can we instead just move the xfrm_dev_state_add() call to after the
>> xfrm_init_replay()?  Especially since this really only makes sense for the
>> inbound, and makes no sense for the outbound path.
>>
> 
> It might solve the problem this time, but still the state is not fully initialized and it might break
> in the future. Adding an SA to hardware can fail, and if we were to place xfrm_dev_state_add()
> after the SA is fully initialized, we will be forced to roll-back. Let alone the possible things that could
> go wrong once an SA is fully active while the hardware is oblivious. The core issue here is that we
> can't construct the SA and configure the hardware atomically, hence the two steps. The first step is
> to ensure the hardware can be configured properly, if not abort before the SA is initialized, and the
> second step is to just "mark" the state as active. Please note that the we don't explicitly mark it, it
> is active just by its existence in the hash table, which we already have and maintain.
> 
> 
> Out of curiosity, what's wrong with adding yet another callback (not on the critical-path)?

The existing model for hardware offloads is a single call to the driver 
to enable or disable - either the hardware does the offload or it 
doesn't.  The closest examples are probably mac filters and vlan tags, 
as well as flow filters: in all three cases the stack hands the offload 
request to the driver and expects the offload to start at that point. 
There is no in-between step of "here's an offload, but don't do it yet".

In reference the comment elsewhere about being "symmetric with 
xdo_dev_state_delete and xdo_dev_state_free", I'm not convinced that the 
xdo_dev_state_free is necessary: if the driver was already asked to 
delete the SA, why should it care about a free later?

The driver model should be kept simple.  There's no reason that I can 
see for the driver to need a two-step commit, either for the add or for 
the delete.  When the stack asks the driver to do an offload, it should 
be ready for the offload to happen.

sln

> 
> Thanks for your comments.
> 
>> sln
>>
>>>
>>>>
>>>> Signed-off-by: Aviv Heller <avivh@mellanox.com>
>>>> Signed-off-by: Yossi Kuperman <yossiku@mellanox.com>
>>>> ---
>>>> v1 -> v2:
>>>>      - Separate to state addition and then activation, instead
>>>>        of relocating dev state addition call.
>>>> ---
>>>>    include/linux/netdevice.h |  1 +
>>>>    include/net/xfrm.h        | 12 ++++++++++++
>>>>    net/xfrm/xfrm_user.c      |  5 +++++
>>>>    3 files changed, 18 insertions(+)
>>>>
>>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>>> index 2eaac7d..c6ca356 100644
>>>> --- a/include/linux/netdevice.h
>>>> +++ b/include/linux/netdevice.h
>>>> @@ -819,6 +819,7 @@ struct netdev_xdp {
>>>>    #ifdef CONFIG_XFRM_OFFLOAD
>>>>    struct xfrmdev_ops {
>>>>        int    (*xdo_dev_state_add) (struct xfrm_state *x);
>>>> +    void    (*xdo_dev_state_activate) (struct xfrm_state *x);
>>>>        void    (*xdo_dev_state_delete) (struct xfrm_state *x);
>>>>        void    (*xdo_dev_state_free) (struct xfrm_state *x);
>>>>        bool    (*xdo_dev_offload_ok) (struct sk_buff *skb, diff --git
>>>> a/include/net/xfrm.h b/include/net/xfrm.h index e015e16..324374e
>>>> 100644
>>>> --- a/include/net/xfrm.h
>>>> +++ b/include/net/xfrm.h
>>>> @@ -1877,6 +1877,14 @@ static inline bool xfrm_dst_offload_ok(struct
>>>> dst_entry *dst)
>>>>        return false;
>>>>    }
>>>> +static inline void xfrm_dev_state_activate(struct xfrm_state *x) {
>>>> +    struct xfrm_state_offload *xso = &x->xso;
>>>> +
>>>> +    if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_activate)
>>>> +        xso->dev->xfrmdev_ops->xdo_dev_state_activate(x);
>>>> +}
>>>> +
>>>>    static inline void xfrm_dev_state_delete(struct xfrm_state *x)
>>>>    {
>>>>        struct xfrm_state_offload *xso = &x->xso; @@ -1907,6 +1915,10
>>>> @@ static inline int xfrm_dev_state_add(struct net *net, struct
>>>> xfrm_state *x, stru
>>>>        return 0;
>>>>    }
>>>> +static inline void xfrm_dev_state_activate(struct xfrm_state *x) { }
>>>> +
>>>>    static inline void xfrm_dev_state_delete(struct xfrm_state *x)
>>>>    {
>>>>    }
>>>> diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index
>>>> e44a0fe..d06f579 100644
>>>> --- a/net/xfrm/xfrm_user.c
>>>> +++ b/net/xfrm/xfrm_user.c
>>>> @@ -662,6 +662,11 @@ static int xfrm_add_sa(struct sk_buff *skb,
>>>> struct nlmsghdr *nlh,
>>>>            goto out;
>>>>        }
>>>> +    spin_lock_bh(&x->lock);
>>>> +    if (x->km.state == XFRM_STATE_VALID)
>>>> +        xfrm_dev_state_activate(x);
>>>> +    spin_unlock_bh(&x->lock);
>>>> +
>>>>        c.seq = nlh->nlmsg_seq;
>>>>        c.portid = nlh->nlmsg_pid;
>>>>        c.event = nlh->nlmsg_type;
>>>>
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2eaac7d..c6ca356 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -819,6 +819,7 @@  struct netdev_xdp {
 #ifdef CONFIG_XFRM_OFFLOAD
 struct xfrmdev_ops {
 	int	(*xdo_dev_state_add) (struct xfrm_state *x);
+	void	(*xdo_dev_state_activate) (struct xfrm_state *x);
 	void	(*xdo_dev_state_delete) (struct xfrm_state *x);
 	void	(*xdo_dev_state_free) (struct xfrm_state *x);
 	bool	(*xdo_dev_offload_ok) (struct sk_buff *skb,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index e015e16..324374e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1877,6 +1877,14 @@  static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
 	return false;
 }
 
+static inline void xfrm_dev_state_activate(struct xfrm_state *x)
+{
+	struct xfrm_state_offload *xso = &x->xso;
+
+	if (xso->dev && xso->dev->xfrmdev_ops->xdo_dev_state_activate)
+		xso->dev->xfrmdev_ops->xdo_dev_state_activate(x);
+}
+
 static inline void xfrm_dev_state_delete(struct xfrm_state *x)
 {
 	struct xfrm_state_offload *xso = &x->xso;
@@ -1907,6 +1915,10 @@  static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, stru
 	return 0;
 }
 
+static inline void xfrm_dev_state_activate(struct xfrm_state *x)
+{
+}
+
 static inline void xfrm_dev_state_delete(struct xfrm_state *x)
 {
 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index e44a0fe..d06f579 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -662,6 +662,11 @@  static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto out;
 	}
 
+	spin_lock_bh(&x->lock);
+	if (x->km.state == XFRM_STATE_VALID)
+		xfrm_dev_state_activate(x);
+	spin_unlock_bh(&x->lock);
+
 	c.seq = nlh->nlmsg_seq;
 	c.portid = nlh->nlmsg_pid;
 	c.event = nlh->nlmsg_type;