diff mbox series

[net-next,2/3] xfrm: Fix offload dev state addition to occur after insertion

Message ID 1508857831-55824-2-git-send-email-avivh@mellanox.com
State Awaiting Upstream, archived
Delegated to: David Miller
Headers show
Series [net-next,1/3] xfrm: Fix xfrm_input() to verify state is valid when (encap_type < 0) | expand

Commit Message

avivh@mellanox.com Oct. 24, 2017, 3:10 p.m. UTC
From: Aviv Heller <avivh@mellanox.com>

Adding the state to the offload device prior to replay init in
xfrm_state_construct() will result in NULL dereference if a matching
ESP packet is received in between.

Adding it after insertion also has the benefit of the driver not having
to check whether a state with the same match criteria already exists,
but forces us to use an atomic type for the offload_handle, to make
certain a stack-read/driver-write race won't result in reading corrupt
data.

Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API")
Signed-off-by: Aviv Heller <avivh@mellanox.com>
Signed-off-by: Yevgeny Kliteynik <kliteyn@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c   | 12 +++++------
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c       |  4 ++--
 include/net/xfrm.h                                 | 25 ++++++++++++++++++++--
 net/ipv4/esp4.c                                    |  4 ++--
 net/ipv4/esp4_offload.c                            |  2 +-
 net/ipv6/esp6.c                                    |  4 ++--
 net/ipv6/esp6_offload.c                            |  2 +-
 net/xfrm/xfrm_device.c                             |  2 +-
 net/xfrm/xfrm_user.c                               | 21 +++++++++---------
 9 files changed, 48 insertions(+), 28 deletions(-)

Comments

Steffen Klassert Oct. 25, 2017, 7:22 a.m. UTC | #1
On Tue, Oct 24, 2017 at 06:10:30PM +0300, avivh@mellanox.com wrote:
> From: Aviv Heller <avivh@mellanox.com>
> 
> Adding the state to the offload device prior to replay init in
> xfrm_state_construct() will result in NULL dereference if a matching
> ESP packet is received in between.
> 
> Adding it after insertion also has the benefit of the driver not having
> to check whether a state with the same match criteria already exists,
> but forces us to use an atomic type for the offload_handle, to make
> certain a stack-read/driver-write race won't result in reading corrupt
> data.

No, this will add multiple atomic operations to the packet path,
even in the non offloaded case.

I think the problem is that we set XFRM_STATE_VALID to early.
This was not a problem before we had offloading because
it was not possible to lookup this state before we inserted
it into the SADB. Now that the driver holds a subset of states
too, we need to make sure the state is fully initialized
before we mark it as valid.

The patch below should do it, in combination with your patch 1/3.

Could you please test this?

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b997f13..96eb263 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -587,10 +587,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 	if (attrs[XFRMA_OUTPUT_MARK])
 		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
 
-	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
-	if (err)
-		goto error;
-
 	if (attrs[XFRMA_SEC_CTX]) {
 		err = security_xfrm_state_alloc(x,
 						nla_data(attrs[XFRMA_SEC_CTX]));
@@ -620,6 +616,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 	/* override default values from above */
 	xfrm_update_ae_params(x, attrs, 0);
 
+	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
+	if (err)
+		goto error;
+
 	return x;
 
 error:
Aviv Heller Oct. 25, 2017, 1:09 p.m. UTC | #2
-----Original message-----
> From: Steffen Klassert

> Sent: Wednesday, October 25 2017, 10:22 am

> To: avivh@mellanox.com

> Cc: Herbert Xu; Boris Pismenny; Yossi Kuperman; Yevgeny Kliteynik; netdev@vger.kernel.org

> Subject: Re: [PATCH net-next 2/3] xfrm: Fix offload dev state addition to occur after insertion

> 

> On Tue, Oct 24, 2017 at 06:10:30PM +0300, avivh@mellanox.com wrote:

> > From: Aviv Heller <avivh@mellanox.com>

> > 

> > Adding the state to the offload device prior to replay init in

> > xfrm_state_construct() will result in NULL dereference if a matching

> > ESP packet is received in between.

> > 

> > Adding it after insertion also has the benefit of the driver not having

> > to check whether a state with the same match criteria already exists,

> > but forces us to use an atomic type for the offload_handle, to make

> > certain a stack-read/driver-write race won't result in reading corrupt

> > data.

> 

> No, this will add multiple atomic operations to the packet path,

> even in the non offloaded case.

> 

> I think the problem is that we set XFRM_STATE_VALID to early.

> This was not a problem before we had offloading because

> it was not possible to lookup this state before we inserted

> it into the SADB. Now that the driver holds a subset of states

> too, we need to make sure the state is fully initialized

> before we mark it as valid.

> 

> The patch below should do it, in combination with your patch 1/3.

> 

> Could you please test this?

> 

> diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c

> index b997f13..96eb263 100644

> --- a/net/xfrm/xfrm_user.c

> +++ b/net/xfrm/xfrm_user.c

> @@ -587,10 +587,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,

>  	if (attrs[XFRMA_OUTPUT_MARK])

>  		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);

>  

> -	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);

> -	if (err)

> -		goto error;

> -

>  	if (attrs[XFRMA_SEC_CTX]) {

>  		err = security_xfrm_state_alloc(x,

>  						nla_data(attrs[XFRMA_SEC_CTX]));

> @@ -620,6 +616,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,

>  	/* override default values from above */

>  	xfrm_update_ae_params(x, attrs, 0);

>  

> +	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);

> +	if (err)

> +		goto error;

> +

>  	return x;

>  

>  error:

> 


Hi Steffen,

This patch does not work, due to:
	if (!x->type_offload)
		return -EINVAL;

test in xfrm_dev_state_add().

I agree with your analysis, and that we take a little performance hit due to the atomics, but we get the benefit of calling xfrm_dev_state_add() after the state is completely initialized, and passed the criteria for addition by xfrm_state_add().

Another approach I thought of is to insert the state with an invalid km.state, and only after the HW state addition succeeds, we move km.state to valid.
I did not go for this approach because here again we need to use atomics (or not?), and since state testing is done in quite a few places, was afraid of unexpected consequences.

What do you think?

Thanks,
Aviv
kernel test robot Oct. 26, 2017, 12:27 a.m. UTC | #3
Hi Aviv,

[auto build test WARNING on ipsec-next/master]
[also build test WARNING on v4.14-rc6 next-20171018]
[cannot apply to net-next/master]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/avivh-mellanox-com/xfrm-Fix-xfrm_input-to-verify-state-is-valid-when-encap_type-0/20171026-060151
base:   https://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git master
config: i386-allmodconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c: In function 'mlx5e_xfrm_add_state':
>> drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c:307:33: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     xfrm_dev_set_offload_handle(x, (u64)sa_entry);
                                    ^
   drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c: In function 'mlx5e_xfrm_del_state':
>> drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c:323:42: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     struct mlx5e_ipsec_sa_entry *sa_entry = (void *)xfrm_dev_offload_handle(x);
                                             ^
   drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c: In function 'mlx5e_xfrm_free_state':
   drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c:345:42: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     struct mlx5e_ipsec_sa_entry *sa_entry = (void *)xfrm_dev_offload_handle(x);
                                             ^

vim +307 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c

   260	
   261	static int mlx5e_xfrm_add_state(struct xfrm_state *x)
   262	{
   263		struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
   264		struct net_device *netdev = x->xso.dev;
   265		struct mlx5_accel_ipsec_sa hw_sa;
   266		struct mlx5e_priv *priv;
   267		void *context;
   268		int err;
   269	
   270		priv = netdev_priv(netdev);
   271	
   272		err = mlx5e_xfrm_validate_state(x);
   273		if (err)
   274			return err;
   275	
   276		sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
   277		if (!sa_entry) {
   278			err = -ENOMEM;
   279			goto out;
   280		}
   281	
   282		sa_entry->x = x;
   283		sa_entry->ipsec = priv->ipsec;
   284	
   285		/* Add the SA to handle processed incoming packets before the add SA
   286		 * completion was received
   287		 */
   288		if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
   289			err = mlx5e_ipsec_sadb_rx_add(sa_entry);
   290			if (err) {
   291				netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
   292				goto err_entry;
   293			}
   294		}
   295	
   296		mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_ADD_SA, sa_entry, &hw_sa);
   297		context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
   298		if (IS_ERR(context)) {
   299			err = PTR_ERR(context);
   300			goto err_sadb_rx;
   301		}
   302	
   303		err = mlx5_accel_ipsec_sa_cmd_wait(context);
   304		if (err)
   305			goto err_sadb_rx;
   306	
 > 307		xfrm_dev_set_offload_handle(x, (u64)sa_entry);
   308		goto out;
   309	
   310	err_sadb_rx:
   311		if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
   312			mlx5e_ipsec_sadb_rx_del(sa_entry);
   313			mlx5e_ipsec_sadb_rx_free(sa_entry);
   314		}
   315	err_entry:
   316		kfree(sa_entry);
   317	out:
   318		return err;
   319	}
   320	
   321	static void mlx5e_xfrm_del_state(struct xfrm_state *x)
   322	{
 > 323		struct mlx5e_ipsec_sa_entry *sa_entry = (void *)xfrm_dev_offload_handle(x);
   324		struct mlx5_accel_ipsec_sa hw_sa;
   325		void *context;
   326	
   327		if (!sa_entry)
   328			return;
   329	
   330		WARN_ON(sa_entry->x != x);
   331	
   332		if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
   333			mlx5e_ipsec_sadb_rx_del(sa_entry);
   334	
   335		mlx5e_ipsec_build_hw_sa(MLX5_IPSEC_CMD_DEL_SA, sa_entry, &hw_sa);
   336		context = mlx5_accel_ipsec_sa_cmd_exec(sa_entry->ipsec->en_priv->mdev, &hw_sa);
   337		if (IS_ERR(context))
   338			return;
   339	
   340		sa_entry->context = context;
   341	}
   342	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Steffen Klassert Oct. 26, 2017, 6:16 a.m. UTC | #4
On Wed, Oct 25, 2017 at 01:09:44PM +0000, Aviv Heller wrote:
> -----Original message-----
> > From: Steffen Klassert
> > Sent: Wednesday, October 25 2017, 10:22 am
> > To: avivh@mellanox.com
> > Cc: Herbert Xu; Boris Pismenny; Yossi Kuperman; Yevgeny Kliteynik; netdev@vger.kernel.org
> > Subject: Re: [PATCH net-next 2/3] xfrm: Fix offload dev state addition to occur after insertion
> > 
> > On Tue, Oct 24, 2017 at 06:10:30PM +0300, avivh@mellanox.com wrote:
> > > From: Aviv Heller <avivh@mellanox.com>
> > > 
> > > Adding the state to the offload device prior to replay init in
> > > xfrm_state_construct() will result in NULL dereference if a matching
> > > ESP packet is received in between.
> > > 
> > > Adding it after insertion also has the benefit of the driver not having
> > > to check whether a state with the same match criteria already exists,
> > > but forces us to use an atomic type for the offload_handle, to make
> > > certain a stack-read/driver-write race won't result in reading corrupt
> > > data.
> > 
> > No, this will add multiple atomic operations to the packet path,
> > even in the non offloaded case.
> > 
> > I think the problem is that we set XFRM_STATE_VALID to early.
> > This was not a problem before we had offloading because
> > it was not possible to lookup this state before we inserted
> > it into the SADB. Now that the driver holds a subset of states
> > too, we need to make sure the state is fully initialized
> > before we mark it as valid.
> > 
> > The patch below should do it, in combination with your patch 1/3.
> > 
> > Could you please test this?
> > 
> > diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
> > index b997f13..96eb263 100644
> > --- a/net/xfrm/xfrm_user.c
> > +++ b/net/xfrm/xfrm_user.c
> > @@ -587,10 +587,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
> >  	if (attrs[XFRMA_OUTPUT_MARK])
> >  		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);
> >  
> > -	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
> > -	if (err)
> > -		goto error;
> > -
> >  	if (attrs[XFRMA_SEC_CTX]) {
> >  		err = security_xfrm_state_alloc(x,
> >  						nla_data(attrs[XFRMA_SEC_CTX]));
> > @@ -620,6 +616,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
> >  	/* override default values from above */
> >  	xfrm_update_ae_params(x, attrs, 0);
> >  
> > +	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);
> > +	if (err)
> > +		goto error;
> > +
> >  	return x;
> >  
> >  error:
> > 
> 
> Hi Steffen,
> 
> This patch does not work, due to:
> 	if (!x->type_offload)
> 		return -EINVAL;
> 
> test in xfrm_dev_state_add().

There is certainly a way arround that :)
The easiest I can think of would be to propagate XFRM_STATE_VALID
only after the state is inserted into the SADBs. I.e. move the
setting of XFRM_STATE_VALID out of __xfrm_init_state() and let the
callers do it.

> 
> I agree with your analysis, and that we take a little performance hit due to the atomics, but we get the benefit of calling xfrm_dev_state_add() after the state is completely initialized, and passed the criteria for addition by xfrm_state_add().

We already have too many of these atomic operatons in the
packet path, adding more is a no go.
Aviv Heller Oct. 26, 2017, 2:55 p.m. UTC | #5
-----Original message-----
> From: Steffen Klassert

> Sent: Thursday, October 26 2017, 9:16 am

> To: Aviv Heller

> Cc: netdev-owner@vger.kernel.org; avivh@mellanox.com; Herbert Xu; Boris Pismenny; Yossi Kuperman; Yevgeny Kliteynik; netdev@vger.kernel.org

> Subject: Re: [PATCH net-next 2/3] xfrm: Fix offload dev state addition to occur after insertion

> 

> On Wed, Oct 25, 2017 at 01:09:44PM +0000, Aviv Heller wrote:

> > -----Original message-----

> > > From: Steffen Klassert

> > > Sent: Wednesday, October 25 2017, 10:22 am

> > > To: avivh@mellanox.com

> > > Cc: Herbert Xu; Boris Pismenny; Yossi Kuperman; Yevgeny Kliteynik; netdev@vger.kernel.org

> > > Subject: Re: [PATCH net-next 2/3] xfrm: Fix offload dev state addition to occur after insertion

> > > 

> > > On Tue, Oct 24, 2017 at 06:10:30PM +0300, avivh@mellanox.com wrote:

> > > > From: Aviv Heller <avivh@mellanox.com>

> > > > 

> > > > Adding the state to the offload device prior to replay init in

> > > > xfrm_state_construct() will result in NULL dereference if a matching

> > > > ESP packet is received in between.

> > > > 

> > > > Adding it after insertion also has the benefit of the driver not having

> > > > to check whether a state with the same match criteria already exists,

> > > > but forces us to use an atomic type for the offload_handle, to make

> > > > certain a stack-read/driver-write race won't result in reading corrupt

> > > > data.

> > > 

> > > No, this will add multiple atomic operations to the packet path,

> > > even in the non offloaded case.

> > > 

> > > I think the problem is that we set XFRM_STATE_VALID to early.

> > > This was not a problem before we had offloading because

> > > it was not possible to lookup this state before we inserted

> > > it into the SADB. Now that the driver holds a subset of states

> > > too, we need to make sure the state is fully initialized

> > > before we mark it as valid.

> > > 

> > > The patch below should do it, in combination with your patch 1/3.

> > > 

> > > Could you please test this?

> > > 

> > > diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c

> > > index b997f13..96eb263 100644

> > > --- a/net/xfrm/xfrm_user.c

> > > +++ b/net/xfrm/xfrm_user.c

> > > @@ -587,10 +587,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,

> > >  	if (attrs[XFRMA_OUTPUT_MARK])

> > >  		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);

> > >  

> > > -	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);

> > > -	if (err)

> > > -		goto error;

> > > -

> > >  	if (attrs[XFRMA_SEC_CTX]) {

> > >  		err = security_xfrm_state_alloc(x,

> > >  						nla_data(attrs[XFRMA_SEC_CTX]));

> > > @@ -620,6 +616,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,

> > >  	/* override default values from above */

> > >  	xfrm_update_ae_params(x, attrs, 0);

> > >  

> > > +	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);

> > > +	if (err)

> > > +		goto error;

> > > +

> > >  	return x;

> > >  

> > >  error:

> > > 

> > 

> > Hi Steffen,

> > 

> > This patch does not work, due to:

> > 	if (!x->type_offload)

> > 		return -EINVAL;

> > 

> > test in xfrm_dev_state_add().

> 

> There is certainly a way arround that :)

> The easiest I can think of would be to propagate XFRM_STATE_VALID

> only after the state is inserted into the SADBs. I.e. move the

> setting of XFRM_STATE_VALID out of __xfrm_init_state() and let the

> callers do it.


This does seem like the easiest solution, if we don't move state addition to occur after insertion.
I'm waiting for our regression results (probably on Monday) for the patch below, and would appreciate your comments:

From 4da5c12abb59113428668afffc42eb51c48fa894 Mon Sep 17 00:00:00 2001
From: Aviv Heller <avivh@mellanox.com>

Date: Thu, 26 Oct 2017 16:30:41 +0300
Subject: [PATCH] Temp

Signed-off-by: Aviv Heller <avivh@mellanox.com>

---
 net/ipv4/ipcomp.c     |  1 +
 net/ipv6/ipcomp6.c    |  1 +
 net/key/af_key.c      |  1 +
 net/xfrm/xfrm_state.c |  6 +++---
 net/xfrm/xfrm_user.c  | 16 +++++++++-------
 5 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index d97f4f2..0016162 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -80,6 +80,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 	if (xfrm_init_state(t))
 		goto error;
 
+	t->km.state = XFRM_STATE_VALID;
 	atomic_set(&t->tunnel_users, 1);
 out:
 	return t;
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 54d165b..95b67f3 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -107,6 +107,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
 	if (xfrm_init_state(t))
 		goto error;
 
+	t->km.state = XFRM_STATE_VALID;
 	atomic_set(&t->tunnel_users, 1);
 
 out:
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 10d7133..d1d43f7 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1276,6 +1276,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
 		goto out;
 
 	x->km.seq = hdr->sadb_msg_seq;
+	x->km.state = XFRM_STATE_VALID;
 	return x;
 
 out:
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a41e2ef..38b90fd 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1401,10 +1401,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 	x->km.seq = orig->km.seq;
 	x->replay = orig->replay;
 	x->preplay = orig->preplay;
+	x->km.state = XFRM_STATE_VALID;
 
 	return x;
 
- error:
+error:
+	x->km.state = XFRM_STATE_DEAD;
 	xfrm_state_put(x);
 out:
 	return NULL;
@@ -2256,8 +2258,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
 			goto error;
 	}
 
-	x->km.state = XFRM_STATE_VALID;
-
 error:
 	return err;
 }
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ffe8d5e..df304c2 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -595,13 +595,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 			goto error;
 	}
 
-	if (attrs[XFRMA_OFFLOAD_DEV]) {
-		err = xfrm_dev_state_add(net, x,
-					 nla_data(attrs[XFRMA_OFFLOAD_DEV]));
-		if (err)
-			goto error;
-	}
-
 	if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
 					       attrs[XFRMA_REPLAY_ESN_VAL])))
 		goto error;
@@ -617,6 +610,15 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 	/* override default values from above */
 	xfrm_update_ae_params(x, attrs, 0);
 
+	x->km.state = XFRM_STATE_VALID;
+
+	if (attrs[XFRMA_OFFLOAD_DEV]) {
+		err = xfrm_dev_state_add(net, x,
+					 nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+		if (err)
+			goto error;
+	}
+
 	return x;
 
 error:
-- 
1.8.3.1
Aviv Heller Oct. 26, 2017, 3:47 p.m. UTC | #6
-----Original message-----
> From: Aviv Heller

> Sent: Thursday, October 26 2017, 5:55 pm

> To: Steffen Klassert

> Cc: netdev-owner@vger.kernel.org; avivh@mellanox.com; Herbert Xu; Boris Pismenny; Yossi Kuperman; Yevgeny Kliteynik; netdev@vger.kernel.org

> Subject: RE: [PATCH net-next 2/3] xfrm: Fix offload dev state addition to occur after insertion

> 

> -----Original message-----

> > From: Steffen Klassert

> > Sent: Thursday, October 26 2017, 9:16 am

> > To: Aviv Heller

> > Cc: netdev-owner@vger.kernel.org; avivh@mellanox.com; Herbert Xu; Boris Pismenny; Yossi Kuperman; Yevgeny Kliteynik; netdev@vger.kernel.org

> > Subject: Re: [PATCH net-next 2/3] xfrm: Fix offload dev state addition to occur after insertion

> > 

> > On Wed, Oct 25, 2017 at 01:09:44PM +0000, Aviv Heller wrote:

> > > -----Original message-----

> > > > From: Steffen Klassert

> > > > Sent: Wednesday, October 25 2017, 10:22 am

> > > > To: avivh@mellanox.com

> > > > Cc: Herbert Xu; Boris Pismenny; Yossi Kuperman; Yevgeny Kliteynik; netdev@vger.kernel.org

> > > > Subject: Re: [PATCH net-next 2/3] xfrm: Fix offload dev state addition to occur after insertion

> > > > 

> > > > On Tue, Oct 24, 2017 at 06:10:30PM +0300, avivh@mellanox.com wrote:

> > > > > From: Aviv Heller <avivh@mellanox.com>

> > > > > 

> > > > > Adding the state to the offload device prior to replay init in

> > > > > xfrm_state_construct() will result in NULL dereference if a matching

> > > > > ESP packet is received in between.

> > > > > 

> > > > > Adding it after insertion also has the benefit of the driver not having

> > > > > to check whether a state with the same match criteria already exists,

> > > > > but forces us to use an atomic type for the offload_handle, to make

> > > > > certain a stack-read/driver-write race won't result in reading corrupt

> > > > > data.

> > > > 

> > > > No, this will add multiple atomic operations to the packet path,

> > > > even in the non offloaded case.

> > > > 

> > > > I think the problem is that we set XFRM_STATE_VALID to early.

> > > > This was not a problem before we had offloading because

> > > > it was not possible to lookup this state before we inserted

> > > > it into the SADB. Now that the driver holds a subset of states

> > > > too, we need to make sure the state is fully initialized

> > > > before we mark it as valid.

> > > > 

> > > > The patch below should do it, in combination with your patch 1/3.

> > > > 

> > > > Could you please test this?

> > > > 

> > > > diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c

> > > > index b997f13..96eb263 100644

> > > > --- a/net/xfrm/xfrm_user.c

> > > > +++ b/net/xfrm/xfrm_user.c

> > > > @@ -587,10 +587,6 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,

> > > >  	if (attrs[XFRMA_OUTPUT_MARK])

> > > >  		x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]);

> > > >  

> > > > -	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);

> > > > -	if (err)

> > > > -		goto error;

> > > > -

> > > >  	if (attrs[XFRMA_SEC_CTX]) {

> > > >  		err = security_xfrm_state_alloc(x,

> > > >  						nla_data(attrs[XFRMA_SEC_CTX]));

> > > > @@ -620,6 +616,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,

> > > >  	/* override default values from above */

> > > >  	xfrm_update_ae_params(x, attrs, 0);

> > > >  

> > > > +	err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]);

> > > > +	if (err)

> > > > +		goto error;

> > > > +

> > > >  	return x;

> > > >  

> > > >  error:

> > > > 

> > > 

> > > Hi Steffen,

> > > 

> > > This patch does not work, due to:

> > > 	if (!x->type_offload)

> > > 		return -EINVAL;

> > > 

> > > test in xfrm_dev_state_add().

> > 

> > There is certainly a way arround that :)

> > The easiest I can think of would be to propagate XFRM_STATE_VALID

> > only after the state is inserted into the SADBs. I.e. move the

> > setting of XFRM_STATE_VALID out of __xfrm_init_state() and let the

> > callers do it.

> 

> This does seem like the easiest solution, if we don't move state addition to occur after insertion.

> I'm waiting for our regression results (probably on Monday) for the patch below, and would appreciate your comments:

> 


Please ignore the last patch, I understood you wrong.
Will reimplement and submit.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index bac5103..07846fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -304,7 +304,7 @@  static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 	if (err)
 		goto err_sadb_rx;
 
-	x->xso.offload_handle = (unsigned long)sa_entry;
+	xfrm_dev_set_offload_handle(x, (u64)sa_entry);
 	goto out;
 
 err_sadb_rx:
@@ -320,14 +320,13 @@  static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 
 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 {
-	struct mlx5e_ipsec_sa_entry *sa_entry;
+	struct mlx5e_ipsec_sa_entry *sa_entry = (void *)xfrm_dev_offload_handle(x);
 	struct mlx5_accel_ipsec_sa hw_sa;
 	void *context;
 
-	if (!x->xso.offload_handle)
+	if (!sa_entry)
 		return;
 
-	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
 	WARN_ON(sa_entry->x != x);
 
 	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
@@ -343,13 +342,12 @@  static void mlx5e_xfrm_del_state(struct xfrm_state *x)
 
 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 {
-	struct mlx5e_ipsec_sa_entry *sa_entry;
+	struct mlx5e_ipsec_sa_entry *sa_entry = (void *)xfrm_dev_offload_handle(x);
 	int res;
 
-	if (!x->xso.offload_handle)
+	if (!sa_entry)
 		return;
 
-	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
 	WARN_ON(sa_entry->x != x);
 
 	res = mlx5_accel_ipsec_sa_cmd_wait(sa_entry->context);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 4614ddf..c5d4e8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -243,7 +243,7 @@  struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
 		goto drop;
 	}
 
-	if (unlikely(!x->xso.offload_handle ||
+	if (unlikely(!xfrm_dev_offload_handle(x) ||
 		     (skb->protocol != htons(ETH_P_IP) &&
 		      skb->protocol != htons(ETH_P_IPV6)))) {
 		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_not_ip);
@@ -353,7 +353,7 @@  bool mlx5e_ipsec_feature_check(struct sk_buff *skb, struct net_device *netdev,
 
 	if (skb->sp && skb->sp->len) {
 		x = skb->sp->xvec[0];
-		if (x && x->xso.offload_handle)
+		if (x && xfrm_dev_offload_handle(x))
 			return true;
 	}
 	return false;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 3cb618b..41a1afc 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -125,7 +125,7 @@  struct xfrm_state_walk {
 
 struct xfrm_state_offload {
 	struct net_device	*dev;
-	unsigned long		offload_handle;
+	atomic64_t		offload_handle;
 	unsigned int		num_exthdrs;
 	u8			flags;
 };
@@ -1862,6 +1862,17 @@  int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
 		       struct xfrm_user_offload *xuo);
 bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
 
+static inline void xfrm_dev_set_offload_handle(struct xfrm_state *x,
+					       u64 offload_handle)
+{
+	atomic64_set(&x->xso.offload_handle, offload_handle);
+}
+
+static inline u64 xfrm_dev_offload_handle(struct xfrm_state *x)
+{
+	return atomic64_read(&x->xso.offload_handle);
+}
+
 static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
 {
 	struct xfrm_state *x = dst->xfrm;
@@ -1869,7 +1880,7 @@  static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
 	if (!x || !x->type_offload)
 		return false;
 
-	if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
+	if (xfrm_dev_offload_handle(x) && (x->xso.dev == dst->path->dev) &&
 	    !dst->child->xfrm)
 		return true;
 
@@ -1919,6 +1930,16 @@  static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x
 	return false;
 }
 
+static inline void xfrm_dev_set_offload_handle(struct xfrm_state *x,
+					       u64 offload_handle)
+{
+}
+
+static inline u64 xfrm_dev_offload_handle(struct xfrm_state *x)
+{
+	return 0;
+}
+
 static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
 {
 	return false;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index b00e4a4..250796f 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -832,7 +832,7 @@  static int esp_init_aead(struct xfrm_state *x)
 		     x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
 		goto error;
 
-	if (x->xso.offload_handle)
+	if (xfrm_dev_offload_handle(x))
 		mask |= CRYPTO_ALG_ASYNC;
 
 	aead = crypto_alloc_aead(aead_name, 0, mask);
@@ -891,7 +891,7 @@  static int esp_init_authenc(struct xfrm_state *x)
 			goto error;
 	}
 
-	if (x->xso.offload_handle)
+	if (xfrm_dev_offload_handle(x))
 		mask |= CRYPTO_ALG_ASYNC;
 
 	aead = crypto_alloc_aead(authenc_name, 0, mask);
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index f8b918c..ddeb5c1 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -211,7 +211,7 @@  static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features_
 	if (!xo)
 		return -EINVAL;
 
-	if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+	if (!(features & NETIF_F_HW_ESP) || !xfrm_dev_offload_handle(x) ||
 	    (x->xso.dev != skb->dev)) {
 		xo->flags |= CRYPTO_FALLBACK;
 		hw_offload = false;
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1696401..fd9daac 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -741,7 +741,7 @@  static int esp_init_aead(struct xfrm_state *x)
 		     x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
 		goto error;
 
-	if (x->xso.offload_handle)
+	if (xfrm_dev_offload_handle(x))
 		mask |= CRYPTO_ALG_ASYNC;
 
 	aead = crypto_alloc_aead(aead_name, 0, mask);
@@ -800,7 +800,7 @@  static int esp_init_authenc(struct xfrm_state *x)
 			goto error;
 	}
 
-	if (x->xso.offload_handle)
+	if (xfrm_dev_offload_handle(x))
 		mask |= CRYPTO_ALG_ASYNC;
 
 	aead = crypto_alloc_aead(authenc_name, 0, mask);
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 333a478..5103efd 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -238,7 +238,7 @@  static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 	if (!xo)
 		return -EINVAL;
 
-	if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+	if (!(features & NETIF_F_HW_ESP) || !xfrm_dev_offload_handle(x) ||
 	    (x->xso.dev != skb->dev)) {
 		xo->flags |= CRYPTO_FALLBACK;
 		hw_offload = false;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index acf0010..0e7b6a4 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -119,7 +119,7 @@  bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 	if (!x->type_offload || x->encap)
 		return false;
 
-	if ((x->xso.offload_handle && (dev == dst->path->dev)) &&
+	if ((xfrm_dev_offload_handle(x) && (dev == dst->path->dev)) &&
 	     !dst->child->xfrm && x->type->get_mtu) {
 		mtu = x->type->get_mtu(x, xdst->child_mtu_cached);
 
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index f7a12aa..a80ccfb 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -598,13 +598,6 @@  static struct xfrm_state *xfrm_state_construct(struct net *net,
 			goto error;
 	}
 
-	if (attrs[XFRMA_OFFLOAD_DEV]) {
-		err = xfrm_dev_state_add(net, x,
-					 nla_data(attrs[XFRMA_OFFLOAD_DEV]));
-		if (err)
-			goto error;
-	}
-
 	if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn,
 					       attrs[XFRMA_REPLAY_ESN_VAL])))
 		goto error;
@@ -653,20 +646,28 @@  static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
 	else
 		err = xfrm_state_update(x);
 
-	xfrm_audit_state_add(x, err ? 0 : 1, true);
-
-	if (err < 0) {
+	if (err) {
 		x->km.state = XFRM_STATE_DEAD;
 		__xfrm_state_put(x);
 		goto out;
 	}
 
+	if (attrs[XFRMA_OFFLOAD_DEV])
+		err = xfrm_dev_state_add(net, x,
+					 nla_data(attrs[XFRMA_OFFLOAD_DEV]));
+
+	if (err) {
+		xfrm_state_delete(x);
+		goto out;
+	}
+
 	c.seq = nlh->nlmsg_seq;
 	c.portid = nlh->nlmsg_pid;
 	c.event = nlh->nlmsg_type;
 
 	km_state_notify(x, &c);
 out:
+	xfrm_audit_state_add(x, err ? 0 : 1, true);
 	xfrm_state_put(x);
 	return err;
 }