diff mbox series

[bpf,v2,2/6] bpf: tls fix transition through disconnect with close

Message ID 156261324561.31108.14410711674221391677.stgit@ubuntu3-kvm1
State Changes Requested
Delegated to: BPF Maintainers
Headers show
Series [bpf,v2,1/6] tls: remove close callback sock unlock/lock and flush_sync | expand

Commit Message

John Fastabend July 8, 2019, 7:14 p.m. UTC
It is possible (via shutdown()) for TCP socks to go through TCP_CLOSE
state via tcp_dosconnect() without actually calling tcp_close which
would then call the tls close callback. Because of this a user could
disconnect a socket then put it in a LISTEN state which would break
our assumptions about sockets always being ESTABLISHED state.

More directly because close() can call unhash() and unhash is
implemented by sockmap if a sockmap socket has TLS enabled we can
incorrectly destroy the psock from unhash() and then call its close
handler again. But because the psock (sockmap socket representation)
is already destroyed we call close handler in sk->prot. However,
in some cases (TLS BASE/BASE case) this will still point at the
sockmap close handler resulting in a circular call and crash reported
by syzbot.

To fix both above issues implement the unhash() routine for TLS.

Fixes: 3c4d7559159bf ("tls: kernel TLS support")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/net/tls.h  |    6 +++++-
 net/tls/tls_main.c |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

Comments

Jakub Kicinski July 10, 2019, 2:45 a.m. UTC | #1
On Mon, 08 Jul 2019 19:14:05 +0000, John Fastabend wrote:
> @@ -287,6 +313,27 @@ static void tls_sk_proto_cleanup(struct sock *sk,
>  #endif
>  }
>  
> +static void tls_sk_proto_unhash(struct sock *sk)
> +{
> +	struct inet_connection_sock *icsk = inet_csk(sk);
> +	long timeo = sock_sndtimeo(sk, 0);
> +	struct tls_context *ctx;
> +
> +	if (unlikely(!icsk->icsk_ulp_data)) {

Is this for when sockmap is stacked on top of TLS and TLS got removed
without letting sockmap know?

> +		if (sk->sk_prot->unhash)
> +			sk->sk_prot->unhash(sk);
> +	}
> +
> +	ctx = tls_get_ctx(sk);
> +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> +		tls_sk_proto_cleanup(sk, ctx, timeo);
> +	icsk->icsk_ulp_data = NULL;

I think close only starts checking if ctx is NULL in patch 6.
Looks like some chunks of ctx checking/clearing got spread to
patch 1 and some to patch 6.

> +	tls_ctx_free_wq(ctx);
> +
> +	if (ctx->unhash)
> +		ctx->unhash(sk);
> +}
> +
>  static void tls_sk_proto_close(struct sock *sk, long timeout)
>  {
>  	struct tls_context *ctx = tls_get_ctx(sk);
John Fastabend July 10, 2019, 3:39 a.m. UTC | #2
Jakub Kicinski wrote:
> On Mon, 08 Jul 2019 19:14:05 +0000, John Fastabend wrote:
> > @@ -287,6 +313,27 @@ static void tls_sk_proto_cleanup(struct sock *sk,
> >  #endif
> >  }
> >  
> > +static void tls_sk_proto_unhash(struct sock *sk)
> > +{
> > +	struct inet_connection_sock *icsk = inet_csk(sk);
> > +	long timeo = sock_sndtimeo(sk, 0);
> > +	struct tls_context *ctx;
> > +
> > +	if (unlikely(!icsk->icsk_ulp_data)) {
> 
> Is this for when sockmap is stacked on top of TLS and TLS got removed
> without letting sockmap know?

Right its a pattern I used on the sockmap side and put here. But
I dropped the patch to let sockmap stack on top of TLS because
it was more than a fix IMO. We could probably drop this check on
the other hand its harmless.
> 
> > +		if (sk->sk_prot->unhash)
> > +			sk->sk_prot->unhash(sk);
> > +	}
> > +
> > +	ctx = tls_get_ctx(sk);
> > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > +		tls_sk_proto_cleanup(sk, ctx, timeo);
> > +	icsk->icsk_ulp_data = NULL;
> 
> I think close only starts checking if ctx is NULL in patch 6.
> Looks like some chunks of ctx checking/clearing got spread to
> patch 1 and some to patch 6.

Yeah, I thought the patches were easier to read this way but
maybe not. Could add something in the commit log.

> 
> > +	tls_ctx_free_wq(ctx);
> > +
> > +	if (ctx->unhash)
> > +		ctx->unhash(sk);
> > +}
> > +
> >  static void tls_sk_proto_close(struct sock *sk, long timeout)
> >  {
> >  	struct tls_context *ctx = tls_get_ctx(sk);
>
Jakub Kicinski July 10, 2019, 7:34 p.m. UTC | #3
On Tue, 09 Jul 2019 20:39:24 -0700, John Fastabend wrote:
> Jakub Kicinski wrote:
> > On Mon, 08 Jul 2019 19:14:05 +0000, John Fastabend wrote:  
> > > @@ -287,6 +313,27 @@ static void tls_sk_proto_cleanup(struct sock *sk,
> > >  #endif
> > >  }
> > >  
> > > +static void tls_sk_proto_unhash(struct sock *sk)
> > > +{
> > > +	struct inet_connection_sock *icsk = inet_csk(sk);
> > > +	long timeo = sock_sndtimeo(sk, 0);
> > > +	struct tls_context *ctx;
> > > +
> > > +	if (unlikely(!icsk->icsk_ulp_data)) {  
> > 
> > Is this for when sockmap is stacked on top of TLS and TLS got removed
> > without letting sockmap know?  
> 
> Right its a pattern I used on the sockmap side and put here. But
> I dropped the patch to let sockmap stack on top of TLS because
> it was more than a fix IMO. We could probably drop this check on
> the other hand its harmless.

I feel like this code is pretty complex I struggle to follow all the
paths, so perhaps it'd be better to drop stuff that's not necessary 
to have a clearer picture.

> > > +		if (sk->sk_prot->unhash)
> > > +			sk->sk_prot->unhash(sk);
> > > +	}
> > > +
> > > +	ctx = tls_get_ctx(sk);
> > > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > > +		tls_sk_proto_cleanup(sk, ctx, timeo);
> > > +	icsk->icsk_ulp_data = NULL;  
> > 
> > I think close only starts checking if ctx is NULL in patch 6.
> > Looks like some chunks of ctx checking/clearing got spread to
> > patch 1 and some to patch 6.  
> 
> Yeah, I thought the patches were easier to read this way but
> maybe not. Could add something in the commit log.

Ack! Let me try to get a full grip of patches 2 and 6 and come back 
to this.

> > > +	tls_ctx_free_wq(ctx);
> > > +
> > > +	if (ctx->unhash)
> > > +		ctx->unhash(sk);
> > > +}
> > > +
> > >  static void tls_sk_proto_close(struct sock *sk, long timeout)
> > >  {
> > >  	struct tls_context *ctx = tls_get_ctx(sk);
Jakub Kicinski July 10, 2019, 8:04 p.m. UTC | #4
On Wed, 10 Jul 2019 12:34:17 -0700, Jakub Kicinski wrote:
> > > > +		if (sk->sk_prot->unhash)
> > > > +			sk->sk_prot->unhash(sk);
> > > > +	}
> > > > +
> > > > +	ctx = tls_get_ctx(sk);
> > > > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > > > +		tls_sk_proto_cleanup(sk, ctx, timeo);

Do we still need to hook into unhash? With patch 6 in place perhaps we
can just do disconnect 🥺

cleanup is going to kick off TX but also:

	if (unlikely(sk->sk_write_pending) &&
	    !wait_on_pending_writer(sk, &timeo))
		tls_handle_open_record(sk, 0);

Are we guaranteed that sk_write_pending is 0?  Otherwise
wait_on_pending_writer is hiding yet another release_sock() :(

> > > > +	icsk->icsk_ulp_data = NULL;    
> > > 
> > > I think close only starts checking if ctx is NULL in patch 6.
> > > Looks like some chunks of ctx checking/clearing got spread to
> > > patch 1 and some to patch 6.    
> > 
> > Yeah, I thought the patches were easier to read this way but
> > maybe not. Could add something in the commit log.  
> 
> Ack! Let me try to get a full grip of patches 2 and 6 and come back 
> to this.
> 
> > > > +	tls_ctx_free_wq(ctx);
> > > > +
> > > > +	if (ctx->unhash)
> > > > +		ctx->unhash(sk);
> > > > +}
> > > > +
> > > >  static void tls_sk_proto_close(struct sock *sk, long timeout)
> > > >  {
> > > >  	struct tls_context *ctx = tls_get_ctx(sk);
John Fastabend July 11, 2019, 4:35 p.m. UTC | #5
Jakub Kicinski wrote:
> On Tue, 09 Jul 2019 20:39:24 -0700, John Fastabend wrote:
> > Jakub Kicinski wrote:
> > > On Mon, 08 Jul 2019 19:14:05 +0000, John Fastabend wrote:  
> > > > @@ -287,6 +313,27 @@ static void tls_sk_proto_cleanup(struct sock *sk,
> > > >  #endif
> > > >  }
> > > >  
> > > > +static void tls_sk_proto_unhash(struct sock *sk)
> > > > +{
> > > > +	struct inet_connection_sock *icsk = inet_csk(sk);
> > > > +	long timeo = sock_sndtimeo(sk, 0);
> > > > +	struct tls_context *ctx;
> > > > +
> > > > +	if (unlikely(!icsk->icsk_ulp_data)) {  
> > > 
> > > Is this for when sockmap is stacked on top of TLS and TLS got removed
> > > without letting sockmap know?  
> > 
> > Right its a pattern I used on the sockmap side and put here. But
> > I dropped the patch to let sockmap stack on top of TLS because
> > it was more than a fix IMO. We could probably drop this check on
> > the other hand its harmless.
> 
> I feel like this code is pretty complex I struggle to follow all the
> paths, so perhaps it'd be better to drop stuff that's not necessary 
> to have a clearer picture.
> 

Sure I can drop it and add it later when its necessary.

> > > > +		if (sk->sk_prot->unhash)
> > > > +			sk->sk_prot->unhash(sk);
> > > > +	}
> > > > +
> > > > +	ctx = tls_get_ctx(sk);
> > > > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > > > +		tls_sk_proto_cleanup(sk, ctx, timeo);
> > > > +	icsk->icsk_ulp_data = NULL;  
> > > 
> > > I think close only starts checking if ctx is NULL in patch 6.
> > > Looks like some chunks of ctx checking/clearing got spread to
> > > patch 1 and some to patch 6.  
> > 
> > Yeah, I thought the patches were easier to read this way but
> > maybe not. Could add something in the commit log.
> 
> Ack! Let me try to get a full grip of patches 2 and 6 and come back 
> to this.
> 
> > > > +	tls_ctx_free_wq(ctx);
> > > > +
> > > > +	if (ctx->unhash)
> > > > +		ctx->unhash(sk);
> > > > +}
> > > > +
> > > >  static void tls_sk_proto_close(struct sock *sk, long timeout)
> > > >  {
> > > >  	struct tls_context *ctx = tls_get_ctx(sk);
John Fastabend July 11, 2019, 4:47 p.m. UTC | #6
Jakub Kicinski wrote:
> On Wed, 10 Jul 2019 12:34:17 -0700, Jakub Kicinski wrote:
> > > > > +		if (sk->sk_prot->unhash)
> > > > > +			sk->sk_prot->unhash(sk);
> > > > > +	}
> > > > > +
> > > > > +	ctx = tls_get_ctx(sk);
> > > > > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > > > > +		tls_sk_proto_cleanup(sk, ctx, timeo);
> 
> Do we still need to hook into unhash? With patch 6 in place perhaps we
> can just do disconnect 🥺

?? "can just do a disconnect", not sure I folow. We still need unhash
in cases where we have a TLS socket transition from ESTABLISHED
to LISTEN state without calling close(). This is independent of if
sockmap is running or not.

Originally, I thought this would be extremely rare but I did see it
in real applications on the sockmap side so presumably it is possible
here as well.

> 
> cleanup is going to kick off TX but also:
> 
> 	if (unlikely(sk->sk_write_pending) &&
> 	    !wait_on_pending_writer(sk, &timeo))
> 		tls_handle_open_record(sk, 0);
> 
> Are we guaranteed that sk_write_pending is 0?  Otherwise
> wait_on_pending_writer is hiding yet another release_sock() :(

Not seeing the path to release_sock() at the moment?

   tls_handle_open_record
     push_pending_record
      tls_sw_push_pending_record
        bpf_exec_tx_verdict

If bpf_exec_tx_verdict does a redirect we could hit a relase but that
is another fix I have to get queued up shortly. I think we can fix
that in another series.
Jakub Kicinski July 11, 2019, 6:32 p.m. UTC | #7
On Thu, 11 Jul 2019 09:47:16 -0700, John Fastabend wrote:
> Jakub Kicinski wrote:
> > On Wed, 10 Jul 2019 12:34:17 -0700, Jakub Kicinski wrote:  
> > > > > > +		if (sk->sk_prot->unhash)
> > > > > > +			sk->sk_prot->unhash(sk);
> > > > > > +	}
> > > > > > +
> > > > > > +	ctx = tls_get_ctx(sk);
> > > > > > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > > > > > +		tls_sk_proto_cleanup(sk, ctx, timeo);  
> > 
> > Do we still need to hook into unhash? With patch 6 in place perhaps we
> > can just do disconnect 🥺  
> 
> ?? "can just do a disconnect", not sure I folow. We still need unhash
> in cases where we have a TLS socket transition from ESTABLISHED
> to LISTEN state without calling close(). This is independent of if
> sockmap is running or not.
> 
> Originally, I thought this would be extremely rare but I did see it
> in real applications on the sockmap side so presumably it is possible
> here as well.

Ugh, sorry, I meant shutdown. Instead of replacing the unhash callback
replace the shutdown callback. We probably shouldn't release the socket
lock either there, but we can sleep, so I'll be able to run the device
connection remove callback (which sleep).

> > cleanup is going to kick off TX but also:
> > 
> > 	if (unlikely(sk->sk_write_pending) &&
> > 	    !wait_on_pending_writer(sk, &timeo))
> > 		tls_handle_open_record(sk, 0);
> > 
> > Are we guaranteed that sk_write_pending is 0?  Otherwise
> > wait_on_pending_writer is hiding yet another release_sock() :(  
> 
> Not seeing the path to release_sock() at the moment?
> 
>    tls_handle_open_record
>      push_pending_record
>       tls_sw_push_pending_record
>         bpf_exec_tx_verdict

wait_on_pending_writer
  sk_wait_event
    release_sock

> If bpf_exec_tx_verdict does a redirect we could hit a relase but that
> is another fix I have to get queued up shortly. I think we can fix
> that in another series.

Ugh.
John Fastabend July 11, 2019, 9:25 p.m. UTC | #8
Jakub Kicinski wrote:
> On Thu, 11 Jul 2019 09:47:16 -0700, John Fastabend wrote:
> > Jakub Kicinski wrote:
> > > On Wed, 10 Jul 2019 12:34:17 -0700, Jakub Kicinski wrote:  
> > > > > > > +		if (sk->sk_prot->unhash)
> > > > > > > +			sk->sk_prot->unhash(sk);
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	ctx = tls_get_ctx(sk);
> > > > > > > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > > > > > > +		tls_sk_proto_cleanup(sk, ctx, timeo);  
> > > 
> > > Do we still need to hook into unhash? With patch 6 in place perhaps we
> > > can just do disconnect 🥺  
> > 
> > ?? "can just do a disconnect", not sure I folow. We still need unhash
> > in cases where we have a TLS socket transition from ESTABLISHED
> > to LISTEN state without calling close(). This is independent of if
> > sockmap is running or not.
> > 
> > Originally, I thought this would be extremely rare but I did see it
> > in real applications on the sockmap side so presumably it is possible
> > here as well.
> 
> Ugh, sorry, I meant shutdown. Instead of replacing the unhash callback
> replace the shutdown callback. We probably shouldn't release the socket
> lock either there, but we can sleep, so I'll be able to run the device
> connection remove callback (which sleep).
> 

ah OK seems doable to me. Do you want to write that on top of this
series? Or would you like to push it onto your branch and I can pull
it in push the rest of the patches on top and send it out? I think
if you can get to it in the next few days then it makes sense to wait.

I can't test the hardware side so probably makes more sense for
you to do it if you can.


> > > cleanup is going to kick off TX but also:
> > > 
> > > 	if (unlikely(sk->sk_write_pending) &&
> > > 	    !wait_on_pending_writer(sk, &timeo))
> > > 		tls_handle_open_record(sk, 0);
> > > 
> > > Are we guaranteed that sk_write_pending is 0?  Otherwise
> > > wait_on_pending_writer is hiding yet another release_sock() :(  
> > 
> > Not seeing the path to release_sock() at the moment?
> > 
> >    tls_handle_open_record
> >      push_pending_record
> >       tls_sw_push_pending_record
> >         bpf_exec_tx_verdict
> 
> wait_on_pending_writer
>   sk_wait_event
>     release_sock
> 

ah OK. I'll check on sk_write_pending...

> > If bpf_exec_tx_verdict does a redirect we could hit a relase but that
> > is another fix I have to get queued up shortly. I think we can fix
> > that in another series.
> 
> Ugh.
Jakub Kicinski July 12, 2019, 3:16 a.m. UTC | #9
On Thu, 11 Jul 2019 14:25:54 -0700, John Fastabend wrote:
> Jakub Kicinski wrote:
> > On Thu, 11 Jul 2019 09:47:16 -0700, John Fastabend wrote:  
> > > Jakub Kicinski wrote:  
> > > > On Wed, 10 Jul 2019 12:34:17 -0700, Jakub Kicinski wrote:    
> > > > > > > > +		if (sk->sk_prot->unhash)
> > > > > > > > +			sk->sk_prot->unhash(sk);
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > > +	ctx = tls_get_ctx(sk);
> > > > > > > > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > > > > > > > +		tls_sk_proto_cleanup(sk, ctx, timeo);    
> > > > 
> > > > Do we still need to hook into unhash? With patch 6 in place perhaps we
> > > > can just do disconnect 🥺    
> > > 
> > > ?? "can just do a disconnect", not sure I folow. We still need unhash
> > > in cases where we have a TLS socket transition from ESTABLISHED
> > > to LISTEN state without calling close(). This is independent of if
> > > sockmap is running or not.
> > > 
> > > Originally, I thought this would be extremely rare but I did see it
> > > in real applications on the sockmap side so presumably it is possible
> > > here as well.  
> > 
> > Ugh, sorry, I meant shutdown. Instead of replacing the unhash callback
> > replace the shutdown callback. We probably shouldn't release the socket
> > lock either there, but we can sleep, so I'll be able to run the device
> > connection remove callback (which sleep).
> 
> ah OK seems doable to me. Do you want to write that on top of this
> series? Or would you like to push it onto your branch and I can pull
> it in push the rest of the patches on top and send it out? I think
> if you can get to it in the next few days then it makes sense to wait.

Mm.. perhaps its easiest if we forget about HW for now and get SW 
to work? Once you get the SW to 100% I can probably figure out what 
to do for HW, but I feel like we got too many moving parts ATM.

> I can't test the hardware side so probably makes more sense for
> you to do it if you can.
>
> > > > cleanup is going to kick off TX but also:
> > > > 
> > > > 	if (unlikely(sk->sk_write_pending) &&
> > > > 	    !wait_on_pending_writer(sk, &timeo))
> > > > 		tls_handle_open_record(sk, 0);
> > > > 
> > > > Are we guaranteed that sk_write_pending is 0?  Otherwise
> > > > wait_on_pending_writer is hiding yet another release_sock() :(    
> > > 
> > > Not seeing the path to release_sock() at the moment?
> > > 
> > >    tls_handle_open_record
> > >      push_pending_record
> > >       tls_sw_push_pending_record
> > >         bpf_exec_tx_verdict  
> > 
> > wait_on_pending_writer
> >   sk_wait_event
> >     release_sock
> >   
> 
> ah OK. I'll check on sk_write_pending...
John Fastabend July 15, 2019, 8:58 p.m. UTC | #10
Jakub Kicinski wrote:
> On Thu, 11 Jul 2019 14:25:54 -0700, John Fastabend wrote:
> > Jakub Kicinski wrote:
> > > On Thu, 11 Jul 2019 09:47:16 -0700, John Fastabend wrote:  
> > > > Jakub Kicinski wrote:  
> > > > > On Wed, 10 Jul 2019 12:34:17 -0700, Jakub Kicinski wrote:    
> > > > > > > > > +		if (sk->sk_prot->unhash)
> > > > > > > > > +			sk->sk_prot->unhash(sk);
> > > > > > > > > +	}
> > > > > > > > > +
> > > > > > > > > +	ctx = tls_get_ctx(sk);
> > > > > > > > > +	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
> > > > > > > > > +		tls_sk_proto_cleanup(sk, ctx, timeo);    
> > > > > 
> > > > > Do we still need to hook into unhash? With patch 6 in place perhaps we
> > > > > can just do disconnect 🥺    
> > > > 
> > > > ?? "can just do a disconnect", not sure I folow. We still need unhash
> > > > in cases where we have a TLS socket transition from ESTABLISHED
> > > > to LISTEN state without calling close(). This is independent of if
> > > > sockmap is running or not.
> > > > 
> > > > Originally, I thought this would be extremely rare but I did see it
> > > > in real applications on the sockmap side so presumably it is possible
> > > > here as well.  
> > > 
> > > Ugh, sorry, I meant shutdown. Instead of replacing the unhash callback
> > > replace the shutdown callback. We probably shouldn't release the socket
> > > lock either there, but we can sleep, so I'll be able to run the device
> > > connection remove callback (which sleep).
> > 
> > ah OK seems doable to me. Do you want to write that on top of this
> > series? Or would you like to push it onto your branch and I can pull
> > it in push the rest of the patches on top and send it out? I think
> > if you can get to it in the next few days then it makes sense to wait.
> 
> Mm.. perhaps its easiest if we forget about HW for now and get SW 
> to work? Once you get the SW to 100% I can probably figure out what 
> to do for HW, but I feel like we got too many moving parts ATM.

Hi Jack,

I went ahead and pushed a v3 with your patches at the front. This resolves
a set of issues for me so I think it makes sense to push now and look
to resolve any further issues later. I'll look into the close with pending
data potential issue to see if it is/is-not a real issue.

Thanks,
John
diff mbox series

Patch

diff --git a/include/net/tls.h b/include/net/tls.h
index 0a3540a6304d..2a98d0584999 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -251,6 +251,8 @@  struct tls_context {
 	u8 tx_conf:3;
 	u8 rx_conf:3;
 
+	struct proto *sk_proto;
+
 	int (*push_pending_record)(struct sock *sk, int flags);
 	void (*sk_write_space)(struct sock *sk);
 
@@ -288,6 +290,8 @@  struct tls_context {
 
 	struct list_head list;
 	refcount_t refcount;
+
+	struct work_struct gc;
 };
 
 enum tls_offload_ctx_dir {
@@ -356,7 +360,7 @@  int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 int tls_sw_sendpage(struct sock *sk, struct page *page,
 		    int offset, size_t size, int flags);
-void tls_sw_close(struct sock *sk, long timeout);
+void tls_sw_cancel_work_tx(struct tls_context *tls_ctx);
 void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
 void tls_sw_release_resources_rx(struct sock *sk);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index d63c3583d2f7..e8418456ee24 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -251,6 +251,32 @@  static void tls_write_space(struct sock *sk)
 	ctx->sk_write_space(sk);
 }
 
+static void tls_ctx_free_deferred(struct work_struct *gc)
+{
+	struct tls_context *ctx = container_of(gc, struct tls_context, gc);
+
+	if (ctx->rx_conf == TLS_SW)
+		tls_sw_release_strp_rx(ctx);
+
+	/* Ensure any remaining work items are completed. The sk will
+	 * already have lost its tls_ctx reference by the time we get
+	 * here so no xmit operation will actually be performed.
+	 */
+	tls_sw_cancel_work_tx(ctx);
+	kfree(ctx);
+}
+
+static void tls_ctx_free_wq(struct tls_context *ctx)
+{
+	if (!ctx)
+		return;
+
+	memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
+	memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
+	INIT_WORK(&ctx->gc, tls_ctx_free_deferred);
+	schedule_work(&ctx->gc);
+}
+
 static void tls_ctx_free(struct tls_context *ctx)
 {
 	if (!ctx)
@@ -287,6 +313,27 @@  static void tls_sk_proto_cleanup(struct sock *sk,
 #endif
 }
 
+static void tls_sk_proto_unhash(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	long timeo = sock_sndtimeo(sk, 0);
+	struct tls_context *ctx;
+
+	if (unlikely(!icsk->icsk_ulp_data)) {
+		if (sk->sk_prot->unhash)
+			sk->sk_prot->unhash(sk);
+	}
+
+	ctx = tls_get_ctx(sk);
+	if (ctx->tx_conf == TLS_SW || ctx->rx_conf == TLS_SW)
+		tls_sk_proto_cleanup(sk, ctx, timeo);
+	icsk->icsk_ulp_data = NULL;
+	tls_ctx_free_wq(ctx);
+
+	if (ctx->unhash)
+		ctx->unhash(sk);
+}
+
 static void tls_sk_proto_close(struct sock *sk, long timeout)
 {
 	struct tls_context *ctx = tls_get_ctx(sk);
@@ -305,6 +352,7 @@  static void tls_sk_proto_close(struct sock *sk, long timeout)
 	if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE)
 		goto skip_tx_cleanup;
 
+	sk->sk_prot = ctx->sk_proto;
 	tls_sk_proto_cleanup(sk, ctx, timeo);
 
 skip_tx_cleanup:
@@ -606,6 +654,7 @@  static struct tls_context *create_ctx(struct sock *sk)
 	ctx->setsockopt = sk->sk_prot->setsockopt;
 	ctx->getsockopt = sk->sk_prot->getsockopt;
 	ctx->sk_proto_close = sk->sk_prot->close;
+	ctx->unhash = sk->sk_prot->unhash;
 	return ctx;
 }
 
@@ -729,20 +778,24 @@  static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
 	prot[TLS_BASE][TLS_BASE].setsockopt	= tls_setsockopt;
 	prot[TLS_BASE][TLS_BASE].getsockopt	= tls_getsockopt;
 	prot[TLS_BASE][TLS_BASE].close		= tls_sk_proto_close;
+	prot[TLS_BASE][TLS_BASE].unhash		= tls_sk_proto_unhash;
 
 	prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
 	prot[TLS_SW][TLS_BASE].sendmsg		= tls_sw_sendmsg;
 	prot[TLS_SW][TLS_BASE].sendpage		= tls_sw_sendpage;
+	prot[TLS_SW][TLS_BASE].unhash		= tls_sk_proto_unhash;
 
 	prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
 	prot[TLS_BASE][TLS_SW].recvmsg		  = tls_sw_recvmsg;
 	prot[TLS_BASE][TLS_SW].stream_memory_read = tls_sw_stream_read;
 	prot[TLS_BASE][TLS_SW].close		  = tls_sk_proto_close;
+	prot[TLS_BASE][TLS_SW].unhash		  = tls_sk_proto_unhash;
 
 	prot[TLS_SW][TLS_SW] = prot[TLS_SW][TLS_BASE];
 	prot[TLS_SW][TLS_SW].recvmsg		= tls_sw_recvmsg;
 	prot[TLS_SW][TLS_SW].stream_memory_read	= tls_sw_stream_read;
 	prot[TLS_SW][TLS_SW].close		= tls_sk_proto_close;
+	prot[TLS_SW][TLS_SW].unhash		= tls_sk_proto_unhash;
 
 #ifdef CONFIG_TLS_DEVICE
 	prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
@@ -793,6 +846,7 @@  static int tls_init(struct sock *sk)
 	tls_build_proto(sk);
 	ctx->tx_conf = TLS_BASE;
 	ctx->rx_conf = TLS_BASE;
+	ctx->sk_proto = sk->sk_prot;
 	update_sk_prot(sk, ctx);
 out:
 	return rc;