[net-next,1/5] tcp: Create list of TFO-contexts

Message ID 20181214224007.54813-2-cpaasch@apple.com
State Changes Requested, archived
Delegated to: David Miller
Series tcp: Introduce a TFO key-pool for clean cookie-rotation

Commit Message

Christoph Paasch Dec. 14, 2018, 10:40 p.m. UTC
Instead of having a single TFO-context, we now have a list of
tcp_fastopen_context, bounded by TCP_FASTOPEN_CTXT_LEN (set to 2).

This enables us to do a rolling TFO-key update that allows the server to
accept old cookies and at the same time announce new ones to the client
(see follow-up patch).

Signed-off-by: Christoph Paasch <cpaasch@apple.com>
---
 include/net/tcp.h       |  2 ++
 net/ipv4/tcp_fastopen.c | 52 +++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 50 insertions(+), 4 deletions(-)
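
For illustration only (not part of this patch): the rolling update described above works because a server-side reader can try every key in the chain when validating a client's cookie. A minimal sketch of such a reader, with a hypothetical helper name; the actual consumer is not part of this patch:

/* Sketch only -- not part of this patch.  The chain is walked under RCU,
 * newest context first, so a cookie generated with either the current or
 * the previous key is still accepted while a rotation is in progress.
 */
static bool tcp_fastopen_cookie_in_keypool(struct fastopen_queue *q,
					   const struct tcp_fastopen_cookie *recv)
{
	struct tcp_fastopen_context *ctx;
	bool match = false;

	rcu_read_lock();
	for (ctx = rcu_dereference(q->ctx); ctx && !match;
	     ctx = rcu_dereference(ctx->next)) {
		/* derive the expected cookie from ctx->key / ctx->tfm and
		 * compare it against the cookie received from the client
		 */
	}
	rcu_read_unlock();

	return match;
}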

Comments

Eric Dumazet Dec. 17, 2018, 6:31 a.m. UTC | #1
On 12/14/2018 02:40 PM, Christoph Paasch wrote:
> Instead of having a single TFO-context, we now have a list of
> tcp_fastopen_context, bounded by TCP_FASTOPEN_CTXT_LEN (set to 2).
> 
> This enables us to do a rolling TFO-key update that allows the server to
> accept old cookies and at the same time announce new ones to the client
> (see follow-up patch).
> 
> Signed-off-by: Christoph Paasch <cpaasch@apple.com>
> ---
>  include/net/tcp.h       |  2 ++
>  net/ipv4/tcp_fastopen.c | 52 +++++++++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 50 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index e0a65c067662..e629ea2e6c9d 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -1622,9 +1622,11 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
>  			     struct tcp_fastopen_cookie *cookie);
>  bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
>  #define TCP_FASTOPEN_KEY_LENGTH 16
> +#define TCP_FASTOPEN_CTXT_LEN 2
>  
>  /* Fastopen key context */
>  struct tcp_fastopen_context {
> +	struct tcp_fastopen_context __rcu *next;
>  	struct crypto_cipher	*tfm;
>  	__u8			key[TCP_FASTOPEN_KEY_LENGTH];
>  	struct rcu_head		rcu;
> diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
> index 018a48477355..c52d5b8eabf0 100644
> --- a/net/ipv4/tcp_fastopen.c
> +++ b/net/ipv4/tcp_fastopen.c
> @@ -37,8 +37,14 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
>  {
>  	struct tcp_fastopen_context *ctx =
>  	    container_of(head, struct tcp_fastopen_context, rcu);
> -	crypto_free_cipher(ctx->tfm);
> -	kfree(ctx);
> +
> +	while (ctx) {
> +		struct tcp_fastopen_context *prev = ctx;
> +		/* We own ctx, thus no need to hold the Fastopen-lock */
> +		ctx = rcu_dereference_protected(ctx->next, 1);
> +		crypto_free_cipher(prev->tfm);
> +		kfree(prev);
> +	}
>

It seems this function does not need to be changed, since at most one context
should be freed per run ?
Christoph Paasch Dec. 17, 2018, 3:49 p.m. UTC | #2
On 16/12/18 - 22:31:41, Eric Dumazet wrote:
> 
> 
> On 12/14/2018 02:40 PM, Christoph Paasch wrote:
> > Instead of having a single TFO-context, we now have a list of
> > tcp_fastopen_context, bounded by TCP_FASTOPEN_CTXT_LEN (set to 2).
> > 
> > This enables us to do a rolling TFO-key update that allows the server to
> > accept old cookies and at the same time announce new ones to the client
> > (see follow-up patch).
> > 
> > Signed-off-by: Christoph Paasch <cpaasch@apple.com>
> > ---
> >  include/net/tcp.h       |  2 ++
> >  net/ipv4/tcp_fastopen.c | 52 +++++++++++++++++++++++++++++++++++++++++++++----
> >  2 files changed, 50 insertions(+), 4 deletions(-)
> > 
> > diff --git a/include/net/tcp.h b/include/net/tcp.h
> > index e0a65c067662..e629ea2e6c9d 100644
> > --- a/include/net/tcp.h
> > +++ b/include/net/tcp.h
> > @@ -1622,9 +1622,11 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
> >  			     struct tcp_fastopen_cookie *cookie);
> >  bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
> >  #define TCP_FASTOPEN_KEY_LENGTH 16
> > +#define TCP_FASTOPEN_CTXT_LEN 2
> >  
> >  /* Fastopen key context */
> >  struct tcp_fastopen_context {
> > +	struct tcp_fastopen_context __rcu *next;
> >  	struct crypto_cipher	*tfm;
> >  	__u8			key[TCP_FASTOPEN_KEY_LENGTH];
> >  	struct rcu_head		rcu;
> > diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
> > index 018a48477355..c52d5b8eabf0 100644
> > --- a/net/ipv4/tcp_fastopen.c
> > +++ b/net/ipv4/tcp_fastopen.c
> > @@ -37,8 +37,14 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
> >  {
> >  	struct tcp_fastopen_context *ctx =
> >  	    container_of(head, struct tcp_fastopen_context, rcu);
> > -	crypto_free_cipher(ctx->tfm);
> > -	kfree(ctx);
> > +
> > +	while (ctx) {
> > +		struct tcp_fastopen_context *prev = ctx;
> > +		/* We own ctx, thus no need to hold the Fastopen-lock */
> > +		ctx = rcu_dereference_protected(ctx->next, 1);
> > +		crypto_free_cipher(prev->tfm);
> > +		kfree(prev);
> > +	}
> >
> 
> It seems this function does not need to be changed, since at most one context
> should be freed per run ?

It gets called from tcp_fastopen_destroy_cipher() though (to destroy the
socket's TFO-keys when the socket gets closed). There it has to destroy the
whole list.

Same when going through exit_batch for the namespace.


We could of course split it into tcp_fastopen_ctx_free_one() and
tcp_fastopen_ctx_free_all(). But maybe that's overkill as it's a rare thing
to do?
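
For what it's worth, a rough sketch of such a split (the helper names follow
the suggestion above and are hypothetical, not from any posted patch):

static void tcp_fastopen_ctx_free_one(struct tcp_fastopen_context *ctx)
{
	crypto_free_cipher(ctx->tfm);
	kfree(ctx);
}

static void tcp_fastopen_ctx_free_all(struct tcp_fastopen_context *ctx)
{
	while (ctx) {
		struct tcp_fastopen_context *prev = ctx;

		/* We own the whole chain here, no need for the Fastopen lock */
		ctx = rcu_dereference_protected(ctx->next, 1);
		tcp_fastopen_ctx_free_one(prev);
	}
}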


Christoph
Eric Dumazet Dec. 17, 2018, 4:04 p.m. UTC | #3
On Fri, Dec 14, 2018 at 2:40 PM Christoph Paasch <cpaasch@apple.com> wrote:
>

...

>  int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
>                               void *key, unsigned int len)
>  {
> @@ -96,13 +131,22 @@ error:             kfree(ctx);
>         spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
>         if (sk) {
>                 q = &inet_csk(sk)->icsk_accept_queue.fastopenq;

> +               rcu_assign_pointer(ctx->next, q->ctx);
At this point, ctx is not yet visible, so you do not need a barrier yet
                    ctx->next = q->ctx;


> +               rcu_assign_pointer(q->ctx, ctx);

Note that readers could see 3 contexts in the chain, instead of maximum two.

This means that proc_tcp_fastopen_key() (your 3/5 change) would
overflow an automatic array :

while (ctxt) {
        memcpy(&key[i], ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
        i += 4;
        ctxt = rcu_dereference(ctxt->next);
}


> +
>                 octx = rcu_dereference_protected(q->ctx,
>                         lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
> -               rcu_assign_pointer(q->ctx, ctx);
> +
> +               octx = tcp_fastopen_cut_keypool(octx, &net->ipv4.tcp_fastopen_ctx_lock);
>         } else {
> +               rcu_assign_pointer(ctx->next, net->ipv4.tcp_fastopen_ctx);

same remark here.

> +               rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
> +
>                 octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
>                         lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
> -               rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
> +
> +               octx = tcp_fastopen_cut_keypool(octx,
> +                                               &net->ipv4.tcp_fastopen_ctx_lock);
>         }
>         spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
>
> --
> 2.16.2
>
Eric Dumazet Dec. 17, 2018, 4:07 p.m. UTC | #4
On 12/17/2018 07:49 AM, Christoph Paasch wrote:

> It gets called from tcp_fastopen_destroy_cipher() though (to destroy the
> socket's TFO-keys when the socket gets closed). There it has to destroy the
> whole list.
> 
> Same when going through exit_batch for the namespace.
> 
> 
> We could of course split it into tcp_fastopen_ctx_free_one() and
> tcp_fastopen_ctx_free_all(). But maybe that's overkill as it's a rare thing
> to do?


No, this is fine, please see my second review of this first patch.

Thanks.
Christoph Paasch Dec. 17, 2018, 9:57 p.m. UTC | #5
On 17/12/18 - 08:04:08, Eric Dumazet wrote:
> On Fri, Dec 14, 2018 at 2:40 PM Christoph Paasch <cpaasch@apple.com> wrote:
> >
> 
> ...
> 
> >  int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
> >                               void *key, unsigned int len)
> >  {
> > @@ -96,13 +131,22 @@ error:             kfree(ctx);
> >         spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
> >         if (sk) {
> >                 q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
> 
> > +               rcu_assign_pointer(ctx->next, q->ctx);
> At this point, ctx is not yet visible, so you do not need a barrier yet
>                     ctx->next = q->ctx;

Thanks, I will change that.

> 
> 
> > +               rcu_assign_pointer(q->ctx, ctx);
> 
> Note that readers could see 3 contexts in the chain, instead of maximum two.
> 
> This means that proc_tcp_fastopen_key() (your 3/5 change) would
> overflow an automatic array :
> 
> while (ctxt) {
>         memcpy(&key[i], ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
>         i += 4;
>         ctxt = rcu_dereference(ctxt->next);
> }

Ouch! Thanks for spotting this.

If it's ok to have a brief moment of 3 contexts for the readers, I would
protect the readers against overflows.
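
A minimal sketch of such a guard on the reader side, assuming the loop simply
stops after TCP_FASTOPEN_CTXT_LEN contexts (not necessarily the final fix):

/* Sketch only: bound the copy in proc_tcp_fastopen_key() so a transient
 * third context in the chain cannot overflow the on-stack key[] array.
 */
int n = 0;

while (ctxt && n < TCP_FASTOPEN_CTXT_LEN) {
	memcpy(&key[i], ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
	i += 4;
	n++;
	ctxt = rcu_dereference(ctxt->next);
}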

> > +
> >                 octx = rcu_dereference_protected(q->ctx,
> >                         lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
> > -               rcu_assign_pointer(q->ctx, ctx);
> > +
> > +               octx = tcp_fastopen_cut_keypool(octx, &net->ipv4.tcp_fastopen_ctx_lock);
> >         } else {
> > +               rcu_assign_pointer(ctx->next, net->ipv4.tcp_fastopen_ctx);
> 
> same remark here.

Same, will change that.


Christoph

> > +               rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
> > +
> >                 octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
> >                         lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
> > -               rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
> > +
> > +               octx = tcp_fastopen_cut_keypool(octx,
> > +                                               &net->ipv4.tcp_fastopen_ctx_lock);
> >         }
> >         spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
> >
> > --
> > 2.16.2
> >
Eric Dumazet Dec. 17, 2018, 10:01 p.m. UTC | #6
On Mon, Dec 17, 2018 at 1:57 PM Christoph Paasch <cpaasch@apple.com> wrote:
>
> On 17/12/18 - 08:04:08, Eric Dumazet wrote:
> > On Fri, Dec 14, 2018 at 2:40 PM Christoph Paasch <cpaasch@apple.com> wrote:
> > >
> >
> > ...
> >
> > >  int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
> > >                               void *key, unsigned int len)
> > >  {
> > > @@ -96,13 +131,22 @@ error:             kfree(ctx);
> > >         spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
> > >         if (sk) {
> > >                 q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
> >
> > > +               rcu_assign_pointer(ctx->next, q->ctx);
> > At this point, ctx is not yet visible, so you do not need a barrier yet
> >                     ctx->next = q->ctx;
>
> Thanks, I will change that.
>
> >
> >
> > > +               rcu_assign_pointer(q->ctx, ctx);
> >
> > Note that readers could see 3 contexts in the chain, instead of maximum two.
> >
> > This means that proc_tcp_fastopen_key() (your 3/5 change) would
> > overflow an automatic array :
> >
> > while (ctxt) {
> >         memcpy(&key[i], ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
> >         i += 4;
> >         ctxt = rcu_dereference(ctxt->next);
> > }
>
> Ouch! Thanks for spotting this.
>
> If it's ok to have a brief moment of 3 contexts for the readers, I would
> protect the readers against overflows.

I believe you can refactor the code here, to publish the new pointer
(rcu_assign_pointer(ctx->next, q->ctx);)
only after you have shortened the chain.

No worries if one incoming packet can see only the old primary key,
but not the fallback one,
since we are anyway about to remove the old fallback.

Ideally the rcu_assign_pointer(ctx->next, q->ctx) operation should be
the last one, when the new chain
is clean and ready to be used.
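
One possible reading of that, sketched for the socket branch (assumptions: the
trim has to leave room for the new head, and the exact calls may differ from
whatever revision was eventually posted):

/* Sketch only.  Trim the existing chain first, link it behind the new
 * context with a plain store (ctx is not yet visible to readers), and
 * publish the new head last, once the chain behind it is clean.
 */
octx = rcu_dereference_protected(q->ctx,
	lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
octx = tcp_fastopen_cut_keypool(octx,
				&net->ipv4.tcp_fastopen_ctx_lock);

ctx->next = q->ctx;			/* not yet published, plain store */
rcu_assign_pointer(q->ctx, ctx);	/* publish the clean chain last */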
Christoph Paasch Dec. 17, 2018, 10:50 p.m. UTC | #7
On 17/12/18 - 14:01:41, Eric Dumazet wrote:
> On Mon, Dec 17, 2018 at 1:57 PM Christoph Paasch <cpaasch@apple.com> wrote:
> >
> > On 17/12/18 - 08:04:08, Eric Dumazet wrote:
> > > On Fri, Dec 14, 2018 at 2:40 PM Christoph Paasch <cpaasch@apple.com> wrote:
> > > >
> > >
> > > ...
> > >
> > > >  int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
> > > >                               void *key, unsigned int len)
> > > >  {
> > > > @@ -96,13 +131,22 @@ error:             kfree(ctx);
> > > >         spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
> > > >         if (sk) {
> > > >                 q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
> > >
> > > > +               rcu_assign_pointer(ctx->next, q->ctx);
> > > At this point, ctx is not yet visible, so you do not need a barrier yet
> > >                     ctx->next = q->ctx;
> >
> > Thanks, I will change that.
> >
> > >
> > >
> > > > +               rcu_assign_pointer(q->ctx, ctx);
> > >
> > > Note that readers could see 3 contexts in the chain, instead of maximum two.
> > >
> > > This means that proc_tcp_fastopen_key() (your 3/5 change) would
> > > overflow an automatic array :
> > >
> > > while (ctxt) {
> > >         memcpy(&key[i], ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
> > >         i += 4;
> > >         ctxt = rcu_dereference(ctxt->next);
> > > }
> >
> > Ouch! Thanks for spotting this.
> >
> > If it's ok to have a brief moment of 3 contexts for the readers, I would
> > protect against overflows the readers.
> 
> I believe you can refactor the code here, to publish the new pointer
> (rcu_assign_pointer(ctx->next, q->ctx);)
> only after you have shortened the chain.
> 
> No worries if one incoming packet can see only the old primary key,
> but not the fallback one,
> since we are anyway about to remove the old fallback.
> 
> Ideally the rcu_assign_pointer(ctx->next, q->ctx) operation should be
> the last one, when the new chain
> is clean and ready to be used.

Sounds good, I will do that.

Thanks,
Christoph

Patch

diff --git a/include/net/tcp.h b/include/net/tcp.h
index e0a65c067662..e629ea2e6c9d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1622,9 +1622,11 @@  bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 			     struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
 #define TCP_FASTOPEN_KEY_LENGTH 16
+#define TCP_FASTOPEN_CTXT_LEN 2
 
 /* Fastopen key context */
 struct tcp_fastopen_context {
+	struct tcp_fastopen_context __rcu *next;
 	struct crypto_cipher	*tfm;
 	__u8			key[TCP_FASTOPEN_KEY_LENGTH];
 	struct rcu_head		rcu;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 018a48477355..c52d5b8eabf0 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -37,8 +37,14 @@  static void tcp_fastopen_ctx_free(struct rcu_head *head)
 {
 	struct tcp_fastopen_context *ctx =
 	    container_of(head, struct tcp_fastopen_context, rcu);
-	crypto_free_cipher(ctx->tfm);
-	kfree(ctx);
+
+	while (ctx) {
+		struct tcp_fastopen_context *prev = ctx;
+		/* We own ctx, thus no need to hold the Fastopen-lock */
+		ctx = rcu_dereference_protected(ctx->next, 1);
+		crypto_free_cipher(prev->tfm);
+		kfree(prev);
+	}
 }
 
 void tcp_fastopen_destroy_cipher(struct sock *sk)
@@ -66,6 +72,35 @@  void tcp_fastopen_ctx_destroy(struct net *net)
 		call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
 }
 
+static struct tcp_fastopen_context *
+tcp_fastopen_cut_keypool(struct tcp_fastopen_context *ctx,
+			 spinlock_t *lock)
+{
+	int cnt = 0;
+
+	while (ctx) {
+		/* We iterate the list to see if we have more than
+		 * TCP_FASTOPEN_CTXT_LEN contexts. If we do, we remove the rest
+		 * of the list and free it later
+		 */
+
+		cnt++;
+		if (cnt >= TCP_FASTOPEN_CTXT_LEN) {
+			/* It's the last one, return the rest so it gets freed */
+			struct tcp_fastopen_context *prev = ctx;
+
+			ctx = rcu_dereference_protected(ctx->next,
+							lockdep_is_held(lock));
+			rcu_assign_pointer(prev->next, NULL);
+			break;
+		}
+		ctx = rcu_dereference_protected(ctx->next,
+						lockdep_is_held(lock));
+	}
+
+	return ctx;
+}
+
 int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
 			      void *key, unsigned int len)
 {
@@ -96,13 +131,22 @@  error:		kfree(ctx);
 	spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
 	if (sk) {
 		q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+		rcu_assign_pointer(ctx->next, q->ctx);
+		rcu_assign_pointer(q->ctx, ctx);
+
 		octx = rcu_dereference_protected(q->ctx,
 			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
-		rcu_assign_pointer(q->ctx, ctx);
+
+		octx = tcp_fastopen_cut_keypool(octx, &net->ipv4.tcp_fastopen_ctx_lock);
 	} else {
+		rcu_assign_pointer(ctx->next, net->ipv4.tcp_fastopen_ctx);
+		rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+
 		octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
 			lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
-		rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+
+		octx = tcp_fastopen_cut_keypool(octx,
+						&net->ipv4.tcp_fastopen_ctx_lock);
 	}
 	spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);