[RFC,net-next,3/6] net_sched: sch_fq: multiple release time support

Message ID: 20200609140934.110785-4-willemdebruijn.kernel@gmail.com
State: RFC
Delegated to: David Miller
Series: multi release pacing for UDP GSO

Commit Message

Willem de Bruijn June 9, 2020, 2:09 p.m. UTC
From: Willem de Bruijn <willemb@google.com>

Optionally segment skbs on FQ enqueue, to later send segments at
their individual delivery time.

Segmentation on enqueue is new for FQ, but already happens in TBF,
CAKE and netem.

This slow path should probably be behind a static_branch.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 net/sched/sch_fq.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)
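
The static_branch gating mentioned in the commit message could look
roughly like the sketch below. The key name, the fq_enqueue_segment()
helper and the place where the key is enabled (for instance the first
time a socket requests multi release pacing) are illustrative
assumptions, not part of this patch.

	/* Sketch: the common enqueue path pays only a patched-out branch
	 * until some socket has ever requested multi release pacing.
	 */
	DEFINE_STATIC_KEY_FALSE(fq_txtime_multi_release_used);

	static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			      struct sk_buff **to_free)
	{
		if (!static_branch_unlikely(&fq_txtime_multi_release_used))
			return __fq_enqueue(skb, sch, to_free);

		/* hypothetical helper wrapping the segmenting slow path */
		return fq_enqueue_segment(skb, sch, to_free);
	}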

Comments

Eric Dumazet June 9, 2020, 3 p.m. UTC | #1
On 6/9/20 7:09 AM, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Optionally segment skbs on FQ enqueue, to later send segments at
> their individual delivery time.
> 
> Segmentation on enqueue is new for FQ, but already happens in TBF,
> CAKE and netem.
> 
> This slow path should probably be behind a static_branch.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  net/sched/sch_fq.c | 33 +++++++++++++++++++++++++++++++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> index 8f06a808c59a..a5e2c35bb557 100644
> --- a/net/sched/sch_fq.c
> +++ b/net/sched/sch_fq.c
> @@ -439,8 +439,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
>  	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
>  }
>  
> -static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> -		      struct sk_buff **to_free)
> +static int __fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> +			struct sk_buff **to_free)
>  {
>  	struct fq_sched_data *q = qdisc_priv(sch);
>  	struct fq_flow *f;
> @@ -496,6 +496,35 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>  	return NET_XMIT_SUCCESS;
>  }
>  
> +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> +		      struct sk_buff **to_free)
> +{
> +	struct sk_buff *segs, *next;
> +	int ret;
> +
> +	if (likely(!skb_is_gso(skb) || !skb->sk ||

You also need to check sk_fullsock(skb->sk), otherwise KMSAN might be unhappy.
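
I.e. the guard would become something along these lines (sketch;
sk_txtime_multi_release being the new socket field this series adds):

	/* request/timewait minisocks do not carry the full struct sock,
	 * so check sk_fullsock() before reading the new field.
	 */
	if (likely(!skb_is_gso(skb) || !skb->sk || !sk_fullsock(skb->sk) ||
		   !skb->sk->sk_txtime_multi_release))
		return __fq_enqueue(skb, sch, to_free);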

> +		   !skb->sk->sk_txtime_multi_release))
> +		return __fq_enqueue(skb, sch, to_free);
> +
> +	segs = skb_gso_segment_txtime(skb);
> +	if (IS_ERR(segs))
> +		return qdisc_drop(skb, sch, to_free);
> +	if (!segs)
> +		return __fq_enqueue(skb, sch, to_free);
> +
> +	consume_skb(skb);

   This needs to be qdisc_drop(skb, sch, to_free) if queue is full, see below.

> +
> +	ret = NET_XMIT_DROP;
> +	skb_list_walk_safe(segs, segs, next) {
> +		skb_mark_not_on_list(segs);
> +		qdisc_skb_cb(segs)->pkt_len = segs->len;

This seems to under-estimate bytes sent. See qdisc_pkt_len_init() for details.
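
For reference, a per-segment estimate in the spirit of
qdisc_pkt_len_init() could look like the sketch below, assuming a
sub-segment may itself still be GSO (several datagrams sharing one
release time); the helper name and the UDP-only transport header are
illustrative assumptions:

	static unsigned int fq_seg_pkt_len(const struct sk_buff *seg)
	{
		unsigned int hdr_len;

		if (!skb_is_gso(seg))
			return seg->len;

		/* mac + network headers, as in qdisc_pkt_len_init() */
		hdr_len = skb_transport_header(seg) - skb_mac_header(seg);
		/* + transport header, UDP for the UDP GSO case */
		hdr_len += sizeof(struct udphdr);

		/* seg->len counts the headers only once; add them for the
		 * remaining wire segments.
		 */
		return seg->len + (skb_shinfo(seg)->gso_segs - 1) * hdr_len;
	}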

> +		if (__fq_enqueue(segs, sch, to_free) == NET_XMIT_SUCCESS)
> +			ret = NET_XMIT_SUCCESS;
> +	}

        if (unlikely(ret == NET_XMIT_DROP))
            qdisc_drop(skb, sch, to_free);
        else
            consume_skb(skb);

> +
> +	return ret;
> +}
> +
>  static void fq_check_throttled(struct fq_sched_data *q, u64 now)
>  {
>  	unsigned long sample;
>
Eric Dumazet June 9, 2020, 3:10 p.m. UTC | #2
On 6/9/20 8:00 AM, Eric Dumazet wrote:
> 
> 
> On 6/9/20 7:09 AM, Willem de Bruijn wrote:
>> From: Willem de Bruijn <willemb@google.com>
>>
>> Optionally segment skbs on FQ enqueue, to later send segments at
>> their individual delivery time.
>>
>> Segmentation on enqueue is new for FQ, but already happens in TBF,
>> CAKE and netem.
>>
>> This slow path should probably be behind a static_branch.
>>
>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>> ---
>>  net/sched/sch_fq.c | 33 +++++++++++++++++++++++++++++++--
>>  1 file changed, 31 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
>> index 8f06a808c59a..a5e2c35bb557 100644
>> --- a/net/sched/sch_fq.c
>> +++ b/net/sched/sch_fq.c
>> @@ -439,8 +439,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
>>  	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
>>  }
>>  
>> -static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>> -		      struct sk_buff **to_free)
>> +static int __fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>> +			struct sk_buff **to_free)
>>  {
>>  	struct fq_sched_data *q = qdisc_priv(sch);
>>  	struct fq_flow *f;
>> @@ -496,6 +496,35 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>>  	return NET_XMIT_SUCCESS;
>>  }
>>  
>> +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>> +		      struct sk_buff **to_free)
>> +{
>> +	struct sk_buff *segs, *next;
>> +	int ret;
>> +
>> +	if (likely(!skb_is_gso(skb) || !skb->sk ||
> 
> You also need to check sk_fullsock(skb->sk), otherwise KMSAN might be unhappy.
> 
>> +		   !skb->sk->sk_txtime_multi_release))
>> +		return __fq_enqueue(skb, sch, to_free);
>> +
>> +	segs = skb_gso_segment_txtime(skb);
>> +	if (IS_ERR(segs))
>> +		return qdisc_drop(skb, sch, to_free);
>> +	if (!segs)
>> +		return __fq_enqueue(skb, sch, to_free);
>> +
>> +	consume_skb(skb);
> 
>    This needs to be qdisc_drop(skb, sch, to_free) if queue is full, see below.
> 
>> +
>> +	ret = NET_XMIT_DROP;
>> +	skb_list_walk_safe(segs, segs, next) {
>> +		skb_mark_not_on_list(segs);
>> +		qdisc_skb_cb(segs)->pkt_len = segs->len;
> 
> This seems to under-estimate bytes sent. See qdisc_pkt_len_init() for details.
> 
>> +		if (__fq_enqueue(segs, sch, to_free) == NET_XMIT_SUCCESS)
>> +			ret = NET_XMIT_SUCCESS;
>> +	}
> 
>         if (unlikely(ret == NET_XMIT_DROP))
>             qdisc_drop(skb, sch, to_free);

Maybe not qdisc_drop() (qdisc drop counters are already updated)
but at least kfree_skb() so that drop monitor is not fooled.
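
So the tail of the segmentation path, with the early consume_skb()
before the loop removed, would end up roughly as (sketch):

	/* Segments that failed to enqueue were already accounted as drops
	 * by the inner __fq_enqueue(), so do not touch qdisc counters
	 * again; still use kfree_skb() so drop monitor records the case
	 * where every segment was dropped.
	 */
	if (unlikely(ret == NET_XMIT_DROP))
		kfree_skb(skb);
	else
		consume_skb(skb);

	return ret;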

>         else
>             consume_skb(skb);
> 
>> +
>> +	return ret;
>> +}
>> +
>>  static void fq_check_throttled(struct fq_sched_data *q, u64 now)
>>  {
>>  	unsigned long sample;
>>
> 
>

Patch

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 8f06a808c59a..a5e2c35bb557 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -439,8 +439,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
 	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
 }
 
-static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
-		      struct sk_buff **to_free)
+static int __fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			struct sk_buff **to_free)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
 	struct fq_flow *f;
@@ -496,6 +496,35 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	return NET_XMIT_SUCCESS;
 }
 
+static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+		      struct sk_buff **to_free)
+{
+	struct sk_buff *segs, *next;
+	int ret;
+
+	if (likely(!skb_is_gso(skb) || !skb->sk ||
+		   !skb->sk->sk_txtime_multi_release))
+		return __fq_enqueue(skb, sch, to_free);
+
+	segs = skb_gso_segment_txtime(skb);
+	if (IS_ERR(segs))
+		return qdisc_drop(skb, sch, to_free);
+	if (!segs)
+		return __fq_enqueue(skb, sch, to_free);
+
+	consume_skb(skb);
+
+	ret = NET_XMIT_DROP;
+	skb_list_walk_safe(segs, segs, next) {
+		skb_mark_not_on_list(segs);
+		qdisc_skb_cb(segs)->pkt_len = segs->len;
+		if (__fq_enqueue(segs, sch, to_free) == NET_XMIT_SUCCESS)
+			ret = NET_XMIT_SUCCESS;
+	}
+
+	return ret;
+}
+
 static void fq_check_throttled(struct fq_sched_data *q, u64 now)
 {
 	unsigned long sample;