Message ID   | 20200609140934.110785-4-willemdebruijn.kernel@gmail.com
State        | RFC
Delegated to | David Miller
Series       | multi release pacing for UDP GSO
On 6/9/20 7:09 AM, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Optionally segment skbs on FQ enqueue, to later send segments at
> their individual delivery time.
>
> Segmentation on enqueue is new for FQ, but already happens in TBF,
> CAKE and netem.
>
> This slow patch should probably be behind a static_branch.
>
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  net/sched/sch_fq.c | 33 +++++++++++++++++++++++++++++++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
> index 8f06a808c59a..a5e2c35bb557 100644
> --- a/net/sched/sch_fq.c
> +++ b/net/sched/sch_fq.c
> @@ -439,8 +439,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
>  	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
>  }
>
> -static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> -		      struct sk_buff **to_free)
> +static int __fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> +			struct sk_buff **to_free)
>  {
>  	struct fq_sched_data *q = qdisc_priv(sch);
>  	struct fq_flow *f;
> @@ -496,6 +496,35 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>  	return NET_XMIT_SUCCESS;
>  }
>
> +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
> +		      struct sk_buff **to_free)
> +{
> +	struct sk_buff *segs, *next;
> +	int ret;
> +
> +	if (likely(!skb_is_gso(skb) || !skb->sk ||

You also need to check sk_fullsock(skb->sk), otherwise KMSAN might be unhappy.

> +		   !skb->sk->sk_txtime_multi_release))
> +		return __fq_enqueue(skb, sch, to_free);
> +
> +	segs = skb_gso_segment_txtime(skb);
> +	if (IS_ERR(segs))
> +		return qdisc_drop(skb, sch, to_free);
> +	if (!segs)
> +		return __fq_enqueue(skb, sch, to_free);
> +
> +	consume_skb(skb);

This needs to be qdisc_drop(skb, sch, to_free) if queue is full, see below.

> +
> +	ret = NET_XMIT_DROP;
> +	skb_list_walk_safe(segs, segs, next) {
> +		skb_mark_not_on_list(segs);
> +		qdisc_skb_cb(segs)->pkt_len = segs->len;

This seems to under-estimate bytes sent. See qdisc_pkt_len_init() for details.

> +		if (__fq_enqueue(segs, sch, to_free) == NET_XMIT_SUCCESS)
> +			ret = NET_XMIT_SUCCESS;
> +	}

	if (unlikely(ret == NET_XMIT_DROP))
		qdisc_drop(skb, sch, to_free);
	else
		consume_skb(skb);

> +
> +	return ret;
> +}
> +
>  static void fq_check_throttled(struct fq_sched_data *q, u64 now)
>  {
>  	unsigned long sample;
>
On 6/9/20 8:00 AM, Eric Dumazet wrote:
>
>
> On 6/9/20 7:09 AM, Willem de Bruijn wrote:
>> From: Willem de Bruijn <willemb@google.com>
>>
>> Optionally segment skbs on FQ enqueue, to later send segments at
>> their individual delivery time.
>>
>> Segmentation on enqueue is new for FQ, but already happens in TBF,
>> CAKE and netem.
>>
>> This slow patch should probably be behind a static_branch.
>>
>> Signed-off-by: Willem de Bruijn <willemb@google.com>
>> ---
>>  net/sched/sch_fq.c | 33 +++++++++++++++++++++++++++++++--
>>  1 file changed, 31 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
>> index 8f06a808c59a..a5e2c35bb557 100644
>> --- a/net/sched/sch_fq.c
>> +++ b/net/sched/sch_fq.c
>> @@ -439,8 +439,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
>>  	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
>>  }
>>
>> -static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>> -		      struct sk_buff **to_free)
>> +static int __fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>> +			struct sk_buff **to_free)
>>  {
>>  	struct fq_sched_data *q = qdisc_priv(sch);
>>  	struct fq_flow *f;
>> @@ -496,6 +496,35 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>>  	return NET_XMIT_SUCCESS;
>>  }
>>
>> +static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
>> +		      struct sk_buff **to_free)
>> +{
>> +	struct sk_buff *segs, *next;
>> +	int ret;
>> +
>> +	if (likely(!skb_is_gso(skb) || !skb->sk ||
>
> You also need to check sk_fullsock(skb->sk), otherwise KMSAN might be unhappy.
>
>> +		   !skb->sk->sk_txtime_multi_release))
>> +		return __fq_enqueue(skb, sch, to_free);
>> +
>> +	segs = skb_gso_segment_txtime(skb);
>> +	if (IS_ERR(segs))
>> +		return qdisc_drop(skb, sch, to_free);
>> +	if (!segs)
>> +		return __fq_enqueue(skb, sch, to_free);
>> +
>> +	consume_skb(skb);
>
> This needs to be qdisc_drop(skb, sch, to_free) if queue is full, see below.
>
>> +
>> +	ret = NET_XMIT_DROP;
>> +	skb_list_walk_safe(segs, segs, next) {
>> +		skb_mark_not_on_list(segs);
>> +		qdisc_skb_cb(segs)->pkt_len = segs->len;
>
> This seems to under-estimate bytes sent. See qdisc_pkt_len_init() for details.
>
>> +		if (__fq_enqueue(segs, sch, to_free) == NET_XMIT_SUCCESS)
>> +			ret = NET_XMIT_SUCCESS;
>> +	}
>
> 	if (unlikely(ret == NET_XMIT_DROP))
> 		qdisc_drop(skb, sch, to_free);

Maybe not qdisc_drop() (qdisc drop counters are already updated) but at least
kfree_skb() so that drop monitor is not fooled.

> 	else
> 		consume_skb(skb);
>
>> +
>> +	return ret;
>> +}
>> +
>>  static void fq_check_throttled(struct fq_sched_data *q, u64 now)
>>  {
>>  	unsigned long sample;
>>
>
>
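For reference, a minimal sketch of how fq_enqueue() could look with the review
feedback above folded in: the sk_fullsock() guard, the original GSO skb freed
only once the fate of its segments is known, and kfree_skb() rather than
qdisc_drop() when every segment was dropped (per-segment drops are already
counted inside __fq_enqueue()). skb_gso_segment_txtime() and
sk_txtime_multi_release come from earlier patches in this series; this is only
an illustration of the suggested control flow, not the authors' respin.

static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                      struct sk_buff **to_free)
{
        struct sk_buff *segs, *next;
        int ret;

        /* Fast path: no txtime segmentation requested, or not a full socket
         * (request/timewait socks cannot be dereferenced for this field).
         */
        if (likely(!skb_is_gso(skb) || !skb->sk || !sk_fullsock(skb->sk) ||
                   !skb->sk->sk_txtime_multi_release))
                return __fq_enqueue(skb, sch, to_free);

        segs = skb_gso_segment_txtime(skb);
        if (IS_ERR(segs))
                return qdisc_drop(skb, sch, to_free);
        if (!segs)
                return __fq_enqueue(skb, sch, to_free);

        /* Do not free the original skb yet: its fate depends on whether
         * at least one segment is accepted below.
         */
        ret = NET_XMIT_DROP;
        skb_list_walk_safe(segs, segs, next) {
                skb_mark_not_on_list(segs);
                /* Review note: segs->len may under-estimate bytes on the
                 * wire; see qdisc_pkt_len_init().
                 */
                qdisc_skb_cb(segs)->pkt_len = segs->len;
                if (__fq_enqueue(segs, sch, to_free) == NET_XMIT_SUCCESS)
                        ret = NET_XMIT_SUCCESS;
        }

        /* All segments dropped: free with kfree_skb() so the drop monitor
         * sees a drop, but skip qdisc_drop() since the per-segment drops
         * were already counted. Otherwise this is a normal consume.
         */
        if (unlikely(ret == NET_XMIT_DROP))
                kfree_skb(skb);
        else
                consume_skb(skb);

        return ret;
}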
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 8f06a808c59a..a5e2c35bb557 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -439,8 +439,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
 	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
 }
 
-static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
-		      struct sk_buff **to_free)
+static int __fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+			struct sk_buff **to_free)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
 	struct fq_flow *f;
@@ -496,6 +496,35 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	return NET_XMIT_SUCCESS;
 }
 
+static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+		      struct sk_buff **to_free)
+{
+	struct sk_buff *segs, *next;
+	int ret;
+
+	if (likely(!skb_is_gso(skb) || !skb->sk ||
+		   !skb->sk->sk_txtime_multi_release))
+		return __fq_enqueue(skb, sch, to_free);
+
+	segs = skb_gso_segment_txtime(skb);
+	if (IS_ERR(segs))
+		return qdisc_drop(skb, sch, to_free);
+	if (!segs)
+		return __fq_enqueue(skb, sch, to_free);
+
+	consume_skb(skb);
+
+	ret = NET_XMIT_DROP;
+	skb_list_walk_safe(segs, segs, next) {
+		skb_mark_not_on_list(segs);
+		qdisc_skb_cb(segs)->pkt_len = segs->len;
+		if (__fq_enqueue(segs, sch, to_free) == NET_XMIT_SUCCESS)
+			ret = NET_XMIT_SUCCESS;
+	}
+
+	return ret;
+}
+
 static void fq_check_throttled(struct fq_sched_data *q, u64 now)
 {
 	unsigned long sample;
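For context on the pkt_len remark in the review: qdisc_pkt_len_init()
(net/core/dev.c) sets pkt_len for a GSO skb to skb->len plus the header bytes
that segmentation will later replicate into the extra segments, so qdisc byte
counters approximate bytes on the wire. A simplified paraphrase of that helper
(the real function also validates the header pointers with
skb_header_pointer() and handles SKB_GSO_DODGY):

        qdisc_skb_cb(skb)->pkt_len = skb->len;
        if (skb_is_gso(skb) && skb_transport_header_was_set(skb)) {
                unsigned int hdr_len;

                /* mac + network header bytes ... */
                hdr_len = skb_transport_header(skb) - skb_mac_header(skb);

                /* ... plus the transport header (TCP or UDP) */
                if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))
                        hdr_len += tcp_hdrlen(skb);
                else
                        hdr_len += sizeof(struct udphdr);

                /* every segment beyond the first repeats these headers */
                qdisc_skb_cb(skb)->pkt_len +=
                        (skb_shinfo(skb)->gso_segs - 1) * hdr_len;
        }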