Message ID | 1360932297-6698-1-git-send-email-jiri@resnulli.us |
---|---|
State | RFC, archived |
Headers | show |
On Fri, 2013-02-15 at 13:44 +0100, Jiri Pirko wrote: > According to Eric's suggestion, when gso skb can't be sent in one mtu > time, resegment it. > > Note that helper will be moved to sch_api.c probably so it can be used > by other code. > > Also, I'm not sure similar patch to this can be done for act_police. I > have to look at that more closely. > > Thanks for review. > > Signed-off-by: Jiri Pirko <jiri@resnulli.us> > --- > include/net/sch_generic.h | 1 + > net/core/dev.c | 24 ------------------------ > net/sched/sch_api.c | 27 +++++++++++++++++++++++++++ > net/sched/sch_tbf.c | 38 +++++++++++++++++++++++++++++++++++++- > 4 files changed, 65 insertions(+), 25 deletions(-) > > diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h > index 2761c90..de6db57 100644 > --- a/include/net/sch_generic.h > +++ b/include/net/sch_generic.h > @@ -372,6 +372,7 @@ extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, > struct Qdisc_ops *ops, u32 parentid); > extern void __qdisc_calculate_pkt_len(struct sk_buff *skb, > const struct qdisc_size_table *stab); > +extern void qdisc_pkt_len_init(struct sk_buff *skb); > extern void tcf_destroy(struct tcf_proto *tp); > extern void tcf_destroy_chain(struct tcf_proto **fl); > > diff --git a/net/core/dev.c b/net/core/dev.c > index f444736..8f86b1c 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -2680,30 +2680,6 @@ out: > return rc; > } > > -static void qdisc_pkt_len_init(struct sk_buff *skb) > -{ > - const struct skb_shared_info *shinfo = skb_shinfo(skb); > - > - qdisc_skb_cb(skb)->pkt_len = skb->len; > - > - /* To get more precise estimation of bytes sent on wire, > - * we add to pkt_len the headers size of all segments > - */ > - if (shinfo->gso_size) { > - unsigned int hdr_len; > - > - /* mac layer + network layer */ > - hdr_len = skb_transport_header(skb) - skb_mac_header(skb); > - > - /* + transport layer */ > - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) > - hdr_len += tcp_hdrlen(skb); > - else > - hdr_len += sizeof(struct udphdr); > - qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; > - } > -} > - Please dont move this function, its called in fast path and should be inlined by compiler > static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, > struct net_device *dev, > struct netdev_queue *txq) > diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c > index fe1ba54..7672259 100644 > --- a/net/sched/sch_api.c > +++ b/net/sched/sch_api.c > @@ -29,6 +29,8 @@ > #include <linux/hrtimer.h> > #include <linux/lockdep.h> > #include <linux/slab.h> > +#include <linux/tcp.h> > +#include <linux/udp.h> > > #include <net/net_namespace.h> > #include <net/sock.h> > @@ -464,6 +466,31 @@ out: > } > EXPORT_SYMBOL(__qdisc_calculate_pkt_len); > > + > void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) > { > if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { > diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c > index c8388f3..bfc89be 100644 > --- a/net/sched/sch_tbf.c > +++ b/net/sched/sch_tbf.c > @@ -121,7 +121,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) > struct tbf_sched_data *q = qdisc_priv(sch); > int ret; > > - if (qdisc_pkt_len(skb) > q->max_size) > + if (qdisc_pkt_len(skb) > q->max_size && !skb_is_gso(skb)) > return qdisc_reshape_fail(skb, sch); > > ret = qdisc_enqueue(skb, q->qdisc); > @@ -147,6 +147,33 @@ static unsigned int tbf_drop(struct Qdisc *sch) > return len; > } > > +static bool qdisc_gso_segment(struct Qdisc *qdisc, struct sk_buff *skb) > +{ > + struct sk_buff *segs; > + struct sk_buff *next_skb; > + struct sk_buff *prev_skb; > + int num_skbs = 0; > + > + segs = skb_gso_segment(skb, 0); > + if (IS_ERR(segs) || !segs) > + return false; > + __skb_unlink(skb, &qdisc->q); > + kfree_skb(skb); > + skb = segs; > + prev_skb = (struct sk_buff *) &qdisc->q; > + do { > + next_skb = skb->next; > + qdisc_pkt_len_init(skb); > + qdisc_calculate_pkt_len(skb, qdisc); You don't need this, packet is non gso, so you only have to qdisc_skb_cb(skb)->pkt_len = skb->len; qdisc_calculate_pkt_len(); > + __skb_queue_after(&qdisc->q, prev_skb, skb); Yes, this might need different strategy for non fifo qdisc, we can see that later. > + prev_skb = skb; > + skb = next_skb; > + num_skbs++; > + } while (skb); > + qdisc_tree_decrease_qlen(qdisc, 1 - num_skbs); > + return true; > +} > + > static struct sk_buff *tbf_dequeue(struct Qdisc *sch) > { > struct tbf_sched_data *q = qdisc_priv(sch); > @@ -167,6 +194,15 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) > ptoks = toks + q->ptokens; > if (ptoks > q->mtu) > ptoks = q->mtu; Cache psched_l2t_ns(&q->peak, len) in a variable here to avoid double call. > + if (skb_is_gso(skb) && > + (s64) psched_l2t_ns(&q->peak, len) > q->mtu && > + qdisc_gso_segment(q->qdisc, skb)) { > + q->qdisc->gso_skb = NULL; > + skb = q->qdisc->ops->peek(q->qdisc); > + if (unlikely(!skb)) > + return NULL; > + len = qdisc_pkt_len(skb); > + } > ptoks -= (s64) psched_l2t_ns(&q->peak, len); > } > toks += q->tokens; Thanks ! -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Thanks for review Eric. I will process your suggestions and submit the patch. Fri, Feb 15, 2013 at 03:18:49PM CET, eric.dumazet@gmail.com wrote: >On Fri, 2013-02-15 at 13:44 +0100, Jiri Pirko wrote: >> According to Eric's suggestion, when gso skb can't be sent in one mtu >> time, resegment it. >> >> Note that helper will be moved to sch_api.c probably so it can be used >> by other code. >> >> Also, I'm not sure similar patch to this can be done for act_police. I >> have to look at that more closely. Any idea how to fix this in act_police? I believe we can't use the same approach since there is no queue there. Thanks. Jiri -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 2013-02-15 at 13:44 +0100, Jiri Pirko wrote: > + segs = skb_gso_segment(skb, 0); Try to find out how to avoid a full copy, if the device is SG capable. maybe using segs = skb_gso_segment(skb, netif_skb_features(skb) & ~NETIF_F_GSO_MASK); Or something like that... > + if (IS_ERR(segs) || !segs) > + return false; > + __skb_unlink(skb, &qdisc->q); > + kfree_skb(skb); consume_skb() would be nice here. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2761c90..de6db57 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -372,6 +372,7 @@ extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, struct Qdisc_ops *ops, u32 parentid); extern void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); +extern void qdisc_pkt_len_init(struct sk_buff *skb); extern void tcf_destroy(struct tcf_proto *tp); extern void tcf_destroy_chain(struct tcf_proto **fl); diff --git a/net/core/dev.c b/net/core/dev.c index f444736..8f86b1c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2680,30 +2680,6 @@ out: return rc; } -static void qdisc_pkt_len_init(struct sk_buff *skb) -{ - const struct skb_shared_info *shinfo = skb_shinfo(skb); - - qdisc_skb_cb(skb)->pkt_len = skb->len; - - /* To get more precise estimation of bytes sent on wire, - * we add to pkt_len the headers size of all segments - */ - if (shinfo->gso_size) { - unsigned int hdr_len; - - /* mac layer + network layer */ - hdr_len = skb_transport_header(skb) - skb_mac_header(skb); - - /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) - hdr_len += tcp_hdrlen(skb); - else - hdr_len += sizeof(struct udphdr); - qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; - } -} - static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index fe1ba54..7672259 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -29,6 +29,8 @@ #include <linux/hrtimer.h> #include <linux/lockdep.h> #include <linux/slab.h> +#include <linux/tcp.h> +#include <linux/udp.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -464,6 +466,31 @@ out: } EXPORT_SYMBOL(__qdisc_calculate_pkt_len); +void qdisc_pkt_len_init(struct sk_buff *skb) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + + qdisc_skb_cb(skb)->pkt_len = skb->len; + + /* To get more precise estimation of bytes sent on wire, + * we add to pkt_len the headers size of all segments + */ + if (shinfo->gso_size) { + unsigned int hdr_len; + + /* mac layer + network layer */ + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + /* + transport layer */ + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); + qdisc_skb_cb(skb)->pkt_len += (shinfo->gso_segs - 1) * hdr_len; + } +} +EXPORT_SYMBOL(qdisc_pkt_len_init); + void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) { if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index c8388f3..bfc89be 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -121,7 +121,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); int ret; - if (qdisc_pkt_len(skb) > q->max_size) + if (qdisc_pkt_len(skb) > q->max_size && !skb_is_gso(skb)) return qdisc_reshape_fail(skb, sch); ret = qdisc_enqueue(skb, q->qdisc); @@ -147,6 +147,33 @@ static unsigned int tbf_drop(struct Qdisc *sch) return len; } +static bool qdisc_gso_segment(struct Qdisc *qdisc, struct sk_buff *skb) +{ + struct sk_buff *segs; + struct sk_buff *next_skb; + struct sk_buff *prev_skb; + int num_skbs = 0; + + segs = skb_gso_segment(skb, 0); + if (IS_ERR(segs) || !segs) + return false; + __skb_unlink(skb, &qdisc->q); + kfree_skb(skb); + skb = segs; + prev_skb = (struct sk_buff *) &qdisc->q; + do { + next_skb = skb->next; + qdisc_pkt_len_init(skb); + qdisc_calculate_pkt_len(skb, qdisc); + __skb_queue_after(&qdisc->q, prev_skb, skb); + prev_skb = skb; + skb = next_skb; + num_skbs++; + } while (skb); + qdisc_tree_decrease_qlen(qdisc, 1 - num_skbs); + return true; +} + static struct sk_buff *tbf_dequeue(struct Qdisc *sch) { struct tbf_sched_data *q = qdisc_priv(sch); @@ -167,6 +194,15 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) ptoks = toks + q->ptokens; if (ptoks > q->mtu) ptoks = q->mtu; + if (skb_is_gso(skb) && + (s64) psched_l2t_ns(&q->peak, len) > q->mtu && + qdisc_gso_segment(q->qdisc, skb)) { + q->qdisc->gso_skb = NULL; + skb = q->qdisc->ops->peek(q->qdisc); + if (unlikely(!skb)) + return NULL; + len = qdisc_pkt_len(skb); + } ptoks -= (s64) psched_l2t_ns(&q->peak, len); } toks += q->tokens;
According to Eric's suggestion, when gso skb can't be sent in one mtu time, resegment it. Note that helper will be moved to sch_api.c probably so it can be used by other code. Also, I'm not sure similar patch to this can be done for act_police. I have to look at that more closely. Thanks for review. Signed-off-by: Jiri Pirko <jiri@resnulli.us> --- include/net/sch_generic.h | 1 + net/core/dev.c | 24 ------------------------ net/sched/sch_api.c | 27 +++++++++++++++++++++++++++ net/sched/sch_tbf.c | 38 +++++++++++++++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 25 deletions(-)