Message ID | 20180526045338.10993-6-jakub.kicinski@netronome.com |
---|---|
State | Accepted, archived |
Delegated to: | David Miller |
Headers | show |
Series | nfp: abm: RED/MQ qdisc offload | expand |
On 26-May-18 7:53 AM, Jakub Kicinski wrote: > Offload simple RED configurations. For now support only DCTCP > like scenarios where min and max are the same. > > Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> > --- > drivers/net/ethernet/netronome/nfp/abm/main.c | 82 +++++++++++++++++++ > drivers/net/ethernet/netronome/nfp/abm/main.h | 10 +++ > 2 files changed, 92 insertions(+) > > diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c > index 28a18ac62040..22251d88c958 100644 > --- a/drivers/net/ethernet/netronome/nfp/abm/main.c > +++ b/drivers/net/ethernet/netronome/nfp/abm/main.c > @@ -38,6 +38,8 @@ > #include <linux/netdevice.h> > #include <linux/rcupdate.h> > #include <linux/slab.h> > +#include <net/pkt_cls.h> > +#include <net/pkt_sched.h> > > #include "../nfpcore/nfp.h" > #include "../nfpcore/nfp_cpp.h" > @@ -55,6 +57,84 @@ static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id) > FIELD_PREP(NFP_ABM_PORTID_ID, id); > } > > +static void > +nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink, > + u32 handle) > +{ > + struct nfp_port *port = nfp_port_from_netdev(netdev); > + > + if (handle != alink->qdiscs[0].handle) > + return; > + > + alink->qdiscs[0].handle = TC_H_UNSPEC; > + port->tc_offload_cnt = 0; > + nfp_abm_ctrl_set_all_q_lvls(alink, ~0); > +} > + > +static int > +nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink, > + struct tc_red_qopt_offload *opt) > +{ > + struct nfp_port *port = nfp_port_from_netdev(netdev); > + int err; > + > + if (opt->set.min != opt->set.max || !opt->set.is_ecn) { I am a bit worried about the min == max. sch_red doesn't really support it. It will calculate incorrect delta value. (And that only if tc_red_eval_P in iproute2 won't reject it). You might maybe use max = min+1, because in real life it will probably act the same but without this problem. Nogah Frankel (from a new mail address) > + nfp_warn(alink->abm->app->cpp, > + "RED offload failed - unsupported parameters\n"); > + err = -EINVAL; > + goto err_destroy; > + } > + err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min); > + if (err) > + goto err_destroy; > + > + alink->qdiscs[0].handle = opt->handle; > + port->tc_offload_cnt = 1; > + > + return 0; > +err_destroy: > + if (alink->qdiscs[0].handle != TC_H_UNSPEC) > + nfp_abm_red_destroy(netdev, alink, alink->qdiscs[0].handle); > + return err; > +} > + > +static int > +nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink, > + struct tc_red_qopt_offload *opt) > +{ > + if (opt->parent != TC_H_ROOT) > + return -EOPNOTSUPP; > + > + switch (opt->command) { > + case TC_RED_REPLACE: > + return nfp_abm_red_replace(netdev, alink, opt); > + case TC_RED_DESTROY: > + nfp_abm_red_destroy(netdev, alink, opt->handle); > + return 0; > + default: > + return -EOPNOTSUPP; > + } > +} > + > +static int > +nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev, > + enum tc_setup_type type, void *type_data) > +{ > + struct nfp_repr *repr = netdev_priv(netdev); > + struct nfp_port *port; > + > + port = nfp_port_from_netdev(netdev); > + if (!port || port->type != NFP_PORT_PF_PORT) > + return -EOPNOTSUPP; > + > + switch (type) { > + case TC_SETUP_QDISC_RED: > + return nfp_abm_setup_tc_red(netdev, repr->app_priv, type_data); > + default: > + return -EOPNOTSUPP; > + } > +} > + > static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id) > { > enum nfp_repr_type rtype; > @@ -403,6 +483,8 @@ const struct nfp_app_type app_abm = { > .vnic_alloc = nfp_abm_vnic_alloc, > .vnic_free = nfp_abm_vnic_free, > > + .setup_tc = nfp_abm_setup_tc, > + > .eswitch_mode_get = nfp_abm_eswitch_mode_get, > .eswitch_mode_set = nfp_abm_eswitch_mode_set, > > diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h > index 1ac651cdc140..979f98fb808b 100644 > --- a/drivers/net/ethernet/netronome/nfp/abm/main.h > +++ b/drivers/net/ethernet/netronome/nfp/abm/main.h > @@ -58,18 +58,28 @@ struct nfp_abm { > const struct nfp_rtsym *q_lvls; > }; > > +/** > + * struct nfp_red_qdisc - representation of single RED Qdisc > + * @handle: handle of currently offloaded RED Qdisc > + */ > +struct nfp_red_qdisc { > + u32 handle; > +}; > + > /** > * struct nfp_abm_link - port tuple of a ABM NIC > * @abm: back pointer to nfp_abm > * @vnic: data vNIC > * @id: id of the data vNIC > * @queue_base: id of base to host queue within PCIe (not QC idx) > + * @qdiscs: array of qdiscs > */ > struct nfp_abm_link { > struct nfp_abm *abm; > struct nfp_net *vnic; > unsigned int id; > unsigned int queue_base; > + struct nfp_red_qdisc qdiscs[1]; > }; > > void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink); >
Hi Nogah! On Mon, 28 May 2018 18:49:51 +0300, Nogah Frankel wrote: > > +static int > > +nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink, > > + struct tc_red_qopt_offload *opt) > > +{ > > + struct nfp_port *port = nfp_port_from_netdev(netdev); > > + int err; > > + > > + if (opt->set.min != opt->set.max || !opt->set.is_ecn) { > > I am a bit worried about the min == max. > sch_red doesn't really support it. It will calculate incorrect delta > value. (And that only if tc_red_eval_P in iproute2 won't reject it). > You might maybe use max = min+1, because in real life it will probably > act the same but without this problem. I remember having a long think about this when I wrote the code. My conclusion was that the two would operate almost the same, and setting min == max may be most obvious to the user. If min + 1 == max sch_red would act probabilistically for qavg == min, which is not what the card would do. Userspace now does this: tc_red_eval_P() { int i = qmax - qmin; if (!i) return 0; if (i < 0) return -1; ... } And you've fixed delta to be treated as 1 to avoid division by 0 in commit 5c472203421a ("net_sched: red: Avoid devision by zero"): red_set_parms() { int delta = qth_max - qth_min; u32 max_p_delta; p->qth_min = qth_min << Wlog; p->qth_max = qth_max << Wlog; p->Wlog = Wlog; p->Plog = Plog; if (delta <= 0) delta = 1; p->qth_delta = delta; ... } So we should be safe. Targets will match. Probability adjustment for adaptive should work correctly. Which doesn't matter anyway, since we will never use the probabilistic action... > Nogah Frankel > (from a new mail address) Noted :)
On 29-May-18 2:05 AM, Jakub Kicinski wrote: Hi Jakub, > Hi Nogah! > > On Mon, 28 May 2018 18:49:51 +0300, Nogah Frankel wrote: >>> +static int >>> +nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink, >>> + struct tc_red_qopt_offload *opt) >>> +{ >>> + struct nfp_port *port = nfp_port_from_netdev(netdev); >>> + int err; >>> + >>> + if (opt->set.min != opt->set.max || !opt->set.is_ecn) { >> >> I am a bit worried about the min == max. >> sch_red doesn't really support it. It will calculate incorrect delta >> value. (And that only if tc_red_eval_P in iproute2 won't reject it). >> You might maybe use max = min+1, because in real life it will probably >> act the same but without this problem. > > I remember having a long think about this when I wrote the code. > My conclusion was that the two would operate almost the same, and > setting min == max may be most obvious to the user. I agree. > > If min + 1 == max sch_red would act probabilistically for qavg == min, > which is not what the card would do. > > Userspace now does this: > > tc_red_eval_P() { > int i = qmax - qmin; > > if (!i) > return 0; > if (i < 0) > return -1; > ... > } > > And you've fixed delta to be treated as 1 to avoid division by 0 in > commit 5c472203421a ("net_sched: red: Avoid devision by zero"): > > red_set_parms() { > int delta = qth_max - qth_min; > u32 max_p_delta; > > p->qth_min = qth_min << Wlog; > p->qth_max = qth_max << Wlog; > p->Wlog = Wlog; > p->Plog = Plog; > if (delta <= 0) > delta = 1; > p->qth_delta = delta; > ... > } I changes it to avoid division by 0, but I wasn't sure that the delta value of 1 will be good, just that it is better then 0. > > So we should be safe. Targets will match. Probability adjustment for > adaptive should work correctly. Which doesn't matter anyway, since we > will never use the probabilistic action... That makes sense, and it is a better way to set this setup (DCTCP, I guess?) than before. Thanks Nogah > >> Nogah Frankel >> (from a new mail address) > > Noted :) >
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c index 28a18ac62040..22251d88c958 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/main.c +++ b/drivers/net/ethernet/netronome/nfp/abm/main.c @@ -38,6 +38,8 @@ #include <linux/netdevice.h> #include <linux/rcupdate.h> #include <linux/slab.h> +#include <net/pkt_cls.h> +#include <net/pkt_sched.h> #include "../nfpcore/nfp.h" #include "../nfpcore/nfp_cpp.h" @@ -55,6 +57,84 @@ static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id) FIELD_PREP(NFP_ABM_PORTID_ID, id); } +static void +nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink, + u32 handle) +{ + struct nfp_port *port = nfp_port_from_netdev(netdev); + + if (handle != alink->qdiscs[0].handle) + return; + + alink->qdiscs[0].handle = TC_H_UNSPEC; + port->tc_offload_cnt = 0; + nfp_abm_ctrl_set_all_q_lvls(alink, ~0); +} + +static int +nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_red_qopt_offload *opt) +{ + struct nfp_port *port = nfp_port_from_netdev(netdev); + int err; + + if (opt->set.min != opt->set.max || !opt->set.is_ecn) { + nfp_warn(alink->abm->app->cpp, + "RED offload failed - unsupported parameters\n"); + err = -EINVAL; + goto err_destroy; + } + err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min); + if (err) + goto err_destroy; + + alink->qdiscs[0].handle = opt->handle; + port->tc_offload_cnt = 1; + + return 0; +err_destroy: + if (alink->qdiscs[0].handle != TC_H_UNSPEC) + nfp_abm_red_destroy(netdev, alink, alink->qdiscs[0].handle); + return err; +} + +static int +nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink, + struct tc_red_qopt_offload *opt) +{ + if (opt->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + switch (opt->command) { + case TC_RED_REPLACE: + return nfp_abm_red_replace(netdev, alink, opt); + case TC_RED_DESTROY: + nfp_abm_red_destroy(netdev, alink, opt->handle); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev, + enum tc_setup_type type, void *type_data) +{ + struct nfp_repr *repr = netdev_priv(netdev); + struct nfp_port *port; + + port = nfp_port_from_netdev(netdev); + if (!port || port->type != NFP_PORT_PF_PORT) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_QDISC_RED: + return nfp_abm_setup_tc_red(netdev, repr->app_priv, type_data); + default: + return -EOPNOTSUPP; + } +} + static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id) { enum nfp_repr_type rtype; @@ -403,6 +483,8 @@ const struct nfp_app_type app_abm = { .vnic_alloc = nfp_abm_vnic_alloc, .vnic_free = nfp_abm_vnic_free, + .setup_tc = nfp_abm_setup_tc, + .eswitch_mode_get = nfp_abm_eswitch_mode_get, .eswitch_mode_set = nfp_abm_eswitch_mode_set, diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h index 1ac651cdc140..979f98fb808b 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/main.h +++ b/drivers/net/ethernet/netronome/nfp/abm/main.h @@ -58,18 +58,28 @@ struct nfp_abm { const struct nfp_rtsym *q_lvls; }; +/** + * struct nfp_red_qdisc - representation of single RED Qdisc + * @handle: handle of currently offloaded RED Qdisc + */ +struct nfp_red_qdisc { + u32 handle; +}; + /** * struct nfp_abm_link - port tuple of a ABM NIC * @abm: back pointer to nfp_abm * @vnic: data vNIC * @id: id of the data vNIC * @queue_base: id of base to host queue within PCIe (not QC idx) + * @qdiscs: array of qdiscs */ struct nfp_abm_link { struct nfp_abm *abm; struct nfp_net *vnic; unsigned int id; unsigned int queue_base; + struct nfp_red_qdisc qdiscs[1]; }; void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
Offload simple RED configurations. For now support only DCTCP like scenarios where min and max are the same. Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> --- drivers/net/ethernet/netronome/nfp/abm/main.c | 82 +++++++++++++++++++ drivers/net/ethernet/netronome/nfp/abm/main.h | 10 +++ 2 files changed, 92 insertions(+)