diff mbox series

[net-next,05/14] nfp: abm: add simple RED offload

Message ID 20180526045338.10993-6-jakub.kicinski@netronome.com
State Accepted, archived
Delegated to: David Miller
Headers show
Series nfp: abm: RED/MQ qdisc offload | expand

Commit Message

Jakub Kicinski May 26, 2018, 4:53 a.m. UTC
Offload simple RED configurations.  For now support only DCTCP
like scenarios where min and max are the same.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
 drivers/net/ethernet/netronome/nfp/abm/main.c | 82 +++++++++++++++++++
 drivers/net/ethernet/netronome/nfp/abm/main.h | 10 +++
 2 files changed, 92 insertions(+)

Comments

Nogah Frankel May 28, 2018, 3:49 p.m. UTC | #1
On 26-May-18 7:53 AM, Jakub Kicinski wrote:
> Offload simple RED configurations.  For now support only DCTCP
> like scenarios where min and max are the same.
> 
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> ---
>   drivers/net/ethernet/netronome/nfp/abm/main.c | 82 +++++++++++++++++++
>   drivers/net/ethernet/netronome/nfp/abm/main.h | 10 +++
>   2 files changed, 92 insertions(+)
> 
> diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
> index 28a18ac62040..22251d88c958 100644
> --- a/drivers/net/ethernet/netronome/nfp/abm/main.c
> +++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
> @@ -38,6 +38,8 @@
>   #include <linux/netdevice.h>
>   #include <linux/rcupdate.h>
>   #include <linux/slab.h>
> +#include <net/pkt_cls.h>
> +#include <net/pkt_sched.h>
>   
>   #include "../nfpcore/nfp.h"
>   #include "../nfpcore/nfp_cpp.h"
> @@ -55,6 +57,84 @@ static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id)
>   	       FIELD_PREP(NFP_ABM_PORTID_ID, id);
>   }
>   
> +static void
> +nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
> +		    u32 handle)
> +{
> +	struct nfp_port *port = nfp_port_from_netdev(netdev);
> +
> +	if (handle != alink->qdiscs[0].handle)
> +		return;
> +
> +	alink->qdiscs[0].handle = TC_H_UNSPEC;
> +	port->tc_offload_cnt = 0;
> +	nfp_abm_ctrl_set_all_q_lvls(alink, ~0);
> +}
> +
> +static int
> +nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
> +		    struct tc_red_qopt_offload *opt)
> +{
> +	struct nfp_port *port = nfp_port_from_netdev(netdev);
> +	int err;
> +
> +	if (opt->set.min != opt->set.max || !opt->set.is_ecn) {

I am a bit worried about the min == max.
sch_red doesn't really support it. It will calculate incorrect delta 
value. (And that only if tc_red_eval_P in iproute2 won't reject it).
You might maybe use max = min+1,  because in real life it will probably 
act the same but without this problem.

Nogah Frankel
(from a new mail address)

> +		nfp_warn(alink->abm->app->cpp,
> +			 "RED offload failed - unsupported parameters\n");
> +		err = -EINVAL;
> +		goto err_destroy;
> +	}
> +	err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min);
> +	if (err)
> +		goto err_destroy;
> +
> +	alink->qdiscs[0].handle = opt->handle;
> +	port->tc_offload_cnt = 1;
> +
> +	return 0;
> +err_destroy:
> +	if (alink->qdiscs[0].handle != TC_H_UNSPEC)
> +		nfp_abm_red_destroy(netdev, alink, alink->qdiscs[0].handle);
> +	return err;
> +}
> +
> +static int
> +nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
> +		     struct tc_red_qopt_offload *opt)
> +{
> +	if (opt->parent != TC_H_ROOT)
> +		return -EOPNOTSUPP;
> +
> +	switch (opt->command) {
> +	case TC_RED_REPLACE:
> +		return nfp_abm_red_replace(netdev, alink, opt);
> +	case TC_RED_DESTROY:
> +		nfp_abm_red_destroy(netdev, alink, opt->handle);
> +		return 0;
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
> +static int
> +nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev,
> +		 enum tc_setup_type type, void *type_data)
> +{
> +	struct nfp_repr *repr = netdev_priv(netdev);
> +	struct nfp_port *port;
> +
> +	port = nfp_port_from_netdev(netdev);
> +	if (!port || port->type != NFP_PORT_PF_PORT)
> +		return -EOPNOTSUPP;
> +
> +	switch (type) {
> +	case TC_SETUP_QDISC_RED:
> +		return nfp_abm_setup_tc_red(netdev, repr->app_priv, type_data);
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
>   static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id)
>   {
>   	enum nfp_repr_type rtype;
> @@ -403,6 +483,8 @@ const struct nfp_app_type app_abm = {
>   	.vnic_alloc	= nfp_abm_vnic_alloc,
>   	.vnic_free	= nfp_abm_vnic_free,
>   
> +	.setup_tc	= nfp_abm_setup_tc,
> +
>   	.eswitch_mode_get	= nfp_abm_eswitch_mode_get,
>   	.eswitch_mode_set	= nfp_abm_eswitch_mode_set,
>   
> diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
> index 1ac651cdc140..979f98fb808b 100644
> --- a/drivers/net/ethernet/netronome/nfp/abm/main.h
> +++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
> @@ -58,18 +58,28 @@ struct nfp_abm {
>   	const struct nfp_rtsym *q_lvls;
>   };
>   
> +/**
> + * struct nfp_red_qdisc - representation of single RED Qdisc
> + * @handle:	handle of currently offloaded RED Qdisc
> + */
> +struct nfp_red_qdisc {
> +	u32 handle;
> +};
> +
>   /**
>    * struct nfp_abm_link - port tuple of a ABM NIC
>    * @abm:	back pointer to nfp_abm
>    * @vnic:	data vNIC
>    * @id:		id of the data vNIC
>    * @queue_base:	id of base to host queue within PCIe (not QC idx)
> + * @qdiscs:	array of qdiscs
>    */
>   struct nfp_abm_link {
>   	struct nfp_abm *abm;
>   	struct nfp_net *vnic;
>   	unsigned int id;
>   	unsigned int queue_base;
> +	struct nfp_red_qdisc qdiscs[1];
>   };
>   
>   void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);
>
Jakub Kicinski May 28, 2018, 11:05 p.m. UTC | #2
Hi Nogah!

On Mon, 28 May 2018 18:49:51 +0300, Nogah Frankel wrote:
> > +static int
> > +nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
> > +		    struct tc_red_qopt_offload *opt)
> > +{
> > +	struct nfp_port *port = nfp_port_from_netdev(netdev);
> > +	int err;
> > +
> > +	if (opt->set.min != opt->set.max || !opt->set.is_ecn) {  
> 
> I am a bit worried about the min == max.
> sch_red doesn't really support it. It will calculate incorrect delta 
> value. (And that only if tc_red_eval_P in iproute2 won't reject it).
> You might maybe use max = min+1,  because in real life it will probably 
> act the same but without this problem.

I remember having a long think about this when I wrote the code.  
My conclusion was that the two would operate almost the same, and
setting min == max may be most obvious to the user.

If min + 1 == max sch_red would act probabilistically for qavg == min,
which is not what the card would do.

Userspace now does this:

tc_red_eval_P() {
	int i = qmax - qmin;
 
	if (!i)
		return 0;
	if (i < 0)
		return -1;
	...
}

And you've fixed delta to be treated as 1 to avoid division by 0 in
commit 5c472203421a ("net_sched: red: Avoid devision by zero"):

red_set_parms() {
	int delta = qth_max - qth_min;
	u32 max_p_delta;

	p->qth_min	= qth_min << Wlog;
	p->qth_max	= qth_max << Wlog;
	p->Wlog		= Wlog;
	p->Plog		= Plog;
	if (delta <= 0)
		delta = 1;
	p->qth_delta	= delta;
	...
}

So we should be safe.  Targets will match.  Probability adjustment for
adaptive should work correctly.  Which doesn't matter anyway, since we
will never use the probabilistic action...

> Nogah Frankel
> (from a new mail address)

Noted :)
Nogah Frankel May 29, 2018, 7:53 a.m. UTC | #3
On 29-May-18 2:05 AM, Jakub Kicinski wrote:

Hi Jakub,

> Hi Nogah!
> 
> On Mon, 28 May 2018 18:49:51 +0300, Nogah Frankel wrote:
>>> +static int
>>> +nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
>>> +		    struct tc_red_qopt_offload *opt)
>>> +{
>>> +	struct nfp_port *port = nfp_port_from_netdev(netdev);
>>> +	int err;
>>> +
>>> +	if (opt->set.min != opt->set.max || !opt->set.is_ecn) {
>>
>> I am a bit worried about the min == max.
>> sch_red doesn't really support it. It will calculate incorrect delta
>> value. (And that only if tc_red_eval_P in iproute2 won't reject it).
>> You might maybe use max = min+1,  because in real life it will probably
>> act the same but without this problem.
> 
> I remember having a long think about this when I wrote the code.
> My conclusion was that the two would operate almost the same, and
> setting min == max may be most obvious to the user.

I agree.

> 
> If min + 1 == max sch_red would act probabilistically for qavg == min,
> which is not what the card would do.
> 
> Userspace now does this:
> 
> tc_red_eval_P() {
> 	int i = qmax - qmin;
>   
> 	if (!i)
> 		return 0;
> 	if (i < 0)
> 		return -1;
> 	...
> }
> 
> And you've fixed delta to be treated as 1 to avoid division by 0 in
> commit 5c472203421a ("net_sched: red: Avoid devision by zero"):
> 
> red_set_parms() {
> 	int delta = qth_max - qth_min;
> 	u32 max_p_delta;
> 
> 	p->qth_min	= qth_min << Wlog;
> 	p->qth_max	= qth_max << Wlog;
> 	p->Wlog		= Wlog;
> 	p->Plog		= Plog;
> 	if (delta <= 0)
> 		delta = 1;
> 	p->qth_delta	= delta;
> 	...
> }

I changes it to avoid division by 0, but I wasn't sure that the delta 
value of 1 will be good, just that it is better then 0.

> 
> So we should be safe.  Targets will match.  Probability adjustment for
> adaptive should work correctly.  Which doesn't matter anyway, since we
> will never use the probabilistic action...

That makes sense, and it is a better way to set this setup (DCTCP, I 
guess?) than before.

Thanks

Nogah


> 
>> Nogah Frankel
>> (from a new mail address)
> 
> Noted :)
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index 28a18ac62040..22251d88c958 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -38,6 +38,8 @@ 
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
 
 #include "../nfpcore/nfp.h"
 #include "../nfpcore/nfp_cpp.h"
@@ -55,6 +57,84 @@  static u32 nfp_abm_portid(enum nfp_repr_type rtype, unsigned int id)
 	       FIELD_PREP(NFP_ABM_PORTID_ID, id);
 }
 
+static void
+nfp_abm_red_destroy(struct net_device *netdev, struct nfp_abm_link *alink,
+		    u32 handle)
+{
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+
+	if (handle != alink->qdiscs[0].handle)
+		return;
+
+	alink->qdiscs[0].handle = TC_H_UNSPEC;
+	port->tc_offload_cnt = 0;
+	nfp_abm_ctrl_set_all_q_lvls(alink, ~0);
+}
+
+static int
+nfp_abm_red_replace(struct net_device *netdev, struct nfp_abm_link *alink,
+		    struct tc_red_qopt_offload *opt)
+{
+	struct nfp_port *port = nfp_port_from_netdev(netdev);
+	int err;
+
+	if (opt->set.min != opt->set.max || !opt->set.is_ecn) {
+		nfp_warn(alink->abm->app->cpp,
+			 "RED offload failed - unsupported parameters\n");
+		err = -EINVAL;
+		goto err_destroy;
+	}
+	err = nfp_abm_ctrl_set_all_q_lvls(alink, opt->set.min);
+	if (err)
+		goto err_destroy;
+
+	alink->qdiscs[0].handle = opt->handle;
+	port->tc_offload_cnt = 1;
+
+	return 0;
+err_destroy:
+	if (alink->qdiscs[0].handle != TC_H_UNSPEC)
+		nfp_abm_red_destroy(netdev, alink, alink->qdiscs[0].handle);
+	return err;
+}
+
+static int
+nfp_abm_setup_tc_red(struct net_device *netdev, struct nfp_abm_link *alink,
+		     struct tc_red_qopt_offload *opt)
+{
+	if (opt->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	switch (opt->command) {
+	case TC_RED_REPLACE:
+		return nfp_abm_red_replace(netdev, alink, opt);
+	case TC_RED_DESTROY:
+		nfp_abm_red_destroy(netdev, alink, opt->handle);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+nfp_abm_setup_tc(struct nfp_app *app, struct net_device *netdev,
+		 enum tc_setup_type type, void *type_data)
+{
+	struct nfp_repr *repr = netdev_priv(netdev);
+	struct nfp_port *port;
+
+	port = nfp_port_from_netdev(netdev);
+	if (!port || port->type != NFP_PORT_PF_PORT)
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_QDISC_RED:
+		return nfp_abm_setup_tc_red(netdev, repr->app_priv, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static struct net_device *nfp_abm_repr_get(struct nfp_app *app, u32 port_id)
 {
 	enum nfp_repr_type rtype;
@@ -403,6 +483,8 @@  const struct nfp_app_type app_abm = {
 	.vnic_alloc	= nfp_abm_vnic_alloc,
 	.vnic_free	= nfp_abm_vnic_free,
 
+	.setup_tc	= nfp_abm_setup_tc,
+
 	.eswitch_mode_get	= nfp_abm_eswitch_mode_get,
 	.eswitch_mode_set	= nfp_abm_eswitch_mode_set,
 
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h
index 1ac651cdc140..979f98fb808b 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.h
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.h
@@ -58,18 +58,28 @@  struct nfp_abm {
 	const struct nfp_rtsym *q_lvls;
 };
 
+/**
+ * struct nfp_red_qdisc - representation of single RED Qdisc
+ * @handle:	handle of currently offloaded RED Qdisc
+ */
+struct nfp_red_qdisc {
+	u32 handle;
+};
+
 /**
  * struct nfp_abm_link - port tuple of a ABM NIC
  * @abm:	back pointer to nfp_abm
  * @vnic:	data vNIC
  * @id:		id of the data vNIC
  * @queue_base:	id of base to host queue within PCIe (not QC idx)
+ * @qdiscs:	array of qdiscs
  */
 struct nfp_abm_link {
 	struct nfp_abm *abm;
 	struct nfp_net *vnic;
 	unsigned int id;
 	unsigned int queue_base;
+	struct nfp_red_qdisc qdiscs[1];
 };
 
 void nfp_abm_ctrl_read_params(struct nfp_abm_link *alink);