diff mbox series

[net-next,v2,2/6] net: sched: Allow extending set of supported RED flags

Message ID 20200311173356.38181-3-petrm@mellanox.com
State Changes Requested
Delegated to: David Miller
Headers show
Series RED: Introduce an ECN tail-dropping mode | expand

Commit Message

Petr Machata March 11, 2020, 5:33 p.m. UTC
The qdiscs RED, GRED, SFQ and CHOKE use different subsets of the same pool
of global RED flags. These are passed in tc_red_qopt.flags. However none of
these qdiscs validate the flag field, and just copy it over wholesale to
internal structures, and later dump it back. (An exception is GRED, which
does validate for VQs -- however not for the main setup.)

A broken userspace can therefore configure a qdisc with arbitrary
unsupported flags, and later expect to see the flags on qdisc dump. The
current ABI therefore allows storage of several bits of custom data to
qdisc instances of the types mentioned above. How many bits, depends on
which flags are meaningful for the qdisc in question. E.g. SFQ recognizes
flags ECN and HARDDROP, and the rest is not interpreted.

If SFQ ever needs to support ADAPTATIVE, it needs another way of doing it,
and at the same time it needs to retain the possibility to store 6 bits of
uninterpreted data. Likewise RED, which adds a new flag later in this
patchset.

To that end, this patch adds a new function, red_get_flags(), to split the
passed flags of RED-like qdiscs to flags and user bits, and to validate the
flags that are passed in. It further adds a new attribute, TCA_RED_FLAGS,
to pass arbitrary flags.

Signed-off-by: Petr Machata <petrm@mellanox.com>
---

Notes:
    v2:
    - This patch is new.

 include/net/red.h              | 25 +++++++++++++++++++++++++
 include/uapi/linux/pkt_sched.h | 16 ++++++++++++++++
 net/sched/sch_red.c            | 24 +++++++++++++++++++++---
 3 files changed, 62 insertions(+), 3 deletions(-)

Comments

Jakub Kicinski March 11, 2020, 10:09 p.m. UTC | #1
On Wed, 11 Mar 2020 19:33:52 +0200 Petr Machata wrote:
> diff --git a/include/net/red.h b/include/net/red.h
> index 9665582c4687..5718d2b25637 100644
> --- a/include/net/red.h
> +++ b/include/net/red.h
> @@ -179,6 +179,31 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
>  	return true;
>  }
>  
> +static inline bool red_get_flags(unsigned char flags,
> +				 unsigned char historic_mask,
> +				 struct nlattr *flags_attr,
> +				 unsigned int supported_mask,
> +				 unsigned int *p_flags, unsigned char *p_userbits,
> +				 struct netlink_ext_ack *extack)
> +{
> +	if (flags && flags_attr) {
> +		NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute");
> +		return false;
> +	}
> +
> +	*p_flags = flags & historic_mask;
> +	if (flags_attr)
> +		*p_flags |= nla_get_u32(flags_attr);

It's less error prone for callers not to modify the output parameters
until we're sure the call won't fail.

> +	if (*p_flags & ~supported_mask) {
> +		NL_SET_ERR_MSG_MOD(extack, "unsupported RED flags specified");
> +		return false;
> +	}
> +
> +	*p_userbits = flags & ~historic_mask;
> +	return true;
> +}
> +

> +#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE)
> +
>  struct tc_red_xstats {
>  	__u32           early;          /* Early drops */
>  	__u32           pdrop;          /* Drops due to queue limits */
> diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
> index 1695421333e3..61d7c5a61279 100644
> --- a/net/sched/sch_red.c
> +++ b/net/sched/sch_red.c
> @@ -35,7 +35,11 @@
>  
>  struct red_sched_data {
>  	u32			limit;		/* HARD maximal queue length */
> -	unsigned char		flags;
> +
> +	u32			flags;

Can we stick to uchar until the number of flags grows?

> +	/* Non-flags in tc_red_qopt.flags. */
> +	unsigned char		userbits;
> +
>  	struct timer_list	adapt_timer;
>  	struct Qdisc		*sch;
>  	struct red_parms	parms;
> @@ -44,6 +48,8 @@ struct red_sched_data {
>  	struct Qdisc		*qdisc;
>  };
>  
> +#define RED_SUPPORTED_FLAGS TC_RED_HISTORIC_FLAGS
> +
>  static inline int red_use_ecn(struct red_sched_data *q)
>  {
>  	return q->flags & TC_RED_ECN;
> @@ -186,6 +192,7 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
>  	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
>  	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
>  	[TCA_RED_MAX_P] = { .type = NLA_U32 },
> +	[TCA_RED_FLAGS] = { .type = NLA_U32 },

BITFIELD32? And then perhaps turn the define into a const validation
data?

Also policy needs a .strict_start_type now.

>  };
>  
>  static int red_change(struct Qdisc *sch, struct nlattr *opt,

> @@ -302,7 +317,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
>  	struct nlattr *opts = NULL;
>  	struct tc_red_qopt opt = {
>  		.limit		= q->limit,
> -		.flags		= q->flags,
> +		.flags		= ((q->flags & TC_RED_HISTORIC_FLAGS) |
> +				   q->userbits),

nit: parens unnecessary

>  		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
>  		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
>  		.Wlog		= q->parms.Wlog,
> @@ -321,6 +337,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
>  	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
>  	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
>  		goto nla_put_failure;
> +	if (q->flags & ~TC_RED_HISTORIC_FLAGS)
> +		nla_put_u32(skb, TCA_RED_FLAGS, q->flags);

Not 100% sure if conditional is needed, but please check the return
code.

>  	return nla_nest_end(skb, opts);
>  
>  nla_put_failure:
Petr Machata March 12, 2020, 12:12 a.m. UTC | #2
Jakub Kicinski <kuba@kernel.org> writes:

> On Wed, 11 Mar 2020 19:33:52 +0200 Petr Machata wrote:
>> diff --git a/include/net/red.h b/include/net/red.h
>> index 9665582c4687..5718d2b25637 100644
>> --- a/include/net/red.h
>> +++ b/include/net/red.h
>> @@ -179,6 +179,31 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
>>  	return true;
>>  }
>>  
>> +static inline bool red_get_flags(unsigned char flags,
>> +				 unsigned char historic_mask,
>> +				 struct nlattr *flags_attr,
>> +				 unsigned int supported_mask,
>> +				 unsigned int *p_flags, unsigned char *p_userbits,
>> +				 struct netlink_ext_ack *extack)
>> +{
>> +	if (flags && flags_attr) {
>> +		NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute");
>> +		return false;
>> +	}
>> +
>> +	*p_flags = flags & historic_mask;
>> +	if (flags_attr)
>> +		*p_flags |= nla_get_u32(flags_attr);
>
> It's less error prone for callers not to modify the output parameters
> until we're sure the call won't fail.

Ack.

>> +	if (*p_flags & ~supported_mask) {
>> +		NL_SET_ERR_MSG_MOD(extack, "unsupported RED flags specified");
>> +		return false;
>> +	}
>> +
>> +	*p_userbits = flags & ~historic_mask;
>> +	return true;
>> +}
>> +
>
>> +#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE)
>> +
>>  struct tc_red_xstats {
>>  	__u32           early;          /* Early drops */
>>  	__u32           pdrop;          /* Drops due to queue limits */
>> diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
>> index 1695421333e3..61d7c5a61279 100644
>> --- a/net/sched/sch_red.c
>> +++ b/net/sched/sch_red.c
>> @@ -35,7 +35,11 @@
>>  
>>  struct red_sched_data {
>>  	u32			limit;		/* HARD maximal queue length */
>> -	unsigned char		flags;
>> +
>> +	u32			flags;
>
> Can we stick to uchar until the number of flags grows?

No problem, but the attribute is u32.

>> +	/* Non-flags in tc_red_qopt.flags. */
>> +	unsigned char		userbits;
>> +
>>  	struct timer_list	adapt_timer;
>>  	struct Qdisc		*sch;
>>  	struct red_parms	parms;
>> @@ -44,6 +48,8 @@ struct red_sched_data {
>>  	struct Qdisc		*qdisc;
>>  };
>>  
>> +#define RED_SUPPORTED_FLAGS TC_RED_HISTORIC_FLAGS
>> +
>>  static inline int red_use_ecn(struct red_sched_data *q)
>>  {
>>  	return q->flags & TC_RED_ECN;
>> @@ -186,6 +192,7 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
>>  	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
>>  	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
>>  	[TCA_RED_MAX_P] = { .type = NLA_U32 },
>> +	[TCA_RED_FLAGS] = { .type = NLA_U32 },
>
> BITFIELD32? And then perhaps turn the define into a const validation
> data?

Nice.

> Also policy needs a .strict_start_type now.

OK.

>>  };
>>  
>>  static int red_change(struct Qdisc *sch, struct nlattr *opt,
>
>> @@ -302,7 +317,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
>>  	struct nlattr *opts = NULL;
>>  	struct tc_red_qopt opt = {
>>  		.limit		= q->limit,
>> -		.flags		= q->flags,
>> +		.flags		= ((q->flags & TC_RED_HISTORIC_FLAGS) |
>> +				   q->userbits),
>
> nit: parens unnecessary

I'll drop them.

>>  		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
>>  		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
>>  		.Wlog		= q->parms.Wlog,
>> @@ -321,6 +337,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
>>  	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
>>  	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
>>  		goto nla_put_failure;
>> +	if (q->flags & ~TC_RED_HISTORIC_FLAGS)
>> +		nla_put_u32(skb, TCA_RED_FLAGS, q->flags);
>
> Not 100% sure if conditional is needed, but please check the return
> code.

I didn't want to bother old-style clients with the new attribute.

I'll add the check.

>>  	return nla_nest_end(skb, opts);
>>  
>>  nla_put_failure:
diff mbox series

Patch

diff --git a/include/net/red.h b/include/net/red.h
index 9665582c4687..5718d2b25637 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -179,6 +179,31 @@  static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog)
 	return true;
 }
 
+static inline bool red_get_flags(unsigned char flags,
+				 unsigned char historic_mask,
+				 struct nlattr *flags_attr,
+				 unsigned int supported_mask,
+				 unsigned int *p_flags, unsigned char *p_userbits,
+				 struct netlink_ext_ack *extack)
+{
+	if (flags && flags_attr) {
+		NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute");
+		return false;
+	}
+
+	*p_flags = flags & historic_mask;
+	if (flags_attr)
+		*p_flags |= nla_get_u32(flags_attr);
+
+	if (*p_flags & ~supported_mask) {
+		NL_SET_ERR_MSG_MOD(extack, "unsupported RED flags specified");
+		return false;
+	}
+
+	*p_userbits = flags & ~historic_mask;
+	return true;
+}
+
 static inline void red_set_parms(struct red_parms *p,
 				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
 				 u8 Scell_log, u8 *stab, u32 max_P)
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index bbe791b24168..277df546e1a9 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -256,6 +256,7 @@  enum {
 	TCA_RED_PARMS,
 	TCA_RED_STAB,
 	TCA_RED_MAX_P,
+	TCA_RED_FLAGS,		/* u32 */
 	__TCA_RED_MAX,
 };
 
@@ -268,12 +269,27 @@  struct tc_red_qopt {
 	unsigned char   Wlog;		/* log(W)		*/
 	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
 	unsigned char   Scell_log;	/* cell size for idle damping */
+
+	/* This field can be used for flags that a RED-like qdisc has
+	 * historically supported. E.g. when configuring RED, it can be used for
+	 * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN,
+	 * HARDDROP. Etc. Because this field has not been validated, and is
+	 * copied back on dump, any bits besides those to which a given qdisc
+	 * has assigned a historical meaning need to be considered for free use
+	 * by userspace tools.
+	 *
+	 * Any further flags need to be passed differently, e.g. through an
+	 * attribute (such as TCA_RED_FLAGS above). Such attribute should allow
+	 * passing both recent and historic flags in one value.
+	 */
 	unsigned char	flags;
 #define TC_RED_ECN		1
 #define TC_RED_HARDDROP		2
 #define TC_RED_ADAPTATIVE	4
 };
 
+#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE)
+
 struct tc_red_xstats {
 	__u32           early;          /* Early drops */
 	__u32           pdrop;          /* Drops due to queue limits */
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 1695421333e3..61d7c5a61279 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -35,7 +35,11 @@ 
 
 struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
-	unsigned char		flags;
+
+	u32			flags;
+	/* Non-flags in tc_red_qopt.flags. */
+	unsigned char		userbits;
+
 	struct timer_list	adapt_timer;
 	struct Qdisc		*sch;
 	struct red_parms	parms;
@@ -44,6 +48,8 @@  struct red_sched_data {
 	struct Qdisc		*qdisc;
 };
 
+#define RED_SUPPORTED_FLAGS TC_RED_HISTORIC_FLAGS
+
 static inline int red_use_ecn(struct red_sched_data *q)
 {
 	return q->flags & TC_RED_ECN;
@@ -186,6 +192,7 @@  static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
+	[TCA_RED_FLAGS] = { .type = NLA_U32 },
 };
 
 static int red_change(struct Qdisc *sch, struct nlattr *opt,
@@ -195,6 +202,8 @@  static int red_change(struct Qdisc *sch, struct nlattr *opt,
 	struct red_sched_data *q = qdisc_priv(sch);
 	struct nlattr *tb[TCA_RED_MAX + 1];
 	struct tc_red_qopt *ctl;
+	unsigned char userbits;
+	u32 flags;
 	int err;
 	u32 max_P;
 
@@ -216,6 +225,11 @@  static int red_change(struct Qdisc *sch, struct nlattr *opt,
 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
 		return -EINVAL;
 
+	if (!red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
+			   tb[TCA_RED_FLAGS], RED_SUPPORTED_FLAGS,
+			   &flags, &userbits, extack))
+		return -EINVAL;
+
 	if (ctl->limit > 0) {
 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
 					 extack);
@@ -227,7 +241,8 @@  static int red_change(struct Qdisc *sch, struct nlattr *opt,
 	}
 
 	sch_tree_lock(sch);
-	q->flags = ctl->flags;
+	q->flags = flags;
+	q->userbits = userbits;
 	q->limit = ctl->limit;
 	if (child) {
 		qdisc_tree_flush_backlog(q->qdisc);
@@ -302,7 +317,8 @@  static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct nlattr *opts = NULL;
 	struct tc_red_qopt opt = {
 		.limit		= q->limit,
-		.flags		= q->flags,
+		.flags		= ((q->flags & TC_RED_HISTORIC_FLAGS) |
+				   q->userbits),
 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
 		.Wlog		= q->parms.Wlog,
@@ -321,6 +337,8 @@  static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
 		goto nla_put_failure;
+	if (q->flags & ~TC_RED_HISTORIC_FLAGS)
+		nla_put_u32(skb, TCA_RED_FLAGS, q->flags);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure: