diff mbox

[net-next,v11,3/4] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch

Message ID 1500860146-26970-4-git-send-email-jhs@emojatatu.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Jamal Hadi Salim July 24, 2017, 1:35 a.m. UTC
From: Jamal Hadi Salim <jhs@mojatatu.com>

When you dump hundreds of thousands of actions, getting only 32 per
dump batch even when the socket buffer and memory allocations allow
is inefficient.

With this change, the user will get as many as possibly fitting
within the given constraints available to the kernel.

The top level action TLV space is extended. An attribute
TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON
is set by the user indicating the user is capable of processing
these large dumps. Older user space which doesnt set this flag
doesnt get the large (than 32) batches.
The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many
actions are put in a single batch. As such user space app knows how long
to iterate (independent of the type of action being dumped)
instead of hardcoded maximum of 32 thus maintaining backward compat.

Some results dumping 1.5M actions below:
first an unpatched tc which doesnt understand these features...

prompt$ time -p tc actions ls action gact | grep index | wc -l
1500000
real 1388.43
user 2.07
sys 1386.79

Now lets see a patched tc which sets the correct flags when requesting
a dump:

prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
1500000
real 178.13
user 2.02
sys 176.96

That is about 8x performance improvement for tc app which sets its
receive buffer to about 32K.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/net/netlink.h          | 12 +++++++++++
 include/uapi/linux/rtnetlink.h | 22 +++++++++++++++++--
 net/sched/act_api.c            | 48 +++++++++++++++++++++++++++++++++---------
 3 files changed, 70 insertions(+), 12 deletions(-)

Comments

Jiri Pirko July 24, 2017, 11:27 a.m. UTC | #1
Mon, Jul 24, 2017 at 03:35:45AM CEST, jhs@mojatatu.com wrote:
>From: Jamal Hadi Salim <jhs@mojatatu.com>
>
>When you dump hundreds of thousands of actions, getting only 32 per
>dump batch even when the socket buffer and memory allocations allow
>is inefficient.
>
>With this change, the user will get as many as possibly fitting
>within the given constraints available to the kernel.
>
>The top level action TLV space is extended. An attribute
>TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON
>is set by the user indicating the user is capable of processing
>these large dumps. Older user space which doesnt set this flag
>doesnt get the large (than 32) batches.
>The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many
>actions are put in a single batch. As such user space app knows how long
>to iterate (independent of the type of action being dumped)
>instead of hardcoded maximum of 32 thus maintaining backward compat.
>
>Some results dumping 1.5M actions below:
>first an unpatched tc which doesnt understand these features...
>
>prompt$ time -p tc actions ls action gact | grep index | wc -l
>1500000
>real 1388.43
>user 2.07
>sys 1386.79
>
>Now lets see a patched tc which sets the correct flags when requesting
>a dump:
>
>prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
>1500000
>real 178.13
>user 2.02
>sys 176.96
>
>That is about 8x performance improvement for tc app which sets its
>receive buffer to about 32K.
>
>Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>---
> include/net/netlink.h          | 12 +++++++++++
> include/uapi/linux/rtnetlink.h | 22 +++++++++++++++++--
> net/sched/act_api.c            | 48 +++++++++++++++++++++++++++++++++---------
> 3 files changed, 70 insertions(+), 12 deletions(-)
>
>diff --git a/include/net/netlink.h b/include/net/netlink.h
>index e33d1fb..87c0b15 100644
>--- a/include/net/netlink.h
>+++ b/include/net/netlink.h
>@@ -1207,6 +1207,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
> }
> 
> /**
>+ * nla_get_bitfield_32 - return payload of 32 bitfield attribute
>+ * @nla: nla_bitfield_32 attribute
>+ */
>+static inline struct nla_bitfield_32 nla_get_bitfield_32(const struct nlattr *nla)
>+{
>+	struct nla_bitfield_32 tmp;
>+
>+	nla_memcpy(&tmp, nla, sizeof(tmp));
>+	return tmp;
>+}

This helper should be part of the previous patch.


>+
>+/**
>  * nla_memdup - duplicate attribute memory (kmemdup)
>  * @src: netlink attribute to duplicate from
>  * @gfp: GFP mask
>diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>index d148505..bfa80a6 100644
>--- a/include/uapi/linux/rtnetlink.h
>+++ b/include/uapi/linux/rtnetlink.h
>@@ -683,10 +683,28 @@ struct tcamsg {
> 	unsigned char	tca__pad1;
> 	unsigned short	tca__pad2;
> };
>+
>+enum {
>+	TCA_ROOT_UNSPEC,
>+	TCA_ROOT_TAB,
>+#define TCA_ACT_TAB TCA_ROOT_TAB
>+#define TCAA_MAX TCA_ROOT_TAB
>+	TCA_ROOT_FLAGS,
>+	TCA_ROOT_COUNT,
>+	__TCA_ROOT_MAX,
>+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
>+};
>+
> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>-#define TCAA_MAX 1
>+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
>+ *
>+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>+ * actions in a dump. All dump responses will contain the number of actions
>+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
>+ *
>+ */
>+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
> 
> /* New extended info filters for IFLA_EXT_MASK */
> #define RTEXT_FILTER_VF		(1 << 0)
>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>index 848370e..15d6c46 100644
>--- a/net/sched/act_api.c
>+++ b/net/sched/act_api.c
>@@ -110,6 +110,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			   struct netlink_callback *cb)
> {
> 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
>+	u32 act_flags = cb->args[2];
> 	struct nlattr *nest;
> 
> 	spin_lock_bh(&hinfo->lock);
>@@ -138,14 +139,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			}
> 			nla_nest_end(skb, nest);
> 			n_i++;
>-			if (n_i >= TCA_ACT_MAX_PRIO)
>+			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
>+			    n_i >= TCA_ACT_MAX_PRIO)
> 				goto done;
> 		}
> 	}
> done:
> 	spin_unlock_bh(&hinfo->lock);
>-	if (n_i)
>+	if (n_i) {
> 		cb->args[0] += n_i;
>+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
>+			cb->args[1] = n_i;
>+	}
> 	return n_i;
> 
> nla_put_failure:
>@@ -1068,11 +1073,17 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
> 	return tcf_add_notify(net, n, &actions, portid);
> }
> 
>+static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
>+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
>+	[TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD_32,
>+			     .validation_data = &tcaa_root_flags_allowed },
>+};
>+
> static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 			 struct netlink_ext_ack *extack)
> {
> 	struct net *net = sock_net(skb->sk);
>-	struct nlattr *tca[TCAA_MAX + 1];
>+	struct nlattr *tca[TCA_ROOT_MAX + 1];
> 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
> 	int ret = 0, ovr = 0;
> 
>@@ -1080,7 +1091,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 	    !netlink_capable(skb, CAP_NET_ADMIN))
> 		return -EPERM;
> 
>-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
>+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
> 			  extack);
> 	if (ret < 0)
> 		return ret;
>@@ -1121,16 +1132,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 	return ret;
> }
> 
>-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
>+static struct nlattr *find_dump_kind(struct nlattr **nla)
> {
> 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
> 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
>-	struct nlattr *nla[TCAA_MAX + 1];
> 	struct nlattr *kind;
> 
>-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
>-			NULL, NULL) < 0)
>-		return NULL;
> 	tb1 = nla[TCA_ACT_TAB];
> 	if (tb1 == NULL)
> 		return NULL;
>@@ -1157,8 +1164,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	struct tc_action_ops *a_o;
> 	int ret = 0;
> 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
>-	struct nlattr *kind = find_dump_kind(cb->nlh);
>+	struct nla_bitfield_32 fb;
>+	struct nlattr *count_attr = NULL;
>+	struct nlattr *tb[TCA_ROOT_MAX + 1];
>+	struct nlattr *kind = NULL;

Reverse christmas tree :D


>+	u32 act_count = 0;
>+
>+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
>+			  tcaa_policy, NULL);
>+	if (ret < 0)
>+		return ret;
> 
>+	kind = find_dump_kind(tb);
> 	if (kind == NULL) {
> 		pr_info("tc_dump_action: action bad kind\n");
> 		return 0;
>@@ -1168,14 +1185,22 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (a_o == NULL)
> 		return 0;
> 
>+	if (tb[TCA_ROOT_FLAGS])
>+		fb = nla_get_bitfield_32(tb[TCA_ROOT_FLAGS]);

fb? bf? nbf? Please make this synced within the patchset.


Don't you need to mask value with selector? In fact, I think that
nla_get_bitfield_32 could just return u32 which would be (value&selector).
The validation takes care of unsupported bits.


>+
> 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> 			cb->nlh->nlmsg_type, sizeof(*t), 0);
> 	if (!nlh)
> 		goto out_module_put;
>+
>+	cb->args[2] = fb.nla_value;
> 	t = nlmsg_data(nlh);
> 	t->tca_family = AF_UNSPEC;
> 	t->tca__pad1 = 0;
> 	t->tca__pad2 = 0;
>+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
>+	if (!count_attr)
>+		goto out_module_put;
> 
> 	nest = nla_nest_start(skb, TCA_ACT_TAB);
> 	if (nest == NULL)
>@@ -1188,6 +1213,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (ret > 0) {
> 		nla_nest_end(skb, nest);
> 		ret = skb->len;
>+		act_count = cb->args[1];
>+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
>+		cb->args[1] = 0;
> 	} else
> 		nlmsg_trim(skb, b);
> 
>-- 
>1.9.1
>
Jamal Hadi Salim July 25, 2017, 11:22 a.m. UTC | #2
On 17-07-24 07:27 AM, Jiri Pirko wrote:
> Mon, Jul 24, 2017 at 03:35:45AM CEST, jhs@mojatatu.com wrote:
>> From: Jamal Hadi Salim <jhs@mojatatu.com>
[..]
> 
> This helper should be part of the previous patch.
> 

Will do next update.
> 

 >> @@ -1157,8 +1164,18 @@ static int tc_dump_action(struct sk_buff 
*skb, struct netlink_callback *cb)
 >> 	struct tc_action_ops *a_o;
 >> 	int ret = 0;
 >> 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
 >> -	struct nlattr *kind = find_dump_kind(cb->nlh);
 >> +	struct nla_bitfield_32 fb;
 >> +	struct nlattr *count_attr = NULL;
 >> +	struct nlattr *tb[TCA_ROOT_MAX + 1];
 >> +	struct nlattr *kind = NULL;
 >
 > Reverse christmas tree :D
 >

There were already 2 christmas trees in place;->
I will re-arrange this.

>>
>> +	if (tb[TCA_ROOT_FLAGS])
>> +		fb = nla_get_bitfield_32(tb[TCA_ROOT_FLAGS]);
> 
> fb? bf? nbf? Please make this synced within the patchset.
> 
>

Ok, what do you like best? ;->

> Don't you need to mask value with selector? In fact, I think that
> nla_get_bitfield_32 could just return u32 which would be (value&selector).
> The validation takes care of unsupported bits.

For my use case I dont need any of the above since I dont need to
unset things. In other use cases you will need both selector and
value in case someone wants a bit to be set to 0.
Infact I think i will rename that helper to "nla_get_bitvalue_32"
to be more precise.

cheers,
jamal
Jiri Pirko July 25, 2017, 11:33 a.m. UTC | #3
Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote:
>On 17-07-24 07:27 AM, Jiri Pirko wrote:
>> Mon, Jul 24, 2017 at 03:35:45AM CEST, jhs@mojatatu.com wrote:
>> > From: Jamal Hadi Salim <jhs@mojatatu.com>
>[..]
>> 
>> This helper should be part of the previous patch.
>> 
>
>Will do next update.
>> 
>
>>> @@ -1157,8 +1164,18 @@ static int tc_dump_action(struct sk_buff *skb,
>struct netlink_callback *cb)
>>> 	struct tc_action_ops *a_o;
>>> 	int ret = 0;
>>> 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
>>> -	struct nlattr *kind = find_dump_kind(cb->nlh);
>>> +	struct nla_bitfield_32 fb;
>>> +	struct nlattr *count_attr = NULL;
>>> +	struct nlattr *tb[TCA_ROOT_MAX + 1];
>>> +	struct nlattr *kind = NULL;
>>
>> Reverse christmas tree :D
>>
>
>There were already 2 christmas trees in place;->
>I will re-arrange this.
>
>> > 
>> > +	if (tb[TCA_ROOT_FLAGS])
>> > +		fb = nla_get_bitfield_32(tb[TCA_ROOT_FLAGS]);
>> 
>> fb? bf? nbf? Please make this synced within the patchset.
>> 
>> 
>
>Ok, what do you like best? ;->

"bf"


>
>> Don't you need to mask value with selector? In fact, I think that
>> nla_get_bitfield_32 could just return u32 which would be (value&selector).
>> The validation takes care of unsupported bits.
>
>For my use case I dont need any of the above since I dont need to
>unset things. In other use cases you will need both selector and
>value in case someone wants a bit to be set to 0.
>Infact I think i will rename that helper to "nla_get_bitvalue_32"
>to be more precise.

The getter should contain name of the type, so "nla_get_bitfield32_val"
is much better.

What if I pass val 0x1 and selector 0x0 from userspace. I don't have the
bit selected, so you should not process it in kernel, no?


>
>cheers,
>jamal
Jamal Hadi Salim July 25, 2017, 12:34 p.m. UTC | #4
On 17-07-25 07:33 AM, Jiri Pirko wrote:
> Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote:

>>> fb? bf? nbf? Please make this synced within the patchset.
>>>
>>>
>>
>> Ok, what do you like best? ;->
> 
> "bf"
> 

Ok.

> 
>>
>>> Don't you need to mask value with selector? In fact, I think that
>>> nla_get_bitfield_32 could just return u32 which would be (value&selector).
>>> The validation takes care of unsupported bits.
>>
>> For my use case I dont need any of the above since I dont need to
>> unset things. In other use cases you will need both selector and
>> value in case someone wants a bit to be set to 0.
>> Infact I think i will rename that helper to "nla_get_bitvalue_32"
>> to be more precise.
> 
> The getter should contain name of the type, so "nla_get_bitfield32_val"
> is much better.
> 

Actually I mispoke. I was returning the struct not the value. So
nla_get_bitfield32() is a better name.

> What if I pass val 0x1 and selector 0x0 from userspace. I don't have the
> bit selected, so you should not process it in kernel, no?
>

Yes, valid point. I am not sure - should we reject?

cheers,
jamal
Jiri Pirko July 25, 2017, 12:37 p.m. UTC | #5
Tue, Jul 25, 2017 at 02:34:58PM CEST, jhs@mojatatu.com wrote:
>On 17-07-25 07:33 AM, Jiri Pirko wrote:
>> Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote:
>
>> > > fb? bf? nbf? Please make this synced within the patchset.
>> > > 
>> > > 
>> > 
>> > Ok, what do you like best? ;->
>> 
>> "bf"
>> 
>
>Ok.
>
>> 
>> > 
>> > > Don't you need to mask value with selector? In fact, I think that
>> > > nla_get_bitfield_32 could just return u32 which would be (value&selector).
>> > > The validation takes care of unsupported bits.
>> > 
>> > For my use case I dont need any of the above since I dont need to
>> > unset things. In other use cases you will need both selector and
>> > value in case someone wants a bit to be set to 0.
>> > Infact I think i will rename that helper to "nla_get_bitvalue_32"
>> > to be more precise.
>> 
>> The getter should contain name of the type, so "nla_get_bitfield32_val"
>> is much better.
>> 
>
>Actually I mispoke. I was returning the struct not the value. So
>nla_get_bitfield32() is a better name.

ack


>
>> What if I pass val 0x1 and selector 0x0 from userspace. I don't have the
>> bit selected, so you should not process it in kernel, no?
>> 
>
>Yes, valid point. I am not sure - should we reject?

I think that the validation might check this and reject. Makes sense to
me.
Jamal Hadi Salim July 28, 2017, 1:41 p.m. UTC | #6
On 17-07-25 08:37 AM, Jiri Pirko wrote:
> Tue, Jul 25, 2017 at 02:34:58PM CEST, jhs@mojatatu.com wrote:
>> On 17-07-25 07:33 AM, Jiri Pirko wrote:
>>> Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote:

[..]
>>> What if I pass val 0x1 and selector 0x0 from userspace. I don't have the
>>> bit selected, so you should not process it in kernel, no?
>>>
>>
>> Yes, valid point. I am not sure - should we reject?
> 
> I think that the validation might check this and reject. Makes sense to
> me.
> 

How does this look? I havent tested it but covers all angles
I can think of.

static int validate_nla_bitfield32(const struct nlattr *nla,
                                     void *valid_flags_allowed)
{
         const struct nla_bitfield32 *bf = nla_data(nla);
         u32 *valid_flags_mask = valid_flags_allowed;

         if (!valid_flags_allowed)
                 return -EINVAL;
         /*disallow invalid selector */
         if ((bf->selector & valid_flags_allowed) >*valid_flags_allowed)
                 return -EINVAL;
         /*disallow invalid bit values */
         if (bf->value & ~*valid_flags_mask)
                 return -EINVAL;
         /*disallow valid bit values that are not selected*/
         if (bf->value & ~nbf->selector)
                 return -EINVAL;

         return 0;
}

cheers,
jamal
Jiri Pirko July 28, 2017, 2:12 p.m. UTC | #7
Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote:
>On 17-07-25 08:37 AM, Jiri Pirko wrote:
>> Tue, Jul 25, 2017 at 02:34:58PM CEST, jhs@mojatatu.com wrote:
>> > On 17-07-25 07:33 AM, Jiri Pirko wrote:
>> > > Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote:
>
>[..]
>> > > What if I pass val 0x1 and selector 0x0 from userspace. I don't have the
>> > > bit selected, so you should not process it in kernel, no?
>> > > 
>> > 
>> > Yes, valid point. I am not sure - should we reject?
>> 
>> I think that the validation might check this and reject. Makes sense to
>> me.
>> 
>
>How does this look? I havent tested it but covers all angles

Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't
understand ****. Would be probably good to first apply my review comment
on the function itselt, then to add the checks :)


>I can think of.
>
>static int validate_nla_bitfield32(const struct nlattr *nla,
>                                    void *valid_flags_allowed)
>{
>        const struct nla_bitfield32 *bf = nla_data(nla);
>        u32 *valid_flags_mask = valid_flags_allowed;
>
>        if (!valid_flags_allowed)
>                return -EINVAL;
>        /*disallow invalid selector */
>        if ((bf->selector & valid_flags_allowed) >*valid_flags_allowed)
>                return -EINVAL;
>        /*disallow invalid bit values */
>        if (bf->value & ~*valid_flags_mask)
>                return -EINVAL;
>        /*disallow valid bit values that are not selected*/
>        if (bf->value & ~nbf->selector)
>                return -EINVAL;
>
>        return 0;
>}
>
>cheers,
>jamal
Jamal Hadi Salim July 28, 2017, 2:52 p.m. UTC | #8
On 17-07-28 10:12 AM, Jiri Pirko wrote:
> Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote:
[..]
> 
> Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't
> understand ****. Would be probably good to first apply my review comment
> on the function itselt, then to add the checks :)
> 

I havent even compiled/test that Jiri.
Just ignore the void * and assume it is a u32 *.

I am trying to avoid doing unlucky number 13 patch.
So feedback on this is good. Just look at what it is disallowing
first.

back later.

cheers,
jamal

> 
>> I can think of.
>>
>> static int validate_nla_bitfield32(const struct nlattr *nla,
>>                                     void *valid_flags_allowed)
>> {
>>         const struct nla_bitfield32 *bf = nla_data(nla);
>>         u32 *valid_flags_mask = valid_flags_allowed;
>>
>>         if (!valid_flags_allowed)
>>                 return -EINVAL;
>>         /*disallow invalid selector */
>>         if ((bf->selector & valid_flags_allowed) >*valid_flags_allowed)
>>                 return -EINVAL;
>>         /*disallow invalid bit values */
>>         if (bf->value & ~*valid_flags_mask)
>>                 return -EINVAL;
>>         /*disallow valid bit values that are not selected*/
>>         if (bf->value & ~nbf->selector)
>>                 return -EINVAL;
>>
>>         return 0;
>> }
>>
>> cheers,
>> jamal
Jiri Pirko July 28, 2017, 2:57 p.m. UTC | #9
Fri, Jul 28, 2017 at 04:52:02PM CEST, jhs@mojatatu.com wrote:
>On 17-07-28 10:12 AM, Jiri Pirko wrote:
>> Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote:
>[..]
>> 
>> Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't
>> understand ****. Would be probably good to first apply my review comment
>> on the function itselt, then to add the checks :)
>> 
>
>I havent even compiled/test that Jiri.
>Just ignore the void * and assume it is a u32 *.
>
>I am trying to avoid doing unlucky number 13 patch.

I'll rather wait for you to send the code in proper shape so I can
decypher easily and don't have to guess.


>So feedback on this is good. Just look at what it is disallowing
>first.
>
>back later.
>
>cheers,
>jamal
>
>> 
>> > I can think of.
>> > 
>> > static int validate_nla_bitfield32(const struct nlattr *nla,
>> >                                     void *valid_flags_allowed)
>> > {
>> >         const struct nla_bitfield32 *bf = nla_data(nla);
>> >         u32 *valid_flags_mask = valid_flags_allowed;
>> > 
>> >         if (!valid_flags_allowed)
>> >                 return -EINVAL;
>> >         /*disallow invalid selector */
>> >         if ((bf->selector & valid_flags_allowed) >*valid_flags_allowed)
>> >                 return -EINVAL;
>> >         /*disallow invalid bit values */
>> >         if (bf->value & ~*valid_flags_mask)
>> >                 return -EINVAL;
>> >         /*disallow valid bit values that are not selected*/
>> >         if (bf->value & ~nbf->selector)
>> >                 return -EINVAL;
>> > 
>> >         return 0;
>> > }
>> > 
>> > cheers,
>> > jamal
>
Jamal Hadi Salim July 28, 2017, 3:08 p.m. UTC | #10
On 17-07-28 10:52 AM, Jamal Hadi Salim wrote:
> On 17-07-28 10:12 AM, Jiri Pirko wrote:
>> Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote:
> [..]
>>
>> Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't
>> understand ****. Would be probably good to first apply my review comment
>> on the function itselt, then to add the checks :)
>>
> 
> I havent even compiled/test that Jiri.
> Just ignore the void * and assume it is a u32 *.
> 

This compiled - but dont have much time right now to test.
===
static int validate_nla_bitfield32(const struct nlattr *nla,
                                    u32 *valid_flags_allowed)
{
         const struct nla_bitfield32 *bf = nla_data(nla);
         u32 *valid_flags_mask = valid_flags_allowed;

         if (!valid_flags_allowed)
                 return -EINVAL;

         /*disallow invalid selector */
         if ((bf->selector & *valid_flags_allowed) > *valid_flags_allowed)
                 return -EINVAL;

         /*disallow invalid bit values */
         if (bf->value & ~*valid_flags_mask)
                 return -EINVAL;

         /*disallow valid bit values that are not selected*/
         if (bf->value & ~bf->selector)
                 return -EINVAL;

         return 0;
}
========

cheers,
jamal
Jiri Pirko July 28, 2017, 3:45 p.m. UTC | #11
Fri, Jul 28, 2017 at 05:08:10PM CEST, jhs@mojatatu.com wrote:
>On 17-07-28 10:52 AM, Jamal Hadi Salim wrote:
>> On 17-07-28 10:12 AM, Jiri Pirko wrote:
>> > Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote:
>> [..]
>> > 
>> > Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't
>> > understand ****. Would be probably good to first apply my review comment
>> > on the function itselt, then to add the checks :)
>> > 
>> 
>> I havent even compiled/test that Jiri.
>> Just ignore the void * and assume it is a u32 *.
>> 
>
>This compiled - but dont have much time right now to test.
>===
>static int validate_nla_bitfield32(const struct nlattr *nla,
>                                   u32 *valid_flags_allowed)
>{
>        const struct nla_bitfield32 *bf = nla_data(nla);
>        u32 *valid_flags_mask = valid_flags_allowed;

good one :D


>
>        if (!valid_flags_allowed)
>                return -EINVAL;
>
>        /*disallow invalid selector */
>        if ((bf->selector & *valid_flags_allowed) > *valid_flags_allowed)

I don't get the ">"....
Just (bf->selector & ~*valid_flags_allowed) should be enought, no?


>                return -EINVAL;
>
>        /*disallow invalid bit values */
>        if (bf->value & ~*valid_flags_mask)
>                return -EINVAL;
>
>        /*disallow valid bit values that are not selected*/
>        if (bf->value & ~bf->selector)
>                return -EINVAL;
>
>        return 0;
>}
>========
>
>cheers,
>jamal
Jamal Hadi Salim July 28, 2017, 10:10 p.m. UTC | #12
On 17-07-28 11:45 AM, Jiri Pirko wrote:
> Fri, Jul 28, 2017 at 05:08:10PM CEST, jhs@mojatatu.com wrote:
>> On 17-07-28 10:52 AM, Jamal Hadi Salim wrote:
>>> On 17-07-28 10:12 AM, Jiri Pirko wrote:


>>         /*disallow invalid selector */
>>         if ((bf->selector & *valid_flags_allowed) > *valid_flags_allowed)
> 
> I don't get the ">"....
> Just (bf->selector & ~*valid_flags_allowed) should be enought, no?
> 

It may be enough - I will try it out tommorow.
I was worried about the selector having more bits than the allowed flags.

cheers,
jamal
Jiri Pirko July 29, 2017, 7:19 a.m. UTC | #13
Sat, Jul 29, 2017 at 12:10:20AM CEST, jhs@mojatatu.com wrote:
>On 17-07-28 11:45 AM, Jiri Pirko wrote:
>> Fri, Jul 28, 2017 at 05:08:10PM CEST, jhs@mojatatu.com wrote:
>> > On 17-07-28 10:52 AM, Jamal Hadi Salim wrote:
>> > > On 17-07-28 10:12 AM, Jiri Pirko wrote:
>
>
>> >         /*disallow invalid selector */
>> >         if ((bf->selector & *valid_flags_allowed) > *valid_flags_allowed)
>> 
>> I don't get the ">"....
>> Just (bf->selector & ~*valid_flags_allowed) should be enought, no?
>> 
>
>It may be enough - I will try it out tommorow.
>I was worried about the selector having more bits than the allowed flags.

That is what the code I wrote above do. And I think that your code does
not (if you don't assume that allowed flags are always starting from the
least significant bit)

>
>cheers,
>jamal
diff mbox

Patch

diff --git a/include/net/netlink.h b/include/net/netlink.h
index e33d1fb..87c0b15 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1207,6 +1207,18 @@  static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla)
 }
 
 /**
+ * nla_get_bitfield_32 - return payload of 32 bitfield attribute
+ * @nla: nla_bitfield_32 attribute
+ */
+static inline struct nla_bitfield_32 nla_get_bitfield_32(const struct nlattr *nla)
+{
+	struct nla_bitfield_32 tmp;
+
+	nla_memcpy(&tmp, nla, sizeof(tmp));
+	return tmp;
+}
+
+/**
  * nla_memdup - duplicate attribute memory (kmemdup)
  * @src: netlink attribute to duplicate from
  * @gfp: GFP mask
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index d148505..bfa80a6 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -683,10 +683,28 @@  struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCA_ROOT_UNSPEC,
+	TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+	TCA_ROOT_FLAGS,
+	TCA_ROOT_COUNT,
+	__TCA_ROOT_MAX,
+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 848370e..15d6c46 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -110,6 +110,7 @@  static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	u32 act_flags = cb->args[2];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
@@ -138,14 +139,18 @@  static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			}
 			nla_nest_end(skb, nest);
 			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO)
+			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+			    n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
 		}
 	}
 done:
 	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
+	if (n_i) {
 		cb->args[0] += n_i;
+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+			cb->args[1] = n_i;
+	}
 	return n_i;
 
 nla_put_failure:
@@ -1068,11 +1073,17 @@  static int tcf_action_add(struct net *net, struct nlattr *nla,
 	return tcf_add_notify(net, n, &actions, portid);
 }
 
+static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+	[TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD_32,
+			     .validation_data = &tcaa_root_flags_allowed },
+};
+
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nlattr *tca[TCAA_MAX + 1];
+	struct nlattr *tca[TCA_ROOT_MAX + 1];
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
@@ -1080,7 +1091,7 @@  static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
 			  extack);
 	if (ret < 0)
 		return ret;
@@ -1121,16 +1132,12 @@  static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	return ret;
 }
 
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
 {
 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
 
-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
-			NULL, NULL) < 0)
-		return NULL;
 	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
 		return NULL;
@@ -1157,8 +1164,18 @@  static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tc_action_ops *a_o;
 	int ret = 0;
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
-	struct nlattr *kind = find_dump_kind(cb->nlh);
+	struct nla_bitfield_32 fb;
+	struct nlattr *count_attr = NULL;
+	struct nlattr *tb[TCA_ROOT_MAX + 1];
+	struct nlattr *kind = NULL;
+	u32 act_count = 0;
+
+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+			  tcaa_policy, NULL);
+	if (ret < 0)
+		return ret;
 
+	kind = find_dump_kind(tb);
 	if (kind == NULL) {
 		pr_info("tc_dump_action: action bad kind\n");
 		return 0;
@@ -1168,14 +1185,22 @@  static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (a_o == NULL)
 		return 0;
 
+	if (tb[TCA_ROOT_FLAGS])
+		fb = nla_get_bitfield_32(tb[TCA_ROOT_FLAGS]);
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
+
+	cb->args[2] = fb.nla_value;
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+	if (!count_attr)
+		goto out_module_put;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
 	if (nest == NULL)
@@ -1188,6 +1213,9 @@  static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (ret > 0) {
 		nla_nest_end(skb, nest);
 		ret = skb->len;
+		act_count = cb->args[1];
+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+		cb->args[1] = 0;
 	} else
 		nlmsg_trim(skb, b);