Message ID | 1500860146-26970-4-git-send-email-jhs@emojatatu.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
Mon, Jul 24, 2017 at 03:35:45AM CEST, jhs@mojatatu.com wrote: >From: Jamal Hadi Salim <jhs@mojatatu.com> > >When you dump hundreds of thousands of actions, getting only 32 per >dump batch even when the socket buffer and memory allocations allow >is inefficient. > >With this change, the user will get as many as possibly fitting >within the given constraints available to the kernel. > >The top level action TLV space is extended. An attribute >TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON >is set by the user indicating the user is capable of processing >these large dumps. Older user space which doesnt set this flag >doesnt get the large (than 32) batches. >The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many >actions are put in a single batch. As such user space app knows how long >to iterate (independent of the type of action being dumped) >instead of hardcoded maximum of 32 thus maintaining backward compat. > >Some results dumping 1.5M actions below: >first an unpatched tc which doesnt understand these features... > >prompt$ time -p tc actions ls action gact | grep index | wc -l >1500000 >real 1388.43 >user 2.07 >sys 1386.79 > >Now lets see a patched tc which sets the correct flags when requesting >a dump: > >prompt$ time -p updatedtc actions ls action gact | grep index | wc -l >1500000 >real 178.13 >user 2.02 >sys 176.96 > >That is about 8x performance improvement for tc app which sets its >receive buffer to about 32K. > >Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com> >--- > include/net/netlink.h | 12 +++++++++++ > include/uapi/linux/rtnetlink.h | 22 +++++++++++++++++-- > net/sched/act_api.c | 48 +++++++++++++++++++++++++++++++++--------- > 3 files changed, 70 insertions(+), 12 deletions(-) > >diff --git a/include/net/netlink.h b/include/net/netlink.h >index e33d1fb..87c0b15 100644 >--- a/include/net/netlink.h >+++ b/include/net/netlink.h >@@ -1207,6 +1207,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) > } > > /** >+ * nla_get_bitfield_32 - return payload of 32 bitfield attribute >+ * @nla: nla_bitfield_32 attribute >+ */ >+static inline struct nla_bitfield_32 nla_get_bitfield_32(const struct nlattr *nla) >+{ >+ struct nla_bitfield_32 tmp; >+ >+ nla_memcpy(&tmp, nla, sizeof(tmp)); >+ return tmp; >+} This helper should be part of the previous patch. >+ >+/** > * nla_memdup - duplicate attribute memory (kmemdup) > * @src: netlink attribute to duplicate from > * @gfp: GFP mask >diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h >index d148505..bfa80a6 100644 >--- a/include/uapi/linux/rtnetlink.h >+++ b/include/uapi/linux/rtnetlink.h >@@ -683,10 +683,28 @@ struct tcamsg { > unsigned char tca__pad1; > unsigned short tca__pad2; > }; >+ >+enum { >+ TCA_ROOT_UNSPEC, >+ TCA_ROOT_TAB, >+#define TCA_ACT_TAB TCA_ROOT_TAB >+#define TCAA_MAX TCA_ROOT_TAB >+ TCA_ROOT_FLAGS, >+ TCA_ROOT_COUNT, >+ __TCA_ROOT_MAX, >+#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) >+}; >+ > #define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) > #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) >-#define TCA_ACT_TAB 1 /* attr type must be >=1 */ >-#define TCAA_MAX 1 >+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS >+ * >+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO >+ * actions in a dump. All dump responses will contain the number of actions >+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT >+ * >+ */ >+#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) > > /* New extended info filters for IFLA_EXT_MASK */ > #define RTEXT_FILTER_VF (1 << 0) >diff --git a/net/sched/act_api.c b/net/sched/act_api.c >index 848370e..15d6c46 100644 >--- a/net/sched/act_api.c >+++ b/net/sched/act_api.c >@@ -110,6 +110,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, > struct netlink_callback *cb) > { > int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; >+ u32 act_flags = cb->args[2]; > struct nlattr *nest; > > spin_lock_bh(&hinfo->lock); >@@ -138,14 +139,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, > } > nla_nest_end(skb, nest); > n_i++; >- if (n_i >= TCA_ACT_MAX_PRIO) >+ if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && >+ n_i >= TCA_ACT_MAX_PRIO) > goto done; > } > } > done: > spin_unlock_bh(&hinfo->lock); >- if (n_i) >+ if (n_i) { > cb->args[0] += n_i; >+ if (act_flags & TCA_FLAG_LARGE_DUMP_ON) >+ cb->args[1] = n_i; >+ } > return n_i; > > nla_put_failure: >@@ -1068,11 +1073,17 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, > return tcf_add_notify(net, n, &actions, portid); > } > >+static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; >+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { >+ [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD_32, >+ .validation_data = &tcaa_root_flags_allowed }, >+}; >+ > static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, > struct netlink_ext_ack *extack) > { > struct net *net = sock_net(skb->sk); >- struct nlattr *tca[TCAA_MAX + 1]; >+ struct nlattr *tca[TCA_ROOT_MAX + 1]; > u32 portid = skb ? NETLINK_CB(skb).portid : 0; > int ret = 0, ovr = 0; > >@@ -1080,7 +1091,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, > !netlink_capable(skb, CAP_NET_ADMIN)) > return -EPERM; > >- ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL, >+ ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, > extack); > if (ret < 0) > return ret; >@@ -1121,16 +1132,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, > return ret; > } > >-static struct nlattr *find_dump_kind(const struct nlmsghdr *n) >+static struct nlattr *find_dump_kind(struct nlattr **nla) > { > struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; > struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; >- struct nlattr *nla[TCAA_MAX + 1]; > struct nlattr *kind; > >- if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, >- NULL, NULL) < 0) >- return NULL; > tb1 = nla[TCA_ACT_TAB]; > if (tb1 == NULL) > return NULL; >@@ -1157,8 +1164,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) > struct tc_action_ops *a_o; > int ret = 0; > struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); >- struct nlattr *kind = find_dump_kind(cb->nlh); >+ struct nla_bitfield_32 fb; >+ struct nlattr *count_attr = NULL; >+ struct nlattr *tb[TCA_ROOT_MAX + 1]; >+ struct nlattr *kind = NULL; Reverse christmas tree :D >+ u32 act_count = 0; >+ >+ ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, >+ tcaa_policy, NULL); >+ if (ret < 0) >+ return ret; > >+ kind = find_dump_kind(tb); > if (kind == NULL) { > pr_info("tc_dump_action: action bad kind\n"); > return 0; >@@ -1168,14 +1185,22 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) > if (a_o == NULL) > return 0; > >+ if (tb[TCA_ROOT_FLAGS]) >+ fb = nla_get_bitfield_32(tb[TCA_ROOT_FLAGS]); fb? bf? nbf? Please make this synced within the patchset. Don't you need to mask value with selector? In fact, I think that nla_get_bitfield_32 could just return u32 which would be (value&selector). The validation takes care of unsupported bits. >+ > nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, > cb->nlh->nlmsg_type, sizeof(*t), 0); > if (!nlh) > goto out_module_put; >+ >+ cb->args[2] = fb.nla_value; > t = nlmsg_data(nlh); > t->tca_family = AF_UNSPEC; > t->tca__pad1 = 0; > t->tca__pad2 = 0; >+ count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32)); >+ if (!count_attr) >+ goto out_module_put; > > nest = nla_nest_start(skb, TCA_ACT_TAB); > if (nest == NULL) >@@ -1188,6 +1213,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) > if (ret > 0) { > nla_nest_end(skb, nest); > ret = skb->len; >+ act_count = cb->args[1]; >+ memcpy(nla_data(count_attr), &act_count, sizeof(u32)); >+ cb->args[1] = 0; > } else > nlmsg_trim(skb, b); > >-- >1.9.1 >
On 17-07-24 07:27 AM, Jiri Pirko wrote: > Mon, Jul 24, 2017 at 03:35:45AM CEST, jhs@mojatatu.com wrote: >> From: Jamal Hadi Salim <jhs@mojatatu.com> [..] > > This helper should be part of the previous patch. > Will do next update. > >> @@ -1157,8 +1164,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) >> struct tc_action_ops *a_o; >> int ret = 0; >> struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); >> - struct nlattr *kind = find_dump_kind(cb->nlh); >> + struct nla_bitfield_32 fb; >> + struct nlattr *count_attr = NULL; >> + struct nlattr *tb[TCA_ROOT_MAX + 1]; >> + struct nlattr *kind = NULL; > > Reverse christmas tree :D > There were already 2 christmas trees in place;-> I will re-arrange this. >> >> + if (tb[TCA_ROOT_FLAGS]) >> + fb = nla_get_bitfield_32(tb[TCA_ROOT_FLAGS]); > > fb? bf? nbf? Please make this synced within the patchset. > > Ok, what do you like best? ;-> > Don't you need to mask value with selector? In fact, I think that > nla_get_bitfield_32 could just return u32 which would be (value&selector). > The validation takes care of unsupported bits. For my use case I dont need any of the above since I dont need to unset things. In other use cases you will need both selector and value in case someone wants a bit to be set to 0. Infact I think i will rename that helper to "nla_get_bitvalue_32" to be more precise. cheers, jamal
Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote: >On 17-07-24 07:27 AM, Jiri Pirko wrote: >> Mon, Jul 24, 2017 at 03:35:45AM CEST, jhs@mojatatu.com wrote: >> > From: Jamal Hadi Salim <jhs@mojatatu.com> >[..] >> >> This helper should be part of the previous patch. >> > >Will do next update. >> > >>> @@ -1157,8 +1164,18 @@ static int tc_dump_action(struct sk_buff *skb, >struct netlink_callback *cb) >>> struct tc_action_ops *a_o; >>> int ret = 0; >>> struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); >>> - struct nlattr *kind = find_dump_kind(cb->nlh); >>> + struct nla_bitfield_32 fb; >>> + struct nlattr *count_attr = NULL; >>> + struct nlattr *tb[TCA_ROOT_MAX + 1]; >>> + struct nlattr *kind = NULL; >> >> Reverse christmas tree :D >> > >There were already 2 christmas trees in place;-> >I will re-arrange this. > >> > >> > + if (tb[TCA_ROOT_FLAGS]) >> > + fb = nla_get_bitfield_32(tb[TCA_ROOT_FLAGS]); >> >> fb? bf? nbf? Please make this synced within the patchset. >> >> > >Ok, what do you like best? ;-> "bf" > >> Don't you need to mask value with selector? In fact, I think that >> nla_get_bitfield_32 could just return u32 which would be (value&selector). >> The validation takes care of unsupported bits. > >For my use case I dont need any of the above since I dont need to >unset things. In other use cases you will need both selector and >value in case someone wants a bit to be set to 0. >Infact I think i will rename that helper to "nla_get_bitvalue_32" >to be more precise. The getter should contain name of the type, so "nla_get_bitfield32_val" is much better. What if I pass val 0x1 and selector 0x0 from userspace. I don't have the bit selected, so you should not process it in kernel, no? > >cheers, >jamal
On 17-07-25 07:33 AM, Jiri Pirko wrote: > Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote: >>> fb? bf? nbf? Please make this synced within the patchset. >>> >>> >> >> Ok, what do you like best? ;-> > > "bf" > Ok. > >> >>> Don't you need to mask value with selector? In fact, I think that >>> nla_get_bitfield_32 could just return u32 which would be (value&selector). >>> The validation takes care of unsupported bits. >> >> For my use case I dont need any of the above since I dont need to >> unset things. In other use cases you will need both selector and >> value in case someone wants a bit to be set to 0. >> Infact I think i will rename that helper to "nla_get_bitvalue_32" >> to be more precise. > > The getter should contain name of the type, so "nla_get_bitfield32_val" > is much better. > Actually I mispoke. I was returning the struct not the value. So nla_get_bitfield32() is a better name. > What if I pass val 0x1 and selector 0x0 from userspace. I don't have the > bit selected, so you should not process it in kernel, no? > Yes, valid point. I am not sure - should we reject? cheers, jamal
Tue, Jul 25, 2017 at 02:34:58PM CEST, jhs@mojatatu.com wrote: >On 17-07-25 07:33 AM, Jiri Pirko wrote: >> Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote: > >> > > fb? bf? nbf? Please make this synced within the patchset. >> > > >> > > >> > >> > Ok, what do you like best? ;-> >> >> "bf" >> > >Ok. > >> >> > >> > > Don't you need to mask value with selector? In fact, I think that >> > > nla_get_bitfield_32 could just return u32 which would be (value&selector). >> > > The validation takes care of unsupported bits. >> > >> > For my use case I dont need any of the above since I dont need to >> > unset things. In other use cases you will need both selector and >> > value in case someone wants a bit to be set to 0. >> > Infact I think i will rename that helper to "nla_get_bitvalue_32" >> > to be more precise. >> >> The getter should contain name of the type, so "nla_get_bitfield32_val" >> is much better. >> > >Actually I mispoke. I was returning the struct not the value. So >nla_get_bitfield32() is a better name. ack > >> What if I pass val 0x1 and selector 0x0 from userspace. I don't have the >> bit selected, so you should not process it in kernel, no? >> > >Yes, valid point. I am not sure - should we reject? I think that the validation might check this and reject. Makes sense to me.
On 17-07-25 08:37 AM, Jiri Pirko wrote: > Tue, Jul 25, 2017 at 02:34:58PM CEST, jhs@mojatatu.com wrote: >> On 17-07-25 07:33 AM, Jiri Pirko wrote: >>> Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote: [..] >>> What if I pass val 0x1 and selector 0x0 from userspace. I don't have the >>> bit selected, so you should not process it in kernel, no? >>> >> >> Yes, valid point. I am not sure - should we reject? > > I think that the validation might check this and reject. Makes sense to > me. > How does this look? I havent tested it but covers all angles I can think of. static int validate_nla_bitfield32(const struct nlattr *nla, void *valid_flags_allowed) { const struct nla_bitfield32 *bf = nla_data(nla); u32 *valid_flags_mask = valid_flags_allowed; if (!valid_flags_allowed) return -EINVAL; /*disallow invalid selector */ if ((bf->selector & valid_flags_allowed) >*valid_flags_allowed) return -EINVAL; /*disallow invalid bit values */ if (bf->value & ~*valid_flags_mask) return -EINVAL; /*disallow valid bit values that are not selected*/ if (bf->value & ~nbf->selector) return -EINVAL; return 0; } cheers, jamal
Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote: >On 17-07-25 08:37 AM, Jiri Pirko wrote: >> Tue, Jul 25, 2017 at 02:34:58PM CEST, jhs@mojatatu.com wrote: >> > On 17-07-25 07:33 AM, Jiri Pirko wrote: >> > > Tue, Jul 25, 2017 at 01:22:44PM CEST, jhs@mojatatu.com wrote: > >[..] >> > > What if I pass val 0x1 and selector 0x0 from userspace. I don't have the >> > > bit selected, so you should not process it in kernel, no? >> > > >> > >> > Yes, valid point. I am not sure - should we reject? >> >> I think that the validation might check this and reject. Makes sense to >> me. >> > >How does this look? I havent tested it but covers all angles Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't understand ****. Would be probably good to first apply my review comment on the function itselt, then to add the checks :) >I can think of. > >static int validate_nla_bitfield32(const struct nlattr *nla, > void *valid_flags_allowed) >{ > const struct nla_bitfield32 *bf = nla_data(nla); > u32 *valid_flags_mask = valid_flags_allowed; > > if (!valid_flags_allowed) > return -EINVAL; > /*disallow invalid selector */ > if ((bf->selector & valid_flags_allowed) >*valid_flags_allowed) > return -EINVAL; > /*disallow invalid bit values */ > if (bf->value & ~*valid_flags_mask) > return -EINVAL; > /*disallow valid bit values that are not selected*/ > if (bf->value & ~nbf->selector) > return -EINVAL; > > return 0; >} > >cheers, >jamal
On 17-07-28 10:12 AM, Jiri Pirko wrote: > Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote: [..] > > Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't > understand ****. Would be probably good to first apply my review comment > on the function itselt, then to add the checks :) > I havent even compiled/test that Jiri. Just ignore the void * and assume it is a u32 *. I am trying to avoid doing unlucky number 13 patch. So feedback on this is good. Just look at what it is disallowing first. back later. cheers, jamal > >> I can think of. >> >> static int validate_nla_bitfield32(const struct nlattr *nla, >> void *valid_flags_allowed) >> { >> const struct nla_bitfield32 *bf = nla_data(nla); >> u32 *valid_flags_mask = valid_flags_allowed; >> >> if (!valid_flags_allowed) >> return -EINVAL; >> /*disallow invalid selector */ >> if ((bf->selector & valid_flags_allowed) >*valid_flags_allowed) >> return -EINVAL; >> /*disallow invalid bit values */ >> if (bf->value & ~*valid_flags_mask) >> return -EINVAL; >> /*disallow valid bit values that are not selected*/ >> if (bf->value & ~nbf->selector) >> return -EINVAL; >> >> return 0; >> } >> >> cheers, >> jamal
Fri, Jul 28, 2017 at 04:52:02PM CEST, jhs@mojatatu.com wrote: >On 17-07-28 10:12 AM, Jiri Pirko wrote: >> Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote: >[..] >> >> Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't >> understand ****. Would be probably good to first apply my review comment >> on the function itselt, then to add the checks :) >> > >I havent even compiled/test that Jiri. >Just ignore the void * and assume it is a u32 *. > >I am trying to avoid doing unlucky number 13 patch. I'll rather wait for you to send the code in proper shape so I can decypher easily and don't have to guess. >So feedback on this is good. Just look at what it is disallowing >first. > >back later. > >cheers, >jamal > >> >> > I can think of. >> > >> > static int validate_nla_bitfield32(const struct nlattr *nla, >> > void *valid_flags_allowed) >> > { >> > const struct nla_bitfield32 *bf = nla_data(nla); >> > u32 *valid_flags_mask = valid_flags_allowed; >> > >> > if (!valid_flags_allowed) >> > return -EINVAL; >> > /*disallow invalid selector */ >> > if ((bf->selector & valid_flags_allowed) >*valid_flags_allowed) >> > return -EINVAL; >> > /*disallow invalid bit values */ >> > if (bf->value & ~*valid_flags_mask) >> > return -EINVAL; >> > /*disallow valid bit values that are not selected*/ >> > if (bf->value & ~nbf->selector) >> > return -EINVAL; >> > >> > return 0; >> > } >> > >> > cheers, >> > jamal >
On 17-07-28 10:52 AM, Jamal Hadi Salim wrote: > On 17-07-28 10:12 AM, Jiri Pirko wrote: >> Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote: > [..] >> >> Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't >> understand ****. Would be probably good to first apply my review comment >> on the function itselt, then to add the checks :) >> > > I havent even compiled/test that Jiri. > Just ignore the void * and assume it is a u32 *. > This compiled - but dont have much time right now to test. === static int validate_nla_bitfield32(const struct nlattr *nla, u32 *valid_flags_allowed) { const struct nla_bitfield32 *bf = nla_data(nla); u32 *valid_flags_mask = valid_flags_allowed; if (!valid_flags_allowed) return -EINVAL; /*disallow invalid selector */ if ((bf->selector & *valid_flags_allowed) > *valid_flags_allowed) return -EINVAL; /*disallow invalid bit values */ if (bf->value & ~*valid_flags_mask) return -EINVAL; /*disallow valid bit values that are not selected*/ if (bf->value & ~bf->selector) return -EINVAL; return 0; } ======== cheers, jamal
Fri, Jul 28, 2017 at 05:08:10PM CEST, jhs@mojatatu.com wrote: >On 17-07-28 10:52 AM, Jamal Hadi Salim wrote: >> On 17-07-28 10:12 AM, Jiri Pirko wrote: >> > Fri, Jul 28, 2017 at 03:41:44PM CEST, jhs@mojatatu.com wrote: >> [..] >> > >> > Looks like a big mess to be honest. Mixing up u32* u32 void*. I don't >> > understand ****. Would be probably good to first apply my review comment >> > on the function itselt, then to add the checks :) >> > >> >> I havent even compiled/test that Jiri. >> Just ignore the void * and assume it is a u32 *. >> > >This compiled - but dont have much time right now to test. >=== >static int validate_nla_bitfield32(const struct nlattr *nla, > u32 *valid_flags_allowed) >{ > const struct nla_bitfield32 *bf = nla_data(nla); > u32 *valid_flags_mask = valid_flags_allowed; good one :D > > if (!valid_flags_allowed) > return -EINVAL; > > /*disallow invalid selector */ > if ((bf->selector & *valid_flags_allowed) > *valid_flags_allowed) I don't get the ">".... Just (bf->selector & ~*valid_flags_allowed) should be enought, no? > return -EINVAL; > > /*disallow invalid bit values */ > if (bf->value & ~*valid_flags_mask) > return -EINVAL; > > /*disallow valid bit values that are not selected*/ > if (bf->value & ~bf->selector) > return -EINVAL; > > return 0; >} >======== > >cheers, >jamal
On 17-07-28 11:45 AM, Jiri Pirko wrote: > Fri, Jul 28, 2017 at 05:08:10PM CEST, jhs@mojatatu.com wrote: >> On 17-07-28 10:52 AM, Jamal Hadi Salim wrote: >>> On 17-07-28 10:12 AM, Jiri Pirko wrote: >> /*disallow invalid selector */ >> if ((bf->selector & *valid_flags_allowed) > *valid_flags_allowed) > > I don't get the ">".... > Just (bf->selector & ~*valid_flags_allowed) should be enought, no? > It may be enough - I will try it out tommorow. I was worried about the selector having more bits than the allowed flags. cheers, jamal
Sat, Jul 29, 2017 at 12:10:20AM CEST, jhs@mojatatu.com wrote: >On 17-07-28 11:45 AM, Jiri Pirko wrote: >> Fri, Jul 28, 2017 at 05:08:10PM CEST, jhs@mojatatu.com wrote: >> > On 17-07-28 10:52 AM, Jamal Hadi Salim wrote: >> > > On 17-07-28 10:12 AM, Jiri Pirko wrote: > > >> > /*disallow invalid selector */ >> > if ((bf->selector & *valid_flags_allowed) > *valid_flags_allowed) >> >> I don't get the ">".... >> Just (bf->selector & ~*valid_flags_allowed) should be enought, no? >> > >It may be enough - I will try it out tommorow. >I was worried about the selector having more bits than the allowed flags. That is what the code I wrote above do. And I think that your code does not (if you don't assume that allowed flags are always starting from the least significant bit) > >cheers, >jamal
diff --git a/include/net/netlink.h b/include/net/netlink.h index e33d1fb..87c0b15 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1207,6 +1207,18 @@ static inline struct in6_addr nla_get_in6_addr(const struct nlattr *nla) } /** + * nla_get_bitfield_32 - return payload of 32 bitfield attribute + * @nla: nla_bitfield_32 attribute + */ +static inline struct nla_bitfield_32 nla_get_bitfield_32(const struct nlattr *nla) +{ + struct nla_bitfield_32 tmp; + + nla_memcpy(&tmp, nla, sizeof(tmp)); + return tmp; +} + +/** * nla_memdup - duplicate attribute memory (kmemdup) * @src: netlink attribute to duplicate from * @gfp: GFP mask diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d148505..bfa80a6 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -683,10 +683,28 @@ struct tcamsg { unsigned char tca__pad1; unsigned short tca__pad2; }; + +enum { + TCA_ROOT_UNSPEC, + TCA_ROOT_TAB, +#define TCA_ACT_TAB TCA_ROOT_TAB +#define TCAA_MAX TCA_ROOT_TAB + TCA_ROOT_FLAGS, + TCA_ROOT_COUNT, + __TCA_ROOT_MAX, +#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1) +}; + #define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg)))) #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg)) -#define TCA_ACT_TAB 1 /* attr type must be >=1 */ -#define TCAA_MAX 1 +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS + * + * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO + * actions in a dump. All dump responses will contain the number of actions + * being dumped stored in for user app's consumption in TCA_ROOT_COUNT + * + */ +#define TCA_FLAG_LARGE_DUMP_ON (1 << 0) /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 848370e..15d6c46 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -110,6 +110,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, struct netlink_callback *cb) { int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; + u32 act_flags = cb->args[2]; struct nlattr *nest; spin_lock_bh(&hinfo->lock); @@ -138,14 +139,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, } nla_nest_end(skb, nest); n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) + if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && + n_i >= TCA_ACT_MAX_PRIO) goto done; } } done: spin_unlock_bh(&hinfo->lock); - if (n_i) + if (n_i) { cb->args[0] += n_i; + if (act_flags & TCA_FLAG_LARGE_DUMP_ON) + cb->args[1] = n_i; + } return n_i; nla_put_failure: @@ -1068,11 +1073,17 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, return tcf_add_notify(net, n, &actions, portid); } +static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; +static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { + [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD_32, + .validation_data = &tcaa_root_flags_allowed }, +}; + static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct nlattr *tca[TCAA_MAX + 1]; + struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; @@ -1080,7 +1091,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL, + ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; @@ -1121,16 +1132,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, return ret; } -static struct nlattr *find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(struct nlattr **nla) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct nlattr *nla[TCAA_MAX + 1]; struct nlattr *kind; - if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, - NULL, NULL) < 0) - return NULL; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; @@ -1157,8 +1164,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action_ops *a_o; int ret = 0; struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); - struct nlattr *kind = find_dump_kind(cb->nlh); + struct nla_bitfield_32 fb; + struct nlattr *count_attr = NULL; + struct nlattr *tb[TCA_ROOT_MAX + 1]; + struct nlattr *kind = NULL; + u32 act_count = 0; + + ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, + tcaa_policy, NULL); + if (ret < 0) + return ret; + kind = find_dump_kind(tb); if (kind == NULL) { pr_info("tc_dump_action: action bad kind\n"); return 0; @@ -1168,14 +1185,22 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (a_o == NULL) return 0; + if (tb[TCA_ROOT_FLAGS]) + fb = nla_get_bitfield_32(tb[TCA_ROOT_FLAGS]); + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; + + cb->args[2] = fb.nla_value; t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; + count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32)); + if (!count_attr) + goto out_module_put; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) @@ -1188,6 +1213,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (ret > 0) { nla_nest_end(skb, nest); ret = skb->len; + act_count = cb->args[1]; + memcpy(nla_data(count_attr), &act_count, sizeof(u32)); + cb->args[1] = 0; } else nlmsg_trim(skb, b);