diff mbox

[net-next,v12,0/4] net sched actions: improve dump performance

Message ID 466dd890-ffae-4cdb-dc97-d173bd390148@mojatatu.com
State Changes Requested, archived
Delegated to: stephen hemminger
Headers show

Commit Message

Jamal Hadi Salim July 31, 2017, 12:06 p.m. UTC
On 17-07-30 10:28 PM, David Miller wrote:
> 
> Series applied, thanks.
> 

Thanks David.

Attaching the iproute2 patch. I will submit an official one with
man page  changes later. Stephen - you take net-next changes?

cheers,
jamal

Comments

Stephen Hemminger July 31, 2017, 11:43 p.m. UTC | #1
On Mon, 31 Jul 2017 08:06:42 -0400
Jamal Hadi Salim <jhs@mojatatu.com> wrote:

> On 17-07-30 10:28 PM, David Miller wrote:
> > 
> > Series applied, thanks.
> >   
> 
> Thanks David.
> 
> Attaching the iproute2 patch. I will submit an official one with
> man page  changes later. Stephen - you take net-next changes?
> 
> cheers,
> jamal

I will fix this up. The kernel headers for iproute2 come from sanitized
kernel headers (not a direct copy).
Stephen Hemminger Aug. 1, 2017, 3:54 a.m. UTC | #2
On Mon, 31 Jul 2017 08:06:42 -0400
Jamal Hadi Salim <jhs@mojatatu.com> wrote:

> On 17-07-30 10:28 PM, David Miller wrote:
> > 
> > Series applied, thanks.
> >   
> 
> Thanks David.
> 
> Attaching the iproute2 patch. I will submit an official one with
> man page  changes later. Stephen - you take net-next changes?
> 
> cheers,
> jamal

Please clean up and resubmit for net-next.

The header files have been updated in iproute2 net-next branch.

It is not clear to me that the new code is backward compatible.
Will new versions of tc work on old kernels and vice versa?


Also, no #ifdef's
Jamal Hadi Salim Aug. 1, 2017, 11:05 a.m. UTC | #3
On 17-07-31 11:54 PM, Stephen Hemminger wrote:
> On Mon, 31 Jul 2017 08:06:42 -0400
> Jamal Hadi Salim <jhs@mojatatu.com> wrote:
> 

[..]
> Please clean up and resubmit for net-next.
>

Will do.

> The header files have been updated in iproute2 net-next branch.
> 

When does net-next show up? I noticed that some changes - for example,
Jiri's multi-table changes - are not in the tree (I believe they were
submitted as part of net-next).

> It is not clear to me that the new code is backward compatible.
> Will new versions of tc work on old kernels and vice versa?
> 

As far as I know, and as far as I have tested, it is.

> 
> Also, no #ifdef's

Those will go away. The intention was to test things which will be
rejected (in case some other app in the future uses this feature).

cheers,
jamal
diff mbox

Patch

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 3a53b9a..f4fc9c9 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -1,5 +1,5 @@ 
-#ifndef __LINUX_NETLINK_H
-#define __LINUX_NETLINK_H
+#ifndef _UAPI__LINUX_NETLINK_H
+#define _UAPI__LINUX_NETLINK_H
 
 #include <linux/kernel.h>
 #include <linux/socket.h> /* for __kernel_sa_family_t */
@@ -143,8 +143,10 @@  enum nlmsgerr_attrs {
 #define NETLINK_PKTINFO			3
 #define NETLINK_BROADCAST_ERROR		4
 #define NETLINK_NO_ENOBUFS		5
+#ifndef __KERNEL__
 #define NETLINK_RX_RING			6
 #define NETLINK_TX_RING			7
+#endif
 #define NETLINK_LISTEN_ALL_NSID		8
 #define NETLINK_LIST_MEMBERSHIPS	9
 #define NETLINK_CAP_ACK			10
@@ -171,6 +173,7 @@  struct nl_mmap_hdr {
 	__u32		nm_gid;
 };
 
+#ifndef __KERNEL__
 enum nl_mmap_status {
 	NL_MMAP_STATUS_UNUSED,
 	NL_MMAP_STATUS_RESERVED,
@@ -182,6 +185,7 @@  enum nl_mmap_status {
 #define NL_MMAP_MSG_ALIGNMENT		NLMSG_ALIGNTO
 #define NL_MMAP_MSG_ALIGN(sz)		__ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
 #define NL_MMAP_HDRLEN			NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
+#endif
 
 #define NET_MAJOR 36		/* Major 36 is reserved for networking 						*/
 
@@ -222,5 +226,22 @@  struct nlattr {
 #define NLA_ALIGN(len)		(((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
 #define NLA_HDRLEN		((int) NLA_ALIGN(sizeof(struct nlattr)))
 
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set
+ * The selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ *  value = 0x0, and selector = 0x1
+ *  implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ *  value = 0x2, and selector = 0x2
+ *  implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+	__u32 value;
+	__u32 selector;
+};
 
-#endif /* __LINUX_NETLINK_H */
+#endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 1d62dad..dab7dad 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -1,5 +1,5 @@ 
-#ifndef __LINUX_RTNETLINK_H
-#define __LINUX_RTNETLINK_H
+#ifndef _UAPI__LINUX_RTNETLINK_H
+#define _UAPI__LINUX_RTNETLINK_H
 
 #include <linux/types.h>
 #include <linux/netlink.h>
@@ -586,6 +586,7 @@  enum {
 
 #define NDUSEROPT_MAX	(__NDUSEROPT_MAX - 1)
 
+#ifndef __KERNEL__
 /* RTnetlink multicast groups - backwards compatibility for userspace */
 #define RTMGRP_LINK		1
 #define RTMGRP_NOTIFY		2
@@ -606,6 +607,7 @@  enum {
 #define RTMGRP_DECnet_ROUTE     0x4000
 
 #define RTMGRP_IPV6_PREFIX	0x20000
+#endif
 
 /* RTnetlink multicast groups */
 enum rtnetlink_groups {
@@ -681,10 +683,29 @@  struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCA_ROOT_UNSPEC,
+	TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+	TCA_ROOT_FLAGS,
+	TCA_ROOT_COUNT,
+	TCA_ROOT_TIME_DELTA, /* in msecs */
+	__TCA_ROOT_MAX,
+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request more than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped, stored in TCA_ROOT_COUNT for the user app's consumption.
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
@@ -696,4 +717,4 @@  struct tcamsg {
 
 
 
-#endif /* __LINUX_RTNETLINK_H */
+#endif /* _UAPI__LINUX_RTNETLINK_H */
diff --git a/tc/f_basic.c b/tc/f_basic.c
index d663668..8370ea6 100644
--- a/tc/f_basic.c
+++ b/tc/f_basic.c
@@ -135,7 +135,7 @@  static int basic_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_BASIC_ACT]) {
-		tc_print_action(f, tb[TCA_BASIC_ACT]);
+		tc_print_action(f, tb[TCA_BASIC_ACT], 0);
 	}
 
 	return 0;
diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index 2f8d12a..c115409 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -239,7 +239,7 @@  static int bpf_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_BPF_ACT])
-		tc_print_action(f, tb[TCA_BPF_ACT]);
+		tc_print_action(f, tb[TCA_BPF_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/f_cgroup.c b/tc/f_cgroup.c
index ecf9909..633700e 100644
--- a/tc/f_cgroup.c
+++ b/tc/f_cgroup.c
@@ -102,7 +102,7 @@  static int cgroup_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_CGROUP_ACT])
-		tc_print_action(f, tb[TCA_CGROUP_ACT]);
+		tc_print_action(f, tb[TCA_CGROUP_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/f_flow.c b/tc/f_flow.c
index 09ddcaa..b157104 100644
--- a/tc/f_flow.c
+++ b/tc/f_flow.c
@@ -347,7 +347,7 @@  static int flow_print_opt(struct filter_util *fu, FILE *f, struct rtattr *opt,
 		tc_print_police(f, tb[TCA_FLOW_POLICE]);
 	if (tb[TCA_FLOW_ACT]) {
 		fprintf(f, "\n");
-		tc_print_action(f, tb[TCA_FLOW_ACT]);
+		tc_print_action(f, tb[TCA_FLOW_ACT], 0);
 	}
 	return 0;
 }
diff --git a/tc/f_flower.c b/tc/f_flower.c
index 5be693a..934832e 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -1316,7 +1316,7 @@  static int flower_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_FLOWER_ACT])
-		tc_print_action(f, tb[TCA_FLOWER_ACT]);
+		tc_print_action(f, tb[TCA_FLOWER_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/f_fw.c b/tc/f_fw.c
index 790bef9..c39789b 100644
--- a/tc/f_fw.c
+++ b/tc/f_fw.c
@@ -160,7 +160,7 @@  static int fw_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, __u
 
 	if (tb[TCA_FW_ACT]) {
 		fprintf(f, "\n");
-		tc_print_action(f, tb[TCA_FW_ACT]);
+		tc_print_action(f, tb[TCA_FW_ACT], 0);
 	}
 	return 0;
 }
diff --git a/tc/f_matchall.c b/tc/f_matchall.c
index 5a51e75..d78660e 100644
--- a/tc/f_matchall.c
+++ b/tc/f_matchall.c
@@ -145,7 +145,7 @@  static int matchall_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_MATCHALL_ACT])
-		tc_print_action(f, tb[TCA_MATCHALL_ACT]);
+		tc_print_action(f, tb[TCA_MATCHALL_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/f_route.c b/tc/f_route.c
index 30514c4..e88313f 100644
--- a/tc/f_route.c
+++ b/tc/f_route.c
@@ -168,7 +168,7 @@  static int route_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
 	if (tb[TCA_ROUTE4_POLICE])
 		tc_print_police(f, tb[TCA_ROUTE4_POLICE]);
 	if (tb[TCA_ROUTE4_ACT])
-		tc_print_action(f, tb[TCA_ROUTE4_ACT]);
+		tc_print_action(f, tb[TCA_ROUTE4_ACT], 0);
 	return 0;
 }
 
diff --git a/tc/f_rsvp.c b/tc/f_rsvp.c
index 94bfbef..65caeb4 100644
--- a/tc/f_rsvp.c
+++ b/tc/f_rsvp.c
@@ -402,7 +402,7 @@  static int rsvp_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, _
 	}
 
 	if (tb[TCA_RSVP_ACT]) {
-		tc_print_action(f, tb[TCA_RSVP_ACT]);
+		tc_print_action(f, tb[TCA_RSVP_ACT], 0);
 	}
 	if (tb[TCA_RSVP_POLICE])
 		tc_print_police(f, tb[TCA_RSVP_POLICE]);
diff --git a/tc/f_tcindex.c b/tc/f_tcindex.c
index 784c890..dd1cb47 100644
--- a/tc/f_tcindex.c
+++ b/tc/f_tcindex.c
@@ -173,7 +173,7 @@  static int tcindex_print_opt(struct filter_util *qu, FILE *f,
 	}
 	if (tb[TCA_TCINDEX_ACT]) {
 		fprintf(f, "\n");
-		tc_print_action(f, tb[TCA_TCINDEX_ACT]);
+		tc_print_action(f, tb[TCA_TCINDEX_ACT], 0);
 	}
 	return 0;
 }
diff --git a/tc/f_u32.c b/tc/f_u32.c
index b272c2c..5815be9 100644
--- a/tc/f_u32.c
+++ b/tc/f_u32.c
@@ -1337,7 +1337,7 @@  static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
 	}
 
 	if (tb[TCA_U32_ACT])
-		tc_print_action(f, tb[TCA_U32_ACT]);
+		tc_print_action(f, tb[TCA_U32_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/m_action.c b/tc/m_action.c
index 6ebe85e..123295c 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -346,21 +346,24 @@  tc_print_action_flush(FILE *f, const struct rtattr *arg)
 }
 
 int
-tc_print_action(FILE *f, const struct rtattr *arg)
+tc_print_action(FILE *f, const struct rtattr *arg, unsigned short tot_acts)
 {
 
 	int i;
-	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
 
 	if (arg == NULL)
 		return 0;
 
-	parse_rtattr_nested(tb, TCA_ACT_MAX_PRIO, arg);
+	if (!tot_acts)
+		tot_acts = TCA_ACT_MAX_PRIO;
+
+	struct rtattr *tb[tot_acts + 1];
+	parse_rtattr_nested(tb, tot_acts, arg);
 
 	if (tab_flush && NULL != tb[0]  && NULL == tb[1])
 		return tc_print_action_flush(f, tb[0]);
 
-	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
+	for (i = 0; i < tot_acts; i++) {
 		if (tb[i]) {
 			fprintf(f, "\n\taction order %d: ", i);
 			if (tc_print_one_action(f, tb[i]) < 0) {
@@ -380,7 +383,8 @@  int print_action(const struct sockaddr_nl *who,
 	FILE *fp = (FILE *)arg;
 	struct tcamsg *t = NLMSG_DATA(n);
 	int len = n->nlmsg_len;
-	struct rtattr *tb[TCAA_MAX+1];
+	__u32 *tot_acts = NULL;
+	struct rtattr *tb[TCA_ROOT_MAX+1];
 
 	len -= NLMSG_LENGTH(sizeof(*t));
 
@@ -389,8 +393,12 @@  int print_action(const struct sockaddr_nl *who,
 		return -1;
 	}
 
-	parse_rtattr(tb, TCAA_MAX, TA_RTA(t), len);
+	parse_rtattr(tb, TCA_ROOT_MAX, TA_RTA(t), len);
+
+	if (tb[TCA_ROOT_COUNT])
+		tot_acts = RTA_DATA(tb[TCA_ROOT_COUNT]);
 
+	fprintf(fp, "total acts %d \n", tot_acts?*tot_acts:0);
 	if (tb[TCA_ACT_TAB] == NULL) {
 		if (n->nlmsg_type != RTM_GETACTION)
 			fprintf(stderr, "print_action: NULL kind\n");
@@ -414,7 +422,9 @@  int print_action(const struct sockaddr_nl *who,
 			fprintf(fp, "Replaced action ");
 		}
 	}
-	tc_print_action(fp, tb[TCA_ACT_TAB]);
+
+
+	tc_print_action(fp, tb[TCA_ACT_TAB], tot_acts?*tot_acts:0);
 
 	return 0;
 }
@@ -427,7 +437,7 @@  static int tc_action_gd(int cmd, unsigned int flags, int *argc_p, char ***argv_p
 	char **argv = *argv_p;
 	int prio = 0;
 	int ret = 0;
-	__u32 i;
+	__u32 i = 0;
 	struct rtattr *tail;
 	struct rtattr *tail2;
 	struct nlmsghdr *ans = NULL;
@@ -498,7 +508,8 @@  static int tc_action_gd(int cmd, unsigned int flags, int *argc_p, char ***argv_p
 		tail2 = NLMSG_TAIL(&req.n);
 		addattr_l(&req.n, MAX_MSG, ++prio, NULL, 0);
 		addattr_l(&req.n, MAX_MSG, TCA_ACT_KIND, k, strlen(k) + 1);
-		addattr32(&req.n, MAX_MSG, TCA_ACT_INDEX, i);
+		if (i > 0)
+			addattr32(&req.n, MAX_MSG, TCA_ACT_INDEX, i);
 		tail2->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail2;
 
 	}
@@ -561,12 +572,16 @@  static int tc_action_modify(int cmd, unsigned int flags, int *argc_p, char ***ar
 	return ret;
 }
 
-static int tc_act_list_or_flush(int argc, char **argv, int event)
+static int tc_act_list_or_flush(int *argc_p, char ***argv_p, int event)
 {
+	struct rtattr *tail, *tail2, *tail3, *tail4;
 	int ret = 0, prio = 0, msg_size = 0;
-	char k[16];
-	struct rtattr *tail, *tail2;
 	struct action_util *a = NULL;
+	struct nla_bitfield32 flag_select = { 0 };
+	char **argv = *argv_p;
+	__u32 msec_since = 0;
+	int argc = *argc_p;
+	char k[16];
 	struct {
 		struct nlmsghdr         n;
 		struct tcamsg           t;
@@ -597,11 +612,40 @@  static int tc_act_list_or_flush(int argc, char **argv, int event)
 	}
 	strncpy(k, *argv, sizeof(k) - 1);
 
+	argc -= 1;
+	argv += 1;
+
+	if (argc && (strcmp(*argv, "since") == 0)) {
+	    NEXT_ARG();
+	    if (get_u32(&msec_since, *argv, 0))
+		    invarg("dump time \"since\" is invalid", *argv);
+	}
+
 	addattr_l(&req.n, MAX_MSG, ++prio, NULL, 0);
 	addattr_l(&req.n, MAX_MSG, TCA_ACT_KIND, k, strlen(k) + 1);
 	tail2->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail2;
 	tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail;
 
+	tail3 = NLMSG_TAIL(&req.n);
+#if 1
+	flag_select.value |= TCA_FLAG_LARGE_DUMP_ON;
+	flag_select.selector |= TCA_FLAG_LARGE_DUMP_ON;
+#endif
+#if 0
+	flag_select.value |= 8; /* test rejection */
+	flag_select.selector |= 8; /* test rejection */
+	flag_select.value = 0; /* test rejection */
+	flag_select.selector |= TCA_FLAG_LARGE_DUMP_ON; /* test rejection */
+#endif
+	addattr_l(&req.n, MAX_MSG, TCA_ROOT_FLAGS, &flag_select,
+		  sizeof(struct nla_bitfield32));
+	tail3->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail3;
+	if (msec_since) {
+		fprintf(stderr, "XXX: since %d\n", msec_since);
+		tail4 = NLMSG_TAIL(&req.n);
+		addattr32(&req.n, MAX_MSG, TCA_ROOT_TIME_DELTA, msec_since);
+		tail4->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail4;
+	}
 	msg_size = NLMSG_ALIGN(req.n.nlmsg_len) - NLMSG_ALIGN(sizeof(struct nlmsghdr));
 
 	if (event == RTM_GETACTION) {
@@ -626,6 +670,8 @@  static int tc_act_list_or_flush(int argc, char **argv, int event)
 
 bad_val:
 
+	*argc_p = argc;
+	*argv_p = argv;
 	return ret;
 }
 
@@ -655,13 +701,21 @@  int do_action(int argc, char **argv)
 				act_usage();
 				return -1;
 			}
-			return tc_act_list_or_flush(argc-2, argv+2, RTM_GETACTION);
+
+			argc -= 2;
+			argv += 2;
+			return tc_act_list_or_flush(&argc, &argv,
+						    RTM_GETACTION);
 		} else if (matches(*argv, "flush") == 0) {
 			if (argc <= 2) {
 				act_usage();
 				return -1;
 			}
-			return tc_act_list_or_flush(argc-2, argv+2, RTM_DELACTION);
+
+			argc -= 2;
+			argv += 2;
+			return tc_act_list_or_flush(&argc, &argv,
+						    RTM_DELACTION);
 		} else if (matches(*argv, "help") == 0) {
 			act_usage();
 			return -1;
diff --git a/tc/tc_util.h b/tc/tc_util.h
index 5c54ad3..583a21a 100644
--- a/tc/tc_util.h
+++ b/tc/tc_util.h
@@ -113,7 +113,7 @@  int act_parse_police(struct action_util *a, int *argc_p,
 		     char ***argv_p, int tca_id, struct nlmsghdr *n);
 int print_police(struct action_util *a, FILE *f, struct rtattr *tb);
 int police_print_xstats(struct action_util *a, FILE *f, struct rtattr *tb);
-int tc_print_action(FILE *f, const struct rtattr *tb);
+int tc_print_action(FILE *f, const struct rtattr *tb, unsigned short tot_acts);
 int tc_print_ipt(FILE *f, const struct rtattr *tb);
 int parse_action(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n);
 void print_tm(FILE *f, const struct tcf_t *tm);