diff mbox

[net-next,v10,0/4] net sched actions: improve dump performance

Message ID c7af9475-ac29-6860-098f-43b225af89eb@mojatatu.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Jamal Hadi Salim June 11, 2017, 12:06 p.m. UTC
Attached iproute2 used for testing these features..

cheers,
jamal
diff mbox

Patch

diff --git a/Makefile b/Makefile
index 18de7dc..ed3c10a 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@  HOSTCC ?= $(CC)
 DEFINES += -D_GNU_SOURCE
 # Turn on transparent support for LFS
 DEFINES += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
-CCOPTS = -O2
+CCOPTS = -g
 WFLAGS := -Wall -Wstrict-prototypes  -Wmissing-prototypes
 WFLAGS += -Wmissing-declarations -Wold-style-definition -Wformat=2
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index a96db83..988ebc2 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -1,5 +1,5 @@ 
-#ifndef __LINUX_RTNETLINK_H
-#define __LINUX_RTNETLINK_H
+#ifndef _UAPI__LINUX_RTNETLINK_H
+#define _UAPI__LINUX_RTNETLINK_H
 
 #include <linux/types.h>
 #include <linux/netlink.h>
@@ -179,6 +179,23 @@  struct rtattr {
 #define RTA_DATA(rta)   ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
 #define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
 
+/* Generic bitflags attribute content sent to the kernel.
+ *
+ * The nla_flag_values is a bitmap that defines the values being set
+ * The nla_flag_selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ *  nla_flag_values = 0x0, and nla_flag_selector = 0x1
+ *  implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ *  nla_flag_values = 0x2, and nla_flag_selector = 0x2
+ *  implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct __nla_bit_flags {
+	__u32 nla_flag_values;
+	__u32 nla_flag_selector;
+};
 
 
 
@@ -278,6 +295,7 @@  enum rt_scope_t {
 #define RTM_F_EQUALIZE		0x400	/* Multipath equalizer: NI	*/
 #define RTM_F_PREFIX		0x800	/* Prefix addresses		*/
 #define RTM_F_LOOKUP_TABLE	0x1000	/* set rtm_table to FIB lookup result */
+#define RTM_F_FIB_MATCH	        0x2000	/* return full fib lookup match */
 
 /* Reserved table identifiers */
 
@@ -549,6 +567,7 @@  enum {
 	TCA_STAB,
 	TCA_PAD,
 	TCA_DUMP_INVISIBLE,
+	TCA_CHAIN,
 	__TCA_MAX
 };
 
@@ -581,6 +600,7 @@  enum {
 
 #define NDUSEROPT_MAX	(__NDUSEROPT_MAX - 1)
 
+#ifndef __KERNEL__
 /* RTnetlink multicast groups - backwards compatibility for userspace */
 #define RTMGRP_LINK		1
 #define RTMGRP_NOTIFY		2
@@ -601,6 +621,7 @@  enum {
 #define RTMGRP_DECnet_ROUTE     0x4000
 
 #define RTMGRP_IPV6_PREFIX	0x20000
+#endif
 
 /* RTnetlink multicast groups */
 enum rtnetlink_groups {
@@ -672,10 +693,29 @@  struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCA_ROOT_UNSPEC,
+	TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+	TCA_ROOT_FLAGS,
+	TCA_ROOT_COUNT,
+	TCA_ROOT_TIME_DELTA, /* in msecs */
+	__TCA_ROOT_MAX,
+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
@@ -687,4 +727,4 @@  struct tcamsg {
 
 
 
-#endif /* __LINUX_RTNETLINK_H */
+#endif /* _UAPI__LINUX_RTNETLINK_H */
diff --git a/tc/f_basic.c b/tc/f_basic.c
index d663668..8370ea6 100644
--- a/tc/f_basic.c
+++ b/tc/f_basic.c
@@ -135,7 +135,7 @@  static int basic_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_BASIC_ACT]) {
-		tc_print_action(f, tb[TCA_BASIC_ACT]);
+		tc_print_action(f, tb[TCA_BASIC_ACT], 0);
 	}
 
 	return 0;
diff --git a/tc/f_bpf.c b/tc/f_bpf.c
index 75c44c0..6581293 100644
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -236,7 +236,7 @@  static int bpf_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_BPF_ACT])
-		tc_print_action(f, tb[TCA_BPF_ACT]);
+		tc_print_action(f, tb[TCA_BPF_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/f_cgroup.c b/tc/f_cgroup.c
index ecf9909..633700e 100644
--- a/tc/f_cgroup.c
+++ b/tc/f_cgroup.c
@@ -102,7 +102,7 @@  static int cgroup_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_CGROUP_ACT])
-		tc_print_action(f, tb[TCA_CGROUP_ACT]);
+		tc_print_action(f, tb[TCA_CGROUP_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/f_flow.c b/tc/f_flow.c
index 09ddcaa..b157104 100644
--- a/tc/f_flow.c
+++ b/tc/f_flow.c
@@ -347,7 +347,7 @@  static int flow_print_opt(struct filter_util *fu, FILE *f, struct rtattr *opt,
 		tc_print_police(f, tb[TCA_FLOW_POLICE]);
 	if (tb[TCA_FLOW_ACT]) {
 		fprintf(f, "\n");
-		tc_print_action(f, tb[TCA_FLOW_ACT]);
+		tc_print_action(f, tb[TCA_FLOW_ACT], 0);
 	}
 	return 0;
 }
diff --git a/tc/f_flower.c b/tc/f_flower.c
index ebc63ca..d015ade 100644
--- a/tc/f_flower.c
+++ b/tc/f_flower.c
@@ -1179,7 +1179,7 @@  static int flower_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_FLOWER_ACT])
-		tc_print_action(f, tb[TCA_FLOWER_ACT]);
+		tc_print_action(f, tb[TCA_FLOWER_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/f_fw.c b/tc/f_fw.c
index 790bef9..c39789b 100644
--- a/tc/f_fw.c
+++ b/tc/f_fw.c
@@ -160,7 +160,7 @@  static int fw_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, __u
 
 	if (tb[TCA_FW_ACT]) {
 		fprintf(f, "\n");
-		tc_print_action(f, tb[TCA_FW_ACT]);
+		tc_print_action(f, tb[TCA_FW_ACT], 0);
 	}
 	return 0;
 }
diff --git a/tc/f_matchall.c b/tc/f_matchall.c
index 5a51e75..d78660e 100644
--- a/tc/f_matchall.c
+++ b/tc/f_matchall.c
@@ -145,7 +145,7 @@  static int matchall_print_opt(struct filter_util *qu, FILE *f,
 	}
 
 	if (tb[TCA_MATCHALL_ACT])
-		tc_print_action(f, tb[TCA_MATCHALL_ACT]);
+		tc_print_action(f, tb[TCA_MATCHALL_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/f_route.c b/tc/f_route.c
index 30514c4..e88313f 100644
--- a/tc/f_route.c
+++ b/tc/f_route.c
@@ -168,7 +168,7 @@  static int route_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
 	if (tb[TCA_ROUTE4_POLICE])
 		tc_print_police(f, tb[TCA_ROUTE4_POLICE]);
 	if (tb[TCA_ROUTE4_ACT])
-		tc_print_action(f, tb[TCA_ROUTE4_ACT]);
+		tc_print_action(f, tb[TCA_ROUTE4_ACT], 0);
 	return 0;
 }
 
diff --git a/tc/f_rsvp.c b/tc/f_rsvp.c
index 94bfbef..65caeb4 100644
--- a/tc/f_rsvp.c
+++ b/tc/f_rsvp.c
@@ -402,7 +402,7 @@  static int rsvp_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt, _
 	}
 
 	if (tb[TCA_RSVP_ACT]) {
-		tc_print_action(f, tb[TCA_RSVP_ACT]);
+		tc_print_action(f, tb[TCA_RSVP_ACT], 0);
 	}
 	if (tb[TCA_RSVP_POLICE])
 		tc_print_police(f, tb[TCA_RSVP_POLICE]);
diff --git a/tc/f_tcindex.c b/tc/f_tcindex.c
index 784c890..dd1cb47 100644
--- a/tc/f_tcindex.c
+++ b/tc/f_tcindex.c
@@ -173,7 +173,7 @@  static int tcindex_print_opt(struct filter_util *qu, FILE *f,
 	}
 	if (tb[TCA_TCINDEX_ACT]) {
 		fprintf(f, "\n");
-		tc_print_action(f, tb[TCA_TCINDEX_ACT]);
+		tc_print_action(f, tb[TCA_TCINDEX_ACT], 0);
 	}
 	return 0;
 }
diff --git a/tc/f_u32.c b/tc/f_u32.c
index ff700e9..4abb2fa 100644
--- a/tc/f_u32.c
+++ b/tc/f_u32.c
@@ -1337,7 +1337,7 @@  static int u32_print_opt(struct filter_util *qu, FILE *f, struct rtattr *opt,
 	}
 
 	if (tb[TCA_U32_ACT])
-		tc_print_action(f, tb[TCA_U32_ACT]);
+		tc_print_action(f, tb[TCA_U32_ACT], 0);
 
 	return 0;
 }
diff --git a/tc/m_action.c b/tc/m_action.c
index 6ebe85e..88a377c 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -346,21 +346,24 @@  tc_print_action_flush(FILE *f, const struct rtattr *arg)
 }
 
 int
-tc_print_action(FILE *f, const struct rtattr *arg)
+tc_print_action(FILE *f, const struct rtattr *arg, unsigned short tot_acts)
 {
 
 	int i;
-	struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
 
 	if (arg == NULL)
 		return 0;
 
-	parse_rtattr_nested(tb, TCA_ACT_MAX_PRIO, arg);
+	if (!tot_acts)
+		tot_acts = TCA_ACT_MAX_PRIO;
+
+	struct rtattr *tb[tot_acts + 1];
+	parse_rtattr_nested(tb, tot_acts, arg);
 
 	if (tab_flush && NULL != tb[0]  && NULL == tb[1])
 		return tc_print_action_flush(f, tb[0]);
 
-	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
+	for (i = 0; i < tot_acts; i++) {
 		if (tb[i]) {
 			fprintf(f, "\n\taction order %d: ", i);
 			if (tc_print_one_action(f, tb[i]) < 0) {
@@ -380,7 +383,8 @@  int print_action(const struct sockaddr_nl *who,
 	FILE *fp = (FILE *)arg;
 	struct tcamsg *t = NLMSG_DATA(n);
 	int len = n->nlmsg_len;
-	struct rtattr *tb[TCAA_MAX+1];
+	__u32 *tot_acts = NULL;
+	struct rtattr *tb[TCA_ROOT_MAX+1];
 
 	len -= NLMSG_LENGTH(sizeof(*t));
 
@@ -389,8 +393,12 @@  int print_action(const struct sockaddr_nl *who,
 		return -1;
 	}
 
-	parse_rtattr(tb, TCAA_MAX, TA_RTA(t), len);
+	parse_rtattr(tb, TCA_ROOT_MAX, TA_RTA(t), len);
+
+	if (tb[TCA_ROOT_COUNT])
+		tot_acts = RTA_DATA(tb[TCA_ROOT_COUNT]);
 
+	fprintf(fp, "total acts %d \n", tot_acts?*tot_acts:0);
 	if (tb[TCA_ACT_TAB] == NULL) {
 		if (n->nlmsg_type != RTM_GETACTION)
 			fprintf(stderr, "print_action: NULL kind\n");
@@ -414,7 +422,9 @@  int print_action(const struct sockaddr_nl *who,
 			fprintf(fp, "Replaced action ");
 		}
 	}
-	tc_print_action(fp, tb[TCA_ACT_TAB]);
+
+
+	tc_print_action(fp, tb[TCA_ACT_TAB], tot_acts?*tot_acts:0);
 
 	return 0;
 }
@@ -427,7 +437,7 @@  static int tc_action_gd(int cmd, unsigned int flags, int *argc_p, char ***argv_p
 	char **argv = *argv_p;
 	int prio = 0;
 	int ret = 0;
-	__u32 i;
+	__u32 i = 0;
 	struct rtattr *tail;
 	struct rtattr *tail2;
 	struct nlmsghdr *ans = NULL;
@@ -498,7 +508,8 @@  static int tc_action_gd(int cmd, unsigned int flags, int *argc_p, char ***argv_p
 		tail2 = NLMSG_TAIL(&req.n);
 		addattr_l(&req.n, MAX_MSG, ++prio, NULL, 0);
 		addattr_l(&req.n, MAX_MSG, TCA_ACT_KIND, k, strlen(k) + 1);
-		addattr32(&req.n, MAX_MSG, TCA_ACT_INDEX, i);
+		if (i > 0)
+			addattr32(&req.n, MAX_MSG, TCA_ACT_INDEX, i);
 		tail2->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail2;
 
 	}
@@ -561,12 +572,16 @@  static int tc_action_modify(int cmd, unsigned int flags, int *argc_p, char ***ar
 	return ret;
 }
 
-static int tc_act_list_or_flush(int argc, char **argv, int event)
+static int tc_act_list_or_flush(int *argc_p, char ***argv_p, int event)
 {
+	struct rtattr *tail, *tail2, *tail3, *tail4;
 	int ret = 0, prio = 0, msg_size = 0;
-	char k[16];
-	struct rtattr *tail, *tail2;
 	struct action_util *a = NULL;
+	struct __nla_bit_flags flag_select = { 0 };
+	char **argv = *argv_p;
+	__u32 msec_since = 0;
+	int argc = *argc_p;
+	char k[16];
 	struct {
 		struct nlmsghdr         n;
 		struct tcamsg           t;
@@ -597,11 +612,38 @@  static int tc_act_list_or_flush(int argc, char **argv, int event)
 	}
 	strncpy(k, *argv, sizeof(k) - 1);
 
+	argc -= 1;
+	argv += 1;
+
+	fprintf(stderr, "%d: <%s>\n", argc, *argv);
+
+	if (argc && (strcmp(*argv, "since") == 0)) {
+	    NEXT_ARG();
+	    if (get_u32(&msec_since, *argv, 0))
+		    invarg("dump time \"since\" is invalid", *argv);
+	}
+
 	addattr_l(&req.n, MAX_MSG, ++prio, NULL, 0);
 	addattr_l(&req.n, MAX_MSG, TCA_ACT_KIND, k, strlen(k) + 1);
 	tail2->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail2;
 	tail->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail;
 
+	tail3 = NLMSG_TAIL(&req.n);
+	flag_select.nla_flag_values |= TCA_FLAG_LARGE_DUMP_ON;
+	flag_select.nla_flag_selector |= TCA_FLAG_LARGE_DUMP_ON;
+#if 0
+	flag_select.nla_flag_values |= 8; /* test rejection */
+	flag_select.nla_flag_selector |= 8; /* test rejection */
+#endif
+	addattr_l(&req.n, MAX_MSG, TCA_ROOT_FLAGS, &flag_select,
+		  sizeof(struct __nla_bit_flags));
+	tail3->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail3;
+	if (msec_since) {
+		fprintf(stderr, "XXX: since %d\n", msec_since);
+		tail4 = NLMSG_TAIL(&req.n);
+		addattr32(&req.n, MAX_MSG, TCA_ROOT_TIME_DELTA, msec_since);
+		tail4->rta_len = (void *) NLMSG_TAIL(&req.n) - (void *) tail4;
+	}
 	msg_size = NLMSG_ALIGN(req.n.nlmsg_len) - NLMSG_ALIGN(sizeof(struct nlmsghdr));
 
 	if (event == RTM_GETACTION) {
@@ -626,6 +668,8 @@  static int tc_act_list_or_flush(int argc, char **argv, int event)
 
 bad_val:
 
+	*argc_p = argc;
+	*argv_p = argv;
 	return ret;
 }
 
@@ -655,13 +699,21 @@  int do_action(int argc, char **argv)
 				act_usage();
 				return -1;
 			}
-			return tc_act_list_or_flush(argc-2, argv+2, RTM_GETACTION);
+
+			argc -= 2;
+			argv += 2;
+			return tc_act_list_or_flush(&argc, &argv,
+						    RTM_GETACTION);
 		} else if (matches(*argv, "flush") == 0) {
 			if (argc <= 2) {
 				act_usage();
 				return -1;
 			}
-			return tc_act_list_or_flush(argc-2, argv+2, RTM_DELACTION);
+
+			argc -= 2;
+			argv += 2;
+			return tc_act_list_or_flush(&argc, &argv,
+						    RTM_DELACTION);
 		} else if (matches(*argv, "help") == 0) {
 			act_usage();
 			return -1;
diff --git a/tc/tc_util.h b/tc/tc_util.h
index 4db26c6..af578f9 100644
--- a/tc/tc_util.h
+++ b/tc/tc_util.h
@@ -106,7 +106,7 @@  int act_parse_police(struct action_util *a, int *argc_p,
 		     char ***argv_p, int tca_id, struct nlmsghdr *n);
 int print_police(struct action_util *a, FILE *f, struct rtattr *tb);
 int police_print_xstats(struct action_util *a, FILE *f, struct rtattr *tb);
-int tc_print_action(FILE *f, const struct rtattr *tb);
+int tc_print_action(FILE *f, const struct rtattr *tb, unsigned short tot_acts);
 int tc_print_ipt(FILE *f, const struct rtattr *tb);
 int parse_action(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n);
 void print_tm(FILE *f, const struct tcf_t *tm);