diff mbox series

[iproute2/net-next,v5] tc: m_action: introduce support for hw stats type

Message ID 20200314092548.27793-1-jiri@resnulli.us
State Accepted
Delegated to: David Ahern
Headers show
Series [iproute2/net-next,v5] tc: m_action: introduce support for hw stats type | expand

Commit Message

Jiri Pirko March 14, 2020, 9:25 a.m. UTC
From: Jiri Pirko <jiri@mellanox.com>

Introduce support for per-action hw stats type config.

This patch allows user to specify one of the following types of HW
stats for added action:
immediate - queried during dump time
delayed - polled from HW periodically or sent by HW in async manner
disabled - no stats needed

Note that if "hw_stats" option is not passed, user does not care about
the type, just expects any type of stats.

Examples:
$ tc filter add dev enp0s16np28 ingress proto ip handle 1 pref 1 flower skip_sw dst_ip 192.168.1.1 action drop hw_stats disabled
$ tc -s filter show dev enp0s16np28 ingress
filter protocol ip pref 1 flower chain 0
filter protocol ip pref 1 flower chain 0 handle 0x1
  eth_type ipv4
  dst_ip 192.168.1.1
  skip_sw
  in_hw in_hw_count 2
        action order 1: gact action drop
         random type none pass val 0
         index 1 ref 1 bind 1 installed 7 sec used 2 sec
        Action statistics:
        Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
        backlog 0b 0p requeues 0
        hw_stats disabled

$ tc filter add dev enp0s16np28 ingress proto ip handle 1 pref 1 flower skip_sw dst_ip 192.168.1.1 action drop hw_stats immediate
$ tc -s filter show dev enp0s16np28 ingress
filter protocol ip pref 1 flower chain 0
filter protocol ip pref 1 flower chain 0 handle 0x1
  eth_type ipv4
  dst_ip 192.168.1.1
  skip_sw
  in_hw in_hw_count 2
        action order 1: gact action drop
         random type none pass val 0
         index 1 ref 1 bind 1 installed 11 sec used 4 sec
        Action statistics:
        Sent 102 bytes 1 pkt (dropped 1, overlimits 0 requeues 0)
        Sent software 0 bytes 0 pkt
        Sent hardware 102 bytes 1 pkt
        backlog 0b 0p requeues 0
        hw_stats immediate

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
v4->v5:
- s/hwstats/hw_stats/ in help message
v3->v4:
- added to "tc actions help" output
v2->v3:
- adjusted hw_stats_type_bf struct initialization
- added comment to "disable"
v1->v2:
- added more description and examples
---
 include/uapi/linux/pkt_cls.h | 22 ++++++++++++
 man/man8/tc-actions.8        | 31 +++++++++++++++++
 tc/m_action.c                | 66 +++++++++++++++++++++++++++++++++++-
 3 files changed, 118 insertions(+), 1 deletion(-)

Comments

David Ahern March 19, 2020, 11:56 p.m. UTC | #1
On 3/14/20 3:25 AM, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> Introduce support for per-action hw stats type config.
> 
> This patch allows user to specify one of the following types of HW
> stats for added action:
> immediate - queried during dump time
> delayed - polled from HW periodically or sent by HW in async manner
> disabled - no stats needed
> 
> Note that if "hw_stats" option is not passed, user does not care about
> the type, just expects any type of stats.
> 
> Examples:
> $ tc filter add dev enp0s16np28 ingress proto ip handle 1 pref 1 flower skip_sw dst_ip 192.168.1.1 action drop hw_stats disabled

...

> @@ -149,6 +150,59 @@ new_cmd(char **argv)
>  		(matches(*argv, "add") == 0);
>  }
>  
> +static const struct hw_stats_type_item {
> +	const char *str;
> +	__u8 type;
> +} hw_stats_type_items[] = {
> +	{ "immediate", TCA_ACT_HW_STATS_TYPE_IMMEDIATE },
> +	{ "delayed", TCA_ACT_HW_STATS_TYPE_DELAYED },
> +	{ "disabled", 0 }, /* no bit set */
> +};
> +
> +static void print_hw_stats(const struct rtattr *arg)
> +{
> +	struct nla_bitfield32 *hw_stats_type_bf = RTA_DATA(arg);
> +	__u8 hw_stats_type;
> +	int i;
> +
> +	hw_stats_type = hw_stats_type_bf->value & hw_stats_type_bf->selector;
> +	print_string(PRINT_FP, NULL, "\t", NULL);
> +	open_json_array(PRINT_ANY, "hw_stats");

I still do not understand how the type can be multiple. The command line
is an 'or' : immediate, delayed, or disabled. Further, the filter is
added to a specific device which has a single driver. Seems like at
install / config time the user is explicitly stating a single type.
Jiri Pirko March 20, 2020, 9:59 a.m. UTC | #2
Fri, Mar 20, 2020 at 12:56:08AM CET, dsahern@gmail.com wrote:
>On 3/14/20 3:25 AM, Jiri Pirko wrote:
>> From: Jiri Pirko <jiri@mellanox.com>
>> 
>> Introduce support for per-action hw stats type config.
>> 
>> This patch allows user to specify one of the following types of HW
>> stats for added action:
>> immediate - queried during dump time
>> delayed - polled from HW periodically or sent by HW in async manner
>> disabled - no stats needed
>> 
>> Note that if "hw_stats" option is not passed, user does not care about
>> the type, just expects any type of stats.
>> 
>> Examples:
>> $ tc filter add dev enp0s16np28 ingress proto ip handle 1 pref 1 flower skip_sw dst_ip 192.168.1.1 action drop hw_stats disabled
>
>...
>
>> @@ -149,6 +150,59 @@ new_cmd(char **argv)
>>  		(matches(*argv, "add") == 0);
>>  }
>>  
>> +static const struct hw_stats_type_item {
>> +	const char *str;
>> +	__u8 type;
>> +} hw_stats_type_items[] = {
>> +	{ "immediate", TCA_ACT_HW_STATS_TYPE_IMMEDIATE },
>> +	{ "delayed", TCA_ACT_HW_STATS_TYPE_DELAYED },
>> +	{ "disabled", 0 }, /* no bit set */
>> +};
>> +
>> +static void print_hw_stats(const struct rtattr *arg)
>> +{
>> +	struct nla_bitfield32 *hw_stats_type_bf = RTA_DATA(arg);
>> +	__u8 hw_stats_type;
>> +	int i;
>> +
>> +	hw_stats_type = hw_stats_type_bf->value & hw_stats_type_bf->selector;
>> +	print_string(PRINT_FP, NULL, "\t", NULL);
>> +	open_json_array(PRINT_ANY, "hw_stats");
>
>I still do not understand how the type can be multiple. The command line
>is an 'or' : immediate, delayed, or disabled. Further, the filter is

The cmd line is "or". The uapi is bitfield as requested by kernel
reviewers. I originally had that as "or" too.


>added to a specific device which has a single driver. Seems like at

Nope, may be multiple drivers if the block is shared.


>install / config time the user is explicitly stating a single type.


Basically using tc with this patch, you cannot achieve to have multiple
values in this output. However, in general. It could be.

Also, I wanted to have this as array because when I introduce the "used
hw stats" which would indicate which type is actually used (in case you
pass any for example), there might be multiple values, when offloaded to
multiple drivers.
David Ahern March 20, 2020, 4:34 p.m. UTC | #3
On 3/14/20 3:25 AM, Jiri Pirko wrote:
> @@ -200,6 +208,29 @@ which indicates that action is expected to have minimal software data-path
>  traffic and doesn't need to allocate stat counters with percpu allocator.
>  This option is intended to be used by hardware-offloaded actions.
>  
> +.TP
> +.BI hw_stats " HW_STATS"
> +Speficies the type of HW stats of new action. If omitted, any stats counter type

Fixed the spelling and applied to iproute2-next.
Jakub Kicinski March 20, 2020, 8:08 p.m. UTC | #4
On Fri, 20 Mar 2020 10:34:04 -0600 David Ahern wrote:
> On 3/14/20 3:25 AM, Jiri Pirko wrote:
> > @@ -200,6 +208,29 @@ which indicates that action is expected to have minimal software data-path
> >  traffic and doesn't need to allocate stat counters with percpu allocator.
> >  This option is intended to be used by hardware-offloaded actions.
> >  
> > +.TP
> > +.BI hw_stats " HW_STATS"
> > +Speficies the type of HW stats of new action. If omitted, any stats counter type  
> 
> Fixed the spelling and applied to iproute2-next.

Just a heads up that the kernel uAPI is getting slightly renamed, you'll
need to do a s/HW_STATS_TYPE/HW_STATS/ the rename lands. Do you want me
to send a patch for that?
David Ahern March 20, 2020, 8:12 p.m. UTC | #5
On 3/20/20 2:08 PM, Jakub Kicinski wrote:
> On Fri, 20 Mar 2020 10:34:04 -0600 David Ahern wrote:
>> On 3/14/20 3:25 AM, Jiri Pirko wrote:
>>> @@ -200,6 +208,29 @@ which indicates that action is expected to have minimal software data-path
>>>  traffic and doesn't need to allocate stat counters with percpu allocator.
>>>  This option is intended to be used by hardware-offloaded actions.
>>>  
>>> +.TP
>>> +.BI hw_stats " HW_STATS"
>>> +Speficies the type of HW stats of new action. If omitted, any stats counter type  
>>
>> Fixed the spelling and applied to iproute2-next.
> 
> Just a heads up that the kernel uAPI is getting slightly renamed, you'll
> need to do a s/HW_STATS_TYPE/HW_STATS/ the rename lands. Do you want me
> to send a patch for that?
> 

sure
diff mbox series

Patch

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 449a63971451..81cc1a869588 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -17,6 +17,7 @@  enum {
 	TCA_ACT_PAD,
 	TCA_ACT_COOKIE,
 	TCA_ACT_FLAGS,
+	TCA_ACT_HW_STATS_TYPE,
 	__TCA_ACT_MAX
 };
 
@@ -24,6 +25,27 @@  enum {
 					 * actions stats.
 					 */
 
+/* tca HW stats type
+ * When user does not pass the attribute, he does not care.
+ * It is the same as if he would pass the attribute with
+ * all supported bits set.
+ * In case no bits are set, user is not interested in getting any HW statistics.
+ */
+#define TCA_ACT_HW_STATS_TYPE_IMMEDIATE (1 << 0) /* Means that in dump, user
+						  * gets the current HW stats
+						  * state from the device
+						  * queried at the dump time.
+						  */
+#define TCA_ACT_HW_STATS_TYPE_DELAYED (1 << 1) /* Means that in dump, user gets
+						* HW stats that might be out
+						* of date for some time, maybe
+						* couple of seconds. This is
+						* the case when driver polls
+						* stats updates periodically
+						* or when it gets async stats update
+						* from the device.
+						*/
+
 #define TCA_ACT_MAX __TCA_ACT_MAX
 #define TCA_OLD_COMPAT (TCA_ACT_MAX+1)
 #define TCA_ACT_MAX_PRIO 32
diff --git a/man/man8/tc-actions.8 b/man/man8/tc-actions.8
index bee59f7247fa..7d7df00013c6 100644
--- a/man/man8/tc-actions.8
+++ b/man/man8/tc-actions.8
@@ -49,6 +49,8 @@  actions \- independently defined actions in tc
 ] [
 .I FLAGS
 ] [
+.I HWSTATSSPEC
+] [
 .I CONTROL
 ]
 
@@ -77,6 +79,12 @@  ACTNAME
 :=
 .I no_percpu
 
+.I HWSTATSSPEC
+:=
+.BR hw_stats " {"
+.IR immediate " | " delayed " | " disabled
+.R }
+
 .I ACTDETAIL
 :=
 .I ACTNAME ACTPARAMS
@@ -200,6 +208,29 @@  which indicates that action is expected to have minimal software data-path
 traffic and doesn't need to allocate stat counters with percpu allocator.
 This option is intended to be used by hardware-offloaded actions.
 
+.TP
+.BI hw_stats " HW_STATS"
+Speficies the type of HW stats of new action. If omitted, any stats counter type
+is going to be used, according to driver and its resources.
+The
+.I HW_STATS
+indicates the type. Any of the following are valid:
+.RS
+.TP
+.B immediate
+Means that in dump, user gets the current HW stats state from the device
+queried at the dump time.
+.TP
+.B delayed
+Means that in dump, user gets HW stats that might be out of date for
+some time, maybe couple of seconds. This is the case when driver polls
+stats updates periodically or when it gets async stats update
+from the device.
+.TP
+.B disabled
+No HW stats are going to be available in dump.
+.RE
+
 .TP
 .BI since " MSTIME"
 When dumping large number of actions, a millisecond time-filter can be
diff --git a/tc/m_action.c b/tc/m_action.c
index 4da810c8c0aa..58ae1846033b 100644
--- a/tc/m_action.c
+++ b/tc/m_action.c
@@ -51,8 +51,9 @@  static void act_usage(void)
 		"	FL := ls | list | flush | <ACTNAMESPEC>\n"
 		"	ACTNAMESPEC :=  action <ACTNAME>\n"
 		"	ACTISPEC := <ACTNAMESPEC> <INDEXSPEC>\n"
-		"	ACTSPEC := action <ACTDETAIL> [INDEXSPEC]\n"
+		"	ACTSPEC := action <ACTDETAIL> [INDEXSPEC] [HWSTATSSPEC]\n"
 		"	INDEXSPEC := index <32 bit indexvalue>\n"
+		"	HWSTATSSPEC := hw_stats [ immediate | delayed | disabled ]\n"
 		"	ACTDETAIL := <ACTNAME> <ACTPARAMS>\n"
 		"		Example ACTNAME is gact, mirred, bpf, etc\n"
 		"		Each action has its own parameters (ACTPARAMS)\n"
@@ -149,6 +150,59 @@  new_cmd(char **argv)
 		(matches(*argv, "add") == 0);
 }
 
+static const struct hw_stats_type_item {
+	const char *str;
+	__u8 type;
+} hw_stats_type_items[] = {
+	{ "immediate", TCA_ACT_HW_STATS_TYPE_IMMEDIATE },
+	{ "delayed", TCA_ACT_HW_STATS_TYPE_DELAYED },
+	{ "disabled", 0 }, /* no bit set */
+};
+
+static void print_hw_stats(const struct rtattr *arg)
+{
+	struct nla_bitfield32 *hw_stats_type_bf = RTA_DATA(arg);
+	__u8 hw_stats_type;
+	int i;
+
+	hw_stats_type = hw_stats_type_bf->value & hw_stats_type_bf->selector;
+	print_string(PRINT_FP, NULL, "\t", NULL);
+	open_json_array(PRINT_ANY, "hw_stats");
+
+	for (i = 0; i < ARRAY_SIZE(hw_stats_type_items); i++) {
+		const struct hw_stats_type_item *item;
+
+		item = &hw_stats_type_items[i];
+		if ((!hw_stats_type && !item->type) ||
+		    hw_stats_type & item->type)
+			print_string(PRINT_ANY, NULL, " %s", item->str);
+	}
+	close_json_array(PRINT_JSON, NULL);
+}
+
+static int parse_hw_stats(const char *str, struct nlmsghdr *n)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(hw_stats_type_items); i++) {
+		const struct hw_stats_type_item *item;
+
+		item = &hw_stats_type_items[i];
+		if (matches(str, item->str) == 0) {
+			struct nla_bitfield32 hw_stats_type_bf = {
+				.value = item->type,
+				.selector = item->type
+			};
+
+			addattr_l(n, MAX_MSG, TCA_ACT_HW_STATS_TYPE,
+				  &hw_stats_type_bf, sizeof(hw_stats_type_bf));
+			return 0;
+		}
+
+	}
+	return -1;
+}
+
 int parse_action(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n)
 {
 	int argc = *argc_p;
@@ -250,6 +304,14 @@  done0:
 				addattr_l(n, MAX_MSG, TCA_ACT_COOKIE,
 					  &act_ck, act_ck_len);
 
+			if (*argv && matches(*argv, "hw_stats") == 0) {
+				NEXT_ARG();
+				ret = parse_hw_stats(*argv, n);
+				if (ret < 0)
+					invarg("value is invalid\n", *argv);
+				NEXT_ARG_FWD();
+			}
+
 			if (*argv && strcmp(*argv, "no_percpu") == 0) {
 				struct nla_bitfield32 flags =
 					{ TCA_ACT_FLAGS_NO_PERCPU_STATS,
@@ -337,6 +399,8 @@  static int tc_print_one_action(FILE *f, struct rtattr *arg)
 				   TCA_ACT_FLAGS_NO_PERCPU_STATS);
 		print_string(PRINT_FP, NULL, "%s", _SL_);
 	}
+	if (tb[TCA_ACT_HW_STATS_TYPE])
+		print_hw_stats(tb[TCA_ACT_HW_STATS_TYPE]);
 
 	return 0;
 }