diff mbox series

[v2] net: Add trace events for all receive exit points

Message ID 20181112194405.4133-1-gbastien@versatic.net
State Changes Requested, archived
Delegated to: David Miller
Headers show
Series [v2] net: Add trace events for all receive exit points | expand

Commit Message

Geneviève Bastien Nov. 12, 2018, 7:44 p.m. UTC
Trace events are already present for the receive entry points, to indicate
how the reception entered the stack.

This patch adds the corresponding exit trace events that will bound the
reception such that all events occurring between the entry and the exit
can be considered as part of the reception context. This greatly helps
for dependency and root cause analyses.

Without this, it is impossible to determine whether a sched_wakeup
event following a netif_receive_skb event is the result of the packet
reception or a simple coincidence after further processing by the
thread.

Signed-off-by: Geneviève Bastien <gbastien@versatic.net>
CC: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Ingo Molnar <mingo@redhat.com>
CC: David S. Miller <davem@davemloft.net>
---
Changes in v2:
  - Add the return value to tracepoints where applicable
  - Verify if tracepoint is enabled before walking list in
    netif_receive_skb_list
---
 include/trace/events/net.h | 78 ++++++++++++++++++++++++++++++++++++++
 net/core/dev.c             | 38 ++++++++++++++++---
 2 files changed, 110 insertions(+), 6 deletions(-)

Comments

Mathieu Desnoyers Nov. 12, 2018, 8:09 p.m. UTC | #1
----- On Nov 12, 2018, at 2:44 PM, Geneviève Bastien gbastien@versatic.net wrote:

> Trace events are already present for the receive entry points, to indicate
> how the reception entered the stack.
> 
> This patch adds the corresponding exit trace events that will bound the
> reception such that all events occurring between the entry and the exit
> can be considered as part of the reception context. This greatly helps
> for dependency and root cause analyses.
> 
> Without this, it is impossible to determine whether a sched_wakeup
> event following a netif_receive_skb event is the result of the packet
> reception or a simple coincidence after further processing by the
> thread.

As discussed over IRC, it is _possible_ to use kretprobes to instrument
the exit, but it is not practical. A v3 will be sent soon to clarify
this part of the commit message.

Also, I wonder if we should use "net_dev_template_exit" for the event
class rather than "net_dev_template_return" ?

Thanks,

Mathieu

> 
> Signed-off-by: Geneviève Bastien <gbastien@versatic.net>
> CC: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> CC: Steven Rostedt <rostedt@goodmis.org>
> CC: Ingo Molnar <mingo@redhat.com>
> CC: David S. Miller <davem@davemloft.net>
> ---
> Changes in v2:
>  - Add the return value to tracepoints where applicable
>  - Verify if tracepoint is enabled before walking list in
>    netif_receive_skb_list
> ---
> include/trace/events/net.h | 78 ++++++++++++++++++++++++++++++++++++++
> net/core/dev.c             | 38 ++++++++++++++++---
> 2 files changed, 110 insertions(+), 6 deletions(-)
> 
> diff --git a/include/trace/events/net.h b/include/trace/events/net.h
> index 00aa72ce0e7c..cff1a7b9d0bb 100644
> --- a/include/trace/events/net.h
> +++ b/include/trace/events/net.h
> @@ -117,6 +117,42 @@ DECLARE_EVENT_CLASS(net_dev_template,
> 		__get_str(name), __entry->skbaddr, __entry->len)
> )
> 
> +DECLARE_EVENT_CLASS(net_dev_template_return,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret),
> +
> +	TP_STRUCT__entry(
> +		__field(void *,	skbaddr)
> +		__field(int,	ret)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->skbaddr = skb;
> +		__entry->ret = ret;
> +	),
> +
> +	TP_printk("skbaddr=%p ret=%d", __entry->skbaddr, __entry->ret)
> +)
> +
> +DECLARE_EVENT_CLASS(net_dev_template_simple,
> +
> +	TP_PROTO(struct sk_buff *skb),
> +
> +	TP_ARGS(skb),
> +
> +	TP_STRUCT__entry(
> +		__field(void *,	skbaddr)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->skbaddr = skb;
> +	),
> +
> +	TP_printk("skbaddr=%p", __entry->skbaddr)
> +)
> +
> DEFINE_EVENT(net_dev_template, net_dev_queue,
> 
> 	TP_PROTO(struct sk_buff *skb),
> @@ -244,6 +280,48 @@ DEFINE_EVENT(net_dev_rx_verbose_template,
> netif_rx_ni_entry,
> 	TP_ARGS(skb)
> );
> 
> +DEFINE_EVENT(net_dev_template_return, napi_gro_frags_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> +DEFINE_EVENT(net_dev_template_return, napi_gro_receive_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> +DEFINE_EVENT(net_dev_template_return, netif_receive_skb_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> +DEFINE_EVENT(net_dev_template_simple, netif_receive_skb_list_exit,
> +
> +	TP_PROTO(struct sk_buff *skb),
> +
> +	TP_ARGS(skb)
> +);
> +
> +DEFINE_EVENT(net_dev_template_return, netif_rx_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> +DEFINE_EVENT(net_dev_template_return, netif_rx_ni_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> #endif /* _TRACE_NET_H */
> 
> /* This part must be outside protection */
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 0ffcbdd55fa9..c4dc5df34abe 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4520,9 +4520,14 @@ static int netif_rx_internal(struct sk_buff *skb)
> 
> int netif_rx(struct sk_buff *skb)
> {
> +	int ret;
> +
> 	trace_netif_rx_entry(skb);
> 
> -	return netif_rx_internal(skb);
> +	ret = netif_rx_internal(skb);
> +	trace_netif_rx_exit(skb, ret);
> +
> +	return ret;
> }
> EXPORT_SYMBOL(netif_rx);
> 
> @@ -4537,6 +4542,7 @@ int netif_rx_ni(struct sk_buff *skb)
> 	if (local_softirq_pending())
> 		do_softirq();
> 	preempt_enable();
> +	trace_netif_rx_ni_exit(skb, err);
> 
> 	return err;
> }
> @@ -5222,9 +5228,14 @@ static void netif_receive_skb_list_internal(struct
> list_head *head)
>  */
> int netif_receive_skb(struct sk_buff *skb)
> {
> +	int ret;
> +
> 	trace_netif_receive_skb_entry(skb);
> 
> -	return netif_receive_skb_internal(skb);
> +	ret = netif_receive_skb_internal(skb);
> +	trace_netif_receive_skb_exit(skb, ret);
> +
> +	return ret;
> }
> EXPORT_SYMBOL(netif_receive_skb);
> 
> @@ -5244,9 +5255,15 @@ void netif_receive_skb_list(struct list_head *head)
> 
> 	if (list_empty(head))
> 		return;
> -	list_for_each_entry(skb, head, list)
> -		trace_netif_receive_skb_list_entry(skb);
> +	if (trace_netif_receive_skb_list_entry_enabled()) {
> +		list_for_each_entry(skb, head, list)
> +			trace_netif_receive_skb_list_entry(skb);
> +	}
> 	netif_receive_skb_list_internal(head);
> +	if (trace_netif_receive_skb_list_exit_enabled()) {
> +		list_for_each_entry(skb, head, list)
> +			trace_netif_receive_skb_list_exit(skb);
> +	}
> }
> EXPORT_SYMBOL(netif_receive_skb_list);
> 
> @@ -5634,12 +5651,17 @@ static gro_result_t napi_skb_finish(gro_result_t ret,
> struct sk_buff *skb)
> 
> gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
> {
> +	gro_result_t ret;
> +
> 	skb_mark_napi_id(skb, napi);
> 	trace_napi_gro_receive_entry(skb);
> 
> 	skb_gro_reset_offset(skb);
> 
> -	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
> +	ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
> +	trace_napi_gro_receive_exit(skb, ret);
> +
> +	return ret;
> }
> EXPORT_SYMBOL(napi_gro_receive);
> 
> @@ -5753,6 +5775,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct
> *napi)
> 
> gro_result_t napi_gro_frags(struct napi_struct *napi)
> {
> +	gro_result_t ret;
> 	struct sk_buff *skb = napi_frags_skb(napi);
> 
> 	if (!skb)
> @@ -5760,7 +5783,10 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
> 
> 	trace_napi_gro_frags_entry(skb);
> 
> -	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
> +	ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
> +	trace_napi_gro_frags_exit(skb, ret);
> +
> +	return ret;
> }
> EXPORT_SYMBOL(napi_gro_frags);
> 
> --
> 2.19.1
Mathieu Desnoyers Nov. 12, 2018, 8:20 p.m. UTC | #2
----- On Nov 12, 2018, at 3:09 PM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:

> ----- On Nov 12, 2018, at 2:44 PM, Geneviève Bastien gbastien@versatic.net
> wrote:
> 
>> Trace events are already present for the receive entry points, to indicate
>> how the reception entered the stack.
>> 
>> This patch adds the corresponding exit trace events that will bound the
>> reception such that all events occurring between the entry and the exit
>> can be considered as part of the reception context. This greatly helps
>> for dependency and root cause analyses.
>> 
>> Without this, it is impossible to determine whether a sched_wakeup
>> event following a netif_receive_skb event is the result of the packet
>> reception or a simple coincidence after further processing by the
>> thread.
> 
> As discussed over IRC, it is _possible_ to use kretprobes to instrument
> the exit, but it is not practical. A v3 will be sent soon to clarify
> this part of the commit message.
> 
> Also, I wonder if we should use "net_dev_template_exit" for the event
> class rather than "net_dev_template_return" ?

Hrm, looking at this again, I notice that there is a single DEFINE_EVENT
using net_dev_template_simple.

We could simply turn netif_receive_skb_list_exit into a TRACE_EVENT(),
remove the net_dev_template_simple, and rename the net_dev_template_return
to net_dev_template ?

It's pretty clear from the prototype that it expects a "ret" argument,
so I don't see the need to also state it in the template name.

Thanks,

Mathieu

> 
> Thanks,
> 
> Mathieu
> 
>> 
>> Signed-off-by: Geneviève Bastien <gbastien@versatic.net>
>> CC: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>> CC: Steven Rostedt <rostedt@goodmis.org>
>> CC: Ingo Molnar <mingo@redhat.com>
>> CC: David S. Miller <davem@davemloft.net>
>> ---
>> Changes in v2:
>>  - Add the return value to tracepoints where applicable
>>  - Verify if tracepoint is enabled before walking list in
>>    netif_receive_skb_list
>> ---
>> include/trace/events/net.h | 78 ++++++++++++++++++++++++++++++++++++++
>> net/core/dev.c             | 38 ++++++++++++++++---
>> 2 files changed, 110 insertions(+), 6 deletions(-)
>> 
>> diff --git a/include/trace/events/net.h b/include/trace/events/net.h
>> index 00aa72ce0e7c..cff1a7b9d0bb 100644
>> --- a/include/trace/events/net.h
>> +++ b/include/trace/events/net.h
>> @@ -117,6 +117,42 @@ DECLARE_EVENT_CLASS(net_dev_template,
>> 		__get_str(name), __entry->skbaddr, __entry->len)
>> )
>> 
>> +DECLARE_EVENT_CLASS(net_dev_template_return,
>> +
>> +	TP_PROTO(struct sk_buff *skb, int ret),
>> +
>> +	TP_ARGS(skb, ret),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(void *,	skbaddr)
>> +		__field(int,	ret)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->skbaddr = skb;
>> +		__entry->ret = ret;
>> +	),
>> +
>> +	TP_printk("skbaddr=%p ret=%d", __entry->skbaddr, __entry->ret)
>> +)
>> +
>> +DECLARE_EVENT_CLASS(net_dev_template_simple,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb),
>> +
>> +	TP_STRUCT__entry(
>> +		__field(void *,	skbaddr)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->skbaddr = skb;
>> +	),
>> +
>> +	TP_printk("skbaddr=%p", __entry->skbaddr)
>> +)
>> +
>> DEFINE_EVENT(net_dev_template, net_dev_queue,
>> 
>> 	TP_PROTO(struct sk_buff *skb),
>> @@ -244,6 +280,48 @@ DEFINE_EVENT(net_dev_rx_verbose_template,
>> netif_rx_ni_entry,
>> 	TP_ARGS(skb)
>> );
>> 
>> +DEFINE_EVENT(net_dev_template_return, napi_gro_frags_exit,
>> +
>> +	TP_PROTO(struct sk_buff *skb, int ret),
>> +
>> +	TP_ARGS(skb, ret)
>> +);
>> +
>> +DEFINE_EVENT(net_dev_template_return, napi_gro_receive_exit,
>> +
>> +	TP_PROTO(struct sk_buff *skb, int ret),
>> +
>> +	TP_ARGS(skb, ret)
>> +);
>> +
>> +DEFINE_EVENT(net_dev_template_return, netif_receive_skb_exit,
>> +
>> +	TP_PROTO(struct sk_buff *skb, int ret),
>> +
>> +	TP_ARGS(skb, ret)
>> +);
>> +
>> +DEFINE_EVENT(net_dev_template_simple, netif_receive_skb_list_exit,
>> +
>> +	TP_PROTO(struct sk_buff *skb),
>> +
>> +	TP_ARGS(skb)
>> +);
>> +
>> +DEFINE_EVENT(net_dev_template_return, netif_rx_exit,
>> +
>> +	TP_PROTO(struct sk_buff *skb, int ret),
>> +
>> +	TP_ARGS(skb, ret)
>> +);
>> +
>> +DEFINE_EVENT(net_dev_template_return, netif_rx_ni_exit,
>> +
>> +	TP_PROTO(struct sk_buff *skb, int ret),
>> +
>> +	TP_ARGS(skb, ret)
>> +);
>> +
>> #endif /* _TRACE_NET_H */
>> 
>> /* This part must be outside protection */
>> diff --git a/net/core/dev.c b/net/core/dev.c
>> index 0ffcbdd55fa9..c4dc5df34abe 100644
>> --- a/net/core/dev.c
>> +++ b/net/core/dev.c
>> @@ -4520,9 +4520,14 @@ static int netif_rx_internal(struct sk_buff *skb)
>> 
>> int netif_rx(struct sk_buff *skb)
>> {
>> +	int ret;
>> +
>> 	trace_netif_rx_entry(skb);
>> 
>> -	return netif_rx_internal(skb);
>> +	ret = netif_rx_internal(skb);
>> +	trace_netif_rx_exit(skb, ret);
>> +
>> +	return ret;
>> }
>> EXPORT_SYMBOL(netif_rx);
>> 
>> @@ -4537,6 +4542,7 @@ int netif_rx_ni(struct sk_buff *skb)
>> 	if (local_softirq_pending())
>> 		do_softirq();
>> 	preempt_enable();
>> +	trace_netif_rx_ni_exit(skb, err);
>> 
>> 	return err;
>> }
>> @@ -5222,9 +5228,14 @@ static void netif_receive_skb_list_internal(struct
>> list_head *head)
>>  */
>> int netif_receive_skb(struct sk_buff *skb)
>> {
>> +	int ret;
>> +
>> 	trace_netif_receive_skb_entry(skb);
>> 
>> -	return netif_receive_skb_internal(skb);
>> +	ret = netif_receive_skb_internal(skb);
>> +	trace_netif_receive_skb_exit(skb, ret);
>> +
>> +	return ret;
>> }
>> EXPORT_SYMBOL(netif_receive_skb);
>> 
>> @@ -5244,9 +5255,15 @@ void netif_receive_skb_list(struct list_head *head)
>> 
>> 	if (list_empty(head))
>> 		return;
>> -	list_for_each_entry(skb, head, list)
>> -		trace_netif_receive_skb_list_entry(skb);
>> +	if (trace_netif_receive_skb_list_entry_enabled()) {
>> +		list_for_each_entry(skb, head, list)
>> +			trace_netif_receive_skb_list_entry(skb);
>> +	}
>> 	netif_receive_skb_list_internal(head);
>> +	if (trace_netif_receive_skb_list_exit_enabled()) {
>> +		list_for_each_entry(skb, head, list)
>> +			trace_netif_receive_skb_list_exit(skb);
>> +	}
>> }
>> EXPORT_SYMBOL(netif_receive_skb_list);
>> 
>> @@ -5634,12 +5651,17 @@ static gro_result_t napi_skb_finish(gro_result_t ret,
>> struct sk_buff *skb)
>> 
>> gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
>> {
>> +	gro_result_t ret;
>> +
>> 	skb_mark_napi_id(skb, napi);
>> 	trace_napi_gro_receive_entry(skb);
>> 
>> 	skb_gro_reset_offset(skb);
>> 
>> -	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
>> +	ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
>> +	trace_napi_gro_receive_exit(skb, ret);
>> +
>> +	return ret;
>> }
>> EXPORT_SYMBOL(napi_gro_receive);
>> 
>> @@ -5753,6 +5775,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct
>> *napi)
>> 
>> gro_result_t napi_gro_frags(struct napi_struct *napi)
>> {
>> +	gro_result_t ret;
>> 	struct sk_buff *skb = napi_frags_skb(napi);
>> 
>> 	if (!skb)
>> @@ -5760,7 +5783,10 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
>> 
>> 	trace_napi_gro_frags_entry(skb);
>> 
>> -	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
>> +	ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
>> +	trace_napi_gro_frags_exit(skb, ret);
>> +
>> +	return ret;
>> }
>> EXPORT_SYMBOL(napi_gro_frags);
>> 
>> --
>> 2.19.1
> 
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
Steven Rostedt Nov. 12, 2018, 8:32 p.m. UTC | #3
On Mon, 12 Nov 2018 14:44:05 -0500
Geneviève Bastien <gbastien@versatic.net> wrote:

> Trace events are already present for the receive entry points, to indicate
> how the reception entered the stack.
> 
> This patch adds the corresponding exit trace events that will bound the
> reception such that all events occurring between the entry and the exit
> can be considered as part of the reception context. This greatly helps
> for dependency and root cause analyses.
> 
> Without this, it is impossible to determine whether a sched_wakeup
> event following a netif_receive_skb event is the result of the packet
> reception or a simple coincidence after further processing by the
> thread.
> 
> Signed-off-by: Geneviève Bastien <gbastien@versatic.net>
> CC: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
> CC: Steven Rostedt <rostedt@goodmis.org>
> CC: Ingo Molnar <mingo@redhat.com>
> CC: David S. Miller <davem@davemloft.net>
> ---
> Changes in v2:
>   - Add the return value to tracepoints where applicable
>   - Verify if tracepoint is enabled before walking list in
>     netif_receive_skb_list
> ---
>  include/trace/events/net.h | 78 ++++++++++++++++++++++++++++++++++++++
>  net/core/dev.c             | 38 ++++++++++++++++---
>  2 files changed, 110 insertions(+), 6 deletions(-)
> 
> diff --git a/include/trace/events/net.h b/include/trace/events/net.h
> index 00aa72ce0e7c..cff1a7b9d0bb 100644
> --- a/include/trace/events/net.h
> +++ b/include/trace/events/net.h
> @@ -117,6 +117,42 @@ DECLARE_EVENT_CLASS(net_dev_template,
>  		__get_str(name), __entry->skbaddr, __entry->len)
>  )
>  
> +DECLARE_EVENT_CLASS(net_dev_template_return,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret),
> +
> +	TP_STRUCT__entry(
> +		__field(void *,	skbaddr)
> +		__field(int,	ret)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->skbaddr = skb;
> +		__entry->ret = ret;
> +	),
> +
> +	TP_printk("skbaddr=%p ret=%d", __entry->skbaddr, __entry->ret)
> +)
> +
> +DECLARE_EVENT_CLASS(net_dev_template_simple,
> +
> +	TP_PROTO(struct sk_buff *skb),
> +
> +	TP_ARGS(skb),
> +
> +	TP_STRUCT__entry(
> +		__field(void *,	skbaddr)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->skbaddr = skb;
> +	),
> +
> +	TP_printk("skbaddr=%p", __entry->skbaddr)
> +)
> +
>  DEFINE_EVENT(net_dev_template, net_dev_queue,
>  
>  	TP_PROTO(struct sk_buff *skb),
> @@ -244,6 +280,48 @@ DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_ni_entry,
>  	TP_ARGS(skb)
>  );
>  
> +DEFINE_EVENT(net_dev_template_return, napi_gro_frags_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> +DEFINE_EVENT(net_dev_template_return, napi_gro_receive_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> +DEFINE_EVENT(net_dev_template_return, netif_receive_skb_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> +DEFINE_EVENT(net_dev_template_simple, netif_receive_skb_list_exit,

One small nit, and I don't care enough to ask you to fix it, but others
might care. We probably should not intermix net_dev_template_simple
events within net_dev_template_return events.

But like I said, I don't really care, it's more cosmetic than anything
else.

For the tracing side:

Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>

-- Steve


> +
> +	TP_PROTO(struct sk_buff *skb),
> +
> +	TP_ARGS(skb)
> +);
> +
> +DEFINE_EVENT(net_dev_template_return, netif_rx_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
> +DEFINE_EVENT(net_dev_template_return, netif_rx_ni_exit,
> +
> +	TP_PROTO(struct sk_buff *skb, int ret),
> +
> +	TP_ARGS(skb, ret)
> +);
> +
>  #endif /* _TRACE_NET_H */
>  
>
Mathieu Desnoyers Nov. 12, 2018, 8:37 p.m. UTC | #4
----- On Nov 12, 2018, at 3:20 PM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:

> ----- On Nov 12, 2018, at 3:09 PM, Mathieu Desnoyers
> mathieu.desnoyers@efficios.com wrote:
> 
>> ----- On Nov 12, 2018, at 2:44 PM, Geneviève Bastien gbastien@versatic.net
>> wrote:
>> 
>>> Trace events are already present for the receive entry points, to indicate
>>> how the reception entered the stack.
>>> 
>>> This patch adds the corresponding exit trace events that will bound the
>>> reception such that all events occurring between the entry and the exit
>>> can be considered as part of the reception context. This greatly helps
>>> for dependency and root cause analyses.
>>> 
>>> Without this, it is impossible to determine whether a sched_wakeup
>>> event following a netif_receive_skb event is the result of the packet
>>> reception or a simple coincidence after further processing by the
>>> thread.
>> 
>> As discussed over IRC, it is _possible_ to use kretprobes to instrument
>> the exit, but it is not practical. A v3 will be sent soon to clarify
>> this part of the commit message.
>> 
>> Also, I wonder if we should use "net_dev_template_exit" for the event
>> class rather than "net_dev_template_return" ?
> 
> Hrm, looking at this again, I notice that there is a single DEFINE_EVENT
> using net_dev_template_simple.
> 
> We could simply turn netif_receive_skb_list_exit into a TRACE_EVENT(),
> remove the net_dev_template_simple, and rename the net_dev_template_return
> to net_dev_template ?
> 
> It's pretty clear from the prototype that it expects a "ret" argument,
> so I don't see the need to also state it in the template name.

As you pointed out on IRC, net_dev_template already exists. So we can
use "net_dev_rx_exit_template" which is along the same lines as
its entry counterpart "net_dev_rx_verbose_template".

Thanks,

Mathieu


> 
> Thanks,
> 
> Mathieu
> 
>> 
>> Thanks,
>> 
>> Mathieu
>> 
>>> 
>>> Signed-off-by: Geneviève Bastien <gbastien@versatic.net>
>>> CC: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
>>> CC: Steven Rostedt <rostedt@goodmis.org>
>>> CC: Ingo Molnar <mingo@redhat.com>
>>> CC: David S. Miller <davem@davemloft.net>
>>> ---
>>> Changes in v2:
>>>  - Add the return value to tracepoints where applicable
>>>  - Verify if tracepoint is enabled before walking list in
>>>    netif_receive_skb_list
>>> ---
>>> include/trace/events/net.h | 78 ++++++++++++++++++++++++++++++++++++++
>>> net/core/dev.c             | 38 ++++++++++++++++---
>>> 2 files changed, 110 insertions(+), 6 deletions(-)
>>> 
>>> diff --git a/include/trace/events/net.h b/include/trace/events/net.h
>>> index 00aa72ce0e7c..cff1a7b9d0bb 100644
>>> --- a/include/trace/events/net.h
>>> +++ b/include/trace/events/net.h
>>> @@ -117,6 +117,42 @@ DECLARE_EVENT_CLASS(net_dev_template,
>>> 		__get_str(name), __entry->skbaddr, __entry->len)
>>> )
>>> 
>>> +DECLARE_EVENT_CLASS(net_dev_template_return,
>>> +
>>> +	TP_PROTO(struct sk_buff *skb, int ret),
>>> +
>>> +	TP_ARGS(skb, ret),
>>> +
>>> +	TP_STRUCT__entry(
>>> +		__field(void *,	skbaddr)
>>> +		__field(int,	ret)
>>> +	),
>>> +
>>> +	TP_fast_assign(
>>> +		__entry->skbaddr = skb;
>>> +		__entry->ret = ret;
>>> +	),
>>> +
>>> +	TP_printk("skbaddr=%p ret=%d", __entry->skbaddr, __entry->ret)
>>> +)
>>> +
>>> +DECLARE_EVENT_CLASS(net_dev_template_simple,
>>> +
>>> +	TP_PROTO(struct sk_buff *skb),
>>> +
>>> +	TP_ARGS(skb),
>>> +
>>> +	TP_STRUCT__entry(
>>> +		__field(void *,	skbaddr)
>>> +	),
>>> +
>>> +	TP_fast_assign(
>>> +		__entry->skbaddr = skb;
>>> +	),
>>> +
>>> +	TP_printk("skbaddr=%p", __entry->skbaddr)
>>> +)
>>> +
>>> DEFINE_EVENT(net_dev_template, net_dev_queue,
>>> 
>>> 	TP_PROTO(struct sk_buff *skb),
>>> @@ -244,6 +280,48 @@ DEFINE_EVENT(net_dev_rx_verbose_template,
>>> netif_rx_ni_entry,
>>> 	TP_ARGS(skb)
>>> );
>>> 
>>> +DEFINE_EVENT(net_dev_template_return, napi_gro_frags_exit,
>>> +
>>> +	TP_PROTO(struct sk_buff *skb, int ret),
>>> +
>>> +	TP_ARGS(skb, ret)
>>> +);
>>> +
>>> +DEFINE_EVENT(net_dev_template_return, napi_gro_receive_exit,
>>> +
>>> +	TP_PROTO(struct sk_buff *skb, int ret),
>>> +
>>> +	TP_ARGS(skb, ret)
>>> +);
>>> +
>>> +DEFINE_EVENT(net_dev_template_return, netif_receive_skb_exit,
>>> +
>>> +	TP_PROTO(struct sk_buff *skb, int ret),
>>> +
>>> +	TP_ARGS(skb, ret)
>>> +);
>>> +
>>> +DEFINE_EVENT(net_dev_template_simple, netif_receive_skb_list_exit,
>>> +
>>> +	TP_PROTO(struct sk_buff *skb),
>>> +
>>> +	TP_ARGS(skb)
>>> +);
>>> +
>>> +DEFINE_EVENT(net_dev_template_return, netif_rx_exit,
>>> +
>>> +	TP_PROTO(struct sk_buff *skb, int ret),
>>> +
>>> +	TP_ARGS(skb, ret)
>>> +);
>>> +
>>> +DEFINE_EVENT(net_dev_template_return, netif_rx_ni_exit,
>>> +
>>> +	TP_PROTO(struct sk_buff *skb, int ret),
>>> +
>>> +	TP_ARGS(skb, ret)
>>> +);
>>> +
>>> #endif /* _TRACE_NET_H */
>>> 
>>> /* This part must be outside protection */
>>> diff --git a/net/core/dev.c b/net/core/dev.c
>>> index 0ffcbdd55fa9..c4dc5df34abe 100644
>>> --- a/net/core/dev.c
>>> +++ b/net/core/dev.c
>>> @@ -4520,9 +4520,14 @@ static int netif_rx_internal(struct sk_buff *skb)
>>> 
>>> int netif_rx(struct sk_buff *skb)
>>> {
>>> +	int ret;
>>> +
>>> 	trace_netif_rx_entry(skb);
>>> 
>>> -	return netif_rx_internal(skb);
>>> +	ret = netif_rx_internal(skb);
>>> +	trace_netif_rx_exit(skb, ret);
>>> +
>>> +	return ret;
>>> }
>>> EXPORT_SYMBOL(netif_rx);
>>> 
>>> @@ -4537,6 +4542,7 @@ int netif_rx_ni(struct sk_buff *skb)
>>> 	if (local_softirq_pending())
>>> 		do_softirq();
>>> 	preempt_enable();
>>> +	trace_netif_rx_ni_exit(skb, err);
>>> 
>>> 	return err;
>>> }
>>> @@ -5222,9 +5228,14 @@ static void netif_receive_skb_list_internal(struct
>>> list_head *head)
>>>  */
>>> int netif_receive_skb(struct sk_buff *skb)
>>> {
>>> +	int ret;
>>> +
>>> 	trace_netif_receive_skb_entry(skb);
>>> 
>>> -	return netif_receive_skb_internal(skb);
>>> +	ret = netif_receive_skb_internal(skb);
>>> +	trace_netif_receive_skb_exit(skb, ret);
>>> +
>>> +	return ret;
>>> }
>>> EXPORT_SYMBOL(netif_receive_skb);
>>> 
>>> @@ -5244,9 +5255,15 @@ void netif_receive_skb_list(struct list_head *head)
>>> 
>>> 	if (list_empty(head))
>>> 		return;
>>> -	list_for_each_entry(skb, head, list)
>>> -		trace_netif_receive_skb_list_entry(skb);
>>> +	if (trace_netif_receive_skb_list_entry_enabled()) {
>>> +		list_for_each_entry(skb, head, list)
>>> +			trace_netif_receive_skb_list_entry(skb);
>>> +	}
>>> 	netif_receive_skb_list_internal(head);
>>> +	if (trace_netif_receive_skb_list_exit_enabled()) {
>>> +		list_for_each_entry(skb, head, list)
>>> +			trace_netif_receive_skb_list_exit(skb);
>>> +	}
>>> }
>>> EXPORT_SYMBOL(netif_receive_skb_list);
>>> 
>>> @@ -5634,12 +5651,17 @@ static gro_result_t napi_skb_finish(gro_result_t ret,
>>> struct sk_buff *skb)
>>> 
>>> gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
>>> {
>>> +	gro_result_t ret;
>>> +
>>> 	skb_mark_napi_id(skb, napi);
>>> 	trace_napi_gro_receive_entry(skb);
>>> 
>>> 	skb_gro_reset_offset(skb);
>>> 
>>> -	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
>>> +	ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
>>> +	trace_napi_gro_receive_exit(skb, ret);
>>> +
>>> +	return ret;
>>> }
>>> EXPORT_SYMBOL(napi_gro_receive);
>>> 
>>> @@ -5753,6 +5775,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct
>>> *napi)
>>> 
>>> gro_result_t napi_gro_frags(struct napi_struct *napi)
>>> {
>>> +	gro_result_t ret;
>>> 	struct sk_buff *skb = napi_frags_skb(napi);
>>> 
>>> 	if (!skb)
>>> @@ -5760,7 +5783,10 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
>>> 
>>> 	trace_napi_gro_frags_entry(skb);
>>> 
>>> -	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
>>> +	ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
>>> +	trace_napi_gro_frags_exit(skb, ret);
>>> +
>>> +	return ret;
>>> }
>>> EXPORT_SYMBOL(napi_gro_frags);
>>> 
>>> --
>>> 2.19.1
>> 
>> --
>> Mathieu Desnoyers
>> EfficiOS Inc.
>> http://www.efficios.com
> 
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
Steven Rostedt Nov. 12, 2018, 8:40 p.m. UTC | #5
On Mon, 12 Nov 2018 15:20:55 -0500 (EST)
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> 
> Hrm, looking at this again, I notice that there is a single DEFINE_EVENT
> using net_dev_template_simple.
> 
> We could simply turn netif_receive_skb_list_exit into a TRACE_EVENT(),
> remove the net_dev_template_simple, and rename the net_dev_template_return
> to net_dev_template ?

This too is only cosmetic and doesn't affect the code at all, because a
TRACE_EVENT() is really just:

#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
	DECLARE_EVENT_CLASS(name,			       \
			     PARAMS(proto),		       \
			     PARAMS(args),		       \
			     PARAMS(tstruct),		       \
			     PARAMS(assign),		       \
			     PARAMS(print));		       \
	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));

-- Steve

> 
> It's pretty clear from the prototype that it expects a "ret" argument,
> so I don't see the need to also state it in the template name.
> 
>
Mathieu Desnoyers Nov. 12, 2018, 8:46 p.m. UTC | #6
----- On Nov 12, 2018, at 3:40 PM, rostedt rostedt@goodmis.org wrote:

> On Mon, 12 Nov 2018 15:20:55 -0500 (EST)
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> 
>> 
>> Hrm, looking at this again, I notice that there is a single DEFINE_EVENT
>> using net_dev_template_simple.
>> 
>> We could simply turn netif_receive_skb_list_exit into a TRACE_EVENT(),
>> remove the net_dev_template_simple, and rename the net_dev_template_return
>> to net_dev_template ?
> 
> This too is only cosmetic and doesn't affect the code at all, because a
> TRACE_EVENT() is really just:
> 
> #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
>	DECLARE_EVENT_CLASS(name,			       \
>			     PARAMS(proto),		       \
>			     PARAMS(args),		       \
>			     PARAMS(tstruct),		       \
>			     PARAMS(assign),		       \
>			     PARAMS(print));		       \
>	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
> 
> -- Steve
> 

Of course.

I also notice that in two cases, a "gro_result_t" is implicitly cast
to "int". I usually frown upon this kind of stuff, because it's asking
for trouble if gro_result_t typedef to something else than "int" in the
future.

I would recommend going for two templates, one which takes a "int"
ret parameter, and the other a "gro_result_t" ret parameter.

Or am I being too cautious ?

Thanks,

Mathieu


>> 
>> It's pretty clear from the prototype that it expects a "ret" argument,
>> so I don't see the need to also state it in the template name.
>>
Mathieu Desnoyers Nov. 12, 2018, 8:56 p.m. UTC | #7
----- On Nov 12, 2018, at 3:46 PM, Mathieu Desnoyers mathieu.desnoyers@efficios.com wrote:

> ----- On Nov 12, 2018, at 3:40 PM, rostedt rostedt@goodmis.org wrote:
> 
>> On Mon, 12 Nov 2018 15:20:55 -0500 (EST)
>> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>> 
>>> 
>>> Hrm, looking at this again, I notice that there is a single DEFINE_EVENT
>>> using net_dev_template_simple.
>>> 
>>> We could simply turn netif_receive_skb_list_exit into a TRACE_EVENT(),
>>> remove the net_dev_template_simple, and rename the net_dev_template_return
>>> to net_dev_template ?
>> 
>> This too is only cosmetic and doesn't affect the code at all, because a
>> TRACE_EVENT() is really just:
>> 
>> #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
>>	DECLARE_EVENT_CLASS(name,			       \
>>			     PARAMS(proto),		       \
>>			     PARAMS(args),		       \
>>			     PARAMS(tstruct),		       \
>>			     PARAMS(assign),		       \
>>			     PARAMS(print));		       \
>>	DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
>> 
>> -- Steve
>> 
> 
> Of course.
> 
> I also notice that in two cases, a "gro_result_t" is implicitly cast
> to "int". I usually frown upon this kind of stuff, because it's asking
> for trouble if gro_result_t typedef to something else than "int" in the
> future.
> 
> I would recommend going for two templates, one which takes a "int"
> ret parameter, and the other a "gro_result_t" ret parameter.
> 
> Or am I being too cautious ?

Digging further, gro_result_t maps to:

enum gro_result {
        GRO_MERGED,
        GRO_MERGED_FREE,
        GRO_HELD,
        GRO_NORMAL,
        GRO_DROP,
        GRO_CONSUMED,
};
typedef enum gro_result gro_result_t;

So we should add a TRACE_DEFINE_ENUM() for those.

Thanks,

Mathieu



> 
> Thanks,
> 
> Mathieu
> 
> 
>>> 
>>> It's pretty clear from the prototype that it expects a "ret" argument,
>>> so I don't see the need to also state it in the template name.
>>> 
> 
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> http://www.efficios.com
Steven Rostedt Nov. 13, 2018, 1:47 a.m. UTC | #8
On Mon, 12 Nov 2018 15:46:53 -0500 (EST)
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> I also notice that in two cases, a "gro_result_t" is implicitly cast
> to "int". I usually frown upon this kind of stuff, because it's asking
> for trouble if gro_result_t typedef to something else than "int" in the
> future.
> 
> I would recommend going for two templates, one which takes a "int"
> ret parameter, and the other a "gro_result_t" ret parameter.
> 
> Or am I being too cautious ?

That's more of a question for the netdev maintainers. If they think
casting gro_result_t to int is fine, then I'm fine. If it breaks in the
future, they need to deal with it, I don't ;-)

The downside of two templates, is that the templates are the bloated
part of the trace event (DEFINE_EVENT()s are light weight). They can
add a couple of K to the memory foot print.

-- Steve
diff mbox series

Patch

diff --git a/include/trace/events/net.h b/include/trace/events/net.h
index 00aa72ce0e7c..cff1a7b9d0bb 100644
--- a/include/trace/events/net.h
+++ b/include/trace/events/net.h
@@ -117,6 +117,42 @@  DECLARE_EVENT_CLASS(net_dev_template,
 		__get_str(name), __entry->skbaddr, __entry->len)
 )
 
+DECLARE_EVENT_CLASS(net_dev_template_return,
+
+	TP_PROTO(struct sk_buff *skb, int ret),
+
+	TP_ARGS(skb, ret),
+
+	TP_STRUCT__entry(
+		__field(void *,	skbaddr)
+		__field(int,	ret)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+		__entry->ret = ret;
+	),
+
+	TP_printk("skbaddr=%p ret=%d", __entry->skbaddr, __entry->ret)
+)
+
+DECLARE_EVENT_CLASS(net_dev_template_simple,
+
+	TP_PROTO(struct sk_buff *skb),
+
+	TP_ARGS(skb),
+
+	TP_STRUCT__entry(
+		__field(void *,	skbaddr)
+	),
+
+	TP_fast_assign(
+		__entry->skbaddr = skb;
+	),
+
+	TP_printk("skbaddr=%p", __entry->skbaddr)
+)
+
 DEFINE_EVENT(net_dev_template, net_dev_queue,
 
 	TP_PROTO(struct sk_buff *skb),
@@ -244,6 +280,48 @@  DEFINE_EVENT(net_dev_rx_verbose_template, netif_rx_ni_entry,
 	TP_ARGS(skb)
 );
 
+DEFINE_EVENT(net_dev_template_return, napi_gro_frags_exit,
+
+	TP_PROTO(struct sk_buff *skb, int ret),
+
+	TP_ARGS(skb, ret)
+);
+
+DEFINE_EVENT(net_dev_template_return, napi_gro_receive_exit,
+
+	TP_PROTO(struct sk_buff *skb, int ret),
+
+	TP_ARGS(skb, ret)
+);
+
+DEFINE_EVENT(net_dev_template_return, netif_receive_skb_exit,
+
+	TP_PROTO(struct sk_buff *skb, int ret),
+
+	TP_ARGS(skb, ret)
+);
+
+DEFINE_EVENT(net_dev_template_simple, netif_receive_skb_list_exit,
+
+	TP_PROTO(struct sk_buff *skb),
+
+	TP_ARGS(skb)
+);
+
+DEFINE_EVENT(net_dev_template_return, netif_rx_exit,
+
+	TP_PROTO(struct sk_buff *skb, int ret),
+
+	TP_ARGS(skb, ret)
+);
+
+DEFINE_EVENT(net_dev_template_return, netif_rx_ni_exit,
+
+	TP_PROTO(struct sk_buff *skb, int ret),
+
+	TP_ARGS(skb, ret)
+);
+
 #endif /* _TRACE_NET_H */
 
 /* This part must be outside protection */
diff --git a/net/core/dev.c b/net/core/dev.c
index 0ffcbdd55fa9..c4dc5df34abe 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4520,9 +4520,14 @@  static int netif_rx_internal(struct sk_buff *skb)
 
 int netif_rx(struct sk_buff *skb)
 {
+	int ret;
+
 	trace_netif_rx_entry(skb);
 
-	return netif_rx_internal(skb);
+	ret = netif_rx_internal(skb);
+	trace_netif_rx_exit(skb, ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
 
@@ -4537,6 +4542,7 @@  int netif_rx_ni(struct sk_buff *skb)
 	if (local_softirq_pending())
 		do_softirq();
 	preempt_enable();
+	trace_netif_rx_ni_exit(skb, err);
 
 	return err;
 }
@@ -5222,9 +5228,14 @@  static void netif_receive_skb_list_internal(struct list_head *head)
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
+	int ret;
+
 	trace_netif_receive_skb_entry(skb);
 
-	return netif_receive_skb_internal(skb);
+	ret = netif_receive_skb_internal(skb);
+	trace_netif_receive_skb_exit(skb, ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
@@ -5244,9 +5255,15 @@  void netif_receive_skb_list(struct list_head *head)
 
 	if (list_empty(head))
 		return;
-	list_for_each_entry(skb, head, list)
-		trace_netif_receive_skb_list_entry(skb);
+	if (trace_netif_receive_skb_list_entry_enabled()) {
+		list_for_each_entry(skb, head, list)
+			trace_netif_receive_skb_list_entry(skb);
+	}
 	netif_receive_skb_list_internal(head);
+	if (trace_netif_receive_skb_list_exit_enabled()) {
+		list_for_each_entry(skb, head, list)
+			trace_netif_receive_skb_list_exit(skb);
+	}
 }
 EXPORT_SYMBOL(netif_receive_skb_list);
 
@@ -5634,12 +5651,17 @@  static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
 
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
+	gro_result_t ret;
+
 	skb_mark_napi_id(skb, napi);
 	trace_napi_gro_receive_entry(skb);
 
 	skb_gro_reset_offset(skb);
 
-	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
+	ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+	trace_napi_gro_receive_exit(skb, ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
@@ -5753,6 +5775,7 @@  static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 
 gro_result_t napi_gro_frags(struct napi_struct *napi)
 {
+	gro_result_t ret;
 	struct sk_buff *skb = napi_frags_skb(napi);
 
 	if (!skb)
@@ -5760,7 +5783,10 @@  gro_result_t napi_gro_frags(struct napi_struct *napi)
 
 	trace_napi_gro_frags_entry(skb);
 
-	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+	ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+	trace_napi_gro_frags_exit(skb, ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(napi_gro_frags);