diff mbox

[1/2] udp: add tracepoints for queueing skb to rcvbuf

Message ID 65795E11DBF1E645A09CEC7EAEE94B9C402B96E5@USINDEVS02.corp.hds.com
State Accepted, archived
Delegated to: David Miller
Headers show

Commit Message

Satoru Moriya June 17, 2011, 9:58 p.m. UTC
This patch adds a tracepoint to __udp_queue_rcv_skb to get the
return value of ip_queue_rcv_skb. It indicates why kernel drops
a packet at this point.

ip_queue_rcv_skb returns following values in the packet drop case:

rcvbuf is full                 : -ENOMEM
sk_filter returns error        : -EINVAL, -EACCESS, -ENOMEM, etc.
__sk_mem_schedule returns error: -ENOBUF


Signed-off-by: Satoru Moriya <satoru.moriya@hds.com>
---
 include/trace/events/udp.h |   32 ++++++++++++++++++++++++++++++++
 net/core/net-traces.c      |    1 +
 net/ipv4/udp.c             |    2 ++
 3 files changed, 35 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/events/udp.h

Comments

Neil Horman June 21, 2011, 10:47 a.m. UTC | #1
On Fri, Jun 17, 2011 at 05:58:39PM -0400, Satoru Moriya wrote:
> This patch adds a tracepoint to __udp_queue_rcv_skb to get the
> return value of ip_queue_rcv_skb. It indicates why kernel drops
> a packet at this point.
> 
> ip_queue_rcv_skb returns following values in the packet drop case:
> 
> rcvbuf is full                 : -ENOMEM
> sk_filter returns error        : -EINVAL, -EACCESS, -ENOMEM, etc.
> __sk_mem_schedule returns error: -ENOBUF
> 
> 
> Signed-off-by: Satoru Moriya <satoru.moriya@hds.com>
> ---
>  include/trace/events/udp.h |   32 ++++++++++++++++++++++++++++++++
>  net/core/net-traces.c      |    1 +
>  net/ipv4/udp.c             |    2 ++
>  3 files changed, 35 insertions(+), 0 deletions(-)
>  create mode 100644 include/trace/events/udp.h
> 
> diff --git a/include/trace/events/udp.h b/include/trace/events/udp.h
> new file mode 100644
> index 0000000..a664bb9
> --- /dev/null
> +++ b/include/trace/events/udp.h
> @@ -0,0 +1,32 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM udp
> +
> +#if !defined(_TRACE_UDP_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_UDP_H
> +
> +#include <linux/udp.h>
> +#include <linux/tracepoint.h>
> +
> +TRACE_EVENT(udp_fail_queue_rcv_skb,
> +
> +	TP_PROTO(int rc, struct sock *sk),
> +
> +	TP_ARGS(rc, sk),
> +
> +	TP_STRUCT__entry(
> +		__field(int, rc)
> +		__field(__u16, lport)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->rc = rc;
> +		__entry->lport = inet_sk(sk)->inet_num;
> +	),
> +
> +	TP_printk("rc=%d port=%hu", __entry->rc, __entry->lport)
> +);
> +
> +#endif /* _TRACE_UDP_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/net/core/net-traces.c b/net/core/net-traces.c
> index 7f1bb2a..13aab64 100644
> --- a/net/core/net-traces.c
> +++ b/net/core/net-traces.c
> @@ -28,6 +28,7 @@
>  #include <trace/events/skb.h>
>  #include <trace/events/net.h>
>  #include <trace/events/napi.h>
> +#include <trace/events/udp.h>
>  
>  EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
>  
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index abca870..37aa9bf 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -105,6 +105,7 @@
>  #include <net/route.h>
>  #include <net/checksum.h>
>  #include <net/xfrm.h>
> +#include <trace/events/udp.h>
>  #include "udp_impl.h"
>  
>  struct udp_table udp_table __read_mostly;
> @@ -1363,6 +1364,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>  					 is_udplite);
>  		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
>  		kfree_skb(skb);
> +		trace_udp_fail_queue_rcv_skb(rc, sk);
>  		return -1;
>  	}
>  
> -- 
> 1.7.1
> 
I was thinking you could just trace callers of __sk_mem_schedule, but looking at
it this works as well
Acked-by: Neil Horman <nhorman@tuxdriver.com>

> 
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hagen Paul Pfeifer June 21, 2011, 11:58 a.m. UTC | #2
On Tue, 21 Jun 2011 06:47:43 -0400, Neil Horman wrote:

> I was thinking you could just trace callers of __sk_mem_schedule, but
> looking at
> it this works as well
> Acked-by: Neil Horman <nhorman@tuxdriver.com>

Hey Neil,

since you acked the patch do you have any plans to migrate dropwatch to
use perf infrastructure and skip the netlink transport? Should be
practicable now. No kernel patch required to run dropwatch ;-)


HGN

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Neil Horman June 21, 2011, 1:50 p.m. UTC | #3
On Tue, Jun 21, 2011 at 01:58:27PM +0200, Hagen Paul Pfeifer wrote:
> 
> On Tue, 21 Jun 2011 06:47:43 -0400, Neil Horman wrote:
> 
> 
> 
> > I was thinking you could just trace callers of __sk_mem_schedule, but
> 
> > looking at
> 
> > it this works as well
> 
> > Acked-by: Neil Horman <nhorman@tuxdriver.com>
> 
> 
> 
> Hey Neil,
> 
> 
> 
> since you acked the patch do you have any plans to migrate dropwatch to
> 
> use perf infrastructure and skip the netlink transport? Should be
> 
> practicable now. No kernel patch required to run dropwatch ;-)
> 
> 
I hadn't really thought about that much, but yes, I suppose I could migrate
dropwatch to export kfree_skb data via perf.  Admittedly I don't know much about
the perf api.  Do you have any pointers on its use (to save me time in figuring
out how it all works)?  If so I'll start looking into it.
Neil

> 
> 
> 
> HGN
> 
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hagen Paul Pfeifer June 21, 2011, 2:48 p.m. UTC | #4
On Tue, 21 Jun 2011 09:50:09 -0400, Neil Horman wrote:

> I hadn't really thought about that much, but yes, I suppose I could
migrate
> dropwatch to export kfree_skb data via perf.  Admittedly I don't know
much
> about
> the perf api.  Do you have any pointers on its use (to save me time in
> figuring
> out how it all works)?  If so I'll start looking into it.

http://git.kernel.org/?p=status/powertop/powertop.git;a=tree;f=perf;hb=HEAD

is probably a good starting point. Especially
perf_bundle.cpp:handle_trace_point(). But I am not sure if this is the most
clever way. The direct us of the perf api is somewhat dodgy (not sure if
the ABI will change). IIRC Steven Rostedt wrote about a user space library
(I CC'ed Steven). BUT: tracing via /sys/kernel/debug/tracing/* may be
enough, eventually there is no need for perf at all. Then trace-cmd may
provide some nice ideas how to wrap the /sys/kernel/debug/tracing interface
programmatically.

The idea behind dropwatch is great! There is currently to much
unconsolidated information. It takes a genius to understand where and later
why packets are dropped. A userspace tool where no kernel patch is required
is a big plus! ;-)

Hagen
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steven Rostedt June 21, 2011, 5:14 p.m. UTC | #5
On Tue, 2011-06-21 at 16:48 +0200, Hagen Paul Pfeifer wrote:
> On Tue, 21 Jun 2011 09:50:09 -0400, Neil Horman wrote:
> 
> > I hadn't really thought about that much, but yes, I suppose I could
> migrate
> > dropwatch to export kfree_skb data via perf.  Admittedly I don't know
> much
> > about
> > the perf api.  Do you have any pointers on its use (to save me time in
> > figuring
> > out how it all works)?  If so I'll start looking into it.
> 
> http://git.kernel.org/?p=status/powertop/powertop.git;a=tree;f=perf;hb=HEAD

Please please do not copy this code and reuse it. You will end up
forcing us to this ABI forever!

Please read this for background:

 http://lwn.net/Articles/442113/

I plan on doing a libperf.so that will allow any tool to interact with
trace events in the kernel the proper way. Yes trace-cmd currently has
its own library that deals with this properly, but the library is not
shipped with distros.

I plan on taking the trace-cmd library (which perf even uses - an older
verion) and make it into the libperf.so that distros will ship. But
unfortunately, my work has gotten in the way (the work that actually
pays me) and I'm doing other things at the moment.

-- Steve

> 
> is probably a good starting point. Especially
> perf_bundle.cpp:handle_trace_point(). But I am not sure if this is the most
> clever way. The direct us of the perf api is somewhat dodgy (not sure if
> the ABI will change). IIRC Steven Rostedt wrote about a user space library
> (I CC'ed Steven). BUT: tracing via /sys/kernel/debug/tracing/* may be
> enough, eventually there is no need for perf at all. Then trace-cmd may
> provide some nice ideas how to wrap the /sys/kernel/debug/tracing interface
> programmatically.
> 
> The idea behind dropwatch is great! There is currently to much
> unconsolidated information. It takes a genius to understand where and later
> why packets are dropped. A userspace tool where no kernel patch is required
> is a big plus! ;-)
> 
> Hagen


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/trace/events/udp.h b/include/trace/events/udp.h
new file mode 100644
index 0000000..a664bb9
--- /dev/null
+++ b/include/trace/events/udp.h
@@ -0,0 +1,32 @@ 
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM udp
+
+#if !defined(_TRACE_UDP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_UDP_H
+
+#include <linux/udp.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(udp_fail_queue_rcv_skb,
+
+	TP_PROTO(int rc, struct sock *sk),
+
+	TP_ARGS(rc, sk),
+
+	TP_STRUCT__entry(
+		__field(int, rc)
+		__field(__u16, lport)
+	),
+
+	TP_fast_assign(
+		__entry->rc = rc;
+		__entry->lport = inet_sk(sk)->inet_num;
+	),
+
+	TP_printk("rc=%d port=%hu", __entry->rc, __entry->lport)
+);
+
+#endif /* _TRACE_UDP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 7f1bb2a..13aab64 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -28,6 +28,7 @@ 
 #include <trace/events/skb.h>
 #include <trace/events/net.h>
 #include <trace/events/napi.h>
+#include <trace/events/udp.h>
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index abca870..37aa9bf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -105,6 +105,7 @@ 
 #include <net/route.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
+#include <trace/events/udp.h>
 #include "udp_impl.h"
 
 struct udp_table udp_table __read_mostly;
@@ -1363,6 +1364,7 @@  static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 					 is_udplite);
 		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 		kfree_skb(skb);
+		trace_udp_fail_queue_rcv_skb(rc, sk);
 		return -1;
 	}