Message ID | 20171024235709.2186047-1-songliubraving@fb.com |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
Series | [net-next] tcp: add tracepoint trace_tcp_retransmit_synack() | expand |
CCing key audience of the patch. Thanks, Song > On Oct 24, 2017, at 4:57 PM, Song Liu <songliubraving@fb.com> wrote: > > This tracepoint can be used to trace synack retransmits. It maintains > pointer to struct request_sock. > > We cannot simply reuse trace_tcp_retransmit_skb() here, because the > sk here is the LISTEN socket. The IP addresses and ports should be > extracted from struct request_sock. > > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > include/trace/events/tcp.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ > net/ipv4/tcp_output.c | 1 + > 2 files changed, 57 insertions(+) > > diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h > index 03699ba..07cccca 100644 > --- a/include/trace/events/tcp.h > +++ b/include/trace/events/tcp.h > @@ -237,6 +237,62 @@ TRACE_EVENT(tcp_set_state, > show_tcp_state_name(__entry->newstate)) > ); > > +TRACE_EVENT(tcp_retransmit_synack, > + > + TP_PROTO(const struct sock *sk, const struct request_sock *req), > + > + TP_ARGS(sk, req), > + > + TP_STRUCT__entry( > + __field(const void *, skaddr) > + __field(const void *, req) > + __field(__u16, sport) > + __field(__u16, dport) > + __array(__u8, saddr, 4) > + __array(__u8, daddr, 4) > + __array(__u8, saddr_v6, 16) > + __array(__u8, daddr_v6, 16) > + ), > + > + TP_fast_assign( > + struct inet_request_sock *ireq = inet_rsk(req); > + struct in6_addr *pin6; > + __be32 *p32; > + > + __entry->skaddr = sk; > + __entry->req = req; > + > + __entry->sport = ireq->ir_num; > + __entry->dport = ntohs(ireq->ir_rmt_port); > + > + p32 = (__be32 *) __entry->saddr; > + *p32 = ireq->ir_loc_addr; > + > + p32 = (__be32 *) __entry->daddr; > + *p32 = ireq->ir_rmt_addr; > + > +#if IS_ENABLED(CONFIG_IPV6) > + if (sk->sk_family == AF_INET6) { > + pin6 = (struct in6_addr *)__entry->saddr_v6; > + *pin6 = ireq->ir_v6_loc_addr; > + pin6 = (struct in6_addr *)__entry->daddr_v6; > + *pin6 = ireq->ir_v6_rmt_addr; > + } else > +#endif > + { > + pin6 = (struct in6_addr 
*)__entry->saddr_v6; > + ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6); > + pin6 = (struct in6_addr *)__entry->daddr_v6; > + ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6); > + } > + ), > + > + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", > + __entry->sport, __entry->dport, > + __entry->saddr, __entry->daddr, > + __entry->saddr_v6, __entry->daddr_v6) > +); > + > #endif /* _TRACE_TCP_H */ > > /* This part must be outside protection */ > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c > index 1f01f4c..6a728a5 100644 > --- a/net/ipv4/tcp_output.c > +++ b/net/ipv4/tcp_output.c > @@ -3735,6 +3735,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) > __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); > if (unlikely(tcp_passive_fastopen(sk))) > tcp_sk(sk)->total_retrans++; > + trace_tcp_retransmit_synack(sk, req); > } > return res; > } > -- > 2.9.5 >
Hi Song, [auto build test WARNING on net-next/master] url: https://github.com/0day-ci/linux/commits/Song-Liu/tcp-add-tracepoint-trace_tcp_retransmit_synack/20171026-010651 reproduce: # apt-get install sparse make ARCH=x86_64 allmodconfig make C=1 CF=-D__CHECK_ENDIAN__ sparse warnings: (new ones prefixed by >>) vim +281 include/trace/events/tcp.h 241 242 TP_PROTO(const struct sock *sk, const struct request_sock *req), 243 244 TP_ARGS(sk, req), 245 246 TP_STRUCT__entry( 247 __field(const void *, skaddr) 248 __field(const void *, req) 249 __field(__u16, sport) 250 __field(__u16, dport) 251 __array(__u8, saddr, 4) 252 __array(__u8, daddr, 4) 253 __array(__u8, saddr_v6, 16) 254 __array(__u8, daddr_v6, 16) 255 ), 256 257 TP_fast_assign( 258 struct inet_request_sock *ireq = inet_rsk(req); 259 struct in6_addr *pin6; 260 __be32 *p32; 261 262 __entry->skaddr = sk; 263 __entry->req = req; 264 265 __entry->sport = ireq->ir_num; 266 __entry->dport = ntohs(ireq->ir_rmt_port); 267 268 p32 = (__be32 *) __entry->saddr; 269 *p32 = ireq->ir_loc_addr; 270 271 p32 = (__be32 *) __entry->daddr; 272 *p32 = ireq->ir_rmt_addr; 273 > 274 #if IS_ENABLED(CONFIG_IPV6) 275 if (sk->sk_family == AF_INET6) { 276 pin6 = (struct in6_addr *)__entry->saddr_v6; 277 *pin6 = ireq->ir_v6_loc_addr; 278 pin6 = (struct in6_addr *)__entry->daddr_v6; 279 *pin6 = ireq->ir_v6_rmt_addr; 280 } else > 281 #endif 282 { 283 pin6 = (struct in6_addr *)__entry->saddr_v6; 284 ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6); 285 pin6 = (struct in6_addr *)__entry->daddr_v6; 286 ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6); 287 } 288 ), 289 290 TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", 291 __entry->sport, __entry->dport, 292 __entry->saddr, __entry->daddr, 293 __entry->saddr_v6, __entry->daddr_v6) 294 ); 295 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation
> On Oct 25, 2017, at 8:13 PM, kbuild test robot <lkp@intel.com> wrote: > > Hi Song, > > [auto build test WARNING on net-next/master] > > url: https://github.com/0day-ci/linux/commits/Song-Liu/tcp-add-tracepoint-trace_tcp_retransmit_synack/20171026-010651 > reproduce: > # apt-get install sparse > make ARCH=x86_64 allmodconfig > make C=1 CF=-D__CHECK_ENDIAN__ > > > sparse warnings: (new ones prefixed by >>) > > > vim +281 include/trace/events/tcp.h > > 241 > 242 TP_PROTO(const struct sock *sk, const struct request_sock *req), > 243 > 244 TP_ARGS(sk, req), > 245 > 246 TP_STRUCT__entry( > 247 __field(const void *, skaddr) > 248 __field(const void *, req) > 249 __field(__u16, sport) > 250 __field(__u16, dport) > 251 __array(__u8, saddr, 4) > 252 __array(__u8, daddr, 4) > 253 __array(__u8, saddr_v6, 16) > 254 __array(__u8, daddr_v6, 16) > 255 ), > 256 > 257 TP_fast_assign( > 258 struct inet_request_sock *ireq = inet_rsk(req); > 259 struct in6_addr *pin6; > 260 __be32 *p32; > 261 > 262 __entry->skaddr = sk; > 263 __entry->req = req; > 264 > 265 __entry->sport = ireq->ir_num; > 266 __entry->dport = ntohs(ireq->ir_rmt_port); > 267 > 268 p32 = (__be32 *) __entry->saddr; > 269 *p32 = ireq->ir_loc_addr; > 270 > 271 p32 = (__be32 *) __entry->daddr; > 272 *p32 = ireq->ir_rmt_addr; > 273 >> 274 #if IS_ENABLED(CONFIG_IPV6) > 275 if (sk->sk_family == AF_INET6) { > 276 pin6 = (struct in6_addr *)__entry->saddr_v6; > 277 *pin6 = ireq->ir_v6_loc_addr; > 278 pin6 = (struct in6_addr *)__entry->daddr_v6; > 279 *pin6 = ireq->ir_v6_rmt_addr; > 280 } else >> 281 #endif In this case, we are putting an #if IS_ENABLED(CONFIG_IPV6) directive inside the TRACE_EVENT macro, which generates warnings like: ./include/trace/events/tcp.h:274:1: error: directive in argument list ./include/trace/events/tcp.h:281:1: error: directive in argument list It seems these warnings cannot be easily avoided. This is also the same pattern we have already been using in include/trace/events/tcp.h. Any suggestions on how we should proceed from here? Thanks, Song
On Thu, Oct 26, 2017 at 4:50 PM, Song Liu <songliubraving@fb.com> wrote: > In this case, we are putting CONFIG_IPV6 in TRACE_EVENT macro, which generates > warnings like: > > ./include/trace/events/tcp.h:274:1: error: directive in argument list > ./include/trace/events/tcp.h:281:1: error: directive in argument list > > Seems these warning cannot be easily avoided. This is also the same pattern we > have been using in include/trace/events/tcp.h. Hmm, we use the same pattern elsewhere, so why does it only complain about this one? > > Any suggestions on how shall we proceed from here? > I think this warning is harmless, so it is perhaps not worth the time to silence it, unless sparse provides a simple way to do so.
> On Oct 26, 2017, at 7:01 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote: > > On Thu, Oct 26, 2017 at 4:50 PM, Song Liu <songliubraving@fb.com> wrote: >> In this case, we are putting CONFIG_IPV6 in TRACE_EVENT macro, which generates >> warnings like: >> >> ./include/trace/events/tcp.h:274:1: error: directive in argument list >> ./include/trace/events/tcp.h:281:1: error: directive in argument list >> >> Seems these warning cannot be easily avoided. This is also the same pattern we >> have been using in include/trace/events/tcp.h. > > Hmm, we use the same so why it only complains about this one? sparse reports the same warning for all the lines in tcp.h. I don't know why the kbuild test bot only complains about this patch. > >> >> Any suggestions on how shall we proceed from here? >> > > I think this warning is harmless, so perhaps not worthy time to > shut it up, unless sparse provides a simple way to do so.
On 10/26/17 10:06 PM, Song Liu wrote: > >> On Oct 26, 2017, at 7:01 PM, Cong Wang <xiyou.wangcong@gmail.com> wrote: >> >> On Thu, Oct 26, 2017 at 4:50 PM, Song Liu <songliubraving@fb.com> wrote: >>> In this case, we are putting CONFIG_IPV6 in TRACE_EVENT macro, which generates >>> warnings like: >>> >>> ./include/trace/events/tcp.h:274:1: error: directive in argument list >>> ./include/trace/events/tcp.h:281:1: error: directive in argument list >>> >>> Seems these warning cannot be easily avoided. This is also the same pattern we >>> have been using in include/trace/events/tcp.h. >> >> Hmm, we use the same so why it only complains about this one?\ > > sparse reports same warning for all the lines in tcp.h. Don't know why > kbuild test bot only complains about this patch. It's safe to ignore this sparse error. The #ifdef directives inside TP_fast_assign() in net.h have been there for years without issues: $ make C=2 net/core/dev.o ../include/trace/events/net.h:170:1: error: directive in argument list ../include/trace/events/net.h:172:1: error: directive in argument list ../include/trace/events/net.h:174:1: error: directive in argument list
Hi, On 25 October 2017 at 01:57, Song Liu <songliubraving@fb.com> wrote: > This tracepoint can be used to trace synack retransmits. It maintains > pointer to struct request_sock. > > We cannot simply reuse trace_tcp_retransmit_skb() here, because the > sk here is the LISTEN socket. The IP addresses and ports should be > extracted from struct request_sock. > > Signed-off-by: Song Liu <songliubraving@fb.com> > --- > include/trace/events/tcp.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ > net/ipv4/tcp_output.c | 1 + > 2 files changed, 57 insertions(+) > > diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h > index 03699ba..07cccca 100644 > --- a/include/trace/events/tcp.h > +++ b/include/trace/events/tcp.h > @@ -237,6 +237,62 @@ TRACE_EVENT(tcp_set_state, > show_tcp_state_name(__entry->newstate)) > ); > > +TRACE_EVENT(tcp_retransmit_synack, > + > + TP_PROTO(const struct sock *sk, const struct request_sock *req), > + > + TP_ARGS(sk, req), > + > + TP_STRUCT__entry( > + __field(const void *, skaddr) > + __field(const void *, req) > + __field(__u16, sport) > + __field(__u16, dport) > + __array(__u8, saddr, 4) > + __array(__u8, daddr, 4) > + __array(__u8, saddr_v6, 16) > + __array(__u8, daddr_v6, 16) Would it make sense to add the inode of the network namespace that owns the socket? (along with the major/minor of the nsfs) If the kernel later gains tracepoints for TCP connect, accept, close including the netns ino, then I might be able to replace some ebpf-kprobes code by ebpf-tracepoints code :) > [...] Thanks, Alban
On 10/27/17 1:38 PM, Alban Crequy wrote: > Hi, > > On 25 October 2017 at 01:57, Song Liu <songliubraving@fb.com> wrote: >> This tracepoint can be used to trace synack retransmits. It maintains >> pointer to struct request_sock. >> >> We cannot simply reuse trace_tcp_retransmit_skb() here, because the >> sk here is the LISTEN socket. The IP addresses and ports should be >> extracted from struct request_sock. >> >> Signed-off-by: Song Liu <songliubraving@fb.com> >> --- >> include/trace/events/tcp.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ >> net/ipv4/tcp_output.c | 1 + >> 2 files changed, 57 insertions(+) >> >> diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h >> index 03699ba..07cccca 100644 >> --- a/include/trace/events/tcp.h >> +++ b/include/trace/events/tcp.h >> @@ -237,6 +237,62 @@ TRACE_EVENT(tcp_set_state, >> show_tcp_state_name(__entry->newstate)) >> ); >> >> +TRACE_EVENT(tcp_retransmit_synack, >> + >> + TP_PROTO(const struct sock *sk, const struct request_sock *req), >> + >> + TP_ARGS(sk, req), >> + >> + TP_STRUCT__entry( >> + __field(const void *, skaddr) >> + __field(const void *, req) >> + __field(__u16, sport) >> + __field(__u16, dport) >> + __array(__u8, saddr, 4) >> + __array(__u8, daddr, 4) >> + __array(__u8, saddr_v6, 16) >> + __array(__u8, daddr_v6, 16) > > Would it make sense to add the inode of the network namespace that > owns the socket? (along with the major/minor of the nsfs) We cannot do this. netns ino is not unique identifier of netns. we can do such hack only inside programs by walking skb->dev->nd_net->net with bpf_probe_read() and realizing that this is unstable interface and not technically correct. > If the kernel later gains tracepoints for TCP connect, accept, close > including the netns ino, then I might be able to replace some > ebpf-kprobes code by ebpf-tracepoints code :) What is the use case for tracepoints in connect/accept/close ? 
Just because some _useful_ bcc script is using a kprobe on a particular kernel function doesn't yet mean that we need a tracepoint there. IMO the general rule for tracepoints is to only add them when it's 100% certain that this is the right place for them and the kprobe approach is not enough or not possible. In the case of the recent addition of tcp tracepoints, the main thing they achieve (vs. our old kprobe approach) is that they are accurate. In this particular patch, a kprobe on tcp_rtx_synack() is not the same as trace_tcp_retransmit_synack(), since the kprobe also incorrectly counts failed send_synack() calls. It's solvable via a kretprobe on tcp_rtx_synack() and checking %rax inside the bpf program, but kretprobes add runtime overhead and are much slower than tracepoints.
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 03699ba..07cccca 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -237,6 +237,62 @@ TRACE_EVENT(tcp_set_state, show_tcp_state_name(__entry->newstate)) ); +TRACE_EVENT(tcp_retransmit_synack, + + TP_PROTO(const struct sock *sk, const struct request_sock *req), + + TP_ARGS(sk, req), + + TP_STRUCT__entry( + __field(const void *, skaddr) + __field(const void *, req) + __field(__u16, sport) + __field(__u16, dport) + __array(__u8, saddr, 4) + __array(__u8, daddr, 4) + __array(__u8, saddr_v6, 16) + __array(__u8, daddr_v6, 16) + ), + + TP_fast_assign( + struct inet_request_sock *ireq = inet_rsk(req); + struct in6_addr *pin6; + __be32 *p32; + + __entry->skaddr = sk; + __entry->req = req; + + __entry->sport = ireq->ir_num; + __entry->dport = ntohs(ireq->ir_rmt_port); + + p32 = (__be32 *) __entry->saddr; + *p32 = ireq->ir_loc_addr; + + p32 = (__be32 *) __entry->daddr; + *p32 = ireq->ir_rmt_addr; + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) { + pin6 = (struct in6_addr *)__entry->saddr_v6; + *pin6 = ireq->ir_v6_loc_addr; + pin6 = (struct in6_addr *)__entry->daddr_v6; + *pin6 = ireq->ir_v6_rmt_addr; + } else +#endif + { + pin6 = (struct in6_addr *)__entry->saddr_v6; + ipv6_addr_set_v4mapped(ireq->ir_loc_addr, pin6); + pin6 = (struct in6_addr *)__entry->daddr_v6; + ipv6_addr_set_v4mapped(ireq->ir_rmt_addr, pin6); + } + ), + + TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c", + __entry->sport, __entry->dport, + __entry->saddr, __entry->daddr, + __entry->saddr_v6, __entry->daddr_v6) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1f01f4c..6a728a5 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3735,6 +3735,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) __NET_INC_STATS(sock_net(sk), 
LINUX_MIB_TCPSYNRETRANS); if (unlikely(tcp_passive_fastopen(sk))) tcp_sk(sk)->total_retrans++; + trace_tcp_retransmit_synack(sk, req); } return res; }
This tracepoint can be used to trace synack retransmits. It records a pointer to struct request_sock. We cannot simply reuse trace_tcp_retransmit_skb() here, because the sk here is the LISTEN socket. The IP addresses and ports should be extracted from struct request_sock. Signed-off-by: Song Liu <songliubraving@fb.com> --- include/trace/events/tcp.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_output.c | 1 + 2 files changed, 57 insertions(+)