diff mbox series

[net-next,2/5] tipc: add trace_events for tipc link

Message ID 20181219021800.21662-3-tuong.t.lien@dektech.com.au
State Accepted, archived
Delegated to: David Miller
Headers show
Series tipc: tracepoints and trace_events in TIPC | expand

Commit Message

Tuong Lien Dec. 19, 2018, 2:17 a.m. UTC
The commit adds the new trace_events for TIPC link object:

trace_tipc_link_timeout()
trace_tipc_link_fsm()
trace_tipc_link_reset()
trace_tipc_link_too_silent()
trace_tipc_link_retrans()
trace_tipc_link_bc_ack()
trace_tipc_link_conges()

And the traces for PROTOCOL messages at building and receiving:

trace_tipc_proto_build()
trace_tipc_proto_rcv()

Note:
a) The 'tipc_link_too_silent' event will only happen when the
'silent_intv_cnt' is about to reach the 'abort_limit' value (and the
event is enabled). The benefit for this kind of event is that we can
get an early indication about TIPC link loss issue due to timeout, then
can do some necessary actions for troubleshooting.

For example: To trigger the 'tipc_proto_rcv' when the 'too_silent'
event occurs:

echo 'enable_event:tipc:tipc_proto_rcv' > \
      events/tipc/tipc_link_too_silent/trigger

And disable it when TIPC link is reset:

echo 'disable_event:tipc:tipc_proto_rcv' > \
      events/tipc/tipc_link_reset/trigger

b) The 'tipc_link_retrans' or 'tipc_link_bc_ack' event is useful to
trace TIPC retransmission issues.

In addition, the commit adds the 'trace_tipc_list/link_dump()' at the
'retransmission failure' case. Then, if the issue occurs, the link
'transmq' along with the link data can be dumped for post-analysis.
These dump events should be enabled by default since it will only take
effect when the failure happens.

The same approach is also applied for the faulty case that the
validation of protocol message is failed.

Acked-by: Ying Xue <ying.xue@windriver.com>
Tested-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
---
 net/tipc/link.c  |  33 ++++++++++++++-
 net/tipc/link.h  |   1 +
 net/tipc/node.c  |   8 +++-
 net/tipc/trace.h | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 164 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 668dab529021..55c44d867d4b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -357,9 +357,11 @@  void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
 	rcv_l->bc_peer_is_up = true;
 	rcv_l->state = LINK_ESTABLISHED;
 	tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
+	trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
 	tipc_link_reset(rcv_l);
 	rcv_l->state = LINK_RESET;
 	if (!snd_l->ackers) {
+		trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!");
 		tipc_link_reset(snd_l);
 		snd_l->state = LINK_RESET;
 		__skb_queue_purge(xmitq);
@@ -523,6 +525,7 @@  bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
 
 	l = *link;
 	strcpy(l->name, tipc_bclink_name);
+	trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!");
 	tipc_link_reset(l);
 	l->state = LINK_RESET;
 	l->ackers = 0;
@@ -547,6 +550,7 @@  bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
 int tipc_link_fsm_evt(struct tipc_link *l, int evt)
 {
 	int rc = 0;
+	int old_state = l->state;
 
 	switch (l->state) {
 	case LINK_RESETTING:
@@ -693,10 +697,12 @@  int tipc_link_fsm_evt(struct tipc_link *l, int evt)
 	default:
 		pr_err("Unknown FSM state %x in %s\n", l->state, l->name);
 	}
+	trace_tipc_link_fsm(l->name, old_state, l->state, evt);
 	return rc;
 illegal_evt:
 	pr_err("Illegal FSM event %x in state %x on link %s\n",
 	       evt, l->state, l->name);
+	trace_tipc_link_fsm(l->name, old_state, l->state, evt);
 	return rc;
 }
 
@@ -741,6 +747,18 @@  static void link_profile_stats(struct tipc_link *l)
 		l->stats.msg_length_profile[6]++;
 }
 
+/**
+ * tipc_link_too_silent - check if link is "too silent"
+ * @l: tipc link to be checked
+ *
+ * Returns true if the link 'silent_intv_cnt' is about to reach the
+ * 'abort_limit' value, otherwise false
+ */
+bool tipc_link_too_silent(struct tipc_link *l)
+{
+	return (l->silent_intv_cnt + 2 > l->abort_limit);
+}
+
 /* tipc_link_timeout - perform periodic task as instructed from node timeout
  */
 int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
@@ -754,6 +772,8 @@  int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
 	u16 bc_acked = l->bc_rcvlink->acked;
 	struct tipc_mon_state *mstate = &l->mon_state;
 
+	trace_tipc_link_timeout(l, TIPC_DUMP_NONE, " ");
+	trace_tipc_link_too_silent(l, TIPC_DUMP_ALL, " ");
 	switch (l->state) {
 	case LINK_ESTABLISHED:
 	case LINK_SYNCHING:
@@ -816,6 +836,7 @@  static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
 	TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
 	skb_queue_tail(&l->wakeupq, skb);
 	l->stats.link_congs++;
+	trace_tipc_link_conges(l, TIPC_DUMP_ALL, "wakeup scheduled!");
 	return -ELINKCONG;
 }
 
@@ -1037,6 +1058,7 @@  static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
 	if (less(to, from))
 		return 0;
 
+	trace_tipc_link_retrans(r, from, to, &l->transmq);
 	/* Detect repeated retransmit failures on same packet */
 	if (r->prev_from != from) {
 		r->prev_from = from;
@@ -1044,6 +1066,9 @@  static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
 		r->stale_cnt = 0;
 	} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
 		link_retransmit_failure(l, skb);
+		trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
+		trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
+		trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
 		if (link_is_bc_sndlink(l))
 			return TIPC_LINK_DOWN_EVT;
 		return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
@@ -1403,6 +1428,7 @@  static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 		l->stats.sent_nacks++;
 	skb->priority = TC_PRIO_CONTROL;
 	__skb_queue_tail(xmitq, skb);
+	trace_tipc_proto_build(skb, false, l->name);
 }
 
 void tipc_link_create_dummy_tnl_msg(struct tipc_link *l,
@@ -1566,6 +1592,7 @@  static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 	char *if_name;
 	int rc = 0;
 
+	trace_tipc_proto_rcv(skb, false, l->name);
 	if (tipc_link_is_blocked(l) || !xmitq)
 		goto exit;
 
@@ -1576,8 +1603,11 @@  static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 	hdr = buf_msg(skb);
 	data = msg_data(hdr);
 
-	if (!tipc_link_validate_msg(l, hdr))
+	if (!tipc_link_validate_msg(l, hdr)) {
+		trace_tipc_skb_dump(skb, false, "PROTO invalid (1)!");
+		trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (1)!");
 		goto exit;
+	}
 
 	switch (mtyp) {
 	case RESET_MSG:
@@ -1820,6 +1850,7 @@  void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
 	if (!more(acked, l->acked))
 		return;
 
+	trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq);
 	/* Skip over packets peer has already acked */
 	skb_queue_walk(&snd_l->transmq, skb) {
 		if (more(buf_seqno(skb), l->acked))
diff --git a/net/tipc/link.h b/net/tipc/link.h
index e8f692598e4d..8439e0ee53a8 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -148,4 +148,5 @@  int tipc_link_bc_sync_rcv(struct tipc_link *l,   struct tipc_msg *hdr,
 			  struct sk_buff_head *xmitq);
 int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 			  struct sk_buff_head *xmitq);
+bool tipc_link_too_silent(struct tipc_link *l);
 #endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index cb10b889ebf5..cff42c95cef8 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -775,6 +775,7 @@  static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
 		if (tipc_link_peer_is_down(l))
 			tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
 		tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
+		trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down!");
 		tipc_link_fsm_evt(l, LINK_RESET_EVT);
 		tipc_link_reset(l);
 		tipc_link_build_reset_msg(l, xmitq);
@@ -792,6 +793,7 @@  static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
 	tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
 	n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
 	tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
+	trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down -> failover!");
 	tipc_link_reset(l);
 	tipc_link_fsm_evt(l, LINK_RESET_EVT);
 	tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
@@ -1013,6 +1015,7 @@  void tipc_node_check_dest(struct net *net, u32 addr,
 			*respond = false;
 			goto exit;
 		}
+		trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link created!");
 		tipc_link_reset(l);
 		tipc_link_fsm_evt(l, LINK_RESET_EVT);
 		if (n->state == NODE_FAILINGOVER)
@@ -1586,8 +1589,11 @@  static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
 		}
 	}
 
-	if (!tipc_link_validate_msg(l, hdr))
+	if (!tipc_link_validate_msg(l, hdr)) {
+		trace_tipc_skb_dump(skb, false, "PROTO invalid (2)!");
+		trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (2)!");
 		return false;
+	}
 
 	/* Check and update node accesibility if applicable */
 	if (state == SELF_UP_PEER_COMING) {
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index 4c74927df685..535c8958651f 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -74,6 +74,45 @@  enum {
 };
 #endif
 
+/* Link & Node FSM states: */
+#define state_sym(val)							  \
+	__print_symbolic(val,						  \
+			{(0xe),		"ESTABLISHED"			},\
+			{(0xe << 4),	"ESTABLISHING"			},\
+			{(0x1 << 8),	"RESET"				},\
+			{(0x2 << 12),	"RESETTING"			},\
+			{(0xd << 16),	"PEER_RESET"			},\
+			{(0xf << 20),	"FAILINGOVER"			},\
+			{(0xc << 24),	"SYNCHING"			},\
+			{(0xdd),	"SELF_DOWN_PEER_DOWN"		},\
+			{(0xaa),	"SELF_UP_PEER_UP"		},\
+			{(0xd1),	"SELF_DOWN_PEER_LEAVING"	},\
+			{(0xac),	"SELF_UP_PEER_COMING"		},\
+			{(0xca),	"SELF_COMING_PEER_UP"		},\
+			{(0x1d),	"SELF_LEAVING_PEER_DOWN"	},\
+			{(0xf0),	"FAILINGOVER"			},\
+			{(0xcc),	"SYNCHING"			})
+
+/* Link & Node FSM events: */
+#define evt_sym(val)							  \
+	__print_symbolic(val,						  \
+			{(0xec1ab1e),	"ESTABLISH_EVT"			},\
+			{(0x9eed0e),	"PEER_RESET_EVT"		},\
+			{(0xfa110e),	"FAILURE_EVT"			},\
+			{(0x10ca1d0e),	"RESET_EVT"			},\
+			{(0xfa110bee),	"FAILOVER_BEGIN_EVT"		},\
+			{(0xfa110ede),	"FAILOVER_END_EVT"		},\
+			{(0xc1ccbee),	"SYNCH_BEGIN_EVT"		},\
+			{(0xc1ccede),	"SYNCH_END_EVT"			},\
+			{(0xece),	"SELF_ESTABL_CONTACT_EVT"	},\
+			{(0x1ce),	"SELF_LOST_CONTACT_EVT"		},\
+			{(0x9ece),	"PEER_ESTABL_CONTACT_EVT"	},\
+			{(0x91ce),	"PEER_LOST_CONTACT_EVT"		},\
+			{(0xfbe),	"FAILOVER_BEGIN_EVT"		},\
+			{(0xfee),	"FAILOVER_END_EVT"		},\
+			{(0xcbe),	"SYNCH_BEGIN_EVT"		},\
+			{(0xcee),	"SYNCH_END_EVT"			})
+
 int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf);
 int tipc_list_dump(struct sk_buff_head *list, bool more, char *buf);
 int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf);
@@ -104,6 +143,8 @@  DEFINE_EVENT(tipc_skb_class, name, \
 	TP_PROTO(struct sk_buff *skb, bool more, const char *header), \
 	TP_ARGS(skb, more, header))
 DEFINE_SKB_EVENT(tipc_skb_dump);
+DEFINE_SKB_EVENT(tipc_proto_build);
+DEFINE_SKB_EVENT(tipc_proto_rcv);
 
 DECLARE_EVENT_CLASS(tipc_list_class,
 
@@ -192,6 +233,58 @@  DEFINE_EVENT(tipc_link_class, name, \
 	TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \
 	TP_ARGS(l, dqueues, header))
 DEFINE_LINK_EVENT(tipc_link_dump);
+DEFINE_LINK_EVENT(tipc_link_conges);
+DEFINE_LINK_EVENT(tipc_link_timeout);
+DEFINE_LINK_EVENT(tipc_link_reset);
+
+#define DEFINE_LINK_EVENT_COND(name, cond) \
+DEFINE_EVENT_CONDITION(tipc_link_class, name, \
+	TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \
+	TP_ARGS(l, dqueues, header), \
+	TP_CONDITION(cond))
+DEFINE_LINK_EVENT_COND(tipc_link_too_silent, tipc_link_too_silent(l));
+
+DECLARE_EVENT_CLASS(tipc_link_transmq_class,
+
+	TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+
+	TP_ARGS(r, f, t, tq),
+
+	TP_STRUCT__entry(
+		__array(char, name, TIPC_MAX_LINK_NAME)
+		__field(u16, from)
+		__field(u16, to)
+		__field(u32, len)
+		__field(u16, fseqno)
+		__field(u16, lseqno)
+	),
+
+	TP_fast_assign(
+		tipc_link_name_ext(r, __entry->name);
+		__entry->from = f;
+		__entry->to = t;
+		__entry->len = skb_queue_len(tq);
+		__entry->fseqno = msg_seqno(buf_msg(skb_peek(tq)));
+		__entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq)));
+	),
+
+	TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n",
+		  __entry->name, __entry->from, __entry->to,
+		  __entry->len, __entry->fseqno, __entry->lseqno)
+);
+
+DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans,
+	TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+	TP_ARGS(r, f, t, tq)
+);
+
+DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack,
+	TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
+	TP_ARGS(r, f, t, tq),
+	TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n",
+		  __entry->name, __entry->from, __entry->to,
+		  __entry->len, __entry->fseqno, __entry->lseqno)
+);
 
 DECLARE_EVENT_CLASS(tipc_node_class,
 
@@ -221,6 +314,37 @@  DEFINE_EVENT(tipc_node_class, name, \
 	TP_ARGS(n, more, header))
 DEFINE_NODE_EVENT(tipc_node_dump);
 
+DECLARE_EVENT_CLASS(tipc_fsm_class,
+
+	TP_PROTO(const char *name, u32 os, u32 ns, int evt),
+
+	TP_ARGS(name, os, ns, evt),
+
+	TP_STRUCT__entry(
+		__string(name, name)
+		__field(u32, os)
+		__field(u32, ns)
+		__field(u32, evt)
+	),
+
+	TP_fast_assign(
+		__assign_str(name, name);
+		__entry->os = os;
+		__entry->ns = ns;
+		__entry->evt = evt;
+	),
+
+	TP_printk("<%s> %s--(%s)->%s\n", __get_str(name),
+		  state_sym(__entry->os), evt_sym(__entry->evt),
+		  state_sym(__entry->ns))
+);
+
+#define DEFINE_FSM_EVENT(fsm_name) \
+DEFINE_EVENT(tipc_fsm_class, fsm_name, \
+	TP_PROTO(const char *name, u32 os, u32 ns, int evt), \
+	TP_ARGS(name, os, ns, evt))
+DEFINE_FSM_EVENT(tipc_link_fsm);
+
 #endif /* _TIPC_TRACE_H */
 
 /* This part must be outside protection */