diff mbox series

[net-next,2/3] tipc: reduce duplicate packets for unicast traffic

Message ID 20190404040953.1569-3-tuong.t.lien@dektech.com.au
State Accepted
Delegated to: David Miller
Headers show
Series tipc: improve TIPC unicast link throughput | expand

Commit Message

Tuong Lien April 4, 2019, 4:09 a.m. UTC
For unicast transmission, the current NACK sending althorithm is over-
active that forces the sending side to retransmit a packet that is not
really lost but just arrived at the receiving side with some delay, or
even retransmit same packets that have already been retransmitted
before. As a result, many duplicates are observed also under normal
condition, ie. without packet loss.

One example case is: node1 transmits 1 2 3 4 10 5 6 7 8 9, when node2
receives packet #10, it puts into the deferdq. When the packet #5 comes
it sends NACK with gap [6 - 9]. However, shortly after that, when
packet #6 arrives, it pulls out packet #10 from the deferfq, but it is
still out of order, so it makes another NACK with gap [7 - 9] and so on
... Finally, node1 has to retransmit the packets 5 6 7 8 9 a number of
times, but in fact all the packets are not lost at all, so duplicates!

This commit reduces duplicates by changing the condition to send NACK,
also restricting the retransmissions on individual packets via a timer
of about 1ms. However, it also needs to say that too tricky condition
for NACKs or too long timeout value for retransmissions will result in
performance reducing! The criterias in this commit are found to be
effective for both the requirements to reduce duplicates but not affect
performance.

The tipc_link_rcv() is also improved to only dequeue skb from the link
deferdq if it is expected (ie. its seqno <= rcv_nxt).

Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
---
 net/tipc/link.c | 26 ++++++++++++++++----------
 net/tipc/msg.h  | 21 +++++++++++++++++++++
 2 files changed, 37 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 5aee1ed23ba9..1f2cde0d025f 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -209,6 +209,7 @@  enum {
 };
 
 #define TIPC_BC_RETR_LIM msecs_to_jiffies(10)   /* [ms] */
+#define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1))
 
 /*
  * Interval between NACKs when packets arrive out of order
@@ -1305,6 +1306,10 @@  static void tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
 			kfree_skb(skb);
 		} else if (less_eq(seqno, acked + gap)) {
 			/* retransmit skb */
+			if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
+				continue;
+			TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+
 			_skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
 			if (!_skb)
 				continue;
@@ -1380,6 +1385,7 @@  static int tipc_link_build_nack_msg(struct tipc_link *l,
 				    struct sk_buff_head *xmitq)
 {
 	u32 def_cnt = ++l->stats.deferred_recv;
+	u32 defq_len = skb_queue_len(&l->deferdq);
 	int match1, match2;
 
 	if (link_is_bc_rcvlink(l)) {
@@ -1390,7 +1396,7 @@  static int tipc_link_build_nack_msg(struct tipc_link *l,
 		return 0;
 	}
 
-	if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV))
+	if (defq_len >= 3 && !((defq_len - 3) % 16))
 		tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
 	return 0;
 }
@@ -1404,29 +1410,29 @@  int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
 		  struct sk_buff_head *xmitq)
 {
 	struct sk_buff_head *defq = &l->deferdq;
-	struct tipc_msg *hdr;
+	struct tipc_msg *hdr = buf_msg(skb);
 	u16 seqno, rcv_nxt, win_lim;
 	int rc = 0;
 
+	/* Verify and update link state */
+	if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
+		return tipc_link_proto_rcv(l, skb, xmitq);
+
+	/* Don't send probe at next timeout expiration */
+	l->silent_intv_cnt = 0;
+
 	do {
 		hdr = buf_msg(skb);
 		seqno = msg_seqno(hdr);
 		rcv_nxt = l->rcv_nxt;
 		win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;
 
-		/* Verify and update link state */
-		if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
-			return tipc_link_proto_rcv(l, skb, xmitq);
-
 		if (unlikely(!link_is_up(l))) {
 			if (l->state == LINK_ESTABLISHING)
 				rc = TIPC_LINK_UP_EVT;
 			goto drop;
 		}
 
-		/* Don't send probe at next timeout expiration */
-		l->silent_intv_cnt = 0;
-
 		/* Drop if outside receive window */
 		if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
 			l->stats.duplicates++;
@@ -1457,7 +1463,7 @@  int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
 			rc |= tipc_link_build_state_msg(l, xmitq);
 		if (unlikely(rc & ~TIPC_LINK_SND_STATE))
 			break;
-	} while ((skb = __skb_dequeue(defq)));
+	} while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt)));
 
 	return rc;
 drop:
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index ec5c511a9c9c..8de02ad6e352 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1151,4 +1151,25 @@  static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list,
 	tipc_skb_queue_splice_tail(&tmp, head);
 }
 
+/* __tipc_skb_dequeue() - dequeue the head skb according to expected seqno
+ * @list: list to be dequeued from
+ * @seqno: seqno of the expected msg
+ *
+ * returns skb dequeued from the list if its seqno is less than or equal to
+ * the expected one, otherwise the skb is still hold
+ *
+ * Note: must be used with appropriate locks held only
+ */
+static inline struct sk_buff *__tipc_skb_dequeue(struct sk_buff_head *list,
+						 u16 seqno)
+{
+	struct sk_buff *skb = skb_peek(list);
+
+	if (skb && less_eq(buf_seqno(skb), seqno)) {
+		__skb_unlink(skb, list);
+		return skb;
+	}
+	return NULL;
+}
+
 #endif