diff mbox series

[RFC,net-next,1/6] net: multiple release time SO_TXTIME

Message ID 20200609140934.110785-2-willemdebruijn.kernel@gmail.com
State RFC
Delegated to: David Miller
Headers show
Series multi release pacing for UDP GSO | expand

Commit Message

Willem de Bruijn June 9, 2020, 2:09 p.m. UTC
From: Willem de Bruijn <willemb@google.com>

Pace transmission of segments in a UDP GSO datagram.

Batching datagram protocol stack traversals with UDP_SEGMENT saves
significant cycles for large data transfers.

But GSO packets are sent at once. Pacing traffic to internet clients
often requires sending just a few MSS per msec pacing interval.

SO_TXTIME allows delivery of packets at a later time. Extend it
to allow pacing the segments in a UDP GSO packet, to be able to build
larger GSO datagrams.

Add SO_TXTIME flag SOF_TXTIME_MULTI_RELEASE. This reinterprets the
lower 8 bits of the 64-bit release timestamp as

  - bits 4..7: release time interval in msec
  - bits 0..3: number of segments sent per period

So a timestamp of 0x148 means

  - 0x100 initial timestamp in Qdisc selected clocksource
  - every 4 msec release N MSS
  - N is 8

A subsequent qdisc change will pace the individual segments.

Packet transmission can race with the socket option. This is safe.
For predictable behavior, it is up to the caller to not toggle the
feature while packets on a socket are in flight.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/netdevice.h       |  1 +
 include/net/sock.h              |  3 ++-
 include/uapi/linux/net_tstamp.h |  3 ++-
 net/core/dev.c                  | 44 +++++++++++++++++++++++++++++++++
 net/core/sock.c                 |  4 +++
 5 files changed, 53 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a96e9c4ec36..15ea976dd446 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4528,6 +4528,7 @@  struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 				    netdev_features_t features);
+struct sk_buff *skb_gso_segment_txtime(struct sk_buff *skb);
 
 struct netdev_bonding_info {
 	ifslave	slave;
diff --git a/include/net/sock.h b/include/net/sock.h
index c53cc42b5ab9..491e389b3570 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -493,7 +493,8 @@  struct sock {
 	u8			sk_clockid;
 	u8			sk_txtime_deadline_mode : 1,
 				sk_txtime_report_errors : 1,
-				sk_txtime_unused : 6;
+				sk_txtime_multi_release : 1,
+				sk_txtime_unused : 5;
 
 	struct socket		*sk_socket;
 	void			*sk_user_data;
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 7ed0b3d1c00a..ca1ae3b6f601 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -162,8 +162,9 @@  struct scm_ts_pktinfo {
 enum txtime_flags {
 	SOF_TXTIME_DEADLINE_MODE = (1 << 0),
 	SOF_TXTIME_REPORT_ERRORS = (1 << 1),
+	SOF_TXTIME_MULTI_RELEASE = (1 << 2),	/* tstamp low byte encodes pacing */
 
-	SOF_TXTIME_FLAGS_LAST = SOF_TXTIME_REPORT_ERRORS,
+	SOF_TXTIME_FLAGS_LAST = SOF_TXTIME_MULTI_RELEASE,
 	SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
 				 SOF_TXTIME_FLAGS_LAST
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 061496a1f640..5058083375fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3377,6 +3377,50 @@  struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(__skb_gso_segment);
 
+struct sk_buff *skb_gso_segment_txtime(struct sk_buff *skb)
+{	/* segment a GSO skb, giving each group of segments a later tstamp */
+	int mss_per_ival, mss_in_cur_ival;	/* per-interval limit and count */
+	struct sk_buff *segs, *seg;
+	struct skb_shared_info *sh;
+	u64 step_ns, tstamp;	/* interval length; tstamp of current interval */

+	if (!skb->sk || !sk_fullsock(skb->sk) ||
+	    !skb->sk->sk_txtime_multi_release)
+		return NULL;	/* no full socket, or multi release not enabled */

+	/* tstamp low byte: bits 0..3 segs/interval, bits 4..7 interval (msec) */
+	mss_per_ival = skb->tstamp & 0xF;
+	step_ns = ((skb->tstamp >> 4) & 0xF) * NSEC_PER_MSEC;
+	tstamp = skb->tstamp;

+	if (mss_per_ival == 0)
+		return NULL;	/* no segment count encoded in the tstamp */

+	/* skip multi-release if all segments fit in a single interval */
+	sh = skb_shinfo(skb);
+	if (sh->gso_segs <= mss_per_ival)
+		return NULL;

+	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);	/* NOTE(review): mask has no GSO bits, presumably to force segmentation here - confirm */
+	if (IS_ERR_OR_NULL(segs))
+		return segs;	/* error pointer or NULL is passed through as is */

+	mss_in_cur_ival = 0;

+	for (seg = segs; seg; seg = seg->next) {
+		seg->tstamp = tstamp & ~0xFF;	/* clear the encoded low byte */

+		mss_in_cur_ival++;
+		if (mss_in_cur_ival == mss_per_ival) {	/* interval is full */
+			tstamp += step_ns;	/* next group is released one step later */
+			mss_in_cur_ival = 0;
+		}
+	}

+	return segs;	/* head of the list of timestamped segments */
+}
+EXPORT_SYMBOL_GPL(skb_gso_segment_txtime);
+
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
 void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c4acf1f0220..7036b8855154 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1258,6 +1258,8 @@  int sock_setsockopt(struct socket *sock, int level, int optname,
 			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
 		sk->sk_txtime_report_errors =
 			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		sk->sk_txtime_multi_release =
+			!!(sk_txtime.flags & SOF_TXTIME_MULTI_RELEASE);
 		break;
 
 	case SO_BINDTOIFINDEX:
@@ -1608,6 +1610,8 @@  int sock_getsockopt(struct socket *sock, int level, int optname,
 				  SOF_TXTIME_DEADLINE_MODE : 0;
 		v.txtime.flags |= sk->sk_txtime_report_errors ?
 				  SOF_TXTIME_REPORT_ERRORS : 0;
+		v.txtime.flags |= sk->sk_txtime_multi_release ?
+				  SOF_TXTIME_MULTI_RELEASE : 0;
 		break;
 
 	case SO_BINDTOIFINDEX: