diff mbox

[v6,net-next,1/2] net: add skb_mstamp infrastructure

Message ID 1393309324.2316.119.camel@edumazet-glaptop2.roam.corp.google.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Eric Dumazet Feb. 25, 2014, 6:22 a.m. UTC
From: Eric Dumazet <edumazet@google.com>

ktime_get() is too expensive on some cases, and we'd like to get
usec resolution timestamps in TCP stack.

This patch adds a light weight facility using a combination of
local_clock() and jiffies samples.

Instead of :

	u64 t0, t1;

	t0 = ktime_get();
	// stuff
	t1 = ktime_get();
	delta_us = ktime_us_delta(t1, t0);

use :
	struct skb_mstamp t0, t1;

	skb_mstamp_get(&t0);
	// stuff
	skb_mstamp_get(&t1);
	delta_us = skb_mstamp_us_delta(&t1, &t0);

Note : local_clock() might have a (bounded) drift between cpus.

Do not use this infra in place of ktime_get() without understanding the
issues.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Larry Brakmo <brakmo@google.com>
Cc: Julian Anastasov <ja@ssi.bg>
---
 include/linux/skbuff.h |   59 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 57 insertions(+), 2 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

David Laight Feb. 25, 2014, 9:51 a.m. UTC | #1
From: Eric Dumazet

> ktime_get() is too expensive on some cases, and we'd like to get

> usec resolution timestamps in TCP stack.

> 

> This patch adds a light weight facility using a combination of

> local_clock() and jiffies samples.

...
> +struct skb_mstamp {

> +	union {

> +		u64		v64;

> +		struct {

> +			u32	stamp_us;

> +			u32	stamp_jiffies;

> +		};

> +	};

> +};


Do you need the union here?
If you don't attempt to convert the value to a u64 then code
is less likely to process it incorrectly.

Most modern ABI pass/return short structures in registers,
so the actual code should be similar if you return the struct
by value.

	David
Eric Dumazet Feb. 25, 2014, 12:21 p.m. UTC | #2
On Tue, 2014-02-25 at 09:51 +0000, David Laight wrote:

> 
> Do you need the union here?
> If you don't attempt to convert the value to a u64 then code
> is less likely to process it incorrectly.
> 

Check the second patch, it uses :

	first_ackt.v64 = 0;
...
	last_ackt = skb->skb_mstamp;
	if (!first_ackt.v64)
		first_ackt = last_ackt;

...

	skb_mstamp_get(&now);
	if (first_ackt.v64) {
        	seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
        	ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
	}


Without the union, it would be more expensive on 64bit arches,
or relying on the compiler to be smart enough.

> Most modern ABI pass/return short structures in registers,
> so the actual code should be similar if you return the struct
> by value.

I am well aware of this, but this is a matter of taste, and I prefer
this more traditional way.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller Feb. 26, 2014, 8:02 p.m. UTC | #3
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 24 Feb 2014 22:22:04 -0800

> Instead of :
> 
> 	u64 t0, t1;
> 
> 	t0 = ktime_get();
> 	// stuff
> 	t1 = ktime_get();
> 	delta_us = ktime_us_delta(t1, t0);
> 
> use :
> 	struct skb_mstamp t0, t1;
> 
> 	skb_mstamp_get(&t0);
> 	// stuff
> 	skb_mstamp_get(&t1);
> 	delta_us = skb_mstamp_us_delta(&t1, &t0);
 ...
> +/**
> + * skb_mstamp_delta - compute the difference in usec between two skb_mstamp
> + * @t1: pointer to oldest sample
> + * @t0: pointer to newest sample
> + */
> +static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
> +				      const struct skb_mstamp *t0)

Maybe your definition of "newest" is different from mine, to me it
means most recently sampled.  And if so, your t1 and t0 descriptions
seem reversed.

Please fix this and resubmit with Neal's ACK etc.

Thanks!
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet Feb. 26, 2014, 8:29 p.m. UTC | #4
On Wed, 2014-02-26 at 15:02 -0500, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>

> > + * skb_mstamp_delta - compute the difference in usec between two skb_mstamp
> > + * @t1: pointer to oldest sample
> > + * @t0: pointer to newest sample
> > + */
> > +static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
> > +				      const struct skb_mstamp *t0)
> 
> Maybe your definition of "newest" is different from mine, to me it
> means most recently sampled.  And if so, your t1 and t0 descriptions
> seem reversed.
> 
> Please fix this and resubmit with Neal's ACK etc.

Humpf, right you are, I'll send a v6, thanks !


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 11b6925f0e96..7f24f50f307e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -32,6 +32,7 @@ 
 #include <linux/hrtimer.h>
 #include <linux/dma-mapping.h>
 #include <linux/netdev_features.h>
+#include <linux/sched.h>
 #include <net/flow_keys.h>
 
 /* A. Checksumming of received packets by device.
@@ -356,11 +357,62 @@  typedef unsigned int sk_buff_data_t;
 typedef unsigned char *sk_buff_data_t;
 #endif
 
+/**
+ * struct skb_mstamp - multi resolution time stamps
+ * @stamp_us: timestamp in us resolution
+ * @stamp_jiffies: timestamp in jiffies
+ */
+struct skb_mstamp {
+	union {
+		u64		v64;
+		struct {
+			u32	stamp_us;
+			u32	stamp_jiffies;
+		};
+	};
+};
+
+/**
+ * skb_mstamp_get - get sample current time
+ * @cl: place to store timestamps
+ */
+static inline void skb_mstamp_get(struct skb_mstamp *cl)
+{
+	u64 val = local_clock();
+
+	do_div(val, NSEC_PER_USEC);
+	cl->stamp_us = (u32)val;
+	cl->stamp_jiffies = (u32)jiffies;
+}
+
+/**
+ * skb_mstamp_delta - compute the difference in usec between two skb_mstamp
+ * @t1: pointer to oldest sample
+ * @t0: pointer to newest sample
+ */
+static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
+				      const struct skb_mstamp *t0)
+{
+	s32 delta_us = t1->stamp_us - t0->stamp_us;
+	u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies;
+
+	/* If delta_us is negative, this might be because interval is too big,
+	 * or local_clock() drift is too big : fallback using jiffies.
+	 */
+	if (delta_us <= 0 ||
+	    delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ)))
+
+		delta_us = jiffies_to_usecs(delta_jiffies);
+
+	return delta_us;
+}
+
+
 /** 
  *	struct sk_buff - socket buffer
  *	@next: Next buffer in list
  *	@prev: Previous buffer in list
- *	@tstamp: Time we arrived
+ *	@tstamp: Time we arrived/left
  *	@sk: Socket we are owned by
  *	@dev: Device we arrived on/are leaving by
  *	@cb: Control buffer. Free for use by every layer. Put private vars here
@@ -429,7 +481,10 @@  struct sk_buff {
 	struct sk_buff		*next;
 	struct sk_buff		*prev;
 
-	ktime_t			tstamp;
+	union {
+		ktime_t		tstamp;
+		struct skb_mstamp skb_mstamp;
+	};
 
 	struct sock		*sk;
 	struct net_device	*dev;