diff mbox

[v2,net-next,3/3] net: add socket option for low latency polling

Message ID 20130612112016.19288.86849.stgit@ladj378.jer.intel.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Eliezer Tamir June 12, 2013, 11:20 a.m. UTC
adds a socket option for low latency polling.
This allows overriding the global sysctl value with a per-socket one.

Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
---

 arch/alpha/include/uapi/asm/socket.h   |    2 ++
 arch/avr32/include/uapi/asm/socket.h   |    2 ++
 arch/cris/include/uapi/asm/socket.h    |    2 ++
 arch/frv/include/uapi/asm/socket.h     |    2 ++
 arch/h8300/include/uapi/asm/socket.h   |    2 ++
 arch/ia64/include/uapi/asm/socket.h    |    2 ++
 arch/m32r/include/uapi/asm/socket.h    |    2 ++
 arch/mips/include/uapi/asm/socket.h    |    2 ++
 arch/mn10300/include/uapi/asm/socket.h |    2 ++
 arch/parisc/include/uapi/asm/socket.h  |    2 ++
 arch/powerpc/include/uapi/asm/socket.h |    2 ++
 arch/s390/include/uapi/asm/socket.h    |    2 ++
 arch/sparc/include/uapi/asm/socket.h   |    2 ++
 arch/xtensa/include/uapi/asm/socket.h  |    2 ++
 include/net/ll_poll.h                  |   10 +++++-----
 include/net/sock.h                     |    2 ++
 include/uapi/asm-generic/socket.h      |    2 ++
 net/core/sock.c                        |   20 ++++++++++++++++++++
 18 files changed, 57 insertions(+), 5 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet June 12, 2013, 12:44 p.m. UTC | #1
On Wed, 2013-06-12 at 14:20 +0300, Eliezer Tamir wrote:
> adds a socket option for low latency polling.
> This allows overriding the global sysctl value with a per-socket one.
> 
> Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
> ---

It seems EXPORT_SYMBOL_GPL(sysctl_net_ll_poll) can now be removed ?



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eliezer Tamir June 12, 2013, 12:54 p.m. UTC | #2
On 12/06/2013 15:44, Eric Dumazet wrote:
> On Wed, 2013-06-12 at 14:20 +0300, Eliezer Tamir wrote:
>> adds a socket option for low latency polling.
>> This allows overriding the global sysctl value with a per-socket one.
>>
>> Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
>> ---
>
> It seems EXPORT_SYMBOL_GPL(sysctl_net_ll_poll) can now be removed ?

I think we would need it for poll.

Poll needs a way that is not dependent on any individual socket to know
how long to busy-wait, since we have to know it before we poll any
individual socket.

The patch I have right now for poll is not ready (and not fully tested).
But I can post it as an RFC if you want.

-Eliezer
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eliezer Tamir June 12, 2013, 12:58 p.m. UTC | #3
On 12/06/2013 15:54, Eliezer Tamir wrote:
> On 12/06/2013 15:44, Eric Dumazet wrote:
>> On Wed, 2013-06-12 at 14:20 +0300, Eliezer Tamir wrote:
>>> adds a socket option for low latency polling.
>>> This allows overriding the global sysctl value with a per-socket one.
>>>
>>> Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
>>> ---
>>
>> It seems EXPORT_SYMBOL_GPL(sysctl_net_ll_poll) can now be removed ?
>
> I think we would need it for poll.
>
> Poll needs a way that is not dependent on any individual socket to know
> how long to busy-wait, since we have to know it before we poll any
> individual socket.

Or maybe, since the time constants seem to be different, we should have
two separate sysctl values, one for read and another for poll?

(and then the one for read does not need to be exported to modules.)
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet June 12, 2013, 1:55 p.m. UTC | #4
On Wed, 2013-06-12 at 15:54 +0300, Eliezer Tamir wrote:
> On 12/06/2013 15:44, Eric Dumazet wrote:
> > On Wed, 2013-06-12 at 14:20 +0300, Eliezer Tamir wrote:
> >> adds a socket option for low latency polling.
> >> This allows overriding the global sysctl value with a per-socket one.
> >>
> >> Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
> >> ---
> >
> > It seems EXPORT_SYMBOL_GPL(sysctl_net_ll_poll) can now be removed ?
> 
> I think we would need it for poll.

Hmm, poll is part of the kernel, not in a module, but lets see.


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index eee6ea7..4885825 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -81,4 +81,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 37401f5..79b6179 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -74,4 +74,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index ba409c9..47b1ec5 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -76,6 +76,8 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index 31dbb5d..dbc0852 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -74,5 +74,7 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h
index 5d1c6d0..a38d38a 100644
--- a/arch/h8300/include/uapi/asm/socket.h
+++ b/arch/h8300/include/uapi/asm/socket.h
@@ -74,4 +74,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 6b4329f..d3358b7 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -83,4 +83,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 2a3b59e..44aaf46 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -74,4 +74,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 3b21150..6a07992 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -92,4 +92,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index b4ce844..db80fd3 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -74,4 +74,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 70c512a..f866fff 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -73,6 +73,8 @@ 
 
 #define SO_SELECT_ERR_QUEUE	0x4026
 
+#define SO_LL			0x4027
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index a36daf3..405fb09 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -81,4 +81,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 2dacb306..0c5105fb 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -80,4 +80,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 89f49b6..b46c3fa 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -70,6 +70,8 @@ 
 
 #define SO_SELECT_ERR_QUEUE	0x0029
 
+#define SO_LL			0x0030
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index a8f44f5..b21ace4 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -85,4 +85,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/ll_poll.h b/include/net/ll_poll.h
index d20508d..437bf11 100644
--- a/include/net/ll_poll.h
+++ b/include/net/ll_poll.h
@@ -43,15 +43,15 @@  extern unsigned int sysctl_net_ll_poll __read_mostly;
 /* we don't mind a ~2.5% imprecision */
 #define TSC_MHZ (tsc_khz >> 10)
 
-static inline cycles_t ll_end_time(void)
+static inline cycles_t ll_end_time(struct sock *sk)
 {
-	return (cycles_t)TSC_MHZ * ACCESS_ONCE(sysctl_net_ll_poll)
+	return (cycles_t)TSC_MHZ * ACCESS_ONCE(sk->sk_ll_usec)
 		 + get_cycles();
 }
 
 static inline bool sk_valid_ll(struct sock *sk)
 {
-	return sysctl_net_ll_poll && sk->sk_napi_id &&
+	return sk->sk_ll_usec && sk->sk_napi_id &&
 	       !need_resched() && !signal_pending(current);
 }
 
@@ -63,7 +63,7 @@  static inline bool can_poll_ll(cycles_t end_time)
 
 static inline bool sk_poll_ll(struct sock *sk, int nonblock)
 {
-	cycles_t end_time = ll_end_time();
+	cycles_t end_time = ll_end_time(sk);
 	const struct net_device_ops *ops;
 	struct napi_struct *napi;
 	int rc = false;
@@ -117,7 +117,7 @@  static inline void sk_mark_ll(struct sock *sk, struct sk_buff *skb)
 
 #else /* CONFIG_NET_LL_RX_POLL */
 
-static inline cycles_t ll_end_time(void)
+static inline cycles_t ll_end_time(struct sock *sk)
 {
 	return 0;
 }
diff --git a/include/net/sock.h b/include/net/sock.h
index ac8e181..21db792 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -230,6 +230,7 @@  struct cg_proto;
   *	@sk_wmem_queued: persistent queue size
   *	@sk_forward_alloc: space allocated forward
   *	@sk_napi_id: id of the last napi context to receive data for sk
+  *	@sk_ll_usec: usecs to busypoll when there is no data
   *	@sk_allocation: allocation mode
   *	@sk_sndbuf: size of send buffer in bytes
   *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
@@ -328,6 +329,7 @@  struct sock {
 #endif
 #ifdef CONFIG_NET_LL_RX_POLL
 	unsigned int		sk_napi_id;
+	unsigned int		sk_ll_usec;
 #endif
 	atomic_t		sk_drops;
 	int			sk_rcvbuf;
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index c5d2e3a..ca3a20d 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -76,4 +76,6 @@ 
 
 #define SO_SELECT_ERR_QUEUE	45
 
+#define SO_LL			46
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 788c0da..1e744b1 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -913,6 +913,19 @@  set_rcvbuf:
 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
 		break;
 
+#ifdef CONFIG_NET_LL_RX_POLL
+	case SO_LL:
+		/* allow unprivileged users to decrease the value */
+		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
+			ret = -EPERM;
+		else {
+			if (val < 0)
+				ret = -EINVAL;
+			else
+				sk->sk_ll_usec = val;
+		}
+		break;
+#endif
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1170,6 +1183,12 @@  int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
 		break;
 
+#ifdef CONFIG_NET_LL_RX_POLL
+	case SO_LL:
+		v.val = sk->sk_ll_usec;
+		break;
+#endif
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -2288,6 +2307,7 @@  void sock_init_data(struct socket *sock, struct sock *sk)
 
 #ifdef CONFIG_NET_LL_RX_POLL
 	sk->sk_napi_id		=	0;
+	sk->sk_ll_usec		=	sysctl_net_ll_poll;
 #endif
 
 	/*