diff mbox

[v8,net-next,5/7] net: simple poll/select low latency socket poll

Message ID 20130603080200.18273.52073.stgit@ladj378.jer.intel.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Eliezer Tamir June 3, 2013, 8:02 a.m. UTC
A very naive select/poll busy-poll support.
Add busy-polling to sock_poll().
When poll/select have nothing to report, call the low-level
sock_poll() again until we are out of time or we find something.
Right now we poll every socket once; this is suboptimal
but improves latency when the number of sockets polled is not large.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
---

 fs/select.c  |    7 +++++++
 net/socket.c |   10 +++++++++-
 2 files changed, 16 insertions(+), 1 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Eric Dumazet June 3, 2013, 1:15 p.m. UTC | #1
On Mon, 2013-06-03 at 11:02 +0300, Eliezer Tamir wrote:
> A very naive select/poll busy-poll support.
> Add busy-polling to sock_poll().
> When poll/select have nothing to report, call the low-level
> sock_poll() again until we are out of time or we find something.
> Right now we poll every socket once; this is suboptimal
> but improves latency when the number of sockets polled is not large.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
> Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
> Tested-by: Willem de Bruijn <willemb@google.com>
> Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
> ---
> 
>  fs/select.c  |    7 +++++++
>  net/socket.c |   10 +++++++++-
>  2 files changed, 16 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/select.c b/fs/select.c
> index 8c1c96c..f116bf0 100644
> --- a/fs/select.c
> +++ b/fs/select.c
> @@ -27,6 +27,7 @@
>  #include <linux/rcupdate.h>
>  #include <linux/hrtimer.h>
>  #include <linux/sched/rt.h>
> +#include <net/ll_poll.h>
>  
>  #include <asm/uaccess.h>
>  
> @@ -400,6 +401,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
>  	poll_table *wait;
>  	int retval, i, timed_out = 0;
>  	unsigned long slack = 0;
> +	cycles_t ll_time = ll_end_time();
>  
>  	rcu_read_lock();
>  	retval = max_select_fd(n, fds);
> @@ -486,6 +488,8 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
>  			break;
>  		}
>  
> +		if (can_poll_ll(ll_time))
> +			continue;
>  		/*
>  		 * If this is the first loop and we have a timeout
>  		 * given, then we convert to ktime_t and set the to
> @@ -750,6 +754,7 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
>  	ktime_t expire, *to = NULL;
>  	int timed_out = 0, count = 0;
>  	unsigned long slack = 0;
> +	cycles_t ll_time = ll_end_time();
>  
>  	/* Optimise the no-wait case */
>  	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
> @@ -795,6 +800,8 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
>  		if (count || timed_out)
>  			break;
>  
> +		if (can_poll_ll(ll_time))
> +			continue;
>  		/*
>  		 * If this is the first loop and we have a timeout
>  		 * given, then we convert to ktime_t and set the to
> diff --git a/net/socket.c b/net/socket.c
> index 721f4e7..02d0e15 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -1148,13 +1148,21 @@ EXPORT_SYMBOL(sock_create_lite);
>  /* No kernel lock held - perfect */
>  static unsigned int sock_poll(struct file *file, poll_table *wait)
>  {
> +	unsigned int poll_result;
>  	struct socket *sock;
>  
>  	/*
>  	 *      We can't return errors to poll, so it's either yes or no.
>  	 */
>  	sock = file->private_data;
> -	return sock->ops->poll(file, sock, wait);
> +
> +	poll_result = sock->ops->poll(file, sock, wait);
> +
> +	if (!(poll_result & (POLLRDNORM | POLLERR | POLLRDHUP | POLLHUP)) &&
> +		sk_valid_ll(sock->sk) && sk_poll_ll(sock->sk, 1))
> +			poll_result = sock->ops->poll(file, sock, NULL);
> +
> +	return poll_result;
>  }
>  
>  static int sock_mmap(struct file *file, struct vm_area_struct *vma)
> 


In fact, for TCP, POLLOUT event being ready can also be triggered by
incoming messages, as the ACK might allow the user application to push
more data in the write queue.

And you might check wait->_key to avoid testing flags that user is not
interested into.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eliezer Tamir June 3, 2013, 1:59 p.m. UTC | #2
On 03/06/2013 16:15, Eric Dumazet wrote:
> On Mon, 2013-06-03 at 11:02 +0300, Eliezer Tamir wrote:
>>   	sock = file->private_data;
>> -	return sock->ops->poll(file, sock, wait);
>> +
>> +	poll_result = sock->ops->poll(file, sock, wait);
>> +
>> +	if (!(poll_result & (POLLRDNORM | POLLERR | POLLRDHUP | POLLHUP)) &&
>> +		sk_valid_ll(sock->sk) && sk_poll_ll(sock->sk, 1))
>> +			poll_result = sock->ops->poll(file, sock, NULL);
>> +
>> +	return poll_result;
>>   }
>>
>>   static int sock_mmap(struct file *file, struct vm_area_struct *vma)
>>
>
>
> In fact, for TCP, POLLOUT event being ready can also be triggered by
> incoming messages, as the ACK might allow the user application to push
> more data in the write queue.
>
> And you might check wait->_key to avoid testing flags that user is not
> interested into.

yes, comparing to _key is more correct.
In any case this needs to be completely rewritten for support for
working well with a large number of sockets.

-Eliezer
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eliezer Tamir June 4, 2013, 8:52 a.m. UTC | #3
On 03/06/2013 16:59, Eliezer Tamir wrote:
> On 03/06/2013 16:15, Eric Dumazet wrote:
>> On Mon, 2013-06-03 at 11:02 +0300, Eliezer Tamir wrote:
>>>       sock = file->private_data;
>>> -    return sock->ops->poll(file, sock, wait);
>>> +
>>> +    poll_result = sock->ops->poll(file, sock, wait);
>>> +
>>> +    if (!(poll_result & (POLLRDNORM | POLLERR | POLLRDHUP |
>>> POLLHUP)) &&
>>> +        sk_valid_ll(sock->sk) && sk_poll_ll(sock->sk, 1))
>>> +            poll_result = sock->ops->poll(file, sock, NULL);
>>> +
>>> +    return poll_result;
>>>   }
>>>
>>>   static int sock_mmap(struct file *file, struct vm_area_struct *vma)
>>>
>>
>>
>> In fact, for TCP, POLLOUT event being ready can also be triggered by
>> incoming messages, as the ACK might allow the user application to push
>> more data in the write queue.
>>
>> And you might check wait->_key to avoid testing flags that user is not
>> interested into.
>
> yes, comparing to _key is more correct.
> In any case this needs to be completely rewritten for support for
> working well with a large number of sockets.
>

Is it possible for wait to be NULL? (do we need to check for that?)
I see that poll_does_not_wait() checks for that, but I could not find
anywhere this is actually done.

-Eliezer

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/select.c b/fs/select.c
index 8c1c96c..f116bf0 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -27,6 +27,7 @@ 
 #include <linux/rcupdate.h>
 #include <linux/hrtimer.h>
 #include <linux/sched/rt.h>
+#include <net/ll_poll.h>
 
 #include <asm/uaccess.h>
 
@@ -400,6 +401,7 @@  int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 	poll_table *wait;
 	int retval, i, timed_out = 0;
 	unsigned long slack = 0;
+	cycles_t ll_time = ll_end_time();
 
 	rcu_read_lock();
 	retval = max_select_fd(n, fds);
@@ -486,6 +488,8 @@  int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 			break;
 		}
 
+		if (can_poll_ll(ll_time))
+			continue;
 		/*
 		 * If this is the first loop and we have a timeout
 		 * given, then we convert to ktime_t and set the to
@@ -750,6 +754,7 @@  static int do_poll(unsigned int nfds,  struct poll_list *list,
 	ktime_t expire, *to = NULL;
 	int timed_out = 0, count = 0;
 	unsigned long slack = 0;
+	cycles_t ll_time = ll_end_time();
 
 	/* Optimise the no-wait case */
 	if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
@@ -795,6 +800,8 @@  static int do_poll(unsigned int nfds,  struct poll_list *list,
 		if (count || timed_out)
 			break;
 
+		if (can_poll_ll(ll_time))
+			continue;
 		/*
 		 * If this is the first loop and we have a timeout
 		 * given, then we convert to ktime_t and set the to
diff --git a/net/socket.c b/net/socket.c
index 721f4e7..02d0e15 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1148,13 +1148,21 @@  EXPORT_SYMBOL(sock_create_lite);
 /* No kernel lock held - perfect */
 static unsigned int sock_poll(struct file *file, poll_table *wait)
 {
+	unsigned int poll_result;
 	struct socket *sock;
 
 	/*
 	 *      We can't return errors to poll, so it's either yes or no.
 	 */
 	sock = file->private_data;
-	return sock->ops->poll(file, sock, wait);
+
+	poll_result = sock->ops->poll(file, sock, wait);
+
+	if (!(poll_result & (POLLRDNORM | POLLERR | POLLRDHUP | POLLHUP)) &&
+		sk_valid_ll(sock->sk) && sk_poll_ll(sock->sk, 1))
+			poll_result = sock->ops->poll(file, sock, NULL);
+
+	return poll_result;
 }
 
 static int sock_mmap(struct file *file, struct vm_area_struct *vma)