diff mbox

[3/4] treewide: convert PF_MEMALLOC manipulations to new helpers

Message ID 20170405074700.29871-4-vbabka@suse.cz
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Vlastimil Babka April 5, 2017, 7:46 a.m. UTC
We now have memalloc_noreclaim_{save,restore} helpers for robust setting and
clearing of PF_MEMALLOC. Let's convert the code which was using the generic
tsk_restore_flags(). No functional change.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Josef Bacik <jbacik@fb.com>
Cc: Lee Duncan <lduncan@suse.com>
Cc: Chris Leech <cleech@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
---
 drivers/block/nbd.c      | 7 ++++---
 drivers/scsi/iscsi_tcp.c | 7 ++++---
 net/core/dev.c           | 7 ++++---
 net/core/sock.c          | 7 ++++---
 4 files changed, 16 insertions(+), 12 deletions(-)

Comments

Michal Hocko April 5, 2017, 11:30 a.m. UTC | #1
On Wed 05-04-17 09:46:59, Vlastimil Babka wrote:
> We now have memalloc_noreclaim_{save,restore} helpers for robust setting and
> clearing of PF_MEMALLOC. Let's convert the code which was using the generic
> tsk_restore_flags(). No functional change.

It would be really great to revisit why those places outside of the mm
proper really need this flag. I know this is a painful exercise but I
wouldn't be surprised if there were abusers there.

> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> Cc: Josef Bacik <jbacik@fb.com>
> Cc: Lee Duncan <lduncan@suse.com>
> Cc: Chris Leech <cleech@redhat.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Eric Dumazet <edumazet@google.com>

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  drivers/block/nbd.c      | 7 ++++---
>  drivers/scsi/iscsi_tcp.c | 7 ++++---
>  net/core/dev.c           | 7 ++++---
>  net/core/sock.c          | 7 ++++---
>  4 files changed, 16 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index 03ae72985c79..929fc548c7fb 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -18,6 +18,7 @@
>  #include <linux/module.h>
>  #include <linux/init.h>
>  #include <linux/sched.h>
> +#include <linux/sched/mm.h>
>  #include <linux/fs.h>
>  #include <linux/bio.h>
>  #include <linux/stat.h>
> @@ -210,7 +211,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
>  	struct socket *sock = nbd->socks[index]->sock;
>  	int result;
>  	struct msghdr msg;
> -	unsigned long pflags = current->flags;
> +	unsigned int noreclaim_flag;
>  
>  	if (unlikely(!sock)) {
>  		dev_err_ratelimited(disk_to_dev(nbd->disk),
> @@ -221,7 +222,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
>  
>  	msg.msg_iter = *iter;
>  
> -	current->flags |= PF_MEMALLOC;
> +	noreclaim_flag = memalloc_noreclaim_save();
>  	do {
>  		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
>  		msg.msg_name = NULL;
> @@ -244,7 +245,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
>  			*sent += result;
>  	} while (msg_data_left(&msg));
>  
> -	tsk_restore_flags(current, pflags, PF_MEMALLOC);
> +	memalloc_noreclaim_restore(noreclaim_flag);
>  
>  	return result;
>  }
> diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
> index 4228aba1f654..4842fc0e809d 100644
> --- a/drivers/scsi/iscsi_tcp.c
> +++ b/drivers/scsi/iscsi_tcp.c
> @@ -30,6 +30,7 @@
>  #include <linux/types.h>
>  #include <linux/inet.h>
>  #include <linux/slab.h>
> +#include <linux/sched/mm.h>
>  #include <linux/file.h>
>  #include <linux/blkdev.h>
>  #include <linux/delay.h>
> @@ -371,10 +372,10 @@ static inline int iscsi_sw_tcp_xmit_qlen(struct iscsi_conn *conn)
>  static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
>  {
>  	struct iscsi_conn *conn = task->conn;
> -	unsigned long pflags = current->flags;
> +	unsigned int noreclaim_flag;
>  	int rc = 0;
>  
> -	current->flags |= PF_MEMALLOC;
> +	noreclaim_flag = memalloc_noreclaim_save();
>  
>  	while (iscsi_sw_tcp_xmit_qlen(conn)) {
>  		rc = iscsi_sw_tcp_xmit(conn);
> @@ -387,7 +388,7 @@ static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
>  		rc = 0;
>  	}
>  
> -	tsk_restore_flags(current, pflags, PF_MEMALLOC);
> +	memalloc_noreclaim_restore(noreclaim_flag);
>  	return rc;
>  }
>  
> diff --git a/net/core/dev.c b/net/core/dev.c
> index fde8b3f7136b..e0705a126b24 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -81,6 +81,7 @@
>  #include <linux/hash.h>
>  #include <linux/slab.h>
>  #include <linux/sched.h>
> +#include <linux/sched/mm.h>
>  #include <linux/mutex.h>
>  #include <linux/string.h>
>  #include <linux/mm.h>
> @@ -4227,7 +4228,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
>  	int ret;
>  
>  	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
> -		unsigned long pflags = current->flags;
> +		unsigned int noreclaim_flag;
>  
>  		/*
>  		 * PFMEMALLOC skbs are special, they should
> @@ -4238,9 +4239,9 @@ static int __netif_receive_skb(struct sk_buff *skb)
>  		 * Use PF_MEMALLOC as this saves us from propagating the allocation
>  		 * context down to all allocation sites.
>  		 */
> -		current->flags |= PF_MEMALLOC;
> +		noreclaim_flag = memalloc_noreclaim_save();
>  		ret = __netif_receive_skb_core(skb, true);
> -		tsk_restore_flags(current, pflags, PF_MEMALLOC);
> +		memalloc_noreclaim_restore(noreclaim_flag);
>  	} else
>  		ret = __netif_receive_skb_core(skb, false);
>  
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 392f9b6f96e2..0b2d06b4c308 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -102,6 +102,7 @@
>  #include <linux/proc_fs.h>
>  #include <linux/seq_file.h>
>  #include <linux/sched.h>
> +#include <linux/sched/mm.h>
>  #include <linux/timer.h>
>  #include <linux/string.h>
>  #include <linux/sockios.h>
> @@ -372,14 +373,14 @@ EXPORT_SYMBOL_GPL(sk_clear_memalloc);
>  int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
>  {
>  	int ret;
> -	unsigned long pflags = current->flags;
> +	unsigned int noreclaim_flag;
>  
>  	/* these should have been dropped before queueing */
>  	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
>  
> -	current->flags |= PF_MEMALLOC;
> +	noreclaim_flag = memalloc_noreclaim_save();
>  	ret = sk->sk_backlog_rcv(sk, skb);
> -	tsk_restore_flags(current, pflags, PF_MEMALLOC);
> +	memalloc_noreclaim_restore(noreclaim_flag);
>  
>  	return ret;
>  }
> -- 
> 2.12.2
Wouter Verhelst April 6, 2017, 6:38 a.m. UTC | #2
On Wed, Apr 05, 2017 at 01:30:31PM +0200, Michal Hocko wrote:
> On Wed 05-04-17 09:46:59, Vlastimil Babka wrote:
> > We now have memalloc_noreclaim_{save,restore} helpers for robust setting and
> > clearing of PF_MEMALLOC. Let's convert the code which was using the generic
> > tsk_restore_flags(). No functional change.
> 
> It would be really great to revisit why those places outside of the mm
> proper really need this flag. I know this is a painful exercise but I
> wouldn't be surprised if there were abusers there.
[...]
> > ---
> >  drivers/block/nbd.c      | 7 ++++---
> >  drivers/scsi/iscsi_tcp.c | 7 ++++---
> >  net/core/dev.c           | 7 ++++---
> >  net/core/sock.c          | 7 ++++---
> >  4 files changed, 16 insertions(+), 12 deletions(-)

These were all done to make swapping over network safe. The idea is that
if a socket has SOCK_MEMALLOC set, incoming packets for that socket can
access PFMEMALLOC reserves (whereas other sockets cannot); this all in
the hope that one packet destined to that socket will contain the TCP ACK
that confirms the swapout was successful and we can now release RAM
pages for other processes.

I don't know whether they need the PF_MEMALLOC flag specifically (not a
kernel hacker), but they do need to interact with it at any rate.
Mel Gorman April 6, 2017, 11:25 a.m. UTC | #3
On Thu, Apr 06, 2017 at 08:38:10AM +0200, Wouter Verhelst wrote:
> On Wed, Apr 05, 2017 at 01:30:31PM +0200, Michal Hocko wrote:
> > On Wed 05-04-17 09:46:59, Vlastimil Babka wrote:
> > > We now have memalloc_noreclaim_{save,restore} helpers for robust setting and
> > > clearing of PF_MEMALLOC. Let's convert the code which was using the generic
> > > tsk_restore_flags(). No functional change.
> > 
> > It would be really great to revisit why those places outside of the mm
> > proper really need this flag. I know this is a painful exercise but I
> > wouldn't be surprised if there were abusers there.
> [...]
> > > ---
> > >  drivers/block/nbd.c      | 7 ++++---
> > >  drivers/scsi/iscsi_tcp.c | 7 ++++---
> > >  net/core/dev.c           | 7 ++++---
> > >  net/core/sock.c          | 7 ++++---
> > >  4 files changed, 16 insertions(+), 12 deletions(-)
> 
> These were all done to make swapping over network safe. The idea is that
> if a socket has SOCK_MEMALLOC set, incoming packets for that socket can
> access PFMEMALLOC reserves (whereas other sockets cannot); this all in
> the hope that one packet destined to that socket will contain the TCP ACK
> that confirms the swapout was successful and we can now release RAM
> pages for other processes.
> 
> I don't know whether they need the PF_MEMALLOC flag specifically (not a
> kernel hacker), but they do need to interact with it at any rate.
> 

At the time it was required to get access to emergency reserves so swapping
can continue. The flip side is that the memory is then protected so pages
allocated from emergency reserves are not used for network traffic that
is not involved with swap. This means that under heavy swap load, it was
perfectly possible for unrelated traffic to get dropped for quite some
time.
diff mbox

Patch

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 03ae72985c79..929fc548c7fb 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -18,6 +18,7 @@ 
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/fs.h>
 #include <linux/bio.h>
 #include <linux/stat.h>
@@ -210,7 +211,7 @@  static int sock_xmit(struct nbd_device *nbd, int index, int send,
 	struct socket *sock = nbd->socks[index]->sock;
 	int result;
 	struct msghdr msg;
-	unsigned long pflags = current->flags;
+	unsigned int noreclaim_flag;
 
 	if (unlikely(!sock)) {
 		dev_err_ratelimited(disk_to_dev(nbd->disk),
@@ -221,7 +222,7 @@  static int sock_xmit(struct nbd_device *nbd, int index, int send,
 
 	msg.msg_iter = *iter;
 
-	current->flags |= PF_MEMALLOC;
+	noreclaim_flag = memalloc_noreclaim_save();
 	do {
 		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
 		msg.msg_name = NULL;
@@ -244,7 +245,7 @@  static int sock_xmit(struct nbd_device *nbd, int index, int send,
 			*sent += result;
 	} while (msg_data_left(&msg));
 
-	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	memalloc_noreclaim_restore(noreclaim_flag);
 
 	return result;
 }
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 4228aba1f654..4842fc0e809d 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -30,6 +30,7 @@ 
 #include <linux/types.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/sched/mm.h>
 #include <linux/file.h>
 #include <linux/blkdev.h>
 #include <linux/delay.h>
@@ -371,10 +372,10 @@  static inline int iscsi_sw_tcp_xmit_qlen(struct iscsi_conn *conn)
 static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
 {
 	struct iscsi_conn *conn = task->conn;
-	unsigned long pflags = current->flags;
+	unsigned int noreclaim_flag;
 	int rc = 0;
 
-	current->flags |= PF_MEMALLOC;
+	noreclaim_flag = memalloc_noreclaim_save();
 
 	while (iscsi_sw_tcp_xmit_qlen(conn)) {
 		rc = iscsi_sw_tcp_xmit(conn);
@@ -387,7 +388,7 @@  static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task)
 		rc = 0;
 	}
 
-	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	memalloc_noreclaim_restore(noreclaim_flag);
 	return rc;
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index fde8b3f7136b..e0705a126b24 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -81,6 +81,7 @@ 
 #include <linux/hash.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/mutex.h>
 #include <linux/string.h>
 #include <linux/mm.h>
@@ -4227,7 +4228,7 @@  static int __netif_receive_skb(struct sk_buff *skb)
 	int ret;
 
 	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
-		unsigned long pflags = current->flags;
+		unsigned int noreclaim_flag;
 
 		/*
 		 * PFMEMALLOC skbs are special, they should
@@ -4238,9 +4239,9 @@  static int __netif_receive_skb(struct sk_buff *skb)
 		 * Use PF_MEMALLOC as this saves us from propagating the allocation
 		 * context down to all allocation sites.
 		 */
-		current->flags |= PF_MEMALLOC;
+		noreclaim_flag = memalloc_noreclaim_save();
 		ret = __netif_receive_skb_core(skb, true);
-		tsk_restore_flags(current, pflags, PF_MEMALLOC);
+		memalloc_noreclaim_restore(noreclaim_flag);
 	} else
 		ret = __netif_receive_skb_core(skb, false);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 392f9b6f96e2..0b2d06b4c308 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -102,6 +102,7 @@ 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/sockios.h>
@@ -372,14 +373,14 @@  EXPORT_SYMBOL_GPL(sk_clear_memalloc);
 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 {
 	int ret;
-	unsigned long pflags = current->flags;
+	unsigned int noreclaim_flag;
 
 	/* these should have been dropped before queueing */
 	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
 
-	current->flags |= PF_MEMALLOC;
+	noreclaim_flag = memalloc_noreclaim_save();
 	ret = sk->sk_backlog_rcv(sk, skb);
-	tsk_restore_flags(current, pflags, PF_MEMALLOC);
+	memalloc_noreclaim_restore(noreclaim_flag);
 
 	return ret;
 }