diff mbox

[5/9] mm: compaction: Acquire the zone->lru_lock as late as possible

Message ID 1348224383-1499-6-git-send-email-mgorman@suse.de
State New
Headers show

Commit Message

Mel Gorman Sept. 21, 2012, 10:46 a.m. UTC
Compaction's migrate scanner acquires the zone->lru_lock when scanning a range
of pages looking for LRU pages to acquire. It does this even if there are
no LRU pages in the range. If multiple processes are compacting then this
can cause severe locking contention. To make matters worse commit b2eef8c0
(mm: compaction: minimise the time IRQs are disabled while isolating pages
for migration) releases the lru_lock every SWAP_CLUSTER_MAX pages that are
scanned.

This patch makes two changes to how the migrate scanner acquires the LRU
lock. First, it only releases the LRU lock every SWAP_CLUSTER_MAX pages if
the lock is contended. This reduces the number of times it unnecessarily
disables and re-enables IRQs. The second is that it defers acquiring the
LRU lock for as long as possible. If there are no LRU pages or the only
LRU pages are transhuge then the LRU lock will not be acquired at all
which reduces contention on zone->lru_lock.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
---
 mm/compaction.c |   63 +++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 20 deletions(-)

Comments

Rafael Aquini Sept. 21, 2012, 5:51 p.m. UTC | #1
On Fri, Sep 21, 2012 at 11:46:19AM +0100, Mel Gorman wrote:
> Compactions migrate scanner acquires the zone->lru_lock when scanning a range
> of pages looking for LRU pages to acquire. It does this even if there are
> no LRU pages in the range. If multiple processes are compacting then this
> can cause severe locking contention. To make matters worse commit b2eef8c0
> (mm: compaction: minimise the time IRQs are disabled while isolating pages
> for migration) releases the lru_lock every SWAP_CLUSTER_MAX pages that are
> scanned.
> 
> This patch makes two changes to how the migrate scanner acquires the LRU
> lock. First, it only releases the LRU lock every SWAP_CLUSTER_MAX pages if
> the lock is contended. This reduces the number of times it unnecessarily
> disables and re-enables IRQs. The second is that it defers acquiring the
> LRU lock for as long as possible. If there are no LRU pages or the only
> LRU pages are transhuge then the LRU lock will not be acquired at all
> which reduces contention on zone->lru_lock.
> 
> Signed-off-by: Mel Gorman <mgorman@suse.de>
> Acked-by: Rik van Riel <riel@redhat.com>
> ---

Acked-by: Rafael Aquini <aquini@redhat.com>
Minchan Kim Sept. 25, 2012, 7:05 a.m. UTC | #2
Hi Mel,

I have a question below.

On Fri, Sep 21, 2012 at 11:46:19AM +0100, Mel Gorman wrote:
> Compactions migrate scanner acquires the zone->lru_lock when scanning a range
> of pages looking for LRU pages to acquire. It does this even if there are
> no LRU pages in the range. If multiple processes are compacting then this
> can cause severe locking contention. To make matters worse commit b2eef8c0
> (mm: compaction: minimise the time IRQs are disabled while isolating pages
> for migration) releases the lru_lock every SWAP_CLUSTER_MAX pages that are
> scanned.
> 
> This patch makes two changes to how the migrate scanner acquires the LRU
> lock. First, it only releases the LRU lock every SWAP_CLUSTER_MAX pages if
> the lock is contended. This reduces the number of times it unnecessarily
> disables and re-enables IRQs. The second is that it defers acquiring the
> LRU lock for as long as possible. If there are no LRU pages or the only
> LRU pages are transhuge then the LRU lock will not be acquired at all
> which reduces contention on zone->lru_lock.
> 
> Signed-off-by: Mel Gorman <mgorman@suse.de>
> Acked-by: Rik van Riel <riel@redhat.com>
> ---
>  mm/compaction.c |   63 +++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 43 insertions(+), 20 deletions(-)
> 
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 6b55491..a6068ff 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -50,6 +50,11 @@ static inline bool migrate_async_suitable(int migratetype)
>  	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
>  }
>  
> +static inline bool should_release_lock(spinlock_t *lock)
> +{
> +	return need_resched() || spin_is_contended(lock);
> +}
> +
>  /*
>   * Compaction requires the taking of some coarse locks that are potentially
>   * very heavily contended. Check if the process needs to be scheduled or
> @@ -62,7 +67,7 @@ static inline bool migrate_async_suitable(int migratetype)
>  static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
>  				      bool locked, struct compact_control *cc)
>  {
> -	if (need_resched() || spin_is_contended(lock)) {
> +	if (should_release_lock(lock)) {
>  		if (locked) {
>  			spin_unlock_irqrestore(lock, *flags);
>  			locked = false;
> @@ -327,7 +332,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
>  	isolate_mode_t mode = 0;
>  	struct lruvec *lruvec;
>  	unsigned long flags;
> -	bool locked;
> +	bool locked = false;
>  
>  	/*
>  	 * Ensure that there are not too many pages isolated from the LRU
> @@ -347,23 +352,17 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
>  
>  	/* Time to isolate some pages for migration */
>  	cond_resched();
> -	spin_lock_irqsave(&zone->lru_lock, flags);
> -	locked = true;
>  	for (; low_pfn < end_pfn; low_pfn++) {
>  		struct page *page;
>  
>  		/* give a chance to irqs before checking need_resched() */
> -		if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
> -			spin_unlock_irqrestore(&zone->lru_lock, flags);
> -			locked = false;
> +		if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
> +			if (should_release_lock(&zone->lru_lock)) {
> +				spin_unlock_irqrestore(&zone->lru_lock, flags);
> +				locked = false;
> +			}
>  		}
>  
> -		/* Check if it is ok to still hold the lock */
> -		locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
> -								locked, cc);
> -		if (!locked || fatal_signal_pending(current))
> -			break;
> -
>  		/*
>  		 * migrate_pfn does not necessarily start aligned to a
>  		 * pageblock. Ensure that pfn_valid is called when moving
> @@ -403,21 +402,38 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
>  		pageblock_nr = low_pfn >> pageblock_order;
>  		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
>  		    !migrate_async_suitable(get_pageblock_migratetype(page))) {
> -			low_pfn += pageblock_nr_pages;
> -			low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
> -			last_pageblock_nr = pageblock_nr;
> -			continue;
> +			goto next_pageblock;
>  		}
>  
> +		/* Check may be lockless but that's ok as we recheck later */
>  		if (!PageLRU(page))
>  			continue;
>  
>  		/*
> -		 * PageLRU is set, and lru_lock excludes isolation,
> -		 * splitting and collapsing (collapsing has already
> -		 * happened if PageLRU is set).
> +		 * PageLRU is set. lru_lock normally excludes isolation
> +		 * splitting and collapsing (collapsing has already happened
> +		 * if PageLRU is set) but the lock is not necessarily taken
> +		 * here and it is wasteful to take it just to check transhuge.
> +		 * Check transhuge without lock and skip if it's either a
> +		 * transhuge or hugetlbfs page.
>  		 */
>  		if (PageTransHuge(page)) {
> +			if (!locked)
> +				goto next_pageblock;

Why skip all pages in a pageblock if !locked?
Shouldn't we add some comment?

> +			low_pfn += (1 << compound_order(page)) - 1;
> +			continue;
> +		}
> +
> +		/* Check if it is ok to still hold the lock */
> +		locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
> +								locked, cc);
> +		if (!locked || fatal_signal_pending(current))
> +			break;
> +
> +		/* Recheck PageLRU and PageTransHuge under lock */
> +		if (!PageLRU(page))
> +			continue;
> +		if (PageTransHuge(page)) {
>  			low_pfn += (1 << compound_order(page)) - 1;
>  			continue;
>  		}
> @@ -444,6 +460,13 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
>  			++low_pfn;
>  			break;
>  		}
> +
> +		continue;
> +
> +next_pageblock:
> +		low_pfn += pageblock_nr_pages;
> +		low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
> +		last_pageblock_nr = pageblock_nr;
>  	}
>  
>  	acct_isolated(zone, locked, cc);
> -- 
> 1.7.9.2
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
Mel Gorman Sept. 25, 2012, 7:51 a.m. UTC | #3
On Tue, Sep 25, 2012 at 04:05:17PM +0900, Minchan Kim wrote:
> Hi Mel,
> 
> I have a question below.
> 
> On Fri, Sep 21, 2012 at 11:46:19AM +0100, Mel Gorman wrote:
> > Compactions migrate scanner acquires the zone->lru_lock when scanning a range
> > of pages looking for LRU pages to acquire. It does this even if there are
> > no LRU pages in the range. If multiple processes are compacting then this
> > can cause severe locking contention. To make matters worse commit b2eef8c0
> > (mm: compaction: minimise the time IRQs are disabled while isolating pages
> > for migration) releases the lru_lock every SWAP_CLUSTER_MAX pages that are
> > scanned.
> > 
> > This patch makes two changes to how the migrate scanner acquires the LRU
> > lock. First, it only releases the LRU lock every SWAP_CLUSTER_MAX pages if
> > the lock is contended. This reduces the number of times it unnecessarily
> > disables and re-enables IRQs. The second is that it defers acquiring the
> > LRU lock for as long as possible. If there are no LRU pages or the only
> > LRU pages are transhuge then the LRU lock will not be acquired at all
> > which reduces contention on zone->lru_lock.
> > 
> > Signed-off-by: Mel Gorman <mgorman@suse.de>
> > Acked-by: Rik van Riel <riel@redhat.com>
> > ---
> >  mm/compaction.c |   63 +++++++++++++++++++++++++++++++++++++------------------
> >  1 file changed, 43 insertions(+), 20 deletions(-)
> > 
> > diff --git a/mm/compaction.c b/mm/compaction.c
> > index 6b55491..a6068ff 100644
> > --- a/mm/compaction.c
> > +++ b/mm/compaction.c
> > @@ -50,6 +50,11 @@ static inline bool migrate_async_suitable(int migratetype)
> >  	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
> >  }
> >  
> > +static inline bool should_release_lock(spinlock_t *lock)
> > +{
> > +	return need_resched() || spin_is_contended(lock);
> > +}
> > +
> >  /*
> >   * Compaction requires the taking of some coarse locks that are potentially
> >   * very heavily contended. Check if the process needs to be scheduled or
> > @@ -62,7 +67,7 @@ static inline bool migrate_async_suitable(int migratetype)
> >  static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
> >  				      bool locked, struct compact_control *cc)
> >  {
> > -	if (need_resched() || spin_is_contended(lock)) {
> > +	if (should_release_lock(lock)) {
> >  		if (locked) {
> >  			spin_unlock_irqrestore(lock, *flags);
> >  			locked = false;
> > @@ -327,7 +332,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
> >  	isolate_mode_t mode = 0;
> >  	struct lruvec *lruvec;
> >  	unsigned long flags;
> > -	bool locked;
> > +	bool locked = false;
> >  
> >  	/*
> >  	 * Ensure that there are not too many pages isolated from the LRU
> > @@ -347,23 +352,17 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
> >  
> >  	/* Time to isolate some pages for migration */
> >  	cond_resched();
> > -	spin_lock_irqsave(&zone->lru_lock, flags);
> > -	locked = true;
> >  	for (; low_pfn < end_pfn; low_pfn++) {
> >  		struct page *page;
> >  
> >  		/* give a chance to irqs before checking need_resched() */
> > -		if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
> > -			spin_unlock_irqrestore(&zone->lru_lock, flags);
> > -			locked = false;
> > +		if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
> > +			if (should_release_lock(&zone->lru_lock)) {
> > +				spin_unlock_irqrestore(&zone->lru_lock, flags);
> > +				locked = false;
> > +			}
> >  		}
> >  
> > -		/* Check if it is ok to still hold the lock */
> > -		locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
> > -								locked, cc);
> > -		if (!locked || fatal_signal_pending(current))
> > -			break;
> > -
> >  		/*
> >  		 * migrate_pfn does not necessarily start aligned to a
> >  		 * pageblock. Ensure that pfn_valid is called when moving
> > @@ -403,21 +402,38 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
> >  		pageblock_nr = low_pfn >> pageblock_order;
> >  		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
> >  		    !migrate_async_suitable(get_pageblock_migratetype(page))) {
> > -			low_pfn += pageblock_nr_pages;
> > -			low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
> > -			last_pageblock_nr = pageblock_nr;
> > -			continue;
> > +			goto next_pageblock;
> >  		}
> >  
> > +		/* Check may be lockless but that's ok as we recheck later */
> >  		if (!PageLRU(page))
> >  			continue;
> >  
> >  		/*
> > -		 * PageLRU is set, and lru_lock excludes isolation,
> > -		 * splitting and collapsing (collapsing has already
> > -		 * happened if PageLRU is set).
> > +		 * PageLRU is set. lru_lock normally excludes isolation
> > +		 * splitting and collapsing (collapsing has already happened
> > +		 * if PageLRU is set) but the lock is not necessarily taken
> > +		 * here and it is wasteful to take it just to check transhuge.
> > +		 * Check transhuge without lock and skip if it's either a
> > +		 * transhuge or hugetlbfs page.
> >  		 */
> >  		if (PageTransHuge(page)) {
> > +			if (!locked)
> > +				goto next_pageblock;
> 
> Why skip all pages in a pageblock if !locked?
> Shouldn't we add some comment?
> 

The comment is above the block already. The lru_lock normally excludes
isolation and splitting. If we do not hold the lock, it's not safe to
call compound_order so instead we skip the entire pageblock.
diff mbox

Patch

diff --git a/mm/compaction.c b/mm/compaction.c
index 6b55491..a6068ff 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -50,6 +50,11 @@  static inline bool migrate_async_suitable(int migratetype)
 	return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
 }
 
+static inline bool should_release_lock(spinlock_t *lock)
+{
+	return need_resched() || spin_is_contended(lock);
+}
+
 /*
  * Compaction requires the taking of some coarse locks that are potentially
  * very heavily contended. Check if the process needs to be scheduled or
@@ -62,7 +67,7 @@  static inline bool migrate_async_suitable(int migratetype)
 static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 				      bool locked, struct compact_control *cc)
 {
-	if (need_resched() || spin_is_contended(lock)) {
+	if (should_release_lock(lock)) {
 		if (locked) {
 			spin_unlock_irqrestore(lock, *flags);
 			locked = false;
@@ -327,7 +332,7 @@  isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	isolate_mode_t mode = 0;
 	struct lruvec *lruvec;
 	unsigned long flags;
-	bool locked;
+	bool locked = false;
 
 	/*
 	 * Ensure that there are not too many pages isolated from the LRU
@@ -347,23 +352,17 @@  isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 
 	/* Time to isolate some pages for migration */
 	cond_resched();
-	spin_lock_irqsave(&zone->lru_lock, flags);
-	locked = true;
 	for (; low_pfn < end_pfn; low_pfn++) {
 		struct page *page;
 
 		/* give a chance to irqs before checking need_resched() */
-		if (!((low_pfn+1) % SWAP_CLUSTER_MAX)) {
-			spin_unlock_irqrestore(&zone->lru_lock, flags);
-			locked = false;
+		if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
+			if (should_release_lock(&zone->lru_lock)) {
+				spin_unlock_irqrestore(&zone->lru_lock, flags);
+				locked = false;
+			}
 		}
 
-		/* Check if it is ok to still hold the lock */
-		locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
-								locked, cc);
-		if (!locked || fatal_signal_pending(current))
-			break;
-
 		/*
 		 * migrate_pfn does not necessarily start aligned to a
 		 * pageblock. Ensure that pfn_valid is called when moving
@@ -403,21 +402,38 @@  isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		pageblock_nr = low_pfn >> pageblock_order;
 		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
 		    !migrate_async_suitable(get_pageblock_migratetype(page))) {
-			low_pfn += pageblock_nr_pages;
-			low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
-			last_pageblock_nr = pageblock_nr;
-			continue;
+			goto next_pageblock;
 		}
 
+		/* Check may be lockless but that's ok as we recheck later */
 		if (!PageLRU(page))
 			continue;
 
 		/*
-		 * PageLRU is set, and lru_lock excludes isolation,
-		 * splitting and collapsing (collapsing has already
-		 * happened if PageLRU is set).
+		 * PageLRU is set. lru_lock normally excludes isolation
+		 * splitting and collapsing (collapsing has already happened
+		 * if PageLRU is set) but the lock is not necessarily taken
+		 * here and it is wasteful to take it just to check transhuge.
+		 * Check transhuge without lock and skip if it's either a
+		 * transhuge or hugetlbfs page.
 		 */
 		if (PageTransHuge(page)) {
+			if (!locked)
+				goto next_pageblock;
+			low_pfn += (1 << compound_order(page)) - 1;
+			continue;
+		}
+
+		/* Check if it is ok to still hold the lock */
+		locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
+								locked, cc);
+		if (!locked || fatal_signal_pending(current))
+			break;
+
+		/* Recheck PageLRU and PageTransHuge under lock */
+		if (!PageLRU(page))
+			continue;
+		if (PageTransHuge(page)) {
 			low_pfn += (1 << compound_order(page)) - 1;
 			continue;
 		}
@@ -444,6 +460,13 @@  isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 			++low_pfn;
 			break;
 		}
+
+		continue;
+
+next_pageblock:
+		low_pfn += pageblock_nr_pages;
+		low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
+		last_pageblock_nr = pageblock_nr;
 	}
 
 	acct_isolated(zone, locked, cc);