
[1/1] UBUNTU: SAUCE: add tracing for user initiated readahead requests

Message ID 1280393201-27874-2-git-send-email-apw@canonical.com
State Accepted
Delegated to: Leann Ogasawara

Commit Message

Andy Whitcroft July 29, 2010, 8:46 a.m. UTC
Track pages which undergo readahead and record which of them are
actually consumed, either by a read or by being faulted into a mapping.
This allows userspace readahead applications (such as ureadahead) to
track which pages in core at the end of a boot are actually required
and to generate an optimal readahead pack.  It also allows pack
adjustment and optimisation in parallel with readahead, allowing the
pack to evolve to be accurate as userspace paths change.  The status of
the pages is reported back via the mincore() call using a newly
allocated bit.

Signed-off-by: Andy Whitcroft <apw@canonical.com>
---
 include/linux/page-flags.h |    3 +++
 mm/filemap.c               |    3 +++
 mm/memory.c                |    7 ++++++-
 mm/mincore.c               |    2 ++
 mm/readahead.c             |    1 +
 5 files changed, 15 insertions(+), 1 deletions(-)
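
For reference, here is a minimal userspace sketch (not part of the patch) of
how a ureadahead-style tool might consume the new information.  With this
change, bit 0 of each mincore() vector byte still means "page resident", and
bit 7 (0x80) additionally means "brought in by readahead but not yet read or
faulted"; the new bit rides along in the per-page byte mincore() already
fills in, so no new interface is needed.  The file path below is purely
illustrative.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/usr/lib/libexample.so";	/* illustrative path */
	struct stat st;
	unsigned char *vec;
	size_t pages, i;
	long page_size;
	void *map;
	int fd;

	fd = open(path, O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0)
		return 1;

	page_size = sysconf(_SC_PAGESIZE);
	pages = (st.st_size + page_size - 1) / page_size;

	map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	vec = malloc(pages);
	if (!vec || mincore(map, st.st_size, vec) < 0)
		return 1;

	for (i = 0; i < pages; i++) {
		if (!(vec[i] & 0x01))
			continue;		/* not resident at all */
		if (vec[i] & 0x80)
			printf("page %zu: readahead, still unused\n", i);
		else
			printf("page %zu: resident, consumed or not from readahead\n", i);
	}

	free(vec);
	munmap(map, st.st_size);
	close(fd);
	return 0;
}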

Comments

Stefan Bader July 29, 2010, 9:20 a.m. UTC | #1
On 07/29/2010 10:46 AM, Andy Whitcroft wrote:
> Track pages which undergo readahead and record which of them are
> actually consumed, either by a read or by being faulted into a mapping.
> This allows userspace readahead applications (such as ureadahead) to
> track which pages in core at the end of a boot are actually required
> and to generate an optimal readahead pack.  It also allows pack
> adjustment and optimisation in parallel with readahead, allowing the
> pack to evolve to be accurate as userspace paths change.  The status of
> the pages is reported back via the mincore() call using a newly
> allocated bit.
> 
> Signed-off-by: Andy Whitcroft <apw@canonical.com>
> ---
>  include/linux/page-flags.h |    3 +++
>  mm/filemap.c               |    3 +++
>  mm/memory.c                |    7 ++++++-
>  mm/mincore.c               |    2 ++
>  mm/readahead.c             |    1 +
>  5 files changed, 15 insertions(+), 1 deletions(-)
> 
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 5b59f35..89dc94f 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -108,6 +108,7 @@ enum pageflags {
>  #ifdef CONFIG_MEMORY_FAILURE
>  	PG_hwpoison,		/* hardware poisoned page. Don't touch */
>  #endif
> +	PG_readaheadunused,	/* user oriented readahead as yet unused*/
>  	__NR_PAGEFLAGS,
>  
>  	/* Filesystems */
> @@ -239,6 +240,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk)
>  PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
>  PAGEFLAG(Readahead, reclaim)		/* Reminder to do async read-ahead */
>  
> +PAGEFLAG(ReadaheadUnused, readaheadunused)
> +
>  #ifdef CONFIG_HIGHMEM
>  /*
>   * Must use a macro here due to header dependency issues. page_zone() is not
> diff --git a/mm/filemap.c b/mm/filemap.c
> index 20e5642..26e5e15 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1192,6 +1192,9 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
>  	if (size > count)
>  		size = count;
>  
> +	if (PageReadaheadUnused(page))
> +		ClearPageReadaheadUnused(page);
> +
>  	/*
>  	 * Faults on the destination of a read are common, so do it before
>  	 * taking the kmap.
> diff --git a/mm/memory.c b/mm/memory.c
> index 119b7cc..97ca21b 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2865,10 +2865,15 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>  	else
>  		VM_BUG_ON(!PageLocked(vmf.page));
>  
> +	page = vmf.page;
> +
> +	/* Mark the page as used on fault. */
> +	if (PageReadaheadUnused(page))
> +		ClearPageReadaheadUnused(page);
> +
>  	/*
>  	 * Should we do an early C-O-W break?
>  	 */
> -	page = vmf.page;
>  	if (flags & FAULT_FLAG_WRITE) {
>  		if (!(vma->vm_flags & VM_SHARED)) {
>  			anon = 1;
> diff --git a/mm/mincore.c b/mm/mincore.c
> index 9ac42dc..a4e573a 100644
> --- a/mm/mincore.c
> +++ b/mm/mincore.c
> @@ -77,6 +77,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
>  	page = find_get_page(mapping, pgoff);
>  	if (page) {
>  		present = PageUptodate(page);
> +		if (present)
> +			present |= (PageReadaheadUnused(page) << 7);
>  		page_cache_release(page);
>  	}
>  
> diff --git a/mm/readahead.c b/mm/readahead.c
> index 77506a2..6948b92 100644
> --- a/mm/readahead.c
> +++ b/mm/readahead.c
> @@ -181,6 +181,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
>  		list_add(&page->lru, &page_pool);
>  		if (page_idx == nr_to_read - lookahead_size)
>  			SetPageReadahead(page);
> +		SetPageReadaheadUnused(page);
>  		ret++;
>  	}
>  

I think it looks good. Just out of interest, the last hunk looks a bit like it
only sets PageReadahead on one page while PageReadaheadUnused is set on all of
them, which seems a bit odd.

-Stefan
Andy Whitcroft July 29, 2010, 9:41 a.m. UTC | #2
On Thu, Jul 29, 2010 at 11:20:40AM +0200, Stefan Bader wrote:
> On 07/29/2010 10:46 AM, Andy Whitcroft wrote:
> > Track pages which undergo readahead and record which of them are
> > actually consumed, either by a read or by being faulted into a mapping.
> > This allows userspace readahead applications (such as ureadahead) to
> > track which pages in core at the end of a boot are actually required
> > and to generate an optimal readahead pack.  It also allows pack
> > adjustment and optimisation in parallel with readahead, allowing the
> > pack to evolve to be accurate as userspace paths change.  The status of
> > the pages is reported back via the mincore() call using a newly
> > allocated bit.
> > 
> > Signed-off-by: Andy Whitcroft <apw@canonical.com>
> > ---
> >  include/linux/page-flags.h |    3 +++
> >  mm/filemap.c               |    3 +++
> >  mm/memory.c                |    7 ++++++-
> >  mm/mincore.c               |    2 ++
> >  mm/readahead.c             |    1 +
> >  5 files changed, 15 insertions(+), 1 deletions(-)
> > 
> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> > index 5b59f35..89dc94f 100644
> > --- a/include/linux/page-flags.h
> > +++ b/include/linux/page-flags.h
> > @@ -108,6 +108,7 @@ enum pageflags {
> >  #ifdef CONFIG_MEMORY_FAILURE
> >  	PG_hwpoison,		/* hardware poisoned page. Don't touch */
> >  #endif
> > +	PG_readaheadunused,	/* user oriented readahead as yet unused*/
> >  	__NR_PAGEFLAGS,
> >  
> >  	/* Filesystems */
> > @@ -239,6 +240,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk)
> >  PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
> >  PAGEFLAG(Readahead, reclaim)		/* Reminder to do async read-ahead */
> >  
> > +PAGEFLAG(ReadaheadUnused, readaheadunused)
> > +
> >  #ifdef CONFIG_HIGHMEM
> >  /*
> >   * Must use a macro here due to header dependency issues. page_zone() is not
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index 20e5642..26e5e15 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -1192,6 +1192,9 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
> >  	if (size > count)
> >  		size = count;
> >  
> > +	if (PageReadaheadUnused(page))
> > +		ClearPageReadaheadUnused(page);
> > +
> >  	/*
> >  	 * Faults on the destination of a read are common, so do it before
> >  	 * taking the kmap.
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 119b7cc..97ca21b 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2865,10 +2865,15 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
> >  	else
> >  		VM_BUG_ON(!PageLocked(vmf.page));
> >  
> > +	page = vmf.page;
> > +
> > +	/* Mark the page as used on fault. */
> > +	if (PageReadaheadUnused(page))
> > +		ClearPageReadaheadUnused(page);
> > +
> >  	/*
> >  	 * Should we do an early C-O-W break?
> >  	 */
> > -	page = vmf.page;
> >  	if (flags & FAULT_FLAG_WRITE) {
> >  		if (!(vma->vm_flags & VM_SHARED)) {
> >  			anon = 1;
> > diff --git a/mm/mincore.c b/mm/mincore.c
> > index 9ac42dc..a4e573a 100644
> > --- a/mm/mincore.c
> > +++ b/mm/mincore.c
> > @@ -77,6 +77,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
> >  	page = find_get_page(mapping, pgoff);
> >  	if (page) {
> >  		present = PageUptodate(page);
> > +		if (present)
> > +			present |= (PageReadaheadUnused(page) << 7);
> >  		page_cache_release(page);
> >  	}
> >  
> > diff --git a/mm/readahead.c b/mm/readahead.c
> > index 77506a2..6948b92 100644
> > --- a/mm/readahead.c
> > +++ b/mm/readahead.c
> > @@ -181,6 +181,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
> >  		list_add(&page->lru, &page_pool);
> >  		if (page_idx == nr_to_read - lookahead_size)
> >  			SetPageReadahead(page);
> > +		SetPageReadaheadUnused(page);
> >  		ret++;
> >  	}
> >  
> 
> I think it looks good. Just out of interest, the last hunk looks a bit like it
> only sets PageReadahead on one page while PageReadaheadUnused is set on all of
> them, which seems a bit odd.

That's because the PageReadahead flag is a marker, a pointer into the
readahead window: once we read the page marked with it for real, we know
it is time to schedule more readahead, as we are close to consuming all
of the previous readahead.

-apw
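
To make the distinction concrete, here is the loop from the mm/readahead.c
hunk above with editorial comments added (the statements themselves are
unchanged; the page cache lookup and allocation steps of
__do_page_cache_readahead are elided):

	for (page_idx = 0; page_idx < nr_to_read; page_idx++) {
		/* ... page cache lookup and page allocation elided ... */
		list_add(&page->lru, &page_pool);

		/*
		 * Exactly one page per window, lookahead_size pages before
		 * its end, carries the PG_readahead trigger: reaching it
		 * tells the kernel to queue the next chunk of readahead.
		 */
		if (page_idx == nr_to_read - lookahead_size)
			SetPageReadahead(page);

		/*
		 * Every page starts out "unused"; file_read_actor() and
		 * __do_fault() clear this flag on first consumption, and
		 * mincore() reports it back to userspace as bit 7.
		 */
		SetPageReadaheadUnused(page);
		ret++;
	}
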
Stefan Bader July 29, 2010, 11:48 a.m. UTC | #3
On 07/29/2010 11:41 AM, Andy Whitcroft wrote:
> On Thu, Jul 29, 2010 at 11:20:40AM +0200, Stefan Bader wrote:
>> On 07/29/2010 10:46 AM, Andy Whitcroft wrote:
>>> Track pages which undergo readahead and record which of them are
>>> actually consumed, either by a read or by being faulted into a mapping.
>>> This allows userspace readahead applications (such as ureadahead) to
>>> track which pages in core at the end of a boot are actually required
>>> and to generate an optimal readahead pack.  It also allows pack
>>> adjustment and optimisation in parallel with readahead, allowing the
>>> pack to evolve to be accurate as userspace paths change.  The status of
>>> the pages is reported back via the mincore() call using a newly
>>> allocated bit.
>>>
>>> Signed-off-by: Andy Whitcroft <apw@canonical.com>
>>> ---
>>>  include/linux/page-flags.h |    3 +++
>>>  mm/filemap.c               |    3 +++
>>>  mm/memory.c                |    7 ++++++-
>>>  mm/mincore.c               |    2 ++
>>>  mm/readahead.c             |    1 +
>>>  5 files changed, 15 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
>>> index 5b59f35..89dc94f 100644
>>> --- a/include/linux/page-flags.h
>>> +++ b/include/linux/page-flags.h
>>> @@ -108,6 +108,7 @@ enum pageflags {
>>>  #ifdef CONFIG_MEMORY_FAILURE
>>>  	PG_hwpoison,		/* hardware poisoned page. Don't touch */
>>>  #endif
>>> +	PG_readaheadunused,	/* user oriented readahead as yet unused*/
>>>  	__NR_PAGEFLAGS,
>>>  
>>>  	/* Filesystems */
>>> @@ -239,6 +240,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk)
>>>  PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
>>>  PAGEFLAG(Readahead, reclaim)		/* Reminder to do async read-ahead */
>>>  
>>> +PAGEFLAG(ReadaheadUnused, readaheadunused)
>>> +
>>>  #ifdef CONFIG_HIGHMEM
>>>  /*
>>>   * Must use a macro here due to header dependency issues. page_zone() is not
>>> diff --git a/mm/filemap.c b/mm/filemap.c
>>> index 20e5642..26e5e15 100644
>>> --- a/mm/filemap.c
>>> +++ b/mm/filemap.c
>>> @@ -1192,6 +1192,9 @@ int file_read_actor(read_descriptor_t *desc, struct page *page,
>>>  	if (size > count)
>>>  		size = count;
>>>  
>>> +	if (PageReadaheadUnused(page))
>>> +		ClearPageReadaheadUnused(page);
>>> +
>>>  	/*
>>>  	 * Faults on the destination of a read are common, so do it before
>>>  	 * taking the kmap.
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index 119b7cc..97ca21b 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -2865,10 +2865,15 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>>>  	else
>>>  		VM_BUG_ON(!PageLocked(vmf.page));
>>>  
>>> +	page = vmf.page;
>>> +
>>> +	/* Mark the page as used on fault. */
>>> +	if (PageReadaheadUnused(page))
>>> +		ClearPageReadaheadUnused(page);
>>> +
>>>  	/*
>>>  	 * Should we do an early C-O-W break?
>>>  	 */
>>> -	page = vmf.page;
>>>  	if (flags & FAULT_FLAG_WRITE) {
>>>  		if (!(vma->vm_flags & VM_SHARED)) {
>>>  			anon = 1;
>>> diff --git a/mm/mincore.c b/mm/mincore.c
>>> index 9ac42dc..a4e573a 100644
>>> --- a/mm/mincore.c
>>> +++ b/mm/mincore.c
>>> @@ -77,6 +77,8 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
>>>  	page = find_get_page(mapping, pgoff);
>>>  	if (page) {
>>>  		present = PageUptodate(page);
>>> +		if (present)
>>> +			present |= (PageReadaheadUnused(page) << 7);
>>>  		page_cache_release(page);
>>>  	}
>>>  
>>> diff --git a/mm/readahead.c b/mm/readahead.c
>>> index 77506a2..6948b92 100644
>>> --- a/mm/readahead.c
>>> +++ b/mm/readahead.c
>>> @@ -181,6 +181,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
>>>  		list_add(&page->lru, &page_pool);
>>>  		if (page_idx == nr_to_read - lookahead_size)
>>>  			SetPageReadahead(page);
>>> +		SetPageReadaheadUnused(page);
>>>  		ret++;
>>>  	}
>>>  
>>
>> I think it looks good. Just out of interest, the last hunk looks a bit like it
>> only sets PageReadahead on one page while PageReadaheadUnused is set on all of
>> them, which seems a bit odd.
> 
> That's because the PageReadahead flag is a marker, a pointer into the
> readahead window: once we read the page marked with it for real, we know
> it is time to schedule more readahead, as we are close to consuming all
> of the previous readahead.
> 
> -apw

Ah, thanks for the explanation. It sounded rather like a marker for which pages
came from readahead. But it makes sense, and it is probably hard to find a good
name for it.


Acked-by: Stefan Bader <stefan.bader@canonical.com>

Patch

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5b59f35..89dc94f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -108,6 +108,7 @@  enum pageflags {
 #ifdef CONFIG_MEMORY_FAILURE
 	PG_hwpoison,		/* hardware poisoned page. Don't touch */
 #endif
+	PG_readaheadunused,	/* user oriented readahead as yet unused*/
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -239,6 +240,8 @@  PAGEFLAG(MappedToDisk, mappedtodisk)
 PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
 PAGEFLAG(Readahead, reclaim)		/* Reminder to do async read-ahead */
 
+PAGEFLAG(ReadaheadUnused, readaheadunused)
+
 #ifdef CONFIG_HIGHMEM
 /*
  * Must use a macro here due to header dependency issues. page_zone() is not
diff --git a/mm/filemap.c b/mm/filemap.c
index 20e5642..26e5e15 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1192,6 +1192,9 @@  int file_read_actor(read_descriptor_t *desc, struct page *page,
 	if (size > count)
 		size = count;
 
+	if (PageReadaheadUnused(page))
+		ClearPageReadaheadUnused(page);
+
 	/*
 	 * Faults on the destination of a read are common, so do it before
 	 * taking the kmap.
diff --git a/mm/memory.c b/mm/memory.c
index 119b7cc..97ca21b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2865,10 +2865,15 @@  static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	else
 		VM_BUG_ON(!PageLocked(vmf.page));
 
+	page = vmf.page;
+
+	/* Mark the page as used on fault. */
+	if (PageReadaheadUnused(page))
+		ClearPageReadaheadUnused(page);
+
 	/*
 	 * Should we do an early C-O-W break?
 	 */
-	page = vmf.page;
 	if (flags & FAULT_FLAG_WRITE) {
 		if (!(vma->vm_flags & VM_SHARED)) {
 			anon = 1;
diff --git a/mm/mincore.c b/mm/mincore.c
index 9ac42dc..a4e573a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -77,6 +77,8 @@  static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 	page = find_get_page(mapping, pgoff);
 	if (page) {
 		present = PageUptodate(page);
+		if (present)
+			present |= (PageReadaheadUnused(page) << 7);
 		page_cache_release(page);
 	}
 
diff --git a/mm/readahead.c b/mm/readahead.c
index 77506a2..6948b92 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -181,6 +181,7 @@  __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
 		list_add(&page->lru, &page_pool);
 		if (page_idx == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
+		SetPageReadaheadUnused(page);
 		ret++;
 	}