Patchwork ehea: Add hugepage detection

login
register
mail settings
Submitter Thomas Klein
Date Oct. 24, 2008, 11:07 a.m.
Message ID <200810241307.31072.>
Download mbox | patch
Permalink /patch/5659/
State Not Applicable
Delegated to: Benjamin Herrenschmidt
Headers show

Comments

Thomas Klein - Oct. 24, 2008, 11:07 a.m.
16GB hugepages may not be part of a memory region (firmware restriction). This patch
modifies the walk_memory_resource callback fn to filter hugepages and add only standard
memory to the busmap which is later on used for MR registration.

Signed-off-by: Thomas Klein <tklein@de.ibm.com>
---
This patch is based on davem's net-2.6.git.
Dave Hansen - Oct. 24, 2008, 6:55 p.m.
On Fri, 2008-10-24 at 13:07 +0200, Thomas Klein wrote:
> 16GB hugepages may not be part of a memory region (firmware restriction). This patch
> modifies the walk_memory_resource callback fn to filter hugepages and add only standard
> memory to the busmap which is later on used for MR registration.

Does this support a case where a userspace app is reading network
packets into a 16GB page backed area?  I think you need to elaborate on
what kind of memory you need to have registered in these memory regions.
It's hard to review what you've done here otherwise.

> --- linux-2.6.27/drivers/net/ehea/ehea_qmr.c	2008-10-24 09:29:19.000000000 +0200
> +++ patched_kernel/drivers/net/ehea/ehea_qmr.c	2008-10-24 09:45:15.000000000 +0200
> @@ -636,6 +636,9 @@ static int ehea_update_busmap(unsigned l
>  {
>  	unsigned long i, start_section, end_section;
> 
> +	if (!pgnum)
> +		return 0;

This probably needs a comment.  It's not obvious what it is doing.

>  	if (!ehea_bmap) {
>  		ehea_bmap = kzalloc(sizeof(struct ehea_bmap), GFP_KERNEL);
>  		if (!ehea_bmap)
> @@ -692,10 +695,47 @@ int ehea_rem_sect_bmap(unsigned long pfn
>  	return ret;
>  }
> 
> -static int ehea_create_busmap_callback(unsigned long pfn,
> -				       unsigned long nr_pages, void *arg)
> +static int ehea_is_hugepage(unsigned long pfn)
> +{
> +	return ((((pfn << PAGE_SHIFT) & (EHEA_HUGEPAGE_SIZE - 1)) == 0)
> +		&& (compound_order(pfn_to_page(pfn)) + PAGE_SHIFT
> +		    == EHEA_HUGEPAGESHIFT) );
> +}

Whoa.  That's dense.  Can you actually read that in less than 5 minutes?
Seriously.

I'm not sure what else you use EHEA_HUGEPAGE_SIZE for or if this gets
duplicated, but this would look nicer if you just had a:

#define EHEA_HUGEPAGE_PFN_MASK ((EHEA_HUGEPAGE_SIZE - 1) >> PAGE_SHIFT)

	if (pfn & EHEA_HUGEPAGE_PFN_MASK)
		return 0;

Or, with no new macro:

	if ((pfn << PAGE_SHIFT) & (EHEA_HUGEPAGE_SIZE - 1) != 0)
		return 0;

	page_order = compound_order(pfn_to_page(pfn));
	if (page_order + PAGE_SHIFT != EHEA_HUGEPAGESHIFT)
		return 0;
	return 1;
}

Please break that up into something that is truly readable.  gcc will
generate the exact same code.

> +static int ehea_create_busmap_callback(unsigned long initial_pfn,
> +				       unsigned long total_nr_pages, void *arg)
>  {
> -	return ehea_update_busmap(pfn, nr_pages, EHEA_BUSMAP_ADD_SECT);
> +	int ret;
> +	unsigned long pfn, start_pfn, end_pfn, nr_pages;
> +
> +	if ((total_nr_pages * PAGE_SIZE) < EHEA_HUGEPAGE_SIZE)
> +		return ehea_update_busmap(initial_pfn, total_nr_pages,
> +					  EHEA_BUSMAP_ADD_SECT);
> +
> +	/* Given chunk is >= 16GB -> check for hugepages */
> +	start_pfn = initial_pfn;
> +	end_pfn = initial_pfn + total_nr_pages;
> +	pfn = start_pfn;
> +
> +	while (pfn < end_pfn) {
> +		if (ehea_is_hugepage(pfn)) {
> +			/* Add mem found in front of the hugepage */
> +			nr_pages = pfn - start_pfn;
> +			ret = ehea_update_busmap(start_pfn, nr_pages,
> +						 EHEA_BUSMAP_ADD_SECT);
> +			if (ret)
> +				return ret;
> +
> +			/* Skip the hugepage */
> +			pfn += (EHEA_HUGEPAGE_SIZE / PAGE_SIZE);
> +			start_pfn = pfn;
> +		} else
> +			pfn += (EHEA_SECTSIZE / PAGE_SIZE);
> +	}
> +
> +	/* Add mem found behind the hugepage(s)  */
> +	nr_pages = pfn - start_pfn;
> +	return ehea_update_busmap(start_pfn, nr_pages, EHEA_BUSMAP_ADD_SECT);
>  }
> 
>  int ehea_create_busmap(void)
> diff -Nurp -X dontdiff linux-2.6.27/drivers/net/ehea/ehea_qmr.h patched_kernel/drivers/net/ehea/ehea_qmr.h
> --- linux-2.6.27/drivers/net/ehea/ehea_qmr.h	2008-10-24 09:29:19.000000000 +0200
> +++ patched_kernel/drivers/net/ehea/ehea_qmr.h	2008-10-24 09:45:15.000000000 +0200
> @@ -40,6 +40,8 @@
>  #define EHEA_PAGESIZE          (1UL << EHEA_PAGESHIFT)
>  #define EHEA_SECTSIZE          (1UL << 24)
>  #define EHEA_PAGES_PER_SECTION (EHEA_SECTSIZE >> EHEA_PAGESHIFT)
> +#define EHEA_HUGEPAGESHIFT     34
> +#define EHEA_HUGEPAGE_SIZE     (1UL << EHEA_HUGEPAGESHIFT)

I'm a bit worried that you're basically duplicating hugetlb.h here.  Why
not just use the existing 16GB page macros?  While you're at it please
expand these to give some more useful macros so you don't have to do
arithmetic on them in the code as much.

#define EHEA_SECT_NR_PAGES (EHEA_SECTSIZE / PAGE_SIZE)

for instance.

-- Dave

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff -Nurp -X dontdiff linux-2.6.27/drivers/net/ehea/ehea.h patched_kernel/drivers/net/ehea/ehea.h
--- linux-2.6.27/drivers/net/ehea/ehea.h	2008-10-24 09:29:19.000000000 +0200
+++ patched_kernel/drivers/net/ehea/ehea.h	2008-10-24 09:45:15.000000000 +0200
@@ -40,7 +40,7 @@ 
 #include <asm/io.h>
 
 #define DRV_NAME	"ehea"
-#define DRV_VERSION	"EHEA_0094"
+#define DRV_VERSION	"EHEA_0095"
 
 /* eHEA capability flags */
 #define DLPAR_PORT_ADD_REM 1
diff -Nurp -X dontdiff linux-2.6.27/drivers/net/ehea/ehea_qmr.c patched_kernel/drivers/net/ehea/ehea_qmr.c
--- linux-2.6.27/drivers/net/ehea/ehea_qmr.c	2008-10-24 09:29:19.000000000 +0200
+++ patched_kernel/drivers/net/ehea/ehea_qmr.c	2008-10-24 09:45:15.000000000 +0200
@@ -636,6 +636,9 @@  static int ehea_update_busmap(unsigned l
 {
 	unsigned long i, start_section, end_section;
 
+	if (!pgnum)
+		return 0;
+
 	if (!ehea_bmap) {
 		ehea_bmap = kzalloc(sizeof(struct ehea_bmap), GFP_KERNEL);
 		if (!ehea_bmap)
@@ -692,10 +695,47 @@  int ehea_rem_sect_bmap(unsigned long pfn
 	return ret;
 }
 
-static int ehea_create_busmap_callback(unsigned long pfn,
-				       unsigned long nr_pages, void *arg)
+static int ehea_is_hugepage(unsigned long pfn)
+{
+	return ((((pfn << PAGE_SHIFT) & (EHEA_HUGEPAGE_SIZE - 1)) == 0)
+		&& (compound_order(pfn_to_page(pfn)) + PAGE_SHIFT
+		    == EHEA_HUGEPAGESHIFT) );
+}
+
+static int ehea_create_busmap_callback(unsigned long initial_pfn,
+				       unsigned long total_nr_pages, void *arg)
 {
-	return ehea_update_busmap(pfn, nr_pages, EHEA_BUSMAP_ADD_SECT);
+	int ret;
+	unsigned long pfn, start_pfn, end_pfn, nr_pages;
+
+	if ((total_nr_pages * PAGE_SIZE) < EHEA_HUGEPAGE_SIZE)
+		return ehea_update_busmap(initial_pfn, total_nr_pages,
+					  EHEA_BUSMAP_ADD_SECT);
+
+	/* Given chunk is >= 16GB -> check for hugepages */
+	start_pfn = initial_pfn;
+	end_pfn = initial_pfn + total_nr_pages;
+	pfn = start_pfn;
+
+	while (pfn < end_pfn) {
+		if (ehea_is_hugepage(pfn)) {
+			/* Add mem found in front of the hugepage */
+			nr_pages = pfn - start_pfn;
+			ret = ehea_update_busmap(start_pfn, nr_pages,
+						 EHEA_BUSMAP_ADD_SECT);
+			if (ret)
+				return ret;
+
+			/* Skip the hugepage */
+			pfn += (EHEA_HUGEPAGE_SIZE / PAGE_SIZE);
+			start_pfn = pfn;
+		} else
+			pfn += (EHEA_SECTSIZE / PAGE_SIZE);
+	}
+
+	/* Add mem found behind the hugepage(s)  */
+	nr_pages = pfn - start_pfn;
+	return ehea_update_busmap(start_pfn, nr_pages, EHEA_BUSMAP_ADD_SECT);
 }
 
 int ehea_create_busmap(void)
diff -Nurp -X dontdiff linux-2.6.27/drivers/net/ehea/ehea_qmr.h patched_kernel/drivers/net/ehea/ehea_qmr.h
--- linux-2.6.27/drivers/net/ehea/ehea_qmr.h	2008-10-24 09:29:19.000000000 +0200
+++ patched_kernel/drivers/net/ehea/ehea_qmr.h	2008-10-24 09:45:15.000000000 +0200
@@ -40,6 +40,8 @@ 
 #define EHEA_PAGESIZE          (1UL << EHEA_PAGESHIFT)
 #define EHEA_SECTSIZE          (1UL << 24)
 #define EHEA_PAGES_PER_SECTION (EHEA_SECTSIZE >> EHEA_PAGESHIFT)
+#define EHEA_HUGEPAGESHIFT     34
+#define EHEA_HUGEPAGE_SIZE     (1UL << EHEA_HUGEPAGESHIFT)
 
 #if ((1UL << SECTION_SIZE_BITS) < EHEA_SECTSIZE)
 #error eHEA module cannot work if kernel sectionsize < ehea sectionsize