diff mbox

sparc64: Add 16GB hugepage support

Message ID 1495672233-116815-1-git-send-email-nitin.m.gupta@oracle.com
State Changes Requested
Delegated to: David Miller
Headers show

Commit Message

Nitin Gupta May 25, 2017, 12:29 a.m. UTC
Orabug: 25362942

Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
---
 arch/sparc/include/asm/page_64.h    |  3 +-
 arch/sparc/include/asm/pgtable_64.h |  5 +++
 arch/sparc/include/asm/tsb.h        | 35 +++++++++++++++++-
 arch/sparc/kernel/tsb.S             |  2 +-
 arch/sparc/mm/hugetlbpage.c         | 74 ++++++++++++++++++++++++++-----------
 arch/sparc/mm/init_64.c             | 41 ++++++++++++++++----
 6 files changed, 128 insertions(+), 32 deletions(-)

Comments

Paul Gortmaker May 25, 2017, 3:34 a.m. UTC | #1
[[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote:

> Orabug: 25362942
> 
> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>

If this wasn't an accidental git send-email misfire, then there should
be a long log indicating the use case, the performance increase, the
testing that was done, etc. etc. 

Normally I'd not notice but since I was Cc'd I figured it was worth a
mention -- for example the vendor ID above doesn't mean a thing to
all the rest of us, hence why I suspect it was a git send-email misfire;
sadly, I think we've all accidentally done that at least once....

Paul.
--

> ---
>  arch/sparc/include/asm/page_64.h    |  3 +-
>  arch/sparc/include/asm/pgtable_64.h |  5 +++
>  arch/sparc/include/asm/tsb.h        | 35 +++++++++++++++++-
>  arch/sparc/kernel/tsb.S             |  2 +-
>  arch/sparc/mm/hugetlbpage.c         | 74 ++++++++++++++++++++++++++-----------
>  arch/sparc/mm/init_64.c             | 41 ++++++++++++++++----
>  6 files changed, 128 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
> index 5961b2d..8ee1f97 100644
> --- a/arch/sparc/include/asm/page_64.h
> +++ b/arch/sparc/include/asm/page_64.h
> @@ -17,6 +17,7 @@
>  
>  #define HPAGE_SHIFT		23
>  #define REAL_HPAGE_SHIFT	22
> +#define HPAGE_16GB_SHIFT	34
>  #define HPAGE_2GB_SHIFT		31
>  #define HPAGE_256MB_SHIFT	28
>  #define HPAGE_64K_SHIFT		16
> @@ -28,7 +29,7 @@
>  #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
>  #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
>  #define REAL_HPAGE_PER_HPAGE	(_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
> -#define HUGE_MAX_HSTATE		4
> +#define HUGE_MAX_HSTATE		5
>  #endif
>  
>  #ifndef __ASSEMBLY__
> diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
> index 6fbd931..2444b02 100644
> --- a/arch/sparc/include/asm/pgtable_64.h
> +++ b/arch/sparc/include/asm/pgtable_64.h
> @@ -414,6 +414,11 @@ static inline bool is_hugetlb_pmd(pmd_t pmd)
>  	return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
>  }
>  
> +static inline bool is_hugetlb_pud(pud_t pud)
> +{
> +	return !!(pud_val(pud) & _PAGE_PUD_HUGE);
> +}
> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  static inline pmd_t pmd_mkhuge(pmd_t pmd)
>  {
> diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
> index 32258e0..fbd8da7 100644
> --- a/arch/sparc/include/asm/tsb.h
> +++ b/arch/sparc/include/asm/tsb.h
> @@ -195,6 +195,36 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
>  	 nop; \
>  699:
>  
> +	/* PUD has been loaded into REG1, interpret the value, seeing
> +	 * if it is a HUGE PUD or a normal one.  If it is not valid
> +	 * then jump to FAIL_LABEL.  If it is a HUGE PUD, and it
> +	 * translates to a valid PTE, branch to PTE_LABEL.
> +	 *
> +	 * We have to propagate bits [32:22] from the virtual address
> +	 * to resolve at 4M granularity.
> +	 */
> +#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> +	brz,pn		REG1, FAIL_LABEL;		\
> +	 sethi		%uhi(_PAGE_PUD_HUGE), REG2;	\
> +	sllx		REG2, 32, REG2;			\
> +	andcc		REG1, REG2, %g0;		\
> +	be,pt		%xcc, 700f;			\
> +	 sethi		%hi(0x1ffc0000), REG2;		\
> +	brgez,pn	REG1, FAIL_LABEL;		\
> +	 sllx		REG2, 1, REG2;			\
> +	brgez,pn	REG1, FAIL_LABEL;		\
> +	 andn		REG1, REG2, REG1;		\
> +	and		VADDR, REG2, REG2;		\
> +	brlz,pt		REG1, PTE_LABEL;		\
> +	 or		REG1, REG2, REG1;		\
> +700:
> +#else
> +#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
> +	brz,pn		REG1, FAIL_LABEL; \
> +	 nop;
> +#endif
> +
>  	/* PMD has been loaded into REG1, interpret the value, seeing
>  	 * if it is a HUGE PMD or a normal one.  If it is not valid
>  	 * then jump to FAIL_LABEL.  If it is a HUGE PMD, and it
> @@ -209,14 +239,14 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
>  	 sethi		%uhi(_PAGE_PMD_HUGE), REG2;	\
>  	sllx		REG2, 32, REG2;			\
>  	andcc		REG1, REG2, %g0;		\
> -	be,pt		%xcc, 700f;			\
> +	be,pt		%xcc, 701f;			\
>  	 sethi		%hi(4 * 1024 * 1024), REG2;	\
>  	brgez,pn	REG1, FAIL_LABEL;		\
>  	 andn		REG1, REG2, REG1;		\
>  	and		VADDR, REG2, REG2;		\
>  	brlz,pt		REG1, PTE_LABEL;		\
>  	 or		REG1, REG2, REG1;		\
> -700:
> +701:
>  #else
>  #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
>  	brz,pn		REG1, FAIL_LABEL; \
> @@ -242,6 +272,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
>  	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
>  	andn		REG2, 0x7, REG2; \
>  	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
> +	USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
>  	brz,pn		REG1, FAIL_LABEL; \
>  	 sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
>  	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
> diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
> index 10689cf..a0a5a13 100644
> --- a/arch/sparc/kernel/tsb.S
> +++ b/arch/sparc/kernel/tsb.S
> @@ -117,7 +117,7 @@ tsb_miss_page_table_walk_sun4v_fastpath:
>  	/* Valid PTE is now in %g5.  */
>  
>  #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> -	sethi		%uhi(_PAGE_PMD_HUGE), %g7
> +	sethi		%uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
>  	sllx		%g7, 32, %g7
>  
>  	andcc		%g5, %g7, %g0
> diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
> index 7c29d38..62c1e62 100644
> --- a/arch/sparc/mm/hugetlbpage.c
> +++ b/arch/sparc/mm/hugetlbpage.c
> @@ -143,6 +143,10 @@ static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
>  	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
>  
>  	switch (shift) {
> +	case HPAGE_16GB_SHIFT:
> +		hugepage_size = _PAGE_SZ16GB_4V;
> +		pte_val(entry) |= _PAGE_PUD_HUGE;
> +		break;
>  	case HPAGE_2GB_SHIFT:
>  		hugepage_size = _PAGE_SZ2GB_4V;
>  		pte_val(entry) |= _PAGE_PMD_HUGE;
> @@ -187,6 +191,9 @@ static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
>  	unsigned int shift;
>  
>  	switch (tte_szbits) {
> +	case _PAGE_SZ16GB_4V:
> +		shift = HPAGE_16GB_SHIFT;
> +		break;
>  	case _PAGE_SZ2GB_4V:
>  		shift = HPAGE_2GB_SHIFT;
>  		break;
> @@ -263,7 +270,12 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
>  
>  	pgd = pgd_offset(mm, addr);
>  	pud = pud_alloc(mm, pgd, addr);
> -	if (pud) {
> +	if (!pud)
> +		return NULL;
> +
> +	if (sz >= PUD_SIZE)
> +		pte = (pte_t *)pud;
> +	else {
>  		pmd = pmd_alloc(mm, pud, addr);
>  		if (!pmd)
>  			return NULL;
> @@ -288,12 +300,16 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
>  	if (!pgd_none(*pgd)) {
>  		pud = pud_offset(pgd, addr);
>  		if (!pud_none(*pud)) {
> -			pmd = pmd_offset(pud, addr);
> -			if (!pmd_none(*pmd)) {
> -				if (is_hugetlb_pmd(*pmd))
> -					pte = (pte_t *)pmd;
> -				else
> -					pte = pte_offset_map(pmd, addr);
> +			if (is_hugetlb_pud(*pud))
> +				pte = (pte_t *)pud;
> +			else {
> +				pmd = pmd_offset(pud, addr);
> +				if (!pmd_none(*pmd)) {
> +					if (is_hugetlb_pmd(*pmd))
> +						pte = (pte_t *)pmd;
> +					else
> +						pte = pte_offset_map(pmd, addr);
> +				}
>  			}
>  		}
>  	}
> @@ -304,12 +320,20 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
>  void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>  		     pte_t *ptep, pte_t entry)
>  {
> -	unsigned int i, nptes, orig_shift, shift;
> -	unsigned long size;
> +	unsigned int nptes, orig_shift, shift;
> +	unsigned long i, size;
>  	pte_t orig;
>  
>  	size = huge_tte_to_size(entry);
> -	shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
> +
> +	shift = PAGE_SHIFT;
> +	if (size >= PUD_SIZE)
> +		shift = PUD_SHIFT;
> +	else if (size >= PMD_SIZE)
> +		shift = PMD_SHIFT;
> +	else
> +		shift = PAGE_SHIFT;
> +
>  	nptes = size >> shift;
>  
>  	if (!pte_present(*ptep) && pte_present(entry))
> @@ -332,19 +356,23 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>  pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
>  			      pte_t *ptep)
>  {
> -	unsigned int i, nptes, hugepage_shift;
> +	unsigned int i, nptes, orig_shift, shift;
>  	unsigned long size;
>  	pte_t entry;
>  
>  	entry = *ptep;
>  	size = huge_tte_to_size(entry);
> -	if (size >= HPAGE_SIZE)
> -		nptes = size >> PMD_SHIFT;
> +
> +	shift = PAGE_SHIFT;
> +	if (size >= PUD_SIZE)
> +		shift = PUD_SHIFT;
> +	else if (size >= PMD_SIZE)
> +		shift = PMD_SHIFT;
>  	else
> -		nptes = size >> PAGE_SHIFT;
> +		shift = PAGE_SHIFT;
>  
> -	hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
> -		huge_tte_to_shift(entry);
> +	nptes = size >> shift;
> +	orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
>  
>  	if (pte_present(entry))
>  		mm->context.hugetlb_pte_count -= nptes;
> @@ -353,11 +381,11 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
>  	for (i = 0; i < nptes; i++)
>  		ptep[i] = __pte(0UL);
>  
> -	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
> +	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
>  	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
>  	if (size == HPAGE_SIZE)
>  		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
> -				    hugepage_shift);
> +				    orig_shift);
>  
>  	return entry;
>  }
> @@ -370,7 +398,8 @@ int pmd_huge(pmd_t pmd)
>  
>  int pud_huge(pud_t pud)
>  {
> -	return 0;
> +	return !pud_none(pud) &&
> +		(pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
>  }
>  
>  static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
> @@ -434,8 +463,11 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
>  		next = pud_addr_end(addr, end);
>  		if (pud_none_or_clear_bad(pud))
>  			continue;
> -		hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
> -				       ceiling);
> +		if (is_hugetlb_pud(*pud))
> +			pud_clear(pud);
> +		else
> +			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
> +					       ceiling);
>  	} while (pud++, addr = next, addr != end);
>  
>  	start &= PGDIR_MASK;
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index 0cda653..7c0fe73 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -337,6 +337,10 @@ static int __init setup_hugepagesz(char *string)
>  	hugepage_shift = ilog2(hugepage_size);
>  
>  	switch (hugepage_shift) {
> +	case HPAGE_16GB_SHIFT:
> +		hv_pgsz_mask = HV_PGSZ_MASK_16GB;
> +		hv_pgsz_idx = HV_PGSZ_IDX_16GB;
> +		break;
>  	case HPAGE_2GB_SHIFT:
>  		hv_pgsz_mask = HV_PGSZ_MASK_2GB;
>  		hv_pgsz_idx = HV_PGSZ_IDX_2GB;
> @@ -376,6 +380,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
>  {
>  	struct mm_struct *mm;
>  	unsigned long flags;
> +	bool is_huge_tsb;
>  	pte_t pte = *ptep;
>  
>  	if (tlb_type != hypervisor) {
> @@ -393,15 +398,37 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
>  
>  	spin_lock_irqsave(&mm->context.lock, flags);
>  
> +	is_huge_tsb = false;
>  #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
> -	if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
> -	    is_hugetlb_pmd(__pmd(pte_val(pte)))) {
> -		/* We are fabricating 8MB pages using 4MB real hw pages.  */
> -		pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
> -		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
> -					address, pte_val(pte));
> -	} else
> +	if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
> +		unsigned long hugepage_size = PAGE_SIZE;
> +
> +		if (is_vm_hugetlb_page(vma))
> +			hugepage_size = huge_page_size(hstate_vma(vma));
> +
> +		if (hugepage_size >= PUD_SIZE) {
> +			unsigned long mask = 0x1ffc00000UL;
> +
> +			/* Transfer bits [32:22] from address to resolve
> +			 * at 4M granularity.
> +			 */
> +			pte_val(pte) &= ~mask;
> +			pte_val(pte) |= (address & mask);
> +		} else if (hugepage_size >= PMD_SIZE) {
> +			/* We are fabricating 8MB pages using 4MB
> +			 * real hw pages.
> +			 */
> +			pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
> +		}
> +
> +		if (hugepage_size >= PMD_SIZE) {
> +			__update_mmu_tsb_insert(mm, MM_TSB_HUGE,
> +				REAL_HPAGE_SHIFT, address, pte_val(pte));
> +			is_huge_tsb = true;
> +		}
> +	}
>  #endif
> +	if (!is_huge_tsb)
>  		__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
>  					address, pte_val(pte));
>  
> -- 
> 2.9.2
> 
--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller May 25, 2017, 3:45 a.m. UTC | #2
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 24 May 2017 23:34:42 -0400

> [[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote:
> 
>> Orabug: 25362942
>> 
>> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
> 
> If this wasn't an accidental git send-email misfire, then there should
> be a long log indicating the use case, the performance increase, the
> testing that was done, etc. etc. 
> 
> Normally I'd not notice but since I was Cc'd I figured it was worth a
> mention -- for example the vendor ID above doesn't mean a thing to
> all the rest of us, hence why I suspect it was a git send-email misfire;
> sadly, I think we've all accidentally done that at least once....

Agreed.

No commit message whatsoever is basically unacceptable for something
like this.
--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nitin Gupta May 25, 2017, 4:08 a.m. UTC | #3
On 5/24/17 8:45 PM, David Miller wrote:
> From: Paul Gortmaker <paul.gortmaker@windriver.com>
> Date: Wed, 24 May 2017 23:34:42 -0400
> 
>> [[PATCH] sparc64: Add 16GB hugepage support] On 24/05/2017 (Wed 17:29) Nitin Gupta wrote:
>>
>>> Orabug: 25362942
>>>
>>> Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
>>
>> If this wasn't an accidental git send-email misfire, then there should
>> be a long log indicating the use case, the performance increase, the
>> testing that was done, etc. etc. 
>>
>> Normally I'd not notice but since I was Cc'd I figured it was worth a
>> mention -- for example the vendor ID above doesn't mean a thing to
>> all the rest of us, hence why I suspect it was a git send-email misfire;
>> sadly, I think we've all accidentally done that at least once....
> 
> Agreed.
> 
> No commit message whatsoever is basically unacceptable for something
> like this.
>

Ok, I will include usage, testing notes, performance numbers etc., in
v2 patch. Still, I do try to include "Orabug" for better tracking of
bugs internally; I hope that's okay.

Thanks,
Nitin

--
To unsubscribe from this list: send the line "unsubscribe sparclinux" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index 5961b2d..8ee1f97 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -17,6 +17,7 @@ 
 
 #define HPAGE_SHIFT		23
 #define REAL_HPAGE_SHIFT	22
+#define HPAGE_16GB_SHIFT	34
 #define HPAGE_2GB_SHIFT		31
 #define HPAGE_256MB_SHIFT	28
 #define HPAGE_64K_SHIFT		16
@@ -28,7 +29,7 @@ 
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 #define REAL_HPAGE_PER_HPAGE	(_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
-#define HUGE_MAX_HSTATE		4
+#define HUGE_MAX_HSTATE		5
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 6fbd931..2444b02 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -414,6 +414,11 @@  static inline bool is_hugetlb_pmd(pmd_t pmd)
 	return !!(pmd_val(pmd) & _PAGE_PMD_HUGE);
 }
 
+static inline bool is_hugetlb_pud(pud_t pud)
+{
+	return !!(pud_val(pud) & _PAGE_PUD_HUGE);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h
index 32258e0..fbd8da7 100644
--- a/arch/sparc/include/asm/tsb.h
+++ b/arch/sparc/include/asm/tsb.h
@@ -195,6 +195,36 @@  extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 nop; \
 699:
 
+	/* PUD has been loaded into REG1, interpret the value, seeing
+	 * if it is a HUGE PUD or a normal one.  If it is not valid
+	 * then jump to FAIL_LABEL.  If it is a HUGE PUD, and it
+	 * translates to a valid PTE, branch to PTE_LABEL.
+	 *
+	 * We have to propagate bits [32:22] from the virtual address
+	 * to resolve at 4M granularity.
+	 */
+#if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+	brz,pn		REG1, FAIL_LABEL;		\
+	 sethi		%uhi(_PAGE_PUD_HUGE), REG2;	\
+	sllx		REG2, 32, REG2;			\
+	andcc		REG1, REG2, %g0;		\
+	be,pt		%xcc, 700f;			\
+	 sethi		%hi(0x1ffc0000), REG2;		\
+	brgez,pn	REG1, FAIL_LABEL;		\
+	 sllx		REG2, 1, REG2;			\
+	brgez,pn	REG1, FAIL_LABEL;		\
+	 andn		REG1, REG2, REG1;		\
+	and		VADDR, REG2, REG2;		\
+	brlz,pt		REG1, PTE_LABEL;		\
+	 or		REG1, REG2, REG1;		\
+700:
+#else
+#define USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
+	brz,pn		REG1, FAIL_LABEL; \
+	 nop;
+#endif
+
 	/* PMD has been loaded into REG1, interpret the value, seeing
 	 * if it is a HUGE PMD or a normal one.  If it is not valid
 	 * then jump to FAIL_LABEL.  If it is a HUGE PMD, and it
@@ -209,14 +239,14 @@  extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	 sethi		%uhi(_PAGE_PMD_HUGE), REG2;	\
 	sllx		REG2, 32, REG2;			\
 	andcc		REG1, REG2, %g0;		\
-	be,pt		%xcc, 700f;			\
+	be,pt		%xcc, 701f;			\
 	 sethi		%hi(4 * 1024 * 1024), REG2;	\
 	brgez,pn	REG1, FAIL_LABEL;		\
 	 andn		REG1, REG2, REG1;		\
 	and		VADDR, REG2, REG2;		\
 	brlz,pt		REG1, PTE_LABEL;		\
 	 or		REG1, REG2, REG1;		\
-700:
+701:
 #else
 #define USER_PGTABLE_CHECK_PMD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, PTE_LABEL) \
 	brz,pn		REG1, FAIL_LABEL; \
@@ -242,6 +272,7 @@  extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end;
 	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
 	andn		REG2, 0x7, REG2; \
 	ldxa		[REG1 + REG2] ASI_PHYS_USE_EC, REG1; \
+	USER_PGTABLE_CHECK_PUD_HUGE(VADDR, REG1, REG2, FAIL_LABEL, 800f) \
 	brz,pn		REG1, FAIL_LABEL; \
 	 sllx		VADDR, 64 - (PMD_SHIFT + PMD_BITS), REG2; \
 	srlx		REG2, 64 - PAGE_SHIFT, REG2; \
diff --git a/arch/sparc/kernel/tsb.S b/arch/sparc/kernel/tsb.S
index 10689cf..a0a5a13 100644
--- a/arch/sparc/kernel/tsb.S
+++ b/arch/sparc/kernel/tsb.S
@@ -117,7 +117,7 @@  tsb_miss_page_table_walk_sun4v_fastpath:
 	/* Valid PTE is now in %g5.  */
 
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	sethi		%uhi(_PAGE_PMD_HUGE), %g7
+	sethi		%uhi(_PAGE_PMD_HUGE | _PAGE_PUD_HUGE), %g7
 	sllx		%g7, 32, %g7
 
 	andcc		%g5, %g7, %g0
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 7c29d38..62c1e62 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -143,6 +143,10 @@  static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift)
 	pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V;
 
 	switch (shift) {
+	case HPAGE_16GB_SHIFT:
+		hugepage_size = _PAGE_SZ16GB_4V;
+		pte_val(entry) |= _PAGE_PUD_HUGE;
+		break;
 	case HPAGE_2GB_SHIFT:
 		hugepage_size = _PAGE_SZ2GB_4V;
 		pte_val(entry) |= _PAGE_PMD_HUGE;
@@ -187,6 +191,9 @@  static unsigned int sun4v_huge_tte_to_shift(pte_t entry)
 	unsigned int shift;
 
 	switch (tte_szbits) {
+	case _PAGE_SZ16GB_4V:
+		shift = HPAGE_16GB_SHIFT;
+		break;
 	case _PAGE_SZ2GB_4V:
 		shift = HPAGE_2GB_SHIFT;
 		break;
@@ -263,7 +270,12 @@  pte_t *huge_pte_alloc(struct mm_struct *mm,
 
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
-	if (pud) {
+	if (!pud)
+		return NULL;
+
+	if (sz >= PUD_SIZE)
+		pte = (pte_t *)pud;
+	else {
 		pmd = pmd_alloc(mm, pud, addr);
 		if (!pmd)
 			return NULL;
@@ -288,12 +300,16 @@  pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	if (!pgd_none(*pgd)) {
 		pud = pud_offset(pgd, addr);
 		if (!pud_none(*pud)) {
-			pmd = pmd_offset(pud, addr);
-			if (!pmd_none(*pmd)) {
-				if (is_hugetlb_pmd(*pmd))
-					pte = (pte_t *)pmd;
-				else
-					pte = pte_offset_map(pmd, addr);
+			if (is_hugetlb_pud(*pud))
+				pte = (pte_t *)pud;
+			else {
+				pmd = pmd_offset(pud, addr);
+				if (!pmd_none(*pmd)) {
+					if (is_hugetlb_pmd(*pmd))
+						pte = (pte_t *)pmd;
+					else
+						pte = pte_offset_map(pmd, addr);
+				}
 			}
 		}
 	}
@@ -304,12 +320,20 @@  pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry)
 {
-	unsigned int i, nptes, orig_shift, shift;
-	unsigned long size;
+	unsigned int nptes, orig_shift, shift;
+	unsigned long i, size;
 	pte_t orig;
 
 	size = huge_tte_to_size(entry);
-	shift = size >= HPAGE_SIZE ? PMD_SHIFT : PAGE_SHIFT;
+
+	shift = PAGE_SHIFT;
+	if (size >= PUD_SIZE)
+		shift = PUD_SHIFT;
+	else if (size >= PMD_SIZE)
+		shift = PMD_SHIFT;
+	else
+		shift = PAGE_SHIFT;
+
 	nptes = size >> shift;
 
 	if (!pte_present(*ptep) && pte_present(entry))
@@ -332,19 +356,23 @@  void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep)
 {
-	unsigned int i, nptes, hugepage_shift;
+	unsigned int i, nptes, orig_shift, shift;
 	unsigned long size;
 	pte_t entry;
 
 	entry = *ptep;
 	size = huge_tte_to_size(entry);
-	if (size >= HPAGE_SIZE)
-		nptes = size >> PMD_SHIFT;
+
+	shift = PAGE_SHIFT;
+	if (size >= PUD_SIZE)
+		shift = PUD_SHIFT;
+	else if (size >= PMD_SIZE)
+		shift = PMD_SHIFT;
 	else
-		nptes = size >> PAGE_SHIFT;
+		shift = PAGE_SHIFT;
 
-	hugepage_shift = pte_none(entry) ? PAGE_SHIFT :
-		huge_tte_to_shift(entry);
+	nptes = size >> shift;
+	orig_shift = pte_none(entry) ? PAGE_SHIFT : huge_tte_to_shift(entry);
 
 	if (pte_present(entry))
 		mm->context.hugetlb_pte_count -= nptes;
@@ -353,11 +381,11 @@  pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	for (i = 0; i < nptes; i++)
 		ptep[i] = __pte(0UL);
 
-	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift);
+	maybe_tlb_batch_add(mm, addr, ptep, entry, 0, orig_shift);
 	/* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */
 	if (size == HPAGE_SIZE)
 		maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0,
-				    hugepage_shift);
+				    orig_shift);
 
 	return entry;
 }
@@ -370,7 +398,8 @@  int pmd_huge(pmd_t pmd)
 
 int pud_huge(pud_t pud)
 {
-	return 0;
+	return !pud_none(pud) &&
+		(pud_val(pud) & (_PAGE_VALID|_PAGE_PUD_HUGE)) != _PAGE_VALID;
 }
 
 static void hugetlb_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
@@ -434,8 +463,11 @@  static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
 		next = pud_addr_end(addr, end);
 		if (pud_none_or_clear_bad(pud))
 			continue;
-		hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
-				       ceiling);
+		if (is_hugetlb_pud(*pud))
+			pud_clear(pud);
+		else
+			hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
+					       ceiling);
 	} while (pud++, addr = next, addr != end);
 
 	start &= PGDIR_MASK;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 0cda653..7c0fe73 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -337,6 +337,10 @@  static int __init setup_hugepagesz(char *string)
 	hugepage_shift = ilog2(hugepage_size);
 
 	switch (hugepage_shift) {
+	case HPAGE_16GB_SHIFT:
+		hv_pgsz_mask = HV_PGSZ_MASK_16GB;
+		hv_pgsz_idx = HV_PGSZ_IDX_16GB;
+		break;
 	case HPAGE_2GB_SHIFT:
 		hv_pgsz_mask = HV_PGSZ_MASK_2GB;
 		hv_pgsz_idx = HV_PGSZ_IDX_2GB;
@@ -376,6 +380,7 @@  void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 {
 	struct mm_struct *mm;
 	unsigned long flags;
+	bool is_huge_tsb;
 	pte_t pte = *ptep;
 
 	if (tlb_type != hypervisor) {
@@ -393,15 +398,37 @@  void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *
 
 	spin_lock_irqsave(&mm->context.lock, flags);
 
+	is_huge_tsb = false;
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	if ((mm->context.hugetlb_pte_count || mm->context.thp_pte_count) &&
-	    is_hugetlb_pmd(__pmd(pte_val(pte)))) {
-		/* We are fabricating 8MB pages using 4MB real hw pages.  */
-		pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
-		__update_mmu_tsb_insert(mm, MM_TSB_HUGE, REAL_HPAGE_SHIFT,
-					address, pte_val(pte));
-	} else
+	if (mm->context.hugetlb_pte_count || mm->context.thp_pte_count) {
+		unsigned long hugepage_size = PAGE_SIZE;
+
+		if (is_vm_hugetlb_page(vma))
+			hugepage_size = huge_page_size(hstate_vma(vma));
+
+		if (hugepage_size >= PUD_SIZE) {
+			unsigned long mask = 0x1ffc00000UL;
+
+			/* Transfer bits [32:22] from address to resolve
+			 * at 4M granularity.
+			 */
+			pte_val(pte) &= ~mask;
+			pte_val(pte) |= (address & mask);
+		} else if (hugepage_size >= PMD_SIZE) {
+			/* We are fabricating 8MB pages using 4MB
+			 * real hw pages.
+			 */
+			pte_val(pte) |= (address & (1UL << REAL_HPAGE_SHIFT));
+		}
+
+		if (hugepage_size >= PMD_SIZE) {
+			__update_mmu_tsb_insert(mm, MM_TSB_HUGE,
+				REAL_HPAGE_SHIFT, address, pte_val(pte));
+			is_huge_tsb = true;
+		}
+	}
 #endif
+	if (!is_huge_tsb)
 		__update_mmu_tsb_insert(mm, MM_TSB_BASE, PAGE_SHIFT,
 					address, pte_val(pte));