[20/25] powerpc: Handle exceptions caused by pkey violation

Message ID 1504910713-7094-29-git-send-email-linuxram@us.ibm.com
State Changes Requested
Headers show
Series
  • powerpc: Free up RPAGE_RSV bits
Related show

Commit Message

Ram Pai Sept. 8, 2017, 10:45 p.m.
Handle Data and  Instruction exceptions caused by memory
protection-key.

The CPU will detect the key fault if the HPTE is already
programmed with the key.

However if the HPTE is not  hashed, a key fault will not
be detected by the  hardware. The   software will detect
pkey violation in such a case.

Signed-off-by: Ram Pai <linuxram@us.ibm.com>
---
 arch/powerpc/mm/fault.c |   37 ++++++++++++++++++++++++++++++++-----
 1 files changed, 32 insertions(+), 5 deletions(-)

Comments

Balbir Singh Oct. 18, 2017, 11:27 p.m. | #1
On Fri,  8 Sep 2017 15:45:08 -0700
Ram Pai <linuxram@us.ibm.com> wrote:

> Handle Data and  Instruction exceptions caused by memory
> protection-key.
> 
> The CPU will detect the key fault if the HPTE is already
> programmed with the key.
> 
> However if the HPTE is not  hashed, a key fault will not
> be detected by the  hardware. The   software will detect
> pkey violation in such a case.


This bit is not clear.

> 
> Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> ---
>  arch/powerpc/mm/fault.c |   37 ++++++++++++++++++++++++++++++++-----
>  1 files changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 4797d08..a16bc43 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -145,6 +145,23 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
>  	return __bad_area(regs, address, SEGV_MAPERR);
>  }
>  
> +static int bad_page_fault_exception(struct pt_regs *regs, unsigned long address,
> +					int si_code)
> +{
> +	int sig = SIGBUS;
> +	int code = BUS_OBJERR;
> +
> +#ifdef CONFIG_PPC64_MEMORY_PROTECTION_KEYS
> +	if (si_code & DSISR_KEYFAULT) {
> +		sig = SIGSEGV;
> +		code = SEGV_PKUERR;
> +	}
> +#endif /* CONFIG_PPC64_MEMORY_PROTECTION_KEYS */
> +
> +	_exception(sig, regs, code, address);
> +	return 0;
> +}
> +
>  static int do_sigbus(struct pt_regs *regs, unsigned long address,
>  		     unsigned int fault)
>  {
> @@ -391,11 +408,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
>  		return 0;
>  
>  	if (unlikely(page_fault_is_bad(error_code))) {
> -		if (is_user) {
> -			_exception(SIGBUS, regs, BUS_OBJERR, address);
> -			return 0;
> -		}
> -		return SIGBUS;
> +		if (!is_user)
> +			return SIGBUS;
> +		return bad_page_fault_exception(regs, address, error_code);
>  	}
>  
>  	/* Additional sanity check(s) */
> @@ -492,6 +507,18 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
>  	if (unlikely(access_error(is_write, is_exec, vma)))
>  		return bad_area(regs, address);
>  
> +#ifdef CONFIG_PPC64_MEMORY_PROTECTION_KEYS
> +	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
> +			is_exec, 0))
> +		return __bad_area(regs, address, SEGV_PKUERR);


Hmm.. this is for the missing entry in the HPT and software detecting the
fault you mentioned above? Why do we need this case?

> +#endif /* CONFIG_PPC64_MEMORY_PROTECTION_KEYS */
> +
> +
> +	/* handle_mm_fault() needs to know if its a instruction access
> +	 * fault.
> +	 */

comment style

> +	if (is_exec)
> +		flags |= FAULT_FLAG_INSTRUCTION;
>  	/*
>  	 * If for any reason at all we couldn't handle the fault,
>  	 * make sure we exit gracefully rather than endlessly redo

Balbir Singh.
Ram Pai Oct. 19, 2017, 4:53 p.m. | #2
On Thu, Oct 19, 2017 at 10:27:52AM +1100, Balbir Singh wrote:
> On Fri,  8 Sep 2017 15:45:08 -0700
> Ram Pai <linuxram@us.ibm.com> wrote:
> 
> > Handle Data and  Instruction exceptions caused by memory
> > protection-key.
> > 
> > The CPU will detect the key fault if the HPTE is already
> > programmed with the key.
> > 
> > However if the HPTE is not  hashed, a key fault will not
> > be detected by the  hardware. The   software will detect
> > pkey violation in such a case.
> 
> 
> This bit is not clear.
> 
> > 
> > Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> > ---
> >  arch/powerpc/mm/fault.c |   37 ++++++++++++++++++++++++++++++++-----
> >  1 files changed, 32 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> > index 4797d08..a16bc43 100644
> > --- a/arch/powerpc/mm/fault.c
> > +++ b/arch/powerpc/mm/fault.c
> > @@ -145,6 +145,23 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
> >  	return __bad_area(regs, address, SEGV_MAPERR);
> >  }
> >  
> > +static int bad_page_fault_exception(struct pt_regs *regs, unsigned long address,
> > +					int si_code)
> > +{
> > +	int sig = SIGBUS;
> > +	int code = BUS_OBJERR;
> > +
> > +#ifdef CONFIG_PPC64_MEMORY_PROTECTION_KEYS
> > +	if (si_code & DSISR_KEYFAULT) {
> > +		sig = SIGSEGV;
> > +		code = SEGV_PKUERR;
> > +	}
> > +#endif /* CONFIG_PPC64_MEMORY_PROTECTION_KEYS */
> > +
> > +	_exception(sig, regs, code, address);
> > +	return 0;
> > +}
> > +
> >  static int do_sigbus(struct pt_regs *regs, unsigned long address,
> >  		     unsigned int fault)
> >  {
> > @@ -391,11 +408,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
> >  		return 0;
> >  
> >  	if (unlikely(page_fault_is_bad(error_code))) {
> > -		if (is_user) {
> > -			_exception(SIGBUS, regs, BUS_OBJERR, address);
> > -			return 0;
> > -		}
> > -		return SIGBUS;
> > +		if (!is_user)
> > +			return SIGBUS;
> > +		return bad_page_fault_exception(regs, address, error_code);
> >  	}
> >  
> >  	/* Additional sanity check(s) */
> > @@ -492,6 +507,18 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
> >  	if (unlikely(access_error(is_write, is_exec, vma)))
> >  		return bad_area(regs, address);
> >  
> > +#ifdef CONFIG_PPC64_MEMORY_PROTECTION_KEYS
> > +	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
> > +			is_exec, 0))
> > +		return __bad_area(regs, address, SEGV_PKUERR);
> 
> 
> Hmm.. this is for the missing entry in the HPT and software detecting the
> fault you mentioned above? Why do we need this case?

I thought I had put in a comment motivating the reason. Seems to have
disappeared. Will add it back.  But here is the reason....

hardware enforces key-exception only after the key is programmed into
the HPTE. However there is a window where the key is programmed into the
PTE and waiting for a page fault so that it can propagate key to the
HPTE. It is that window, during which we have to guard for key
violation. The above check closes that small window of vulnerability.
	
RP
Michael Ellerman Oct. 24, 2017, 3:47 p.m. | #3
Ram Pai <linuxram@us.ibm.com> writes:

> Handle Data and  Instruction exceptions caused by memory
> protection-key.
>
> The CPU will detect the key fault if the HPTE is already
> programmed with the key.
>
> However if the HPTE is not  hashed, a key fault will not
> be detected by the  hardware. The   software will detect
> pkey violation in such a case.

That seems like the wrong trade off to me.

It means every fault has to go through arch_vma_access_permitted(),
which is at least a function call in the best case, even when pkeys are
not in use, and/or the range in question is not protected by a key.

Why not instead just service the fault and let the hardware catch it?

If that access to a protected page is a bug then the process will
probably just exit, in which case who cares about the overhead of the
extra fault.

If the access is not currently allowed, but the process wants to take
the signal and do something to allow it, then is the overhead of the
extra fault going to matter vs the signal etc?

I think we should just let the hardware take a 2nd fault, unless someone
can demonstrate a workload where the cost of that is prohibitive.

Or does that not work for some reason?

> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> index 4797d08..a16bc43 100644
> --- a/arch/powerpc/mm/fault.c
> +++ b/arch/powerpc/mm/fault.c
> @@ -391,11 +408,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
>  		return 0;
>  
>  	if (unlikely(page_fault_is_bad(error_code))) {
> -		if (is_user) {
> -			_exception(SIGBUS, regs, BUS_OBJERR, address);
> -			return 0;
> -		}
> -		return SIGBUS;
> +		if (!is_user)
> +			return SIGBUS;
> +		return bad_page_fault_exception(regs, address, error_code);

I don't see why we want to handle the key fault here.

I know it's caught here at the moment, because it's in
DSISR_BAD_FAULT_64S, but I think now that we support keys we should
remove DSISR_KEYFAULT from DSISR_BAD_FAULT_64S.

Then we should let it fall through to further down, and handle it in
access_error().

That way eg. the page fault accounting will work as normal etc.

> @@ -492,6 +507,18 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
>  	if (unlikely(access_error(is_write, is_exec, vma)))
>  		return bad_area(regs, address);
>  
> +#ifdef CONFIG_PPC64_MEMORY_PROTECTION_KEYS
> +	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
> +			is_exec, 0))
> +		return __bad_area(regs, address, SEGV_PKUERR);
> +#endif /* CONFIG_PPC64_MEMORY_PROTECTION_KEYS */
> +
> +
> +	/* handle_mm_fault() needs to know if its a instruction access
> +	 * fault.
> +	 */
> +	if (is_exec)
> +		flags |= FAULT_FLAG_INSTRUCTION;

What is this doing here? We already do that further up.

cheers
Ram Pai Oct. 24, 2017, 6:26 p.m. | #4
On Tue, Oct 24, 2017 at 05:47:38PM +0200, Michael Ellerman wrote:
> Ram Pai <linuxram@us.ibm.com> writes:
> 
> > Handle Data and  Instruction exceptions caused by memory
> > protection-key.
> >
> > The CPU will detect the key fault if the HPTE is already
> > programmed with the key.
> >
> > However if the HPTE is not  hashed, a key fault will not
> > be detected by the  hardware. The   software will detect
> > pkey violation in such a case.
> 
> That seems like the wrong trade off to me.
> 
> It means every fault has to go through arch_vma_access_permitted(),
> which is at least a function call in the best case, even when pkeys are
> not in use, and/or the range in question is not protected by a key.
> 
> Why not instead just service the fault and let the hardware catch it?
> 
> If that access to a protected page is a bug then the process will
> probably just exit, in which case who cares about the overhead of the
> extra fault.
> 
> If the access is not currently allowed, but the process wants to take
> the signal and do something to allow it, then is the overhead of the
> extra fault going to matter vs the signal etc?
> 
> I think we should just let the hardware take a 2nd fault, unless someone
> can demonstrate a workload where the cost of that is prohibitive.
> 
> Or does that not work for some reason?

It does not work, because the arch-neutral code error-outs the fault
without letting the power-specific code to page-in the faulting page.

So neither does the page gets faulted, nor does the key-fault gets 
signalled.

> 
> > diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
> > index 4797d08..a16bc43 100644
> > --- a/arch/powerpc/mm/fault.c
> > +++ b/arch/powerpc/mm/fault.c
> > @@ -391,11 +408,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
> >  		return 0;
> >  
> >  	if (unlikely(page_fault_is_bad(error_code))) {
> > -		if (is_user) {
> > -			_exception(SIGBUS, regs, BUS_OBJERR, address);
> > -			return 0;
> > -		}
> > -		return SIGBUS;
> > +		if (!is_user)
> > +			return SIGBUS;
> > +		return bad_page_fault_exception(regs, address, error_code);
> 
> I don't see why we want to handle the key fault here.
> 
> I know it's caught here at the moment, because it's in
> DSISR_BAD_FAULT_64S, but I think now that we support keys we should
> remove DSISR_KEYFAULT from DSISR_BAD_FAULT_64S.
> 
> Then we should let it fall through to further down, and handle it in
> access_error().
> 
> That way eg. the page fault accounting will work as normal etc.

Bit tricky to do that. Will try. For one if I take out DSISR_KEYFAULT
from DSISR_BAD_FAULT_64S, than the assembly code in do_hash_page() will
not call  __do_page_fault().  We want it called, but somehow
special-handle DSISR_KEYFAULT to call access_error().

> 
> > @@ -492,6 +507,18 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
> >  	if (unlikely(access_error(is_write, is_exec, vma)))
> >  		return bad_area(regs, address);
> >  
> > +#ifdef CONFIG_PPC64_MEMORY_PROTECTION_KEYS
> > +	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
> > +			is_exec, 0))
> > +		return __bad_area(regs, address, SEGV_PKUERR);
> > +#endif /* CONFIG_PPC64_MEMORY_PROTECTION_KEYS */
> > +
> > +
> > +	/* handle_mm_fault() needs to know if its a instruction access
> > +	 * fault.
> > +	 */
> > +	if (is_exec)
> > +		flags |= FAULT_FLAG_INSTRUCTION;
> 
> What is this doing here? We already do that further up.

The more I think of this, I find that the code can be optimized and
remove redundancy.  I am unnecessarily called arch_vma_access_permitted()
above, when all the hardwork anyway gets done by handle_mm_fault().
handle_mm_fault() anyway calls arch_vma_access_permitted().

I could rather use the return value of handle_mm_fault() to signal
a key-error to the userspace.

RP
Aneesh Kumar K.V Oct. 29, 2017, 2:03 p.m. | #5
Michael Ellerman <mpe@ellerman.id.au> writes:

> Ram Pai <linuxram@us.ibm.com> writes:
>
>> Handle Data and  Instruction exceptions caused by memory
>> protection-key.
>>
>> The CPU will detect the key fault if the HPTE is already
>> programmed with the key.
>>
>> However if the HPTE is not  hashed, a key fault will not
>> be detected by the  hardware. The   software will detect
>> pkey violation in such a case.
>
> That seems like the wrong trade off to me.
>
> It means every fault has to go through arch_vma_access_permitted(),
> which is at least a function call in the best case, even when pkeys are
> not in use, and/or the range in question is not protected by a key.

We don't really need to call arch_vma_access_permitted() in
arch/powerpc/ do_page_fault(). Core kernel does that in
handle_mm_fault(). So if the first fault is a bad access handle_mm_fault
handle this. If it is a valid access we insert the right hash page table
entry and then we do a wrong access, we detect that a key fault in the
low level hash fault handler. IIUC, the call the
arch_vma_access_permitted() from arch/powerpc/ can go away?

-aneesh
Ram Pai Oct. 30, 2017, 12:37 a.m. | #6
On Sun, Oct 29, 2017 at 07:33:25PM +0530, Aneesh Kumar K.V wrote:
> Michael Ellerman <mpe@ellerman.id.au> writes:
> 
> > Ram Pai <linuxram@us.ibm.com> writes:
> >
> >> Handle Data and  Instruction exceptions caused by memory
> >> protection-key.
> >>
> >> The CPU will detect the key fault if the HPTE is already
> >> programmed with the key.
> >>
> >> However if the HPTE is not  hashed, a key fault will not
> >> be detected by the  hardware. The   software will detect
> >> pkey violation in such a case.
> >
> > That seems like the wrong trade off to me.
> >
> > It means every fault has to go through arch_vma_access_permitted(),
> > which is at least a function call in the best case, even when pkeys are
> > not in use, and/or the range in question is not protected by a key.
> 
> We don't really need to call arch_vma_access_permitted() in
> arch/powerpc/ do_page_fault(). Core kernel does that in
> handle_mm_fault(). So if the first fault is a bad access handle_mm_fault
> handle this. If it is a valid access we insert the right hash page table
> entry and then we do a wrong access, we detect that a key fault in the
> low level hash fault handler. IIUC, the call the
> arch_vma_access_permitted() from arch/powerpc/ can go away?


Yes. since handle_mm_fault() checks for key-violation, we can leverage
that in __do_page_fault(), instead of calling arch_vma_access_permitted().

Have fixed it in the next version of patches, about to to hit the mailing
list this week.

RP

Patch

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 4797d08..a16bc43 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -145,6 +145,23 @@  static noinline int bad_area(struct pt_regs *regs, unsigned long address)
 	return __bad_area(regs, address, SEGV_MAPERR);
 }
 
+static int bad_page_fault_exception(struct pt_regs *regs, unsigned long address,
+					int si_code)
+{
+	int sig = SIGBUS;
+	int code = BUS_OBJERR;
+
+#ifdef CONFIG_PPC64_MEMORY_PROTECTION_KEYS
+	if (si_code & DSISR_KEYFAULT) {
+		sig = SIGSEGV;
+		code = SEGV_PKUERR;
+	}
+#endif /* CONFIG_PPC64_MEMORY_PROTECTION_KEYS */
+
+	_exception(sig, regs, code, address);
+	return 0;
+}
+
 static int do_sigbus(struct pt_regs *regs, unsigned long address,
 		     unsigned int fault)
 {
@@ -391,11 +408,9 @@  static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 		return 0;
 
 	if (unlikely(page_fault_is_bad(error_code))) {
-		if (is_user) {
-			_exception(SIGBUS, regs, BUS_OBJERR, address);
-			return 0;
-		}
-		return SIGBUS;
+		if (!is_user)
+			return SIGBUS;
+		return bad_page_fault_exception(regs, address, error_code);
 	}
 
 	/* Additional sanity check(s) */
@@ -492,6 +507,18 @@  static int __do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (unlikely(access_error(is_write, is_exec, vma)))
 		return bad_area(regs, address);
 
+#ifdef CONFIG_PPC64_MEMORY_PROTECTION_KEYS
+	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
+			is_exec, 0))
+		return __bad_area(regs, address, SEGV_PKUERR);
+#endif /* CONFIG_PPC64_MEMORY_PROTECTION_KEYS */
+
+
+	/* handle_mm_fault() needs to know if its a instruction access
+	 * fault.
+	 */
+	if (is_exec)
+		flags |= FAULT_FLAG_INSTRUCTION;
 	/*
 	 * If for any reason at all we couldn't handle the fault,
 	 * make sure we exit gracefully rather than endlessly redo