diff mbox

[v2,19/20] x86/mm: Add speculative pagefault handling

Message ID 1503007519-26777-20-git-send-email-ldufour@linux.vnet.ibm.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Laurent Dufour Aug. 17, 2017, 10:05 p.m. UTC
From: Peter Zijlstra <peterz@infradead.org>

Try a speculative fault before acquiring mmap_sem, if it returns with
VM_FAULT_RETRY continue with the mmap_sem acquisition and do the
traditional fault.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

[Clearing of FAULT_FLAG_ALLOW_RETRY is now done in
 handle_speculative_fault()]
[Retry with usual fault path in the case VM_ERROR is returned by
 handle_speculative_fault(). This allows signal to be delivered]
Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
---
 arch/x86/include/asm/pgtable_types.h |  7 +++++++
 arch/x86/mm/fault.c                  | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+)

Comments

Anshuman Khandual Aug. 21, 2017, 7:29 a.m. UTC | #1
On 08/18/2017 03:35 AM, Laurent Dufour wrote:
> From: Peter Zijlstra <peterz@infradead.org>
> 
> Try a speculative fault before acquiring mmap_sem, if it returns with
> VM_FAULT_RETRY continue with the mmap_sem acquisition and do the
> traditional fault.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> 
> [Clearing of FAULT_FLAG_ALLOW_RETRY is now done in
>  handle_speculative_fault()]
> [Retry with usual fault path in the case VM_ERROR is returned by
>  handle_speculative_fault(). This allows signal to be delivered]
> Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
> ---
>  arch/x86/include/asm/pgtable_types.h |  7 +++++++
>  arch/x86/mm/fault.c                  | 19 +++++++++++++++++++
>  2 files changed, 26 insertions(+)
> 
> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
> index bf9638e1ee42..4fd2693a037e 100644
> --- a/arch/x86/include/asm/pgtable_types.h
> +++ b/arch/x86/include/asm/pgtable_types.h
> @@ -234,6 +234,13 @@ enum page_cache_mode {
>  #define PGD_IDENT_ATTR	 0x001		/* PRESENT (no other attributes) */
>  #endif
>  
> +/*
> + * Advertise that we call the Speculative Page Fault handler.
> + */
> +#ifdef CONFIG_X86_64
> +#define __HAVE_ARCH_CALL_SPF
> +#endif
> +
>  #ifdef CONFIG_X86_32
>  # include <asm/pgtable_32_types.h>
>  #else
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 2a1fa10c6a98..4c070b9a4362 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -1365,6 +1365,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
>  	if (error_code & PF_INSTR)
>  		flags |= FAULT_FLAG_INSTRUCTION;
>  
> +#ifdef __HAVE_ARCH_CALL_SPF
> +	if (error_code & PF_USER) {
> +		fault = handle_speculative_fault(mm, address, flags);
> +
> +		/*
> +		 * We also check against VM_FAULT_ERROR because we have to
> +		 * raise a signal by calling later mm_fault_error() which
> +		 * requires the vma pointer to be set. So in that case,
> +		 * we fall through the normal path.

Can't mm_fault_error() be called inside handle_speculative_fault() ?
Falling through the normal page fault path again just to raise a
signal seems overkill. Looking into mm_fault_error(), it seems they
are different for x86 and powerpc.

X86:

mm_fault_error(struct pt_regs *regs, unsigned long error_code,
               unsigned long address, struct vm_area_struct *vma,
               unsigned int fault)

powerpc:

mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)

Even in the case of X86, I guess we would have a reference to the faulting
VMA (after the SRCU search) which can be used to call this function
directly.
Laurent Dufour Aug. 29, 2017, 2:50 p.m. UTC | #2
On 21/08/2017 09:29, Anshuman Khandual wrote:
> On 08/18/2017 03:35 AM, Laurent Dufour wrote:
>> From: Peter Zijlstra <peterz@infradead.org>
>>
>> Try a speculative fault before acquiring mmap_sem, if it returns with
>> VM_FAULT_RETRY continue with the mmap_sem acquisition and do the
>> traditional fault.
>>
>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>
>> [Clearing of FAULT_FLAG_ALLOW_RETRY is now done in
>>  handle_speculative_fault()]
>> [Retry with usual fault path in the case VM_ERROR is returned by
>>  handle_speculative_fault(). This allows signal to be delivered]
>> Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
>> ---
>>  arch/x86/include/asm/pgtable_types.h |  7 +++++++
>>  arch/x86/mm/fault.c                  | 19 +++++++++++++++++++
>>  2 files changed, 26 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
>> index bf9638e1ee42..4fd2693a037e 100644
>> --- a/arch/x86/include/asm/pgtable_types.h
>> +++ b/arch/x86/include/asm/pgtable_types.h
>> @@ -234,6 +234,13 @@ enum page_cache_mode {
>>  #define PGD_IDENT_ATTR	 0x001		/* PRESENT (no other attributes) */
>>  #endif
>>  
>> +/*
>> + * Advertise that we call the Speculative Page Fault handler.
>> + */
>> +#ifdef CONFIG_X86_64
>> +#define __HAVE_ARCH_CALL_SPF
>> +#endif
>> +
>>  #ifdef CONFIG_X86_32
>>  # include <asm/pgtable_32_types.h>
>>  #else
>> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
>> index 2a1fa10c6a98..4c070b9a4362 100644
>> --- a/arch/x86/mm/fault.c
>> +++ b/arch/x86/mm/fault.c
>> @@ -1365,6 +1365,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
>>  	if (error_code & PF_INSTR)
>>  		flags |= FAULT_FLAG_INSTRUCTION;
>>  
>> +#ifdef __HAVE_ARCH_CALL_SPF
>> +	if (error_code & PF_USER) {
>> +		fault = handle_speculative_fault(mm, address, flags);
>> +
>> +		/*
>> +		 * We also check against VM_FAULT_ERROR because we have to
>> +		 * raise a signal by calling later mm_fault_error() which
>> +		 * requires the vma pointer to be set. So in that case,
>> +		 * we fall through the normal path.
> 
> Cant mm_fault_error() be called inside handle_speculative_fault() ?
> Falling through the normal page fault path again just to raise a
> signal seems overkill. Looking into mm_fault_error(), it seems they
> are different for x86 and powerpc.
> 
> X86:
> 
> mm_fault_error(struct pt_regs *regs, unsigned long error_code,
>                unsigned long address, struct vm_area_struct *vma,
>                unsigned int fault)
> 
> powerpc:
> 
> mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
> 
> Even in case of X86, I guess we would have reference to the faulting
> VMA (after the SRCU search) which can be used to call this function
> directly.

Yes I think this is doable in the case of x86.
Laurent Dufour Aug. 29, 2017, 2:58 p.m. UTC | #3
On 29/08/2017 16:50, Laurent Dufour wrote:
> On 21/08/2017 09:29, Anshuman Khandual wrote:
>> On 08/18/2017 03:35 AM, Laurent Dufour wrote:
>>> From: Peter Zijlstra <peterz@infradead.org>
>>>
>>> Try a speculative fault before acquiring mmap_sem, if it returns with
>>> VM_FAULT_RETRY continue with the mmap_sem acquisition and do the
>>> traditional fault.
>>>
>>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>>
>>> [Clearing of FAULT_FLAG_ALLOW_RETRY is now done in
>>>  handle_speculative_fault()]
>>> [Retry with usual fault path in the case VM_ERROR is returned by
>>>  handle_speculative_fault(). This allows signal to be delivered]
>>> Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
>>> ---
>>>  arch/x86/include/asm/pgtable_types.h |  7 +++++++
>>>  arch/x86/mm/fault.c                  | 19 +++++++++++++++++++
>>>  2 files changed, 26 insertions(+)
>>>
>>> diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
>>> index bf9638e1ee42..4fd2693a037e 100644
>>> --- a/arch/x86/include/asm/pgtable_types.h
>>> +++ b/arch/x86/include/asm/pgtable_types.h
>>> @@ -234,6 +234,13 @@ enum page_cache_mode {
>>>  #define PGD_IDENT_ATTR	 0x001		/* PRESENT (no other attributes) */
>>>  #endif
>>>  
>>> +/*
>>> + * Advertise that we call the Speculative Page Fault handler.
>>> + */
>>> +#ifdef CONFIG_X86_64
>>> +#define __HAVE_ARCH_CALL_SPF
>>> +#endif
>>> +
>>>  #ifdef CONFIG_X86_32
>>>  # include <asm/pgtable_32_types.h>
>>>  #else
>>> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
>>> index 2a1fa10c6a98..4c070b9a4362 100644
>>> --- a/arch/x86/mm/fault.c
>>> +++ b/arch/x86/mm/fault.c
>>> @@ -1365,6 +1365,24 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
>>>  	if (error_code & PF_INSTR)
>>>  		flags |= FAULT_FLAG_INSTRUCTION;
>>>  
>>> +#ifdef __HAVE_ARCH_CALL_SPF
>>> +	if (error_code & PF_USER) {
>>> +		fault = handle_speculative_fault(mm, address, flags);
>>> +
>>> +		/*
>>> +		 * We also check against VM_FAULT_ERROR because we have to
>>> +		 * raise a signal by calling later mm_fault_error() which
>>> +		 * requires the vma pointer to be set. So in that case,
>>> +		 * we fall through the normal path.
>>
>> Cant mm_fault_error() be called inside handle_speculative_fault() ?
>> Falling through the normal page fault path again just to raise a
>> signal seems overkill. Looking into mm_fault_error(), it seems they
>> are different for x86 and powerpc.
>>
>> X86:
>>
>> mm_fault_error(struct pt_regs *regs, unsigned long error_code,
>>                unsigned long address, struct vm_area_struct *vma,
>>                unsigned int fault)
>>
>> powerpc:
>>
>> mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
>>
>> Even in case of X86, I guess we would have reference to the faulting
>> VMA (after the SRCU search) which can be used to call this function
>> directly.
> 
> Yes I think this is doable in the case of x86.

Indeed this is not doable, as the vma pointer is not returned by
handle_speculative_fault(), and it is not possible to return it because
once srcu_read_unlock() is called, the pointer is no longer safe.
diff mbox

Patch

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index bf9638e1ee42..4fd2693a037e 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -234,6 +234,13 @@  enum page_cache_mode {
 #define PGD_IDENT_ATTR	 0x001		/* PRESENT (no other attributes) */
 #endif
 
+/*
+ * Advertise that we call the Speculative Page Fault handler.
+ */
+#ifdef CONFIG_X86_64
+#define __HAVE_ARCH_CALL_SPF
+#endif
+
 #ifdef CONFIG_X86_32
 # include <asm/pgtable_32_types.h>
 #else
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2a1fa10c6a98..4c070b9a4362 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1365,6 +1365,24 @@  __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	if (error_code & PF_INSTR)
 		flags |= FAULT_FLAG_INSTRUCTION;
 
+#ifdef __HAVE_ARCH_CALL_SPF
+	if (error_code & PF_USER) {
+		fault = handle_speculative_fault(mm, address, flags);
+
+		/*
+		 * We also check against VM_FAULT_ERROR because we have to
+		 * raise a signal by calling later mm_fault_error() which
+		 * requires the vma pointer to be set. So in that case,
+		 * we fall through the normal path.
+		 */
+		if (!(fault & VM_FAULT_RETRY || fault & VM_FAULT_ERROR)) {
+			perf_sw_event(PERF_COUNT_SW_SPF_DONE, 1,
+				      regs, address);
+			goto done;
+		}
+	}
+#endif /* __HAVE_ARCH_CALL_SPF */
+
 	/*
 	 * When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in
@@ -1474,6 +1492,7 @@  __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 		return;
 	}
 
+done:
 	/*
 	 * Major/minor page fault accounting. If any of the events
 	 * returned VM_FAULT_MAJOR, we account it as a major fault.