diff mbox

[2/2] kvm: powerpc: set cache coherency only for kernel managed pages

Message ID 1374127456-9614-2-git-send-email-Bharat.Bhushan@freescale.com
State New, archived
Headers show

Commit Message

Bharat Bhushan July 18, 2013, 6:04 a.m. UTC
If there is a struct page for the requested mapping then it's
normal DDR and the mapping sets "M" bit (coherent, cacheable)
else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)

This helps setting proper TLB mapping for direct assigned device

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
 1 files changed, 12 insertions(+), 5 deletions(-)

Comments

Tiejun Chen July 18, 2013, 6:26 a.m. UTC | #1
On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> If there is a struct page for the requested mapping then it's
> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>
> This helps setting proper TLB mapping for direct assigned device
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>   1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 1c6a9d7..089c227 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
>   	return mas3;
>   }
>
> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>   {
> +	u32 mas2_attr;
> +
> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> +
> +	if (!pfn_valid(pfn)) {

Why not directly use kvm_is_mmio_pfn()?

Tiejun

> +		mas2_attr |= MAS2_I | MAS2_G;
> +	} else {
>   #ifdef CONFIG_SMP
> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> -#else
> -	return mas2 & MAS2_ATTRIB_MASK;
> +		mas2_attr |= MAS2_M;
>   #endif
> +	}
> +	return mas2_attr;
>   }
>
>   /*
> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>   	/* Force IPROT=0 for all guest mappings. */
>   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 7:31 a.m. UTC | #2
On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>> Sent: Thursday, July 18, 2013 11:56 AM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421; Bhushan Bharat-R65777
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>> If there is a struct page for the requested mapping then it's normal
>>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
>>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>>>
>>> This helps setting proper TLB mapping for direct assigned device
>>>
>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> ---
>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>> index 1c6a9d7..089c227 100644
>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int
>> usermode)
>>>    	return mas3;
>>>    }
>>>
>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>    {
>>> +	u32 mas2_attr;
>>> +
>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>> +
>>> +	if (!pfn_valid(pfn)) {
>>
>> Why not directly use kvm_is_mmio_pfn()?
>
> What I understand from this function (someone can correct me) is that it returns "false" when the page is managed by kernel and is not marked as RESERVED (for some reason). For us it does not matter whether the page is reserved or not, if it is kernel visible page then it is DDR.
>

I think you are setting I|G by addressing all mmio pages, right? If so,

     KVM: direct mmio pfn check

     Userspace may specify memory slots that are backed by mmio pages rather than
     normal RAM.  In some cases it is not enough to identify these mmio pages
     by pfn_valid().  This patch adds checking the PageReserved as well.

Tiejun

> -Bharat
>
>>
>> Tiejun
>>
>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>> +	} else {
>>>    #ifdef CONFIG_SMP
>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>> -#else
>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>> +		mas2_attr |= MAS2_M;
>>>    #endif
>>> +	}
>>> +	return mas2_attr;
>>>    }
>>>
>>>    /*
>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>    	/* Force IPROT=0 for all guest mappings. */
>>>    	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>    	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>    	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>    			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>
>>>
>>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 8:21 a.m. UTC | #3
On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of "“tiejun.chen”"
>> Sent: Thursday, July 18, 2013 1:01 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott- B07421; Bhushan Bharat-R65777
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>>
>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>> If there is a struct page for the requested mapping then it's normal
>>>>> DDR and the mapping sets "M" bit (coherent, cacheable) else this is
>>>>> treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>>>>>
>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>
>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>> ---
>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>> index 1c6a9d7..089c227 100644
>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32
>>>>> mas3, int
>>>> usermode)
>>>>>     	return mas3;
>>>>>     }
>>>>>
>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>     {
>>>>> +	u32 mas2_attr;
>>>>> +
>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>> +
>>>>> +	if (!pfn_valid(pfn)) {
>>>>
>>>> Why not directly use kvm_is_mmio_pfn()?
>>>
>>> What I understand from this function (someone can correct me) is that it
>> returns "false" when the page is managed by kernel and is not marked as RESERVED
>> (for some reason). For us it does not matter whether the page is reserved or
>> not, if it is kernel visible page then it is DDR.
>>>
>>
>> I think you are setting I|G by addressing all mmio pages, right? If so,
>>
>>       KVM: direct mmio pfn check
>>
>>       Userspace may specify memory slots that are backed by mmio pages rather
>> than
>>       normal RAM.  In some cases it is not enough to identify these mmio pages
>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>
> Do you know what are those "some cases" and how checking PageReserved helps in those cases?

No, myself didn't see these actual cases in qemu,too. But this should be 
chronically persistent as I understand ;-)

Tiejun

>
> -Bharat
>
>>
>> Tiejun
>>
>>> -Bharat
>>>
>>>>
>>>> Tiejun
>>>>
>>>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>>>> +	} else {
>>>>>     #ifdef CONFIG_SMP
>>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>>>> -#else
>>>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>>>> +		mas2_attr |= MAS2_M;
>>>>>     #endif
>>>>> +	}
>>>>> +	return mas2_attr;
>>>>>     }
>>>>>
>>>>>     /*
>>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>>>     	/* Force IPROT=0 for all guest mappings. */
>>>>>     	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>>>     	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>>>     	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>>>     			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>>>
>>>>>
>>>>
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body
>> of a message to majordomo@vger.kernel.org More majordomo info at
>> http://vger.kernel.org/majordomo-info.html
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan July 18, 2013, 8:22 a.m. UTC | #4
> -----Original Message-----

> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]

> Sent: Thursday, July 18, 2013 1:52 PM

> To: Bhushan Bharat-R65777

> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-

> B07421

> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel

> managed pages

> 

> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:

> >

> >

> >> -----Original Message-----

> >> From: kvm-ppc-owner@vger.kernel.org

> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"

> >> Sent: Thursday, July 18, 2013 1:01 PM

> >> To: Bhushan Bharat-R65777

> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood

> >> Scott-

> >> B07421

> >> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for

> >> kernel managed pages

> >>

> >> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:

> >>>

> >>>

> >>>> -----Original Message-----

> >>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]

> >>>> Sent: Thursday, July 18, 2013 11:56 AM

> >>>> To: Bhushan Bharat-R65777

> >>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;

> >>>> Wood

> >>>> Scott- B07421; Bhushan Bharat-R65777

> >>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for

> >>>> kernel managed pages

> >>>>

> >>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:

> >>>>> If there is a struct page for the requested mapping then it's

> >>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable) else

> >>>>> this is treated as I/O and we set  "I + G"  (cache inhibited,

> >>>>> guarded)

> >>>>>

> >>>>> This helps setting proper TLB mapping for direct assigned device

> >>>>>

> >>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>

> >>>>> ---

> >>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----

> >>>>>     1 files changed, 12 insertions(+), 5 deletions(-)

> >>>>>

> >>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c

> >>>>> b/arch/powerpc/kvm/e500_mmu_host.c

> >>>>> index 1c6a9d7..089c227 100644

> >>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c

> >>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c

> >>>>> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32

> >>>>> mas3, int

> >>>> usermode)

> >>>>>     	return mas3;

> >>>>>     }

> >>>>>

> >>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)

> >>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)

> >>>>>     {

> >>>>> +	u32 mas2_attr;

> >>>>> +

> >>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;

> >>>>> +

> >>>>> +	if (!pfn_valid(pfn)) {

> >>>>

> >>>> Why not directly use kvm_is_mmio_pfn()?

> >>>

> >>> What I understand from this function (someone can correct me) is

> >>> that it

> >> returns "false" when the page is managed by kernel and is not marked

> >> as RESERVED (for some reason). For us it does not matter whether the

> >> page is reserved or not, if it is kernel visible page then it is DDR.

> >>>

> >>

> >> I think you are setting I|G by addressing all mmio pages, right? If

> >> so,

> >>

> >>       KVM: direct mmio pfn check

> >>

> >>       Userspace may specify memory slots that are backed by mmio

> >> pages rather than

> >>       normal RAM.  In some cases it is not enough to identify these mmio

> pages

> >>       by pfn_valid().  This patch adds checking the PageReserved as well.

> >

> > Do you know what are those "some cases" and how checking PageReserved helps in

> those cases?

> 

> No, myself didn't see these actual cases in qemu,too. But this should be

> chronically persistent as I understand ;-)


Then I will wait till someone educate me :)

-Bharat

> 

> Tiejun

> 

> >

> > -Bharat

> >

> >>

> >> Tiejun

> >>

> >>> -Bharat

> >>>

> >>>>

> >>>> Tiejun

> >>>>

> >>>>> +		mas2_attr |= MAS2_I | MAS2_G;

> >>>>> +	} else {

> >>>>>     #ifdef CONFIG_SMP

> >>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;

> >>>>> -#else

> >>>>> -	return mas2 & MAS2_ATTRIB_MASK;

> >>>>> +		mas2_attr |= MAS2_M;

> >>>>>     #endif

> >>>>> +	}

> >>>>> +	return mas2_attr;

> >>>>>     }

> >>>>>

> >>>>>     /*

> >>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(

> >>>>>     	/* Force IPROT=0 for all guest mappings. */

> >>>>>     	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;

> >>>>>     	stlbe->mas2 = (gvaddr & MAS2_EPN) |

> >>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);

> >>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);

> >>>>>     	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |

> >>>>>     			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);

> >>>>>

> >>>>>

> >>>>

> >>>

> >>

> >> --

> >> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in

> >> the body of a message to majordomo@vger.kernel.org More majordomo

> >> info at http://vger.kernel.org/majordomo-info.html

> >

>
Tiejun Chen July 18, 2013, 8:27 a.m. UTC | #5
On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> If there is a struct page for the requested mapping then it's
> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> else this is treated as I/O and we set  "I + G"  (cache inhibited, guarded)
>
> This helps setting proper TLB mapping for direct assigned device
>
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>   arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>   1 files changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
> index 1c6a9d7..089c227 100644
> --- a/arch/powerpc/kvm/e500_mmu_host.c
> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> @@ -64,13 +64,20 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
>   	return mas3;
>   }
>
> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>   {
> +	u32 mas2_attr;
> +
> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> +
> +	if (!pfn_valid(pfn)) {
> +		mas2_attr |= MAS2_I | MAS2_G;
> +	} else {
>   #ifdef CONFIG_SMP
> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
> -#else
> -	return mas2 & MAS2_ATTRIB_MASK;
> +		mas2_attr |= MAS2_M;
>   #endif
> +	}

Additionally, in UP case this little chunk of code is equivalent to

	if (1) {
		mas2_attr |= MAS2_I | MAS2_G;
	} else {
	}

So you'd better wrapper MAS2_m in advance like,

#ifdef CONFIG_SMP
#define M_IF_SMP        MAS2_M
#else
#define M_IF_SMP        0
#endif

Then	
	if (1)
		mas2_attr |= MAS2_I | MAS2_G;
	else
		mas2_attr |= M_IF_SMP;

Tiejun

> +	return mas2_attr;
>   }
>
>   /*
> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>   	/* Force IPROT=0 for all guest mappings. */
>   	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>   	stlbe->mas2 = (gvaddr & MAS2_EPN) |
> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>   	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>   			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 8:55 a.m. UTC | #6
On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>
>
>> -----Original Message-----
>> From: Bhushan Bharat-R65777
>> Sent: Thursday, July 18, 2013 1:53 PM
>> To: '"“tiejun.chen”"'
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>>
>>
>>
>>> -----Original Message-----
>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>> Sent: Thursday, July 18, 2013 1:52 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>> Scott-
>>> B07421
>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>> kernel managed pages
>>>
>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>> Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>>
>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>> for kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>> inhibited,
>>>>>>>> guarded)
>>>>>>>>
>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>> usermode)
>>>>>>>>      	return mas3;
>>>>>>>>      }
>>>>>>>>
>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>> usermode)
>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>      {
>>>>>>>> +	u32 mas2_attr;
>>>>>>>> +
>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +
>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>
>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>
>>>>>> What I understand from this function (someone can correct me) is
>>>>>> that it
>>>>> returns "false" when the page is managed by kernel and is not
>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>> whether the page is reserved or not, if it is kernel visible page then it
>> is DDR.
>>>>>>
>>>>>
>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>> so,
>>>>>
>>>>>        KVM: direct mmio pfn check
>>>>>
>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>> pages rather than
>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>> mmio
>>> pages
>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>
>>>> Do you know what are those "some cases" and how checking
>>>> PageReserved helps in
>>> those cases?
>>>
>>> No, myself didn't see these actual cases in qemu,too. But this should
>>> be chronically persistent as I understand ;-)
>>
>> Then I will wait till someone educate me :)
>
> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.

Furthermore, how to distinguish we're creating TLB entry for the device assigned 
directly to the GS?

I think its unnecessary to always check if that is mmio's pfn since we have more 
non direct assigned devices.

So maybe we can introduce another helper to fixup that TLB entry in instead of 
this path.

Tiejun

>
> -Bharat
>
>>>>>>>> +		mas2_attr |= MAS2_I | MAS2_G;
>>>>>>>> +	} else {
>>>>>>>>      #ifdef CONFIG_SMP
>>>>>>>> -	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
>>>>>>>> -#else
>>>>>>>> -	return mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +		mas2_attr |= MAS2_M;
>>>>>>>>      #endif
>>>>>>>> +	}
>>>>>>>> +	return mas2_attr;
>>>>>>>>      }
>>>>>>>>
>>>>>>>>      /*
>>>>>>>> @@ -313,7 +320,7 @@ static void kvmppc_e500_setup_stlbe(
>>>>>>>>      	/* Force IPROT=0 for all guest mappings. */
>>>>>>>>      	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
>>>>>>>>      	stlbe->mas2 = (gvaddr & MAS2_EPN) |
>>>>>>>> -		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
>>>>>>>> +		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
>>>>>>>>      	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
>>>>>>>>      			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>> --
>>>>> To unsubscribe from this list: send the line "unsubscribe kvm-ppc"
>>>>> in the body of a message to majordomo@vger.kernel.org More
>>>>> majordomo info at http://vger.kernel.org/majordomo-info.html
>>>>
>>>
>

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 9:44 a.m. UTC | #7
On 18.07.2013, at 10:55, “tiejun.chen” wrote:

> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>> 
>> 
>>> -----Original Message-----
>>> From: Bhushan Bharat-R65777
>>> Sent: Thursday, July 18, 2013 1:53 PM
>>> To: '"“tiejun.chen”"'
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>> B07421
>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>> managed pages
>>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>> 
>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>> Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>> 
>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>> for kernel managed pages
>>>>>>>> 
>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>> inhibited,
>>>>>>>>> guarded)
>>>>>>>>> 
>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>> 
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>> usermode)
>>>>>>>>>     	return mas3;
>>>>>>>>>     }
>>>>>>>>> 
>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>> usermode)
>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>     {
>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>> +
>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>> +
>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>> 
>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>> 
>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>> that it
>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>> is DDR.
>>>>>>> 
>>>>>> 
>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>> so,
>>>>>> 
>>>>>>       KVM: direct mmio pfn check
>>>>>> 
>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>> pages rather than
>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>> mmio
>>>> pages
>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>> 
>>>>> Do you know what are those "some cases" and how checking
>>>>> PageReserved helps in
>>>> those cases?
>>>> 
>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>> be chronically persistent as I understand ;-)
>>> 
>>> Then I will wait till someone educate me :)
>> 
>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
> 
> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?

Because other devices wouldn't be available to the guest through memory slots.

> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.

I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)

> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.

This path does fix up the shadow (host) TLB entry :).


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 9:48 a.m. UTC | #8
On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Bhushan Bharat-R65777
>> Sent: Thursday, July 18, 2013 1:53 PM
>> To: '"“tiejun.chen”"'
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>> B07421
>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>> managed pages
>> 
>> 
>> 
>>> -----Original Message-----
>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>> Sent: Thursday, July 18, 2013 1:52 PM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>> Scott-
>>> B07421
>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>> kernel managed pages
>>> 
>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>> Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>> 
>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>> for kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>> inhibited,
>>>>>>>> guarded)
>>>>>>>> 
>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>> 
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>> usermode)
>>>>>>>>    	return mas3;
>>>>>>>>    }
>>>>>>>> 
>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>> usermode)
>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>    {
>>>>>>>> +	u32 mas2_attr;
>>>>>>>> +
>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>> +
>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>> 
>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>> 
>>>>>> What I understand from this function (someone can correct me) is
>>>>>> that it
>>>>> returns "false" when the page is managed by kernel and is not
>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>> whether the page is reserved or not, if it is kernel visible page then it
>> is DDR.
>>>>>> 
>>>>> 
>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>> so,
>>>>> 
>>>>>      KVM: direct mmio pfn check
>>>>> 
>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>> pages rather than
>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>> mmio
>>> pages
>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>> 
>>>> Do you know what are those "some cases" and how checking
>>>> PageReserved helps in
>>> those cases?
>>> 
>>> No, myself didn't see these actual cases in qemu,too. But this should
>>> be chronically persistent as I understand ;-)
>> 
>> Then I will wait till someone educate me :)
> 
> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.

It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:

  1) Non cache coherent DMA
  2) Memory hot remove

The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:

        depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
        default n if PPC_47x
        default y

so we never hit it with any core we care about ;).

Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.

Which means I think it's fine to slim this whole thing down to only check for pfn_valid(), as the code does in this patch. It would however be very useful to have a comment there that explains why it's safe to do so.



Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan July 18, 2013, 9:51 a.m. UTC | #9
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Thursday, July 18, 2013 3:19 PM
> To: Bhushan Bharat-R65777
> Cc: "“tiejun.chen”"; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-
> B07421
> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
> managed pages
> 
> 
> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
> 
> >
> >
> >> -----Original Message-----
> >> From: Bhushan Bharat-R65777
> >> Sent: Thursday, July 18, 2013 1:53 PM
> >> To: '"“tiejun.chen”"'
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
> >> Scott-
> >> B07421
> >> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >> kernel managed pages
> >>
> >>
> >>
> >>> -----Original Message-----
> >>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> >>> Sent: Thursday, July 18, 2013 1:52 PM
> >>> To: Bhushan Bharat-R65777
> >>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>> Wood
> >>> Scott-
> >>> B07421
> >>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
> >>> kernel managed pages
> >>>
> >>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
> >>>>
> >>>>
> >>>>> -----Original Message-----
> >>>>> From: kvm-ppc-owner@vger.kernel.org
> >>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "“tiejun.chen”"
> >>>>> Sent: Thursday, July 18, 2013 1:01 PM
> >>>>> To: Bhushan Bharat-R65777
> >>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>>>> Wood
> >>>>> Scott-
> >>>>> B07421
> >>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
> >>>>> for kernel managed pages
> >>>>>
> >>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
> >>>>>>
> >>>>>>
> >>>>>>> -----Original Message-----
> >>>>>>> From: "“tiejun.chen”" [mailto:tiejun.chen@windriver.com]
> >>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
> >>>>>>> To: Bhushan Bharat-R65777
> >>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
> >>>>>>> Wood
> >>>>>>> Scott- B07421; Bhushan Bharat-R65777
> >>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
> >>>>>>> for kernel managed pages
> >>>>>>>
> >>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
> >>>>>>>> If there is a struct page for the requested mapping then it's
> >>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
> >>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
> >>>>>>>> inhibited,
> >>>>>>>> guarded)
> >>>>>>>>
> >>>>>>>> This helps setting proper TLB mapping for direct assigned
> >>>>>>>> device
> >>>>>>>>
> >>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>>>>>>> ---
> >>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
> >>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> index 1c6a9d7..089c227 100644
> >>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
> >>>>>>>> @@ -64,13 +64,20 @@ static inline u32
> >>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
> >>>>>>> usermode)
> >>>>>>>>    	return mas3;
> >>>>>>>>    }
> >>>>>>>>
> >>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
> >>>>>>>> usermode)
> >>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
> >>>>>>>>    {
> >>>>>>>> +	u32 mas2_attr;
> >>>>>>>> +
> >>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
> >>>>>>>> +
> >>>>>>>> +	if (!pfn_valid(pfn)) {
> >>>>>>>
> >>>>>>> Why not directly use kvm_is_mmio_pfn()?
> >>>>>>
> >>>>>> What I understand from this function (someone can correct me) is
> >>>>>> that it
> >>>>> returns "false" when the page is managed by kernel and is not
> >>>>> marked as RESERVED (for some reason). For us it does not matter
> >>>>> whether the page is reserved or not, if it is kernel visible page
> >>>>> then it
> >> is DDR.
> >>>>>>
> >>>>>
> >>>>> I think you are setting I|G by addressing all mmio pages, right?
> >>>>> If so,
> >>>>>
> >>>>>      KVM: direct mmio pfn check
> >>>>>
> >>>>>      Userspace may specify memory slots that are backed by mmio
> >>>>> pages rather than
> >>>>>      normal RAM.  In some cases it is not enough to identify these
> >>>>> mmio
> >>> pages
> >>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
> >>>>
> >>>> Do you know what are those "some cases" and how checking
> >>>> PageReserved helps in
> >>> those cases?
> >>>
> >>> No, myself didn't see these actual cases in qemu,too. But this
> >>> should be chronically persistent as I understand ;-)
> >>
> >> Then I will wait till someone educate me :)
> >
> > The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not
> want to call this for all tlbwe operation unless it is necessary.
> 
> It certainly does more than we need and potentially slows down the fast path
> (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check
> for pages that are declared reserved on the host. This happens in 2 cases:
> 
>   1) Non cache coherent DMA
>   2) Memory hot remove
> 
> The non coherent DMA case would be interesting, as with the mechanism as it is
> in place in Linux today, we could potentially break normal guest operation if we
> don't take it into account. However, it's Kconfig guarded by:
> 
>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>         default n if PPC_47x
>         default y
> 
> so we never hit it with any core we care about ;).
> 
> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about
> that one either.
> 
> Which means I think it's fine to slim this whole thing down to only check for
> pfn_valid(), as the code does in this patch. It would however be very useful to
> have a comment there that explains why it's safe to do so.

Big thanks for the details :-)

Will add a comment.

-Bharat

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 9:56 a.m. UTC | #10
On 07/18/2013 05:44 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 10:55, �tiejun.chen� wrote:
>
>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Bhushan Bharat-R65777
>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>> To: '"�tiejun.chen�"'
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>> B07421
>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>> managed pages
>>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>>
>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>> for kernel managed pages
>>>>>>>>>
>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>> inhibited,
>>>>>>>>>> guarded)
>>>>>>>>>>
>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>> usermode)
>>>>>>>>>>      	return mas3;
>>>>>>>>>>      }
>>>>>>>>>>
>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>> usermode)
>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>      {
>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>> +
>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>> +
>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>
>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>
>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>> that it
>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>> is DDR.
>>>>>>>>
>>>>>>>
>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>> so,
>>>>>>>
>>>>>>>        KVM: direct mmio pfn check
>>>>>>>
>>>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>>>> pages rather than
>>>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>>>> mmio
>>>>> pages
>>>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>
>>>>>> Do you know what are those "some cases" and how checking
>>>>>> PageReserved helps in
>>>>> those cases?
>>>>>
>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>> be chronically persistent as I understand ;-)
>>>>
>>>> Then I will wait till someone educate me :)
>>>
>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>
>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>
> Because other devices wouldn't be available to the guest through memory slots.

Yes.

>
>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>
> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>

Sorry, looks I'm misleading you :-P

>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>
> This path does fix up the shadow (host) TLB entry :).
>

I just mean whether we can have a separate path dedicated to those direct 
assigned devices, not go this common path :)

Tiejun
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 10 a.m. UTC | #11
On 18.07.2013, at 11:56, “tiejun.chen” wrote:

> On 07/18/2013 05:44 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 10:55, �tiejun.chen� wrote:
>> 
>>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: Bhushan Bharat-R65777
>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>> To: '"�tiejun.chen�"'
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>> B07421
>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>> managed pages
>>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>> 
>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>> 
>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>> 
>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>> inhibited,
>>>>>>>>>>> guarded)
>>>>>>>>>>> 
>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>> 
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>> 
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>> usermode)
>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>     }
>>>>>>>>>>> 
>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>> usermode)
>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>     {
>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>> +
>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>> 
>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>> 
>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>> that it
>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>> is DDR.
>>>>>>>>> 
>>>>>>>> 
>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>> so,
>>>>>>>> 
>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>> 
>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>> pages rather than
>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>> mmio
>>>>>> pages
>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>> 
>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>> PageReserved helps in
>>>>>> those cases?
>>>>>> 
>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>> be chronically persistent as I understand ;-)
>>>>> 
>>>>> Then I will wait till someone educate me :)
>>>> 
>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>> 
>>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>> 
>> Because other devices wouldn't be available to the guest through memory slots.
> 
> Yes.
> 
>> 
>>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>> 
>> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>> 
> 
> Sorry, looks I'm misleading you :-P
> 
>>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>> 
>> This path does fix up the shadow (host) TLB entry :).
>> 
> 
> I just mean whether we can have a separate path dedicated to those direct assigned devices, not go this common path :)

I don't think it's possible to have a separate path without a certain level of trust. In the current flow we don't trust anyone. We just check every translated page whether we should enable caching or not.

We could take that information from 2 other side though:

  1) Memory Slot
  2) Guest TLB Flags

If we take it from the memory slot we would have to trust QEMU (or any other user space) to give us the right hints. Malicious user space could set invalid flags. Also we'd have to add logic to track this - which doesn't exist today.

If we take it from the guest we have to trust the guest. Malicious guests could set invalid flags.

Now why is setting invalid flags a problem? If I understand Scott correctly, it can break the host if you access certain host devices with caching enabled. But to be sure I'd say we ask him directly :).

Either way, not trusting anyone is definitely the safer choice.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 10:08 a.m. UTC | #12
On 07/18/2013 05:48 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>
>>
>>
>>> -----Original Message-----
>>> From: Bhushan Bharat-R65777
>>> Sent: Thursday, July 18, 2013 1:53 PM
>>> To: '"�tiejun.chen�"'
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>> B07421
>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>> managed pages
>>>
>>>
>>>
>>>> -----Original Message-----
>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>> Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>> kernel managed pages
>>>>
>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>> Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>>
>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>> for kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>> inhibited,
>>>>>>>>> guarded)
>>>>>>>>>
>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>> usermode)
>>>>>>>>>     	return mas3;
>>>>>>>>>     }
>>>>>>>>>
>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>> usermode)
>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>     {
>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>> +
>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>> +
>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>
>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>
>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>> that it
>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>> is DDR.
>>>>>>>
>>>>>>
>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>> so,
>>>>>>
>>>>>>       KVM: direct mmio pfn check
>>>>>>
>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>> pages rather than
>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>> mmio
>>>> pages
>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>
>>>>> Do you know what are those "some cases" and how checking
>>>>> PageReserved helps in
>>>> those cases?
>>>>
>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>> be chronically persistent as I understand ;-)
>>>
>>> Then I will wait till someone educate me :)
>>
>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>
> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>
>    1) Non cache coherent DMA
>    2) Memory hot remove
>
> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>
>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>          default n if PPC_47x
>          default y
>
> so we never hit it with any core we care about ;).
>
> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.

Thanks for this good information :)

So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() 
to make sure that check is only valid when that is really needed? This can 
decrease those unnecessary performance loss.

If I'm wrong please correct me :)

Tiejun
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 10:12 a.m. UTC | #13
On 18.07.2013, at 12:08, “tiejun.chen” wrote:

> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Bhushan Bharat-R65777
>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>> To: '"�tiejun.chen�"'
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>> B07421
>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>> managed pages
>>>> 
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>> Scott-
>>>>> B07421
>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>> kernel managed pages
>>>>> 
>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>> Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>> for kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>> inhibited,
>>>>>>>>>> guarded)
>>>>>>>>>> 
>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>> 
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>> 
>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>> usermode)
>>>>>>>>>>    	return mas3;
>>>>>>>>>>    }
>>>>>>>>>> 
>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>> usermode)
>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>    {
>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>> +
>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>> +
>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>> 
>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>> 
>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>> that it
>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>> is DDR.
>>>>>>>> 
>>>>>>> 
>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>> so,
>>>>>>> 
>>>>>>>      KVM: direct mmio pfn check
>>>>>>> 
>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>> pages rather than
>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>> mmio
>>>>> pages
>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>> 
>>>>>> Do you know what are those "some cases" and how checking
>>>>>> PageReserved helps in
>>>>> those cases?
>>>>> 
>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>> be chronically persistent as I understand ;-)
>>>> 
>>>> Then I will wait till someone educate me :)
>>> 
>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>> 
>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>> 
>>   1) Non cache coherent DMA
>>   2) Memory hot remove
>> 
>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>> 
>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>         default n if PPC_47x
>>         default y
>> 
>> so we never hit it with any core we care about ;).
>> 
>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
> 
> Thanks for this good information :)
> 
> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
> 
> If I'm wrong please correct me :)

You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.

I'd rather not like to break x86 :).

However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 10:14 a.m. UTC | #14
On 07/18/2013 06:00 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 11:56, “tiejun.chen” wrote:
>
>> On 07/18/2013 05:44 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 10:55, �tiejun.chen� wrote:
>>>
>>>> On 07/18/2013 04:25 PM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Bhushan Bharat-R65777
>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>> To: '"�tiejun.chen�"'
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>> B07421
>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>> managed pages
>>>>>>
>>>>>>
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>>
>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>>
>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>>
>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>> inhibited,
>>>>>>>>>>>> guarded)
>>>>>>>>>>>>
>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>      arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>      1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>> usermode)
>>>>>>>>>>>>      	return mas3;
>>>>>>>>>>>>      }
>>>>>>>>>>>>
>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>      {
>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>>
>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>>
>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>> that it
>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>> is DDR.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>> so,
>>>>>>>>>
>>>>>>>>>        KVM: direct mmio pfn check
>>>>>>>>>
>>>>>>>>>        Userspace may specify memory slots that are backed by mmio
>>>>>>>>> pages rather than
>>>>>>>>>        normal RAM.  In some cases it is not enough to identify these
>>>>>>>>> mmio
>>>>>>> pages
>>>>>>>>>        by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>>
>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>> PageReserved helps in
>>>>>>> those cases?
>>>>>>>
>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>> be chronically persistent as I understand ;-)
>>>>>>
>>>>>> Then I will wait till someone educate me :)
>>>>>
>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>>
>>>> Furthermore, how to distinguish we're creating TLB entry for the device assigned directly to the GS?
>>>
>>> Because other devices wouldn't be available to the guest through memory slots.
>>
>> Yes.
>>
>>>
>>>> I think its unnecessary to always check if that is mmio's pfn since we have more non direct assigned devices.
>>>
>>> I'm not sure I understand. The shadow TLB code only knows "here is a host virtual address". It needs to figure out whether the host physical address behind that is RAM (can access with cache enabled) or not (has to disable cache)
>>>
>>
>> Sorry, looks I'm misleading you :-P
>>
>>>> So maybe we can introduce another helper to fixup that TLB entry in instead of this path.
>>>
>>> This path does fix up the shadow (host) TLB entry :).
>>>
>>
>> I just mean whether we can have a separate path dedicated to those direct assigned devices, not go this common path :)
>
> I don't think it's possible to have a separate path without a certain level of trust. In the current flow we don't trust anyone. We just check every translated page whether we should enable caching or not.
>
> We could take that information from 2 other side though:
>
>    1) Memory Slot
>    2) Guest TLB Flags
>
> If we take it from the memory slot we would have to trust QEMU (or any other user space) to give us the right hints. Malicious user space could set invalid flags. Also we'd have to add logic to track this - which doesn't exist today.
>
> If we take it from the guest we have to trust the guest. Malicious guests could set invalid flags.

Understood.

>
> Now why is setting invalid flags a problem? If I understand Scott correctly, it can break the host if you access certain host devices with caching enabled. But to be sure I'd say we ask him directly :).

Yes, we should certainly set I|G for that TLB entry mapping to device.

>
> Either way, not trusting anyone is definitely the safer choice.

Definitely :)

Tiejun
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen July 18, 2013, 10:19 a.m. UTC | #15
On 07/18/2013 06:12 PM, Alexander Graf wrote:
>
> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>
>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>
>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>
>>>>
>>>>
>>>>> -----Original Message-----
>>>>> From: Bhushan Bharat-R65777
>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>> To: '"�tiejun.chen�"'
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>> B07421
>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>> managed pages
>>>>>
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>> Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>> kernel managed pages
>>>>>>
>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>> Wood
>>>>>>>> Scott-
>>>>>>>> B07421
>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>> kernel managed pages
>>>>>>>>
>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>> Wood
>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>> for kernel managed pages
>>>>>>>>>>
>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>> inhibited,
>>>>>>>>>>> guarded)
>>>>>>>>>>>
>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>>     arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>     1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>> usermode)
>>>>>>>>>>>     	return mas3;
>>>>>>>>>>>     }
>>>>>>>>>>>
>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>> usermode)
>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>     {
>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>> +
>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>> +
>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>
>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>
>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>> that it
>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>> is DDR.
>>>>>>>>>
>>>>>>>>
>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>> so,
>>>>>>>>
>>>>>>>>       KVM: direct mmio pfn check
>>>>>>>>
>>>>>>>>       Userspace may specify memory slots that are backed by mmio
>>>>>>>> pages rather than
>>>>>>>>       normal RAM.  In some cases it is not enough to identify these
>>>>>>>> mmio
>>>>>> pages
>>>>>>>>       by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>
>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>> PageReserved helps in
>>>>>> those cases?
>>>>>>
>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>> be chronically persistent as I understand ;-)
>>>>>
>>>>> Then I will wait till someone educate me :)
>>>>
>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>
>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>
>>>    1) Non cache coherent DMA
>>>    2) Memory hot remove
>>>
>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>
>>>          depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>          default n if PPC_47x
>>>          default y
>>>
>>> so we never hit it with any core we care about ;).
>>>
>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>
>> Thanks for this good information :)
>>
>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
>>
>> If I'm wrong please correct me :)
>
> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>
> I'd rather not like to break x86 :).
>
> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>

Often what case should be adopted to validate this scenario?

Tiejun

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf July 18, 2013, 10:27 a.m. UTC | #16
On 18.07.2013, at 12:19, “tiejun.chen” wrote:

> On 07/18/2013 06:12 PM, Alexander Graf wrote:
>> 
>> On 18.07.2013, at 12:08, “tiejun.chen” wrote:
>> 
>>> On 07/18/2013 05:48 PM, Alexander Graf wrote:
>>>> 
>>>> On 18.07.2013, at 10:25, Bhushan Bharat-R65777 wrote:
>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Bhushan Bharat-R65777
>>>>>> Sent: Thursday, July 18, 2013 1:53 PM
>>>>>> To: '"�tiejun.chen�"'
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood Scott-
>>>>>> B07421
>>>>>> Subject: RE: [PATCH 2/2] kvm: powerpc: set cache coherency only for kernel
>>>>>> managed pages
>>>>>> 
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>> Sent: Thursday, July 18, 2013 1:52 PM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Wood
>>>>>>> Scott-
>>>>>>> B07421
>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>> kernel managed pages
>>>>>>> 
>>>>>>> On 07/18/2013 04:08 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: kvm-ppc-owner@vger.kernel.org
>>>>>>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of "�tiejun.chen�"
>>>>>>>>> Sent: Thursday, July 18, 2013 1:01 PM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>> Wood
>>>>>>>>> Scott-
>>>>>>>>> B07421
>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only for
>>>>>>>>> kernel managed pages
>>>>>>>>> 
>>>>>>>>> On 07/18/2013 03:12 PM, Bhushan Bharat-R65777 wrote:
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>>> -----Original Message-----
>>>>>>>>>>> From: "�tiejun.chen�" [mailto:tiejun.chen@windriver.com]
>>>>>>>>>>> Sent: Thursday, July 18, 2013 11:56 AM
>>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de;
>>>>>>>>>>> Wood
>>>>>>>>>>> Scott- B07421; Bhushan Bharat-R65777
>>>>>>>>>>> Subject: Re: [PATCH 2/2] kvm: powerpc: set cache coherency only
>>>>>>>>>>> for kernel managed pages
>>>>>>>>>>> 
>>>>>>>>>>> On 07/18/2013 02:04 PM, Bharat Bhushan wrote:
>>>>>>>>>>>> If there is a struct page for the requested mapping then it's
>>>>>>>>>>>> normal DDR and the mapping sets "M" bit (coherent, cacheable)
>>>>>>>>>>>> else this is treated as I/O and we set  "I + G"  (cache
>>>>>>>>>>>> inhibited,
>>>>>>>>>>>> guarded)
>>>>>>>>>>>> 
>>>>>>>>>>>> This helps setting proper TLB mapping for direct assigned device
>>>>>>>>>>>> 
>>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>>> ---
>>>>>>>>>>>>    arch/powerpc/kvm/e500_mmu_host.c |   17 ++++++++++++-----
>>>>>>>>>>>>    1 files changed, 12 insertions(+), 5 deletions(-)
>>>>>>>>>>>> 
>>>>>>>>>>>> diff --git a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> index 1c6a9d7..089c227 100644
>>>>>>>>>>>> --- a/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> +++ b/arch/powerpc/kvm/e500_mmu_host.c
>>>>>>>>>>>> @@ -64,13 +64,20 @@ static inline u32
>>>>>>>>>>>> e500_shadow_mas3_attrib(u32 mas3, int
>>>>>>>>>>> usermode)
>>>>>>>>>>>>    	return mas3;
>>>>>>>>>>>>    }
>>>>>>>>>>>> 
>>>>>>>>>>>> -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int
>>>>>>>>>>>> usermode)
>>>>>>>>>>>> +static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
>>>>>>>>>>>>    {
>>>>>>>>>>>> +	u32 mas2_attr;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
>>>>>>>>>>>> +
>>>>>>>>>>>> +	if (!pfn_valid(pfn)) {
>>>>>>>>>>> 
>>>>>>>>>>> Why not directly use kvm_is_mmio_pfn()?
>>>>>>>>>> 
>>>>>>>>>> What I understand from this function (someone can correct me) is
>>>>>>>>>> that it
>>>>>>>>> returns "false" when the page is managed by kernel and is not
>>>>>>>>> marked as RESERVED (for some reason). For us it does not matter
>>>>>>>>> whether the page is reserved or not, if it is kernel visible page then it
>>>>>> is DDR.
>>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> I think you are setting I|G by addressing all mmio pages, right? If
>>>>>>>>> so,
>>>>>>>>> 
>>>>>>>>>      KVM: direct mmio pfn check
>>>>>>>>> 
>>>>>>>>>      Userspace may specify memory slots that are backed by mmio
>>>>>>>>> pages rather than
>>>>>>>>>      normal RAM.  In some cases it is not enough to identify these
>>>>>>>>> mmio
>>>>>>> pages
>>>>>>>>>      by pfn_valid().  This patch adds checking the PageReserved as well.
>>>>>>>> 
>>>>>>>> Do you know what are those "some cases" and how checking
>>>>>>>> PageReserved helps in
>>>>>>> those cases?
>>>>>>> 
>>>>>>> No, myself didn't see these actual cases in qemu,too. But this should
>>>>>>> be chronically persistent as I understand ;-)
>>>>>> 
>>>>>> Then I will wait till someone educate me :)
>>>>> 
>>>>> The reason is , kvm_is_mmio_pfn() function looks pretty heavy and I do not want to call this for all tlbwe operation unless it is necessary.
>>>> 
>>>> It certainly does more than we need and potentially slows down the fast path (RAM mapping). The only thing it does on top of "if (pfn_valid())" is to check for pages that are declared reserved on the host. This happens in 2 cases:
>>>> 
>>>>   1) Non cache coherent DMA
>>>>   2) Memory hot remove
>>>> 
>>>> The non coherent DMA case would be interesting, as with the mechanism as it is in place in Linux today, we could potentially break normal guest operation if we don't take it into account. However, it's Kconfig guarded by:
>>>> 
>>>>         depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>>>>         default n if PPC_47x
>>>>         default y
>>>> 
>>>> so we never hit it with any core we care about ;).
>>>> 
>>>> Memory hot remove does not exist on e500 FWIW, so we don't have to worry about that one either.
>>> 
>>> Thanks for this good information :)
>>> 
>>> So why not limit those codes with CONFIG_MEMORY_HOTPLUG inside kvm_is_mmio_pfn() to make sure that check is only valid when that is really needed? This can decrease those unnecessary performance loss.
>>> 
>>> If I'm wrong please correct me :)
>> 
>> You're perfectly right, but this is generic KVM code. So it gets run across all architectures. What if someone has the great idea to add a new case here for x86, but doesn't tell us? In that case we potentially break x86.
>> 
>> I'd rather not like to break x86 :).
>> 
>> However, it'd be very interesting to see a benchmark with this change. Do you think you could just rip out the whole reserved check and run a few benchmarks and show us the results?
>> 
> 
> Often what case should be adopted to validate this scenario?

Something which hammers the TLB emulation heavily. I usually just run /bin/echo a thousand times in "time" and see how long it takes ;)


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood July 18, 2013, 4:11 p.m. UTC | #17
On 07/18/2013 05:00:42 AM, Alexander Graf wrote:
> Now why is setting invalid flags a problem? If I understand Scott  
> correctly, it can break the host if you access certain host devices  
> with caching enabled. But to be sure I'd say we ask him directly :).

The architecture makes it illegal to mix cacheable and cache-inhibited  
mappings to the same physical page.  Mixing W or M bits is generally  
bad as well.  I've seen it cause machine checks, error interrupts, etc.  
-- not just corrupting the page in question.

-Scott
--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 1c6a9d7..089c227 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -64,13 +64,20 @@  static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
 	return mas3;
 }
 
-static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode)
+static inline u32 e500_shadow_mas2_attrib(u32 mas2, pfn_t pfn)
 {
+	u32 mas2_attr;
+
+	mas2_attr = mas2 & MAS2_ATTRIB_MASK;
+
+	if (!pfn_valid(pfn)) {
+		mas2_attr |= MAS2_I | MAS2_G;
+	} else {
 #ifdef CONFIG_SMP
-	return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M;
-#else
-	return mas2 & MAS2_ATTRIB_MASK;
+		mas2_attr |= MAS2_M;
 #endif
+	}
+	return mas2_attr;
 }
 
 /*
@@ -313,7 +320,7 @@  static void kvmppc_e500_setup_stlbe(
 	/* Force IPROT=0 for all guest mappings. */
 	stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
 	stlbe->mas2 = (gvaddr & MAS2_EPN) |
-		      e500_shadow_mas2_attrib(gtlbe->mas2, pr);
+		      e500_shadow_mas2_attrib(gtlbe->mas2, pfn);
 	stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
 			e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);