diff mbox series

[v4,4/4] target/ppc: Add support for Radix partition-scoped translation

Message ID 20200403140056.59465-5-clg@kaod.org
State New
Headers show
Series target/ppc: Add support for Radix partition-scoped translation | expand

Commit Message

Cédric Le Goater April 3, 2020, 2 p.m. UTC
The Radix tree translation model currently supports process-scoped
translation for the PowerNV machine (Hypervisor mode) and for the
pSeries machine (Guest mode). Guests running under an emulated
Hypervisor (PowerNV machine) require a new type of Radix translation,
called partition-scoped, which is missing today.

Radix tree translation is a two-step process. The first step,
process-scoped translation, converts an effective address to a guest
real address, and the second step, partition-scoped translation,
converts a guest real address to a host real address.

There are different cases to cover:

* Hypervisor real mode access: no Radix translation.

* Hypervisor or host application access (quadrants 0 and 3) with
  relocation on: process-scoped translation.

* Guest OS real mode access: only partition-scoped translation.

* Guest OS real or guest application access (quadrant 0 and 3) with
  relocation on: both process-scoped translation and partition-scoped
  translations.

* Hypervisor access in quadrants 1 and 2 with relocation on: both
  process-scoped and partition-scoped translation.

The radix tree partition-scoped translation is performed using tables
pointed to by the first double-word of the Partition Table Entries and
process-scoped translation uses tables pointed to by the Process Table
Entries (second double-word of the Partition Table Entries).

The partition-scoped and process-scoped translation processes are
identical, so the radix tree traversal code is largely reused.
However, errors in partition-scoped translation generate hypervisor
exceptions.

Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
Signed-off-by: Greg Kurz <groug@kaod.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 target/ppc/cpu.h         |   3 +
 target/ppc/excp_helper.c |   3 +-
 target/ppc/mmu-radix64.c | 188 +++++++++++++++++++++++++++++++++++----
 3 files changed, 175 insertions(+), 19 deletions(-)

Comments

Greg Kurz April 3, 2020, 3:11 p.m. UTC | #1
On Fri,  3 Apr 2020 16:00:56 +0200
Cédric Le Goater <clg@kaod.org> wrote:

> The Radix tree translation model currently supports process-scoped
> translation for the PowerNV machine (Hypervisor mode) and for the
> pSeries machine (Guest mode). Guests running under an emulated
> Hypervisor (PowerNV machine) require a new type of Radix translation,
> called partition-scoped, which is missing today.
> 
> The Radix tree translation is a 2 steps process. The first step,
> process-scoped translation, converts an effective Address to a guest
> real address, and the second step, partition-scoped translation,
> converts a guest real address to a host real address.
> 
> There are difference cases to covers :
> 
> * Hypervisor real mode access: no Radix translation.
> 
> * Hypervisor or host application access (quadrant 0 and 3) with
>   relocation on: process-scoped translation.
> 
> * Guest OS real mode access: only partition-scoped translation.
> 
> * Guest OS real or guest application access (quadrant 0 and 3) with
>   relocation on: both process-scoped translation and partition-scoped
>   translations.
> 
> * Hypervisor access in quadrant 1 and 2 with relocation on: both
>   process-scoped translation and partition-scoped translations.
> 
> The radix tree partition-scoped translation is performed using tables
> pointed to by the first double-word of the Partition Table Entries and
> process-scoped translation uses tables pointed to by the Process Table
> Entries (second double-word of the Partition Table Entries).
> 
> Both partition-scoped and process-scoped translations process are
> identical and thus the radix tree traversing code is largely reused.
> However, errors in partition-scoped translations generate hypervisor
> exceptions.
> 
> Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> Signed-off-by: Greg Kurz <groug@kaod.org>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> ---
>  target/ppc/cpu.h         |   3 +
>  target/ppc/excp_helper.c |   3 +-
>  target/ppc/mmu-radix64.c | 188 +++++++++++++++++++++++++++++++++++----
>  3 files changed, 175 insertions(+), 19 deletions(-)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index f4a5304d4356..6b6dd7e483f1 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -463,6 +463,9 @@ typedef struct ppc_v3_pate_t {
>  #define DSISR_AMR                0x00200000
>  /* Unsupported Radix Tree Configuration */
>  #define DSISR_R_BADCONFIG        0x00080000
> +#define DSISR_ATOMIC_RC          0x00040000
> +/* Unable to translate address of (guest) pde or process/page table entry */
> +#define DSISR_PRTABLE_FAULT      0x00020000
>  
>  /* SRR1 error code fields */
>  
> diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> index 1acc3786de0e..f05297966472 100644
> --- a/target/ppc/excp_helper.c
> +++ b/target/ppc/excp_helper.c
> @@ -506,9 +506,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      case POWERPC_EXCP_ISEG:      /* Instruction segment exception            */
>      case POWERPC_EXCP_TRACE:     /* Trace exception                          */
>          break;
> +    case POWERPC_EXCP_HISI:      /* Hypervisor instruction storage exception */
> +        msr |= env->error_code;
>      case POWERPC_EXCP_HDECR:     /* Hypervisor decrementer exception         */
>      case POWERPC_EXCP_HDSI:      /* Hypervisor data storage exception        */
> -    case POWERPC_EXCP_HISI:      /* Hypervisor instruction storage exception */
>      case POWERPC_EXCP_HDSEG:     /* Hypervisor data segment exception        */
>      case POWERPC_EXCP_HISEG:     /* Hypervisor instruction segment exception */
>      case POWERPC_EXCP_SDOOR_HV:  /* Hypervisor Doorbell interrupt            */
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 2400da41e06c..d473dc742e11 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -103,6 +103,27 @@ static void ppc_radix64_raise_si(PowerPCCPU *cpu, int rwx, vaddr eaddr,
>      }
>  }
>  
> +static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, int rwx, vaddr eaddr,
> +                                  hwaddr g_raddr, uint32_t cause)
> +{
> +    CPUState *cs = CPU(cpu);
> +    CPUPPCState *env = &cpu->env;
> +
> +    if (rwx == 2) { /* H Instruction Storage Interrupt */
> +        cs->exception_index = POWERPC_EXCP_HISI;
> +        env->spr[SPR_ASDR] = g_raddr;
> +        env->error_code = cause;
> +    } else { /* H Data Storage Interrupt */
> +        cs->exception_index = POWERPC_EXCP_HDSI;
> +        if (rwx == 1) { /* Write -> Store */
> +            cause |= DSISR_ISSTORE;
> +        }
> +        env->spr[SPR_HDSISR] = cause;
> +        env->spr[SPR_HDAR] = eaddr;
> +        env->spr[SPR_ASDR] = g_raddr;
> +        env->error_code = 0;
> +    }
> +}
>  
>  static bool ppc_radix64_check_prot(PowerPCCPU *cpu, int rwx, uint64_t pte,
>                                     int *fault_cause, int *prot,
> @@ -243,6 +264,37 @@ static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate)
>      return true;
>  }
>  
> +static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, int rwx,
> +                                              vaddr eaddr, hwaddr g_raddr,
> +                                              ppc_v3_pate_t pate,
> +                                              hwaddr *h_raddr, int *h_prot,
> +                                              int *h_page_size, bool pde_addr,
> +                                              bool cause_excp)
> +{
> +    int fault_cause = 0;
> +    hwaddr pte_addr;
> +    uint64_t pte;
> +
> +    *h_page_size = PRTBE_R_GET_RTS(pate.dw0);
> +    /* No valid pte or access denied due to protection */
> +    if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
> +                              pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
> +                              &pte, &fault_cause, &pte_addr) ||
> +        ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, h_prot, true)) {
> +        if (pde_addr) /* address being translated was that of a guest pde */
> +            fault_cause |= DSISR_PRTABLE_FAULT;
> +        if (cause_excp) {
> +            ppc_radix64_raise_hsi(cpu, rwx, eaddr, g_raddr, fault_cause);
> +        }
> +        return 1;
> +    }
> +
> +    /* Update Reference and Change Bits */
> +    ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot);
> +
> +    return 0;
> +}
> +
>  static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
>                                              vaddr eaddr, uint64_t pid,
>                                              ppc_v3_pate_t pate, hwaddr *g_raddr,
> @@ -250,9 +302,10 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
>                                              bool cause_excp)
>  {
>      CPUState *cs = CPU(cpu);
> -    uint64_t offset, size, prtbe_addr, prtbe0, pte;
> -    int fault_cause = 0;
> -    hwaddr pte_addr;
> +    CPUPPCState *env = &cpu->env;
> +    uint64_t offset, size, prtbe_addr, prtbe0, base_addr, nls, index, pte;
> +    int fault_cause = 0, h_page_size, h_prot;
> +    hwaddr h_raddr, pte_addr;
>      int ret;
>  
>      /* Index Process Table by PID to Find Corresponding Process Table Entry */
> @@ -266,18 +319,85 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
>          return 1;
>      }
>      prtbe_addr = (pate.dw1 & PATE1_R_PRTB) + offset;
> -    prtbe0 = ldq_phys(cs->as, prtbe_addr);
> +
> +    if (cpu->vhyp) {
> +        prtbe0 = ldq_phys(cs->as, prtbe_addr);
> +    } else {
> +        /*
> +         * Process table addresses are subject to partition-scoped
> +         * translation
> +         *
> +         * On a Radix host, the partition-scoped page table for LPID=0
> +         * is only used to translate the effective addresses of the
> +         * process table entries.
> +         */
> +        ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
> +                                                 pate, &h_raddr, &h_prot,
> +                                                 &h_page_size, 1, 1);
> +        if (ret) {
> +            return ret;
> +        }
> +        prtbe0 = ldq_phys(cs->as, h_raddr);
> +    }
>  
>      /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
>      *g_page_size = PRTBE_R_GET_RTS(prtbe0);
> -    ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK,
> -                                prtbe0 & PRTBE_R_RPDB, prtbe0 & PRTBE_R_RPDS,
> -                                g_raddr, g_page_size, &pte, &fault_cause,
> -                                &pte_addr);
> -
> -    if (ret ||
> -        ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, g_prot, false)) {
> -        /* No valid pte or access denied due to protection */
> +    base_addr = prtbe0 & PRTBE_R_RPDB;
> +    nls = prtbe0 & PRTBE_R_RPDS;
> +    if (msr_hv || cpu->vhyp) {
> +        /*
> +         * Can treat process table addresses as real addresses
> +         */
> +        ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK, base_addr,
> +                                    nls, g_raddr, g_page_size, &pte,
> +                                    &fault_cause, &pte_addr);
> +        if (ret) {
> +            /* No valid PTE */
> +            if (cause_excp) {
> +                ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
> +            }
> +            return ret;
> +        }
> +    } else {
> +        uint64_t rpn, mask;
> +
> +        index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */
> +        index &= ((1UL << nls) - 1);                            /* Mask */
> +        pte_addr = base_addr + (index * sizeof(pte));
> +
> +        /*
> +         * Each process table address is subject to a partition-scoped
> +         * translation
> +         */
> +        do {
> +            ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
> +                                                     pate, &h_raddr, &h_prot,
> +                                                     &h_page_size, 1, 1);
> +            if (ret) {
> +                return ret;
> +            }
> +
> +            ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, &h_raddr,
> +                                         &nls, g_page_size, &pte, &fault_cause);
> +            if (ret) {
> +                /* No valid pte */
> +                if (cause_excp) {
> +                    ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
> +                }
> +                return ret;
> +            }
> +            pte_addr = h_raddr;
> +        } while (!(pte & R_PTE_LEAF));
> +
> +        rpn = pte & R_PTE_RPN;
> +        mask = (1UL << *g_page_size) - 1;
> +
> +        /* Or high bits of rpn and low bits to ea to form whole real addr */
> +        *g_raddr = (rpn & ~mask) | (eaddr & mask);
> +    }
> +
> +    if (ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, g_prot, false)) {
> +        /* Access denied due to protection */
>          if (cause_excp) {
>              ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
>          }
> @@ -289,11 +409,29 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
>      return 0;
>  }
>  
> +/*
> + * Radix tree translation is a 2 steps translation process:
> + *
> + * 1. Process-scoped translation:   Guest Eff Addr  -> Guest Real Addr
> + * 2. Partition-scoped translation: Guest Real Addr -> Host Real Addr
> + *
> + *                                  MSR[HV]
> + *              +-------------+----------------+---------------+
> + *              |             |     HV = 0     |     HV = 1    |
> + *              +-------------+----------------+---------------+
> + *              | Relocation  |    Partition   |      No       |
> + *              | = Off       |     Scoped     |  Translation  |
> + *  Relocation  +-------------+----------------+---------------+
> + *              | Relocation  |   Partition &  |    Process    |
> + *              | = On        | Process Scoped |    Scoped     |
> + *              +-------------+----------------+---------------+
> + */
>  static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
>                               bool relocation,
>                               hwaddr *raddr, int *psizep, int *protp,
>                               bool cause_excp)
>  {
> +    CPUPPCState *env = &cpu->env;
>      uint64_t lpid = 0, pid = 0;
>      ppc_v3_pate_t pate;
>      int psize, prot;
> @@ -325,11 +463,6 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
>              }
>              return 1;
>          }
> -        /* We don't support guest mode yet */
> -        if (lpid != 0) {
> -            error_report("PowerNV guest support Unimplemented");
> -            exit(1);
> -        }
>      }
>  
>      *psizep = INT_MAX;
> @@ -340,6 +473,8 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
>       *
>       * - Translates an effective address to a host real address in
>       *   quadrants 0 and 3 when HV=1.
> +     *
> +     * - Translates an effective address to a guest real address.
>       */
>      if (relocation) {
>          int ret = ppc_radix64_process_scoped_xlate(cpu, rwx, eaddr, pid,
> @@ -354,7 +489,24 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
>          g_raddr = eaddr & R_EADDR_MASK;
>      }
>  
> -    *raddr = g_raddr;
> +    /*
> +     * Perform partition-scoped translation if !HV or HV access to
> +     * quadrants 1 or 2. Translates a guest real address to a host
> +     * real address.
> +     */
> +    if ((lpid != 0) || (!cpu->vhyp && !msr_hv)) {

This check is too complex for my taste. Also it doesn't seem right
to look at lpid if the machine is pseries, even if it would happen
to work because pseries cannot have lpid != 0. I think we should
have distinct paths for powernv and pseries.

A bit like with the following squashed in:

=======================================
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -489,22 +489,28 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
         g_raddr = eaddr & R_EADDR_MASK;
     }
 
-    /*
-     * Perform partition-scoped translation if !HV or HV access to
-     * quadrants 1 or 2. Translates a guest real address to a host
-     * real address.
-     */
-    if ((lpid != 0) || (!cpu->vhyp && !msr_hv)) {
-        int ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
+    if (cpu->vhyp) {
+        *raddr = g_raddr;
+    } else {
+        /*
+         * Perform partition-scoped translation if !HV or HV access to
+         * quadrants 1 or 2. Translates a guest real address to a host
+         * real address.
+         */
+        if (lpid || !msr_hv) {
+            int ret;
+
+            ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
                                                      pate, raddr, &prot, &psize,
                                                      0, cause_excp);
-        if (ret) {
-            return ret;
+            if (ret) {
+                return ret;
+            }
+            *psizep = MIN(*psizep, psize);
+            *protp &= prot;
+        } else {
+            *raddr = g_raddr;
         }
-        *psizep = MIN(*psizep, psize);
-        *protp &= prot;
-    } else {
-        *raddr = g_raddr;
     }
 
     return 0;
=======================================

David,

If my comment makes sense to you, can you squash the above fix into
Cédric's patch?

Cheers,

--
Greg

> +        int ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
> +                                                     pate, raddr, &prot, &psize,
> +                                                     0, cause_excp);
> +        if (ret) {
> +            return ret;
> +        }
> +        *psizep = MIN(*psizep, psize);
> +        *protp &= prot;
> +    } else {
> +        *raddr = g_raddr;
> +    }
> +
>      return 0;
>  }
>
David Gibson April 8, 2020, 3:09 a.m. UTC | #2
On Fri, Apr 03, 2020 at 05:11:29PM +0200, Greg Kurz wrote:
> On Fri,  3 Apr 2020 16:00:56 +0200
> Cédric Le Goater <clg@kaod.org> wrote:
> 
> > The Radix tree translation model currently supports process-scoped
> > translation for the PowerNV machine (Hypervisor mode) and for the
> > pSeries machine (Guest mode). Guests running under an emulated
> > Hypervisor (PowerNV machine) require a new type of Radix translation,
> > called partition-scoped, which is missing today.
> > 
> > The Radix tree translation is a 2 steps process. The first step,
> > process-scoped translation, converts an effective Address to a guest
> > real address, and the second step, partition-scoped translation,
> > converts a guest real address to a host real address.
> > 
> > There are difference cases to covers :
> > 
> > * Hypervisor real mode access: no Radix translation.
> > 
> > * Hypervisor or host application access (quadrant 0 and 3) with
> >   relocation on: process-scoped translation.
> > 
> > * Guest OS real mode access: only partition-scoped translation.
> > 
> > * Guest OS real or guest application access (quadrant 0 and 3) with
> >   relocation on: both process-scoped translation and partition-scoped
> >   translations.
> > 
> > * Hypervisor access in quadrant 1 and 2 with relocation on: both
> >   process-scoped translation and partition-scoped translations.
> > 
> > The radix tree partition-scoped translation is performed using tables
> > pointed to by the first double-word of the Partition Table Entries and
> > process-scoped translation uses tables pointed to by the Process Table
> > Entries (second double-word of the Partition Table Entries).
> > 
> > Both partition-scoped and process-scoped translations process are
> > identical and thus the radix tree traversing code is largely reused.
> > However, errors in partition-scoped translations generate hypervisor
> > exceptions.
> > 
> > Signed-off-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> > Signed-off-by: Greg Kurz <groug@kaod.org>
> > Signed-off-by: Cédric Le Goater <clg@kaod.org>
> > ---
> >  target/ppc/cpu.h         |   3 +
> >  target/ppc/excp_helper.c |   3 +-
> >  target/ppc/mmu-radix64.c | 188 +++++++++++++++++++++++++++++++++++----
> >  3 files changed, 175 insertions(+), 19 deletions(-)
> > 
> > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > index f4a5304d4356..6b6dd7e483f1 100644
> > --- a/target/ppc/cpu.h
> > +++ b/target/ppc/cpu.h
> > @@ -463,6 +463,9 @@ typedef struct ppc_v3_pate_t {
> >  #define DSISR_AMR                0x00200000
> >  /* Unsupported Radix Tree Configuration */
> >  #define DSISR_R_BADCONFIG        0x00080000
> > +#define DSISR_ATOMIC_RC          0x00040000
> > +/* Unable to translate address of (guest) pde or process/page table entry */
> > +#define DSISR_PRTABLE_FAULT      0x00020000
> >  
> >  /* SRR1 error code fields */
> >  
> > diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
> > index 1acc3786de0e..f05297966472 100644
> > --- a/target/ppc/excp_helper.c
> > +++ b/target/ppc/excp_helper.c
> > @@ -506,9 +506,10 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
> >      case POWERPC_EXCP_ISEG:      /* Instruction segment exception            */
> >      case POWERPC_EXCP_TRACE:     /* Trace exception                          */
> >          break;
> > +    case POWERPC_EXCP_HISI:      /* Hypervisor instruction storage exception */
> > +        msr |= env->error_code;
> >      case POWERPC_EXCP_HDECR:     /* Hypervisor decrementer exception         */
> >      case POWERPC_EXCP_HDSI:      /* Hypervisor data storage exception        */
> > -    case POWERPC_EXCP_HISI:      /* Hypervisor instruction storage exception */
> >      case POWERPC_EXCP_HDSEG:     /* Hypervisor data segment exception        */
> >      case POWERPC_EXCP_HISEG:     /* Hypervisor instruction segment exception */
> >      case POWERPC_EXCP_SDOOR_HV:  /* Hypervisor Doorbell interrupt            */
> > diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> > index 2400da41e06c..d473dc742e11 100644
> > --- a/target/ppc/mmu-radix64.c
> > +++ b/target/ppc/mmu-radix64.c
> > @@ -103,6 +103,27 @@ static void ppc_radix64_raise_si(PowerPCCPU *cpu, int rwx, vaddr eaddr,
> >      }
> >  }
> >  
> > +static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, int rwx, vaddr eaddr,
> > +                                  hwaddr g_raddr, uint32_t cause)
> > +{
> > +    CPUState *cs = CPU(cpu);
> > +    CPUPPCState *env = &cpu->env;
> > +
> > +    if (rwx == 2) { /* H Instruction Storage Interrupt */
> > +        cs->exception_index = POWERPC_EXCP_HISI;
> > +        env->spr[SPR_ASDR] = g_raddr;
> > +        env->error_code = cause;
> > +    } else { /* H Data Storage Interrupt */
> > +        cs->exception_index = POWERPC_EXCP_HDSI;
> > +        if (rwx == 1) { /* Write -> Store */
> > +            cause |= DSISR_ISSTORE;
> > +        }
> > +        env->spr[SPR_HDSISR] = cause;
> > +        env->spr[SPR_HDAR] = eaddr;
> > +        env->spr[SPR_ASDR] = g_raddr;
> > +        env->error_code = 0;
> > +    }
> > +}
> >  
> >  static bool ppc_radix64_check_prot(PowerPCCPU *cpu, int rwx, uint64_t pte,
> >                                     int *fault_cause, int *prot,
> > @@ -243,6 +264,37 @@ static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate)
> >      return true;
> >  }
> >  
> > +static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, int rwx,
> > +                                              vaddr eaddr, hwaddr g_raddr,
> > +                                              ppc_v3_pate_t pate,
> > +                                              hwaddr *h_raddr, int *h_prot,
> > +                                              int *h_page_size, bool pde_addr,
> > +                                              bool cause_excp)
> > +{
> > +    int fault_cause = 0;
> > +    hwaddr pte_addr;
> > +    uint64_t pte;
> > +
> > +    *h_page_size = PRTBE_R_GET_RTS(pate.dw0);
> > +    /* No valid pte or access denied due to protection */
> > +    if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
> > +                              pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
> > +                              &pte, &fault_cause, &pte_addr) ||
> > +        ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, h_prot, true)) {
> > +        if (pde_addr) /* address being translated was that of a guest pde */
> > +            fault_cause |= DSISR_PRTABLE_FAULT;
> > +        if (cause_excp) {
> > +            ppc_radix64_raise_hsi(cpu, rwx, eaddr, g_raddr, fault_cause);
> > +        }
> > +        return 1;
> > +    }
> > +
> > +    /* Update Reference and Change Bits */
> > +    ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot);
> > +
> > +    return 0;
> > +}
> > +
> >  static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
> >                                              vaddr eaddr, uint64_t pid,
> >                                              ppc_v3_pate_t pate, hwaddr *g_raddr,
> > @@ -250,9 +302,10 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
> >                                              bool cause_excp)
> >  {
> >      CPUState *cs = CPU(cpu);
> > -    uint64_t offset, size, prtbe_addr, prtbe0, pte;
> > -    int fault_cause = 0;
> > -    hwaddr pte_addr;
> > +    CPUPPCState *env = &cpu->env;
> > +    uint64_t offset, size, prtbe_addr, prtbe0, base_addr, nls, index, pte;
> > +    int fault_cause = 0, h_page_size, h_prot;
> > +    hwaddr h_raddr, pte_addr;
> >      int ret;
> >  
> >      /* Index Process Table by PID to Find Corresponding Process Table Entry */
> > @@ -266,18 +319,85 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
> >          return 1;
> >      }
> >      prtbe_addr = (pate.dw1 & PATE1_R_PRTB) + offset;
> > -    prtbe0 = ldq_phys(cs->as, prtbe_addr);
> > +
> > +    if (cpu->vhyp) {
> > +        prtbe0 = ldq_phys(cs->as, prtbe_addr);
> > +    } else {
> > +        /*
> > +         * Process table addresses are subject to partition-scoped
> > +         * translation
> > +         *
> > +         * On a Radix host, the partition-scoped page table for LPID=0
> > +         * is only used to translate the effective addresses of the
> > +         * process table entries.
> > +         */
> > +        ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
> > +                                                 pate, &h_raddr, &h_prot,
> > +                                                 &h_page_size, 1, 1);
> > +        if (ret) {
> > +            return ret;
> > +        }
> > +        prtbe0 = ldq_phys(cs->as, h_raddr);
> > +    }
> >  
> >      /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
> >      *g_page_size = PRTBE_R_GET_RTS(prtbe0);
> > -    ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK,
> > -                                prtbe0 & PRTBE_R_RPDB, prtbe0 & PRTBE_R_RPDS,
> > -                                g_raddr, g_page_size, &pte, &fault_cause,
> > -                                &pte_addr);
> > -
> > -    if (ret ||
> > -        ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, g_prot, false)) {
> > -        /* No valid pte or access denied due to protection */
> > +    base_addr = prtbe0 & PRTBE_R_RPDB;
> > +    nls = prtbe0 & PRTBE_R_RPDS;
> > +    if (msr_hv || cpu->vhyp) {
> > +        /*
> > +         * Can treat process table addresses as real addresses
> > +         */
> > +        ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK, base_addr,
> > +                                    nls, g_raddr, g_page_size, &pte,
> > +                                    &fault_cause, &pte_addr);
> > +        if (ret) {
> > +            /* No valid PTE */
> > +            if (cause_excp) {
> > +                ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
> > +            }
> > +            return ret;
> > +        }
> > +    } else {
> > +        uint64_t rpn, mask;
> > +
> > +        index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */
> > +        index &= ((1UL << nls) - 1);                            /* Mask */
> > +        pte_addr = base_addr + (index * sizeof(pte));
> > +
> > +        /*
> > +         * Each process table address is subject to a partition-scoped
> > +         * translation
> > +         */
> > +        do {
> > +            ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
> > +                                                     pate, &h_raddr, &h_prot,
> > +                                                     &h_page_size, 1, 1);
> > +            if (ret) {
> > +                return ret;
> > +            }
> > +
> > +            ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, &h_raddr,
> > +                                         &nls, g_page_size, &pte, &fault_cause);
> > +            if (ret) {
> > +                /* No valid pte */
> > +                if (cause_excp) {
> > +                    ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
> > +                }
> > +                return ret;
> > +            }
> > +            pte_addr = h_raddr;
> > +        } while (!(pte & R_PTE_LEAF));
> > +
> > +        rpn = pte & R_PTE_RPN;
> > +        mask = (1UL << *g_page_size) - 1;
> > +
> > +        /* Or high bits of rpn and low bits to ea to form whole real addr */
> > +        *g_raddr = (rpn & ~mask) | (eaddr & mask);
> > +    }
> > +
> > +    if (ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, g_prot, false)) {
> > +        /* Access denied due to protection */
> >          if (cause_excp) {
> >              ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
> >          }
> > @@ -289,11 +409,29 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
> >      return 0;
> >  }
> >  
> > +/*
> > + * Radix tree translation is a 2 steps translation process:
> > + *
> > + * 1. Process-scoped translation:   Guest Eff Addr  -> Guest Real Addr
> > + * 2. Partition-scoped translation: Guest Real Addr -> Host Real Addr
> > + *
> > + *                                  MSR[HV]
> > + *              +-------------+----------------+---------------+
> > + *              |             |     HV = 0     |     HV = 1    |
> > + *              +-------------+----------------+---------------+
> > + *              | Relocation  |    Partition   |      No       |
> > + *              | = Off       |     Scoped     |  Translation  |
> > + *  Relocation  +-------------+----------------+---------------+
> > + *              | Relocation  |   Partition &  |    Process    |
> > + *              | = On        | Process Scoped |    Scoped     |
> > + *              +-------------+----------------+---------------+
> > + */
> >  static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
> >                               bool relocation,
> >                               hwaddr *raddr, int *psizep, int *protp,
> >                               bool cause_excp)
> >  {
> > +    CPUPPCState *env = &cpu->env;
> >      uint64_t lpid = 0, pid = 0;
> >      ppc_v3_pate_t pate;
> >      int psize, prot;
> > @@ -325,11 +463,6 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
> >              }
> >              return 1;
> >          }
> > -        /* We don't support guest mode yet */
> > -        if (lpid != 0) {
> > -            error_report("PowerNV guest support Unimplemented");
> > -            exit(1);
> > -        }
> >      }
> >  
> >      *psizep = INT_MAX;
> > @@ -340,6 +473,8 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
> >       *
> >       * - Translates an effective address to a host real address in
> >       *   quadrants 0 and 3 when HV=1.
> > +     *
> > +     * - Translates an effective address to a guest real address.
> >       */
> >      if (relocation) {
> >          int ret = ppc_radix64_process_scoped_xlate(cpu, rwx, eaddr, pid,
> > @@ -354,7 +489,24 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
> >          g_raddr = eaddr & R_EADDR_MASK;
> >      }
> >  
> > -    *raddr = g_raddr;
> > +    /*
> > +     * Perform partition-scoped translation if !HV or HV access to
> > +     * quadrants 1 or 2. Translates a guest real address to a host
> > +     * real address.
> > +     */
> > +    if ((lpid != 0) || (!cpu->vhyp && !msr_hv)) {
> 
> This check is too complex for my taste. Also it doesn't seem right
> to look at lpid if the machine is pseries, even if it would happen
> to work because pseries cannot have lpid != 0. I think we should
> have distinct paths for powernv and pseries.
> 
> A bit like with the following squashed in:
> 
> =======================================
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -489,22 +489,28 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
>          g_raddr = eaddr & R_EADDR_MASK;
>      }
>  
> -    /*
> -     * Perform partition-scoped translation if !HV or HV access to
> -     * quadrants 1 or 2. Translates a guest real address to a host
> -     * real address.
> -     */
> -    if ((lpid != 0) || (!cpu->vhyp && !msr_hv)) {
> -        int ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
> +    if (cpu->vhyp) {
> +        *raddr = g_raddr;
> +    } else {
> +        /*
> +         * Perform partition-scoped translation if !HV or HV access to
> +         * quadrants 1 or 2. Translates a guest real address to a host
> +         * real address.
> +         */
> +        if (lpid || !msr_hv) {
> +            int ret;
> +
> +            ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
>                                                       pate, raddr, &prot, &psize,
>                                                       0, cause_excp);
> -        if (ret) {
> -            return ret;
> +            if (ret) {
> +                return ret;
> +            }
> +            *psizep = MIN(*psizep, psize);
> +            *protp &= prot;
> +        } else {
> +            *raddr = g_raddr;
>          }
> -        *psizep = MIN(*psizep, psize);
> -        *protp &= prot;
> -    } else {
> -        *raddr = g_raddr;
>      }
>  
>      return 0;
> =======================================
> 
> David,
> 
> If my comment makes sense to you, can you squash the above fix into
> Cedric's patch ?

Yes.  I also think we shouldn't be looking at lpid for the vhyp case.
I've applied the rest of the series to ppc-for-5.1, and folded in this
correction as suggested.
Cédric Le Goater April 8, 2020, 7:22 a.m. UTC | #3
>>> -    *raddr = g_raddr;
>>> +    /*
>>> +     * Perform partition-scoped translation if !HV or HV access to
>>> +     * quadrants 1 or 2. Translates a guest real address to a host
>>> +     * real address.
>>> +     */
>>> +    if ((lpid != 0) || (!cpu->vhyp && !msr_hv)) {
>>
>> This check is too complex for my taste. Also it doesn't seem right
>> to look at lpid if the machine is pseries, even if it would happen
>> to work because pseries cannot have lpid != 0. I think we should
>> have distinct paths for powernv and pseries.
>>
>> A bit like with the following squashed in:
>>
>> =======================================
>> --- a/target/ppc/mmu-radix64.c
>> +++ b/target/ppc/mmu-radix64.c
>> @@ -489,22 +489,28 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
>>          g_raddr = eaddr & R_EADDR_MASK;
>>      }
>>  
>> -    /*
>> -     * Perform partition-scoped translation if !HV or HV access to
>> -     * quadrants 1 or 2. Translates a guest real address to a host
>> -     * real address.
>> -     */
>> -    if ((lpid != 0) || (!cpu->vhyp && !msr_hv)) {
>> -        int ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
>> +    if (cpu->vhyp) {
>> +        *raddr = g_raddr;
>> +    } else {
>> +        /*
>> +         * Perform partition-scoped translation if !HV or HV access to
>> +         * quadrants 1 or 2. Translates a guest real address to a host
>> +         * real address.
>> +         */
>> +        if (lpid || !msr_hv) {
>> +            int ret;
>> +
>> +            ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
>>                                                       pate, raddr, &prot, &psize,
>>                                                       0, cause_excp);
>> -        if (ret) {
>> -            return ret;
>> +            if (ret) {
>> +                return ret;
>> +            }
>> +            *psizep = MIN(*psizep, psize);
>> +            *protp &= prot;
>> +        } else {
>> +            *raddr = g_raddr;
>>          }
>> -        *psizep = MIN(*psizep, psize);
>> -        *protp &= prot;
>> -    } else {
>> -        *raddr = g_raddr;
>>      }
>>  
>>      return 0;
>> =======================================
>>
>> David,
>>
>> If my comment makes sense to you, can you squash the above fix into
>> Cedric's patch ?
> 
> Yes.  I also think we shouldn't be looking at lpid for the vhyp case.
> I've applied the rest of the series to ppc-for-5.1, and folded in this
> correction as suggested.


I explored a solution with two ppc_radix64_xlate() routines, a simple one
for pseries and a more complex one for powernv, but it didn't look very
good. Maybe it will be easier now that the first patches are merged.

Thanks,

C.
diff mbox series

Patch

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index f4a5304d4356..6b6dd7e483f1 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -463,6 +463,9 @@  typedef struct ppc_v3_pate_t {
 #define DSISR_AMR                0x00200000
 /* Unsupported Radix Tree Configuration */
 #define DSISR_R_BADCONFIG        0x00080000
+#define DSISR_ATOMIC_RC          0x00040000
+/* Unable to translate address of (guest) pde or process/page table entry */
+#define DSISR_PRTABLE_FAULT      0x00020000
 
 /* SRR1 error code fields */
 
diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c
index 1acc3786de0e..f05297966472 100644
--- a/target/ppc/excp_helper.c
+++ b/target/ppc/excp_helper.c
@@ -506,9 +506,10 @@  static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
     case POWERPC_EXCP_ISEG:      /* Instruction segment exception            */
     case POWERPC_EXCP_TRACE:     /* Trace exception                          */
         break;
+    case POWERPC_EXCP_HISI:      /* Hypervisor instruction storage exception */
+        msr |= env->error_code;
     case POWERPC_EXCP_HDECR:     /* Hypervisor decrementer exception         */
     case POWERPC_EXCP_HDSI:      /* Hypervisor data storage exception        */
-    case POWERPC_EXCP_HISI:      /* Hypervisor instruction storage exception */
     case POWERPC_EXCP_HDSEG:     /* Hypervisor data segment exception        */
     case POWERPC_EXCP_HISEG:     /* Hypervisor instruction segment exception */
     case POWERPC_EXCP_SDOOR_HV:  /* Hypervisor Doorbell interrupt            */
diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
index 2400da41e06c..d473dc742e11 100644
--- a/target/ppc/mmu-radix64.c
+++ b/target/ppc/mmu-radix64.c
@@ -103,6 +103,27 @@  static void ppc_radix64_raise_si(PowerPCCPU *cpu, int rwx, vaddr eaddr,
     }
 }
 
+static void ppc_radix64_raise_hsi(PowerPCCPU *cpu, int rwx, vaddr eaddr,
+                                  hwaddr g_raddr, uint32_t cause)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+
+    if (rwx == 2) { /* H Instruction Storage Interrupt */
+        cs->exception_index = POWERPC_EXCP_HISI;
+        env->spr[SPR_ASDR] = g_raddr;
+        env->error_code = cause;
+    } else { /* H Data Storage Interrupt */
+        cs->exception_index = POWERPC_EXCP_HDSI;
+        if (rwx == 1) { /* Write -> Store */
+            cause |= DSISR_ISSTORE;
+        }
+        env->spr[SPR_HDSISR] = cause;
+        env->spr[SPR_HDAR] = eaddr;
+        env->spr[SPR_ASDR] = g_raddr;
+        env->error_code = 0;
+    }
+}
 
 static bool ppc_radix64_check_prot(PowerPCCPU *cpu, int rwx, uint64_t pte,
                                    int *fault_cause, int *prot,
@@ -243,6 +264,37 @@  static bool validate_pate(PowerPCCPU *cpu, uint64_t lpid, ppc_v3_pate_t *pate)
     return true;
 }
 
+static int ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, int rwx,
+                                              vaddr eaddr, hwaddr g_raddr,
+                                              ppc_v3_pate_t pate,
+                                              hwaddr *h_raddr, int *h_prot,
+                                              int *h_page_size, bool pde_addr,
+                                              bool cause_excp)
+{
+    int fault_cause = 0;
+    hwaddr pte_addr;
+    uint64_t pte;
+
+    *h_page_size = PRTBE_R_GET_RTS(pate.dw0);
+    /* No valid pte or access denied due to protection */
+    if (ppc_radix64_walk_tree(CPU(cpu)->as, g_raddr, pate.dw0 & PRTBE_R_RPDB,
+                              pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
+                              &pte, &fault_cause, &pte_addr) ||
+        ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, h_prot, true)) {
+        if (pde_addr) /* address being translated was that of a guest pde */
+            fault_cause |= DSISR_PRTABLE_FAULT;
+        if (cause_excp) {
+            ppc_radix64_raise_hsi(cpu, rwx, eaddr, g_raddr, fault_cause);
+        }
+        return 1;
+    }
+
+    /* Update Reference and Change Bits */
+    ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot);
+
+    return 0;
+}
+
 static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
                                             vaddr eaddr, uint64_t pid,
                                             ppc_v3_pate_t pate, hwaddr *g_raddr,
@@ -250,9 +302,10 @@  static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
                                             bool cause_excp)
 {
     CPUState *cs = CPU(cpu);
-    uint64_t offset, size, prtbe_addr, prtbe0, pte;
-    int fault_cause = 0;
-    hwaddr pte_addr;
+    CPUPPCState *env = &cpu->env;
+    uint64_t offset, size, prtbe_addr, prtbe0, base_addr, nls, index, pte;
+    int fault_cause = 0, h_page_size, h_prot;
+    hwaddr h_raddr, pte_addr;
     int ret;
 
     /* Index Process Table by PID to Find Corresponding Process Table Entry */
@@ -266,18 +319,85 @@  static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
         return 1;
     }
     prtbe_addr = (pate.dw1 & PATE1_R_PRTB) + offset;
-    prtbe0 = ldq_phys(cs->as, prtbe_addr);
+
+    if (cpu->vhyp) {
+        prtbe0 = ldq_phys(cs->as, prtbe_addr);
+    } else {
+        /*
+         * Process table addresses are subject to partition-scoped
+         * translation
+         *
+         * On a Radix host, the partition-scoped page table for LPID=0
+         * is only used to translate the effective addresses of the
+         * process table entries.
+         */
+        ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
+                                                 pate, &h_raddr, &h_prot,
+                                                 &h_page_size, 1, 1);
+        if (ret) {
+            return ret;
+        }
+        prtbe0 = ldq_phys(cs->as, h_raddr);
+    }
 
     /* Walk Radix Tree from Process Table Entry to Convert EA to RA */
     *g_page_size = PRTBE_R_GET_RTS(prtbe0);
-    ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK,
-                                prtbe0 & PRTBE_R_RPDB, prtbe0 & PRTBE_R_RPDS,
-                                g_raddr, g_page_size, &pte, &fault_cause,
-                                &pte_addr);
-
-    if (ret ||
-        ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, g_prot, false)) {
-        /* No valid pte or access denied due to protection */
+    base_addr = prtbe0 & PRTBE_R_RPDB;
+    nls = prtbe0 & PRTBE_R_RPDS;
+    if (msr_hv || cpu->vhyp) {
+        /*
+         * Can treat process table addresses as real addresses
+         */
+        ret = ppc_radix64_walk_tree(cs->as, eaddr & R_EADDR_MASK, base_addr,
+                                    nls, g_raddr, g_page_size, &pte,
+                                    &fault_cause, &pte_addr);
+        if (ret) {
+            /* No valid PTE */
+            if (cause_excp) {
+                ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
+            }
+            return ret;
+        }
+    } else {
+        uint64_t rpn, mask;
+
+        index = (eaddr & R_EADDR_MASK) >> (*g_page_size - nls); /* Shift */
+        index &= ((1UL << nls) - 1);                            /* Mask */
+        pte_addr = base_addr + (index * sizeof(pte));
+
+        /*
+         * Each process table address is subject to a partition-scoped
+         * translation
+         */
+        do {
+            ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
+                                                     pate, &h_raddr, &h_prot,
+                                                     &h_page_size, 1, 1);
+            if (ret) {
+                return ret;
+            }
+
+            ret = ppc_radix64_next_level(cs->as, eaddr & R_EADDR_MASK, &h_raddr,
+                                         &nls, g_page_size, &pte, &fault_cause);
+            if (ret) {
+                /* No valid pte */
+                if (cause_excp) {
+                    ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
+                }
+                return ret;
+            }
+            pte_addr = h_raddr;
+        } while (!(pte & R_PTE_LEAF));
+
+        rpn = pte & R_PTE_RPN;
+        mask = (1UL << *g_page_size) - 1;
+
+        /* Or high bits of rpn and low bits of ea to form whole real addr */
+        *g_raddr = (rpn & ~mask) | (eaddr & mask);
+    }
+
+    if (ppc_radix64_check_prot(cpu, rwx, pte, &fault_cause, g_prot, false)) {
+        /* Access denied due to protection */
         if (cause_excp) {
             ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
         }
@@ -289,11 +409,29 @@  static int ppc_radix64_process_scoped_xlate(PowerPCCPU *cpu, int rwx,
     return 0;
 }
 
+/*
+ * Radix tree translation is a 2 steps translation process:
+ *
+ * 1. Process-scoped translation:   Guest Eff Addr  -> Guest Real Addr
+ * 2. Partition-scoped translation: Guest Real Addr -> Host Real Addr
+ *
+ *                                  MSR[HV]
+ *              +-------------+----------------+---------------+
+ *              |             |     HV = 0     |     HV = 1    |
+ *              +-------------+----------------+---------------+
+ *              | Relocation  |    Partition   |      No       |
+ *              | = Off       |     Scoped     |  Translation  |
+ *  Relocation  +-------------+----------------+---------------+
+ *              | Relocation  |   Partition &  |    Process    |
+ *              | = On        | Process Scoped |    Scoped     |
+ *              +-------------+----------------+---------------+
+ */
 static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
                              bool relocation,
                              hwaddr *raddr, int *psizep, int *protp,
                              bool cause_excp)
 {
+    CPUPPCState *env = &cpu->env;
     uint64_t lpid = 0, pid = 0;
     ppc_v3_pate_t pate;
     int psize, prot;
@@ -325,11 +463,6 @@  static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
             }
             return 1;
         }
-        /* We don't support guest mode yet */
-        if (lpid != 0) {
-            error_report("PowerNV guest support Unimplemented");
-            exit(1);
-        }
     }
 
     *psizep = INT_MAX;
@@ -340,6 +473,8 @@  static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
      *
      * - Translates an effective address to a host real address in
      *   quadrants 0 and 3 when HV=1.
+     *
+     * - Translates an effective address to a guest real address.
      */
     if (relocation) {
         int ret = ppc_radix64_process_scoped_xlate(cpu, rwx, eaddr, pid,
@@ -354,7 +489,24 @@  static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr eaddr, int rwx,
         g_raddr = eaddr & R_EADDR_MASK;
     }
 
-    *raddr = g_raddr;
+    /*
+     * Perform partition-scoped translation if !HV or HV access to
+     * quadrants 1 or 2. Translates a guest real address to a host
+     * real address.
+     */
+    if ((lpid != 0) || (!cpu->vhyp && !msr_hv)) {
+        int ret = ppc_radix64_partition_scoped_xlate(cpu, rwx, eaddr, g_raddr,
+                                                     pate, raddr, &prot, &psize,
+                                                     0, cause_excp);
+        if (ret) {
+            return ret;
+        }
+        *psizep = MIN(*psizep, psize);
+        *protp &= prot;
+    } else {
+        *raddr = g_raddr;
+    }
+
     return 0;
 }