diff mbox series

[2/2,v2] powerpc/powernv: Enable and setup PCI P2P

Message ID 20200430131520.51211-2-maxg@mellanox.com (mailing list archive)
State Changes Requested
Headers show
Series [1/2,v2] powerpc/dma: Define map/unmap mmio resource callbacks | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (54dc28ff5e0b3585224d49a31b53e030342ca5c3)
snowpatch_ozlabs/build-ppc64le success Build succeeded
snowpatch_ozlabs/build-ppc64be success Build succeeded
snowpatch_ozlabs/build-ppc64e success Build succeeded
snowpatch_ozlabs/build-pmac32 warning Upstream build failed, couldn't test patch
snowpatch_ozlabs/checkpatch warning total: 0 errors, 0 warnings, 3 checks, 256 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Max Gurtovoy April 30, 2020, 1:15 p.m. UTC
Implement the generic dma_map_resource callback on the PCI controller
for powernv. This will enable PCI P2P on POWER9 architecture. It will
allow catching a cross-PHB mmio mapping, which needs to be setup in
hardware by calling opal. Both the initiator and target PHBs need to be
configured, so we look for which PHB owns the mmio address being mapped.

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
[maxg: added CONFIG_PCI_P2PDMA wrappers]
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
---

changes from v1:
 - remove CONFIG_PCI_P2PDMA around opal_pci_set_p2p decleration
 - divide pnv_pci_ioda_set_p2p to pnv_pci_ioda_enable_p2p/pnv_pci_ioda_disable_p2p
 - added pnv_pci_dma_dir_to_opal_p2p static helper

---
 arch/powerpc/include/asm/opal.h            |   3 +-
 arch/powerpc/platforms/powernv/opal-call.c |   1 +
 arch/powerpc/platforms/powernv/pci-ioda.c  | 212 +++++++++++++++++++++++++++--
 arch/powerpc/platforms/powernv/pci.h       |   9 ++
 4 files changed, 213 insertions(+), 12 deletions(-)

Comments

Oliver O'Halloran Aug. 3, 2020, 7:35 a.m. UTC | #1
On Thu, Apr 30, 2020 at 11:15 PM Max Gurtovoy <maxg@mellanox.com> wrote:
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 57d3a6a..9ecc576 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -3706,18 +3706,208 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
>         }
>  }
>
> +#ifdef CONFIG_PCI_P2PDMA
> +static DEFINE_MUTEX(p2p_mutex);
> +
> +static bool pnv_pci_controller_owns_addr(struct pci_controller *hose,
> +                                        phys_addr_t addr, size_t size)
> +{
> +       int i;
> +
> +       /*
> +        * It seems safe to assume the full range is under the same PHB, so we
> +        * can ignore the size.
> +        */
> +       for (i = 0; i < ARRAY_SIZE(hose->mem_resources); i++) {
> +               struct resource *res = &hose->mem_resources[i];
> +
> +               if (res->flags && addr >= res->start && addr < res->end)
> +                       return true;
> +       }
> +       return false;
> +}
> +
> +/*
> + * find the phb owning a mmio address if not owned locally
> + */
> +static struct pnv_phb *pnv_pci_find_owning_phb(struct pci_dev *pdev,
> +                                              phys_addr_t addr, size_t size)
> +{
> +       struct pci_controller *hose;
> +
> +       /* fast path */
> +       if (pnv_pci_controller_owns_addr(pdev->bus->sysdata, addr, size))
> +               return NULL;

Do we actually need this fast path? It's going to be slow either way.
Also if a device is doing p2p to another device under the same PHB
then it should not be happening via the root complex. Is this a case
you've tested?

> +       list_for_each_entry(hose, &hose_list, list_node) {
> +               struct pnv_phb *phb = hose->private_data;
> +
> +               if (phb->type != PNV_PHB_NPU_NVLINK &&
> +                   phb->type != PNV_PHB_NPU_OCAPI) {
> +                       if (pnv_pci_controller_owns_addr(hose, addr, size))
> +                               return phb;
> +               }
> +       }
> +       return NULL;
> +}
> +
> +static u64 pnv_pci_dma_dir_to_opal_p2p(enum dma_data_direction dir)
> +{
> +       if (dir == DMA_TO_DEVICE)
> +               return OPAL_PCI_P2P_STORE;
> +       else if (dir == DMA_FROM_DEVICE)
> +               return OPAL_PCI_P2P_LOAD;
> +       else if (dir == DMA_BIDIRECTIONAL)
> +               return OPAL_PCI_P2P_LOAD | OPAL_PCI_P2P_STORE;
> +       else
> +               return 0;
> +}
> +
> +static int pnv_pci_ioda_enable_p2p(struct pci_dev *initiator,
> +                                  struct pnv_phb *phb_target,
> +                                  enum dma_data_direction dir)
> +{
> +       struct pci_controller *hose;
> +       struct pnv_phb *phb_init;
> +       struct pnv_ioda_pe *pe_init;
> +       u64 desc;
> +       int rc;
> +
> +       if (!opal_check_token(OPAL_PCI_SET_P2P))
> +               return -ENXIO;
> +

> +       hose = pci_bus_to_host(initiator->bus);
> +       phb_init = hose->private_data;

You can use the pci_bus_to_pnvhb() helper

> +
> +       pe_init = pnv_ioda_get_pe(initiator);
> +       if (!pe_init)
> +               return -ENODEV;
> +
> +       if (!pe_init->tce_bypass_enabled)
> +               return -EINVAL;
> +
> +       /*
> +        * Configuring the initiator's PHB requires to adjust its TVE#1
> +        * setting. Since the same device can be an initiator several times for
> +        * different target devices, we need to keep a reference count to know
> +        * when we can restore the default bypass setting on its TVE#1 when
> +        * disabling. Opal is not tracking PE states, so we add a reference
> +        * count on the PE in linux.
> +        *
> +        * For the target, the configuration is per PHB, so we keep a
> +        * target reference count on the PHB.
> +        */

This irks me a bit because configuring the DMA address limits for the
TVE is the kernel's job. What we really should be doing is using
opal_pci_map_pe_dma_window_real() to set the bypass-mode address limit
for the TVE to something large enough to hit the MMIO ranges rather
than having set_p2p do it as a side effect. Unfortunately, for some
reason skiboot doesn't implement support for enabling 56bit addressing
using opal_pci_map_pe_dma_window_real() and we do need to support
older kernel's which used this stuff so I guess we're stuck with it
for now. It'd be nice if we could fix this in the longer term
though...

> +       mutex_lock(&p2p_mutex);
> +
> +       desc = OPAL_PCI_P2P_ENABLE | pnv_pci_dma_dir_to_opal_p2p(dir);
> +       /* always go to opal to validate the configuration */
> +       rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, desc,
> +                             pe_init->pe_number);
> +       if (rc != OPAL_SUCCESS) {
> +               rc = -EIO;
> +               goto out;
> +       }
> +
> +       pe_init->p2p_initiator_count++;
> +       phb_target->p2p_target_count++;
> +
> +       rc = 0;
> +out:
> +       mutex_unlock(&p2p_mutex);
> +       return rc;
> +}
> +
> +static int pnv_pci_dma_map_resource(struct pci_dev *pdev,
> +                                   phys_addr_t phys_addr, size_t size,
> +                                   enum dma_data_direction dir)
> +{
> +       struct pnv_phb *target_phb;
> +
> +       target_phb = pnv_pci_find_owning_phb(pdev, phys_addr, size);
> +       if (!target_phb)
> +               return 0;
> +
> +       return pnv_pci_ioda_enable_p2p(pdev, target_phb, dir);
> +}
> +
> +static int pnv_pci_ioda_disable_p2p(struct pci_dev *initiator,
> +               struct pnv_phb *phb_target)
> +{
> +       struct pci_controller *hose;
> +       struct pnv_phb *phb_init;
> +       struct pnv_ioda_pe *pe_init;
> +       int rc;
> +
> +       if (!opal_check_token(OPAL_PCI_SET_P2P))
> +               return -ENXIO;

This should probably have a WARN_ON() since we can't hit this path
unless the initial map succeeds.

> +       hose = pci_bus_to_host(initiator->bus);
> +       phb_init = hose->private_data;

pci_bus_to_pnvhb()

> +       pe_init = pnv_ioda_get_pe(initiator);
> +       if (!pe_init)
> +               return -ENODEV;
> +
> +       mutex_lock(&p2p_mutex);
> +
> +       if (!pe_init->p2p_initiator_count || !phb_target->p2p_target_count) {
> +               rc = -EINVAL;
> +               goto out;
> +       }
> +
> +       if (--pe_init->p2p_initiator_count == 0)
> +               pnv_pci_ioda2_set_bypass(pe_init, true);
> +
> +       if (--phb_target->p2p_target_count == 0) {
> +               rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
> +                                     0, pe_init->pe_number);
> +               if (rc != OPAL_SUCCESS) {
> +                       rc = -EIO;
> +                       goto out;
> +               }
> +       }
> +
> +       rc = 0;
> +out:
> +       mutex_unlock(&p2p_mutex);
> +       return rc;
> +}
> +
> +static void pnv_pci_dma_unmap_resource(struct pci_dev *pdev,
> +                                      dma_addr_t addr, size_t size,
> +                                      enum dma_data_direction dir)
> +{
> +       struct pnv_phb *target_phb;
> +       int rc;
> +
> +       target_phb = pnv_pci_find_owning_phb(pdev, addr, size);
> +       if (!target_phb)
> +               return;
> +
> +       rc = pnv_pci_ioda_disable_p2p(pdev, target_phb);
> +       if (rc)
> +               dev_err(&pdev->dev, "Failed to undo PCI peer-to-peer setup for address %llx: %d\n",
> +                       addr, rc);

Use pci_err() or pe_err().
Aneela Devarasetty Aug. 10, 2020, 8:24 p.m. UTC | #2
+ David from IBM.

-----Original Message-----
From: Oliver O'Halloran <oohall@gmail.com> 
Sent: Monday, August 3, 2020 2:35 AM
To: Max Gurtovoy <maxg@mellanox.com>
Cc: Christoph Hellwig <hch@lst.de>; linux-pci <linux-pci@vger.kernel.org>; linuxppc-dev <linuxppc-dev@lists.ozlabs.org>; Israel Rukshin <israelr@mellanox.com>; Idan Werpoler <Idanw@mellanox.com>; Vladimir Koushnir <vladimirk@mellanox.com>; Shlomi Nimrodi <shlomin@mellanox.com>; Frederic Barrat <fbarrat@linux.ibm.com>; Carol Soto <clsoto@us.ibm.com>; Aneela Devarasetty <aneela@mellanox.com>
Subject: Re: [PATCH 2/2 v2] powerpc/powernv: Enable and setup PCI P2P

On Thu, Apr 30, 2020 at 11:15 PM Max Gurtovoy <maxg@mellanox.com> wrote:
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 57d3a6a..9ecc576 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -3706,18 +3706,208 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
>         }
>  }
>
> +#ifdef CONFIG_PCI_P2PDMA
> +static DEFINE_MUTEX(p2p_mutex);
> +
> +static bool pnv_pci_controller_owns_addr(struct pci_controller *hose,
> +                                        phys_addr_t addr, size_t 
> +size) {
> +       int i;
> +
> +       /*
> +        * It seems safe to assume the full range is under the same PHB, so we
> +        * can ignore the size.
> +        */
> +       for (i = 0; i < ARRAY_SIZE(hose->mem_resources); i++) {
> +               struct resource *res = &hose->mem_resources[i];
> +
> +               if (res->flags && addr >= res->start && addr < res->end)
> +                       return true;
> +       }
> +       return false;
> +}
> +
> +/*
> + * find the phb owning a mmio address if not owned locally  */ static 
> +struct pnv_phb *pnv_pci_find_owning_phb(struct pci_dev *pdev,
> +                                              phys_addr_t addr, 
> +size_t size) {
> +       struct pci_controller *hose;
> +
> +       /* fast path */
> +       if (pnv_pci_controller_owns_addr(pdev->bus->sysdata, addr, size))
> +               return NULL;

Do we actually need this fast path? It's going to be slow either way.
Also if a device is doing p2p to another device under the same PHB then it should not be happening via the root complex. Is this a case you've tested?

> +       list_for_each_entry(hose, &hose_list, list_node) {
> +               struct pnv_phb *phb = hose->private_data;
> +
> +               if (phb->type != PNV_PHB_NPU_NVLINK &&
> +                   phb->type != PNV_PHB_NPU_OCAPI) {
> +                       if (pnv_pci_controller_owns_addr(hose, addr, size))
> +                               return phb;
> +               }
> +       }
> +       return NULL;
> +}
> +
> +static u64 pnv_pci_dma_dir_to_opal_p2p(enum dma_data_direction dir) {
> +       if (dir == DMA_TO_DEVICE)
> +               return OPAL_PCI_P2P_STORE;
> +       else if (dir == DMA_FROM_DEVICE)
> +               return OPAL_PCI_P2P_LOAD;
> +       else if (dir == DMA_BIDIRECTIONAL)
> +               return OPAL_PCI_P2P_LOAD | OPAL_PCI_P2P_STORE;
> +       else
> +               return 0;
> +}
> +
> +static int pnv_pci_ioda_enable_p2p(struct pci_dev *initiator,
> +                                  struct pnv_phb *phb_target,
> +                                  enum dma_data_direction dir) {
> +       struct pci_controller *hose;
> +       struct pnv_phb *phb_init;
> +       struct pnv_ioda_pe *pe_init;
> +       u64 desc;
> +       int rc;
> +
> +       if (!opal_check_token(OPAL_PCI_SET_P2P))
> +               return -ENXIO;
> +

> +       hose = pci_bus_to_host(initiator->bus);
> +       phb_init = hose->private_data;

You can use the pci_bus_to_pnvhb() helper

> +
> +       pe_init = pnv_ioda_get_pe(initiator);
> +       if (!pe_init)
> +               return -ENODEV;
> +
> +       if (!pe_init->tce_bypass_enabled)
> +               return -EINVAL;
> +
> +       /*
> +        * Configuring the initiator's PHB requires to adjust its TVE#1
> +        * setting. Since the same device can be an initiator several times for
> +        * different target devices, we need to keep a reference count to know
> +        * when we can restore the default bypass setting on its TVE#1 when
> +        * disabling. Opal is not tracking PE states, so we add a reference
> +        * count on the PE in linux.
> +        *
> +        * For the target, the configuration is per PHB, so we keep a
> +        * target reference count on the PHB.
> +        */

This irks me a bit because configuring the DMA address limits for the TVE is the kernel's job. What we really should be doing is using
opal_pci_map_pe_dma_window_real() to set the bypass-mode address limit for the TVE to something large enough to hit the MMIO ranges rather than having set_p2p do it as a side effect. Unfortunately, for some reason skiboot doesn't implement support for enabling 56bit addressing using opal_pci_map_pe_dma_window_real() and we do need to support older kernel's which used this stuff so I guess we're stuck with it for now. It'd be nice if we could fix this in the longer term though...

> +       mutex_lock(&p2p_mutex);
> +
> +       desc = OPAL_PCI_P2P_ENABLE | pnv_pci_dma_dir_to_opal_p2p(dir);
> +       /* always go to opal to validate the configuration */
> +       rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, desc,
> +                             pe_init->pe_number);
> +       if (rc != OPAL_SUCCESS) {
> +               rc = -EIO;
> +               goto out;
> +       }
> +
> +       pe_init->p2p_initiator_count++;
> +       phb_target->p2p_target_count++;
> +
> +       rc = 0;
> +out:
> +       mutex_unlock(&p2p_mutex);
> +       return rc;
> +}
> +
> +static int pnv_pci_dma_map_resource(struct pci_dev *pdev,
> +                                   phys_addr_t phys_addr, size_t size,
> +                                   enum dma_data_direction dir) {
> +       struct pnv_phb *target_phb;
> +
> +       target_phb = pnv_pci_find_owning_phb(pdev, phys_addr, size);
> +       if (!target_phb)
> +               return 0;
> +
> +       return pnv_pci_ioda_enable_p2p(pdev, target_phb, dir); }
> +
> +static int pnv_pci_ioda_disable_p2p(struct pci_dev *initiator,
> +               struct pnv_phb *phb_target) {
> +       struct pci_controller *hose;
> +       struct pnv_phb *phb_init;
> +       struct pnv_ioda_pe *pe_init;
> +       int rc;
> +
> +       if (!opal_check_token(OPAL_PCI_SET_P2P))
> +               return -ENXIO;

This should probably have a WARN_ON() since we can't hit this path unless the initial map succeeds.

> +       hose = pci_bus_to_host(initiator->bus);
> +       phb_init = hose->private_data;

pci_bus_to_pnvhb()

> +       pe_init = pnv_ioda_get_pe(initiator);
> +       if (!pe_init)
> +               return -ENODEV;
> +
> +       mutex_lock(&p2p_mutex);
> +
> +       if (!pe_init->p2p_initiator_count || !phb_target->p2p_target_count) {
> +               rc = -EINVAL;
> +               goto out;
> +       }
> +
> +       if (--pe_init->p2p_initiator_count == 0)
> +               pnv_pci_ioda2_set_bypass(pe_init, true);
> +
> +       if (--phb_target->p2p_target_count == 0) {
> +               rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
> +                                     0, pe_init->pe_number);
> +               if (rc != OPAL_SUCCESS) {
> +                       rc = -EIO;
> +                       goto out;
> +               }
> +       }
> +
> +       rc = 0;
> +out:
> +       mutex_unlock(&p2p_mutex);
> +       return rc;
> +}
> +
> +static void pnv_pci_dma_unmap_resource(struct pci_dev *pdev,
> +                                      dma_addr_t addr, size_t size,
> +                                      enum dma_data_direction dir) {
> +       struct pnv_phb *target_phb;
> +       int rc;
> +
> +       target_phb = pnv_pci_find_owning_phb(pdev, addr, size);
> +       if (!target_phb)
> +               return;
> +
> +       rc = pnv_pci_ioda_disable_p2p(pdev, target_phb);
> +       if (rc)
> +               dev_err(&pdev->dev, "Failed to undo PCI peer-to-peer setup for address %llx: %d\n",
> +                       addr, rc);

Use pci_err() or pe_err().
Frederic Barrat Aug. 11, 2020, 5:25 p.m. UTC | #3
Le 03/08/2020 à 09:35, Oliver O'Halloran a écrit :
> On Thu, Apr 30, 2020 at 11:15 PM Max Gurtovoy <maxg@mellanox.com> wrote:
>> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
>> index 57d3a6a..9ecc576 100644
>> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
>> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
>> @@ -3706,18 +3706,208 @@ static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
>>          }
>>   }
>>
>> +#ifdef CONFIG_PCI_P2PDMA
>> +static DEFINE_MUTEX(p2p_mutex);
>> +
>> +static bool pnv_pci_controller_owns_addr(struct pci_controller *hose,
>> +                                        phys_addr_t addr, size_t size)
>> +{
>> +       int i;
>> +
>> +       /*
>> +        * It seems safe to assume the full range is under the same PHB, so we
>> +        * can ignore the size.
>> +        */
>> +       for (i = 0; i < ARRAY_SIZE(hose->mem_resources); i++) {
>> +               struct resource *res = &hose->mem_resources[i];
>> +
>> +               if (res->flags && addr >= res->start && addr < res->end)
>> +                       return true;
>> +       }
>> +       return false;
>> +}
>> +
>> +/*
>> + * find the phb owning a mmio address if not owned locally
>> + */
>> +static struct pnv_phb *pnv_pci_find_owning_phb(struct pci_dev *pdev,
>> +                                              phys_addr_t addr, size_t size)
>> +{
>> +       struct pci_controller *hose;
>> +
>> +       /* fast path */
>> +       if (pnv_pci_controller_owns_addr(pdev->bus->sysdata, addr, size))
>> +               return NULL;
> 
> Do we actually need this fast path? It's going to be slow either way.
> Also if a device is doing p2p to another device under the same PHB
> then it should not be happening via the root complex. Is this a case
> you've tested?


The "fast path" comment is misleading and we should rephrase. The point 
is to catch if we're mapping a resource under the same PHB, in which 
case we don't modify the PHB configuration. So we need to catch it 
early, but it's not a fast path.
If the 2 devices are under the same PHB, the code above shouldn't do 
anything. So I guess behavior depends on the underlying bridge? We'll 
need another platform than witherspoon to test it. Probably worth checking.


>> +       list_for_each_entry(hose, &hose_list, list_node) {
>> +               struct pnv_phb *phb = hose->private_data;
>> +
>> +               if (phb->type != PNV_PHB_NPU_NVLINK &&
>> +                   phb->type != PNV_PHB_NPU_OCAPI) {
>> +                       if (pnv_pci_controller_owns_addr(hose, addr, size))
>> +                               return phb;
>> +               }
>> +       }
>> +       return NULL;
>> +}
>> +
>> +static u64 pnv_pci_dma_dir_to_opal_p2p(enum dma_data_direction dir)
>> +{
>> +       if (dir == DMA_TO_DEVICE)
>> +               return OPAL_PCI_P2P_STORE;
>> +       else if (dir == DMA_FROM_DEVICE)
>> +               return OPAL_PCI_P2P_LOAD;
>> +       else if (dir == DMA_BIDIRECTIONAL)
>> +               return OPAL_PCI_P2P_LOAD | OPAL_PCI_P2P_STORE;
>> +       else
>> +               return 0;
>> +}
>> +
>> +static int pnv_pci_ioda_enable_p2p(struct pci_dev *initiator,
>> +                                  struct pnv_phb *phb_target,
>> +                                  enum dma_data_direction dir)
>> +{
>> +       struct pci_controller *hose;
>> +       struct pnv_phb *phb_init;
>> +       struct pnv_ioda_pe *pe_init;
>> +       u64 desc;
>> +       int rc;
>> +
>> +       if (!opal_check_token(OPAL_PCI_SET_P2P))
>> +               return -ENXIO;
>> +
> 
>> +       hose = pci_bus_to_host(initiator->bus);
>> +       phb_init = hose->private_data;
> 
> You can use the pci_bus_to_pnvhb() helper
> 
>> +
>> +       pe_init = pnv_ioda_get_pe(initiator);
>> +       if (!pe_init)
>> +               return -ENODEV;
>> +
>> +       if (!pe_init->tce_bypass_enabled)
>> +               return -EINVAL;
>> +
>> +       /*
>> +        * Configuring the initiator's PHB requires to adjust its TVE#1
>> +        * setting. Since the same device can be an initiator several times for
>> +        * different target devices, we need to keep a reference count to know
>> +        * when we can restore the default bypass setting on its TVE#1 when
>> +        * disabling. Opal is not tracking PE states, so we add a reference
>> +        * count on the PE in linux.
>> +        *
>> +        * For the target, the configuration is per PHB, so we keep a
>> +        * target reference count on the PHB.
>> +        */
> 
> This irks me a bit because configuring the DMA address limits for the
> TVE is the kernel's job. What we really should be doing is using
> opal_pci_map_pe_dma_window_real() to set the bypass-mode address limit
> for the TVE to something large enough to hit the MMIO ranges rather
> than having set_p2p do it as a side effect. Unfortunately, for some
> reason skiboot doesn't implement support for enabling 56bit addressing
> using opal_pci_map_pe_dma_window_real() and we do need to support
> older kernel's which used this stuff so I guess we're stuck with it
> for now. It'd be nice if we could fix this in the longer term
> though...


OK. We'd need more than a 56-bit opal_pci_map_pe_dma_window_real() 
though, there's also a queue setting change on the target PHB.

   Fred


>> +       mutex_lock(&p2p_mutex);
>> +
>> +       desc = OPAL_PCI_P2P_ENABLE | pnv_pci_dma_dir_to_opal_p2p(dir);
>> +       /* always go to opal to validate the configuration */
>> +       rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, desc,
>> +                             pe_init->pe_number);
>> +       if (rc != OPAL_SUCCESS) {
>> +               rc = -EIO;
>> +               goto out;
>> +       }
>> +
>> +       pe_init->p2p_initiator_count++;
>> +       phb_target->p2p_target_count++;
>> +
>> +       rc = 0;
>> +out:
>> +       mutex_unlock(&p2p_mutex);
>> +       return rc;
>> +}
>> +
>> +static int pnv_pci_dma_map_resource(struct pci_dev *pdev,
>> +                                   phys_addr_t phys_addr, size_t size,
>> +                                   enum dma_data_direction dir)
>> +{
>> +       struct pnv_phb *target_phb;
>> +
>> +       target_phb = pnv_pci_find_owning_phb(pdev, phys_addr, size);
>> +       if (!target_phb)
>> +               return 0;
>> +
>> +       return pnv_pci_ioda_enable_p2p(pdev, target_phb, dir);
>> +}
>> +
>> +static int pnv_pci_ioda_disable_p2p(struct pci_dev *initiator,
>> +               struct pnv_phb *phb_target)
>> +{
>> +       struct pci_controller *hose;
>> +       struct pnv_phb *phb_init;
>> +       struct pnv_ioda_pe *pe_init;
>> +       int rc;
>> +
>> +       if (!opal_check_token(OPAL_PCI_SET_P2P))
>> +               return -ENXIO;
> 
> This should probably have a WARN_ON() since we can't hit this path
> unless the initial map succeeds.
> 
>> +       hose = pci_bus_to_host(initiator->bus);
>> +       phb_init = hose->private_data;
> 
> pci_bus_to_pnvhb()
> 
>> +       pe_init = pnv_ioda_get_pe(initiator);
>> +       if (!pe_init)
>> +               return -ENODEV;
>> +
>> +       mutex_lock(&p2p_mutex);
>> +
>> +       if (!pe_init->p2p_initiator_count || !phb_target->p2p_target_count) {
>> +               rc = -EINVAL;
>> +               goto out;
>> +       }
>> +
>> +       if (--pe_init->p2p_initiator_count == 0)
>> +               pnv_pci_ioda2_set_bypass(pe_init, true);
>> +
>> +       if (--phb_target->p2p_target_count == 0) {
>> +               rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
>> +                                     0, pe_init->pe_number);
>> +               if (rc != OPAL_SUCCESS) {
>> +                       rc = -EIO;
>> +                       goto out;
>> +               }
>> +       }
>> +
>> +       rc = 0;
>> +out:
>> +       mutex_unlock(&p2p_mutex);
>> +       return rc;
>> +}
>> +
>> +static void pnv_pci_dma_unmap_resource(struct pci_dev *pdev,
>> +                                      dma_addr_t addr, size_t size,
>> +                                      enum dma_data_direction dir)
>> +{
>> +       struct pnv_phb *target_phb;
>> +       int rc;
>> +
>> +       target_phb = pnv_pci_find_owning_phb(pdev, addr, size);
>> +       if (!target_phb)
>> +               return;
>> +
>> +       rc = pnv_pci_ioda_disable_p2p(pdev, target_phb);
>> +       if (rc)
>> +               dev_err(&pdev->dev, "Failed to undo PCI peer-to-peer setup for address %llx: %d\n",
>> +                       addr, rc);
> 
> Use pci_err() or pe_err().
>
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 9986ac3..362f54b 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -284,7 +284,8 @@  int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
 				  uint32_t qtoggle,
 				  uint32_t qindex);
 int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
-
+int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
+			 uint64_t desc, uint16_t pe_number);
 int64_t opal_imc_counters_init(uint32_t type, uint64_t address,
 							uint64_t cpu_pir);
 int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir);
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index 5cd0f52..442d5445c 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -273,6 +273,7 @@  int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3,	\
 OPAL_CALL(opal_imc_counters_init,		OPAL_IMC_COUNTERS_INIT);
 OPAL_CALL(opal_imc_counters_start,		OPAL_IMC_COUNTERS_START);
 OPAL_CALL(opal_imc_counters_stop,		OPAL_IMC_COUNTERS_STOP);
+OPAL_CALL(opal_pci_set_p2p,			OPAL_PCI_SET_P2P);
 OPAL_CALL(opal_get_powercap,			OPAL_GET_POWERCAP);
 OPAL_CALL(opal_set_powercap,			OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,		OPAL_GET_POWER_SHIFT_RATIO);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 57d3a6a..9ecc576 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3706,18 +3706,208 @@  static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
 	}
 }
 
+#ifdef CONFIG_PCI_P2PDMA
+static DEFINE_MUTEX(p2p_mutex);
+
+static bool pnv_pci_controller_owns_addr(struct pci_controller *hose,
+					 phys_addr_t addr, size_t size)
+{
+	int i;
+
+	/*
+	 * It seems safe to assume the full range is under the same PHB, so we
+	 * can ignore the size.
+	 */
+	for (i = 0; i < ARRAY_SIZE(hose->mem_resources); i++) {
+		struct resource *res = &hose->mem_resources[i];
+
+		if (res->flags && addr >= res->start && addr < res->end)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * find the phb owning a mmio address if not owned locally
+ */
+static struct pnv_phb *pnv_pci_find_owning_phb(struct pci_dev *pdev,
+					       phys_addr_t addr, size_t size)
+{
+	struct pci_controller *hose;
+
+	/* fast path */
+	if (pnv_pci_controller_owns_addr(pdev->bus->sysdata, addr, size))
+		return NULL;
+
+	list_for_each_entry(hose, &hose_list, list_node) {
+		struct pnv_phb *phb = hose->private_data;
+
+		if (phb->type != PNV_PHB_NPU_NVLINK &&
+		    phb->type != PNV_PHB_NPU_OCAPI) {
+			if (pnv_pci_controller_owns_addr(hose, addr, size))
+				return phb;
+		}
+	}
+	return NULL;
+}
+
+static u64 pnv_pci_dma_dir_to_opal_p2p(enum dma_data_direction dir)
+{
+	if (dir == DMA_TO_DEVICE)
+		return OPAL_PCI_P2P_STORE;
+	else if (dir == DMA_FROM_DEVICE)
+		return OPAL_PCI_P2P_LOAD;
+	else if (dir == DMA_BIDIRECTIONAL)
+		return OPAL_PCI_P2P_LOAD | OPAL_PCI_P2P_STORE;
+	else
+		return 0;
+}
+
+static int pnv_pci_ioda_enable_p2p(struct pci_dev *initiator,
+				   struct pnv_phb *phb_target,
+				   enum dma_data_direction dir)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb_init;
+	struct pnv_ioda_pe *pe_init;
+	u64 desc;
+	int rc;
+
+	if (!opal_check_token(OPAL_PCI_SET_P2P))
+		return -ENXIO;
+
+	hose = pci_bus_to_host(initiator->bus);
+	phb_init = hose->private_data;
+
+	pe_init = pnv_ioda_get_pe(initiator);
+	if (!pe_init)
+		return -ENODEV;
+
+	if (!pe_init->tce_bypass_enabled)
+		return -EINVAL;
+
+	/*
+	 * Configuring the initiator's PHB requires to adjust its TVE#1
+	 * setting. Since the same device can be an initiator several times for
+	 * different target devices, we need to keep a reference count to know
+	 * when we can restore the default bypass setting on its TVE#1 when
+	 * disabling. Opal is not tracking PE states, so we add a reference
+	 * count on the PE in linux.
+	 *
+	 * For the target, the configuration is per PHB, so we keep a
+	 * target reference count on the PHB.
+	 */
+	mutex_lock(&p2p_mutex);
+
+	desc = OPAL_PCI_P2P_ENABLE | pnv_pci_dma_dir_to_opal_p2p(dir);
+	/* always go to opal to validate the configuration */
+	rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, desc,
+			      pe_init->pe_number);
+	if (rc != OPAL_SUCCESS) {
+		rc = -EIO;
+		goto out;
+	}
+
+	pe_init->p2p_initiator_count++;
+	phb_target->p2p_target_count++;
+
+
+	rc = 0;
+out:
+	mutex_unlock(&p2p_mutex);
+	return rc;
+}
+
+static int pnv_pci_dma_map_resource(struct pci_dev *pdev,
+				    phys_addr_t phys_addr, size_t size,
+				    enum dma_data_direction dir)
+{
+	struct pnv_phb *target_phb;
+
+	target_phb = pnv_pci_find_owning_phb(pdev, phys_addr, size);
+	if (!target_phb)
+		return 0;
+
+	return pnv_pci_ioda_enable_p2p(pdev, target_phb, dir);
+}
+
+static int pnv_pci_ioda_disable_p2p(struct pci_dev *initiator,
+		struct pnv_phb *phb_target)
+{
+	struct pci_controller *hose;
+	struct pnv_phb *phb_init;
+	struct pnv_ioda_pe *pe_init;
+	int rc;
+
+	if (!opal_check_token(OPAL_PCI_SET_P2P))
+		return -ENXIO;
+
+	hose = pci_bus_to_host(initiator->bus);
+	phb_init = hose->private_data;
+
+	pe_init = pnv_ioda_get_pe(initiator);
+	if (!pe_init)
+		return -ENODEV;
+
+	mutex_lock(&p2p_mutex);
+
+	if (!pe_init->p2p_initiator_count || !phb_target->p2p_target_count) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (--pe_init->p2p_initiator_count == 0)
+		pnv_pci_ioda2_set_bypass(pe_init, true);
+
+	if (--phb_target->p2p_target_count == 0) {
+		rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id,
+				      0, pe_init->pe_number);
+		if (rc != OPAL_SUCCESS) {
+			rc = -EIO;
+			goto out;
+		}
+	}
+
+	rc = 0;
+out:
+	mutex_unlock(&p2p_mutex);
+	return rc;
+}
+
+static void pnv_pci_dma_unmap_resource(struct pci_dev *pdev,
+				       dma_addr_t addr, size_t size,
+				       enum dma_data_direction dir)
+{
+	struct pnv_phb *target_phb;
+	int rc;
+
+	target_phb = pnv_pci_find_owning_phb(pdev, addr, size);
+	if (!target_phb)
+		return;
+
+	rc = pnv_pci_ioda_disable_p2p(pdev, target_phb);
+	if (rc)
+		dev_err(&pdev->dev, "Failed to undo PCI peer-to-peer setup for address %llx: %d\n",
+			addr, rc);
+}
+#endif
+
 static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
-	.dma_dev_setup		= pnv_pci_ioda_dma_dev_setup,
-	.dma_bus_setup		= pnv_pci_ioda_dma_bus_setup,
-	.iommu_bypass_supported	= pnv_pci_ioda_iommu_bypass_supported,
-	.setup_msi_irqs		= pnv_setup_msi_irqs,
-	.teardown_msi_irqs	= pnv_teardown_msi_irqs,
-	.enable_device_hook	= pnv_pci_enable_device_hook,
-	.release_device		= pnv_pci_release_device,
-	.window_alignment	= pnv_pci_window_alignment,
-	.setup_bridge		= pnv_pci_setup_bridge,
-	.reset_secondary_bus	= pnv_pci_reset_secondary_bus,
-	.shutdown		= pnv_pci_ioda_shutdown,
+	.dma_dev_setup			= pnv_pci_ioda_dma_dev_setup,
+	.dma_bus_setup			= pnv_pci_ioda_dma_bus_setup,
+	.iommu_bypass_supported		= pnv_pci_ioda_iommu_bypass_supported,
+	.setup_msi_irqs			= pnv_setup_msi_irqs,
+	.teardown_msi_irqs		= pnv_teardown_msi_irqs,
+	.enable_device_hook		= pnv_pci_enable_device_hook,
+	.release_device			= pnv_pci_release_device,
+	.window_alignment		= pnv_pci_window_alignment,
+	.setup_bridge			= pnv_pci_setup_bridge,
+	.reset_secondary_bus		= pnv_pci_reset_secondary_bus,
+	.shutdown			= pnv_pci_ioda_shutdown,
+#ifdef CONFIG_PCI_P2PDMA
+	.dma_direct_map_resource	= pnv_pci_dma_map_resource,
+	.dma_direct_unmap_resource	= pnv_pci_dma_unmap_resource,
+#endif
 };
 
 static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index d3bbdea..5f85d9c 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -79,6 +79,10 @@  struct pnv_ioda_pe {
 	struct pnv_ioda_pe	*master;
 	struct list_head	slaves;
 
+#ifdef CONFIG_PCI_P2PDMA
+	/* PCI peer-to-peer*/
+	int			p2p_initiator_count;
+#endif
 	/* Link in list of PE#s */
 	struct list_head	list;
 };
@@ -168,6 +172,11 @@  struct pnv_phb {
 	/* PHB and hub diagnostics */
 	unsigned int		diag_data_size;
 	u8			*diag_data;
+
+#ifdef CONFIG_PCI_P2PDMA
+	/* PCI peer-to-peer*/
+	int			p2p_target_count;
+#endif
 };
 
 extern struct pci_ops pnv_pci_ops;