diff mbox

[v2,1/4] pci: OF: Fix the conversion of IO ranges into IO resources.

Message ID 1393506402-11474-2-git-send-email-Liviu.Dudau@arm.com
State Deferred
Headers show

Commit Message

Liviu Dudau Feb. 27, 2014, 1:06 p.m. UTC
The ranges property for a host bridge controller in DT describes
the mapping between the PCI bus address and the CPU physical address.
The resources framework however expects that the IO resources start
at a pseudo "port" address 0 (zero) and have a maximum size of 64kb.
The conversion from pci ranges to resources failed to take that into
account.

In the process move the function into drivers/of/address.c as it
now depends on pci_address_to_pio() code.

Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>

Comments

Arnd Bergmann Feb. 27, 2014, 1:20 p.m. UTC | #1
On Thursday 27 February 2014 13:06:39 Liviu Dudau wrote:
> +       res->flags = range->flags;
> +       if (res->flags & IORESOURCE_IO) {
> +               unsigned long port;
> +               port = pci_address_to_pio(range->pci_addr);
> +               if (port == (unsigned long)-1) {
> +                       res->start = (resource_size_t)OF_BAD_ADDR;
> +                       res->end = (resource_size_t)OF_BAD_ADDR;
> +                       return;
> +               }
> 

I think this conflicts with the way that pci_address_to_pio() is
defined on powerpc, where it expects a CPU address as the input,
not a PCI i/o address.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andrew Murray Feb. 27, 2014, 1:22 p.m. UTC | #2
On 27 February 2014 13:06, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
>
> The ranges property for a host bridge controller in DT describes
> the mapping between the PCI bus address and the CPU physical address.
> The resources framework however expects that the IO resources start
> at a pseudo "port" address 0 (zero) and have a maximum size of 64kb.

Is this just in the case of ARM? (I've tried to keep up with the
conversation, but apologies if I've misunderstood).

> The conversion from pci ranges to resources failed to take that into
> account.
>
> In the process move the function into drivers/of/address.c as it
> now depends on pci_address_to_pio() code.
>
> Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
>
> diff --git a/drivers/of/address.c b/drivers/of/address.c
> index 1a54f1f..7cf2b16 100644
> --- a/drivers/of/address.c
> +++ b/drivers/of/address.c
> @@ -719,3 +719,34 @@ void __iomem *of_iomap(struct device_node *np, int index)
>         return ioremap(res.start, resource_size(&res));
>  }
>  EXPORT_SYMBOL(of_iomap);
> +
> +/**
> + * of_pci_range_to_resource - Create a resource from an of_pci_range
> + * @range:     the PCI range that describes the resource
> + * @np:                device node where the range belongs to
> + * @res:       pointer to a valid resource that will be updated to
> + *              reflect the values contained in the range.
> + * Note that if the range is an IO range, the resource will be converted
> + * using pci_address_to_pio() which can fail if it is called to early or
> + * if the range cannot be matched to any host bridge IO space.
> + */
> +void of_pci_range_to_resource(struct of_pci_range *range,
> +       struct device_node *np, struct resource *res)
> +{
> +       res->flags = range->flags;
> +       if (res->flags & IORESOURCE_IO) {
> +               unsigned long port;
> +               port = pci_address_to_pio(range->pci_addr);

Is this likely to break existing users of of_pci_range_to_resource?

For example arch/mips: IO_SPACE_LIMIT defaults to 0xffff and there is
no overridden implementation for pci_address_to_pio, therefore this
will set res->start to OF_BAD_ADDR whereas previously it would have
been the CPU address for I/O (assuming the cpu_addr was previously >
64K).

I have no idea if I/O previously worked for mips, but this patch seems
to change that behavior. It may be a similar story for microblaze and
powerpc.

Andrew Murray

> +               if (port == (unsigned long)-1) {
> +                       res->start = (resource_size_t)OF_BAD_ADDR;
> +                       res->end = (resource_size_t)OF_BAD_ADDR;
> +                       return;
> +               }
> +               res->start = port;
> +       } else {
> +               res->start = range->cpu_addr;
> +       }
> +       res->end = res->start + range->size - 1;
> +       res->parent = res->child = res->sibling = NULL;
> +       res->name = np->full_name;
> +}
> diff --git a/include/linux/of_address.h b/include/linux/of_address.h
> index 5f6ed6b..a667762 100644
> --- a/include/linux/of_address.h
> +++ b/include/linux/of_address.h
> @@ -23,17 +23,8 @@ struct of_pci_range {
>  #define for_each_of_pci_range(parser, range) \
>         for (; of_pci_range_parser_one(parser, range);)
>
> -static inline void of_pci_range_to_resource(struct of_pci_range *range,
> -                                           struct device_node *np,
> -                                           struct resource *res)
> -{
> -       res->flags = range->flags;
> -       res->start = range->cpu_addr;
> -       res->end = range->cpu_addr + range->size - 1;
> -       res->parent = res->child = res->sibling = NULL;
> -       res->name = np->full_name;
> -}
> -
> +extern void of_pci_range_to_resource(struct of_pci_range *range,
> +               struct device_node *np, struct resource *res);
>  /* Translate a DMA address from device space to CPU space */
>  extern u64 of_translate_dma_address(struct device_node *dev,
>                                     const __be32 *in_addr);
> --
> 1.9.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau Feb. 27, 2014, 1:45 p.m. UTC | #3
On Thu, Feb 27, 2014 at 01:20:54PM +0000, Arnd Bergmann wrote:
> On Thursday 27 February 2014 13:06:39 Liviu Dudau wrote:
> > +       res->flags = range->flags;
> > +       if (res->flags & IORESOURCE_IO) {
> > +               unsigned long port;
> > +               port = pci_address_to_pio(range->pci_addr);
> > +               if (port == (unsigned long)-1) {
> > +                       res->start = (resource_size_t)OF_BAD_ADDR;
> > +                       res->end = (resource_size_t)OF_BAD_ADDR;
> > +                       return;
> > +               }
> > 
> 
> I think this conflicts with the way that pci_address_to_pio() is
> defined on powerpc, where it expects a CPU address as the input,
> not a PCI i/o address.

And you are right, maybe what I need is to fix weak version of the
function, as that one cannot cope with cpu addresses. But I think
the idea still stands.

Thanks for reviewing this version as well!

Liviu

> 
> 	Arnd
> 
>
Liviu Dudau Feb. 27, 2014, 1:56 p.m. UTC | #4
On Thu, Feb 27, 2014 at 01:22:19PM +0000, Andrew Murray wrote:
> On 27 February 2014 13:06, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> >
> > The ranges property for a host bridge controller in DT describes
> > the mapping between the PCI bus address and the CPU physical address.
> > The resources framework however expects that the IO resources start
> > at a pseudo "port" address 0 (zero) and have a maximum size of 64kb.
> 
> Is this just in the case of ARM? (I've tried to keep up with the
> conversation, but apologies if I've misunderstood).
> 
> > The conversion from pci ranges to resources failed to take that into
> > account.
> >
> > In the process move the function into drivers/of/address.c as it
> > now depends on pci_address_to_pio() code.
> >
> > Signed-off-by: Liviu Dudau <Liviu.Dudau@arm.com>
> >
> > diff --git a/drivers/of/address.c b/drivers/of/address.c
> > index 1a54f1f..7cf2b16 100644
> > --- a/drivers/of/address.c
> > +++ b/drivers/of/address.c
> > @@ -719,3 +719,34 @@ void __iomem *of_iomap(struct device_node *np, int index)
> >         return ioremap(res.start, resource_size(&res));
> >  }
> >  EXPORT_SYMBOL(of_iomap);
> > +
> > +/**
> > + * of_pci_range_to_resource - Create a resource from an of_pci_range
> > + * @range:     the PCI range that describes the resource
> > + * @np:                device node where the range belongs to
> > + * @res:       pointer to a valid resource that will be updated to
> > + *              reflect the values contained in the range.
> > + * Note that if the range is an IO range, the resource will be converted
> > + * using pci_address_to_pio() which can fail if it is called to early or
> > + * if the range cannot be matched to any host bridge IO space.
> > + */
> > +void of_pci_range_to_resource(struct of_pci_range *range,
> > +       struct device_node *np, struct resource *res)
> > +{
> > +       res->flags = range->flags;
> > +       if (res->flags & IORESOURCE_IO) {
> > +               unsigned long port;
> > +               port = pci_address_to_pio(range->pci_addr);
> 
> Is this likely to break existing users of of_pci_range_to_resource?

I've tested the change with a tegra2 based device (trimslice) and I've
got a functional board.

> 
> For example arch/mips: IO_SPACE_LIMIT defaults to 0xffff and there is
> no overridden implementation for pci_address_to_pio, therefore this
> will set res->start to OF_BAD_ADDR whereas previously it would have
> been the CPU address for I/O (assuming the cpu_addr was previously >
> 64K).

And that is why I'm using pci_addr as the physical address passed to
pci_address_to_pio. My idea is that when converting ranges into resources,
for IORESOURCE_IO we should generate a resource that uses logical I/O
addresses, not physical CPU addresses. How we get that needs debating,
currently I've took the shortcut of using the pci_addr to get the port
number.

> 
> I have no idea if I/O previously worked for mips, but this patch seems
> to change that behavior. It may be a similar story for microblaze and
> powerpc.

Both microblaze and powerpc share an identical implementation and it is
expecting that the physical address passed as parameter fits between
io_base_phys and io_base_phys + pcibios_io_size(hose). So yes, the
correct way is to use cpu_addr and fix the weak implementation? But we
don't have enough information for the weak implementation to work, as
we don't know where the physical IO base start (we are just about to
find out from DT).

Thanks for reviewing these patches!

Liviu

> 
> Andrew Murray
> 
> > +               if (port == (unsigned long)-1) {
> > +                       res->start = (resource_size_t)OF_BAD_ADDR;
> > +                       res->end = (resource_size_t)OF_BAD_ADDR;
> > +                       return;
> > +               }
> > +               res->start = port;
> > +       } else {
> > +               res->start = range->cpu_addr;
> > +       }
> > +       res->end = res->start + range->size - 1;
> > +       res->parent = res->child = res->sibling = NULL;
> > +       res->name = np->full_name;
> > +}
> > diff --git a/include/linux/of_address.h b/include/linux/of_address.h
> > index 5f6ed6b..a667762 100644
> > --- a/include/linux/of_address.h
> > +++ b/include/linux/of_address.h
> > @@ -23,17 +23,8 @@ struct of_pci_range {
> >  #define for_each_of_pci_range(parser, range) \
> >         for (; of_pci_range_parser_one(parser, range);)
> >
> > -static inline void of_pci_range_to_resource(struct of_pci_range *range,
> > -                                           struct device_node *np,
> > -                                           struct resource *res)
> > -{
> > -       res->flags = range->flags;
> > -       res->start = range->cpu_addr;
> > -       res->end = range->cpu_addr + range->size - 1;
> > -       res->parent = res->child = res->sibling = NULL;
> > -       res->name = np->full_name;
> > -}
> > -
> > +extern void of_pci_range_to_resource(struct of_pci_range *range,
> > +               struct device_node *np, struct resource *res);
> >  /* Translate a DMA address from device space to CPU space */
> >  extern u64 of_translate_dma_address(struct device_node *dev,
> >                                     const __be32 *in_addr);
> > --
> > 1.9.0
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Arnd Bergmann Feb. 27, 2014, 1:58 p.m. UTC | #5
On Thursday 27 February 2014 13:22:19 Andrew Murray wrote:
> On 27 February 2014 13:06, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> >
> > The ranges property for a host bridge controller in DT describes
> > the mapping between the PCI bus address and the CPU physical address.
> > The resources framework however expects that the IO resources start
> > at a pseudo "port" address 0 (zero) and have a maximum size of 64kb.
> 
> Is this just in the case of ARM? (I've tried to keep up with the
> conversation, but apologies if I've misunderstood).

We are a bit inconsistent on Linux. The limitation cited above is
indeed something we came up with on ARM to simplify the possible
cases we have to worry about.

In theory, each PCI host can have its own 4GB I/O space, but in practice
limiting to 64KB is the most reasonable way to use it, and that
still provides plenty of room for I/O registers since most devices
don't use any, and at most a few bytes of address space.

The limit we enforce on Linux is IO_SPACE_LIMIT, which is sometimes set
to 0xffffffff, but I think most if not all of those cases are done so
in error.

> > + * of_pci_range_to_resource - Create a resource from an of_pci_range
> > + * @range:     the PCI range that describes the resource
> > + * @np:                device node where the range belongs to
> > + * @res:       pointer to a valid resource that will be updated to
> > + *              reflect the values contained in the range.
> > + * Note that if the range is an IO range, the resource will be converted
> > + * using pci_address_to_pio() which can fail if it is called to early or
> > + * if the range cannot be matched to any host bridge IO space.
> > + */
> > +void of_pci_range_to_resource(struct of_pci_range *range,
> > +       struct device_node *np, struct resource *res)
> > +{
> > +       res->flags = range->flags;
> > +       if (res->flags & IORESOURCE_IO) {
> > +               unsigned long port;
> > +               port = pci_address_to_pio(range->pci_addr);
> 
> Is this likely to break existing users of of_pci_range_to_resource?
> 
> For example arch/mips: IO_SPACE_LIMIT defaults to 0xffff and there is
> no overridden implementation for pci_address_to_pio, therefore this
> will set res->start to OF_BAD_ADDR whereas previously it would have
> been the CPU address for I/O (assuming the cpu_addr was previously >
> 64K).

The function is used on MIPS, Microblaze and ARM at the moment.
MIPS currently gets it wrong, by calling pci_add_resource_offset
on the CPU address for IORESOURCE_IO, which is the wrong space.
Limiting to IO_SPACE_LIMIT will fix it for the first host bridge
on MIPS, and the second one will still not work, until
IO_SPACE_LIMIT is fixed.

On ARM, I believe we have a couple of drivers that make the
same mistake, and others that at the moment override the
address with range->pci_addr, so they won't change.

Microblaze does 'range.cpu_addr = range.pci_addr;' for the I/O
space window to fix it up. We should probably take a closer look there.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Feb. 27, 2014, 2:08 p.m. UTC | #6
On Thursday 27 February 2014 13:56:16 Liviu Dudau wrote:
> On Thu, Feb 27, 2014 at 01:22:19PM +0000, Andrew Murray wrote:
> > On 27 February 2014 13:06, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > >
> > > +/**
> > > + * of_pci_range_to_resource - Create a resource from an of_pci_range
> > > + * @range:     the PCI range that describes the resource
> > > + * @np:                device node where the range belongs to
> > > + * @res:       pointer to a valid resource that will be updated to
> > > + *              reflect the values contained in the range.
> > > + * Note that if the range is an IO range, the resource will be converted
> > > + * using pci_address_to_pio() which can fail if it is called to early or
> > > + * if the range cannot be matched to any host bridge IO space.
> > > + */
> > > +void of_pci_range_to_resource(struct of_pci_range *range,
> > > +       struct device_node *np, struct resource *res)
> > > +{
> > > +       res->flags = range->flags;
> > > +       if (res->flags & IORESOURCE_IO) {
> > > +               unsigned long port;
> > > +               port = pci_address_to_pio(range->pci_addr);
> > 
> > Is this likely to break existing users of of_pci_range_to_resource?
> 
> I've tested the change with a tegra2 based device (trimslice) and I've
> got a functional board.

Did you have any devices using I/O ports though? They are fairly rare
these days.

> > I have no idea if I/O previously worked for mips, but this patch seems
> > to change that behavior. It may be a similar story for microblaze and
> > powerpc.
> 
> Both microblaze and powerpc share an identical implementation and it is
> expecting that the physical address passed as parameter fits between
> io_base_phys and io_base_phys + pcibios_io_size(hose). So yes, the
> correct way is to use cpu_addr and fix the weak implementation? But we
> don't have enough information for the weak implementation to work, as
> we don't know where the physical IO base start (we are just about to
> find out from DT).

I think using pci_address_to_pio() at that point is just wrong
in either way. Before the host is fully registered, you can't actually
look up the port number -- you are only trying to assign one at this time.

The implementation that Will wrote for ARM would work here: find the
next available virtual I/O range, call pci_ioremap_io on range->pci_addr
and then return the virtual address. Unfortunately that code is not
architecture independent at this time, and we will first have to come
up with something that can be made to work for powerpc, microblaze,
mips and arm.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau Feb. 27, 2014, 2:21 p.m. UTC | #7
On Thu, Feb 27, 2014 at 02:08:44PM +0000, Arnd Bergmann wrote:
> On Thursday 27 February 2014 13:56:16 Liviu Dudau wrote:
> > On Thu, Feb 27, 2014 at 01:22:19PM +0000, Andrew Murray wrote:
> > > On 27 February 2014 13:06, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > > >
> > > > +/**
> > > > + * of_pci_range_to_resource - Create a resource from an of_pci_range
> > > > + * @range:     the PCI range that describes the resource
> > > > + * @np:                device node where the range belongs to
> > > > + * @res:       pointer to a valid resource that will be updated to
> > > > + *              reflect the values contained in the range.
> > > > + * Note that if the range is an IO range, the resource will be converted
> > > > + * using pci_address_to_pio() which can fail if it is called to early or
> > > > + * if the range cannot be matched to any host bridge IO space.
> > > > + */
> > > > +void of_pci_range_to_resource(struct of_pci_range *range,
> > > > +       struct device_node *np, struct resource *res)
> > > > +{
> > > > +       res->flags = range->flags;
> > > > +       if (res->flags & IORESOURCE_IO) {
> > > > +               unsigned long port;
> > > > +               port = pci_address_to_pio(range->pci_addr);
> > > 
> > > Is this likely to break existing users of of_pci_range_to_resource?
> > 
> > I've tested the change with a tegra2 based device (trimslice) and I've
> > got a functional board.
> 
> Did you have any devices using I/O ports though? They are fairly rare
> these days.
> 
> > > I have no idea if I/O previously worked for mips, but this patch seems
> > > to change that behavior. It may be a similar story for microblaze and
> > > powerpc.
> > 
> > Both microblaze and powerpc share an identical implementation and it is
> > expecting that the physical address passed as parameter fits between
> > io_base_phys and io_base_phys + pcibios_io_size(hose). So yes, the
> > correct way is to use cpu_addr and fix the weak implementation? But we
> > don't have enough information for the weak implementation to work, as
> > we don't know where the physical IO base start (we are just about to
> > find out from DT).
> 
> I think using pci_address_to_pio() at that point is just wrong
> in either way. Before the host is fully registered, you can't actually
> look up the port number -- you are only trying to assign one at this time.
> 
> The implementation that Will wrote for ARM would work here: find the
> next available virtual I/O range, call pci_ioremap_io on range->pci_addr
> and then return the virtual address. Unfortunately that code is not
> architecture independent at this time, and we will first have to come
> up with something that can be made to work for powerpc, microblaze,
> mips and arm.

No, no, no... we cannot use the virtual address as the start of the resource
as this will later be used when doing pcibios_resource_to_bus(). What
we need here is a portable way of converting from PCI range that uses physical
CPU addresses to a IORESOURCE_IO type resource that uses logical IO
addresses. Using logical IO values works, as Bjorn's code treats it as
physical address.

The alternative is to introduce a thorough revision of the pci_host_bridge
calls that understand that IORESOURCE_IO and IORESOURCE_MEM are different
beasts. That is what caught Will and others a number of times.

Best regards,
Liviu

> 
> 	Arnd
> 
>
Liviu Dudau Feb. 27, 2014, 2:30 p.m. UTC | #8
On Thu, Feb 27, 2014 at 02:08:44PM +0000, Arnd Bergmann wrote:
> On Thursday 27 February 2014 13:56:16 Liviu Dudau wrote:
> > On Thu, Feb 27, 2014 at 01:22:19PM +0000, Andrew Murray wrote:
> > > On 27 February 2014 13:06, Liviu Dudau <Liviu.Dudau@arm.com> wrote:
> > > >
> > > > +/**
> > > > + * of_pci_range_to_resource - Create a resource from an of_pci_range
> > > > + * @range:     the PCI range that describes the resource
> > > > + * @np:                device node where the range belongs to
> > > > + * @res:       pointer to a valid resource that will be updated to
> > > > + *              reflect the values contained in the range.
> > > > + * Note that if the range is an IO range, the resource will be converted
> > > > + * using pci_address_to_pio() which can fail if it is called to early or
> > > > + * if the range cannot be matched to any host bridge IO space.
> > > > + */
> > > > +void of_pci_range_to_resource(struct of_pci_range *range,
> > > > +       struct device_node *np, struct resource *res)
> > > > +{
> > > > +       res->flags = range->flags;
> > > > +       if (res->flags & IORESOURCE_IO) {
> > > > +               unsigned long port;
> > > > +               port = pci_address_to_pio(range->pci_addr);
> > > 
> > > Is this likely to break existing users of of_pci_range_to_resource?
> > 
> > I've tested the change with a tegra2 based device (trimslice) and I've
> > got a functional board.
> 
> Did you have any devices using I/O ports though? They are fairly rare
> these days.

# cat /proc/ioports 
00001000-0000ffff : PCI0 I/O
  00001000-00001fff : PCI Bus 0000:01
    00001000-000010ff : 0000:01:00.0
      00001000-000010ff : r8169

Looks like it does.

Liviu

> 
> > > I have no idea if I/O previously worked for mips, but this patch seems
> > > to change that behavior. It may be a similar story for microblaze and
> > > powerpc.
> > 
> > Both microblaze and powerpc share an identical implementation and it is
> > expecting that the physical address passed as parameter fits between
> > io_base_phys and io_base_phys + pcibios_io_size(hose). So yes, the
> > correct way is to use cpu_addr and fix the weak implementation? But we
> > don't have enough information for the weak implementation to work, as
> > we don't know where the physical IO base start (we are just about to
> > find out from DT).
> 
> I think using pci_address_to_pio() at that point is just wrong
> in either way. Before the host is fully registered, you can't actually
> look up the port number -- you are only trying to assign one at this time.
> 
> The implementation that Will wrote for ARM would work here: find the
> next available virtual I/O range, call pci_ioremap_io on range->pci_addr
> and then return the virtual address. Unfortunately that code is not
> architecture independent at this time, and we will first have to come
> up with something that can be made to work for powerpc, microblaze,
> mips and arm.
> 
> 	Arnd
> 
>
Arnd Bergmann Feb. 27, 2014, 4 p.m. UTC | #9
On Thursday 27 February 2014 14:21:03 Liviu Dudau wrote:
> On Thu, Feb 27, 2014 at 02:08:44PM +0000, Arnd Bergmann wrote:

> > I think using pci_address_to_pio() at that point is just wrong
> > in either way. Before the host is fully registered, you can't actually
> > look up the port number -- you are only trying to assign one at this time.
> > 
> > The implementation that Will wrote for ARM would work here: find the
> > next available virtual I/O range, call pci_ioremap_io on range->pci_addr
> > and then return the virtual address. Unfortunately that code is not
> > architecture independent at this time, and we will first have to come
> > up with something that can be made to work for powerpc, microblaze,
> > mips and arm.
> 
> No, no, no... we cannot use the virtual address as the start of the resource
> as this will later be used when doing pcibios_resource_to_bus().

My mistake: I meant to say return the offset into the virtual window, i.e.
what IORESOURCE_IO space is about.

> What we need here is a portable way of converting from PCI range that uses physical
> CPU addresses to a IORESOURCE_IO type resource that uses logical IO
> addresses. Using logical IO values works, as Bjorn's code treats it as
> physical address.

Right.


	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 27, 2014, 6:19 p.m. UTC | #10
On Thu, Feb 27, 2014 at 01:06:39PM +0000, Liviu Dudau wrote:
> +	if (res->flags & IORESOURCE_IO) {
> +		unsigned long port;
> +		port = pci_address_to_pio(range->pci_addr);

This looks very suspicious, pci_addr is not unique across all domains,
so there is no way to convert from a pci_addr to the virtual IO
address without knowing the domain number as well.

I would like to see it be:
  port = pci_address_to_pio(range->cpu_addr);

cpu_addr is unique across all domains.

Looking at the microblaze and PPC versions I think the above version
is actually correct (assuming io_base_phys is the CPU address of the
IO window)

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau Feb. 27, 2014, 7:12 p.m. UTC | #11
On Thu, Feb 27, 2014 at 11:19:51AM -0700, Jason Gunthorpe wrote:
> On Thu, Feb 27, 2014 at 01:06:39PM +0000, Liviu Dudau wrote:
> > +	if (res->flags & IORESOURCE_IO) {
> > +		unsigned long port;
> > +		port = pci_address_to_pio(range->pci_addr);
> 
> This looks very suspicious, pci_addr is not unique across all domains,
> so there is no way to convert from a pci_addr to the virtual IO
> address without knowing the domain number as well.
> 
> I would like to see it be:
>   port = pci_address_to_pio(range->cpu_addr);
> 
> cpu_addr is unique across all domains.

Jason,

First thanks for reviewing the updated series.

We have agreed early on that indeed using range->cpu_addr is the correct aproach
and me taking the lazy shortcut of using range->pci_addr because of the broken
default implementation of pci_address_to_pio is wrong. I will fix this for v3.

The outstanding issue is how to fix pci_address_to_pio() as it will not
for for range->cpu_addr > IO_SPACE_LIMIT (16MB in my case).

Best regards,
Liviu

> 
> Looking at the microblaze and PPC versions I think the above version
> is actually correct (assuming io_base_phys is the CPU address of the
> IO window)
> 
> Jason
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 27, 2014, 7:36 p.m. UTC | #12
On Thu, Feb 27, 2014 at 07:12:59PM +0000, Liviu Dudau wrote:
> The outstanding issue is how to fix pci_address_to_pio() as it will not
> for for range->cpu_addr > IO_SPACE_LIMIT (16MB in my case).

The default actually looks fine to me, it is the correct behavior for
systems that actually have a dedicated IO space (like x86) where the
'CPU' value for IO is the exact value used in the IO accessor
instructions. In this case the IO_SPACE_LIMIT test is appropriate.

It also looks correct for architectures that use the CPU MMIO address
as the IO address directly (where IO_SPACE_LIMIT would be 4G)

Architectures that use the virtual IO window technique will always
require a custom pci_address_to_pio implementation.

BTW, something that occured to me after reading the patches:

For ARM64 you might want to think about doing away with the fixed
virtual IO window like we see in ARM32. Just use the CPU MMIO address
directly within the kernel, and implement a ioport_map to setup the MM
on demand.

I think the legacy reasons for having all those layers of translation
are probably not applicable to ARM64, and it is much simpler without
the extra translation step....

Arnd, what do you think?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Feb. 27, 2014, 7:48 p.m. UTC | #13
On Thursday 27 February 2014 12:36:27 Jason Gunthorpe wrote:
> On Thu, Feb 27, 2014 at 07:12:59PM +0000, Liviu Dudau wrote:
> > The outstanding issue is how to fix pci_address_to_pio() as it will not
> > for for range->cpu_addr > IO_SPACE_LIMIT (16MB in my case).
> 
> The default actually looks fine to me, it is the correct behavior for
> systems that actually have a dedicated IO space (like x86) where the
> 'CPU' value for IO is the exact value used in the IO accessor
> instructions. In this case the IO_SPACE_LIMIT test is appropriate.

Right.

> It also looks correct for architectures that use the CPU MMIO address
> as the IO address directly (where IO_SPACE_LIMIT would be 4G)

Are you aware of any that still do? I thought we had stopped doing
that.

> Architectures that use the virtual IO window technique will always
> require a custom pci_address_to_pio implementation.

Hmm, at the moment we only call it from of_address_to_resource(),
which in turn does not get called on PCI devices, and does not
call pci_address_to_pio for 'simple' platform devices. The only
case I can think of where it actually matters is when we have
ISA devices in DT that use an I/O port address in the reg property,
and that case hopefully won't happen on ARM32 or ARM64.

> BTW, something that occured to me after reading the patches:
> 
> For ARM64 you might want to think about doing away with the fixed
> virtual IO window like we see in ARM32. Just use the CPU MMIO address
> directly within the kernel, and implement a ioport_map to setup the MM
> on demand.
> 
> I think the legacy reasons for having all those layers of translation
> are probably not applicable to ARM64, and it is much simpler without
> the extra translation step....
> 
> Arnd, what do you think?

Either I don't like it or I misunderstand you ;-)

Most PCI drivers normally don't call ioport_map or pci_iomap, so
we can't just do it there. If you are thinking of calling ioport_map
for every PCI device that has an I/O BAR and storing the virtual
address in the pci_dev resource, I don't see what that gains us
in terms of complexity, and it will also break /dev/port.

	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Feb. 27, 2014, 8:07 p.m. UTC | #14
On Thu, Feb 27, 2014 at 08:48:08PM +0100, Arnd Bergmann wrote:
> > It also looks correct for architectures that use the CPU MMIO address
> > as the IO address directly (where IO_SPACE_LIMIT would be 4G)
> 
> Are you aware of any that still do? I thought we had stopped doing
> that.

I thought ia64 used to, but it has been a long time since I've touched
one...

> > Architectures that use the virtual IO window technique will always
> > require a custom pci_address_to_pio implementation.
> 
> Hmm, at the moment we only call it from of_address_to_resource(),
> which in turn does not get called on PCI devices, and does not
> call pci_address_to_pio for 'simple' platform devices. The only
> case I can think of where it actually matters is when we have
> ISA devices in DT that use an I/O port address in the reg property,
> and that case hopefully won't happen on ARM32 or ARM64.

Sure, I ment, after Liviu's patch it will become required since he is
cleverly using it to figure out what the io mapping the bridge driver
setup before calling the helper.

> > I think the legacy reasons for having all those layers of translation
> > are probably not applicable to ARM64, and it is much simpler without
> > the extra translation step....
> > 
> > Arnd, what do you think?
> 
> Either I don't like it or I misunderstand you ;-)
> 
> Most PCI drivers normally don't call ioport_map or pci_iomap, so
> we can't just do it there. If you are thinking of calling ioport_map

Okay, that was one of the 'legacy reasons'. Certainly lots of drivers
do call pci_iomap, but if you think legacy drivers that don't are
important to ARM64 then it makes sense to use the virtual IO window.

> for every PCI device that has an I/O BAR and storing the virtual
> address in the pci_dev resource, I don't see what that gains us

Mainly we get to drop the fancy dynamic allocation stuff for the fixed
virtual window, and it gives the option to have a 1:1 relationship
between CPU addresses and PCI BARs.

> in terms of complexity, and it will also break /dev/port.

Yes, /dev/port needs updating, it would need to iomap (arguably it
probably should be doing that already anyhow), and the hardwired limit
of 65536 needs to be replaced with the arch's IO limit, but those do
not seem to be fundemental problems with the UAPI??

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arnd Bergmann Feb. 27, 2014, 8:22 p.m. UTC | #15
On Thursday 27 February 2014 13:07:29 Jason Gunthorpe wrote:
> On Thu, Feb 27, 2014 at 08:48:08PM +0100, Arnd Bergmann wrote:
> > > It also looks correct for architectures that use the CPU MMIO address
> > > as the IO address directly (where IO_SPACE_LIMIT would be 4G)
> > 
> > Are you aware of any that still do? I thought we had stopped doing
> > that.
> 
> I thought ia64 used to, but it has been a long time since I've touched
> one...

They have a different way of doing it now, no idea how it looked in
the past:

#define IO_SPACE_LIMIT          0xffffffffffffffffUL

#define MAX_IO_SPACES_BITS              8
#define MAX_IO_SPACES                   (1UL << MAX_IO_SPACES_BITS)
#define IO_SPACE_BITS                   24
#define IO_SPACE_SIZE                   (1UL << IO_SPACE_BITS)

#define IO_SPACE_NR(port)               ((port) >> IO_SPACE_BITS)
#define IO_SPACE_BASE(space)            ((space) << IO_SPACE_BITS)
#define IO_SPACE_PORT(port)             ((port) & (IO_SPACE_SIZE - 1))

#define IO_SPACE_SPARSE_ENCODING(p)     ((((p) >> 2) << 12) | ((p) & 0xfff))

So their port number is a logical token that contains the I/O space number
and a 16MB offset.

Apparently sparc64 uses physical memory addressing for I/O space, the
same way they do for memory space, and they just set IO_SPACE_LIMIT to
0xffffffffffffffffUL.

> > > Architectures that use the virtual IO window technique will always
> > > require a custom pci_address_to_pio implementation.
> > 
> > Hmm, at the moment we only call it from of_address_to_resource(),
> > which in turn does not get called on PCI devices, and does not
> > call pci_address_to_pio for 'simple' platform devices. The only
> > case I can think of where it actually matters is when we have
> > ISA devices in DT that use an I/O port address in the reg property,
> > and that case hopefully won't happen on ARM32 or ARM64.
> 
> Sure, I ment, after Liviu's patch it will become required since he is
> cleverly using it to figure out what the io mapping the bridge driver
> setup before calling the helper.

Ok. I was arguing more that we should add this dependency.

> > > I think the legacy reasons for having all those layers of translation
> > > are probably not applicable to ARM64, and it is much simpler without
> > > the extra translation step....
> > > 
> > > Arnd, what do you think?
> > 
> > Either I don't like it or I misunderstand you ;-)
> > 
> > Most PCI drivers normally don't call ioport_map or pci_iomap, so
> > we can't just do it there. If you are thinking of calling ioport_map
> 
> Okay, that was one of the 'legacy reasons'. Certainly lots of drivers
> do call pci_iomap, but if you think legacy drivers that don't are
> important to ARM64 then it makes sense to use the virtual IO window.

I think all uses of I/O space are legacy, but I don't think that
drivers doing inb/outb are more obsolete than those doing pci_iomap.
It's got more to do with the subsystem requirements, e.g. libata
requires the use of pci_iomap.

> > for every PCI device that has an I/O BAR and storing the virtual
> > address in the pci_dev resource, I don't see what that gains us
> 
> Mainly we get to drop the fancy dynamic allocation stuff for the fixed
> virtual window, and it gives the option to have a 1:1 relationship
> between CPU addresses and PCI BARs.

I don't think the allocation is much of a problem, as long as we
can localize it in one function that is shared by everyone.
The problems I saw were all about explaining to people how it
works, but they really shouldn't have to know.


	Arnd
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Liviu Dudau Feb. 28, 2014, 12:50 p.m. UTC | #16
On Thu, Feb 27, 2014 at 08:22:12PM +0000, Arnd Bergmann wrote:
> On Thursday 27 February 2014 13:07:29 Jason Gunthorpe wrote:
> > On Thu, Feb 27, 2014 at 08:48:08PM +0100, Arnd Bergmann wrote:
> > > > It also looks correct for architectures that use the CPU MMIO address
> > > > as the IO address directly (where IO_SPACE_LIMIT would be 4G)
> > > 
> > > Are you aware of any that still do? I thought we had stopped doing
> > > that.
> > 
> > I thought ia64 used to, but it has been a long time since I've touched
> > one...
> 
> They have a different way of doing it now, no idea how it looked in
> the past:
> 
> #define IO_SPACE_LIMIT          0xffffffffffffffffUL
> 
> #define MAX_IO_SPACES_BITS              8
> #define MAX_IO_SPACES                   (1UL << MAX_IO_SPACES_BITS)
> #define IO_SPACE_BITS                   24
> #define IO_SPACE_SIZE                   (1UL << IO_SPACE_BITS)
> 
> #define IO_SPACE_NR(port)               ((port) >> IO_SPACE_BITS)
> #define IO_SPACE_BASE(space)            ((space) << IO_SPACE_BITS)
> #define IO_SPACE_PORT(port)             ((port) & (IO_SPACE_SIZE - 1))
> 
> #define IO_SPACE_SPARSE_ENCODING(p)     ((((p) >> 2) << 12) | ((p) & 0xfff))
> 
> So their port number is a logical token that contains the I/O space number
> and a 16MB offset.
> 
> Apparently sparc64 uses physical memory addressing for I/O space, the
> same way they do for memory space, and they just set IO_SPACE_LIMIT to
> 0xffffffffffffffffUL.
> 
> > > > Architectures that use the virtual IO window technique will always
> > > > require a custom pci_address_to_pio implementation.
> > > 
> > > Hmm, at the moment we only call it from of_address_to_resource(),
> > > which in turn does not get called on PCI devices, and does not
> > > call pci_address_to_pio for 'simple' platform devices. The only
> > > case I can think of where it actually matters is when we have
> > > ISA devices in DT that use an I/O port address in the reg property,
> > > and that case hopefully won't happen on ARM32 or ARM64.
> > 
> > Sure, I ment, after Liviu's patch it will become required since he is
> > cleverly using it to figure out what the io mapping the bridge driver
> > setup before calling the helper.
> 
> Ok. I was arguing more that we should add this dependency.

I've thought about this last night and I think I was trying to be too clever
for my own good. As Jason points out, arm64 needs its own version of
pci_address_to_pio(). I have an idea on how to borrow the powerpc/microblaze
one and make it useful without the need for pci_controller *hose. It would
be generic enough for other platforms that use virtual I/O windows can use,
but I'll start with it being defined for arm64 for discussions in this list.

I'll post v3 shortly.

Best regards,
Liviu

> 
> > > > I think the legacy reasons for having all those layers of translation
> > > > are probably not applicable to ARM64, and it is much simpler without
> > > > the extra translation step....
> > > > 
> > > > Arnd, what do you think?
> > > 
> > > Either I don't like it or I misunderstand you ;-)
> > > 
> > > Most PCI drivers normally don't call ioport_map or pci_iomap, so
> > > we can't just do it there. If you are thinking of calling ioport_map
> > 
> > Okay, that was one of the 'legacy reasons'. Certainly lots of drivers
> > do call pci_iomap, but if you think legacy drivers that don't are
> > important to ARM64 then it makes sense to use the virtual IO window.
> 
> I think all uses of I/O space are legacy, but I don't think that
> drivers doing inb/outb are more obsolete than those doing pci_iomap.
> It's got more to do with the subsystem requirements, e.g. libata
> requires the use of pci_iomap.
> 
> > > for every PCI device that has an I/O BAR and storing the virtual
> > > address in the pci_dev resource, I don't see what that gains us
> > 
> > Mainly we get to drop the fancy dynamic allocation stuff for the fixed
> > virtual window, and it gives the option to have a 1:1 relationship
> > between CPU addresses and PCI BARs.
> 
> I don't think the allocation is much of a problem, as long as we
> can localize it in one function that is shared by everyone.
> The problems I saw were all about explaining to people how it
> works, but they really shouldn't have to know.
> 
> 
> 	Arnd
> 
>
diff mbox

Patch

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 1a54f1f..7cf2b16 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -719,3 +719,34 @@  void __iomem *of_iomap(struct device_node *np, int index)
 	return ioremap(res.start, resource_size(&res));
 }
 EXPORT_SYMBOL(of_iomap);
+
+/**
+ * of_pci_range_to_resource - Create a resource from an of_pci_range
+ * @range:	the PCI range that describes the resource
+ * @np:		device node where the range belongs to
+ * @res:	pointer to a valid resource that will be updated to
+ *              reflect the values contained in the range.
+ * Note that if the range is an IO range, the resource will be converted
+ * using pci_address_to_pio() which can fail if it is called to early or
+ * if the range cannot be matched to any host bridge IO space.
+ */
+void of_pci_range_to_resource(struct of_pci_range *range,
+	struct device_node *np, struct resource *res)
+{
+	res->flags = range->flags;
+	if (res->flags & IORESOURCE_IO) {
+		unsigned long port;
+		port = pci_address_to_pio(range->pci_addr);
+		if (port == (unsigned long)-1) {
+			res->start = (resource_size_t)OF_BAD_ADDR;
+			res->end = (resource_size_t)OF_BAD_ADDR;
+			return;
+		}
+		res->start = port;
+	} else {
+		res->start = range->cpu_addr;
+	}
+	res->end = res->start + range->size - 1;
+	res->parent = res->child = res->sibling = NULL;
+	res->name = np->full_name;
+}
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 5f6ed6b..a667762 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -23,17 +23,8 @@  struct of_pci_range {
 #define for_each_of_pci_range(parser, range) \
 	for (; of_pci_range_parser_one(parser, range);)
 
-static inline void of_pci_range_to_resource(struct of_pci_range *range,
-					    struct device_node *np,
-					    struct resource *res)
-{
-	res->flags = range->flags;
-	res->start = range->cpu_addr;
-	res->end = range->cpu_addr + range->size - 1;
-	res->parent = res->child = res->sibling = NULL;
-	res->name = np->full_name;
-}
-
+extern void of_pci_range_to_resource(struct of_pci_range *range,
+		struct device_node *np, struct resource *res);
 /* Translate a DMA address from device space to CPU space */
 extern u64 of_translate_dma_address(struct device_node *dev,
 				    const __be32 *in_addr);