diff mbox

[v2] ARM: asm: add readq/writeq methods

Message ID 52A3472C.4010203@arkona-technologies.de
State New
Headers show

Commit Message

Matthias Mann Dec. 7, 2013, 4:05 p.m. UTC
Add readq/writeq methods for 32 bit ARM to allow transferring 64 bit words over
PCIe as a single transfer.

Signed-off-by: Matthias Mann <m.mann@arkona-technologies.de>
---
v2: Changed assembler according to comments from Måns Rullgård
    Check for ARM architecture support and added preprocessor guards as
    requested by Russell King
---
 arch/arm/include/asm/io.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

Comments

Peter Maydell Dec. 7, 2013, 9:38 p.m. UTC | #1
On 7 December 2013 16:05, Matthias Mann <M.Mann@arkona-technologies.de> wrote:
> Add readq/writeq methods for 32 bit ARM to allow transferring 64 bit words over
> PCIe as a single transfer.

> +#if __LINUX_ARM_ARCH__ >= 5
> +static inline u64 __raw_readq(const volatile void __iomem *addr)
> +{
> +       u64 val;
> +#if __LITTLE_ENDIAN
> +       asm volatile("ldrd %Q1, %R1, %0"
> +                    : "+Q" (*(volatile u64 __force *)addr),
> +                      "=r" (val));
> +#else
> +       asm volatile("ldrd %R1, %Q1, %0"
> +                    : "+Q" (*(volatile u64 __force *)addr),
> +                      "=r" (val));
> +#endif
> +       return val;
> +}

Given that ldrd/strd accesses are only a single 64 bit access
on CPUs with LPAE (on non-LPAE CPUs they may be
implemented as just a pair of 32 bit accesses) should the
condition be stricter than just __LINUX_ARM_ARCH__ >= 5 ?

thanks
-- PMM
Måns Rullgård Dec. 7, 2013, 9:41 p.m. UTC | #2
Peter Maydell <peter.maydell@linaro.org> writes:

> On 7 December 2013 16:05, Matthias Mann <M.Mann@arkona-technologies.de> wrote:
>> Add readq/writeq methods for 32 bit ARM to allow transferring 64 bit words over
>> PCIe as a single transfer.
>
>> +#if __LINUX_ARM_ARCH__ >= 5
>> +static inline u64 __raw_readq(const volatile void __iomem *addr)
>> +{
>> +       u64 val;
>> +#if __LITTLE_ENDIAN
>> +       asm volatile("ldrd %Q1, %R1, %0"
>> +                    : "+Q" (*(volatile u64 __force *)addr),
>> +                      "=r" (val));
>> +#else
>> +       asm volatile("ldrd %R1, %Q1, %0"
>> +                    : "+Q" (*(volatile u64 __force *)addr),
>> +                      "=r" (val));
>> +#endif
>> +       return val;
>> +}
>
> Given that ldrd/strd accesses are only a single 64 bit access
> on CPUs with LPAE (on non-LPAE CPUs they may be
> implemented as just a pair of 32 bit accesses) should the
> condition be stricter than just __LINUX_ARM_ARCH__ >= 5 ?

What do actual CPUs, e.g. the A9, do?
Matthias Mann Dec. 7, 2013, 10:02 p.m. UTC | #3
Måns Rullgård wrote:
> Peter Maydell <peter.maydell@linaro.org> writes:
>
>> On 7 December 2013 16:05, Matthias Mann <M.Mann@arkona-technologies.de> wrote:
>>> Add readq/writeq methods for 32 bit ARM to allow transferring 64 bit words over
>>> PCIe as a single transfer.
>>> +#if __LINUX_ARM_ARCH__ >= 5
>>> +static inline u64 __raw_readq(const volatile void __iomem *addr)
>>> +{
>>> +       u64 val;
>>> +#if __LITTLE_ENDIAN
>>> +       asm volatile("ldrd %Q1, %R1, %0"
>>> +                    : "+Q" (*(volatile u64 __force *)addr),
>>> +                      "=r" (val));
>>> +#else
>>> +       asm volatile("ldrd %R1, %Q1, %0"
>>> +                    : "+Q" (*(volatile u64 __force *)addr),
>>> +                      "=r" (val));
>>> +#endif
>>> +       return val;
>>> +}
>> Given that ldrd/strd accesses are only a single 64 bit access
>> on CPUs with LPAE (on non-LPAE CPUs they may be
>> implemented as just a pair of 32 bit accesses) should the
>> condition be stricter than just __LINUX_ARM_ARCH__ >= 5 ?
> What do actual CPUs, e.g. the A9, do?
>
I've tested that on a Freescale i.MX6D, which is a Cortex-A9, with an Altera Arria V GZ FPGA connected via PCIe. Using strd/ldrd I see a 64-bit TLP (length = 2) on the PCIe interface (this is on an uncacheable 32-bit BAR).
diff mbox

Patch

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 3c597c2..89d4ecd 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -112,6 +112,36 @@  static inline u32 __raw_readl(const volatile void __iomem *addr)
 	return val;
 }
 
+#if __LINUX_ARM_ARCH__ >= 5
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+	u64 val;
+#if __LITTLE_ENDIAN
+	asm volatile("ldrd %Q1, %R1, %0"
+		     : "+Q" (*(volatile u64 __force *)addr),
+		       "=r" (val));
+#else
+	asm volatile("ldrd %R1, %Q1, %0"
+		     : "+Q" (*(volatile u64 __force *)addr),
+		       "=r" (val));
+#endif
+	return val;
+}
+
+static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+{
+#if __LITTLE_ENDIAN
+	asm volatile("strd %Q1, %R1, %0"
+		     : "+Q" (*(volatile u64 __force *)addr)
+		     : "r" (val));
+#else
+	asm volatile("strd %R1, %Q1, %0"
+		     : "+Q" (*(volatile u64 __force *)addr)
+		     : "r" (val));
+#endif
+}
+#endif	/* __LINUX_ARM_ARCH__ >= 5 */
+
 /*
  * Architecture ioremap implementation.
  */
@@ -320,6 +350,23 @@  extern void _memset_io(volatile void __iomem *, int, size_t);
 
 #endif	/* readl */
 
+#if __LINUX_ARM_ARCH__ >= 5
+
+#ifndef readq
+#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64) \
+					__raw_readq(c)); __r; })
+
+#define readq(c)	({ u64 __v = readq_relaxed(c); __iormb(); __v; })
+#endif	/* readq */
+
+#ifndef writeq
+#define writeq_relaxed(v,c)	__raw_writeq((__force u64) cpu_to_le64(v),c)
+
+#define writeq(v,c)	({ __iowmb(); writeq_relaxed(v,c); })
+#endif	/* writeq */
+
+#endif	/* __LINUX_ARM_ARCH__ >= 5 */
+
 /*
  * ioremap and friends.
  *