diff mbox series

[07/12] io: endian conversions for io accessors

Message ID 20190929074651.8787-8-npiggin@gmail.com
State Superseded
Headers show
Series little endian skiboot | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (470ffb5f29d741c3bed600f7bb7bf0cbb270e05a)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot fail Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Nicholas Piggin Sept. 29, 2019, 7:46 a.m. UTC
This requires a small change to flash drivers which assumed 4-byte LPC
reads would not change endian. _raw accessors could be added if this
becomes a signifcant pattern, but for now this hack works.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 include/io.h           | 72 +++++++++++++++++++++++++++++++++++-------
 libflash/ipmi-hiomap.c | 18 ++++++++---
 libflash/mbox-flash.c  | 18 ++++++++---
 3 files changed, 88 insertions(+), 20 deletions(-)

Comments

Oliver O'Halloran Oct. 1, 2019, 4:58 a.m. UTC | #1
On Sun, 2019-09-29 at 17:46 +1000, Nicholas Piggin wrote:
> This requires a small change to flash drivers which assumed 4-byte LPC
> reads would not change endian. _raw accessors could be added if this
> becomes a signifcant pattern, but for now this hack works.

Hmm, lpc_read() and lpc_write() are more or less directly wired to the
OPAL_LPC_READ/OPAL_LPC_WRITE calls so we should probably have a bit
more of a think about what's happening here.

I suspect the in_beXX() functions are used here mainly because we don't
have any "native" endian in/out accessor functions so Ben just went
with the BE versions since they don't swap.

> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  include/io.h           | 72 +++++++++++++++++++++++++++++++++++-------
>  libflash/ipmi-hiomap.c | 18 ++++++++---
>  libflash/mbox-flash.c  | 18 ++++++++---
>  3 files changed, 88 insertions(+), 20 deletions(-)
> 
> diff --git a/include/io.h b/include/io.h
> index c6203a274..3771dbb8a 100644
> --- a/include/io.h
> +++ b/include/io.h
> @@ -38,7 +38,7 @@ static inline uint16_t __in_be16(const volatile uint16_t *addr)
>  	uint16_t val;
>  	asm volatile("lhzcix %0,0,%1" :
>  		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
> -	return val;
> +	return be16_to_cpu(val);
>  }
>  
>  static inline uint16_t in_be16(const volatile uint16_t *addr)
> @@ -47,9 +47,18 @@ static inline uint16_t in_be16(const volatile uint16_t *addr)
>  	return __in_be16(addr);
>  }
>  
> +static inline uint16_t __in_le16(const volatile uint16_t *addr)
> +{
> +	uint16_t val;
> +	asm volatile("lhzcix %0,0,%1" :
> +		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
> +	return le16_to_cpu(val);
> +}
> +
>  static inline uint16_t in_le16(const volatile uint16_t *addr)
>  {
> -	return bswap_16(in_be16(addr));
> +	sync();
> +	return __in_le16(addr);
>  }
>  
>  static inline uint32_t __in_be32(const volatile uint32_t *addr)
> @@ -57,7 +66,7 @@ static inline uint32_t __in_be32(const volatile uint32_t *addr)
>  	uint32_t val;
>  	asm volatile("lwzcix %0,0,%1" :
>  		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
> -	return val;
> +	return be32_to_cpu(val);
>  }
>  
>  static inline uint32_t in_be32(const volatile uint32_t *addr)
> @@ -66,9 +75,18 @@ static inline uint32_t in_be32(const volatile uint32_t *addr)
>  	return __in_be32(addr);
>  }
>  
> +static inline uint32_t __in_le32(const volatile uint32_t *addr)
> +{
> +	uint32_t val;
> +	asm volatile("lwzcix %0,0,%1" :
> +		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
> +	return le32_to_cpu(val);
> +}
> +
>  static inline uint32_t in_le32(const volatile uint32_t *addr)
>  {
> -	return bswap_32(in_be32(addr));
> +	sync();
> +	return __in_le32(addr);
>  }
>  
>  static inline uint64_t __in_be64(const volatile uint64_t *addr)
> @@ -76,7 +94,7 @@ static inline uint64_t __in_be64(const volatile uint64_t *addr)
>  	uint64_t val;
>  	asm volatile("ldcix %0,0,%1" :
>  		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
> -	return val;
> +	return be64_to_cpu(val);
>  }
>  
>  static inline uint64_t in_be64(const volatile uint64_t *addr)
> @@ -85,9 +103,18 @@ static inline uint64_t in_be64(const volatile uint64_t *addr)
>  	return __in_be64(addr);
>  }
>  
> +static inline uint64_t __in_le64(const volatile uint64_t *addr)
> +{
> +	uint64_t val;
> +	asm volatile("ldcix %0,0,%1" :
> +		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
> +	return le64_to_cpu(val);
> +}
> +
>  static inline uint64_t in_le64(const volatile uint64_t *addr)
>  {
> -	return bswap_64(in_be64(addr));
> +	sync();
> +	return __in_le64(addr);
>  }
>  
>  static inline void __out_8(volatile uint8_t *addr, uint8_t val)
> @@ -105,7 +132,7 @@ static inline void out_8(volatile uint8_t *addr, uint8_t val)
>  static inline void __out_be16(volatile uint16_t *addr, uint16_t val)
>  {
>  	asm volatile("sthcix %0,0,%1"
> -		     : : "r"(val), "r"(addr), "m"(*addr) : "memory");
> +		     : : "r"(cpu_to_be16(val)), "r"(addr), "m"(*addr) : "memory");
>  }
>  
>  static inline void out_be16(volatile uint16_t *addr, uint16_t val)
> @@ -114,15 +141,22 @@ static inline void out_be16(volatile uint16_t *addr, uint16_t val)
>  	return __out_be16(addr, val);
>  }
>  
> +static inline void __out_le16(volatile uint16_t *addr, uint16_t val)
> +{
> +	asm volatile("sthcix %0,0,%1"
> +		     : : "r"(cpu_to_le16(val)), "r"(addr), "m"(*addr) : "memory");
> +}
> +
>  static inline void out_le16(volatile uint16_t *addr, uint16_t val)
>  {
> -	out_be16(addr, bswap_16(val));
> +	sync();
> +	return __out_le16(addr, val);
>  }
>  
>  static inline void __out_be32(volatile uint32_t *addr, uint32_t val)
>  {
>  	asm volatile("stwcix %0,0,%1"
> -		     : : "r"(val), "r"(addr), "m"(*addr) : "memory");
> +		     : : "r"(cpu_to_be32(val)), "r"(addr), "m"(*addr) : "memory");
>  }
>  
>  static inline void out_be32(volatile uint32_t *addr, uint32_t val)
> @@ -131,15 +165,22 @@ static inline void out_be32(volatile uint32_t *addr, uint32_t val)
>  	return __out_be32(addr, val);
>  }
>  
> +static inline void __out_le32(volatile uint32_t *addr, uint32_t val)
> +{
> +	asm volatile("stwcix %0,0,%1"
> +		     : : "r"(cpu_to_le32(val)), "r"(addr), "m"(*addr) : "memory");
> +}
> +
>  static inline void out_le32(volatile uint32_t *addr, uint32_t val)
>  {
> -	out_be32(addr, bswap_32(val));
> +	sync();
> +	return __out_le32(addr, val);
>  }
>  
>  static inline void __out_be64(volatile uint64_t *addr, uint64_t val)
>  {
>  	asm volatile("stdcix %0,0,%1"
> -		     : : "r"(val), "r"(addr), "m"(*addr) : "memory");
> +		     : : "r"(cpu_to_be64(val)), "r"(addr), "m"(*addr) : "memory");
>  }
>  
>  static inline void out_be64(volatile uint64_t *addr, uint64_t val)
> @@ -148,9 +189,16 @@ static inline void out_be64(volatile uint64_t *addr, uint64_t val)
>  	return __out_be64(addr, val);
>  }
>  
> +static inline void __out_le64(volatile uint64_t *addr, uint64_t val)
> +{
> +	asm volatile("stdcix %0,0,%1"
> +		     : : "r"(cpu_to_le64(val)), "r"(addr), "m"(*addr) : "memory");
> +}
> +
>  static inline void out_le64(volatile uint64_t *addr, uint64_t val)
>  {
> -	out_be64(addr, bswap_64(val));
> +	sync();
> +	return __out_le64(addr, val);
>  }
>  
>  /* Assistant to macros used to access PCI config space */
> diff --git a/libflash/ipmi-hiomap.c b/libflash/ipmi-hiomap.c
> index 7327b83a3..91d674231 100644
> --- a/libflash/ipmi-hiomap.c
> +++ b/libflash/ipmi-hiomap.c
> @@ -570,8 +570,13 @@ static int lpc_window_read(struct ipmi_hiomap *ctx, uint32_t pos,
>  		/* XXX: make this read until it's aligned */
>  		if (len > 3 && !(off & 3)) {
>  			rc = lpc_read(OPAL_LPC_FW, off, &dat, 4);
> -			if (!rc)
> -				*(uint32_t *)buf = dat;
> +			if (!rc) {
> +				/*
> +				 * lpc_read swaps to CPU endian but it's not
> +				 * really a 32-bit value, so convert back.
> +				 */
> +				*(uint32_t *)buf = cpu_to_be32(dat);
> +			}
>  			chunk = 4;
>  		} else {
>  			rc = lpc_read(OPAL_LPC_FW, off, &dat, 1);
> @@ -615,12 +620,17 @@ static int lpc_window_write(struct ipmi_hiomap *ctx, uint32_t pos,
>  		uint32_t chunk;
>  
>  		if (len > 3 && !(off & 3)) {
> +			/* endian swap: see lpc_window_write */
> +			uint32_t dat = be32_to_cpu(*(uint32_t *)buf);
> +
>  			rc = lpc_write(OPAL_LPC_FW, off,
> -				       *(uint32_t *)buf, 4);
> +				       dat, 4);
>  			chunk = 4;
>  		} else {
> +			uint8_t dat = *(uint8_t *)buf;
> +
>  			rc = lpc_write(OPAL_LPC_FW, off,
> -				       *(uint8_t *)buf, 1);
> +				       dat, 1);
>  			chunk = 1;
>  		}
>  		if (rc) {
> diff --git a/libflash/mbox-flash.c b/libflash/mbox-flash.c
> index 9d47fe7ea..6fed30a9b 100644
> --- a/libflash/mbox-flash.c
> +++ b/libflash/mbox-flash.c
> @@ -159,8 +159,13 @@ static int lpc_window_read(struct mbox_flash_data *mbox_flash, uint32_t pos,
>  		/* XXX: make this read until it's aligned */
>  		if (len > 3 && !(off & 3)) {
>  			rc = lpc_read(OPAL_LPC_FW, off, &dat, 4);
> -			if (!rc)
> -				*(uint32_t *)buf = dat;
> +			if (!rc) {
> +				/*
> +				 * lpc_read swaps to CPU endian but it's not
> +				 * really a 32-bit value, so convert back.
> +				 */
> +				*(uint32_t *)buf = cpu_to_be32(dat);
> +			}
>  			chunk = 4;
>  		} else {
>  			rc = lpc_read(OPAL_LPC_FW, off, &dat, 1);
> @@ -194,12 +199,17 @@ static int lpc_window_write(struct mbox_flash_data *mbox_flash, uint32_t pos,
>  		uint32_t chunk;
>  
>  		if (len > 3 && !(off & 3)) {
> +			/* endian swap: see lpc_window_write */
> +			uint32_t dat = be32_to_cpu(*(uint32_t *)buf);
> +
>  			rc = lpc_write(OPAL_LPC_FW, off,
> -				       *(uint32_t *)buf, 4);
> +				       dat, 4);
>  			chunk = 4;
>  		} else {
> +			uint8_t dat = *(uint8_t *)buf;
> +
>  			rc = lpc_write(OPAL_LPC_FW, off,
> -				       *(uint8_t *)buf, 1);
> +				       dat, 1);
>  			chunk = 1;
>  		}
>  		if (rc) {
Nicholas Piggin Oct. 2, 2019, 4:26 a.m. UTC | #2
Oliver O'Halloran's on October 1, 2019 2:58 pm:
> On Sun, 2019-09-29 at 17:46 +1000, Nicholas Piggin wrote:
>> This requires a small change to flash drivers which assumed 4-byte LPC
>> reads would not change endian. _raw accessors could be added if this
>> becomes a signifcant pattern, but for now this hack works.
> 
> Hmm, lpc_read() and lpc_write() are more or less directly wired to the
> OPAL_LPC_READ/OPAL_LPC_WRITE calls so we should probably have a bit
> more of a think about what's happening here.

I'm not sure what you mean.
 
> I suspect the in_beXX() functions are used here mainly because we don't
> have any "native" endian in/out accessor functions so Ben just went
> with the BE versions since they don't swap.

Right. It's a performance optimisation that's reading a window of bytes
using a function designed to read 4-byte big-endian values. I think
this is okay though, if you think about it the hack is actually not the
endian swap. The endian swap is just acknowledging the hack which is
that it's using lpc_read of 4 bytes to read a byte stream.

Thanks,
Nick
diff mbox series

Patch

diff --git a/include/io.h b/include/io.h
index c6203a274..3771dbb8a 100644
--- a/include/io.h
+++ b/include/io.h
@@ -38,7 +38,7 @@  static inline uint16_t __in_be16(const volatile uint16_t *addr)
 	uint16_t val;
 	asm volatile("lhzcix %0,0,%1" :
 		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
-	return val;
+	return be16_to_cpu(val);
 }
 
 static inline uint16_t in_be16(const volatile uint16_t *addr)
@@ -47,9 +47,18 @@  static inline uint16_t in_be16(const volatile uint16_t *addr)
 	return __in_be16(addr);
 }
 
+static inline uint16_t __in_le16(const volatile uint16_t *addr)
+{
+	uint16_t val;
+	asm volatile("lhzcix %0,0,%1" :
+		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
+	return le16_to_cpu(val);
+}
+
 static inline uint16_t in_le16(const volatile uint16_t *addr)
 {
-	return bswap_16(in_be16(addr));
+	sync();
+	return __in_le16(addr);
 }
 
 static inline uint32_t __in_be32(const volatile uint32_t *addr)
@@ -57,7 +66,7 @@  static inline uint32_t __in_be32(const volatile uint32_t *addr)
 	uint32_t val;
 	asm volatile("lwzcix %0,0,%1" :
 		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
-	return val;
+	return be32_to_cpu(val);
 }
 
 static inline uint32_t in_be32(const volatile uint32_t *addr)
@@ -66,9 +75,18 @@  static inline uint32_t in_be32(const volatile uint32_t *addr)
 	return __in_be32(addr);
 }
 
+static inline uint32_t __in_le32(const volatile uint32_t *addr)
+{
+	uint32_t val;
+	asm volatile("lwzcix %0,0,%1" :
+		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
+	return le32_to_cpu(val);
+}
+
 static inline uint32_t in_le32(const volatile uint32_t *addr)
 {
-	return bswap_32(in_be32(addr));
+	sync();
+	return __in_le32(addr);
 }
 
 static inline uint64_t __in_be64(const volatile uint64_t *addr)
@@ -76,7 +94,7 @@  static inline uint64_t __in_be64(const volatile uint64_t *addr)
 	uint64_t val;
 	asm volatile("ldcix %0,0,%1" :
 		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
-	return val;
+	return be64_to_cpu(val);
 }
 
 static inline uint64_t in_be64(const volatile uint64_t *addr)
@@ -85,9 +103,18 @@  static inline uint64_t in_be64(const volatile uint64_t *addr)
 	return __in_be64(addr);
 }
 
+static inline uint64_t __in_le64(const volatile uint64_t *addr)
+{
+	uint64_t val;
+	asm volatile("ldcix %0,0,%1" :
+		     "=r"(val) : "r"(addr), "m"(*addr) : "memory");
+	return le64_to_cpu(val);
+}
+
 static inline uint64_t in_le64(const volatile uint64_t *addr)
 {
-	return bswap_64(in_be64(addr));
+	sync();
+	return __in_le64(addr);
 }
 
 static inline void __out_8(volatile uint8_t *addr, uint8_t val)
@@ -105,7 +132,7 @@  static inline void out_8(volatile uint8_t *addr, uint8_t val)
 static inline void __out_be16(volatile uint16_t *addr, uint16_t val)
 {
 	asm volatile("sthcix %0,0,%1"
-		     : : "r"(val), "r"(addr), "m"(*addr) : "memory");
+		     : : "r"(cpu_to_be16(val)), "r"(addr), "m"(*addr) : "memory");
 }
 
 static inline void out_be16(volatile uint16_t *addr, uint16_t val)
@@ -114,15 +141,22 @@  static inline void out_be16(volatile uint16_t *addr, uint16_t val)
 	return __out_be16(addr, val);
 }
 
+static inline void __out_le16(volatile uint16_t *addr, uint16_t val)
+{
+	asm volatile("sthcix %0,0,%1"
+		     : : "r"(cpu_to_le16(val)), "r"(addr), "m"(*addr) : "memory");
+}
+
 static inline void out_le16(volatile uint16_t *addr, uint16_t val)
 {
-	out_be16(addr, bswap_16(val));
+	sync();
+	return __out_le16(addr, val);
 }
 
 static inline void __out_be32(volatile uint32_t *addr, uint32_t val)
 {
 	asm volatile("stwcix %0,0,%1"
-		     : : "r"(val), "r"(addr), "m"(*addr) : "memory");
+		     : : "r"(cpu_to_be32(val)), "r"(addr), "m"(*addr) : "memory");
 }
 
 static inline void out_be32(volatile uint32_t *addr, uint32_t val)
@@ -131,15 +165,22 @@  static inline void out_be32(volatile uint32_t *addr, uint32_t val)
 	return __out_be32(addr, val);
 }
 
+static inline void __out_le32(volatile uint32_t *addr, uint32_t val)
+{
+	asm volatile("stwcix %0,0,%1"
+		     : : "r"(cpu_to_le32(val)), "r"(addr), "m"(*addr) : "memory");
+}
+
 static inline void out_le32(volatile uint32_t *addr, uint32_t val)
 {
-	out_be32(addr, bswap_32(val));
+	sync();
+	return __out_le32(addr, val);
 }
 
 static inline void __out_be64(volatile uint64_t *addr, uint64_t val)
 {
 	asm volatile("stdcix %0,0,%1"
-		     : : "r"(val), "r"(addr), "m"(*addr) : "memory");
+		     : : "r"(cpu_to_be64(val)), "r"(addr), "m"(*addr) : "memory");
 }
 
 static inline void out_be64(volatile uint64_t *addr, uint64_t val)
@@ -148,9 +189,16 @@  static inline void out_be64(volatile uint64_t *addr, uint64_t val)
 	return __out_be64(addr, val);
 }
 
+static inline void __out_le64(volatile uint64_t *addr, uint64_t val)
+{
+	asm volatile("stdcix %0,0,%1"
+		     : : "r"(cpu_to_le64(val)), "r"(addr), "m"(*addr) : "memory");
+}
+
 static inline void out_le64(volatile uint64_t *addr, uint64_t val)
 {
-	out_be64(addr, bswap_64(val));
+	sync();
+	return __out_le64(addr, val);
 }
 
 /* Assistant to macros used to access PCI config space */
diff --git a/libflash/ipmi-hiomap.c b/libflash/ipmi-hiomap.c
index 7327b83a3..91d674231 100644
--- a/libflash/ipmi-hiomap.c
+++ b/libflash/ipmi-hiomap.c
@@ -570,8 +570,13 @@  static int lpc_window_read(struct ipmi_hiomap *ctx, uint32_t pos,
 		/* XXX: make this read until it's aligned */
 		if (len > 3 && !(off & 3)) {
 			rc = lpc_read(OPAL_LPC_FW, off, &dat, 4);
-			if (!rc)
-				*(uint32_t *)buf = dat;
+			if (!rc) {
+				/*
+				 * lpc_read swaps to CPU endian but it's not
+				 * really a 32-bit value, so convert back.
+				 */
+				*(uint32_t *)buf = cpu_to_be32(dat);
+			}
 			chunk = 4;
 		} else {
 			rc = lpc_read(OPAL_LPC_FW, off, &dat, 1);
@@ -615,12 +620,17 @@  static int lpc_window_write(struct ipmi_hiomap *ctx, uint32_t pos,
 		uint32_t chunk;
 
 		if (len > 3 && !(off & 3)) {
+			/* endian swap: see lpc_window_write */
+			uint32_t dat = be32_to_cpu(*(uint32_t *)buf);
+
 			rc = lpc_write(OPAL_LPC_FW, off,
-				       *(uint32_t *)buf, 4);
+				       dat, 4);
 			chunk = 4;
 		} else {
+			uint8_t dat = *(uint8_t *)buf;
+
 			rc = lpc_write(OPAL_LPC_FW, off,
-				       *(uint8_t *)buf, 1);
+				       dat, 1);
 			chunk = 1;
 		}
 		if (rc) {
diff --git a/libflash/mbox-flash.c b/libflash/mbox-flash.c
index 9d47fe7ea..6fed30a9b 100644
--- a/libflash/mbox-flash.c
+++ b/libflash/mbox-flash.c
@@ -159,8 +159,13 @@  static int lpc_window_read(struct mbox_flash_data *mbox_flash, uint32_t pos,
 		/* XXX: make this read until it's aligned */
 		if (len > 3 && !(off & 3)) {
 			rc = lpc_read(OPAL_LPC_FW, off, &dat, 4);
-			if (!rc)
-				*(uint32_t *)buf = dat;
+			if (!rc) {
+				/*
+				 * lpc_read swaps to CPU endian but it's not
+				 * really a 32-bit value, so convert back.
+				 */
+				*(uint32_t *)buf = cpu_to_be32(dat);
+			}
 			chunk = 4;
 		} else {
 			rc = lpc_read(OPAL_LPC_FW, off, &dat, 1);
@@ -194,12 +199,17 @@  static int lpc_window_write(struct mbox_flash_data *mbox_flash, uint32_t pos,
 		uint32_t chunk;
 
 		if (len > 3 && !(off & 3)) {
+			/* endian swap: see lpc_window_write */
+			uint32_t dat = be32_to_cpu(*(uint32_t *)buf);
+
 			rc = lpc_write(OPAL_LPC_FW, off,
-				       *(uint32_t *)buf, 4);
+				       dat, 4);
 			chunk = 4;
 		} else {
+			uint8_t dat = *(uint8_t *)buf;
+
 			rc = lpc_write(OPAL_LPC_FW, off,
-				       *(uint8_t *)buf, 1);
+				       dat, 1);
 			chunk = 1;
 		}
 		if (rc) {