diff mbox

[ARM] Unaligned accesses for packed structures [1/2]

Message ID 20110509180112.3f7fc7d9@rex.config
State New
Headers show

Commit Message

Julian Brown May 9, 2011, 5:01 p.m. UTC
On Fri, 06 May 2011 14:04:16 +0100
Richard Earnshaw <rearnsha@arm.com> wrote:

> On Fri, 2011-05-06 at 13:34 +0100, Julian Brown wrote:
> > This is the first of two patches to add unaligned-access support to
> > the ARM backend. [...]

> The compiler should fault -munaligned-access on cores that don't
> support it.

I've implemented this as a warning (which automatically disables the
option).

> +(define_insn "unaligned_loadsi"
> +  [(set (match_operand:SI 0 "s_register_operand" "=r")
> +       (unspec:SI [(match_operand:SI 1 "memory_operand" "m")]
> +                  UNSPEC_UNALIGNED_LOAD))]
> +  "unaligned_access"
> +  "ldr%?\t%0, %1\t@ unaligned"
> +  [(set_attr "predicable" "yes")
> +   (set_attr "type" "load1")])
> 
> I think the final condition should also include TARGET_32BIT, as these
> patterns aren't expected to work with Thumb-1.

Added.

> Secondly, they should be structured to get the length information
> right when a 16-bit encoding can be used in Thumb-2 (you'll keep
> Carrot happy that way :-) : just add an alternative that's only
> enabled for Thumb mode and which matches the requirements for a
> 16-bit instruction (beware however of the 16-bit write-back case).

I've done this, assuming that by "16-bit write-back case" you meant
singleton-register-list ldmia/stmia instructions masquerading as
post-increment ldr/str? I've disallowed that by adding a new
constraint. It's not entirely clear to me whether Thumb-2 (vs. Thumb-1)
will actually ever use 16-bit ldmia/stmia instead of 32-bit writeback
ldr/str though.

> Finally, I don't see anything to put out the correct build attribute
> information for unaligned access enabled (Tag_CPU_unaligned_access).
> Have I just missed it?

No you didn't miss it, it wasn't there :-). Added.

How does this look now? (Re-testing in progress.)

Thanks,

Julian

ChangeLog

    gcc/
    * config/arm/arm.c (arm_override_options): Add unaligned_access
    support.
    (arm_file_start): Emit attribute for unaligned access as
    appropriate.
    * config/arm/arm.md (UNSPEC_UNALIGNED_LOAD)
    (UNSPEC_UNALIGNED_STORE): Add constants for unspecs.
    (insv, extzv): Add unaligned-access support.
    (extv): Change to expander. Likewise.
    (unaligned_loadsi, unaligned_loadhis, unaligned_loadhiu)
    (unaligned_storesi, unaligned_storehi): New.
    (*extv_reg): New (previous extv implementation).
    * config/arm/arm.opt (munaligned_access): Add option.
    * config/arm/constraints.md (Uw): New constraint.
    * expmed.c (store_bit_field_1): Don't tweak bitfield numbering for
    memory locations if BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN.
    (extract_bit_field_1): Likewise.

Comments

Julian Brown May 10, 2011, 12:58 p.m. UTC | #1
On Mon, 9 May 2011 18:01:12 +0100
Julian Brown <julian@codesourcery.com> wrote:

> How does this look now? (Re-testing in progress.)

Results from re-testing are fine, btw.

Cheers,

Julian
diff mbox

Patch

commit bd55df2538dd90e576dd0f69bc9c9d570c8eee08
Author: Julian Brown <julian@henry7.codesourcery.com>
Date:   Wed May 4 10:06:25 2011 -0700

    Permit regular ldr/str/ldrh/strh for packed-structure accesses etc.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 4f9c2aa..f0f1a73 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -1833,6 +1833,28 @@  arm_option_override (void)
 	fix_cm3_ldrd = 0;
     }
 
+  /* Enable -munaligned-access by default for
+     - all ARMv6 architecture-based processors
+     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
+
+     Disable -munaligned-access by default for
+     - all pre-ARMv6 architecture-based processors
+     - ARMv6-M architecture-based processors.  */
+
+  if (unaligned_access == 2)
+    {
+      if (arm_arch6 && (arm_arch_notm || arm_arch7))
+	unaligned_access = 1;
+      else
+	unaligned_access = 0;
+    }
+  else if (unaligned_access == 1
+	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
+    {
+      warning (0, "target CPU does not support unaligned accesses");
+      unaligned_access = 0;
+    }
+
   if (TARGET_THUMB1 && flag_schedule_insns)
     {
       /* Don't warn since it's on by default in -O2.  */
@@ -21714,6 +21736,10 @@  arm_file_start (void)
 	val = 6;
       asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
 
+      /* Tag_CPU_unaligned_access.  */
+      asm_fprintf (asm_out_file, "\t.eabi_attribute 34, %d\n",
+		   unaligned_access);
+
       /* Tag_ABI_FP_16bit_format.  */
       if (arm_fp16_format)
 	asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 40ebf35..59b9ffb 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -104,6 +104,10 @@ 
   UNSPEC_SYMBOL_OFFSET  ; The offset of the start of the symbol from
                         ; another symbolic address.
   UNSPEC_MEMORY_BARRIER ; Represent a memory barrier.
+  UNSPEC_UNALIGNED_LOAD	; Used to represent ldr/ldrh instructions that access
+			; unaligned locations, on architectures which support
+			; that.
+  UNSPEC_UNALIGNED_STORE ; Same for str/strh.
 ])
 
 ;; UNSPEC_VOLATILE Usage:
@@ -2393,7 +2397,7 @@ 
 ;;; this insv pattern, so this pattern needs to be reevalutated.
 
 (define_expand "insv"
-  [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "")
+  [(set (zero_extract:SI (match_operand:SI 0 "nonimmediate_operand" "")
                          (match_operand:SI 1 "general_operand" "")
                          (match_operand:SI 2 "general_operand" ""))
         (match_operand:SI 3 "reg_or_int_operand" ""))]
@@ -2407,35 +2411,66 @@ 
 
     if (arm_arch_thumb2)
       {
-	bool use_bfi = TRUE;
-
-	if (GET_CODE (operands[3]) == CONST_INT)
+        if (unaligned_access && MEM_P (operands[0])
+	    && s_register_operand (operands[3], GET_MODE (operands[3]))
+	    && (width == 16 || width == 32) && (start_bit % BITS_PER_UNIT) == 0)
 	  {
-	    HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
-
-	    if (val == 0)
+	    rtx base_addr;
+	    
+	    if (width == 32)
 	      {
-		emit_insn (gen_insv_zero (operands[0], operands[1],
-					  operands[2]));
-		DONE;
+	        base_addr = adjust_address (operands[0], SImode,
+					    start_bit / BITS_PER_UNIT);
+		emit_insn (gen_unaligned_storesi (base_addr, operands[3]));
 	      }
+	    else
+	      {
+	        rtx tmp = gen_reg_rtx (HImode);
 
-	    /* See if the set can be done with a single orr instruction.  */
-	    if (val == mask && const_ok_for_arm (val << start_bit))
-	      use_bfi = FALSE;
+	        base_addr = adjust_address (operands[0], HImode,
+					    start_bit / BITS_PER_UNIT);
+		emit_move_insn (tmp, gen_lowpart (HImode, operands[3]));
+		emit_insn (gen_unaligned_storehi (base_addr, tmp));
+	      }
+	    DONE;
 	  }
-	  
-	if (use_bfi)
+	else if (s_register_operand (operands[0], GET_MODE (operands[0])))
 	  {
-	    if (GET_CODE (operands[3]) != REG)
-	      operands[3] = force_reg (SImode, operands[3]);
+	    bool use_bfi = TRUE;
 
-	    emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
-				    operands[3]));
-	    DONE;
+	    if (GET_CODE (operands[3]) == CONST_INT)
+	      {
+		HOST_WIDE_INT val = INTVAL (operands[3]) & mask;
+
+		if (val == 0)
+		  {
+		    emit_insn (gen_insv_zero (operands[0], operands[1],
+					      operands[2]));
+		    DONE;
+		  }
+
+		/* See if the set can be done with a single orr instruction.  */
+		if (val == mask && const_ok_for_arm (val << start_bit))
+		  use_bfi = FALSE;
+	      }
+
+	    if (use_bfi)
+	      {
+		if (GET_CODE (operands[3]) != REG)
+		  operands[3] = force_reg (SImode, operands[3]);
+
+		emit_insn (gen_insv_t2 (operands[0], operands[1], operands[2],
+					operands[3]));
+		DONE;
+	      }
 	  }
+	else
+	  FAIL;
       }
 
+    if (!s_register_operand (operands[0], GET_MODE (operands[0])))
+      FAIL;
+
     target = copy_rtx (operands[0]);
     /* Avoid using a subreg as a subtarget, and avoid writing a paradoxical 
        subreg as the final target.  */
@@ -3628,7 +3663,7 @@ 
 
 (define_expand "extzv"
   [(set (match_dup 4)
-	(ashift:SI (match_operand:SI   1 "register_operand" "")
+	(ashift:SI (match_operand:SI   1 "nonimmediate_operand" "")
 		   (match_operand:SI   2 "const_int_operand" "")))
    (set (match_operand:SI              0 "register_operand" "")
 	(lshiftrt:SI (match_dup 4)
@@ -3641,10 +3676,53 @@ 
     
     if (arm_arch_thumb2)
       {
-	emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
-				 operands[3]));
-	DONE;
+	HOST_WIDE_INT width = INTVAL (operands[2]);
+	HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+	if (unaligned_access && MEM_P (operands[1])
+	    && (width == 16 || width == 32) && (bitpos % BITS_PER_UNIT) == 0)
+	  {
+	    rtx base_addr;
+
+	    if (width == 32)
+              {
+		base_addr = adjust_address (operands[1], SImode,
+					    bitpos / BITS_PER_UNIT);
+		emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+              }
+	    else
+              {
+		rtx dest = operands[0];
+		rtx tmp = gen_reg_rtx (SImode);
+
+		/* We may get a paradoxical subreg here.  Strip it off.  */
+		if (GET_CODE (dest) == SUBREG
+		    && GET_MODE (dest) == SImode
+		    && GET_MODE (SUBREG_REG (dest)) == HImode)
+		  dest = SUBREG_REG (dest);
+
+		if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+		  FAIL;
+
+		base_addr = adjust_address (operands[1], HImode,
+					    bitpos / BITS_PER_UNIT);
+		emit_insn (gen_unaligned_loadhiu (tmp, base_addr));
+		emit_move_insn (gen_lowpart (SImode, dest), tmp);
+	      }
+	    DONE;
+	  }
+	else if (s_register_operand (operands[1], GET_MODE (operands[1])))
+	  {
+	    emit_insn (gen_extzv_t2 (operands[0], operands[1], operands[2],
+				     operands[3]));
+	    DONE;
+	  }
+	else
+	  FAIL;
       }
+    
+    if (!s_register_operand (operands[1], GET_MODE (operands[1])))
+      FAIL;
 
     operands[3] = GEN_INT (rshift);
     
@@ -3659,7 +3737,115 @@ 
   }"
 )
 
-(define_insn "extv"
+(define_expand "extv"
+  [(set (match_operand 0 "s_register_operand" "")
+	(sign_extract (match_operand 1 "nonimmediate_operand" "")
+		      (match_operand 2 "const_int_operand" "")
+		      (match_operand 3 "const_int_operand" "")))]
+  "arm_arch_thumb2"
+{
+  HOST_WIDE_INT width = INTVAL (operands[2]);
+  HOST_WIDE_INT bitpos = INTVAL (operands[3]);
+
+  if (unaligned_access && MEM_P (operands[1]) && (width == 16 || width == 32)
+      && (bitpos % BITS_PER_UNIT) == 0)
+    {
+      rtx base_addr;
+      
+      if (width == 32)
+        {
+	  base_addr = adjust_address (operands[1], SImode,
+				      bitpos / BITS_PER_UNIT);
+	  emit_insn (gen_unaligned_loadsi (operands[0], base_addr));
+        }
+      else
+        {
+	  rtx dest = operands[0];
+	  rtx tmp = gen_reg_rtx (SImode);
+	  
+	  /* We may get a paradoxical subreg here.  Strip it off.  */
+	  if (GET_CODE (dest) == SUBREG
+	      && GET_MODE (dest) == SImode
+	      && GET_MODE (SUBREG_REG (dest)) == HImode)
+	    dest = SUBREG_REG (dest);
+	  
+	  if (GET_MODE_BITSIZE (GET_MODE (dest)) != width)
+	    FAIL;
+	  
+	  base_addr = adjust_address (operands[1], HImode,
+				      bitpos / BITS_PER_UNIT);
+	  emit_insn (gen_unaligned_loadhis (tmp, base_addr));
+	  emit_move_insn (gen_lowpart (SImode, dest), tmp);
+	}
+
+      DONE;
+    }
+  else if (s_register_operand (operands[1], GET_MODE (operands[1])))
+    DONE;
+  else
+    FAIL;
+})
+
+; ARMv6+ unaligned load/store instructions (used for packed structure accesses).
+
+(define_insn "unaligned_loadsi"
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(unspec:SI [(match_operand:SI 1 "memory_operand" "Uw,m")]
+		   UNSPEC_UNALIGNED_LOAD))]
+  "unaligned_access && TARGET_32BIT"
+  "ldr%?\t%0, %1\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "load1")])
+
+(define_insn "unaligned_loadhis"
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(sign_extend:SI
+	  (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+		     UNSPEC_UNALIGNED_LOAD)))]
+  "unaligned_access && TARGET_32BIT"
+  "ldr%(sh%)\t%0, %1\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_loadhiu"
+  [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+	(zero_extend:SI
+	  (unspec:HI [(match_operand:HI 1 "memory_operand" "Uw,m")]
+		     UNSPEC_UNALIGNED_LOAD)))]
+  "unaligned_access && TARGET_32BIT"
+  "ldr%(h%)\t%0, %1\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "load_byte")])
+
+(define_insn "unaligned_storesi"
+  [(set (match_operand:SI 0 "memory_operand" "=Uw,m")
+	(unspec:SI [(match_operand:SI 1 "s_register_operand" "l,r")]
+		   UNSPEC_UNALIGNED_STORE))]
+  "unaligned_access && TARGET_32BIT"
+  "str%?\t%1, %0\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "store1")])
+
+(define_insn "unaligned_storehi"
+  [(set (match_operand:HI 0 "memory_operand" "=Uw,m")
+	(unspec:HI [(match_operand:HI 1 "s_register_operand" "l,r")]
+		   UNSPEC_UNALIGNED_STORE))]
+  "unaligned_access && TARGET_32BIT"
+  "str%(h%)\t%1, %0\t@ unaligned"
+  [(set_attr "arch" "t2,any")
+   (set_attr "length" "2,4")
+   (set_attr "predicable" "yes")
+   (set_attr "type" "store1")])
+
+(define_insn "*extv_reg"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
                          (match_operand:SI 2 "const_int_operand" "M")
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 7d2d84c..3ed9016 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -170,3 +170,7 @@  mfix-cortex-m3-ldrd
 Target Report Var(fix_cm3_ldrd) Init(2)
 Avoid overlapping destination and address registers on LDRD instructions
 that may trigger Cortex-M3 errata.
+
+munaligned-access
+Target Report Var(unaligned_access) Init(2)
+Enable unaligned word and halfword accesses to packed data.
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index f5b8521..53c358e 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -36,7 +36,7 @@ 
 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
 ;; in ARM state: Uq
-;; in Thumb state: Uu
+;; in Thumb state: Uu, Uw
 
 
 (define_register_constraint "f" "TARGET_ARM ? FPA_REGS : NO_REGS"
@@ -341,6 +341,19 @@ 
 		   && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
 						   0)")))
 
+; The 16-bit post-increment LDR/STR accepted by thumb1_legitimate_address_p
+; are actually LDM/STM instructions, so cannot be used to access unaligned
+; data.
+(define_memory_constraint "Uw"
+ "@internal
+  In Thumb state an address that is valid in 16bit encoding, and that can be
+  used for unaligned accesses."
+ (and (match_code "mem")
+      (match_test "TARGET_THUMB
+		   && thumb1_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
+						   0)
+		   && GET_CODE (XEXP (op, 0)) != POST_INC")))
+
 ;; We used to have constraint letters for S and R in ARM state, but
 ;; all uses of these now appear to have been removed.
 
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 7482747..a525184 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -648,7 +648,7 @@  store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
       /* On big-endian machines, we count bits from the most significant.
 	 If the bit field insn does not, we must invert.  */
 
-      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN && !MEM_P (xop0))
 	xbitpos = unit - bitsize - xbitpos;
 
       /* We have been counting XBITPOS within UNIT.
@@ -1456,7 +1456,7 @@  extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 
       /* On big-endian machines, we count bits from the most significant.
 	 If the bit field insn does not, we must invert.  */
-      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
+      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN && !MEM_P (xop0))
 	xbitpos = unit - bitsize - xbitpos;
 
       /* Now convert from counting within UNIT to counting in EXT_MODE.  */