package/gcc: Backport code generation fix for ARC700

Message ID 20180712111349.15162-1-abrodkin@synopsys.com
State Accepted
Headers show
Series
  • package/gcc: Backport code generation fix for ARC700
Related show

Commit Message

Alexey Brodkin July 12, 2018, 11:13 a.m.
Fixes assembler failure when compiling for ARC700 only:
------------------------>8---------------------
{standard input}: Assembler messages:
{standard input}:213: Error: operand out of range (128 is not between -128 and 127)
make[2]: *** [scripts/Makefile.build:317: net/ipv4/xfrm4_mode_tunnel.o] Error 1
------------------------>8---------------------

This is an upstream fix scheduled for the next release, see
https://github.com/gcc-mirror/gcc/commit/5afc07eeb18fb7dd351ac981367bce721ca22af3

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
---
 ...ement-return-padding-operation-for-ARC700.patch | 355 +++++++++++++++++++++
 1 file changed, 355 insertions(+)
 create mode 100644 package/gcc/8.1.0/0006-ARC-Reimplement-return-padding-operation-for-ARC700.patch

Comments

Thomas Petazzoni July 17, 2018, 10:55 a.m. | #1
Hello,

On Thu, 12 Jul 2018 14:13:49 +0300, Alexey Brodkin wrote:
> Fixes assembler failure when compiling for ARC700 only:
> ------------------------>8---------------------  
> {standard input}: Assembler messages:
> {standard input}:213: Error: operand out of range (128 is not between -128 and 127)
> make[2]: *** [scripts/Makefile.build:317: net/ipv4/xfrm4_mode_tunnel.o] Error 1
> ------------------------>8---------------------  

Applied to master, thanks.

Thomas

Patch

diff --git a/package/gcc/8.1.0/0006-ARC-Reimplement-return-padding-operation-for-ARC700.patch b/package/gcc/8.1.0/0006-ARC-Reimplement-return-padding-operation-for-ARC700.patch
new file mode 100644
index 0000000000..2c7f50ea6f
--- /dev/null
+++ b/package/gcc/8.1.0/0006-ARC-Reimplement-return-padding-operation-for-ARC700.patch
@@ -0,0 +1,355 @@ 
+From d8d716f49c0057e239f2b64e7d902046b92d244f Mon Sep 17 00:00:00 2001
+From: Claudiu Zissulescu <claziss@synopsys.com>
+Date: Fri, 24 Mar 2017 11:55:54 +0100
+Subject: [PATCH] [ARC] Reimplement return padding operation for ARC700.
+
+For ARC700, adding padding if necessary to avoid a mispredict.  A
+return could happen immediately after the function start.  A
+call/return and return/return must be 6 bytes apart to avoid
+mispredict.
+
+The old implementation was doing this operation very late in the
+compilation process, and the additional nop instructions and/or
+forcing some other instruction to take their long form was not taken
+into account when generating brcc instructions. Thus, wrong code could
+be generated.
+
+gcc/
+2017-03-24  Claudiu Zissulescu  <claziss@synopsys.com>
+
+	* config/arc/arc-protos.h (arc_pad_return): Remove.
+	* config/arc/arc.c (machine_function): Remove force_short_suffix
+	and size_reason.
+	(arc_print_operand): Adjust printing of '&'.
+	(arc_verify_short): Remove conditional printing of short suffix.
+	(arc_final_prescan_insn): Remove reference to size_reason.
+	(pad_return): New function.
+	(arc_reorg): Call pad_return.
+	(arc_pad_return): Remove.
+	(arc_init_machine_status): Remove reference to force_short_suffix.
+	* config/arc/arc.md (vunspec): Add VUNSPEC_ARC_BLOCKAGE.
+	(attr length): When attribute iscompact is true force to 2
+	regardless; in the case of maybe check if we want to force the
+	instruction to have 4 bytes length.
+	(nopv): Change it to generate 4 byte long nop as well.
+	(blockage): New pattern.
+	(simple_return): Remove call to arc_pad_return.
+	(p_return_i): Likewise.
+
+git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@261542 138bc75d-0d04-0410-961f-82ee72b054a4
+Upstream-Status: Backport (trunk)
+Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
+---
+ gcc/config/arc/arc-protos.h                 |   1 -
+ gcc/config/arc/arc.c                        | 156 +++++++++-----------
+ gcc/config/arc/arc.md                       |  26 +++-
+ 4 files changed, 128 insertions(+), 93 deletions(-)
+
+diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
+index 67f3b4e3226b..ce4b6f84749e 100644
+--- a/gcc/config/arc/arc-protos.h
++++ b/gcc/config/arc/arc-protos.h
+@@ -89,7 +89,6 @@ extern void arc_clear_unalign (void);
+ extern void arc_toggle_unalign (void);
+ extern void split_addsi (rtx *);
+ extern void split_subsi (rtx *);
+-extern void arc_pad_return (void);
+ extern void arc_split_move (rtx *);
+ extern const char *arc_short_long (rtx_insn *insn, const char *, const char *);
+ extern rtx arc_regno_use_in (unsigned int, rtx);
+diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
+index b1a09d82b72e..22f1442a027c 100644
+--- a/gcc/config/arc/arc.c
++++ b/gcc/config/arc/arc.c
+@@ -2648,8 +2648,6 @@ typedef struct GTY (()) machine_function
+   struct arc_frame_info frame_info;
+   /* To keep track of unalignment caused by short insns.  */
+   int unalign;
+-  int force_short_suffix; /* Used when disgorging return delay slot insns.  */
+-  const char *size_reason;
+   struct arc_ccfsm ccfsm_current;
+   /* Map from uid to ccfsm state during branch shortening.  */
+   rtx ccfsm_current_insn;
+@@ -4307,7 +4305,7 @@ arc_print_operand (FILE *file, rtx x, int code)
+ 	}
+       break;
+     case '&':
+-      if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
++      if (TARGET_ANNOTATE_ALIGN)
+ 	fprintf (file, "; unalign: %d", cfun->machine->unalign);
+       return;
+     case '+':
+@@ -4980,7 +4978,6 @@ static int
+ arc_verify_short (rtx_insn *insn, int, int check_attr)
+ {
+   enum attr_iscompact iscompact;
+-  struct machine_function *machine;
+ 
+   if (check_attr > 0)
+     {
+@@ -4988,10 +4985,6 @@ arc_verify_short (rtx_insn *insn, int, int check_attr)
+       if (iscompact == ISCOMPACT_FALSE)
+ 	return 0;
+     }
+-  machine = cfun->machine;
+-
+-  if (machine->force_short_suffix >= 0)
+-    return machine->force_short_suffix;
+ 
+   return (get_attr_length (insn) & 2) != 0;
+ }
+@@ -5030,8 +5023,6 @@ arc_final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
+       cfun->machine->prescan_initialized = 1;
+     }
+   arc_ccfsm_advance (insn, &arc_ccfsm_current);
+-
+-  cfun->machine->size_reason = 0;
+ }
+ 
+ /* Given FROM and TO register numbers, say whether this elimination is allowed.
+@@ -7673,6 +7664,76 @@ jli_call_scan (void)
+     }
+ }
+ 
++/* Add padding if necessary to avoid a mispredict.  A return could
++   happen immediately after the function start.  A call/return and
++   return/return must be 6 bytes apart to avoid mispredict.  */
++
++static void
++pad_return (void)
++{
++  rtx_insn *insn;
++  long offset;
++
++  if (!TARGET_PAD_RETURN)
++    return;
++
++  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
++    {
++      rtx_insn *prev0 = prev_active_insn (insn);
++      bool wantlong = false;
++
++      if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) != SIMPLE_RETURN)
++	continue;
++
++      if (!prev0)
++	{
++	  prev0 = emit_insn_before (gen_nopv (), insn);
++	  /* REG_SAVE_NOTE is used by Haifa scheduler, we are in reorg
++	     so it is safe to reuse it for forcing a particular length
++	     for an instruction.  */
++	  add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
++	  emit_insn_before (gen_nopv (), insn);
++	  continue;
++	}
++      offset = get_attr_length (prev0);
++
++      if (get_attr_length (prev0) == 2
++	  && get_attr_iscompact (prev0) != ISCOMPACT_TRUE)
++	{
++	  /* Force long version of the insn.  */
++	  wantlong = true;
++	  offset += 2;
++	}
++
++     rtx_insn *prev = prev_active_insn (prev0);
++      if (prev)
++	offset += get_attr_length (prev);
++
++      prev = prev_active_insn (prev);
++      if (prev)
++	offset += get_attr_length (prev);
++
++      switch (offset)
++	{
++	case 2:
++	  prev = emit_insn_before (gen_nopv (), insn);
++	  add_reg_note (prev, REG_SAVE_NOTE, GEN_INT (1));
++	  break;
++	case 4:
++	  emit_insn_before (gen_nopv (), insn);
++	  break;
++	default:
++	  continue;
++	}
++
++      if (wantlong)
++	add_reg_note (prev0, REG_SAVE_NOTE, GEN_INT (1));
++
++      /* Emit a blockage to avoid delay slot scheduling.  */
++      emit_insn_before (gen_blockage(), insn);
++    }
++}
++
+ static int arc_reorg_in_progress = 0;
+ 
+ /* ARC's machince specific reorg function.  */
+@@ -7698,6 +7759,7 @@ arc_reorg (void)
+ 
+   workaround_arc_anomaly ();
+   jli_call_scan ();
++  pad_return ();
+ 
+ /* FIXME: should anticipate ccfsm action, generate special patterns for
+    to-be-deleted branches that have no delay slot and have at least the
+@@ -9256,79 +9318,6 @@ arc_branch_size_unknown_p (void)
+   return !optimize_size && arc_reorg_in_progress;
+ }
+ 
+-/* We are about to output a return insn.  Add padding if necessary to avoid
+-   a mispredict.  A return could happen immediately after the function
+-   start, but after a call we know that there will be at least a blink
+-   restore.  */
+-
+-void
+-arc_pad_return (void)
+-{
+-  rtx_insn *insn = current_output_insn;
+-  rtx_insn *prev = prev_active_insn (insn);
+-  int want_long;
+-
+-  if (!prev)
+-    {
+-      fputs ("\tnop_s\n", asm_out_file);
+-      cfun->machine->unalign ^= 2;
+-      want_long = 1;
+-    }
+-  /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
+-     because after a call, we'd have to restore blink first.  */
+-  else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
+-    return;
+-  else
+-    {
+-      want_long = (get_attr_length (prev) == 2);
+-      prev = prev_active_insn (prev);
+-    }
+-  if (!prev
+-      || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
+-	  ? CALL_ATTR (as_a <rtx_sequence *> (PATTERN (prev))->insn (0),
+-		       NON_SIBCALL)
+-	  : CALL_ATTR (prev, NON_SIBCALL)))
+-    {
+-      if (want_long)
+-	cfun->machine->size_reason
+-	  = "call/return and return/return must be 6 bytes apart to avoid mispredict";
+-      else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
+-	{
+-	  cfun->machine->size_reason
+-	    = "Long unaligned jump avoids non-delay slot penalty";
+-	  want_long = 1;
+-	}
+-      /* Disgorge delay insn, if there is any, and it may be moved.  */
+-      if (final_sequence
+-	  /* ??? Annulled would be OK if we can and do conditionalize
+-	     the delay slot insn accordingly.  */
+-	  && !INSN_ANNULLED_BRANCH_P (insn)
+-	  && (get_attr_cond (insn) != COND_USE
+-	      || !reg_set_p (gen_rtx_REG (CCmode, CC_REG),
+-			     XVECEXP (final_sequence, 0, 1))))
+-	{
+-	  prev = as_a <rtx_insn *> (XVECEXP (final_sequence, 0, 1));
+-	  gcc_assert (!prev_real_insn (insn)
+-		      || !arc_hazard (prev_real_insn (insn), prev));
+-	  cfun->machine->force_short_suffix = !want_long;
+-	  rtx save_pred = current_insn_predicate;
+-	  final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
+-	  cfun->machine->force_short_suffix = -1;
+-	  prev->set_deleted ();
+-	  current_output_insn = insn;
+-	  current_insn_predicate = save_pred;
+-	}
+-      else if (want_long)
+-	fputs ("\tnop\n", asm_out_file);
+-      else
+-	{
+-	  fputs ("\tnop_s\n", asm_out_file);
+-	  cfun->machine->unalign ^= 2;
+-	}
+-    }
+-  return;
+-}
+-
+ /* The usual; we set up our machine_function data.  */
+ 
+ static struct machine_function *
+@@ -9337,7 +9326,6 @@ arc_init_machine_status (void)
+   struct machine_function *machine;
+   machine = ggc_cleared_alloc<machine_function> ();
+   machine->fn_type = ARC_FUNCTION_UNKNOWN;
+-  machine->force_short_suffix = -1;
+ 
+   return machine;
+ }
+diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
+index 5610bab694c6..2401926f08df 100644
+--- a/gcc/config/arc/arc.md
++++ b/gcc/config/arc/arc.md
+@@ -162,6 +162,7 @@
+   VUNSPEC_ARC_CAS
+   VUNSPEC_ARC_SC
+   VUNSPEC_ARC_LL
++  VUNSPEC_ARC_BLOCKAGE
+   ])
+ 
+ (define_constants
+@@ -385,13 +386,18 @@
+ ;; and insn lengths: insns with shimm values cannot be conditionally executed.
+ (define_attr "length" ""
+   (cond
+-    [(eq_attr "iscompact" "true,maybe")
++    [(eq_attr "iscompact" "true")
++      (const_int 2)
++
++     (eq_attr "iscompact" "maybe")
+      (cond
+        [(eq_attr "type" "sfunc")
+ 	(cond [(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC")
+ 	       (const_int 12)]
+ 	      (const_int 10))
+-	(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)]
++	(match_test "GET_CODE (PATTERN (insn)) == COND_EXEC") (const_int 4)
++	(match_test "find_reg_note (insn, REG_SAVE_NOTE, GEN_INT (1))")
++	(const_int 4)]
+       (const_int 2))
+ 
+     (eq_attr "iscompact" "true_limm")
+@@ -4447,8 +4453,16 @@ archs4x, archs4xd, archs4xd_slow"
+   ""
+   "nop%?"
+   [(set_attr "type" "misc")
+-   (set_attr "iscompact" "true")
+-   (set_attr "length" "2")])
++   (set_attr "iscompact" "maybe")
++   (set_attr "length" "*")])
++
++(define_insn "blockage"
++  [(unspec_volatile [(const_int 0)] VUNSPEC_ARC_BLOCKAGE)]
++  ""
++  ""
++  [(set_attr "length" "0")
++   (set_attr "type" "block")]
++)
+ 
+ ;; Split up troublesome insns for better scheduling.
+ 
+@@ -4993,8 +5007,6 @@ archs4x, archs4xd, archs4xd_slow"
+   {
+     return \"rtie\";
+   }
+-  if (TARGET_PAD_RETURN)
+-    arc_pad_return ();
+   output_asm_insn (\"j%!%* [%0]%&\", &reg);
+   return \"\";
+ }
+@@ -5038,8 +5050,6 @@ archs4x, archs4xd, archs4xd_slow"
+ 		   arc_return_address_register (arc_compute_function_type
+ 						(cfun)));
+ 
+-  if (TARGET_PAD_RETURN)
+-    arc_pad_return ();
+   output_asm_insn (\"j%d0%!%# [%1]%&\", xop);
+   /* record the condition in case there is a delay insn.  */
+   arc_ccfsm_record_condition (xop[0], false, insn, 0);
+-- 
+2.17.1
+