PR target/86891 __builtin_<add/sub>_overflow issues on AArch64 (redux)

Message ID 5ef55e58-a442-2885-9128-0cc4a5820b48@arm.com
State New
Series PR target/86891 __builtin_<add/sub>_overflow issues on AArch64 (redux)

Commit Message

Richard Earnshaw (lists) Jan. 16, 2019, 3:22 p.m. UTC
Further investigation showed that my previous patch for this issue was
still incomplete.

The problem stemmed from what I suspect was a misunderstanding of the
way overflow is calculated on aarch64 when values are subtracted (and
hence in comparisons).  In that case, unlike addition, the carry flag
is /cleared/ if there is overflow (technically, underflow) and set when
that does not happen.  This patch addresses the issue by using CCmode
for all subtractive operations (this can fully describe the normal
overflow conditions without anything particularly fancy); it cleans up
the way we express normal unsigned overflow using CC_Cmode (the result
of a sum is less than one of the operands); and it adds a new mode,
CC_ADCmode, to express the overflow of an add-with-carry operation,
where the standard idiom is no longer sufficient to describe the
overflow condition.
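
To make the distinction concrete, here is a minimal standalone sketch
(not part of the patch; it assumes a compiler providing unsigned
__int128, as GCC does on 64-bit targets) showing why the usual
"sum < operand" idiom detects unsigned overflow for a plain addition
but can miss the carry out of an add-with-carry:

#include <stdint.h>
#include <stdio.h>

/* Plain addition: unsigned overflow iff the truncated sum wraps
   below one of the operands.  This is the CC_Cmode idiom.  */
static int
add_overflows (uint64_t a, uint64_t b)
{
  return a + b < a;
}

/* Add-with-carry: when the carry in is 1 and b is all-ones, the
   truncated result equals a exactly, so "result < a" is false even
   though a carry out occurred.  Testing the widened sum against
   2^64 is what the new CC_ADCmode expresses.  */
static int
adc_overflows (uint64_t a, uint64_t b, int carry_in)
{
  unsigned __int128 sum = (unsigned __int128) a + b + carry_in;
  return sum >> 64 != 0;
}

int
main (void)
{
  uint64_t a = 5, b = UINT64_MAX;
  printf ("plain add:   %d\n", add_overflows (a, b));     /* 1: wraps   */
  printf ("naive adc:   %d\n", a + b + 1 < a);            /* 0: missed  */
  printf ("widened adc: %d\n", adc_overflows (a, b, 1));  /* 1: correct */
  return 0;
}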

	PR target/86891
	* config/aarch64/aarch64-modes.def: Add comment about how the carry
	bit is set by add and compare.
	(CC_ADC): New CC_MODE.
	* config/aarch64/aarch64.c (aarch64_select_cc_mode): Use variables
	to cache the code and mode of X.  Adjust the shape of a CC_Cmode
	comparison.  Add detection for CC_ADCmode.
	(aarch64_get_condition_code_1): Update code support for CC_Cmode.  Add
	CC_ADCmode.
	* config/aarch64/aarch64.md (uaddv<mode>4): Use LTU with CCmode.
	(uaddvti4): Comparison result is in CC_ADCmode and the condition is GEU.
	(add<mode>3_compareC_cconly_imm): Delete.  Merge into ...
	(add<mode>3_compareC_cconly): ... this.  Restructure the comparison
	to eliminate the need for zero-extending the operands.
	(add<mode>3_compareC_imm): Delete.  Merge into ...
	(add<mode>3_compareC): ... this.  Restructure the comparison to
	eliminate the need for zero-extending the operands.
	(add<mode>3_carryin): Use LTU for the overflow detection.
	(add<mode>3_carryinC): Use CC_ADCmode for the result of the carry out.
	Reexpress comparison for overflow.
	(add<mode>3_carryinC_zero): Update for change to add<mode>3_carryinC.
	(add<mode>3_carryinC): Likewise.
	(add<mode>3_carryinV): Use LTU for carry between partials.
	* config/aarch64/predicates.md (aarch64_carry_operation): Update
	handling of CC_Cmode and add CC_ADCmode.
	(aarch64_borrow_operation): Likewise.

Bootstrapped on aarch64-linux.  Applied to trunk.
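
For reference, the shape of source code that exercises the TImode path
looks like the following (an illustrative sketch only; the actual
testcases are in the PR).  The __uint128_t addition is split into an
ADDS/ADCS pair, and it is the carry out of the ADCS, now modelled by
CC_ADCmode, that feeds the overflow branch:

/* Illustrative only: the 128-bit overflowing add goes through the
   uaddvti4 expander, ending in an adds/adcs sequence.  */
int
overflow_add (unsigned __int128 *res, unsigned __int128 a,
	      unsigned __int128 b)
{
  return __builtin_add_overflow (a, b, res);
}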

Patch

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 5fe5ef0..14c1a43 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -18,12 +18,25 @@ 
    along with GCC; see the file COPYING3.  If not see
    <http://www.gnu.org/licenses/>.  */
 
+/* Important note about Carry generation in AArch64.
+
+   Unlike some architectures, the C flag generated by a subtract
+   operation, or a simple compare operation is set to 1 if the result
+   does not overflow in an unsigned sense.  That is, if there is no
+   borrow needed from a higher word.  That means that overflow from
+   addition will set C, but overflow from a subtraction will clear C.
+   We use CC_Cmode to represent detection of overflow from addition as
+   CCmode is used for 'normal' compare (subtraction) operations.  For
+   ADC, the representation becomes more complex still, since we cannot
+   use the normal idiom of comparing the result to one of the input
+   operands; instead we use CC_ADCmode to represent this case.  */
 CC_MODE (CCFP);
 CC_MODE (CCFPE);
 CC_MODE (CC_SWP);
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
-CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
+CC_MODE (CC_C);     /* C represents unsigned overflow of a simple addition.  */
+CC_MODE (CC_ADC);   /* Unsigned overflow from an ADC (add with carry).  */
 CC_MODE (CC_V);     /* Only V bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fd60bdd..da48c09 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7089,9 +7089,12 @@  aarch64_emit_call_insn (rtx pat)
 machine_mode
 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
 {
+  machine_mode mode_x = GET_MODE (x);
+  rtx_code code_x = GET_CODE (x);
+
   /* All floating point compares return CCFP if it is an equality
      comparison, and CCFPE otherwise.  */
-  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+  if (GET_MODE_CLASS (mode_x) == MODE_FLOAT)
     {
       switch (code)
 	{
@@ -7122,55 +7125,65 @@  aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
      using the TST instruction with the appropriate bitmask.  */
   if (y == const0_rtx && REG_P (x)
       && (code == EQ || code == NE)
-      && (GET_MODE (x) == HImode || GET_MODE (x) == QImode))
+      && (mode_x == HImode || mode_x == QImode))
     return CC_NZmode;
 
   /* Similarly, comparisons of zero_extends from shorter modes can
      be performed using an ANDS with an immediate mask.  */
-  if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND
-      && (GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+  if (y == const0_rtx && code_x == ZERO_EXTEND
+      && (mode_x == SImode || mode_x == DImode)
       && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode)
       && (code == EQ || code == NE))
     return CC_NZmode;
 
-  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+  if ((mode_x == SImode || mode_x == DImode)
       && y == const0_rtx
       && (code == EQ || code == NE || code == LT || code == GE)
-      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
-	  || GET_CODE (x) == NEG
-	  || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
+      && (code_x == PLUS || code_x == MINUS || code_x == AND
+	  || code_x == NEG
+	  || (code_x == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
 	      && CONST_INT_P (XEXP (x, 2)))))
     return CC_NZmode;
 
   /* A compare with a shifted operand.  Because of canonicalization,
      the comparison will have to be swapped when we emit the assembly
      code.  */
-  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+  if ((mode_x == SImode || mode_x == DImode)
       && (REG_P (y) || GET_CODE (y) == SUBREG || y == const0_rtx)
-      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
-	  || GET_CODE (x) == LSHIFTRT
-	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
+      && (code_x == ASHIFT || code_x == ASHIFTRT
+	  || code_x == LSHIFTRT
+	  || code_x == ZERO_EXTEND || code_x == SIGN_EXTEND))
     return CC_SWPmode;
 
   /* Similarly for a negated operand, but we can only do this for
      equalities.  */
-  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
+  if ((mode_x == SImode || mode_x == DImode)
       && (REG_P (y) || GET_CODE (y) == SUBREG)
       && (code == EQ || code == NE)
-      && GET_CODE (x) == NEG)
+      && code_x == NEG)
     return CC_Zmode;
 
-  /* A test for unsigned overflow.  */
-  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
-      && code == NE
-      && GET_CODE (x) == PLUS
-      && GET_CODE (y) == ZERO_EXTEND)
+  /* A test for unsigned overflow from an addition.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == LTU || code == GEU)
+      && code_x == PLUS
+      && rtx_equal_p (XEXP (x, 0), y))
     return CC_Cmode;
 
+  /* A test for unsigned overflow from an add with carry.  */
+  if ((mode_x == DImode || mode_x == TImode)
+      && (code == LTU || code == GEU)
+      && code_x == PLUS
+      && CONST_SCALAR_INT_P (y)
+      && (rtx_mode_t (y, mode_x)
+	  == (wi::shwi (1, mode_x)
+	      << (GET_MODE_BITSIZE (mode_x).to_constant () / 2))))
+    return CC_ADCmode;
+
   /* A test for signed overflow.  */
-  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+  if ((mode_x == DImode || mode_x == TImode)
       && code == NE
-      && GET_CODE (x) == PLUS
+      && code_x == PLUS
       && GET_CODE (y) == SIGN_EXTEND)
     return CC_Vmode;
 
@@ -7274,8 +7287,17 @@  aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
     case E_CC_Cmode:
       switch (comp_code)
 	{
-	case NE: return AARCH64_CS;
-	case EQ: return AARCH64_CC;
+	case LTU: return AARCH64_CS;
+	case GEU: return AARCH64_CC;
+	default: return -1;
+	}
+      break;
+
+    case E_CC_ADCmode:
+      switch (comp_code)
+	{
+	case GEU: return AARCH64_CS;
+	case LTU: return AARCH64_CC;
 	default: return -1;
 	}
       break;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 513aec1..e65936a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1871,7 +1871,7 @@  (define_expand "uaddv<mode>4"
   ""
 {
   emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2]));
-  aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]);
+  aarch64_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
 
   DONE;
 })
@@ -1973,7 +1973,7 @@  (define_expand "uaddvti4"
   emit_move_insn (gen_lowpart (DImode, operands[0]), low_dest);
   emit_move_insn (gen_highpart (DImode, operands[0]), high_dest);
 
-  aarch64_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]);
+  aarch64_gen_unlikely_cbranch (GEU, CC_ADCmode, operands[3]);
   DONE;
  })
 
@@ -2010,69 +2010,36 @@  (define_insn "*addsi3_compare0_uxtw"
   [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
-(define_insn "*add<mode>3_compareC_cconly_imm"
-  [(set (reg:CC_C CC_REGNUM)
-	(ne:CC_C
-	  (plus:<DWI>
-	    (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r,r"))
-	    (match_operand:<DWI> 2 "const_scalar_int_operand" ""))
-	  (zero_extend:<DWI>
-	    (plus:GPI
-	      (match_dup 0)
-	      (match_operand:GPI 1 "aarch64_plus_immediate" "I,J")))))]
-  "aarch64_zero_extend_const_eq (<DWI>mode, operands[2],
-				 <MODE>mode, operands[1])"
-  "@
-  cmn\\t%<w>0, %1
-  cmp\\t%<w>0, #%n1"
-  [(set_attr "type" "alus_imm")]
-)
-
 (define_insn "*add<mode>3_compareC_cconly"
   [(set (reg:CC_C CC_REGNUM)
-	(ne:CC_C
-	  (plus:<DWI>
-	    (zero_extend:<DWI> (match_operand:GPI 0 "register_operand" "r"))
-	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
-	  (zero_extend:<DWI> (plus:GPI (match_dup 0) (match_dup 1)))))]
+	(compare:CC_C
+	  (plus:GPI
+	    (match_operand:GPI 0 "register_operand" "r,r,r")
+	    (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J"))
+	  (match_dup 0)))]
   ""
-  "cmn\\t%<w>0, %<w>1"
-  [(set_attr "type" "alus_sreg")]
-)
-
-(define_insn "*add<mode>3_compareC_imm"
-  [(set (reg:CC_C CC_REGNUM)
-	(ne:CC_C
-	  (plus:<DWI>
-	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r,r"))
-	    (match_operand:<DWI> 3 "const_scalar_int_operand" ""))
-	  (zero_extend:<DWI>
-	    (plus:GPI
-	      (match_dup 1)
-	      (match_operand:GPI 2 "aarch64_plus_immediate" "I,J")))))
-   (set (match_operand:GPI 0 "register_operand" "=r,r")
-	(plus:GPI (match_dup 1) (match_dup 2)))]
-  "aarch64_zero_extend_const_eq (<DWI>mode, operands[3],
-                                 <MODE>mode, operands[2])"
   "@
-  adds\\t%<w>0, %<w>1, %2
-  subs\\t%<w>0, %<w>1, #%n2"
-  [(set_attr "type" "alus_imm")]
+  cmn\\t%<w>0, %<w>1
+  cmn\\t%<w>0, %1
+  cmp\\t%<w>0, #%n1"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
 (define_insn "add<mode>3_compareC"
   [(set (reg:CC_C CC_REGNUM)
 	(compare:CC_C
-	  (plus:<DWI>
-	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r"))
-	    (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
-	  (zero_extend:<DWI>
-	    (plus:GPI (match_dup 1) (match_dup 2)))))
-   (set (match_operand:GPI 0 "register_operand" "=r")
+	  (plus:GPI
+	    (match_operand:GPI 1 "register_operand" "r,r,r")
+	    (match_operand:GPI 2 "aarch64_plus_operand" "r,I,J"))
+	  (match_dup 1)))
+   (set (match_operand:GPI 0 "register_operand" "=r,r,r")
 	(plus:GPI (match_dup 1) (match_dup 2)))]
   ""
-  "adds\\t%<w>0, %<w>1, %<w>2"
-  [(set_attr "type" "alus_sreg")]
+  "@
+  adds\\t%<w>0, %<w>1, %<w>2
+  adds\\t%<w>0, %<w>1, %2
+  subs\\t%<w>0, %<w>1, #%n2"
+  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
 (define_insn "*add<mode>3_compareV_cconly_imm"
@@ -2470,7 +2437,7 @@  (define_expand "add<mode>3_carryin"
   [(set (match_operand:GPI 0 "register_operand")
 	(plus:GPI
 	  (plus:GPI
-	    (ne:GPI (reg:CC_C CC_REGNUM) (const_int 0))
+	    (ltu:GPI (reg:CC_C CC_REGNUM) (const_int 0))
 	    (match_operand:GPI 1 "aarch64_reg_or_zero"))
 	  (match_operand:GPI 2 "aarch64_reg_or_zero")))]
    ""
@@ -2509,7 +2476,7 @@  (define_insn "*addsi3_carryin_uxtw"
 (define_expand "add<mode>3_carryinC"
   [(parallel
      [(set (match_dup 3)
-	   (compare:CC_C
+	   (compare:CC_ADC
 	     (plus:<DWI>
 	       (plus:<DWI>
 		 (match_dup 4)
@@ -2517,57 +2484,54 @@  (define_expand "add<mode>3_carryinC"
 		   (match_operand:GPI 1 "register_operand" "")))
 	       (zero_extend:<DWI>
 		 (match_operand:GPI 2 "register_operand" "")))
-	   (zero_extend:<DWI>
-	     (plus:GPI
-	       (plus:GPI (match_dup 5) (match_dup 1))
-	       (match_dup 2)))))
+	     (match_dup 6)))
       (set (match_operand:GPI 0 "register_operand")
 	   (plus:GPI
 	     (plus:GPI (match_dup 5) (match_dup 1))
 	     (match_dup 2)))])]
    ""
 {
-  operands[3] = gen_rtx_REG (CC_Cmode, CC_REGNUM);
-  operands[4] = gen_rtx_NE (<DWI>mode, operands[3], const0_rtx);
-  operands[5] = gen_rtx_NE (<MODE>mode, operands[3], const0_rtx);
+  operands[3] = gen_rtx_REG (CC_ADCmode, CC_REGNUM);
+  rtx ccin = gen_rtx_REG (CC_Cmode, CC_REGNUM);
+  operands[4] = gen_rtx_LTU (<DWI>mode, ccin, const0_rtx);
+  operands[5] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx);
+  operands[6] = immed_wide_int_const (wi::shwi (1, <DWI>mode)
+				      << GET_MODE_BITSIZE (<MODE>mode),
+				      TImode);
 })
 
 (define_insn "*add<mode>3_carryinC_zero"
-  [(set (reg:CC_C CC_REGNUM)
-	(compare:CC_C
+  [(set (reg:CC_ADC CC_REGNUM)
+	(compare:CC_ADC
 	  (plus:<DWI>
 	    (match_operand:<DWI> 2 "aarch64_carry_operation" "")
 	    (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
-	  (zero_extend:<DWI>
-	    (plus:GPI
-	      (match_operand:GPI 3 "aarch64_carry_operation" "")
-	      (match_dup 1)))))
+	  (match_operand 4 "const_scalar_int_operand" "")))
    (set (match_operand:GPI 0 "register_operand" "=r")
-	(plus:GPI (match_dup 3) (match_dup 1)))]
-   ""
+	(plus:GPI (match_operand:GPI 3 "aarch64_carry_operation" "")
+		  (match_dup 1)))]
+  "rtx_mode_t (operands[4], <DWI>mode)
+   == (wi::shwi (1, <DWI>mode) << (unsigned) GET_MODE_BITSIZE (<MODE>mode))"
    "adcs\\t%<w>0, %<w>1, <w>zr"
   [(set_attr "type" "adc_reg")]
 )
 
 (define_insn "*add<mode>3_carryinC"
-  [(set (reg:CC_C CC_REGNUM)
-	(compare:CC_C
+  [(set (reg:CC_ADC CC_REGNUM)
+	(compare:CC_ADC
 	  (plus:<DWI>
 	    (plus:<DWI>
 	      (match_operand:<DWI> 3 "aarch64_carry_operation" "")
 	      (zero_extend:<DWI> (match_operand:GPI 1 "register_operand" "r")))
 	    (zero_extend:<DWI> (match_operand:GPI 2 "register_operand" "r")))
-	  (zero_extend:<DWI>
-	    (plus:GPI
-	      (plus:GPI
-		(match_operand:GPI 4 "aarch64_carry_operation" "")
-		(match_dup 1))
-	      (match_dup 2)))))
+	  (match_operand 5 "const_scalar_int_operand" "")))
    (set (match_operand:GPI 0 "register_operand" "=r")
 	(plus:GPI
-	  (plus:GPI (match_dup 4) (match_dup 1))
+	  (plus:GPI (match_operand:GPI 4 "aarch64_carry_operation" "")
+		    (match_dup 1))
 	  (match_dup 2)))]
-   ""
+  "rtx_mode_t (operands[5], <DWI>mode)
+   == (wi::shwi (1, <DWI>mode) << (unsigned) GET_MODE_BITSIZE (<MODE>mode))"
    "adcs\\t%<w>0, %<w>1, %<w>2"
   [(set_attr "type" "adc_reg")]
 )
@@ -2594,8 +2558,8 @@  (define_expand "add<mode>3_carryinV"
    ""
 {
   rtx cc = gen_rtx_REG (CC_Cmode, CC_REGNUM);
-  operands[3] = gen_rtx_NE (<DWI>mode, cc, const0_rtx);
-  operands[4] = gen_rtx_NE (<MODE>mode, cc, const0_rtx);
+  operands[3] = gen_rtx_LTU (<DWI>mode, cc, const0_rtx);
+  operands[4] = gen_rtx_LTU (<MODE>mode, cc, const0_rtx);
 })
 
 (define_insn "*add<mode>3_carryinV_zero"
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 9103b82..855cf7b 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -347,23 +347,37 @@  (define_special_predicate "aarch64_equality_operator"
   (match_code "eq,ne"))
 
 (define_special_predicate "aarch64_carry_operation"
-  (match_code "ne,geu")
+  (match_code "ltu,geu")
 {
   if (XEXP (op, 1) != const0_rtx)
     return false;
-  machine_mode ccmode = (GET_CODE (op) == NE ? CC_Cmode : CCmode);
   rtx op0 = XEXP (op, 0);
-  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+  if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
+    return false;
+  machine_mode ccmode = GET_MODE (op0);
+  if (ccmode == CC_Cmode)
+    return GET_CODE (op) == LTU;
+  if (ccmode == CC_ADCmode || ccmode == CCmode)
+    return GET_CODE (op) == GEU;
+  return false;
 })
 
+; borrow is essentially the inverse of carry since the sense of the C flag
+; is inverted during subtraction.  See the note in aarch64-modes.def.
 (define_special_predicate "aarch64_borrow_operation"
-  (match_code "eq,ltu")
+  (match_code "geu,ltu")
 {
   if (XEXP (op, 1) != const0_rtx)
     return false;
-  machine_mode ccmode = (GET_CODE (op) == EQ ? CC_Cmode : CCmode);
   rtx op0 = XEXP (op, 0);
-  return REG_P (op0) && REGNO (op0) == CC_REGNUM && GET_MODE (op0) == ccmode;
+  if (!REG_P (op0) || REGNO (op0) != CC_REGNUM)
+    return false;
+  machine_mode ccmode = GET_MODE (op0);
+  if (ccmode == CC_Cmode)
+    return GET_CODE (op) == GEU;
+  if (ccmode == CC_ADCmode || ccmode == CCmode)
+    return GET_CODE (op) == LTU;
+  return false;
 })
 
 ;; True if the operand is memory reference suitable for a load/store exclusive.
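
As a footnote to the predicate changes above: the inverted sense of C
under subtraction that aarch64_borrow_operation relies on can be stated
in one line (a sketch restating the aarch64-modes.def note, not part of
the patch).  For an unsigned subtract a - b, the hardware sets C
exactly when no borrow is needed:

#include <stdint.h>

/* Sketch: the AArch64 C flag after SUBS/CMP equals the inverse of the
   borrow, i.e. it is set iff the subtraction does not underflow.  */
static inline int
c_flag_after_subs (uint64_t a, uint64_t b)
{
  return a >= b;	/* C set iff no borrow from a higher word.  */
}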