diff mbox

Add AVG and UAVG rtx codes

Message ID 4DFA2A71.7030402@codesourcery.com
State New
Headers show

Commit Message

Bernd Schmidt June 16, 2011, 4:08 p.m. UTC
Another operation that exists on a number of processors is an "average"
operation, which computes "(ext (A) + ext (B) + 1) >> 1" from two input
operands. Some of our backends try to represent this with complex RTL
expressions, others use unspec. This patch adds two new rtx codes.

Bootstrapped and tested on i686-linux (with the usual pass39-frag test
failures). Ok?


Bernd
* doc/rtl.texi (avg, uavg): Document.
	* rtl.def (AVG, UAVG): New.
	* simplify-rtx.c (simplify_binary_operation_1,
	simplify_const_binary_operation): Add them to default clauses.
	* config/i386/mmx.md (mmx_uavgv8qi3, *mmx_uavgv8qi3,
	mmx_uavgv4hi3, *mmx_uavgv4hi3): Rewrite to use UAVG.
	* config/i386/sse.md (sse2_uavgv16qi3, *sse2_uavgv16qi3,
	sse2_uavgv8hi3, *sse2_uavgv8hi3): Likewise.

Comments

Jakub Jelinek June 16, 2011, 4:14 p.m. UTC | #1
On Thu, Jun 16, 2011 at 06:08:17PM +0200, Bernd Schmidt wrote:
> Another operation that exists on a number of processors is an "average"
> operation, which computes "(ext (A) + ext (B) + 1) >> 1" from two input
> operands. Some of our backends try to represent this with complex RTL
> expressions, others use unspec. This patch adds two new rtx codes.
> 
> Bootstrapped and tested on i686-linux (with the usual pass39-frag test
> failures). Ok?

Please add the new codes also to dwarf2out.c (mem_loc_descriptor), otherwise
if it will be optimized away you might get ICEs when emitting debug info.
Similarly for CRSB or whatever for the sign bit count.
If you don't feel like adding support for emitting it in DWARF
(CRSB might be similar to CLZ and might even use the same routine with
some tweaks, AVG/UAVG should be easy too), just add it to the
list of unhandled cases.

	Jakub
diff mbox

Patch

Index: gcc/doc/rtl.texi
===================================================================
--- gcc/doc/rtl.texi	(revision 174339)
+++ gcc/doc/rtl.texi	(working copy)
@@ -2385,6 +2385,18 @@  Represents the absolute value of @var{x}
 @code{ss_abs} ensures that an out-of-bounds result saturates to the
 maximum signed value.
 
+@findex avg
+@cindex average
+@findex uavg
+@cindex unsigned average
+@item (avg:@var{m} @var{x} @var{y})
+@itemx (uavg:@var{m} @var{x} @var{y})
+Represents an averaging operation.  Two integer values are added, the
+constant 1 is added to the result, and the whole is shifted right by one
+to produce the result.  The result has the same mode as the inputs, but
+the operation uses intermediate values which are one bit wider.  The
+inputs are sign-extended to that wider precision for @code{avg},
+zero-extended for @code{uavg}.
 
 @findex sqrt
 @cindex square root
Index: gcc/rtl.def
===================================================================
--- gcc/rtl.def	(revision 174339)
+++ gcc/rtl.def	(working copy)
@@ -674,6 +674,15 @@  DEF_RTL_EXPR(VEC_CONCAT, "vec_concat", "
    an integer multiple of the number of input parts.  */
 DEF_RTL_EXPR(VEC_DUPLICATE, "vec_duplicate", "e", RTX_UNARY)
 
+/* Describes an operation that averages two integer values by adding
+   them together, adding 1, and shifting the result right by 1.  The
+   result is as large as the inputs, but the operation uses
+   intermediate values with a precision one bit wider.  For AVG, the
+   input values are sign-extended to that wider precision, for UAVG,
+   they are zero-extended.  */
+DEF_RTL_EXPR(AVG, "avg", "ee", RTX_COMM_ARITH)
+DEF_RTL_EXPR(UAVG, "uavg", "ee", RTX_COMM_ARITH)
+
 /* Addition with signed saturation */
 DEF_RTL_EXPR(SS_PLUS, "ss_plus", "ee", RTX_COMM_ARITH)
 
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c	(revision 174339)
+++ gcc/simplify-rtx.c	(working copy)
@@ -2962,6 +2962,8 @@  simplify_binary_operation_1 (enum rtx_co
     case US_MULT:
     case SS_DIV:
     case US_DIV:
+    case AVG:
+    case UAVG:
       /* ??? There are simplifications that can be done.  */
       return 0;
 
@@ -3671,6 +3673,8 @@  simplify_const_binary_operation (enum rt
 	case US_DIV:
 	case SS_ASHIFT:
 	case US_ASHIFT:
+	case AVG:
+	case UAVG:
 	  /* ??? There are simplifications that can be done.  */
 	  return 0;
 
Index: gcc/config/i386/mmx.md
===================================================================
--- gcc/config/i386/mmx.md	(revision 174339)
+++ gcc/config/i386/mmx.md	(working copy)
@@ -1460,37 +1460,15 @@  (define_expand "vec_initv8qi"
 
 (define_expand "mmx_uavgv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "")
-	(truncate:V8QI
-	  (lshiftrt:V8HI
-	    (plus:V8HI
-	      (plus:V8HI
-		(zero_extend:V8HI
-		  (match_operand:V8QI 1 "nonimmediate_operand" ""))
-		(zero_extend:V8HI
-		  (match_operand:V8QI 2 "nonimmediate_operand" "")))
-	      (const_vector:V8HI [(const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)]))
-	    (const_int 1))))]
+	(uavg:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "")
+		   (match_operand:V8QI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE || TARGET_3DNOW"
   "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
 
 (define_insn "*mmx_uavgv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-	(truncate:V8QI
-	  (lshiftrt:V8HI
-	    (plus:V8HI
-	      (plus:V8HI
-		(zero_extend:V8HI
-		  (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
-		(zero_extend:V8HI
-		  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
-	      (const_vector:V8HI [(const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)]))
-	    (const_int 1))))]
+	(uavg:V8QI (match_operand:V8QI 1 "nonimmediate_operand" "%0")
+		   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
   "(TARGET_SSE || TARGET_3DNOW)
    && ix86_binary_operator_ok (PLUS, V8QImode, operands)"
 {
@@ -1511,33 +1489,15 @@  (define_insn "*mmx_uavgv8qi3"
 
 (define_expand "mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "")
-	(truncate:V4HI
-	  (lshiftrt:V4SI
-	    (plus:V4SI
-	      (plus:V4SI
-		(zero_extend:V4SI
-		  (match_operand:V4HI 1 "nonimmediate_operand" ""))
-		(zero_extend:V4SI
-		  (match_operand:V4HI 2 "nonimmediate_operand" "")))
-	      (const_vector:V4SI [(const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)]))
-	    (const_int 1))))]
+	(uavg:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE || TARGET_3DNOW_A"
   "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
 
 (define_insn "*mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-	(truncate:V4HI
-	  (lshiftrt:V4SI
-	    (plus:V4SI
-	      (plus:V4SI
-		(zero_extend:V4SI
-		  (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
-		(zero_extend:V4SI
-		  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
-	      (const_vector:V4SI [(const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)]))
-	    (const_int 1))))]
+	(uavg:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+		   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
   "(TARGET_SSE || TARGET_3DNOW_A)
    && ix86_binary_operator_ok (PLUS, V4HImode, operands)"
   "pavgw\t{%2, %0|%0, %2}"
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md	(revision 174339)
+++ gcc/config/i386/sse.md	(working copy)
@@ -6734,45 +6734,15 @@  (define_expand "vec_unpacku_hi_<mode>"
 
 (define_expand "sse2_uavgv16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "")
-	(truncate:V16QI
-	  (lshiftrt:V16HI
-	    (plus:V16HI
-	      (plus:V16HI
-		(zero_extend:V16HI
-		  (match_operand:V16QI 1 "nonimmediate_operand" ""))
-		(zero_extend:V16HI
-		  (match_operand:V16QI 2 "nonimmediate_operand" "")))
-	      (const_vector:V16QI [(const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)]))
-	    (const_int 1))))]
+	(uavg:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
+		    (match_operand:V16QI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (PLUS, V16QImode, operands);")
 
 (define_insn "*sse2_uavgv16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "=x,x")
-	(truncate:V16QI
-	  (lshiftrt:V16HI
-	    (plus:V16HI
-	      (plus:V16HI
-		(zero_extend:V16HI
-		  (match_operand:V16QI 1 "nonimmediate_operand" "%0,x"))
-		(zero_extend:V16HI
-		  (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))
-	      (const_vector:V16QI [(const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)
-				   (const_int 1) (const_int 1)]))
-	    (const_int 1))))]
+	(uavg:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
+		    (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
   "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
   "@
    pavgb\t{%2, %0|%0, %2}
@@ -6785,37 +6755,15 @@  (define_insn "*sse2_uavgv16qi3"
 
 (define_expand "sse2_uavgv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "")
-	(truncate:V8HI
-	  (lshiftrt:V8SI
-	    (plus:V8SI
-	      (plus:V8SI
-		(zero_extend:V8SI
-		  (match_operand:V8HI 1 "nonimmediate_operand" ""))
-		(zero_extend:V8SI
-		  (match_operand:V8HI 2 "nonimmediate_operand" "")))
-	      (const_vector:V8HI [(const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)]))
-	    (const_int 1))))]
+	(uavg:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+		   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (PLUS, V8HImode, operands);")
 
 (define_insn "*sse2_uavgv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x,x")
-	(truncate:V8HI
-	  (lshiftrt:V8SI
-	    (plus:V8SI
-	      (plus:V8SI
-		(zero_extend:V8SI
-		  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x"))
-		(zero_extend:V8SI
-		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))
-	      (const_vector:V8HI [(const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)
-				  (const_int 1) (const_int 1)]))
-	    (const_int 1))))]
+	(uavg:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
+		   (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
   "TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
   "@
    pavgw\t{%2, %0|%0, %2}