diff mbox

, PowerPC PR 61431, Fix little endian word swapping for 128-bit types

Message ID 20140606190217.GA8602@ibm-tiger.the-meissners.org
State New
Headers show

Commit Message

Michael Meissner June 6, 2014, 7:02 p.m. UTC
The pack01.c test fails on GCC 4.8 on little endian power8 systems. In looking
at it, it is a bug where the V1TI memory operations do not have the word
swapping define_split support.  GCC 4.9 and trunk can optimize the union to
stay in a register, so the test case passes on those systems, but it is still a
bug that would be exposed if you ever need to store vector __int128 values.
The test p8vector-int128-2.c is such a case, and it needs the fix.

I've tested this via bootstrap and make check on power8 big and little endian
systems, and there were no regressions.  The test p8vector-int128-2.c now
passes on the little endian power8 with this fix.

Are these patches ok to check into the trunk, and 4.9/4.8 branches?

2014-06-06  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/61431
	* config/rs6000/vsx.md (VSX_LE): Split VSX_D into 2 separate
	iterators, VSX_D that handles 64-bit types, and VSX_LE that
	handles swapping the two 64-bit double words on little endian
	systems.  Include V1TImode and optionally TImode in VSX_LE so that
	these types are properly swapped.  Change all of the insns and
	splits that do the 64-bit swaps to use VSX_LE.
	(vsx_le_perm_load_<mode>): Likewise.
	(vsx_le_perm_store_<mode>): Likewise.
	(splitters for little endian memory operations): Likewise.
	(vsx_xxpermdi2_le_<mode>): Likewise.
	(vsx_lxvd2x2_le_<mode>): Likewise.
	(vsx_stxvd2x2_le_<mode>): Likewise.

Comments

David Edelsohn June 6, 2014, 11:04 p.m. UTC | #1
On Fri, Jun 6, 2014 at 3:02 PM, Michael Meissner
<meissner@linux.vnet.ibm.com> wrote:
> The pack01.c test fails on GCC 4.8 on little endian power8 systems. In looking
> at it, it is a bug where the V1TI memory operations do not have the word
> swapping define_split support.  GCC 4.9 and trunk can optimize the union to
> stay in a register, so the test case passes on those systems, but it is still a
> bug that would be exposed if you ever need to store vector __int128 values.
> The test p8vector-int128-2.c is such a case, and it needs the fix.
>
> I've tested this via bootstrap and make check on power8 big and little endian
> systems, and there were no regressions.  The test p8vector-int128-2.c now
> passes on the little endian power8 with this fix.
>
> Are these patches ok to check into the trunk, and 4.9/4.8 branches?
>
> 2014-06-06  Michael Meissner  <meissner@linux.vnet.ibm.com>
>
>         PR target/61431
>         * config/rs6000/vsx.md (VSX_LE): Split VSX_D into 2 separate
>         iterators, VSX_D that handles 64-bit types, and VSX_LE that
>         handles swapping the two 64-bit double words on little endian
>         systems.  Include V1TImode and optionally TImode in VSX_LE so that
>         these types are properly swapped.  Change all of the insns and
>         splits that do the 64-bit swaps to use VSX_LE.
>         (vsx_le_perm_load_<mode>): Likewise.
>         (vsx_le_perm_store_<mode>): Likewise.
>         (splitters for little endian memory operations): Likewise.
>         (vsx_xxpermdi2_le_<mode>): Likewise.
>         (vsx_lxvd2x2_le_<mode>): Likewise.
>         (vsx_stxvd2x2_le_<mode>): Likewise.

okay.

Thanks, David
diff mbox

Patch

Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 211316)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -24,6 +24,13 @@  (define_mode_iterator VSX_B [DF V4SF V2D
 ;; Iterator for the 2 64-bit vector types
 (define_mode_iterator VSX_D [V2DF V2DI])
 
+;; Iterator for the 2 64-bit vector types + 128-bit types that are loaded with
+;; lxvd2x to properly handle swapping words on little endian
+(define_mode_iterator VSX_LE [V2DF
+			      V2DI
+			      V1TI
+			      (TI	"VECTOR_MEM_VSX_P (TImode)")])
+
 ;; Iterator for the 2 32-bit vector types
 (define_mode_iterator VSX_W [V4SF V4SI])
 
@@ -228,8 +235,8 @@  (define_c_enum "unspec"
 ;; The patterns for LE permuted loads and stores come before the general
 ;; VSX moves so they match first.
 (define_insn_and_split "*vsx_le_perm_load_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-        (match_operand:VSX_D 1 "memory_operand" "Z"))]
+  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa")
+        (match_operand:VSX_LE 1 "memory_operand" "Z"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -342,16 +349,16 @@  (define_insn_and_split "*vsx_le_perm_loa
    (set_attr "length" "8")])
 
 (define_insn "*vsx_le_perm_store_<mode>"
-  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
-        (match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
+  [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
+        (match_operand:VSX_LE 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
   [(set_attr "type" "vecstore")
    (set_attr "length" "12")])
 
 (define_split
-  [(set (match_operand:VSX_D 0 "memory_operand" "")
-        (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+  [(set (match_operand:VSX_LE 0 "memory_operand" "")
+        (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed"
   [(set (match_dup 2)
         (vec_select:<MODE>
@@ -369,8 +376,8 @@  (define_split
 ;; The post-reload split requires that we re-permute the source
 ;; register in case it is still live.
 (define_split
-  [(set (match_operand:VSX_D 0 "memory_operand" "")
-        (match_operand:VSX_D 1 "vsx_register_operand" ""))]
+  [(set (match_operand:VSX_LE 0 "memory_operand" "")
+        (match_operand:VSX_LE 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed"
   [(set (match_dup 1)
         (vec_select:<MODE>
@@ -1353,9 +1360,9 @@  (define_insn "vsx_concat_v2sf"
 ;; xxpermdi for little endian loads and stores.  We need several of
 ;; these since the form of the PARALLEL differs by mode.
 (define_insn "*vsx_xxpermdi2_le_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-        (vec_select:VSX_D
-          (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa")
+        (vec_select:VSX_LE
+          (match_operand:VSX_LE 1 "vsx_register_operand" "wa")
           (parallel [(const_int 1) (const_int 0)])))]
   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
   "xxpermdi %x0,%x1,%x1,2"
@@ -1402,9 +1409,9 @@  (define_insn "*vsx_xxpermdi16_le_V16QI"
 ;; lxvd2x for little endian loads.  We need several of
 ;; these since the form of the PARALLEL differs by mode.
 (define_insn "*vsx_lxvd2x2_le_<mode>"
-  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
-        (vec_select:VSX_D
-          (match_operand:VSX_D 1 "memory_operand" "Z")
+  [(set (match_operand:VSX_LE 0 "vsx_register_operand" "=wa")
+        (vec_select:VSX_LE
+          (match_operand:VSX_LE 1 "memory_operand" "Z")
           (parallel [(const_int 1) (const_int 0)])))]
   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
   "lxvd2x %x0,%y1"
@@ -1451,9 +1458,9 @@  (define_insn "*vsx_lxvd2x16_le_V16QI"
 ;; stxvd2x for little endian stores.  We need several of
 ;; these since the form of the PARALLEL differs by mode.
 (define_insn "*vsx_stxvd2x2_le_<mode>"
-  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
-        (vec_select:VSX_D
-          (match_operand:VSX_D 1 "vsx_register_operand" "wa")
+  [(set (match_operand:VSX_LE 0 "memory_operand" "=Z")
+        (vec_select:VSX_LE
+          (match_operand:VSX_LE 1 "vsx_register_operand" "wa")
           (parallel [(const_int 1) (const_int 0)])))]
   "!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode)"
   "stxvd2x %x1,%y0"