Message ID | 1384974052.8558.8.camel@oc8801110288.ibm.com |
---|---|
State | New |
Headers | show |
On Wed, 2013-11-20 at 13:00 -0600, Bill Schmidt wrote: > Extracting element zero for big endian V2DI or V2DF mode is optimized > using the scalar register equivalence. Since we can similarly optimize > extraction of element one for big endian V2DI or V2DF mode, I added a ^ Oops. Clearly I meant little endian here: the new variant optimizes extraction of element one for little endian V2DI or V2DF mode. > variant that does this. I am not sure how useful this is, and we can > remove it if you like.
On Wed, Nov 20, 2013 at 2:00 PM, Bill Schmidt <wschmidt@linux.vnet.ibm.com> wrote: > Hi, > > This patch corrects the various vsx_set_* and vsx_extract_* patterns to > work correctly with little endian. For the most part this requires the > usual "subtract from N-1" modification, where N is the number of > elements. > > Extracting element zero for big endian V2DI or V2DF mode is optimized > using the scalar register equivalence. Since we can similarly optimize > extraction of element one for big endian V2DI or V2DF mode, I added a > variant that does this. I am not sure how useful this is, and we can > remove it if you like. > > The existing testcase gcc.target/powerpc/pr48258-1.c fails when counting > the number of occurrences of xxsldwi. It expects to see 6, but we > generate 9 of them for LE. This is because there are three extracts of > element zero of a V4SF in the testcase. The scalar equivalence allows > us to avoid the xxsldwi in BE but not in LE. Therefore I've disabled > this test for little endian. > > Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no > regressions. Is this ok for trunk? > > Thanks, > Bill > > > gcc: > > 2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com> > > * config/rs6000/vsx.md (vsx_set_<mode>): Adjust for little endian. > (vsx_extract_<mode>): Likewise. > (*vsx_extract_<mode>_one_le): New LE variant on > *vsx_extract_<mode>_zero. > (vsx_extract_v4sf): Adjust for little endian. > > > gcc/testsuite: > > 2013-11-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com> > > * gcc.target/powerpc/pr48258-1.c: Skip for little endian. Okay. And thanks for the optimization to extract element one for LE. Thanks, David
Index: gcc/testsuite/gcc.target/powerpc/pr48258-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/pr48258-1.c (revision 205053) +++ gcc/testsuite/gcc.target/powerpc/pr48258-1.c (working copy) @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */ /* { dg-require-effective-target powerpc_vsx_ok } */ /* { dg-options "-O3 -mcpu=power7 -mabi=altivec -ffast-math -fno-unroll-loops" } */ /* { dg-final { scan-assembler-times "xvaddsp" 3 } } */ Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (revision 205053) +++ gcc/config/rs6000/vsx.md (working copy) @@ -1497,9 +1497,10 @@ UNSPEC_VSX_SET))] "VECTOR_MEM_VSX_P (<MODE>mode)" { - if (INTVAL (operands[3]) == 0) + int idx_first = BYTES_BIG_ENDIAN ? 0 : 1; + if (INTVAL (operands[3]) == idx_first) return \"xxpermdi %x0,%x2,%x1,1\"; - else if (INTVAL (operands[3]) == 1) + else if (INTVAL (operands[3]) == 1 - idx_first) return \"xxpermdi %x0,%x1,%x2,0\"; else gcc_unreachable (); @@ -1514,8 +1515,12 @@ [(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))] "VECTOR_MEM_VSX_P (<MODE>mode)" { + int fldDM; gcc_assert (UINTVAL (operands[2]) <= 1); - operands[3] = GEN_INT (INTVAL (operands[2]) << 1); + fldDM = INTVAL (operands[2]) << 1; + if (!BYTES_BIG_ENDIAN) + fldDM = 3 - fldDM; + operands[3] = GEN_INT (fldDM); return \"xxpermdi %x0,%x1,%x1,%3\"; } [(set_attr "type" "vecperm")]) @@ -1535,6 +1540,21 @@ (const_string "fpload"))) (set_attr "length" "4")]) +;; Optimize extracting element 1 from memory for little endian +(define_insn "*vsx_extract_<mode>_one_le" + [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa") + (vec_select:<VS_scalar> + (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z") + (parallel [(const_int 1)])))] + "VECTOR_MEM_VSX_P (<MODE>mode) && 
!WORDS_BIG_ENDIAN" + "lxsd%U1x %x0,%y1" + [(set (attr "type") + (if_then_else + (match_test "update_indexed_address_mem (operands[1], VOIDmode)") + (const_string "fpload_ux") + (const_string "fpload"))) + (set_attr "length" "4")]) + ;; Extract a SF element from V4SF (define_insn_and_split "vsx_extract_v4sf" [(set (match_operand:SF 0 "vsx_register_operand" "=f,f") @@ -1555,7 +1575,7 @@ rtx op2 = operands[2]; rtx op3 = operands[3]; rtx tmp; - HOST_WIDE_INT ele = INTVAL (op2); + HOST_WIDE_INT ele = BYTES_BIG_ENDIAN ? INTVAL (op2) : 3 - INTVAL (op2); if (ele == 0) tmp = op1;