Patchwork RFA: enable LRA for rs6000

login
register
mail settings
Submitter Vladimir Makarov
Date April 26, 2013, 11 p.m.
Message ID <517B0715.2010100@redhat.com>
Download mbox | patch
Permalink /patch/240058/
State New
Headers show

Comments

Vladimir Makarov - April 26, 2013, 11 p.m.
On 13-04-26 11:30 AM, Michael Meissner wrote:
> Vlad, in going through the LRA test differences, some of the bswap64 tests are
> failing because LRA converts the swaps for register/register converts into
> store/load.  For example, if gcc.target/powerpc/bswap64-4.c is compiled on
> 32-bit, for this function:
>
> long long swap_reg (long long a) { return __builtin_bswap64 (a); }
>
> LRA gives:
>
> swap_reg:
>          stwu 1,-16(1)
>          li 9,4
>          stw 3,8(1)
>          stw 4,12(1)
>          addi 10,1,8
>          lwbrx 3,9,10
>          lwbrx 4,0,10
>          addi 1,1,16
>          blr
>
> And the traditional code generation is:
>
> swap_reg:
>          rlwinm 9,4,8,0xffffffff
>          rlwinm 10,3,8,0xffffffff
>          rlwimi 9,4,24,0,7
>          rlwimi 10,3,24,0,7
>          rlwimi 9,4,24,16,23
>          rlwimi 10,3,24,16,23
>          mr 4,10
>          mr 3,9
>
> I assume the rlwinm's are to be preferred because there is no LHS, and also in
> this case, the 2 registers rlwinm's are done in parallel.
>
> The test gcc.target/powerpc/vect-83_64.c is failing in LRA:
>
> vect-83_64.c: In function ‘main1’:
> vect-83_64.c:30:1: internal compiler error: Max. number of generated reload insns per insn is achieved (90)
>
>   }
>   ^
> 0x104dca7f lra_constraints(bool)
>          /home/meissner/fsf-src/meissner-lra/gcc/lra-constraints.c:3613
> 0x104ca67b lra(_IO_FILE*)
>          /home/meissner/fsf-src/meissner-lra/gcc/lra.c:2278
> 0x1047d6eb do_reload
>          /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4619
> 0x1047d6eb rest_of_handle_reload
>          /home/meissner/fsf-src/meissner-lra/gcc/ira.c:4731
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See <http://gcc.gnu.org/bugs.html> for instructions.

The patch below solved the above problems.  The problem was in that I 
increased alternatives with '?' (although manual says that it should 
affect only regclass).  It improves slightly x86/x86-64 generated code 
performance.  But power has some strange insns with ???? (four ?).  It 
looks a woodo to me.

I've committed the patch into the branch.


> I'm also seeing quite a few Fortran failures for -m32:
It seems fixed too.
> gfortran.dg/PR19872.f
> gfortran.dg/advance_1.f90
> gfortran.dg/advance_4.f90
> gfortran.dg/advance_5.f90
> gfortran.dg/advance_6.f90
> gfortran.dg/append_1.f90
> gfortran.dg/associated_2.f90
> gfortran.dg/assumed_rank_1.f90
> gfortran.dg/assumed_rank_2.f90
> gfortran.dg/assumed_rank_7.f90
> gfortran.dg/assumed_type_2.f90
> gfortran.dg/backspace_10.f90
> gfortran.dg/backspace_2.f
> gfortran.dg/backspace_8.f
> gfortran.dg/backspace_9.f
> gfortran.dg/bound_2.f90
> gfortran.dg/bound_7.f90
> gfortran.dg/bound_8.f90
> gfortran.dg/char_cshift_1.f90
> gfortran.dg/char_cshift_2.f90
> gfortran.dg/char_cshift_3.f90
> gfortran.dg/char_eoshift_1.f90
> gfortran.dg/char_eoshift_2.f90
> gfortran.dg/char_eoshift_3.f90
> gfortran.dg/char_eoshift_4.f90
> gfortran.dg/char_eoshift_5.f90
> gfortran.dg/char_length_8.f90
> gfortran.dg/chmod_1.f90
> gfortran.dg/chmod_2.f90
> gfortran.dg/chmod_3.f90
> gfortran.dg/comma.f
> gfortran.dg/convert_2.f90
> gfortran.dg/convert_implied_open.f90
> gfortran.dg/cr_lf.f90
> gfortran.dg/cshift_bounds_1.f90
> gfortran.dg/cshift_bounds_2.f90
> gfortran.dg/cshift_bounds_3.f90
> gfortran.dg/cshift_bounds_4.f90
> gfortran.dg/cshift_nan_1.f90
> gfortran.dg/dev_null.F90
> gfortran.dg/direct_io_1.f90
> gfortran.dg/direct_io_11.f90
> gfortran.dg/direct_io_12.f90
> gfortran.dg/direct_io_2.f90
> gfortran.dg/direct_io_3.f90
> gfortran.dg/direct_io_5.f90
> gfortran.dg/direct_io_8.f90
> gfortran.dg/endfile.f90
> gfortran.dg/endfile_2.f90
> gfortran.dg/eof_4.f90
> gfortran.dg/eoshift.f90
> gfortran.dg/eoshift_bounds_1.f90
> gfortran.dg/error_format.f90
> gfortran.dg/f2003_inquire_1.f03
> gfortran.dg/f2003_io_1.f03
> gfortran.dg/f2003_io_5.f03
> gfortran.dg/f2003_io_7.f03
> gfortran.dg/fmt_cache_1.f
> gfortran.dg/fmt_error_4.f90
> gfortran.dg/fmt_error_5.f90
> gfortran.dg/fmt_t_5.f90
> gfortran.dg/fmt_t_7.f
> gfortran.dg/ftell_3.f90
> gfortran.dg/hollerith4.f90
> gfortran.dg/inquire_10.f90
> gfortran.dg/inquire_13.f90
> gfortran.dg/inquire_15.f90
> gfortran.dg/inquire_9.f90
> gfortran.dg/inquire_size.f90
> gfortran.dg/iomsg_1.f90
> gfortran.dg/iostat_2.f90
> gfortran.dg/list_read_10.f90
> gfortran.dg/list_read_6.f90
> gfortran.dg/list_read_7.f90
> gfortran.dg/list_read_9.f90
> gfortran.dg/matmul_1.f90
> gfortran.dg/matmul_5.f90
> gfortran.dg/maxloc_bounds_1.f90
> gfortran.dg/maxloc_bounds_2.f90
> gfortran.dg/maxloc_bounds_3.f90
> gfortran.dg/maxloc_bounds_6.f90
> gfortran.dg/maxloc_bounds_8.f90
> gfortran.dg/namelist_44.f90
> gfortran.dg/namelist_45.f90
> gfortran.dg/namelist_46.f90
> gfortran.dg/namelist_66.f90
> gfortran.dg/namelist_72.f
> gfortran.dg/namelist_82.f90
> gfortran.dg/negative_automatic_size.f90
> gfortran.dg/negative_unit.f
> gfortran.dg/negative_unit_int8.f
> gfortran.dg/newunit_1.f90
> gfortran.dg/newunit_3.f90
> gfortran.dg/open_access_append_1.f90
> gfortran.dg/open_errors.f90
> gfortran.dg/open_negative_unit_1.f90
> gfortran.dg/open_new.f90
> gfortran.dg/open_readonly_1.f90
> gfortran.dg/open_status_1.f90
> gfortran.dg/open_status_2.f90
> gfortran.dg/open_status_3.f90
> gfortran.dg/optional_dim_2.f90
> gfortran.dg/optional_dim_3.f90
> gfortran.dg/overwrite_1.f
> gfortran.dg/pr16597.f90
> gfortran.dg/pr16935.f90
> gfortran.dg/pr20954.f
> gfortran.dg/pr39865.f90
> gfortran.dg/pr46804.f90
> gfortran.dg/pr47878.f90
> gfortran.dg/read_comma.f
> gfortran.dg/read_eof_4.f90
> gfortran.dg/read_eof_8.f90
> gfortran.dg/read_eof_all.f90
> gfortran.dg/read_list_eof_1.f90
> gfortran.dg/read_many_1.f
> gfortran.dg/read_no_eor.f90
> gfortran.dg/readwrite_unf_direct_eor_1.f90
> gfortran.dg/realloc_on_assign_11.f90
> gfortran.dg/realloc_on_assign_7.f03
> gfortran.dg/record_marker_1.f90
> gfortran.dg/record_marker_3.f90
> gfortran.dg/runtime_warning_1.f90
> gfortran.dg/selected_char_kind_1.f90
> gfortran.dg/selected_char_kind_4.f90
> gfortran.dg/shift-alloc.f90
> gfortran.dg/shift-kind_2.f90
> gfortran.dg/stat_1.f90
> gfortran.dg/stat_2.f90
> gfortran.dg/streamio_1.f90
> gfortran.dg/streamio_10.f90
> gfortran.dg/streamio_12.f90
> gfortran.dg/streamio_14.f90
> gfortran.dg/streamio_15.f90
> gfortran.dg/streamio_16.f90
> gfortran.dg/streamio_2.f90
> gfortran.dg/streamio_3.f90
> gfortran.dg/streamio_4.f90
> gfortran.dg/streamio_5.f90
> gfortran.dg/streamio_6.f90
> gfortran.dg/streamio_7.f90
> gfortran.dg/streamio_8.f90
> gfortran.dg/streamio_9.f90
> gfortran.dg/tl_editing.f90
> gfortran.dg/unf_io_convert_1.f90
> gfortran.dg/unf_io_convert_2.f90
> gfortran.dg/unf_io_convert_3.f90
> gfortran.dg/unf_io_convert_4.f90
> gfortran.dg/unf_read_corrupted_1.f90
> gfortran.dg/unf_short_record_1.f90
> gfortran.dg/unformatted_subrecord_1.f90
> gfortran.dg/unpack_bounds_1.f90
> gfortran.dg/unpack_bounds_2.f90
> gfortran.dg/unpack_bounds_3.f90
> gfortran.dg/widechar_intrinsics_10.f90
> gfortran.dg/widechar_intrinsics_5.f90
> gfortran.dg/write_back.f
> gfortran.dg/write_check.f90
> gfortran.dg/write_check3.f90
> gfortran.dg/write_direct_eor.f90
> gfortran.dg/write_rewind_1.f
> gfortran.dg/write_rewind_2.f
> gfortran.dg/write_to_null.F90
> gfortran.dg/x_slash_2.f
> gfortran.dg/zero_sized_1.f90
> gfortran.fortran-torture/execute/backspace.f90
> gfortran.fortran-torture/execute/direct_io.f90
> gfortran.fortran-torture/execute/inquire_1.f90
> gfortran.fortran-torture/execute/inquire_2.f90
> gfortran.fortran-torture/execute/inquire_3.f90
> gfortran.fortran-torture/execute/inquire_4.f90
> gfortran.fortran-torture/execute/inquire_5.f90
> gfortran.fortran-torture/execute/intrinsic_associated.f90
> gfortran.fortran-torture/execute/intrinsic_associated_2.f90
> gfortran.fortran-torture/execute/intrinsic_cshift.f90
> gfortran.fortran-torture/execute/intrinsic_eoshift.f90
> gfortran.fortran-torture/execute/intrinsic_size.f90
> gfortran.fortran-torture/execute/list_read_1.f90
> gfortran.fortran-torture/execute/open_replace.f90
> gfortran.fortran-torture/execute/seq_io.f90
> gfortran.fortran-torture/execute/slash_edit.f90
> gfortran.fortran-torture/execute/unopened_unit_1.f90
>
2013-04-26  Vladimir Makarov  <vmakarov@redhat.com>

         * lra.c (setup_operand_alternative): Ignore '?'.
         * lra-constraints.c (process_alt_operands): Print cost dump for
         alternatives.  Check only moves for cycling.
         (curr_insn_transform): Print insn name.
Michael Meissner - April 26, 2013, 11:13 p.m.
On Fri, Apr 26, 2013 at 07:00:37PM -0400, Vladimir Makarov wrote:
> 2013-04-26  Vladimir Makarov  <vmakarov@redhat.com>
> 
>         * lra.c (setup_operand_alternative): Ignore '?'.
>         * lra-constraints.c (process_alt_operands): Print cost dump for
>         alternatives.  Check only moves for cycling.
>         (curr_insn_transform): Print insn name.

I'm not sure I'm comfortable with ignoring the '?' altogether.  For example, if
you do something in the GPR unit, instructions run at one cycle, while if you
do it in the vector unit, it runs in two cycles.  In the past, I've seen cases
where it wanted to spill floating point values from the floating point
registers to the CTR.  And if you spill to the LR, it can interfere with the
call cache.

Admitily, when to use '!', '?', and '*' is unclear, and unfortunately it has
changed over time.

Patch

Index: lra-constraints.c
===================================================================
--- lra-constraints.c   (revision 198344)
+++ lra-constraints.c   (working copy)
@@ -2038,7 +2038,13 @@  process_alt_operands (int only_alternati
              or non-important thing to be worth to do it.  */
           overall = losers * LRA_LOSER_COST_FACTOR + reject;
           if ((best_losers == 0 || losers != 0) && best_overall < overall)
-           goto fail;
+           {
+             if (lra_dump_file != NULL)
+               fprintf (lra_dump_file, " alt=%d,overall=%d,losers=%d,"
+                        "small_class_ops=%d,rld_nregs=%d -- reject\n",
+                nalt, overall, losers, small_class_operands_num, 
reload_nregs);
+             goto fail;
+           }

           curr_alt[nop] = this_alternative;
           COPY_HARD_REG_SET (curr_alt_set[nop], this_alternative_set);
@@ -2055,6 +2061,9 @@  process_alt_operands (int only_alternati
             early_clobbered_nops[early_clobbered_regs_num++] = nop;
         }
        if (curr_insn_set != NULL_RTX && n_operands == 2
+          /* Prevent processing non-move insns.  */
+          && (GET_CODE (SET_SRC (curr_insn_set)) == SUBREG
+              || SET_SRC (curr_insn_set) == no_subreg_reg_operand[1])
           && ((! curr_alt_win[0] && ! curr_alt_win[1]
                && REG_P (no_subreg_reg_operand[0])
                && REG_P (no_subreg_reg_operand[1])
@@ -2162,6 +2171,10 @@  process_alt_operands (int only_alternati
         small_class_operands_num
           += SMALL_REGISTER_CLASS_P (curr_alt[nop]) ? 1 : 0;

+      if (lra_dump_file != NULL)
+       fprintf (lra_dump_file, " alt=%d,overall=%d,losers=%d,"
+                "small_class_ops=%d,rld_nregs=%d\n",
+                nalt, overall, losers, small_class_operands_num, 
reload_nregs);
        /* If this alternative can be made to work by reloading, and it
          needs less reloading than the others checked so far, record
          it as the chosen goal for reloading.  */
@@ -2518,7 +2531,6 @@  process_address (int nop, rtx *before, r
                             code = -1;
                           }
                       }
-
                   }
               }
             if (code < 0)
@@ -3007,6 +3019,9 @@  curr_insn_transform (void)
           for (; *p != '\0' && *p != ',' && *p != '#'; p++)
             fputc (*p, lra_dump_file);
         }
+      if (INSN_CODE (curr_insn) >= 0
+         && (p = get_insn_name (INSN_CODE (curr_insn))) != NULL)
+       fprintf (lra_dump_file, " {%s}", p);
        fprintf (lra_dump_file, "\n");
      }

Index: lra.c
===================================================================
--- lra.c       (revision 198344)
+++ lra.c       (working copy)
@@ -784,9 +784,6 @@  setup_operand_alternative (lra_insn_reco
                   lra_assert (i != nop - 1);
                   break;

-               case '?':
-                 op_alt->reject += LRA_LOSER_COST_FACTOR;
-                 break;
                 case '!':
                   op_alt->reject += LRA_MAX_REJECT;
                   break;