diff mbox

[AArch64,PR65375] Fix RTX cost for vector SET

Message ID 55497891.4010801@linaro.org
State New
Headers show

Commit Message

Kugan Vivekanandarajah May 6, 2015, 2:12 a.m. UTC
On 05/05/15 16:17, James Greenhalgh wrote:
> On Sat, Apr 25, 2015 at 12:26:16AM +0100, Kugan wrote:
>>
>> Thanks for the review. I have updated the patch based on the comments
>> with some other minor changes. Bootstrapped and regression tested on
>> aarch64-none-linux-gnu with no-new regressions. Is this OK for trunk?
>>
>>
>> Thanks,
>> Kugan
>>
>>
>> gcc/ChangeLog:
>>
>> 2015-04-24  Kugan Vivekanandarajah  <kuganv@linaro.org>
>> 	    Jim Wilson  <jim.wilson@linaro.org>
>>
>> 	* config/arm/aarch-common-protos.h (struct mem_cost_table): Added
>> 	new  fields loadv and storev.
>> 	* config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs):
>> 	Initialize loadv and storev.
>> 	* config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
>> 	(cortexa53_extra_costs): Likewise.
>> 	(cortexa57_extra_costs): Likewise.
>> 	(xgene1_extra_costs): Likewise.
>> 	* config/aarch64/aarch64.c (aarch64_rtx_costs): Update vector
>> 	rtx_costs.
> 
> Hi Kugan,
> 
> Just a few style comments, regarding the placements of comments in single-line
> if statements. I know the current code does not necessarily always follow the
> comments below, I'll write a patch cleaning that up at some point when I'm back
> at my desk.
> 
> Thanks,
> James
> 
>> @@ -5667,6 +5668,14 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
>>      case NEG:
>>        op0 = XEXP (x, 0);
>>  
>> +      if (VECTOR_MODE_P (mode))
>> +	{
>> +	  if (speed)
>> +	    /* FNEG.  */
>> +	    *cost += extra_cost->vect.alu;
>> +	  return false;
>> +	}
>> +
>>        if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
>>         {
>>            if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
> 
> Personally, I find commented if statements without braces hard to
> quickly parse. Something like this is much faster for me:
> 
> 	  if (speed)
> 	    {
> 	      /* FNEG.  */
> 	      *cost += extra_cost->vect.alu;
> 	    }
> 
>> @@ -5844,7 +5872,10 @@ cost_minus:
>>  
>>  	if (speed)
>>  	  {
>> -	    if (GET_MODE_CLASS (mode) == MODE_INT)
>> +	    if (VECTOR_MODE_P (mode))
>> +	      /* Vector SUB.  */
>> +	      *cost += extra_cost->vect.alu;
>> +	    else if (GET_MODE_CLASS (mode) == MODE_INT)
>>  	      /* SUB(S).  */
>>  	      *cost += extra_cost->alu.arith;
>>  	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
> 
> As above.
> 
>> @@ -5888,7 +5919,6 @@ cost_plus:
>>  	  {
>>  	    if (speed)
>>  	      *cost += extra_cost->alu.arith_shift;
>> -
>>  	    *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
>>  			       (enum rtx_code) GET_CODE (op0),
>>  			       0, speed);
> 
> Drop this whitespace change.
> 
>> @@ -5913,7 +5943,10 @@ cost_plus:
>>  
>>  	if (speed)
>>  	  {
>> -	    if (GET_MODE_CLASS (mode) == MODE_INT)
>> +	    if (VECTOR_MODE_P (mode))
>> +	      /* Vector ADD.  */
>> +	      *cost += extra_cost->vect.alu;
>> +	    else if (GET_MODE_CLASS (mode) == MODE_INT)
>>  	      /* ADD.  */
>>  	      *cost += extra_cost->alu.arith;
>>  	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
> 
> As above.
> 
>> @@ -6013,10 +6061,15 @@ cost_plus:
>>        return false;
>>  
>>      case NOT:
>> -      /* MVN.  */
>>        if (speed)
>> -	*cost += extra_cost->alu.logical;
>> -
>> +	{
>> +	  /* Vector NOT.  */
>> +	  if (VECTOR_MODE_P (mode))
>> +	    *cost += extra_cost->vect.alu;
>> +	  /* MVN.  */
>> +	  else
>> +	    *cost += extra_cost->alu.logical;
>> +	}
>>        /* The logical instruction could have the shifted register form,
>>           but the cost is the same if the shift is processed as a separate
>>           instruction, so we don't bother with it here.  */
> 
> As above.
> 
>> @@ -6055,10 +6108,15 @@ cost_plus:
>>  	  return true;
>>  	}
>>  
>> -      /* UXTB/UXTH.  */
>>        if (speed)
>> -	*cost += extra_cost->alu.extend;
>> -
>> +	{
>> +	  if (VECTOR_MODE_P (mode))
>> +	    /* UMOV.  */
>> +	    *cost += extra_cost->vect.alu;
>> +	  else
>> +	    /* UXTB/UXTH.  */
>> +	    *cost += extra_cost->alu.extend;
>> +	}
>>        return false;
>>  
>>      case SIGN_EXTEND:
> 
> And again :)
> 
>> @@ -6087,10 +6150,16 @@ cost_plus:
>>  
>>        if (CONST_INT_P (op1))
>>          {
>> -	  /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
>> -	     aliases.  */
>>  	  if (speed)
>> -	    *cost += extra_cost->alu.shift;
>> +	    {
>> +	      /* Vector shift (immediate).  */
>> +	      if (VECTOR_MODE_P (mode))
>> +		*cost += extra_cost->vect.alu;
>> +	      /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
>> +		 aliases.  */
>> +	      else
>> +		*cost += extra_cost->alu.shift;
>> +	    }
>>  
>>            /* We can incorporate zero/sign extend for free.  */
>>            if (GET_CODE (op0) == ZERO_EXTEND
> 
> Again, the comment here makes it very difficult to spot the form of
> the if/else statement.
> 
>> @@ -6102,10 +6171,15 @@ cost_plus:
>>          }
>>        else
>>          {
>> -	  /* LSLV.  */
>>  	  if (speed)
>> -	    *cost += extra_cost->alu.shift_reg;
>> -
>> +	    {
>> +	      /* Vector shift (register).  */
>> +	      if (VECTOR_MODE_P (mode))
>> +		*cost += extra_cost->vect.alu;
>> +	      /* LSLV.  */
>> +	      else
>> +		*cost += extra_cost->alu.shift_reg;
>> +	    }
>>  	  return false;  /* All arguments need to be in registers.  */
>>          }
>>  
> 
> Likewise here.
> 
> 

Thanks James for the review. Attached patch changes this. Is this OK ?


Thanks,
Kugan

Comments

James Greenhalgh May 7, 2015, 7:24 a.m. UTC | #1
On Wed, May 06, 2015 at 03:12:33AM +0100, Kugan wrote:
> >> gcc/ChangeLog:
> >>
> >> 2015-04-24  Kugan Vivekanandarajah  <kuganv@linaro.org>
> >> 	    Jim Wilson  <jim.wilson@linaro.org>
> >>
> >> 	* config/arm/aarch-common-protos.h (struct mem_cost_table): Added
> >> 	new  fields loadv and storev.
> >> 	* config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs):
> >> 	Initialize loadv and storev.
> >> 	* config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
> >> 	(cortexa53_extra_costs): Likewise.
> >> 	(cortexa57_extra_costs): Likewise.
> >> 	(xgene1_extra_costs): Likewise.
> >> 	* config/aarch64/aarch64.c (aarch64_rtx_costs): Update vector
> >> 	rtx_costs.
> 
> Thanks James for the review. Attached patch changes this. Is this OK ?

Hi Kugan,

Thanks for sticking with it through a long review, sorry that the replies
have been patchy, I'm still travelling.

This patch is OK for trunk, with an updated ChangeLog and assuming no
regressions after a test run (and a quick check with some popular
benchmarks if possible).

Thanks, and sorry again for the delay,
James
Kugan Vivekanandarajah May 20, 2015, 3:08 a.m. UTC | #2
On 07/05/15 17:24, James Greenhalgh wrote:
> On Wed, May 06, 2015 at 03:12:33AM +0100, Kugan wrote:
>>>> gcc/ChangeLog:
>>>>
>>>> 2015-04-24  Kugan Vivekanandarajah  <kuganv@linaro.org>
>>>> 	    Jim Wilson  <jim.wilson@linaro.org>
>>>>
>>>> 	* config/arm/aarch-common-protos.h (struct mem_cost_table): Added
>>>> 	new  fields loadv and storev.
>>>> 	* config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs):
>>>> 	Initialize loadv and storev.
>>>> 	* config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
>>>> 	(cortexa53_extra_costs): Likewise.
>>>> 	(cortexa57_extra_costs): Likewise.
>>>> 	(xgene1_extra_costs): Likewise.
>>>> 	* config/aarch64/aarch64.c (aarch64_rtx_costs): Update vector
>>>> 	rtx_costs.
>>
>> Thanks James for the review. Attached patch changes this. Is this OK ?
> 
> Hi Kugan,
> 
> Thanks for sticking with it through a long review, sorry that the replies
> have been patchy, I'm still travelling.
> 
> This patch is OK for trunk, with an updated ChangeLog and assuming no
> regressions after a test run (And a quick check with some popular
> benchmarks if possible)

Committed as r223432 after fresh bootstrap and spec2k benchmarking.

Thanks,
Kugan
diff mbox

Patch

diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index ae2b547..939125c 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -83,7 +83,9 @@  const struct cpu_cost_table thunderx_extra_costs =
     0,			/* N/A: Stm_regs_per_insn_subsequent.  */
     0,			/* Storef.  */
     0,			/* Stored.  */
-    COSTS_N_INSNS (1)  /* Store_unaligned.  */
+    COSTS_N_INSNS (1),	/* Store_unaligned.  */
+    COSTS_N_INSNS (1),	/* Loadv.  */
+    COSTS_N_INSNS (1)	/* Storev.  */
   },
   {
     /* FP SFmode */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cba3c1a..586caaf 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5499,16 +5499,6 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
      above this default.  */
   *cost = COSTS_N_INSNS (1);
 
-  /* TODO: The cost infrastructure currently does not handle
-     vector operations.  Assume that all vector operations
-     are equally expensive.  */
-  if (VECTOR_MODE_P (mode))
-    {
-      if (speed)
-	*cost += extra_cost->vect.alu;
-      return true;
-    }
-
   switch (code)
     {
     case SET:
@@ -5523,7 +5513,9 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 	  if (speed)
 	    {
 	      rtx address = XEXP (op0, 0);
-	      if (GET_MODE_CLASS (mode) == MODE_INT)
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->ldst.storev;
+	      else if (GET_MODE_CLASS (mode) == MODE_INT)
 		*cost += extra_cost->ldst.store;
 	      else if (mode == SFmode)
 		*cost += extra_cost->ldst.storef;
@@ -5544,15 +5536,22 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 
 	  /* Fall through.  */
 	case REG:
+	  /* The cost is one per vector-register copied.  */
+	  if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
+	    {
+	      int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+			      / GET_MODE_SIZE (V4SImode);
+	      *cost = COSTS_N_INSNS (n_minus_1 + 1);
+	    }
 	  /* const0_rtx is in general free, but we will use an
 	     instruction to set a register to 0.  */
-          if (REG_P (op1) || op1 == const0_rtx)
-            {
-              /* The cost is 1 per register copied.  */
-              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+	  else if (REG_P (op1) || op1 == const0_rtx)
+	    {
+	      /* The cost is 1 per register copied.  */
+	      int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
 			      / UNITS_PER_WORD;
-              *cost = COSTS_N_INSNS (n_minus_1 + 1);
-            }
+	      *cost = COSTS_N_INSNS (n_minus_1 + 1);
+	    }
           else
 	    /* Cost is just the cost of the RHS of the set.  */
 	    *cost += rtx_cost (op1, SET, 1, speed);
@@ -5650,7 +5649,9 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
 	     approximation for the additional cost of the addressing
 	     mode.  */
 	  rtx address = XEXP (x, 0);
-	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->ldst.loadv;
+	  else if (GET_MODE_CLASS (mode) == MODE_INT)
 	    *cost += extra_cost->ldst.load;
 	  else if (mode == SFmode)
 	    *cost += extra_cost->ldst.loadf;
@@ -5667,6 +5668,16 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
     case NEG:
       op0 = XEXP (x, 0);
 
+      if (VECTOR_MODE_P (mode))
+	{
+	  if (speed)
+	    {
+	      /* FNEG.  */
+	      *cost += extra_cost->vect.alu;
+	    }
+	  return false;
+	}
+
       if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
           if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
@@ -5705,7 +5716,12 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
     case CLRSB:
     case CLZ:
       if (speed)
-        *cost += extra_cost->alu.clz;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.clz;
+	}
 
       return false;
 
@@ -5790,6 +5806,20 @@  aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
           return false;
         }
 
+      if (VECTOR_MODE_P (mode))
+	{
+	  /* Vector compare.  */
+	  if (speed)
+	    *cost += extra_cost->vect.alu;
+
+	  if (aarch64_float_const_zero_rtx_p (op1))
+	    {
+	      /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
+		 cost.  */
+	      return true;
+	    }
+	  return false;
+	}
       return false;
 
     case MINUS:
@@ -5844,12 +5874,21 @@  cost_minus:
 
 	if (speed)
 	  {
-	    if (GET_MODE_CLASS (mode) == MODE_INT)
-	      /* SUB(S).  */
-	      *cost += extra_cost->alu.arith;
+	    if (VECTOR_MODE_P (mode))
+	      {
+		/* Vector SUB.  */
+		*cost += extra_cost->vect.alu;
+	      }
+	    else if (GET_MODE_CLASS (mode) == MODE_INT)
+	      {
+		/* SUB(S).  */
+		*cost += extra_cost->alu.arith;
+	      }
 	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	      /* FSUB.  */
-	      *cost += extra_cost->fp[mode == DFmode].addsub;
+	      {
+		/* FSUB.  */
+		*cost += extra_cost->fp[mode == DFmode].addsub;
+	      }
 	  }
 	return true;
       }
@@ -5913,12 +5952,21 @@  cost_plus:
 
 	if (speed)
 	  {
-	    if (GET_MODE_CLASS (mode) == MODE_INT)
-	      /* ADD.  */
-	      *cost += extra_cost->alu.arith;
+	    if (VECTOR_MODE_P (mode))
+	      {
+		/* Vector ADD.  */
+		*cost += extra_cost->vect.alu;
+	      }
+	    else if (GET_MODE_CLASS (mode) == MODE_INT)
+	      {
+		/* ADD.  */
+		*cost += extra_cost->alu.arith;
+	      }
 	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
-	      /* FADD.  */
-	      *cost += extra_cost->fp[mode == DFmode].addsub;
+	      {
+		/* FADD.  */
+		*cost += extra_cost->fp[mode == DFmode].addsub;
+	      }
 	  }
 	return true;
       }
@@ -5927,8 +5975,12 @@  cost_plus:
       *cost = COSTS_N_INSNS (1);
 
       if (speed)
-        *cost += extra_cost->alu.rev;
-
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.rev;
+	}
       return false;
 
     case IOR:
@@ -5936,10 +5988,14 @@  cost_plus:
         {
           *cost = COSTS_N_INSNS (1);
 
-          if (speed)
-            *cost += extra_cost->alu.rev;
-
-          return true;
+	  if (speed)
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->alu.rev;
+	    }
+	  return true;
         }
     /* Fall through.  */
     case XOR:
@@ -5948,6 +6004,13 @@  cost_plus:
       op0 = XEXP (x, 0);
       op1 = XEXP (x, 1);
 
+      if (VECTOR_MODE_P (mode))
+	{
+	  if (speed)
+	    *cost += extra_cost->vect.alu;
+	  return true;
+	}
+
       if (code == AND
           && GET_CODE (op0) == MULT
           && CONST_INT_P (XEXP (op0, 1))
@@ -6013,10 +6076,19 @@  cost_plus:
       return false;
 
     case NOT:
-      /* MVN.  */
       if (speed)
-	*cost += extra_cost->alu.logical;
-
+	{
+	  if (VECTOR_MODE_P (mode))
+	    {
+	      /* Vector NOT.  */
+	      *cost += extra_cost->vect.alu;
+	    }
+	  else
+	    {
+	      /* MVN.  */
+	      *cost += extra_cost->alu.logical;
+	    }
+	}
       /* The logical instruction could have the shifted register form,
          but the cost is the same if the shift is processed as a separate
          instruction, so we don't bother with it here.  */
@@ -6055,10 +6127,19 @@  cost_plus:
 	  return true;
 	}
 
-      /* UXTB/UXTH.  */
       if (speed)
-	*cost += extra_cost->alu.extend;
-
+	{
+	  if (VECTOR_MODE_P (mode))
+	    {
+	      /* UMOV.  */
+	      *cost += extra_cost->vect.alu;
+	    }
+	  else
+	    {
+	      /* UXTB/UXTH.  */
+	      *cost += extra_cost->alu.extend;
+	    }
+	}
       return false;
 
     case SIGN_EXTEND:
@@ -6078,7 +6159,12 @@  cost_plus:
 	}
 
       if (speed)
-	*cost += extra_cost->alu.extend;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.extend;
+	}
       return false;
 
     case ASHIFT:
@@ -6087,10 +6173,20 @@  cost_plus:
 
       if (CONST_INT_P (op1))
         {
-	  /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
-	     aliases.  */
 	  if (speed)
-	    *cost += extra_cost->alu.shift;
+	    {
+	      if (VECTOR_MODE_P (mode))
+		{
+		  /* Vector shift (immediate).  */
+		  *cost += extra_cost->vect.alu;
+		}
+	      else
+		{
+		  /* LSL (immediate), UBMF, UBFIZ and friends.  These are all
+		     aliases.  */
+		  *cost += extra_cost->alu.shift;
+		}
+	    }
 
           /* We can incorporate zero/sign extend for free.  */
           if (GET_CODE (op0) == ZERO_EXTEND
@@ -6102,10 +6198,19 @@  cost_plus:
         }
       else
         {
-	  /* LSLV.  */
 	  if (speed)
-	    *cost += extra_cost->alu.shift_reg;
-
+	    {
+	      if (VECTOR_MODE_P (mode))
+		{
+		  /* Vector shift (register).  */
+		  *cost += extra_cost->vect.alu;
+		}
+	      else
+		{
+		  /* LSLV.  */
+		  *cost += extra_cost->alu.shift_reg;
+		}
+	    }
 	  return false;  /* All arguments need to be in registers.  */
         }
 
@@ -6120,7 +6225,12 @@  cost_plus:
 	{
 	  /* ASR (immediate) and friends.  */
 	  if (speed)
-	    *cost += extra_cost->alu.shift;
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->alu.shift;
+	    }
 
 	  *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
 	  return true;
@@ -6130,8 +6240,12 @@  cost_plus:
 
 	  /* ASR (register) and friends.  */
 	  if (speed)
-	    *cost += extra_cost->alu.shift_reg;
-
+	    {
+	      if (VECTOR_MODE_P (mode))
+		*cost += extra_cost->vect.alu;
+	      else
+		*cost += extra_cost->alu.shift_reg;
+	    }
 	  return false;  /* All arguments need to be in registers.  */
 	}
 
@@ -6179,7 +6293,12 @@  cost_plus:
     case SIGN_EXTRACT:
       /* UBFX/SBFX.  */
       if (speed)
-	*cost += extra_cost->alu.bfx;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->alu.bfx;
+	}
 
       /* We can trust that the immediates used will be correct (there
 	 are no by-register forms), so we need only cost op0.  */
@@ -6196,7 +6315,9 @@  cost_plus:
     case UMOD:
       if (speed)
 	{
-	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
 	    *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
 		      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
 	  else if (GET_MODE (x) == DFmode)
@@ -6213,7 +6334,9 @@  cost_plus:
     case SQRT:
       if (speed)
 	{
-	  if (GET_MODE_CLASS (mode) == MODE_INT)
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else if (GET_MODE_CLASS (mode) == MODE_INT)
 	    /* There is no integer SQRT, so only DIV and UDIV can get
 	       here.  */
 	    *cost += extra_cost->mult[mode == DImode].idiv;
@@ -6245,7 +6368,12 @@  cost_plus:
       op2 = XEXP (x, 2);
 
       if (speed)
-	*cost += extra_cost->fp[mode == DFmode].fma;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].fma;
+	}
 
       /* FMSUB, FNMADD, and FNMSUB are free.  */
       if (GET_CODE (op0) == NEG)
@@ -6285,12 +6413,28 @@  cost_plus:
 
     case FLOAT_EXTEND:
       if (speed)
-	*cost += extra_cost->fp[mode == DFmode].widen;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    {
+	      /*Vector truncate.  */
+	      *cost += extra_cost->vect.alu;
+	    }
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].widen;
+	}
       return false;
 
     case FLOAT_TRUNCATE:
       if (speed)
-	*cost += extra_cost->fp[mode == DFmode].narrow;
+	{
+	  if (VECTOR_MODE_P (mode))
+	    {
+	      /*Vector conversion.  */
+	      *cost += extra_cost->vect.alu;
+	    }
+	  else
+	    *cost += extra_cost->fp[mode == DFmode].narrow;
+	}
       return false;
 
     case FIX:
@@ -6311,13 +6455,23 @@  cost_plus:
         }
 
       if (speed)
-        *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
-
+	{
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
+	}
       *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
       return true;
 
     case ABS:
-      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+      if (VECTOR_MODE_P (mode))
+	{
+	  /* ABS (vector).  */
+	  if (speed)
+	    *cost += extra_cost->vect.alu;
+	}
+      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
 	{
 	  /* FABS and FNEG are analogous.  */
 	  if (speed)
@@ -6338,10 +6492,15 @@  cost_plus:
     case SMIN:
       if (speed)
 	{
-	  /* FMAXNM/FMINNM/FMAX/FMIN.
-	     TODO: This may not be accurate for all implementations, but
-	     we do not model this in the cost tables.  */
-	  *cost += extra_cost->fp[mode == DFmode].addsub;
+	  if (VECTOR_MODE_P (mode))
+	    *cost += extra_cost->vect.alu;
+	  else
+	    {
+	      /* FMAXNM/FMINNM/FMAX/FMIN.
+	         TODO: This may not be accurate for all implementations, but
+	         we do not model this in the cost tables.  */
+	      *cost += extra_cost->fp[mode == DFmode].addsub;
+	    }
 	}
       return false;
 
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index 3ee7ebf..29f7c99 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -102,6 +102,8 @@  struct mem_cost_table
   const int storef;		/* SFmode.  */
   const int stored;		/* DFmode.  */
   const int store_unaligned;	/* Extra for unaligned stores.  */
+  const int loadv;		/* Vector load.  */
+  const int storev;		/* Vector store.  */
 };
 
 struct fp_cost_table
diff --git a/gcc/config/arm/aarch-cost-tables.h b/gcc/config/arm/aarch-cost-tables.h
index 05e96a9..809feb8 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -81,7 +81,9 @@  const struct cpu_cost_table generic_extra_costs =
     1,			/* stm_regs_per_insn_subsequent.  */
     COSTS_N_INSNS (2),	/* storef.  */
     COSTS_N_INSNS (3),	/* stored.  */
-    COSTS_N_INSNS (1)  /* store_unaligned.  */
+    COSTS_N_INSNS (1),	/* store_unaligned.  */
+    COSTS_N_INSNS (1),	/* loadv.  */
+    COSTS_N_INSNS (1)	/* storev.  */
   },
   {
     /* FP SFmode */
@@ -182,7 +184,9 @@  const struct cpu_cost_table cortexa53_extra_costs =
     2,				/* stm_regs_per_insn_subsequent.  */
     0,				/* storef.  */
     0,				/* stored.  */
-    COSTS_N_INSNS (1)		/* store_unaligned.  */
+    COSTS_N_INSNS (1),		/* store_unaligned.  */
+    COSTS_N_INSNS (1),		/* loadv.  */
+    COSTS_N_INSNS (1)		/* storev.  */
   },
   {
     /* FP SFmode */
@@ -283,7 +287,9 @@  const struct cpu_cost_table cortexa57_extra_costs =
     2,                         /* stm_regs_per_insn_subsequent.  */
     0,                         /* storef.  */
     0,                         /* stored.  */
-    COSTS_N_INSNS (1)          /* store_unaligned.  */
+    COSTS_N_INSNS (1),         /* store_unaligned.  */
+    COSTS_N_INSNS (1),         /* loadv.  */
+    COSTS_N_INSNS (1)          /* storev.  */
   },
   {
     /* FP SFmode */
@@ -385,6 +391,8 @@  const struct cpu_cost_table xgene1_extra_costs =
     0,                         /* storef.  */
     0,                         /* stored.  */
     0,                         /* store_unaligned.  */
+    COSTS_N_INSNS (1),         /* loadv.  */
+    COSTS_N_INSNS (1)          /* storev.  */
   },
   {
     /* FP SFmode */