@@ -256,12 +256,14 @@ struct sve_vec_cost : simd_vec_cost
unsigned int clast_cost,
unsigned int fadda_f16_cost,
unsigned int fadda_f32_cost,
- unsigned int fadda_f64_cost)
+ unsigned int fadda_f64_cost,
+ unsigned int scatter_store_elt_cost)
: simd_vec_cost (base),
clast_cost (clast_cost),
fadda_f16_cost (fadda_f16_cost),
fadda_f32_cost (fadda_f32_cost),
- fadda_f64_cost (fadda_f64_cost)
+ fadda_f64_cost (fadda_f64_cost),
+ scatter_store_elt_cost (scatter_store_elt_cost)
{}
/* The cost of a vector-to-scalar CLASTA or CLASTB instruction,
@@ -274,6 +276,9 @@ struct sve_vec_cost : simd_vec_cost
const int fadda_f16_cost;
const int fadda_f32_cost;
const int fadda_f64_cost;
+
+ /* The per-element cost of a scatter store. */
+ const int scatter_store_elt_cost;
};
/* Cost for vector insn classes. */
@@ -638,7 +638,8 @@ static const sve_vec_cost generic_sve_vector_cost =
2, /* clast_cost */
2, /* fadda_f16_cost */
2, /* fadda_f32_cost */
- 2 /* fadda_f64_cost */
+ 2, /* fadda_f64_cost */
+ 1 /* scatter_store_elt_cost */
};
/* Generic costs for vector insn classes. */
@@ -705,7 +706,8 @@ static const sve_vec_cost a64fx_sve_vector_cost =
13, /* clast_cost */
13, /* fadda_f16_cost */
13, /* fadda_f32_cost */
- 13 /* fadda_f64_cost */
+ 13, /* fadda_f64_cost */
+ 1 /* scatter_store_elt_cost */
};
static const struct cpu_vector_cost a64fx_vector_cost =
@@ -14279,6 +14281,13 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
&& DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
return simd_costs->store_elt_extra_cost;
+ /* Detect cases in which a scalar_store is really storing one element
+ in a scatter operation. */
+ if (kind == scalar_store
+ && sve_costs
+ && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ return sve_costs->scatter_store_elt_cost;
+
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
if (kind == vec_to_scalar
&& where == vect_body