diff mbox series

Fix type handling in undistribute_bitref_for_vector (PR tree-optimization/92618)

Message ID 20191122142505.GD2466@tucnak
State New
Headers show
Series Fix type handling in undistribute_bitref_for_vector (PR tree-optimization/92618) | expand

Commit Message

Jakub Jelinek Nov. 22, 2019, 2:25 p.m. UTC
Hi!

The undistribute_bitref_for_vector function assumes that BIT_FIELD_REFs
with a vector first argument must always have the type of the vector element,
but that is not the case: BIT_FIELD_REF can extract any type with the
corresponding size from any other vector.

So, without this patch, when it saw e.g. an addition in unsigned long long
type, even when the vector was 2x long long, it computed the addition
in a 2x long long vector (in theory this could run into undefined overflows)
and failed IL checking because the conversion from long long to unsigned long
long is not useless.  Even worse, for a double addition with a 2x long long
vector, it would again perform the addition in the 2x long long vector type,
which is completely wrong.

The following patch determines the right vector type and adds VCE around
the operands when needed.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-11-22  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/92618
	* tree-ssa-reassoc.c (v_info): Change from auto_vec to a struct
	containing the auto_vec and a tree.
	(undistribute_bitref_for_vector): Handle the case when element type
	of vec is not the same as type of the BIT_FIELD_REF.  Formatting
	fixes.

	* gcc.c-torture/compile/pr92618.c: New test.
	* gcc.c-torture/execute/pr92618.c: New test.


	Jakub

Comments

Richard Biener Nov. 22, 2019, 5:41 p.m. UTC | #1
On November 22, 2019 3:25:05 PM GMT+01:00, Jakub Jelinek <jakub@redhat.com> wrote:
>Hi!
>
>The undistribute_bitref_for_vector function assumes that BIT_FIELD_REFs
>with vector first argument must have always type of the vector element,
>but that is not the case, BIT_FIELD_REF can extract any type with the
>corresponding size from any other vector.
>
>So, without this patch, when it saw e.g. addition in unsigned long long
>type, even when the vector was 2x long long, it computed the addition
>in 2x long long vector (in theory could run into undefined overflows)
>and fail on IL checking because conversion from long long to unsigned
>long
>long is not useless.  Even worse, for double addition with the vector
>2x
>long long, it would again perform the addition in 2x long long vector
>type,
>which is completely wrong.
>
>The following patch determines the right vector type and adds VCE
>around
>the operands when needed.
>
>Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok. 

Thanks, 
Richard. 

>2019-11-22  Jakub Jelinek  <jakub@redhat.com>
>
>	PR tree-optimization/92618
>	* tree-ssa-reassoc.c (v_info): Change from auto_vec to a struct
>	containing the auto_vec and a tree.
>	(undistribute_bitref_for_vector): Handle the case when element type
>	of vec is not the same as type of the BIT_FIELD_REF.  Formatting
>	fixes.
>
>	* gcc.c-torture/compile/pr92618.c: New test.
>	* gcc.c-torture/execute/pr92618.c: New test.
>
>--- gcc/tree-ssa-reassoc.c.jj	2019-11-13 10:54:56.038884580 +0100
>+++ gcc/tree-ssa-reassoc.c	2019-11-22 10:57:24.956931307 +0100
>@@ -1775,7 +1775,10 @@ undistribute_ops_list (enum tree_code op
>    first: element index for each relevant BIT_FIELD_REF.
>    second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
> typedef std::pair<unsigned, unsigned> v_info_elem;
>-typedef auto_vec<v_info_elem, 32> v_info;
>+struct v_info {
>+  tree vec_type;
>+  auto_vec<v_info_elem, 32> vec;
>+};
> typedef v_info *v_info_ptr;
> 
>/* Comparison function for qsort on VECTOR SSA_NAME trees by machine
>mode.  */
>@@ -1840,8 +1843,11 @@ undistribute_bitref_for_vector (enum tre
>   if (ops->length () <= 1)
>     return false;
> 
>-  if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode !=
>BIT_XOR_EXPR
>-      && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
>+  if (opcode != PLUS_EXPR
>+      && opcode != MULT_EXPR
>+      && opcode != BIT_XOR_EXPR
>+      && opcode != BIT_IOR_EXPR
>+      && opcode != BIT_AND_EXPR)
>     return false;
> 
>   hash_map<tree, v_info_ptr> v_info_map;
>@@ -1879,9 +1885,45 @@ undistribute_bitref_for_vector (enum tre
>       if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
> 	continue;
> 
>+      if (VECTOR_TYPE_P (TREE_TYPE (rhs))
>+	  || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
>+	continue;
>+
>+      /* The type of BIT_FIELD_REF might not be equal to the element
>type of
>+	 the vector.  We want to use a vector type with element type the
>+	 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).  */
>+      if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE
>(vec_type)))
>+	{
>+	  machine_mode simd_mode;
>+	  unsigned HOST_WIDE_INT size, nunits;
>+	  unsigned HOST_WIDE_INT elem_size
>+	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
>+	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
>+	    continue;
>+	  if (size <= elem_size || (size % elem_size) != 0)
>+	    continue;
>+	  nunits = size / elem_size;
>+	  if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
>+				nunits).exists (&simd_mode))
>+	    continue;
>+	  vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
>+
>+	  /* Ignore it if target machine can't support this VECTOR type.  */
>+	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
>+	    continue;
>+
>+	  /* Check const vector type, constrain BIT_FIELD_REF offset and
>+	     size.  */
>+	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
>+	    continue;
>+
>+	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
>+			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
>+	    continue;
>+	}
>+
>       tree elem_type = TREE_TYPE (vec_type);
>-      unsigned HOST_WIDE_INT elem_size
>-	= TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
>+      unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE
>(elem_type));
>       if (maybe_ne (bit_field_size (rhs), elem_size))
> 	continue;
> 
>@@ -1898,8 +1940,13 @@ undistribute_bitref_for_vector (enum tre
>       bool existed;
>       v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
>       if (!existed)
>-	info = new v_info;
>-      info->safe_push (std::make_pair (idx, i));
>+	{
>+	  info = new v_info;
>+	  info->vec_type = vec_type;
>+	}
>+      else if (!types_compatible_p (vec_type, info->vec_type))
>+	continue;
>+      info->vec.safe_push (std::make_pair (idx, i));
>     }
> 
>   /* At least two VECTOR to combine.  */
>@@ -1919,14 +1966,15 @@ undistribute_bitref_for_vector (enum tre
>     {
>       tree cand_vec = (*it).first;
>       v_info_ptr cand_info = (*it).second;
>-      unsigned int num_elems = VECTOR_CST_NELTS (cand_vec).to_constant
>();
>-      if (cand_info->length () != num_elems)
>+      unsigned int num_elems
>+	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
>+      if (cand_info->vec.length () != num_elems)
> 	continue;
>       sbitmap holes = sbitmap_alloc (num_elems);
>       bitmap_ones (holes);
>       bool valid = true;
>       v_info_elem *curr;
>-      FOR_EACH_VEC_ELT (*cand_info, i, curr)
>+      FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
> 	{
> 	  if (!bitmap_bit_p (holes, curr->first))
> 	    {
>@@ -1962,25 +2010,53 @@ undistribute_bitref_for_vector (enum tre
> 
>       unsigned int idx, j;
>       gimple *sum = NULL;
>-      v_info_ptr info_ptr;
>       tree sum_vec = tvec;
>+      v_info_ptr info_ptr = *(v_info_map.get (tvec));
>       v_info_elem *elem;
>+      tree vec_type = info_ptr->vec_type;
> 
>       /* Build the sum for all candidates with same mode.  */
>       do
> 	{
>-	  sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
>+	  sum = build_and_add_sum (vec_type, sum_vec,
> 				   valid_vecs[i + 1], opcode);
>+	  if (!useless_type_conversion_p (vec_type,
>+					  TREE_TYPE (valid_vecs[i + 1])))
>+	    {
>+	      /* Update the operands only after build_and_add_sum,
>+		 so that we don't have to repeat the placement algorithm
>+		 of build_and_add_sum.  */
>+	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
>+	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
>+				 valid_vecs[i + 1]);
>+	      tree lhs = make_ssa_name (vec_type);
>+	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
>+	      gimple_set_uid (g, gimple_uid (sum));
>+	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
>+	      gimple_assign_set_rhs2 (sum, lhs);
>+	      if (sum_vec == tvec)
>+		{
>+		  vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
>+		  lhs = make_ssa_name (vec_type);
>+		  g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
>+		  gimple_set_uid (g, gimple_uid (sum));
>+		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
>+		  gimple_assign_set_rhs1 (sum, lhs);
>+		}
>+	      update_stmt (sum);
>+	    }
> 	  sum_vec = gimple_get_lhs (sum);
> 	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
>+	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
> 	  /* Update those related ops of current candidate VECTOR.  */
>-	  FOR_EACH_VEC_ELT (*info_ptr, j, elem)
>+	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
> 	    {
> 	      idx = elem->second;
> 	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
> 	      /* Set this then op definition will get DCEd later.  */
> 	      gimple_set_visited (def, true);
>-	      if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
>+	      if (opcode == PLUS_EXPR
>+		  || opcode == BIT_XOR_EXPR
> 		  || opcode == BIT_IOR_EXPR)
> 		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
> 	      else if (opcode == MULT_EXPR)
>@@ -2007,16 +2083,16 @@ undistribute_bitref_for_vector (enum tre
>          BIT_FIELD_REF statements accordingly.  */
>       info_ptr = *(v_info_map.get (tvec));
>       gcc_assert (sum);
>-      tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
>-      FOR_EACH_VEC_ELT (*info_ptr, j, elem)
>+      tree elem_type = TREE_TYPE (vec_type);
>+      FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
> 	{
> 	  idx = elem->second;
> 	  tree dst = make_ssa_name (elem_type);
>-	  gimple *gs = gimple_build_assign (
>-	    dst, BIT_FIELD_REF,
>-	    build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
>-		    bitsize_int (elem->first
>-				 * tree_to_uhwi (TYPE_SIZE (elem_type)))));
>+	  tree pos = bitsize_int (elem->first
>+				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
>+	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
>+			     TYPE_SIZE (elem_type), pos);
>+	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
> 	  insert_stmt_after (gs, sum);
> 	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
> 	  /* Set this then op definition will get DCEd later.  */
>--- gcc/testsuite/gcc.c-torture/compile/pr92618.c.jj	2019-11-22
>11:11:52.971470532 +0100
>+++ gcc/testsuite/gcc.c-torture/compile/pr92618.c	2019-11-22
>11:11:39.434680213 +0100
>@@ -0,0 +1,67 @@
>+/* PR tree-optimization/92618 */
>+
>+typedef long long __m128i __attribute__((__may_alias__,
>__vector_size__(2 * sizeof (long long))));
>+typedef long long __m256i __attribute__((__may_alias__,
>__vector_size__(4 * sizeof (long long))));
>+typedef long long __m512i __attribute__((__may_alias__,
>__vector_size__(8 * sizeof (long long))));
>+
>+double a[32];
>+unsigned long long b[32];
>+__m128i bar (void);
>+__m256i qux (void);
>+__m512i corge (void);
>+
>+void
>+foo (unsigned long long *x)
>+{
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  __m256i e = qux ();
>+  __m256i f = qux ();
>+  __m256i g = qux ();
>+  __m512i h = corge ();
>+  __m512i i = corge ();
>+  *(__m128i *) &b[0] = c;
>+  *(__m128i *) &b[2] = d;
>+  *(__m256i *) &b[4] = e;
>+  *(__m256i *) &b[8] = f;
>+  *(__m256i *) &b[12] = g;
>+  *(__m512i *) &b[16] = h;
>+  *(__m512i *) &b[24] = i;
>+  *x = b[0] + b[1] + b[2] + b[3]
>+     + b[4] + b[5] + b[6] + b[7]
>+     + b[8] + b[9] + b[10] + b[11]
>+     + b[12] + b[13] + b[14] + b[15]
>+     + b[16] + b[17] + b[18] + b[19]
>+     + b[20] + b[21] + b[22] + b[23]
>+     + b[24] + b[25] + b[26] + b[27]
>+     + b[28] + b[29] + b[30] + b[31];
>+}
>+
>+void
>+baz (double *x)
>+{
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  __m256i e = qux ();
>+  __m256i f = qux ();
>+  __m256i g = qux ();
>+  __m512i h = corge ();
>+  __m512i i = corge ();
>+  *(__m128i *) &a[0] = c;
>+  *(__m128i *) &a[2] = d;
>+  *(__m256i *) &a[4] = e;
>+  *(__m256i *) &a[8] = f;
>+  *(__m256i *) &a[12] = g;
>+  *(__m512i *) &a[16] = h;
>+  *(__m512i *) &a[24] = i;
>+  *x = a[0] + a[1] + a[2] + a[3]
>+     + a[4] + a[5] + a[6] + a[7]
>+     + a[8] + a[9] + a[10] + a[11]
>+     + a[12] + a[13] + a[14] + a[15]
>+     + a[16] + a[17] + a[18] + a[19]
>+     + a[20] + a[21] + a[22] + a[23]
>+     + a[24] + a[25] + a[26] + a[27]
>+     + a[28] + a[29] + a[30] + a[31];
>+#endif
>+}
>--- gcc/testsuite/gcc.c-torture/execute/pr92618.c.jj	2019-11-22
>11:11:10.543127733 +0100
>+++ gcc/testsuite/gcc.c-torture/execute/pr92618.c	2019-11-22
>11:10:56.930338594 +0100
>@@ -0,0 +1,63 @@
>+/* PR tree-optimization/92618 */
>+
>+typedef long long __m128i __attribute__((__may_alias__,
>__vector_size__(2 * sizeof (long long))));
>+
>+double a[4];
>+unsigned long long b[4];
>+
>+__attribute__((noipa)) __m128i
>+bar (void)
>+{
>+  static int cnt;
>+  cnt += 2;
>+  return (__m128i) { cnt, cnt + 1 };
>+}
>+
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+typedef double __m128d __attribute__((__may_alias__, __vector_size__(2
>* sizeof (double))));
>+
>+__attribute__((noipa)) __m128i
>+qux (void)
>+{
>+  static double cnt;
>+  cnt += 2.0;
>+  return (__m128i) (__m128d) { cnt, cnt + 1.0 };
>+}
>+#endif
>+
>+void
>+foo (unsigned long long *x)
>+{
>+  __m128i c = bar ();
>+  __m128i d = bar ();
>+  *(__m128i *) &b[0] = c;
>+  *(__m128i *) &b[2] = d;
>+  *x = b[0] + b[1] + b[2] + b[3];
>+}
>+
>+void
>+baz (double *x)
>+{
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  __m128i c = qux ();
>+  __m128i d = qux ();
>+  *(__m128i *) &a[0] = c;
>+  *(__m128i *) &a[2] = d;
>+  *x = a[0] + a[1] + a[2] + a[3];
>+#endif
>+}
>+
>+int
>+main ()
>+{
>+  unsigned long long c = 0;
>+  foo (&c);
>+  if (c != 2 + 3 + 4 + 5)
>+    __builtin_abort ();
>+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
>+  double d = 0.0;
>+  baz (&d);
>+  if (d != 2.0 + 3.0 + 4.0 + 5.0)
>+    __builtin_abort ();
>+#endif
>+}
>
>	Jakub
diff mbox series

Patch

--- gcc/tree-ssa-reassoc.c.jj	2019-11-13 10:54:56.038884580 +0100
+++ gcc/tree-ssa-reassoc.c	2019-11-22 10:57:24.956931307 +0100
@@ -1775,7 +1775,10 @@  undistribute_ops_list (enum tree_code op
    first: element index for each relevant BIT_FIELD_REF.
    second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
 typedef std::pair<unsigned, unsigned> v_info_elem;
-typedef auto_vec<v_info_elem, 32> v_info;
+struct v_info {
+  tree vec_type;
+  auto_vec<v_info_elem, 32> vec;
+};
 typedef v_info *v_info_ptr;
 
 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
@@ -1840,8 +1843,11 @@  undistribute_bitref_for_vector (enum tre
   if (ops->length () <= 1)
     return false;
 
-  if (opcode != PLUS_EXPR && opcode != MULT_EXPR && opcode != BIT_XOR_EXPR
-      && opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
+  if (opcode != PLUS_EXPR
+      && opcode != MULT_EXPR
+      && opcode != BIT_XOR_EXPR
+      && opcode != BIT_IOR_EXPR
+      && opcode != BIT_AND_EXPR)
     return false;
 
   hash_map<tree, v_info_ptr> v_info_map;
@@ -1879,9 +1885,45 @@  undistribute_bitref_for_vector (enum tre
       if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
 	continue;
 
+      if (VECTOR_TYPE_P (TREE_TYPE (rhs))
+	  || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
+	continue;
+
+      /* The type of BIT_FIELD_REF might not be equal to the element type of
+	 the vector.  We want to use a vector type with element type the
+	 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec).  */
+      if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
+	{
+	  machine_mode simd_mode;
+	  unsigned HOST_WIDE_INT size, nunits;
+	  unsigned HOST_WIDE_INT elem_size
+	    = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
+	  if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
+	    continue;
+	  if (size <= elem_size || (size % elem_size) != 0)
+	    continue;
+	  nunits = size / elem_size;
+	  if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
+				nunits).exists (&simd_mode))
+	    continue;
+	  vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
+
+	  /* Ignore it if target machine can't support this VECTOR type.  */
+	  if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
+	    continue;
+
+	  /* Check const vector type, constrain BIT_FIELD_REF offset and
+	     size.  */
+	  if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
+	    continue;
+
+	  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
+			GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
+	    continue;
+	}
+
       tree elem_type = TREE_TYPE (vec_type);
-      unsigned HOST_WIDE_INT elem_size
-	= TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
+      unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
       if (maybe_ne (bit_field_size (rhs), elem_size))
 	continue;
 
@@ -1898,8 +1940,13 @@  undistribute_bitref_for_vector (enum tre
       bool existed;
       v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
       if (!existed)
-	info = new v_info;
-      info->safe_push (std::make_pair (idx, i));
+	{
+	  info = new v_info;
+	  info->vec_type = vec_type;
+	}
+      else if (!types_compatible_p (vec_type, info->vec_type))
+	continue;
+      info->vec.safe_push (std::make_pair (idx, i));
     }
 
   /* At least two VECTOR to combine.  */
@@ -1919,14 +1966,15 @@  undistribute_bitref_for_vector (enum tre
     {
       tree cand_vec = (*it).first;
       v_info_ptr cand_info = (*it).second;
-      unsigned int num_elems = VECTOR_CST_NELTS (cand_vec).to_constant ();
-      if (cand_info->length () != num_elems)
+      unsigned int num_elems
+	= TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
+      if (cand_info->vec.length () != num_elems)
 	continue;
       sbitmap holes = sbitmap_alloc (num_elems);
       bitmap_ones (holes);
       bool valid = true;
       v_info_elem *curr;
-      FOR_EACH_VEC_ELT (*cand_info, i, curr)
+      FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
 	{
 	  if (!bitmap_bit_p (holes, curr->first))
 	    {
@@ -1962,25 +2010,53 @@  undistribute_bitref_for_vector (enum tre
 
       unsigned int idx, j;
       gimple *sum = NULL;
-      v_info_ptr info_ptr;
       tree sum_vec = tvec;
+      v_info_ptr info_ptr = *(v_info_map.get (tvec));
       v_info_elem *elem;
+      tree vec_type = info_ptr->vec_type;
 
       /* Build the sum for all candidates with same mode.  */
       do
 	{
-	  sum = build_and_add_sum (TREE_TYPE (sum_vec), sum_vec,
+	  sum = build_and_add_sum (vec_type, sum_vec,
 				   valid_vecs[i + 1], opcode);
+	  if (!useless_type_conversion_p (vec_type,
+					  TREE_TYPE (valid_vecs[i + 1])))
+	    {
+	      /* Update the operands only after build_and_add_sum,
+		 so that we don't have to repeat the placement algorithm
+		 of build_and_add_sum.  */
+	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
+	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
+				 valid_vecs[i + 1]);
+	      tree lhs = make_ssa_name (vec_type);
+	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
+	      gimple_set_uid (g, gimple_uid (sum));
+	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+	      gimple_assign_set_rhs2 (sum, lhs);
+	      if (sum_vec == tvec)
+		{
+		  vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
+		  lhs = make_ssa_name (vec_type);
+		  g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
+		  gimple_set_uid (g, gimple_uid (sum));
+		  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
+		  gimple_assign_set_rhs1 (sum, lhs);
+		}
+	      update_stmt (sum);
+	    }
 	  sum_vec = gimple_get_lhs (sum);
 	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
+	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
 	  /* Update those related ops of current candidate VECTOR.  */
-	  FOR_EACH_VEC_ELT (*info_ptr, j, elem)
+	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
 	    {
 	      idx = elem->second;
 	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
 	      /* Set this then op definition will get DCEd later.  */
 	      gimple_set_visited (def, true);
-	      if (opcode == PLUS_EXPR || opcode == BIT_XOR_EXPR
+	      if (opcode == PLUS_EXPR
+		  || opcode == BIT_XOR_EXPR
 		  || opcode == BIT_IOR_EXPR)
 		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
 	      else if (opcode == MULT_EXPR)
@@ -2007,16 +2083,16 @@  undistribute_bitref_for_vector (enum tre
          BIT_FIELD_REF statements accordingly.  */
       info_ptr = *(v_info_map.get (tvec));
       gcc_assert (sum);
-      tree elem_type = TREE_TYPE (TREE_TYPE (tvec));
-      FOR_EACH_VEC_ELT (*info_ptr, j, elem)
+      tree elem_type = TREE_TYPE (vec_type);
+      FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
 	{
 	  idx = elem->second;
 	  tree dst = make_ssa_name (elem_type);
-	  gimple *gs = gimple_build_assign (
-	    dst, BIT_FIELD_REF,
-	    build3 (BIT_FIELD_REF, elem_type, sum_vec, TYPE_SIZE (elem_type),
-		    bitsize_int (elem->first
-				 * tree_to_uhwi (TYPE_SIZE (elem_type)))));
+	  tree pos = bitsize_int (elem->first
+				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
+	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
+			     TYPE_SIZE (elem_type), pos);
+	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
 	  insert_stmt_after (gs, sum);
 	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
 	  /* Set this then op definition will get DCEd later.  */
--- gcc/testsuite/gcc.c-torture/compile/pr92618.c.jj	2019-11-22 11:11:52.971470532 +0100
+++ gcc/testsuite/gcc.c-torture/compile/pr92618.c	2019-11-22 11:11:39.434680213 +0100
@@ -0,0 +1,67 @@ 
+/* PR tree-optimization/92618 */
+
+typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * sizeof (long long))));
+typedef long long __m256i __attribute__((__may_alias__, __vector_size__(4 * sizeof (long long))));
+typedef long long __m512i __attribute__((__may_alias__, __vector_size__(8 * sizeof (long long))));
+
+double a[32];
+unsigned long long b[32];
+__m128i bar (void);
+__m256i qux (void);
+__m512i corge (void);
+
+void
+foo (unsigned long long *x)
+{
+  __m128i c = bar ();
+  __m128i d = bar ();
+  __m256i e = qux ();
+  __m256i f = qux ();
+  __m256i g = qux ();
+  __m512i h = corge ();
+  __m512i i = corge ();
+  *(__m128i *) &b[0] = c;
+  *(__m128i *) &b[2] = d;
+  *(__m256i *) &b[4] = e;
+  *(__m256i *) &b[8] = f;
+  *(__m256i *) &b[12] = g;
+  *(__m512i *) &b[16] = h;
+  *(__m512i *) &b[24] = i;
+  *x = b[0] + b[1] + b[2] + b[3]
+     + b[4] + b[5] + b[6] + b[7]
+     + b[8] + b[9] + b[10] + b[11]
+     + b[12] + b[13] + b[14] + b[15]
+     + b[16] + b[17] + b[18] + b[19]
+     + b[20] + b[21] + b[22] + b[23]
+     + b[24] + b[25] + b[26] + b[27]
+     + b[28] + b[29] + b[30] + b[31];
+}
+
+void
+baz (double *x)
+{
+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+  __m128i c = bar ();
+  __m128i d = bar ();
+  __m256i e = qux ();
+  __m256i f = qux ();
+  __m256i g = qux ();
+  __m512i h = corge ();
+  __m512i i = corge ();
+  *(__m128i *) &a[0] = c;
+  *(__m128i *) &a[2] = d;
+  *(__m256i *) &a[4] = e;
+  *(__m256i *) &a[8] = f;
+  *(__m256i *) &a[12] = g;
+  *(__m512i *) &a[16] = h;
+  *(__m512i *) &a[24] = i;
+  *x = a[0] + a[1] + a[2] + a[3]
+     + a[4] + a[5] + a[6] + a[7]
+     + a[8] + a[9] + a[10] + a[11]
+     + a[12] + a[13] + a[14] + a[15]
+     + a[16] + a[17] + a[18] + a[19]
+     + a[20] + a[21] + a[22] + a[23]
+     + a[24] + a[25] + a[26] + a[27]
+     + a[28] + a[29] + a[30] + a[31];
+#endif
+}
--- gcc/testsuite/gcc.c-torture/execute/pr92618.c.jj	2019-11-22 11:11:10.543127733 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr92618.c	2019-11-22 11:10:56.930338594 +0100
@@ -0,0 +1,63 @@ 
+/* PR tree-optimization/92618 */
+
+typedef long long __m128i __attribute__((__may_alias__, __vector_size__(2 * sizeof (long long))));
+
+double a[4];
+unsigned long long b[4];
+
+__attribute__((noipa)) __m128i
+bar (void)
+{
+  static int cnt;
+  cnt += 2;
+  return (__m128i) { cnt, cnt + 1 };
+}
+
+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+typedef double __m128d __attribute__((__may_alias__, __vector_size__(2 * sizeof (double))));
+
+__attribute__((noipa)) __m128i
+qux (void)
+{
+  static double cnt;
+  cnt += 2.0;
+  return (__m128i) (__m128d) { cnt, cnt + 1.0 };
+}
+#endif
+
+void
+foo (unsigned long long *x)
+{
+  __m128i c = bar ();
+  __m128i d = bar ();
+  *(__m128i *) &b[0] = c;
+  *(__m128i *) &b[2] = d;
+  *x = b[0] + b[1] + b[2] + b[3];
+}
+
+void
+baz (double *x)
+{
+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+  __m128i c = qux ();
+  __m128i d = qux ();
+  *(__m128i *) &a[0] = c;
+  *(__m128i *) &a[2] = d;
+  *x = a[0] + a[1] + a[2] + a[3];
+#endif
+}
+
+int
+main ()
+{
+  unsigned long long c = 0;
+  foo (&c);
+  if (c != 2 + 3 + 4 + 5)
+    __builtin_abort ();
+#if __SIZEOF_LONG_LONG__ == __SIZEOF_DOUBLE__
+  double d = 0.0;
+  baz (&d);
+  if (d != 2.0 + 3.0 + 4.0 + 5.0)
+    __builtin_abort ();
+#endif
+}