===================================================================
@@ -304,6 +304,8 @@
#endif
extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op);
+extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
+ enum machine_mode mode);
#ifdef RTX_CODE
/* Target data for multipass lookahead scheduling.
===================================================================
@@ -51726,6 +51726,92 @@
}
#endif
+/* If MEM is in the form of [base+offset], extract the two parts
+   of address and set to BASE and OFFSET, otherwise return false.
+   BASE is set to either a register or a SYMBOL_REF; OFFSET is always
+   a CONST_INT (const0_rtx when the address has no displacement).  */
+
+static bool
+extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
+{
+  rtx addr;
+
+  gcc_assert (MEM_P (mem));
+
+  addr = XEXP (mem, 0);
+
+  /* Look through a CONST wrapper, as in
+     (const (plus (symbol_ref) (const_int))).  */
+  if (GET_CODE (addr) == CONST)
+    addr = XEXP (addr, 0);
+
+  /* Bare register or symbol: treat as base with zero offset.  */
+  if (REG_P (addr) || GET_CODE (addr) == SYMBOL_REF)
+    {
+      *base = addr;
+      *offset = const0_rtx;
+      return true;
+    }
+
+  /* (plus (reg|symbol_ref) (const_int)) form.  */
+  if (GET_CODE (addr) == PLUS
+      && (REG_P (XEXP (addr, 0))
+	  || GET_CODE (XEXP (addr, 0)) == SYMBOL_REF)
+      && CONST_INT_P (XEXP (addr, 1)))
+    {
+      *base = XEXP (addr, 0);
+      *offset = XEXP (addr, 1);
+      return true;
+    }
+
+  return false;
+}
+
+/* Given OPERANDS of consecutive load/store, check if we can merge
+   them into move multiple.  LOAD is true if they are load instructions.
+   MODE is the mode of memory operands.
+
+   For a load pair OPERANDS is {reg, mem, reg, mem}; for a store pair
+   it is {mem, reg, mem, reg}, matching the peephole2 patterns that
+   call this function.  Returns true only when both halves use the
+   same register and the two memory words are adjacent with the first
+   at the lower address.  */
+
+bool
+ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
+				    enum machine_mode mode)
+{
+  HOST_WIDE_INT offval_1, offval_2, msize;
+  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
+
+  if (load)
+    {
+      mem_1 = operands[1];
+      mem_2 = operands[3];
+      reg_1 = operands[0];
+      reg_2 = operands[2];
+    }
+  else
+    {
+      mem_1 = operands[0];
+      mem_2 = operands[2];
+      reg_1 = operands[1];
+      reg_2 = operands[3];
+    }
+
+  gcc_assert (REG_P (reg_1) && REG_P (reg_2));
+
+  /* The merged instruction moves the full vector register in one go,
+     so both halves must target/source the same register.  */
+  if (REGNO (reg_1) != REGNO (reg_2))
+    return false;
+
+  /* Check if the addresses are in the form of [base+offset].  */
+  if (!extract_base_offset_in_addr (mem_1, &base_1, &offset_1))
+    return false;
+  if (!extract_base_offset_in_addr (mem_2, &base_2, &offset_2))
+    return false;
+
+  /* Check if the bases are the same.  */
+  if (!rtx_equal_p (base_1, base_2))
+    return false;
+
+  /* OFFSET_1/OFFSET_2 are CONST_INTs by construction in
+     extract_base_offset_in_addr, so INTVAL is safe here.  */
+  offval_1 = INTVAL (offset_1);
+  offval_2 = INTVAL (offset_2);
+  msize = GET_MODE_SIZE (mode);
+  /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
+  if (offval_1 + msize != offval_2)
+    return false;
+
+  return true;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
===================================================================
@@ -1183,6 +1183,21 @@
]
(const_string "<MODE>")))])
+;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
+;; The first insn loads the low DF half and zeroes the high half; the
+;; second fills the high half from memory while keeping the low half of
+;; the first result.  When both destinations are the same register and
+;; the two addresses are consecutive (both checked by
+;; ix86_operands_ok_for_move_multiple), the pair is equivalent to a
+;; single unaligned V2DF load.  Operand 4 (the matched zero constant) is
+;; recycled in the preparation statement to carry the widened memory
+;; reference.
+(define_peephole2
+  [(set (match_operand:V2DF 0 "register_operand")
+	(vec_concat:V2DF (match_operand:DF 1 "memory_operand")
+			 (match_operand:DF 4 "const0_operand")))
+   (set (match_operand:V2DF 2 "register_operand")
+	(vec_concat:V2DF (vec_select:DF (match_dup 0)
+					(parallel [(const_int 0)]))
+			 (match_operand:DF 3 "memory_operand")))]
+  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
+  [(set (match_dup 2)
+	(unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
+  "operands[4] = adjust_address (operands[1], V2DFmode, 0);")
+
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF 0 "memory_operand" "=m")
(unspec:VF
@@ -1242,6 +1257,20 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
+;; The first insn stores the low DF half of a vector register, the
+;; second the high half.  When both halves come from the same register
+;; and the two memory words are consecutive (both checked by
+;; ix86_operands_ok_for_move_multiple), the pair is equivalent to a
+;; single unaligned V2DF store.  Operand 4 is a fresh operand created in
+;; the preparation statement to hold the widened memory reference.
+(define_peephole2
+  [(set (match_operand:DF 0 "memory_operand")
+	(vec_select:DF (match_operand:V2DF 1 "register_operand")
+		       (parallel [(const_int 0)])))
+   (set (match_operand:DF 2 "memory_operand")
+	(vec_select:DF (match_operand:V2DF 3 "register_operand")
+		       (parallel [(const_int 1)])))]
+  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+   && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
+  [(set (match_dup 4)
+	(unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
+  "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
+
/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
just fine if misaligned_operand is true, and without the UNSPEC it can
be combined with arithmetic instructions. If misaligned_operand is
===================================================================
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=corei7 -O2" } */
+
+#include <emmintrin.h>
+
+double a[8];
+
+__m128d load_1 ()
+{
+  __m128d res;
+  /* a[1] and a[2] are adjacent doubles at symbol+8 / symbol+16, so the
+     movsd/movhpd pair should merge into one unaligned load (movup).  */
+  res = _mm_load_sd (&a[1]);
+  res = _mm_loadh_pd (res, &a[2]);
+  return res;
+}
+
+__m128d load_2 (double *a)
+{
+  __m128d res;
+  /* Same as load_1, but the parameter A shadows the global, so the
+     addresses use a register base (reg+8 / reg+16) rather than a
+     symbol base.  */
+  res = _mm_load_sd (&a[1]);
+  res = _mm_loadh_pd (res, &a[2]);
+  return res;
+}
+
+/* { dg-final { scan-assembler-times "movup" 2 } } */
===================================================================
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=corei7 -O2" } */
+
+#include <emmintrin.h>
+
+double a[8];
+
+void store_1 (__m128d val)
+{
+  /* Adjacent stores of the low/high halves of VAL to symbol+8 and
+     symbol+16 should merge into one unaligned store (movup).  */
+  _mm_store_sd (&a[1], val);
+  _mm_storeh_pd (&a[2], val);
+}
+
+void store_2 (__m128d val, double *a)
+{
+  /* Same as store_1, but the parameter A shadows the global, so the
+     addresses use a register base rather than a symbol base.  */
+  _mm_store_sd (&a[1], val);
+  _mm_storeh_pd (&a[2], val);
+}
+
+/* { dg-final { scan-assembler-times "movup" 2 } } */