Patchwork [4.7,i386] : Enable prefetchw for selected AMD targets

login
register
mail settings
Submitter Uros Bizjak
Date Sept. 12, 2012, 6:20 p.m.
Message ID <CAFULd4ZuzsEFXaENWfRcBO27McaEHMJQM2-_vTK-GJJhokn3jA@mail.gmail.com>
Download mbox | patch
Permalink /patch/183422/
State New
Headers show

Comments

Uros Bizjak - Sept. 12, 2012, 6:20 p.m.
Hello!

Newer AMD processors do not enable 3dNOW anymore. However, prefetchw
depends on TARGET_3DNOW, so it is no longer generated for them. The
following patch is a 4.7 version of the mainline patch [1].

2012-09-12  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.h (x86_prefetchw): New global variable.
	(TARGET_PREFETCHW): New macro.
	* config/i386/i386.c (PTA_PREFETCHW): Ditto.
	(processor_alias_table): Add PTA_PREFETCHW to
	bdver1, bdver2 and btver1.
	(ix86_option_override_internal): Set x86_prefetchw for
	PTA_PREFETCHW targets.
	* config/i386/i386.md (prefetch): Expand to prefetchw
	for TARGET_PREFETCHW.
	(*prefetch_3dnow_<mode>): Also enable for TARGET_PREFETCHW.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32}. It will be committed to the 4.7 branch after [1] is committed
to mainline.

[1] http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00670.html

Uros.
H.J. Lu - Sept. 12, 2012, 6:30 p.m.
On Wed, Sep 12, 2012 at 11:20 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> Hello!
>
> Newer AMD processors does not enable 3dNOW anymore. However, prefetchw
> depends on TARGET_3DNOW, so it is not generated anymore. Following
> patch is a 4.7 version of mainline patch [1].
>
> 2012-09-12  Uros Bizjak  <ubizjak@gmail.com>
>
>         * config/i386/i386.h (x86_prefetchw): New global variable.
>         (TARGET_PREFETCHW): New macro.
>         * config/i386/i386.c (PTA_PREFETCHW): Ditto.
>         (processor_alias_table): Add PTA_PREFETCHW to
>         bdver1, bdver2 and btver1.
>         (ix86_option_override_internal): Set x86_prefetchw for
>         PTA_PREFETCHW targets.
>         * config/i386/i386.md (prefetch): Expand to prefetchw
>         for TARGET_PREFETCHW.
>         (*prefetch_3dnow_<mode>): Also enable for TARGET_PREFETCHW.
>
> Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
> {,-m32}. Will be committed to 4.7 branch after [1] is committed to
> mainline.
>
> [1] http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00670.html

Do we need to update driver-i386.c?
Uros Bizjak - Sept. 12, 2012, 6:58 p.m.
On Wed, Sep 12, 2012 at 8:30 PM, H.J. Lu <hjl.tools@gmail.com> wrote:

>> Newer AMD processors does not enable 3dNOW anymore. However, prefetchw
>> depends on TARGET_3DNOW, so it is not generated anymore. Following
>> patch is a 4.7 version of mainline patch [1].
>>
>> 2012-09-12  Uros Bizjak  <ubizjak@gmail.com>
>>
>>         * config/i386/i386.h (x86_prefetchw): New global variable.
>>         (TARGET_PREFETCHW): New macro.
>>         * config/i386/i386.c (PTA_PREFETCHW): Ditto.
>>         (processor_alias_table): Add PTA_PREFETCHW to
>>         bdver1, bdver2 and btver1.
>>         (ix86_option_override_internal): Set x86_prefetchw for
>>         PTA_PREFETCHW targets.
>>         * config/i386/i386.md (prefetch): Expand to prefetchw
>>         for TARGET_PREFETCHW.
>>         (*prefetch_3dnow_<mode>): Also enable for TARGET_PREFETCHW.
>>
>> Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
>> {,-m32}. Will be committed to 4.7 branch after [1] is committed to
>> mainline.
>>
>> [1] http://gcc.gnu.org/ml/gcc-patches/2012-09/msg00670.html
>
> Do we need to update driver-i386.c?

No, the patch is not a backport of -mprfchw; it statically enables
prefetchw for targets that have the instruction but no longer define
TARGET_3DNOW.

Uros.

Patch

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 191227)
+++ config/i386/i386.c	(working copy)
@@ -2428,9 +2428,12 @@  enum processor_type ix86_tune;
 /* Which instruction set architecture to use.  */
 enum processor_type ix86_arch;
 
-/* true if sse prefetch instruction is not NOOP.  */
+/* True if processor has SSE prefetch instruction.  */
 int x86_prefetch_sse;
 
+/* True if processor has prefetchw instruction.  */
+int x86_prefetchw;
+ 
 /* -mstackrealign option */
 static const char ix86_force_align_arg_pointer_string[]
   = "force_align_arg_pointer";
@@ -2931,6 +2934,8 @@  ix86_option_override_internal (bool main_args_p)
 #define PTA_XOP		 	(HOST_WIDE_INT_1 << 29)
 #define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
 #define PTA_BMI2	 	(HOST_WIDE_INT_1 << 31)
+#define PTA_PREFETCHW		(HOST_WIDE_INT_1 << 32)
+
 /* if this reaches 64, need to widen struct pta flags below */
 
   static struct pta
@@ -3038,19 +3043,19 @@  ix86_option_override_internal (bool main_args_p)
 	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
 	| PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
       {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
-	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-	| PTA_XOP | PTA_LWP},
+	PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+	| PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+	| PTA_FMA4 | PTA_XOP | PTA_LWP},
       {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
-	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
-	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
-	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
-	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
+	PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+	| PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
+	| PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
 	| PTA_FMA},
       {"btver1", PROCESSOR_BTVER1, CPU_GENERIC64,
-        PTA_64BIT | PTA_MMX |  PTA_SSE  | PTA_SSE2 | PTA_SSE3
-        | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16},
+	PTA_64BIT | PTA_MMX | PTA_PREFETCHW | PTA_SSE | PTA_SSE2
+	| PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16},
       {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
 	0 /* flags are only used for -march switch.  */ },
       {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
@@ -3358,6 +3363,8 @@  ix86_option_override_internal (bool main_args_p)
 	  ix86_isa_flags |= OPTION_MASK_ISA_F16C;
 	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
 	  x86_prefetch_sse = true;
+	if (processor_alias_table[i].flags & PTA_PREFETCHW)
+	  x86_prefetchw = true;
 
 	break;
       }
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 191226)
+++ config/i386/i386.h	(working copy)
@@ -450,9 +450,11 @@  extern unsigned char ix86_arch_features[X86_ARCH_L
 #define TARGET_FISTTP		(TARGET_SSE3 && TARGET_80387)
 
 extern int x86_prefetch_sse;
-
 #define TARGET_PREFETCH_SSE	x86_prefetch_sse
 
+extern int x86_prefetchw;
+#define TARGET_PREFETCHW	x86_prefetchw
+
 #define ASSEMBLER_DIALECT	(ix86_asm_dialect)
 
 #define TARGET_SSE_MATH		((ix86_fpmath & FPMATH_SSE) != 0)
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 191227)
+++ config/i386/i386.md	(working copy)
@@ -17671,12 +17671,14 @@ 
   gcc_assert (locality >= 0 && locality <= 3);
   gcc_assert (GET_MODE (operands[0]) == Pmode
 	      || GET_MODE (operands[0]) == VOIDmode);
+  if (TARGET_PREFETCHW && rw)
+    operands[2] = GEN_INT (3);
 
   /* Use 3dNOW prefetch in case we are asking for write prefetch not
      supported by SSE counterpart or the SSE prefetch is not available
      (K6 machines).  Otherwise use SSE prefetch as it allows specifying
      of locality.  */
-  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+  else if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
     operands[2] = GEN_INT (3);
   else
     operands[1] = const0_rtx;
@@ -17707,7 +17709,7 @@ 
   [(prefetch (match_operand:P 0 "address_operand" "p")
 	     (match_operand:SI 1 "const_int_operand" "n")
 	     (const_int 3))]
-  "TARGET_3DNOW"
+  "TARGET_3DNOW || TARGET_PREFETCHW"
 {
   if (INTVAL (operands[1]) == 0)
     return "prefetch\t%a0";