diff mbox series

i386: Fix up -fdollars-in-identifiers with identifiers starting with $ in -masm=att (PR target/91298)

Message ID 20200118123021.GU10088@tucnak
State New
Headers show
Series i386: Fix up -fdollars-in-identifiers with identifiers starting with $ in -masm=att (PR target/91298) | expand

Commit Message

Jakub Jelinek Jan. 18, 2020, 12:30 p.m. UTC
Hi!

In AT&T syntax leading $ is special, so if we have identifiers that start
with dollar, we usually fail to assemble it (or assemble incorrectly).
As mentioned in the PR, what works is wrapping the identifiers inside of
parens, like:
	movl	$($a), %eax
	leaq	($a)(,%rdi,4), %rax
	movl	($a)(%rip), %eax
	movl	($a)+16(%rip), %eax
	.globl	$a
	.type	$a, @object
	.size	$a, 72
$a:
	.string	"$a"
	.quad	($a)
(this is x86_64 -fno-pic -O2).  In some places ($a) is not accepted,
like as .globl operand, in .type, .size, so the patch overrides
ASM_OUTPUT_SYMBOL_REF rather than e.g. ASM_OUTPUT_LABELREF.
I didn't want to duplicate what assemble_name is doing (following
transparent aliases), so split assemble_name into two parts; just
mere looking at the first character of a name before calling assemble_name
wouldn't be good enough, a transparent alias could lead from a name
not starting with $ to one starting with it and vice versa.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-01-18  Jakub Jelinek  <jakub@redhat.com>

	PR target/91298
	* output.h (assemble_name_resolve): Declare.
	* varasm.c (assemble_name_resolve): New function.
	(assemble_name): Use it.
	* config/i386/i386.h (ASM_OUTPUT_SYMBOL_REF): Define.

	* gcc.target/i386/pr91298-1.c: New test.
	* gcc.target/i386/pr91298-2.c: New test.


	Jakub

Comments

Uros Bizjak Jan. 18, 2020, 1:24 p.m. UTC | #1
On Sat, Jan 18, 2020 at 1:30 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> In AT&T syntax leading $ is special, so if we have identifiers that start
> with dollar, we usually fail to assemble it (or assemble incorrectly).
> As mentioned in the PR, what works is wrapping the identifiers inside of
> parens, like:
>         movl    $($a), %eax
>         leaq    ($a)(,%rdi,4), %rax
>         movl    ($a)(%rip), %eax
>         movl    ($a)+16(%rip), %eax
>         .globl  $a
>         .type   $a, @object
>         .size   $a, 72
> $a:
>         .string "$a"
>         .quad   ($a)
> (this is x86_64 -fno-pic -O2).  In some places ($a) is not accepted,
> like as .globl operand, in .type, .size, so the patch overrides
> ASM_OUTPUT_SYMBOL_REF rather than e.g. ASM_OUTPUT_LABELREF.
> I didn't want to duplicate what assemble_name is doing (following
> transparent aliases), so split assemble_name into two parts; just
> mere looking at the first character of a name before calling assemble_name
> wouldn't be good enough, a transparent alias could lead from a name
> not starting with $ to one starting with it and vice versa.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2020-01-18  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/91298
>         * output.h (assemble_name_resolve): Declare.
>         * varasm.c (assemble_name_resolve): New function.
>         (assemble_name): Use it.
>         * config/i386/i386.h (ASM_OUTPUT_SYMBOL_REF): Define.
>
>         * gcc.target/i386/pr91298-1.c: New test.
>         * gcc.target/i386/pr91298-2.c: New test.

x86 part is OK, with a nit below.

Thanks,
Uros.

> --- gcc/output.h.jj     2020-01-12 11:54:36.692409199 +0100
> +++ gcc/output.h        2020-01-17 09:51:27.606873017 +0100
> @@ -237,6 +237,12 @@ extern void assemble_label (FILE *, cons
>     addition of an underscore).  */
>  extern void assemble_name_raw (FILE *, const char *);
>
> +/* Return NAME that should actually be emitted, looking through
> +   transparent aliases.  If NAME refers to an entity that is also
> +   represented as a tree (like a function or variable), mark the entity
> +   as referenced.  */
> +extern const char *assemble_name_resolve (const char *);
> +
>  /* Like assemble_name_raw, but should be used when NAME might refer to
>     an entity that is also represented as a tree (like a function or
>     variable).  If NAME does refer to such an entity, that entity will
> --- gcc/varasm.c.jj     2020-01-12 11:54:38.533381424 +0100
> +++ gcc/varasm.c        2020-01-17 09:53:14.430239885 +0100
> @@ -2589,20 +2589,16 @@ assemble_name_raw (FILE *file, const cha
>      ASM_OUTPUT_LABELREF (file, name);
>  }
>
> -/* Like assemble_name_raw, but should be used when NAME might refer to
> -   an entity that is also represented as a tree (like a function or
> -   variable).  If NAME does refer to such an entity, that entity will
> -   be marked as referenced.  */
> -
> -void
> -assemble_name (FILE *file, const char *name)
> +/* Return NAME that should actually be emitted, looking through
> +   transparent aliases.  If NAME refers to an entity that is also
> +   represented as a tree (like a function or variable), mark the entity
> +   as referenced.  */
> +const char *
> +assemble_name_resolve (const char *name)
>  {
> -  const char *real_name;
> -  tree id;
> +  const char *real_name = targetm.strip_name_encoding (name);
> +  tree id = maybe_get_identifier (real_name);
>
> -  real_name = targetm.strip_name_encoding (name);
> -
> -  id = maybe_get_identifier (real_name);
>    if (id)
>      {
>        tree id_orig = id;
> @@ -2614,7 +2610,18 @@ assemble_name (FILE *file, const char *n
>        gcc_assert (! TREE_CHAIN (id));
>      }
>
> -  assemble_name_raw (file, name);
> +  return name;
> +}
> +
> +/* Like assemble_name_raw, but should be used when NAME might refer to
> +   an entity that is also represented as a tree (like a function or
> +   variable).  If NAME does refer to such an entity, that entity will
> +   be marked as referenced.  */
> +
> +void
> +assemble_name (FILE *file, const char *name)
> +{
> +  assemble_name_raw (file, assemble_name_resolve (name));
>  }
>
>  /* Allocate SIZE bytes writable static space with a gensym name
> --- gcc/config/i386/i386.h.jj   2020-01-17 09:22:47.524148012 +0100
> +++ gcc/config/i386/i386.h      2020-01-17 10:21:10.822480641 +0100
> @@ -2258,6 +2258,31 @@ extern int const svr4_dbx_register_map[F
>  #define ASM_OUTPUT_FUNCTION_LABEL(FILE, NAME, DECL) \
>    ix86_asm_output_function_label ((FILE), (NAME), (DECL))
>
> +/* A C statement (sans semicolon) to output a reference to SYMBOL_REF SYM.
> +   If not defined, assemble_name will be used to output the name of the
> +   symbol.  This macro may be used to modify the way a symbol is referenced
> +   depending on information encoded by TARGET_ENCODE_SECTION_INFO.  */
> +
> +#ifndef ASM_OUTPUT_SYMBOL_REF
> +#define ASM_OUTPUT_SYMBOL_REF(FILE, SYM) \
> +  do {                                                 \
> +    const char *name                                   \
> +      = assemble_name_resolve (XSTR (x, 0));           \
> +    /* In -masm=att wrap identifiers that start with $ \
> +       into parens.  */                                        \
> +    if (name[0] == '$'                                 \
> +       && user_label_prefix[0] == '\0'                 \
> +       && ASSEMBLER_DIALECT == ASM_ATT)                \

I'd putthe  check for assembler dialect first, it looks more descriptive.

> +      {                                                        \
> +       fputc ('(', (FILE));                            \
> +       assemble_name_raw ((FILE), name);               \
> +       fputc (')', (FILE));                            \
> +      }                                                        \
> +    else                                               \
> +      assemble_name_raw ((FILE), name);                        \
> +  } while (0)
> +#endif
> +
>  /* Under some conditions we need jump tables in the text section,
>     because the assembler cannot handle label differences between
>     sections.  This is the case for x86_64 on Mach-O for example.  */
> --- gcc/testsuite/gcc.target/i386/pr91298-1.c.jj        2020-01-17 10:45:24.172222204 +0100
> +++ gcc/testsuite/gcc.target/i386/pr91298-1.c   2020-01-17 10:44:46.388800409 +0100
> @@ -0,0 +1,14 @@
> +/* PR target/91298 */
> +/* { dg-do assemble } */
> +/* { dg-options "-O2 -g -fdollars-in-identifiers" } */
> +
> +int $a[18];
> +int *foo (void) { return &$a[0]; }
> +int *bar (int x) { return &$a[x]; }
> +int baz (void) { return $a[0]; }
> +int qux (void) { return $a[4]; }
> +int $quux (void) { return 1; }
> +int corge (void) { return $quux (); }
> +int grault (void) { return $quux () + 1; }
> +typedef int (*fn) (void);
> +fn foobar (void) { return $quux; }
> --- gcc/testsuite/gcc.target/i386/pr91298-2.c.jj        2020-01-17 10:45:30.834120262 +0100
> +++ gcc/testsuite/gcc.target/i386/pr91298-2.c   2020-01-17 10:45:57.460712790 +0100
> @@ -0,0 +1,5 @@
> +/* PR target/91298 */
> +/* { dg-do assemble { target fpic } } */
> +/* { dg-options "-O2 -g -fdollars-in-identifiers -fpic" } */
> +
> +#include "pr91298-1.c"
>
>         Jakub
>
Rainer Orth Jan. 23, 2020, 8:12 p.m. UTC | #2
Hi Jakub,

> In AT&T syntax leading $ is special, so if we have identifiers that start
> with dollar, we usually fail to assemble it (or assemble incorrectly).
> As mentioned in the PR, what works is wrapping the identifiers inside of
> parens, like:
> 	movl	$($a), %eax
> 	leaq	($a)(,%rdi,4), %rax
> 	movl	($a)(%rip), %eax
> 	movl	($a)+16(%rip), %eax
> 	.globl	$a
> 	.type	$a, @object
> 	.size	$a, 72
> $a:
> 	.string	"$a"
> 	.quad	($a)
> (this is x86_64 -fno-pic -O2).  In some places ($a) is not accepted,
> like as .globl operand, in .type, .size, so the patch overrides
> ASM_OUTPUT_SYMBOL_REF rather than e.g. ASM_OUTPUT_LABELREF.
> I didn't want to duplicate what assemble_name is doing (following
> transparent aliases), so split assemble_name into two parts; just
> mere looking at the first character of a name before calling assemble_name
> wouldn't be good enough, a transparent alias could lead from a name
> not starting with $ to one starting with it and vice versa.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

the new testcases FAIL on Solaris/x86 with the native assembler:

+FAIL: gcc.target/i386/pr91298-1.c (test for excess errors)

Excess errors:
Assembler: pr91298-1.c
        "/var/tmp//ccE6r3xb.s", line 5 : Syntax error
        Near line: "    .globl  $quux"
        "/var/tmp//ccE6r3xb.s", line 6 : Syntax error
        Near line: "    .type   $quux, @function"
        "/var/tmp//ccE6r3xb.s", line 7 : Syntax error
        Near line: "$quux:"
        "/var/tmp//ccE6r3xb.s", line 15 : Syntax error
        Near line: "    .size   $quux, .-$quux"
        "/var/tmp//ccE6r3xb.s", line 24 : Syntax error
        Near line: "    movl    $($a), %eax"
        "/var/tmp//ccE6r3xb.s", line 38 : Syntax error
        Near line: "    leal    ($a)(,%eax,4), %eax"
        "/var/tmp//ccE6r3xb.s", line 51 : Syntax error
        Near line: "    movl    ($a), %eax"
        "/var/tmp//ccE6r3xb.s", line 63 : Syntax error
        Near line: "    movl    ($a)+16, %eax"
        "/var/tmp//ccE6r3xb.s", line 97 : Syntax error
        Near line: "    movl    $($quux), %eax"
        "/var/tmp//ccE6r3xb.s", line 101 : Syntax error
        Near line: "    .globl  $a"
        "/var/tmp//ccE6r3xb.s", line 104 : Syntax error
        Near line: "    .type   $a, @object"
        "/var/tmp//ccE6r3xb.s", line 105 : Syntax error
        Near line: "    .size   $a, 72"
        "/var/tmp//ccE6r3xb.s", line 106 : Syntax error
        Near line: "$a:"
        "/var/tmp//ccE6r3xb.s", line 228 : Syntax error
        Near line: "    .long   ($a)"

+FAIL: gcc.target/i386/pr91298-2.c (test for excess errors)

It only allows letters, digits, '_' and '.' in identifiers:
https://docs.oracle.com/cd/E37838_01/html/E61064/eqbsx.html#XALRMeoqjw

Right now, we don't have an effective-target keyword matching
-fdollars-in-identifiers.  There are only 3 other tests using that
option:

  gcc.dg/cpp/lexident.c		preprocess only
  gcc.dg/ucnid-5-utf8.c		skipped on avr*-*, powerpc-ibm-aix* (no $)
  				skipped if ! ucn
  gcc.dg/ucnid-5.c		skipped on avr*-*, powerpc-ibm-aix* (no $)

So we might just xfail the new ones on *86*-*-solaris2* && !gas, I guess.

	Rainer
Jakub Jelinek Jan. 23, 2020, 8:16 p.m. UTC | #3
On Thu, Jan 23, 2020 at 09:12:27PM +0100, Rainer Orth wrote:
> +FAIL: gcc.target/i386/pr91298-2.c (test for excess errors)
> 
> It only allows letters, digits, '_' and '.' in identifiers:
> https://docs.oracle.com/cd/E37838_01/html/E61064/eqbsx.html#XALRMeoqjw

So, no way to emit other letters in identifiers through some other syntax?

> Right now, we don't have an effective-target keyword matching
> -fdollars-in-identifiers.  There are only 3 other tests using that
> option:
> 
>   gcc.dg/cpp/lexident.c		preprocess only
>   gcc.dg/ucnid-5-utf8.c		skipped on avr*-*, powerpc-ibm-aix* (no $)
>   				skipped if ! ucn
>   gcc.dg/ucnid-5.c		skipped on avr*-*, powerpc-ibm-aix* (no $)
> 
> So we might just xfail the new ones on *86*-*-solaris2* && !gas, I guess.

Or even dg-skip-if or { target { ! *86*-*-solaris2* || gas } }, I guess it
really depends on whether there is any hope it might work there.

	Jakub
Rainer Orth Jan. 23, 2020, 8:19 p.m. UTC | #4
Hi Jakub,

> On Thu, Jan 23, 2020 at 09:12:27PM +0100, Rainer Orth wrote:
>> +FAIL: gcc.target/i386/pr91298-2.c (test for excess errors)
>> 
>> It only allows letters, digits, '_' and '.' in identifiers:
>> https://docs.oracle.com/cd/E37838_01/html/E61064/eqbsx.html#XALRMeoqjw
>
> So, no way to emit other letters in identifiers through some other syntax?

none that I know of.  There's certainly nothing documented.

>> Right now, we don't have an effective-target keyword matching
>> -fdollars-in-identifiers.  There are only 3 other tests using that
>> option:
>> 
>>   gcc.dg/cpp/lexident.c		preprocess only
>>   gcc.dg/ucnid-5-utf8.c		skipped on avr*-*, powerpc-ibm-aix* (no $)
>>   				skipped if ! ucn
>>   gcc.dg/ucnid-5.c		skipped on avr*-*, powerpc-ibm-aix* (no $)
>> 
>> So we might just xfail the new ones on *86*-*-solaris2* && !gas, I guess.
>
> Or even dg-skip-if or { target { ! *86*-*-solaris2* || gas } }, I guess it
> really depends on whether there is any hope it might work there.

Unlikely: there's barely any development on the Solaris assemblers these
days, though I prefer to xfail tests if possible to get notified if they
suddenly start to work for some reason.

	Rainer
Jakub Jelinek Jan. 23, 2020, 8:21 p.m. UTC | #5
On Thu, Jan 23, 2020 at 09:19:52PM +0100, Rainer Orth wrote:
> Unlikely: there's barely any development on the Solaris assemblers these
> days, though I prefer to xfail tests if possible to get notified if they
> suddenly start to work for some reason.

Ok.

	Jakub
Rainer Orth Jan. 26, 2020, 8:43 p.m. UTC | #6
Hi Jakub,

> On Thu, Jan 23, 2020 at 09:19:52PM +0100, Rainer Orth wrote:
>> Unlikely: there's barely any development on the Solaris assemblers these
>> days, though I prefer to xfail tests if possible to get notified if they
>> suddenly start to work for some reason.
>
> Ok.

here's what I've installed on master and the gcc-9 branch after testing
on i386-pc-solaris2.11 with as and gas and x86_64-pc-linux-gnu.

	Rainer
diff mbox series

Patch

--- gcc/output.h.jj	2020-01-12 11:54:36.692409199 +0100
+++ gcc/output.h	2020-01-17 09:51:27.606873017 +0100
@@ -237,6 +237,12 @@  extern void assemble_label (FILE *, cons
    addition of an underscore).  */
 extern void assemble_name_raw (FILE *, const char *);
 
+/* Return NAME that should actually be emitted, looking through
+   transparent aliases.  If NAME refers to an entity that is also
+   represented as a tree (like a function or variable), mark the entity
+   as referenced.  */
+extern const char *assemble_name_resolve (const char *);
+
 /* Like assemble_name_raw, but should be used when NAME might refer to
    an entity that is also represented as a tree (like a function or
    variable).  If NAME does refer to such an entity, that entity will
--- gcc/varasm.c.jj	2020-01-12 11:54:38.533381424 +0100
+++ gcc/varasm.c	2020-01-17 09:53:14.430239885 +0100
@@ -2589,20 +2589,16 @@  assemble_name_raw (FILE *file, const cha
     ASM_OUTPUT_LABELREF (file, name);
 }
 
-/* Like assemble_name_raw, but should be used when NAME might refer to
-   an entity that is also represented as a tree (like a function or
-   variable).  If NAME does refer to such an entity, that entity will
-   be marked as referenced.  */
-
-void
-assemble_name (FILE *file, const char *name)
+/* Return NAME that should actually be emitted, looking through
+   transparent aliases.  If NAME refers to an entity that is also
+   represented as a tree (like a function or variable), mark the entity
+   as referenced.  */
+const char *
+assemble_name_resolve (const char *name)
 {
-  const char *real_name;
-  tree id;
+  const char *real_name = targetm.strip_name_encoding (name);
+  tree id = maybe_get_identifier (real_name);
 
-  real_name = targetm.strip_name_encoding (name);
-
-  id = maybe_get_identifier (real_name);
   if (id)
     {
       tree id_orig = id;
@@ -2614,7 +2610,18 @@  assemble_name (FILE *file, const char *n
       gcc_assert (! TREE_CHAIN (id));
     }
 
-  assemble_name_raw (file, name);
+  return name;
+}
+
+/* Like assemble_name_raw, but should be used when NAME might refer to
+   an entity that is also represented as a tree (like a function or
+   variable).  If NAME does refer to such an entity, that entity will
+   be marked as referenced.  */
+
+void
+assemble_name (FILE *file, const char *name)
+{
+  assemble_name_raw (file, assemble_name_resolve (name));
 }
 
 /* Allocate SIZE bytes writable static space with a gensym name
--- gcc/config/i386/i386.h.jj	2020-01-17 09:22:47.524148012 +0100
+++ gcc/config/i386/i386.h	2020-01-17 10:21:10.822480641 +0100
@@ -2258,6 +2258,31 @@  extern int const svr4_dbx_register_map[F
 #define ASM_OUTPUT_FUNCTION_LABEL(FILE, NAME, DECL) \
   ix86_asm_output_function_label ((FILE), (NAME), (DECL))
 
+/* A C statement (sans semicolon) to output a reference to SYMBOL_REF SYM.
+   If not defined, assemble_name will be used to output the name of the
+   symbol.  This macro may be used to modify the way a symbol is referenced
+   depending on information encoded by TARGET_ENCODE_SECTION_INFO.  */
+
+#ifndef ASM_OUTPUT_SYMBOL_REF
+#define ASM_OUTPUT_SYMBOL_REF(FILE, SYM) \
+  do {							\
+    const char *name					\
+      = assemble_name_resolve (XSTR (x, 0));		\
+    /* In -masm=att wrap identifiers that start with $	\
+       into parens.  */					\
+    if (name[0] == '$'					\
+	&& user_label_prefix[0] == '\0'			\
+	&& ASSEMBLER_DIALECT == ASM_ATT)		\
+      {							\
+	fputc ('(', (FILE));				\
+	assemble_name_raw ((FILE), name);		\
+	fputc (')', (FILE));				\
+      }							\
+    else						\
+      assemble_name_raw ((FILE), name);			\
+  } while (0)
+#endif
+
 /* Under some conditions we need jump tables in the text section,
    because the assembler cannot handle label differences between
    sections.  This is the case for x86_64 on Mach-O for example.  */
--- gcc/testsuite/gcc.target/i386/pr91298-1.c.jj	2020-01-17 10:45:24.172222204 +0100
+++ gcc/testsuite/gcc.target/i386/pr91298-1.c	2020-01-17 10:44:46.388800409 +0100
@@ -0,0 +1,14 @@ 
+/* PR target/91298 */
+/* { dg-do assemble } */
+/* { dg-options "-O2 -g -fdollars-in-identifiers" } */
+
+int $a[18];
+int *foo (void) { return &$a[0]; }
+int *bar (int x) { return &$a[x]; }
+int baz (void) { return $a[0]; }
+int qux (void) { return $a[4]; }
+int $quux (void) { return 1; }
+int corge (void) { return $quux (); }
+int grault (void) { return $quux () + 1; }
+typedef int (*fn) (void);
+fn foobar (void) { return $quux; }
--- gcc/testsuite/gcc.target/i386/pr91298-2.c.jj	2020-01-17 10:45:30.834120262 +0100
+++ gcc/testsuite/gcc.target/i386/pr91298-2.c	2020-01-17 10:45:57.460712790 +0100
@@ -0,0 +1,5 @@ 
+/* PR target/91298 */
+/* { dg-do assemble { target fpic } } */
+/* { dg-options "-O2 -g -fdollars-in-identifiers -fpic" } */
+
+#include "pr91298-1.c"