Message ID | 20200118123021.GU10088@tucnak |
---|---|
State | New |
Headers | show |
Series | i386: Fix up -fdollars-in-identifiers with identifiers starting with $ in -masm=att (PR target/91298) | expand |
On Sat, Jan 18, 2020 at 1:30 PM Jakub Jelinek <jakub@redhat.com> wrote: > > Hi! > > In AT&T syntax leading $ is special, so if we have identifiers that start > with dollar, we usually fail to assemble it (or assemble incorrectly). > As mentioned in the PR, what works is wrapping the identifiers inside of > parens, like: > movl $($a), %eax > leaq ($a)(,%rdi,4), %rax > movl ($a)(%rip), %eax > movl ($a)+16(%rip), %eax > .globl $a > .type $a, @object > .size $a, 72 > $a: > .string "$a" > .quad ($a) > (this is x86_64 -fno-pic -O2). In some places ($a) is not accepted, > like as .globl operand, in .type, .size, so the patch overrides > ASM_OUTPUT_SYMBOL_REF rather than e.g. ASM_OUTPUT_LABELREF. > I didn't want to duplicate what assemble_name is doing (following > transparent aliases), so split assemble_name into two parts; just > mere looking at the first character of a name before calling assemble_name > wouldn't be good enough, a transparent alias could lead from a name > not starting with $ to one starting with it and vice versa. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2020-01-18 Jakub Jelinek <jakub@redhat.com> > > PR target/91298 > * output.h (assemble_name_resolve): Declare. > * varasm.c (assemble_name_resolve): New function. > (assemble_name): Use it. > * config/i386/i386.h (ASM_OUTPUT_SYMBOL_REF): Define. > > * gcc.target/i386/pr91298-1.c: New test. > * gcc.target/i386/pr91298-2.c: New test. x86 part is OK, with a nit below. Thanks, Uros. > --- gcc/output.h.jj 2020-01-12 11:54:36.692409199 +0100 > +++ gcc/output.h 2020-01-17 09:51:27.606873017 +0100 > @@ -237,6 +237,12 @@ extern void assemble_label (FILE *, cons > addition of an underscore). */ > extern void assemble_name_raw (FILE *, const char *); > > +/* Return NAME that should actually be emitted, looking through > + transparent aliases. If NAME refers to an entity that is also > + represented as a tree (like a function or variable), mark the entity > + as referenced. */ > +extern const char *assemble_name_resolve (const char *); > + > /* Like assemble_name_raw, but should be used when NAME might refer to > an entity that is also represented as a tree (like a function or > variable). If NAME does refer to such an entity, that entity will > --- gcc/varasm.c.jj 2020-01-12 11:54:38.533381424 +0100 > +++ gcc/varasm.c 2020-01-17 09:53:14.430239885 +0100 > @@ -2589,20 +2589,16 @@ assemble_name_raw (FILE *file, const cha > ASM_OUTPUT_LABELREF (file, name); > } > > -/* Like assemble_name_raw, but should be used when NAME might refer to > - an entity that is also represented as a tree (like a function or > - variable). If NAME does refer to such an entity, that entity will > - be marked as referenced. */ > - > -void > -assemble_name (FILE *file, const char *name) > +/* Return NAME that should actually be emitted, looking through > + transparent aliases. If NAME refers to an entity that is also > + represented as a tree (like a function or variable), mark the entity > + as referenced. */ > +const char * > +assemble_name_resolve (const char *name) > { > - const char *real_name; > - tree id; > + const char *real_name = targetm.strip_name_encoding (name); > + tree id = maybe_get_identifier (real_name); > > - real_name = targetm.strip_name_encoding (name); > - > - id = maybe_get_identifier (real_name); > if (id) > { > tree id_orig = id; > @@ -2614,7 +2610,18 @@ assemble_name (FILE *file, const char *n > gcc_assert (! TREE_CHAIN (id)); > } > > - assemble_name_raw (file, name); > + return name; > +} > + > +/* Like assemble_name_raw, but should be used when NAME might refer to > + an entity that is also represented as a tree (like a function or > + variable). If NAME does refer to such an entity, that entity will > + be marked as referenced. */ > + > +void > +assemble_name (FILE *file, const char *name) > +{ > + assemble_name_raw (file, assemble_name_resolve (name)); > } > > /* Allocate SIZE bytes writable static space with a gensym name > --- gcc/config/i386/i386.h.jj 2020-01-17 09:22:47.524148012 +0100 > +++ gcc/config/i386/i386.h 2020-01-17 10:21:10.822480641 +0100 > @@ -2258,6 +2258,31 @@ extern int const svr4_dbx_register_map[F > #define ASM_OUTPUT_FUNCTION_LABEL(FILE, NAME, DECL) \ > ix86_asm_output_function_label ((FILE), (NAME), (DECL)) > > +/* A C statement (sans semicolon) to output a reference to SYMBOL_REF SYM. > + If not defined, assemble_name will be used to output the name of the > + symbol. This macro may be used to modify the way a symbol is referenced > + depending on information encoded by TARGET_ENCODE_SECTION_INFO. */ > + > +#ifndef ASM_OUTPUT_SYMBOL_REF > +#define ASM_OUTPUT_SYMBOL_REF(FILE, SYM) \ > + do { \ > + const char *name \ > + = assemble_name_resolve (XSTR (x, 0)); \ > + /* In -masm=att wrap identifiers that start with $ \ > + into parens. */ \ > + if (name[0] == '$' \ > + && user_label_prefix[0] == '\0' \ > + && ASSEMBLER_DIALECT == ASM_ATT) \ I'd putthe check for assembler dialect first, it looks more descriptive. > + { \ > + fputc ('(', (FILE)); \ > + assemble_name_raw ((FILE), name); \ > + fputc (')', (FILE)); \ > + } \ > + else \ > + assemble_name_raw ((FILE), name); \ > + } while (0) > +#endif > + > /* Under some conditions we need jump tables in the text section, > because the assembler cannot handle label differences between > sections. This is the case for x86_64 on Mach-O for example. */ > --- gcc/testsuite/gcc.target/i386/pr91298-1.c.jj 2020-01-17 10:45:24.172222204 +0100 > +++ gcc/testsuite/gcc.target/i386/pr91298-1.c 2020-01-17 10:44:46.388800409 +0100 > @@ -0,0 +1,14 @@ > +/* PR target/91298 */ > +/* { dg-do assemble } */ > +/* { dg-options "-O2 -g -fdollars-in-identifiers" } */ > + > +int $a[18]; > +int *foo (void) { return &$a[0]; } > +int *bar (int x) { return &$a[x]; } > +int baz (void) { return $a[0]; } > +int qux (void) { return $a[4]; } > +int $quux (void) { return 1; } > +int corge (void) { return $quux (); } > +int grault (void) { return $quux () + 1; } > +typedef int (*fn) (void); > +fn foobar (void) { return $quux; } > --- gcc/testsuite/gcc.target/i386/pr91298-2.c.jj 2020-01-17 10:45:30.834120262 +0100 > +++ gcc/testsuite/gcc.target/i386/pr91298-2.c 2020-01-17 10:45:57.460712790 +0100 > @@ -0,0 +1,5 @@ > +/* PR target/91298 */ > +/* { dg-do assemble { target fpic } } */ > +/* { dg-options "-O2 -g -fdollars-in-identifiers -fpic" } */ > + > +#include "pr91298-1.c" > > Jakub >
Hi Jakub, > In AT&T syntax leading $ is special, so if we have identifiers that start > with dollar, we usually fail to assemble it (or assemble incorrectly). > As mentioned in the PR, what works is wrapping the identifiers inside of > parens, like: > movl $($a), %eax > leaq ($a)(,%rdi,4), %rax > movl ($a)(%rip), %eax > movl ($a)+16(%rip), %eax > .globl $a > .type $a, @object > .size $a, 72 > $a: > .string "$a" > .quad ($a) > (this is x86_64 -fno-pic -O2). In some places ($a) is not accepted, > like as .globl operand, in .type, .size, so the patch overrides > ASM_OUTPUT_SYMBOL_REF rather than e.g. ASM_OUTPUT_LABELREF. > I didn't want to duplicate what assemble_name is doing (following > transparent aliases), so split assemble_name into two parts; just > mere looking at the first character of a name before calling assemble_name > wouldn't be good enough, a transparent alias could lead from a name > not starting with $ to one starting with it and vice versa. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? the new testcases FAIL on Solaris/x86 with the native assembler: +FAIL: gcc.target/i386/pr91298-1.c (test for excess errors) Excess errors: Assembler: pr91298-1.c "/var/tmp//ccE6r3xb.s", line 5 : Syntax error Near line: " .globl $quux" "/var/tmp//ccE6r3xb.s", line 6 : Syntax error Near line: " .type $quux, @function" "/var/tmp//ccE6r3xb.s", line 7 : Syntax error Near line: "$quux:" "/var/tmp//ccE6r3xb.s", line 15 : Syntax error Near line: " .size $quux, .-$quux" "/var/tmp//ccE6r3xb.s", line 24 : Syntax error Near line: " movl $($a), %eax" "/var/tmp//ccE6r3xb.s", line 38 : Syntax error Near line: " leal ($a)(,%eax,4), %eax" "/var/tmp//ccE6r3xb.s", line 51 : Syntax error Near line: " movl ($a), %eax" "/var/tmp//ccE6r3xb.s", line 63 : Syntax error Near line: " movl ($a)+16, %eax" "/var/tmp//ccE6r3xb.s", line 97 : Syntax error Near line: " movl $($quux), %eax" "/var/tmp//ccE6r3xb.s", line 101 : Syntax error Near line: " .globl $a" "/var/tmp//ccE6r3xb.s", line 104 : Syntax error Near line: " .type $a, @object" "/var/tmp//ccE6r3xb.s", line 105 : Syntax error Near line: " .size $a, 72" "/var/tmp//ccE6r3xb.s", line 106 : Syntax error Near line: "$a:" "/var/tmp//ccE6r3xb.s", line 228 : Syntax error Near line: " .long ($a)" +FAIL: gcc.target/i386/pr91298-2.c (test for excess errors) It only allows letters, digits, '_' and '.' in identifiers: https://docs.oracle.com/cd/E37838_01/html/E61064/eqbsx.html#XALRMeoqjw Right now, we don't have an effective-target keyword matching -fdollars-in-identifiers. There are only 3 other tests using that option: gcc.dg/cpp/lexident.c preprocess only gcc.dg/ucnid-5-utf8.c skipped on avr*-*, powerpc-ibm-aix* (no $) skipped if ! ucn gcc.dg/ucnid-5.c skipped on avr*-*, powerpc-ibm-aix* (no $) So we might just xfail the new ones on *86*-*-solaris2* && !gas, I guess. Rainer
On Thu, Jan 23, 2020 at 09:12:27PM +0100, Rainer Orth wrote: > +FAIL: gcc.target/i386/pr91298-2.c (test for excess errors) > > It only allows letters, digits, '_' and '.' in identifiers: > https://docs.oracle.com/cd/E37838_01/html/E61064/eqbsx.html#XALRMeoqjw So, no way to emit other letters in identifiers through some other syntax? > Right now, we don't have an effective-target keyword matching > -fdollars-in-identifiers. There are only 3 other tests using that > option: > > gcc.dg/cpp/lexident.c preprocess only > gcc.dg/ucnid-5-utf8.c skipped on avr*-*, powerpc-ibm-aix* (no $) > skipped if ! ucn > gcc.dg/ucnid-5.c skipped on avr*-*, powerpc-ibm-aix* (no $) > > So we might just xfail the new ones on *86*-*-solaris2* && !gas, I guess. Or even dg-skip-if or { target { ! *86*-*-solaris2* || gas } }, I guess it really depends on whether there is any hope it might work there. Jakub
Hi Jakub, > On Thu, Jan 23, 2020 at 09:12:27PM +0100, Rainer Orth wrote: >> +FAIL: gcc.target/i386/pr91298-2.c (test for excess errors) >> >> It only allows letters, digits, '_' and '.' in identifiers: >> https://docs.oracle.com/cd/E37838_01/html/E61064/eqbsx.html#XALRMeoqjw > > So, no way to emit other letters in identifiers through some other syntax? none that I know of. There's certainly nothing documented. >> Right now, we don't have an effective-target keyword matching >> -fdollars-in-identifiers. There are only 3 other tests using that >> option: >> >> gcc.dg/cpp/lexident.c preprocess only >> gcc.dg/ucnid-5-utf8.c skipped on avr*-*, powerpc-ibm-aix* (no $) >> skipped if ! ucn >> gcc.dg/ucnid-5.c skipped on avr*-*, powerpc-ibm-aix* (no $) >> >> So we might just xfail the new ones on *86*-*-solaris2* && !gas, I guess. > > Or even dg-skip-if or { target { ! *86*-*-solaris2* || gas } }, I guess it > really depends on whether there is any hope it might work there. Unlikely: there's barely any development on the Solaris assemblers these days, though I prefer to xfail tests if possible to get notified if they suddenly start to work for some reason. Rainer
On Thu, Jan 23, 2020 at 09:19:52PM +0100, Rainer Orth wrote: > Unlikely: there's barely any development on the Solaris assemblers these > days, though I prefer to xfail tests if possible to get notified if they > suddenly start to work for some reason. Ok. Jakub
Hi Jakub, > On Thu, Jan 23, 2020 at 09:19:52PM +0100, Rainer Orth wrote: >> Unlikely: there's barely any development on the Solaris assemblers these >> days, though I prefer to xfail tests if possible to get notified if they >> suddenly start to work for some reason. > > Ok. here's what I've installed on master and the gcc-9 branch after testing on i386-pc-solaris2.11 with as and gas and x86_64-pc-linux-gnu. Rainer
--- gcc/output.h.jj 2020-01-12 11:54:36.692409199 +0100 +++ gcc/output.h 2020-01-17 09:51:27.606873017 +0100 @@ -237,6 +237,12 @@ extern void assemble_label (FILE *, cons addition of an underscore). */ extern void assemble_name_raw (FILE *, const char *); +/* Return NAME that should actually be emitted, looking through + transparent aliases. If NAME refers to an entity that is also + represented as a tree (like a function or variable), mark the entity + as referenced. */ +extern const char *assemble_name_resolve (const char *); + /* Like assemble_name_raw, but should be used when NAME might refer to an entity that is also represented as a tree (like a function or variable). If NAME does refer to such an entity, that entity will --- gcc/varasm.c.jj 2020-01-12 11:54:38.533381424 +0100 +++ gcc/varasm.c 2020-01-17 09:53:14.430239885 +0100 @@ -2589,20 +2589,16 @@ assemble_name_raw (FILE *file, const cha ASM_OUTPUT_LABELREF (file, name); } -/* Like assemble_name_raw, but should be used when NAME might refer to - an entity that is also represented as a tree (like a function or - variable). If NAME does refer to such an entity, that entity will - be marked as referenced. */ - -void -assemble_name (FILE *file, const char *name) +/* Return NAME that should actually be emitted, looking through + transparent aliases. If NAME refers to an entity that is also + represented as a tree (like a function or variable), mark the entity + as referenced. */ +const char * +assemble_name_resolve (const char *name) { - const char *real_name; - tree id; + const char *real_name = targetm.strip_name_encoding (name); + tree id = maybe_get_identifier (real_name); - real_name = targetm.strip_name_encoding (name); - - id = maybe_get_identifier (real_name); if (id) { tree id_orig = id; @@ -2614,7 +2610,18 @@ assemble_name (FILE *file, const char *n gcc_assert (! TREE_CHAIN (id)); } - assemble_name_raw (file, name); + return name; +} + +/* Like assemble_name_raw, but should be used when NAME might refer to + an entity that is also represented as a tree (like a function or + variable). If NAME does refer to such an entity, that entity will + be marked as referenced. */ + +void +assemble_name (FILE *file, const char *name) +{ + assemble_name_raw (file, assemble_name_resolve (name)); } /* Allocate SIZE bytes writable static space with a gensym name --- gcc/config/i386/i386.h.jj 2020-01-17 09:22:47.524148012 +0100 +++ gcc/config/i386/i386.h 2020-01-17 10:21:10.822480641 +0100 @@ -2258,6 +2258,31 @@ extern int const svr4_dbx_register_map[F #define ASM_OUTPUT_FUNCTION_LABEL(FILE, NAME, DECL) \ ix86_asm_output_function_label ((FILE), (NAME), (DECL)) +/* A C statement (sans semicolon) to output a reference to SYMBOL_REF SYM. + If not defined, assemble_name will be used to output the name of the + symbol. This macro may be used to modify the way a symbol is referenced + depending on information encoded by TARGET_ENCODE_SECTION_INFO. */ + +#ifndef ASM_OUTPUT_SYMBOL_REF +#define ASM_OUTPUT_SYMBOL_REF(FILE, SYM) \ + do { \ + const char *name \ + = assemble_name_resolve (XSTR (x, 0)); \ + /* In -masm=att wrap identifiers that start with $ \ + into parens. */ \ + if (name[0] == '$' \ + && user_label_prefix[0] == '\0' \ + && ASSEMBLER_DIALECT == ASM_ATT) \ + { \ + fputc ('(', (FILE)); \ + assemble_name_raw ((FILE), name); \ + fputc (')', (FILE)); \ + } \ + else \ + assemble_name_raw ((FILE), name); \ + } while (0) +#endif + /* Under some conditions we need jump tables in the text section, because the assembler cannot handle label differences between sections. This is the case for x86_64 on Mach-O for example. */ --- gcc/testsuite/gcc.target/i386/pr91298-1.c.jj 2020-01-17 10:45:24.172222204 +0100 +++ gcc/testsuite/gcc.target/i386/pr91298-1.c 2020-01-17 10:44:46.388800409 +0100 @@ -0,0 +1,14 @@ +/* PR target/91298 */ +/* { dg-do assemble } */ +/* { dg-options "-O2 -g -fdollars-in-identifiers" } */ + +int $a[18]; +int *foo (void) { return &$a[0]; } +int *bar (int x) { return &$a[x]; } +int baz (void) { return $a[0]; } +int qux (void) { return $a[4]; } +int $quux (void) { return 1; } +int corge (void) { return $quux (); } +int grault (void) { return $quux () + 1; } +typedef int (*fn) (void); +fn foobar (void) { return $quux; } --- gcc/testsuite/gcc.target/i386/pr91298-2.c.jj 2020-01-17 10:45:30.834120262 +0100 +++ gcc/testsuite/gcc.target/i386/pr91298-2.c 2020-01-17 10:45:57.460712790 +0100 @@ -0,0 +1,5 @@ +/* PR target/91298 */ +/* { dg-do assemble { target fpic } } */ +/* { dg-options "-O2 -g -fdollars-in-identifiers -fpic" } */ + +#include "pr91298-1.c"