diff mbox series

[committed] d: Fix gdc -O2 -mavx generates misaligned vmovdqa instruction [PR114171]

Message ID 20240303014837.808610-1-ibuclaw@gdcproject.org
State New
Headers show
Series [committed] d: Fix gdc -O2 -mavx generates misaligned vmovdqa instruction [PR114171] | expand

Commit Message

Iain Buclaw March 3, 2024, 1:48 a.m. UTC
Hi,

This patch fixes a wrong code issue in the D front-end where lowered
struct comparisons would reinterpret fields with a different (usually
bigger) alignment than the original.  Use `build_aligned_type' to
preserve the alignment when casting away for such comparisons.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
to mainline, and backported to releases/gcc-13, releases/gcc-12, and
releases/gcc-11.

Regards,
Iain.
---
	PR d/114171

gcc/d/ChangeLog:

	* d-codegen.cc (lower_struct_comparison): Keep alignment of original
	type in reinterpret cast for comparison.

gcc/testsuite/ChangeLog:

	* gdc.dg/torture/pr114171.d: New test.
---
 gcc/d/d-codegen.cc                      |  1 +
 gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d

Comments

Richard Biener March 3, 2024, 10:41 a.m. UTC | #1
> Am 03.03.2024 um 02:51 schrieb Iain Buclaw <ibuclaw@gdcproject.org>:
> 
> Hi,
> 
> This patch fixes a wrong code issue in the D front-end where lowered
> struct comparisons would reinterpret fields with a different (usually
> bigger) alignment than the original.  Use `build_aligned_type' to
> preserve the alignment when casting away for such comparisons.
> 
> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
> releases/gcc-11.

LGTM.  You might want to experiment with not doing the premature optimization but instead use __builtin_memcmp_eq (assuming that’s a general good fit).  The middle-end should better turn that into target optimal code.

Richard 

> Regards,
> Iain.
> ---
>    PR d/114171
> 
> gcc/d/ChangeLog:
> 
>    * d-codegen.cc (lower_struct_comparison): Keep alignment of original
>    type in reinterpret cast for comparison.
> 
> gcc/testsuite/ChangeLog:
> 
>    * gdc.dg/torture/pr114171.d: New test.
> ---
> gcc/d/d-codegen.cc                      |  1 +
> gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++
> 2 files changed, 30 insertions(+)
> create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d
> 
> diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
> index 5bc233928aa..43d7739f8fc 100644
> --- a/gcc/d/d-codegen.cc
> +++ b/gcc/d/d-codegen.cc
> @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd,
>          if (tmode == NULL_TREE)
>        tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
> 
> +          tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));
>          t1ref = build_vconvert (tmode, t1ref);
>          t2ref = build_vconvert (tmode, t2ref);
> 
> diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d b/gcc/testsuite/gdc.dg/torture/pr114171.d
> new file mode 100644
> index 00000000000..0f9ffcab916
> --- /dev/null
> +++ b/gcc/testsuite/gdc.dg/torture/pr114171.d
> @@ -0,0 +1,29 @@
> +// { dg-do run }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
> +import gcc.builtins;
> +
> +struct S1
> +{
> +    string label;
> +}
> +
> +struct S2
> +{
> +    ulong pad;
> +    S1 label;
> +}
> +
> +pragma(inline, false)
> +auto newitem()
> +{
> +    void *p = __builtin_malloc(S2.sizeof);
> +    __builtin_memset(p, 0, S2.sizeof);
> +    return cast(S2*) p;
> +}
> +
> +int main()
> +{
> +    auto bn = newitem();
> +    return bn.label is S1.init ? 0 : 1;
> +}
> --
> 2.40.1
>
Iain Buclaw March 3, 2024, 2:23 p.m. UTC | #2
Excerpts from Richard Biener's message of März 3, 2024 11:41 am:
> 
> 
>> Am 03.03.2024 um 02:51 schrieb Iain Buclaw <ibuclaw@gdcproject.org>:
>> 
>> Hi,
>> 
>> This patch fixes a wrong code issue in the D front-end where lowered
>> struct comparisons would reinterpret fields with a different (usually
>> bigger) alignment than the original.  Use `build_aligned_type' to
>> preserve the alignment when casting away for such comparisons.
>> 
>> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
>> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
>> releases/gcc-11.
> 
> LGTM.  You might want to experiment with not doing the premature optimization but instead use __builtin_memcmp_eq (assuming that’s a general good fit).  The middle-end should better turn that into target optimal code.
> 

Indeed, just looking at the history, it was introduced well over ten
years ago so I can't comment on the original context for it (it doesn't
directly fix any old issues).

Small comparison between this optimization and memcmp_eq
---
  _5 = newitem ();
  # DEBUG bn => _5
  _1 = MEM[(ucent *)_5 + 8B];
  _2 = _1 != 0;
  _6 = (int) _2;
  return _6;
---
        call    _D8pr1141717newitemFNbNiZPSQz2S2
.LVL29:
        vmovdqu 8(%rax), %xmm0
        xorl    %eax, %eax
.LVL30:
        vptest  %xmm0, %xmm0
        setne   %al
        addq    $8, %rsp
        ret


---
  _6 = newitem ();
  # DEBUG bn => _6
  D.2335.length = 0;
  D.2335.ptr = 0B;
  _1 = &_6->label.label;
  _2 = __builtin_memcmp_eq (_1, &D.2335, 16);
  _3 = _2 != 0;
  _9 = (int) _3;
  return _9;
---
        call    _D8pr1141717newitemFNbNiZPSQz2S2
.LVL29:
        movq    $0, (%rsp)
        movq    $0, 8(%rsp)
        vmovdqu 8(%rax), %xmm0
        xorl    %eax, %eax
.LVL30:
        vpxor   (%rsp), %xmm0, %xmm0
        vptest  %xmm0, %xmm0
        setne   %al
        addq    $24, %rsp
        ret
Andrew Pinski March 3, 2024, 10:49 p.m. UTC | #3
On Sat, Mar 2, 2024 at 5:51 PM Iain Buclaw <ibuclaw@gdcproject.org> wrote:
>
> Hi,
>
> This patch fixes a wrong code issue in the D front-end where lowered
> struct comparisons would reinterpret fields with a different (usually
> bigger) alignment than the original.  Use `build_aligned_type' to
> preserve the alignment when casting away for such comparisons.
>
> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
> releases/gcc-11.
>
> Regards,
> Iain.
> ---
>         PR d/114171
>
> gcc/d/ChangeLog:
>
>         * d-codegen.cc (lower_struct_comparison): Keep alignment of original
>         type in reinterpret cast for comparison.
>
> gcc/testsuite/ChangeLog:
>
>         * gdc.dg/torture/pr114171.d: New test.
> ---
>  gcc/d/d-codegen.cc                      |  1 +
>  gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++
>  2 files changed, 30 insertions(+)
>  create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d
>
> diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
> index 5bc233928aa..43d7739f8fc 100644
> --- a/gcc/d/d-codegen.cc
> +++ b/gcc/d/d-codegen.cc
> @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd,
>               if (tmode == NULL_TREE)
>                 tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
>
> +             tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));

You might also need to build a may_alias variant too. Or make sure the
access is using the correct aliasing set.
I have not checked if the D front-end does anything special for
aliasing sets so I am not sure if that is needed or not but I suspect
it is.

Thanks,
Andrew Pinski


>               t1ref = build_vconvert (tmode, t1ref);
>               t2ref = build_vconvert (tmode, t2ref);
>
> diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d b/gcc/testsuite/gdc.dg/torture/pr114171.d
> new file mode 100644
> index 00000000000..0f9ffcab916
> --- /dev/null
> +++ b/gcc/testsuite/gdc.dg/torture/pr114171.d
> @@ -0,0 +1,29 @@
> +// { dg-do run }
> +// { dg-additional-options "-mavx" { target avx_runtime } }
> +// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
> +import gcc.builtins;
> +
> +struct S1
> +{
> +    string label;
> +}
> +
> +struct S2
> +{
> +    ulong pad;
> +    S1 label;
> +}
> +
> +pragma(inline, false)
> +auto newitem()
> +{
> +    void *p = __builtin_malloc(S2.sizeof);
> +    __builtin_memset(p, 0, S2.sizeof);
> +    return cast(S2*) p;
> +}
> +
> +int main()
> +{
> +    auto bn = newitem();
> +    return bn.label is S1.init ? 0 : 1;
> +}
> --
> 2.40.1
>
Iain Buclaw March 3, 2024, 11:09 p.m. UTC | #4
Excerpts from Andrew Pinski's message of März 3, 2024 11:49 pm:
> On Sat, Mar 2, 2024 at 5:51 PM Iain Buclaw <ibuclaw@gdcproject.org> wrote:
>>
>> Hi,
>>
>> This patch fixes a wrong code issue in the D front-end where lowered
>> struct comparisons would reinterpret fields with a different (usually
>> bigger) alignment than the original.  Use `build_aligned_type' to
>> preserve the alignment when casting away for such comparisons.
>>
>> Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
>> to mainline, and backported to releases/gcc-13, releases/gcc-12, and
>> releases/gcc-11.
>>
>> Regards,
>> Iain.
>> ---
>>         PR d/114171
>>
>> gcc/d/ChangeLog:
>>
>>         * d-codegen.cc (lower_struct_comparison): Keep alignment of original
>>         type in reinterpret cast for comparison.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gdc.dg/torture/pr114171.d: New test.
>> ---
>>  gcc/d/d-codegen.cc                      |  1 +
>>  gcc/testsuite/gdc.dg/torture/pr114171.d | 29 +++++++++++++++++++++++++
>>  2 files changed, 30 insertions(+)
>>  create mode 100644 gcc/testsuite/gdc.dg/torture/pr114171.d
>>
>> diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
>> index 5bc233928aa..43d7739f8fc 100644
>> --- a/gcc/d/d-codegen.cc
>> +++ b/gcc/d/d-codegen.cc
>> @@ -1006,6 +1006,7 @@ lower_struct_comparison (tree_code code, StructDeclaration *sd,
>>               if (tmode == NULL_TREE)
>>                 tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
>>
>> +             tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));
> 
> You might also need to build a may_alias variant too. Or make sure the
> access is using the correct aliasing set.
> I have not checked if the D front-end does anything special for
> aliasing sets so I am not sure if that is needed or not but I suspect
> it is.
> 

There are no alias sets defined in the D front-end - the reference
compiler doesn't enforce it, which has allowed enough code out there
to expect modifying bits (eg: of a float) through a reinterpret cast
(such as an int*) to just work.

Thanks for the reminder though.
Iain.
diff mbox series

Patch

diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
index 5bc233928aa..43d7739f8fc 100644
--- a/gcc/d/d-codegen.cc
+++ b/gcc/d/d-codegen.cc
@@ -1006,6 +1006,7 @@  lower_struct_comparison (tree_code code, StructDeclaration *sd,
 	      if (tmode == NULL_TREE)
 		tmode = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
 
+	      tmode = build_aligned_type (tmode, TYPE_ALIGN (stype));
 	      t1ref = build_vconvert (tmode, t1ref);
 	      t2ref = build_vconvert (tmode, t2ref);
 
diff --git a/gcc/testsuite/gdc.dg/torture/pr114171.d b/gcc/testsuite/gdc.dg/torture/pr114171.d
new file mode 100644
index 00000000000..0f9ffcab916
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/torture/pr114171.d
@@ -0,0 +1,29 @@ 
+// { dg-do run }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-skip-if "needs gcc/config.d" { ! d_runtime } }
+import gcc.builtins;
+
+struct S1
+{
+    string label;
+}
+
+struct S2
+{
+    ulong pad;
+    S1 label;
+}
+
+pragma(inline, false)
+auto newitem()
+{
+    void *p = __builtin_malloc(S2.sizeof);
+    __builtin_memset(p, 0, S2.sizeof);
+    return cast(S2*) p;
+}
+
+int main()
+{
+    auto bn = newitem();
+    return bn.label is S1.init ? 0 : 1;
+}