diff mbox series

Fix #27777 - now use a doubly-linked list for _IO_list_all

Message ID 20240426141847.2458829-1-alexandre.ferrieux@orange.com
State New
Headers show
Series Fix #27777 - now use a doubly-linked list for _IO_list_all | expand

Commit Message

alexandre.ferrieux@orange.com April 26, 2024, 2:18 p.m. UTC
From: Alexandre Ferrieux <alexandre.ferrieux@orange.com>

Hi,

This patch fixes #27777 "fclose does a linear search, takes ages when many FILE* are opened".
Simply put, the master list of opened (FILE*), namely _IO_list_all, is a singly-linked list.
As a consequence, the removal of a single element is in O(N), which cripples the performance
of fopen(). The patch switches to a doubly-linked list, yielding O(1) removal.

As an illustration, after opening 1 million (FILE*), the fclose() of 100 of them takes more
than 6 seconds without the patch, and under a millisecond with it.

Signed-off-by: Alexandre Ferrieux <alexandre.ferrieux@orange.com>


---
 elf/libc_early_init.c          |  3 +++
 libio/bits/types/struct_FILE.h |  2 +-
 libio/genops.c                 | 19 +++++++++----------
 libio/libioP.h                 | 11 +++++++----
 libio/stdfiles.c               |  8 ++++++++
 5 files changed, 28 insertions(+), 15 deletions(-)

Comments

H.J. Lu April 26, 2024, 2:45 p.m. UTC | #1
On Fri, Apr 26, 2024 at 7:19 AM <alexandre.ferrieux@orange.com> wrote:
>
> From: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
>
> Hi,
>
> This patch fixes #27777 "fclose does a linear search, takes ages when many FILE* are opened".
> Simply put, the master list of opened (FILE*), namely _IO_list_all, is a singly-linked list.
> As a consequence, the removal of a single element is in O(N), which cripples the performance
> of fopen(). The patch switches to a doubly-linked list, yielding O(1) removal.
>
> As an illustration, after opening 1 million (FILE*), the fclose() of 100 of them takes more
> than 6 seconds without the patch, and under a millisecond with it.
>
> Signed-off-by: Alexandre Ferrieux <alexandre.ferrieux@orange.com>
>
>
> ---
>  elf/libc_early_init.c          |  3 +++
>  libio/bits/types/struct_FILE.h |  2 +-
>  libio/genops.c                 | 19 +++++++++----------
>  libio/libioP.h                 | 11 +++++++----
>  libio/stdfiles.c               |  8 ++++++++
>  5 files changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
> index 575b837f8f..756274f652 100644
> --- a/elf/libc_early_init.c
> +++ b/elf/libc_early_init.c
> @@ -23,6 +23,7 @@
>  #include <lowlevellock.h>
>  #include <pthread_early_init.h>
>  #include <sys/single_threaded.h>
> +#include <libioP.h>
>
>  #ifdef SHARED
>  _Bool __libc_initial;
> @@ -41,6 +42,8 @@ __libc_early_init (_Bool initial)
>    __libc_single_threaded_internal = __libc_initial = initial;
>  #endif
>
> +  __stdfiles_init ();
> +
>    __pthread_early_init ();
>
>  #if ENABLE_ELISION_SUPPORT
> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
> index 7cdaae86f8..daebd6ce0a 100644
> --- a/libio/bits/types/struct_FILE.h
> +++ b/libio/bits/types/struct_FILE.h
> @@ -67,7 +67,7 @@ struct _IO_FILE
>
>    struct _IO_marker *_markers;
>
> -  struct _IO_FILE *_chain;
> +  struct _IO_FILE *_chain,**_prevchain;

We need to add a new symbol version for stdio functions
when FILE size is changed.

>    int _fileno;
>    int _flags2;
> diff --git a/libio/genops.c b/libio/genops.c
> index bc45e60a09..a27f65581b 100644
> --- a/libio/genops.c
> +++ b/libio/genops.c
> @@ -53,7 +53,6 @@ _IO_un_link (struct _IO_FILE_plus *fp)
>  {
>    if (fp->file._flags & _IO_LINKED)
>      {
> -      FILE **f;
>  #ifdef _IO_MTSAFE_IO
>        _IO_cleanup_region_start_noarg (flush_cleanup);
>        _IO_lock_lock (list_all_lock);
> @@ -62,15 +61,13 @@ _IO_un_link (struct _IO_FILE_plus *fp)
>  #endif
>        if (_IO_list_all == NULL)
>         ;
> -      else if (fp == _IO_list_all)
> -       _IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
> -      else
> -       for (f = &_IO_list_all->file._chain; *f; f = &(*f)->_chain)
> -         if (*f == (FILE *) fp)
> -           {
> -             *f = fp->file._chain;
> -             break;
> -           }
> +      else {
> +       struct _IO_FILE_plus **pr,*nx;
> +       pr=(struct _IO_FILE_plus**)fp->file._prevchain;
> +       nx=(struct _IO_FILE_plus*)fp->file._chain;
> +       *pr=nx;
> +       if (nx) nx->file._prevchain=(FILE **)pr;
> +      }
>        fp->file._flags &= ~_IO_LINKED;
>  #ifdef _IO_MTSAFE_IO
>        _IO_funlockfile ((FILE *) fp);
> @@ -95,6 +92,8 @@ _IO_link_in (struct _IO_FILE_plus *fp)
>        _IO_flockfile ((FILE *) fp);
>  #endif
>        fp->file._chain = (FILE *) _IO_list_all;
> +      fp->file._prevchain = (FILE **) &_IO_list_all;
> +      _IO_list_all->file._prevchain=&fp->file._chain;
>        _IO_list_all = fp;
>  #ifdef _IO_MTSAFE_IO
>        _IO_funlockfile ((FILE *) fp);
> diff --git a/libio/libioP.h b/libio/libioP.h
> index 1af287b19f..5862f815bc 100644
> --- a/libio/libioP.h
> +++ b/libio/libioP.h
> @@ -429,6 +429,9 @@ libc_hidden_proto (_IO_list_resetlock)
>  extern void _IO_enable_locks (void) __THROW;
>  libc_hidden_proto (_IO_enable_locks)
>
> +/* Needed for proper initialization of the doubly-linked list */
> +extern void __stdfiles_init(void);
> +
>  /* Default jumptable functions. */
>
>  extern int _IO_default_underflow (FILE *) __THROW;
> @@ -904,12 +907,12 @@ extern int _IO_vscanf (const char *, va_list) __THROW;
>  # ifdef _IO_USE_OLD_IO_FILE
>  #  define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
>         { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
> -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
> +        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD,  \
>          0, _IO_pos_BAD, 0, 0, { 0 }, &_IO_stdfile_##FD##_lock }
>  # else
>  #  define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
>         { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
> -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
> +        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD,  \
>          0, _IO_pos_BAD, 0, 0, { 0 }, &_IO_stdfile_##FD##_lock, _IO_pos_BAD,\
>          NULL, WDP, 0 }
>  # endif
> @@ -917,12 +920,12 @@ extern int _IO_vscanf (const char *, va_list) __THROW;
>  # ifdef _IO_USE_OLD_IO_FILE
>  #  define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
>         { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
> -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
> +        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD,  \
>          0, _IO_pos_BAD }
>  # else
>  #  define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
>         { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
> -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
> +        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD,  \
>          0, _IO_pos_BAD, 0, 0, { 0 }, 0, _IO_pos_BAD, \
>          NULL, WDP, 0 }
>  # endif
> diff --git a/libio/stdfiles.c b/libio/stdfiles.c
> index cd8eca8bf3..87c9b249df 100644
> --- a/libio/stdfiles.c
> +++ b/libio/stdfiles.c
> @@ -54,4 +54,12 @@ DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
>  DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
>
>  struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
> +
> +void __stdfiles_init(void) // finish the double-linking for stdfiles as static initialization cannot
> +{
> +  struct _IO_FILE **f;
> +
> +  for(f=(struct _IO_FILE **)&_IO_list_all;(*f);f=&(**f)._chain) (**f)._prevchain=f;
> +}
> +
>  libc_hidden_data_def (_IO_list_all)
> --
> 2.30.2
>
> ____________________________________________________________________________________________________________
> Ce message et ses pieces jointes peuvent contenir des informations confidentielles ou privilegiees et ne doivent donc
> pas etre diffuses, exploites ou copies sans autorisation. Si vous avez recu ce message par erreur, veuillez le signaler
> a l'expediteur et le detruire ainsi que les pieces jointes. Les messages electroniques etant susceptibles d'alteration,
> Orange decline toute responsabilite si ce message a ete altere, deforme ou falsifie. Merci.
>
> This message and its attachments may contain confidential or privileged information that may be protected by law;
> they should not be distributed, used or copied without authorisation.
> If you have received this email in error, please notify the sender and delete this message and its attachments.
> As emails may be altered, Orange is not liable for messages that have been modified, changed or falsified.
> Thank you.
>
H.J. Lu April 26, 2024, 3:05 p.m. UTC | #2
On Fri, Apr 26, 2024 at 8:02 AM <alexandre.ferrieux@orange.com> wrote:
>
> On 26/04/2024 16:45, H.J. Lu wrote:
> >
> >> -  struct _IO_FILE *_chain;
> >> +  struct _IO_FILE *_chain,**_prevchain;
> >
> > We need to add a new symbol version for stdio functions
> > when FILE size is changed.
>
> Is there a README introducing newcomers (like me) to the shenanigans of symvers,
> the rules chosen for libc and their rationale ?

Please do "git grep COMPAT libio" for stdio symbol version examples.

> Otherwise, it looks like you'll be way faster than me to do it. Be my guest.
>
H.J. Lu April 26, 2024, 3:12 p.m. UTC | #3
On Fri, Apr 26, 2024 at 8:05 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Apr 26, 2024 at 8:02 AM <alexandre.ferrieux@orange.com> wrote:
> >
> > On 26/04/2024 16:45, H.J. Lu wrote:
> > >
> > >> -  struct _IO_FILE *_chain;
> > >> +  struct _IO_FILE *_chain,**_prevchain;

When expanding FILE, the new field should be placed at the end
so that the compat stdio functions can work properly.

> > >
> > > We need to add a new symbol version for stdio functions
> > > when FILE size is changed.
> >
> > Is there a README introducing newcomers (like me) to the shenanigans of symvers,
> > the rules chosen for libc and their rationale ?
>
> Please do "git grep COMPAT libio" for stdio symbol version examples.
>
> > Otherwise, it looks like you'll be way faster than me to do it. Be my guest.
> >
>
>
> --
> H.J.
H.J. Lu April 26, 2024, 3:16 p.m. UTC | #4
On Fri, Apr 26, 2024 at 8:13 AM <alexandre.ferrieux@orange.com> wrote:
>
> On 26/04/2024 17:05, H.J. Lu wrote:
> > --------------------------------------------------------------------------------------------------------------
> > CAUTION : This email originated outside the company. Do not click on any links or open attachments unless you are expecting them from the sender.
> >
> > ATTENTION : Cet e-mail provient de l'extérieur de l'entreprise. Ne cliquez pas sur les liens ou n'ouvrez pas les pièces jointes à moins de connaitre l'expéditeur.
> > --------------------------------------------------------------------------------------------------------------
> >
> > On Fri, Apr 26, 2024 at 8:02 AM <alexandre.ferrieux@orange.com> wrote:
> >>
> >> On 26/04/2024 16:45, H.J. Lu wrote:
> >> >
> >> >> -  struct _IO_FILE *_chain;
> >> >> +  struct _IO_FILE *_chain,**_prevchain;
> >> >
> >> > We need to add a new symbol version for stdio functions
> >> > when FILE size is changed.
> >>
> >> Is there a README introducing newcomers (like me) to the shenanigans of symvers,
> >> the rules chosen for libc and their rationale ?
> >
> > Please do "git grep COMPAT libio" for stdio symbol version examples.
>
> Sorry, I won't guess from examples. Rules and rationale, anybody ?

It is very straightforward. The binaries compiled against the old FILE
must work correctly with the new glibc.
H.J. Lu April 26, 2024, 4:08 p.m. UTC | #5
On Fri, Apr 26, 2024 at 8:19 AM <alexandre.ferrieux@orange.com> wrote:
>
>
>
> On 26/04/2024 17:16, H.J. Lu wrote:
> >
> > On Fri, Apr 26, 2024 at 8:13 AM <alexandre.ferrieux@orange.com> wrote:
> >>
> >> On 26/04/2024 17:05, H.J. Lu wrote:
> >> >
> >> > On Fri, Apr 26, 2024 at 8:02 AM <alexandre.ferrieux@orange.com> wrote:
> >> >>
> >> >> On 26/04/2024 16:45, H.J. Lu wrote:
> >> >> >
> >> >> >> -  struct _IO_FILE *_chain;
> >> >> >> +  struct _IO_FILE *_chain,**_prevchain;
> >> >> >
> >> >> > We need to add a new symbol version for stdio functions
> >> >> > when FILE size is changed.
> >> >>
> >> >> Is there a README introducing newcomers (like me) to the shenanigans of symvers,
> >> >> the rules chosen for libc and their rationale ?
> >> >
> >> > Please do "git grep COMPAT libio" for stdio symbol version examples.
> >>
> >> Sorry, I won't guess from examples. Rules and rationale, anybody ?
> >
> > It is very straightforward. The binaries compiled against the old FILE
> > must work correctly with the new glibc.
>
> I had the naive assumption that FILE was an opaque type for libc users.
> Is it wrong ? See why a rationale document is needed ?

I updated your patch with a new version.  _IO_un_link@@GLIBC_2.40
must be added to all libc.abilist files.
H.J. Lu April 26, 2024, 4:24 p.m. UTC | #6
On Fri, Apr 26, 2024 at 9:21 AM <alexandre.ferrieux@orange.com> wrote:
>
>
>
> On 26/04/2024 18:08, H.J. Lu wrote:
> >> >> >> >
> >> >> >> >> -  struct _IO_FILE *_chain;
> >> >> >> >> +  struct _IO_FILE *_chain,**_prevchain;
> >> >> >> >
> >> >> >> > We need to add a new symbol version for stdio functions
> >> >> >> > when FILE size is changed.
> >> >> >>
> >> >> >> Is there a README introducing newcomers (like me) to the shenanigans of symvers,
> >> >> >> the rules chosen for libc and their rationale ?
> >> >> >
> >> >> > Please do "git grep COMPAT libio" for stdio symbol version examples.
> >> >>
> >> >> Sorry, I won't guess from examples. Rules and rationale, anybody ?
> >> >
> >> > It is very straightforward. The binaries compiled against the old FILE
> >> > must work correctly with the new glibc.
> >>
> >> I had the naive assumption that FILE was an opaque type for libc users.
> >> Is it wrong ? See why a rationale document is needed ?
> >
> > I updated your patch with a new version.  _IO_un_link@@GLIBC_2.40
> > must be added to all libc.abilist files.
>
>
> Thanks a lot.
> But why is only _IO_un_link concerned by this versioning, and not also _IO_link_in ?
>

Yes, _IO_link_in also needs a new version.
Florian Weimer April 26, 2024, 5:51 p.m. UTC | #7
* alexandre ferrieux:

> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
> index 7cdaae86f8..daebd6ce0a 100644
> --- a/libio/bits/types/struct_FILE.h
> +++ b/libio/bits/types/struct_FILE.h
> @@ -67,7 +67,7 @@ struct _IO_FILE
>  
>    struct _IO_marker *_markers;
>  
> -  struct _IO_FILE *_chain;
> +  struct _IO_FILE *_chain,**_prevchain;
>  
>    int _fileno;
>    int _flags2;

Unfortunately, this struct is part of the ABI for several reasons.  For
example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
it has a struct _IO_FILE member.  It cannot change size due to copy
relocations, even if nothing accesses members after _chain.

I disagree with H.J.'s suggestion that the answer is symbol
versions. 8-)

Two options come to my mind: Use the FILE * value itself as a hash table
key (and use _chain for resolving bucket conflicts), or use a dynarray
and replace _chain with a union of a pointer and an unsigned int,
containing the dynarray index for that particular file.  The latter has
the advantage that we can use it to implement arbitrary extensions in
the future.  Either approach should preserve compatibility with GCC 2.95
libstdc++ if we are a bit careful.

Historically, we used a single-linked list here because we wanted to
traverse it without locking, but we are no longer doing that, so we can
pick any data structure we want instead.

Thanks,
Florian
alexandre.ferrieux@orange.com April 26, 2024, 6:20 p.m. UTC | #8
On 26/04/2024 19:51, Florian Weimer wrote:
> 
> * alexandre ferrieux:
> 
>> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
>> index 7cdaae86f8..daebd6ce0a 100644
>> --- a/libio/bits/types/struct_FILE.h
>> +++ b/libio/bits/types/struct_FILE.h
>> @@ -67,7 +67,7 @@ struct _IO_FILE
>>  
>>    struct _IO_marker *_markers;
>>  
>> -  struct _IO_FILE *_chain;
>> +  struct _IO_FILE *_chain,**_prevchain;
>>  
>>    int _fileno;
>>    int _flags2;
> 
> Unfortunately, this struct is part of the ABI for several reasons.  For
> example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
> it has a struct _IO_FILE member.  It cannot change size due to copy
> relocations, even if nothing accesses members after _chain.
> 
> I disagree with H.J.'s suggestion that the answer is symbol
> versions. 8-)

Interesting !

So we are in the presence of software that can no longer change, _ever_ ?
What about bumping to _IO_2_2 ?

> Two options come to my mind: Use the FILE * value itself as a hash table
> key (and use _chain for resolving bucket conflicts), or use a dynarray
> and replace _chain with a union of a pointer and an unsigned int,
> containing the dynarray index for that particular file.  The latter has
> the advantage that we can use it to implement arbitrary extensions in
> the future.  Either approach should preserve compatibility with GCC 2.95
> libstdc++ if we are a bit careful.

Argggh. Jumping through complex, inefficient and hard-to-maintain hoops,
because a fundamentally opaque type has been inadvertently exposed ?

Imagine the state of _IO_FILE 50 years from now, after a dozen of similar hacks 
have piled up...

Are there absolutely no plans to someday deprecate the problematic, but 
extremely rare, accesses to those types that should have remained opaque ?
alexandre.ferrieux@orange.com April 26, 2024, 6:44 p.m. UTC | #9
On 26/04/2024 20:20, FERRIEUX Alexandre INNOV/NET wrote:
> 
> 
> On 26/04/2024 19:51, Florian Weimer wrote:
>>
>> * alexandre ferrieux:
>>
>>> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
>>> index 7cdaae86f8..daebd6ce0a 100644
>>> --- a/libio/bits/types/struct_FILE.h
>>> +++ b/libio/bits/types/struct_FILE.h
>>> @@ -67,7 +67,7 @@ struct _IO_FILE
>>>
>>>    struct _IO_marker *_markers;
>>>
>>> -  struct _IO_FILE *_chain;
>>> +  struct _IO_FILE *_chain,**_prevchain;
>>>
>>>    int _fileno;
>>>    int _flags2;
>>
>> Unfortunately, this struct is part of the ABI for several reasons.  For
>> example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
>> it has a struct _IO_FILE member.  It cannot change size due to copy
>> relocations, even if nothing accesses members after _chain.

Now what about sticking the field to the end of _IO_FILE_plus ?

  struct _IO_FILE_plus
  {
    FILE file;
    const struct _IO_jump_t *vtable;
+  struct _IO_FILE_plus **_prevchain;
  };

Wouldn't that be backwards compatible with everything ?
Or do you suspect that some aircraft, dams and power plants routinely rely on 
sizeof(_IO_FILE_plus) as they allocate and fill their file pointers by hand ?
H.J. Lu April 26, 2024, 6:50 p.m. UTC | #10
On Fri, Apr 26, 2024 at 10:51 AM Florian Weimer <fweimer@redhat.com> wrote:
>
> * alexandre ferrieux:
>
> > diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
> > index 7cdaae86f8..daebd6ce0a 100644
> > --- a/libio/bits/types/struct_FILE.h
> > +++ b/libio/bits/types/struct_FILE.h
> > @@ -67,7 +67,7 @@ struct _IO_FILE
> >
> >    struct _IO_marker *_markers;
> >
> > -  struct _IO_FILE *_chain;
> > +  struct _IO_FILE *_chain,**_prevchain;
> >
> >    int _fileno;
> >    int _flags2;
>
> Unfortunately, this struct is part of the ABI for several reasons.  For
> example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
> it has a struct _IO_FILE member.  It cannot change size due to copy
> relocations, even if nothing accesses members after _chain.
>
> I disagree with H.J.'s suggestion that the answer is symbol
> versions. 8-)

libio/bits/types/struct_FILE.h has

  __off64_t _offset;
  /* Wide character stream stuff.  */
  struct _IO_codecvt *_codecvt;
  struct _IO_wide_data *_wide_data;
  struct _IO_FILE *_freeres_list;
  void *_freeres_buf;
  size_t __pad5;
  int _mode;
  /* Make sure we don't get into trouble again.  */
  char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
};

We can add a pointer without changing FILE size.

> Two options come to my mind: Use the FILE * value itself as a hash table
> key (and use _chain for resolving bucket conflicts), or use a dynarray
> and replace _chain with a union of a pointer and an unsigned int,
> containing the dynarray index for that particular file.  The latter has
> the advantage that we can use it to implement arbitrary extensions in
> the future.  Either approach should preserve compatibility with GCC 2.95
> libstdc++ if we are a bit careful.
>
> Historically, we used a single-linked list here because we wanted to
> traverse it without locking, but we are no longer doing that, so we can
> pick any data structure we want instead.
>
> Thanks,
> Florian
>
alexandre.ferrieux@orange.com April 26, 2024, 7:04 p.m. UTC | #11
On 26/04/2024 20:50, H.J. Lu wrote:
>
> On Fri, Apr 26, 2024 at 10:51 AM Florian Weimer <fweimer@redhat.com> wrote:
>>
>> * alexandre ferrieux:
>>
>> > diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
>> > index 7cdaae86f8..daebd6ce0a 100644
>> > --- a/libio/bits/types/struct_FILE.h
>> > +++ b/libio/bits/types/struct_FILE.h
>> > @@ -67,7 +67,7 @@ struct _IO_FILE
>> >
>> >    struct _IO_marker *_markers;
>> >
>> > -  struct _IO_FILE *_chain;
>> > +  struct _IO_FILE *_chain,**_prevchain;
>> >
>> >    int _fileno;
>> >    int _flags2;
>>
>> Unfortunately, this struct is part of the ABI for several reasons.  For
>> example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
>> it has a struct _IO_FILE member.  It cannot change size due to copy
>> relocations, even if nothing accesses members after _chain.
>>
>> I disagree with H.J.'s suggestion that the answer is symbol
>> versions. 8-)
> 
> libio/bits/types/struct_FILE.h has
> 
>    __off64_t _offset;
>    /* Wide character stream stuff.  */
>    struct _IO_codecvt *_codecvt;
>    struct _IO_wide_data *_wide_data;
>    struct _IO_FILE *_freeres_list;
>    void *_freeres_buf;
>    size_t __pad5;
>    int _mode;
>    /* Make sure we don't get into trouble again.  */
>    char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
> };
> 
> We can add a pointer without changing FILE size.

Ha ! That's a clever trick. Presumably someone was already bitten once ;)
(Sorry I didn't notice it was present in your corrected patch).

Now, I still have worries about the symbol versioning approach: why restrict the 
version bump to _IO_un_link and _IO_link_in ? Indeed, any program calling 
fclose()  ends up calling _IO_un_link(), so the version check should apply too, 
right ? So shouldn't we tag all functions using (FILE *) in their arguments or 
return value ?
Florian Weimer April 26, 2024, 7:08 p.m. UTC | #12
* alexandre ferrieux:

> On 26/04/2024 19:51, Florian Weimer wrote:
>> * alexandre ferrieux:
>> 
>>> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
>>> index 7cdaae86f8..daebd6ce0a 100644
>>> --- a/libio/bits/types/struct_FILE.h
>>> +++ b/libio/bits/types/struct_FILE.h
>>> @@ -67,7 +67,7 @@ struct _IO_FILE
>>>     struct _IO_marker *_markers;
>>>  -  struct _IO_FILE *_chain;
>>> +  struct _IO_FILE *_chain,**_prevchain;
>>>     int _fileno;
>>>    int _flags2;
>> Unfortunately, this struct is part of the ABI for several reasons.
>> For
>> example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
>> it has a struct _IO_FILE member.  It cannot change size due to copy
>> relocations, even if nothing accesses members after _chain.
>> I disagree with H.J.'s suggestion that the answer is symbol
>> versions. 8-)
>
> Interesting !
>
> So we are in the presence of software that can no longer change, _ever_ ?
> What about bumping to _IO_2_2 ?

That requires specific code paths to deal with the earlier versions.

>> Two options come to my mind: Use the FILE * value itself as a hash table
>> key (and use _chain for resolving bucket conflicts), or use a dynarray
>> and replace _chain with a union of a pointer and an unsigned int,
>> containing the dynarray index for that particular file.  The latter has
>> the advantage that we can use it to implement arbitrary extensions in
>> the future.  Either approach should preserve compatibility with GCC 2.95
>> libstdc++ if we are a bit careful.
>
> Argggh. Jumping through complex, inefficient and hard-to-maintain hoops,
> because a fundamentally opaque type has been inadvertently exposed ?

The approaches I mention have the advantage that they work for both
glibc 2.0 and glibc 2.1 (current) file streams, using the same code,
which simplifies testing.

If you want to work on hiding the internals of struct _IO_FILE, that
would be very welcome, but it's a much larger project than fixing this
particular performance issue.

Thanks,
Florian
Florian Weimer April 26, 2024, 7:08 p.m. UTC | #13
* alexandre ferrieux:

> On 26/04/2024 20:20, FERRIEUX Alexandre INNOV/NET wrote:
>> On 26/04/2024 19:51, Florian Weimer wrote:
>>>
>>> * alexandre ferrieux:
>>>
>>>> diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
>>>> index 7cdaae86f8..daebd6ce0a 100644
>>>> --- a/libio/bits/types/struct_FILE.h
>>>> +++ b/libio/bits/types/struct_FILE.h
>>>> @@ -67,7 +67,7 @@ struct _IO_FILE
>>>>
>>>>    struct _IO_marker *_markers;
>>>>
>>>> -  struct _IO_FILE *_chain;
>>>> +  struct _IO_FILE *_chain,**_prevchain;
>>>>
>>>>    int _fileno;
>>>>    int _flags2;
>>>
>>> Unfortunately, this struct is part of the ABI for several reasons.  For
>>> example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
>>> it has a struct _IO_FILE member.  It cannot change size due to copy
>>> relocations, even if nothing accesses members after _chain.
>
> Now what about sticking the field to the end of _IO_FILE_plus ?
>
>  struct _IO_FILE_plus
>  {
>    FILE file;
>    const struct _IO_jump_t *vtable;
> +  struct _IO_FILE_plus **_prevchain;
>  };
>
> Wouldn't that be backwards compatible with everything ?

Sorry, no, struct _IO_FILE_plus is used to define _IO_2_1_stdin_.

Florian
Florian Weimer April 26, 2024, 7:09 p.m. UTC | #14
* H. J. Lu:

> On Fri, Apr 26, 2024 at 10:51 AM Florian Weimer <fweimer@redhat.com> wrote:
>>
>> * alexandre ferrieux:
>>
>> > diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
>> > index 7cdaae86f8..daebd6ce0a 100644
>> > --- a/libio/bits/types/struct_FILE.h
>> > +++ b/libio/bits/types/struct_FILE.h
>> > @@ -67,7 +67,7 @@ struct _IO_FILE
>> >
>> >    struct _IO_marker *_markers;
>> >
>> > -  struct _IO_FILE *_chain;
>> > +  struct _IO_FILE *_chain,**_prevchain;
>> >
>> >    int _fileno;
>> >    int _flags2;
>>
>> Unfortunately, this struct is part of the ABI for several reasons.  For
>> example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
>> it has a struct _IO_FILE member.  It cannot change size due to copy
>> relocations, even if nothing accesses members after _chain.
>>
>> I disagree with H.J.'s suggestion that the answer is symbol
>> versions. 8-)
>
> libio/bits/types/struct_FILE.h has
>
>   __off64_t _offset;
>   /* Wide character stream stuff.  */
>   struct _IO_codecvt *_codecvt;
>   struct _IO_wide_data *_wide_data;
>   struct _IO_FILE *_freeres_list;
>   void *_freeres_buf;
>   size_t __pad5;
>   int _mode;
>   /* Make sure we don't get into trouble again.  */
>   char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
> };
>
> We can add a pointer without changing FILE size.

Indeed, but that still needs special-cases for glibc 2.0 file handles,
which do not necessarily have the extended tail.  Maybe it's possible to
make the backlink processing optional for glibc 2.0 targets.

Thanks,
Florian
Florian Weimer April 26, 2024, 7:16 p.m. UTC | #15
* alexandre ferrieux:

> On 26/04/2024 20:50, H.J. Lu wrote:
>>
>> On Fri, Apr 26, 2024 at 10:51 AM Florian Weimer <fweimer@redhat.com> wrote:
>>>
>>> * alexandre ferrieux:
>>>
>>> > diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
>>> > index 7cdaae86f8..daebd6ce0a 100644
>>> > --- a/libio/bits/types/struct_FILE.h
>>> > +++ b/libio/bits/types/struct_FILE.h
>>> > @@ -67,7 +67,7 @@ struct _IO_FILE
>>> >
>>> >    struct _IO_marker *_markers;
>>> >
>>> > -  struct _IO_FILE *_chain;
>>> > +  struct _IO_FILE *_chain,**_prevchain;
>>> >
>>> >    int _fileno;
>>> >    int _flags2;
>>>
>>> Unfortunately, this struct is part of the ABI for several reasons.  For
>>> example, _IO_2_1_stdin_ is a global variable exported by libc.so.6, and
>>> it has a struct _IO_FILE member.  It cannot change size due to copy
>>> relocations, even if nothing accesses members after _chain.
>>>
>>> I disagree with H.J.'s suggestion that the answer is symbol
>>> versions. 8-)
>> libio/bits/types/struct_FILE.h has
>>    __off64_t _offset;
>>    /* Wide character stream stuff.  */
>>    struct _IO_codecvt *_codecvt;
>>    struct _IO_wide_data *_wide_data;
>>    struct _IO_FILE *_freeres_list;
>>    void *_freeres_buf;
>>    size_t __pad5;
>>    int _mode;
>>    /* Make sure we don't get into trouble again.  */
>>    char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
>> };
>> We can add a pointer without changing FILE size.
>
> Ha ! That's a clever trick. Presumably someone was already bitten once ;)
> (Sorry I didn't notice it was present in your corrected patch).
>
> Now, I still have worries about the symbol versioning approach: why
> restrict the version bump to _IO_un_link and _IO_link_in ? Indeed, any
> program calling fclose()  ends up calling _IO_un_link(), so the
> version check should apply too, right ? So shouldn't we tag all
> functions using (FILE *) in their arguments or return value ?

No new symbol versions should be needed.  You just need to be careful
not to access that part of the struct when it does not exist.

I think the way to check for old-style streams is via _IO_vtable_offset,
as can be seen at the start of the __fgetln function in an (unmerged)
patch I submitted a while back:

  [PATCH] stdio-common: Add the fgetln function
  <https://inbox.sourceware.org/libc-alpha/871qxbxe2i.fsf@oldenburg.str.redhat.com/>

In your case, you would have to do the full traversal once you encounter
a missing backlink.  But that would only happen if old-style file
handles are actually used.  Maybe this is even easier to implement than
the hash table or index approach.

Thanks,
Florian
alexandre.ferrieux@orange.com April 26, 2024, 8:15 p.m. UTC | #16
On 26/04/2024 21:16, Florian Weimer wrote:
> 
> * alexandre ferrieux:
> 
>> Now, I still have worries about the symbol versioning approach: why
>> restrict the version bump to _IO_un_link and _IO_link_in ? Indeed, any
>> program calling fclose()  ends up calling _IO_un_link(), so the
>> version check should apply too, right ? So shouldn't we tag all
>> functions using (FILE *) in their arguments or return value ?
> 
> No new symbol versions should be needed.  You just need to be careful
> not to access that part of the struct when it does not exist.
> 
> I think the way to check for old-style streams is via _IO_vtable_offset,
> as can be seen at the start of the __fgetln function in an (unmerged)
> patch I submitted a while back:
> 
>    [PATCH] stdio-common: Add the fgetln function
>    <https://inbox.sourceware.org/libc-alpha/871qxbxe2i.fsf@oldenburg.str.redhat.com/>
> 
> In your case, you would have to do the full traversal once you encounter
> a missing backlink.  But that would only happen if old-style file
> handles are actually used.  Maybe this is even easier to implement than
> the hash table or index approach.

Thanks for pointing me in the right direction. Now I see I need a runtime check 
for each individual file pointer on the list, as one cannot exclude a scenario 
where a (disgusting) old binary would malloc a decades-old variant of FILE, and 
stick it to the head of the list through an innocent _IO_link_in().

To do this in a robust manner, how about using _flags2 ?
(I see _flags has one value left, 0x4000, but it's "reserved for compat"... too bad)

Something like:

   #define _IO_FLAGS2_HAS_PREVCHAIN 256

   void 
 
 

   _IO_old_init (FILE *fp, int flags) 
 
 

   {
      ...
      fp->flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
   }

   void __stdfiles_init(void)
   {
      ...
      (**f).flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
   }


Then, (fp->flags2&_IO_FLAGS2_HAS_PREVCHAIN) becomes a reliable criterion for 
_IO_link_in and _IO_un_link to decide whether to use the new algorithm or the 
old one.

What do you think ?
Florian Weimer April 29, 2024, 1:20 p.m. UTC | #17
* alexandre ferrieux:

> To do this in a robust manner, how about using _flags2 ?
> (I see _flags has one value left, 0x4000, but it's "reserved for compat"... too bad)
>
> Something like:
>
>   #define _IO_FLAGS2_HAS_PREVCHAIN 256
>
>   void      _IO_old_init (FILE *fp, int flags)      {
>      ...
>      fp->flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
>   }
>
>   void __stdfiles_init(void)
>   {
>      ...
>      (**f).flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
>   }
>
>
> Then, (fp->flags2&_IO_FLAGS2_HAS_PREVCHAIN) becomes a reliable
> criterion for _IO_link_in and _IO_un_link to decide whether to use the
> new algorithm or the old one.
>
> What do you think ?

I believe you can use the vtable_offset field as that flag.  See this
code in stdio-common/vfprintf-internal.c:

    133 
    134 # define ORIENT         if (_IO_vtable_offset (s) == 0 && _IO_fwide (s,
    134  -1) != -1)\
    135                           return -1

Thanks,
Florian
alexandre.ferrieux@orange.com April 29, 2024, 7:05 p.m. UTC | #18
On 29/04/2024 15:20, Florian Weimer wrote:
> 
> * alexandre ferrieux:
> 
>> To do this in a robust manner, how about using _flags2 ?
>> (I see _flags has one value left, 0x4000, but it's "reserved for compat"... too bad)
>>
>> Something like:
>>
>>   #define _IO_FLAGS2_HAS_PREVCHAIN 256
>>
>>   void      _IO_old_init (FILE *fp, int flags)      {
>>      ...
>>      fp->flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
>>   }
>>
>>   void __stdfiles_init(void)
>>   {
>>      ...
>>      (**f).flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
>>   }
>>
>>
>> Then, (fp->flags2&_IO_FLAGS2_HAS_PREVCHAIN) becomes a reliable
>> criterion for _IO_link_in and _IO_un_link to decide whether to use the
>> new algorithm or the old one.
>>
>> What do you think ?
> 
> I believe you can use the vtable_offset field as that flag.  See this
> code in stdio-common/vfprintf-internal.c:
> 
>      133
>      134 # define ORIENT         if (_IO_vtable_offset (s) == 0 && _IO_fwide (s,
>      134  -1) != -1)\
>      135                           return -1

Why is it sufficient ? Doesn't the nullity of the vtable_offset identify a 
different point in versions history ?

An "old binary", recent enough to have the modern vtable_offset, but compiled 
with the include files just before the patch, can presumably create an old 
_IO_FILE, insert it "by hand" into _IO_list_all, *not* updating the prevchain of 
the next-in-line, and then crash when the latter is closed.
H.J. Lu April 30, 2024, 2:47 a.m. UTC | #19
On Mon, Apr 29, 2024 at 12:05 PM <alexandre.ferrieux@orange.com> wrote:
>
> On 29/04/2024 15:20, Florian Weimer wrote:
> >
> > * alexandre ferrieux:
> >
> >> To do this in a robust manner, how about using _flags2 ?
> >> (I see _flags has one value left, 0x4000, but it's "reserved for compat"... too bad)
> >>
> >> Something like:
> >>
> >>   #define _IO_FLAGS2_HAS_PREVCHAIN 256
> >>
> >>   void      _IO_old_init (FILE *fp, int flags)      {
> >>      ...
> >>      fp->flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
> >>   }
> >>
> >>   void __stdfiles_init(void)
> >>   {
> >>      ...
> >>      (**f).flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
> >>   }
> >>
> >>
> >> Then, (fp->flags2&_IO_FLAGS2_HAS_PREVCHAIN) becomes a reliable

This won't work for copy relocation in old binaries.

> >> criterion for _IO_link_in and _IO_un_link to decide whether to use the
> >> new algorithm or the old one.
> >>
> >> What do you think ?
> >
> > I believe you can use the vtable_offset field as that flag.  See this
> > code in stdio-common/vfprintf-internal.c:
> >
> >      133
> >      134 # define ORIENT         if (_IO_vtable_offset (s) == 0 && _IO_fwide (s,
> >      134  -1) != -1)\
> >      135                           return -1
>
> Why is it sufficient ? Doesn't the nullity of the vtable_offset identify a
> different point in versions history ?
>
> An "old binary", recent enough to have the modern vtable_offset, but compiled
> with the include files just before the patch, can presumably create an old
> _IO_FILE, insert it "by hand" into _IO_list_all, *not* updating the prevchain of
> the next-in-line, and then crash when the latter is closed.
>

Here is a patch with _IO_vtable_offset (s) == 0 check.
H.J. Lu April 30, 2024, 5:22 p.m. UTC | #20
On Mon, Apr 29, 2024 at 7:47 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Mon, Apr 29, 2024 at 12:05 PM <alexandre.ferrieux@orange.com> wrote:
> >
> > On 29/04/2024 15:20, Florian Weimer wrote:
> > >
> > > * alexandre ferrieux:
> > >
> > >> To do this in a robust manner, how about using _flags2 ?
> > >> (I see _flags has one value left, 0x4000, but it's "reserved for compat"... too bad)
> > >>
> > >> Something like:
> > >>
> > >>   #define _IO_FLAGS2_HAS_PREVCHAIN 256
> > >>
> > >>   void      _IO_old_init (FILE *fp, int flags)      {
> > >>      ...
> > >>      fp->flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
> > >>   }
> > >>
> > >>   void __stdfiles_init(void)
> > >>   {
> > >>      ...
> > >>      (**f).flags2 |= _IO_FLAGS2_HAS_PREVCHAIN ;
> > >>   }
> > >>
> > >>
> > >> Then, (fp->flags2&_IO_FLAGS2_HAS_PREVCHAIN) becomes a reliable
>
> This won't work for copy relocation in old binaries.
>
> > >> criterion for _IO_link_in and _IO_un_link to decide whether to use the
> > >> new algorithm or the old one.
> > >>
> > >> What do you think ?
> > >
> > > I believe you can use the vtable_offset field as that flag.  See this
> > > code in stdio-common/vfprintf-internal.c:
> > >
> > >      133
> > >      134 # define ORIENT         if (_IO_vtable_offset (s) == 0 && _IO_fwide (s,
> > >      134  -1) != -1)\
> > >      135                           return -1
> >
> > Why is it sufficient ? Doesn't the nullity of the vtable_offset identify a
> > different point in versions history ?
> >
> > An "old binary", recent enough to have the modern vtable_offset, but compiled
> > with the include files just before the patch, can presumably create an old
> > _IO_FILE, insert it "by hand" into _IO_list_all, *not* updating the prevchain of
> > the next-in-line, and then crash when the latter is closed.
> >
>
> Here is a patch with _IO_vtable_offset (s) == 0 check.
>

Here is the updated patch with a testcase:

https://patchwork.sourceware.org/project/glibc/list/?series=33314
diff mbox series

Patch

diff --git a/elf/libc_early_init.c b/elf/libc_early_init.c
index 575b837f8f..756274f652 100644
--- a/elf/libc_early_init.c
+++ b/elf/libc_early_init.c
@@ -23,6 +23,7 @@ 
 #include <lowlevellock.h>
 #include <pthread_early_init.h>
 #include <sys/single_threaded.h>
+#include <libioP.h>
 
 #ifdef SHARED
 _Bool __libc_initial;
@@ -41,6 +42,8 @@  __libc_early_init (_Bool initial)
   __libc_single_threaded_internal = __libc_initial = initial;
 #endif
 
+  __stdfiles_init ();
+
   __pthread_early_init ();
 
 #if ENABLE_ELISION_SUPPORT
diff --git a/libio/bits/types/struct_FILE.h b/libio/bits/types/struct_FILE.h
index 7cdaae86f8..daebd6ce0a 100644
--- a/libio/bits/types/struct_FILE.h
+++ b/libio/bits/types/struct_FILE.h
@@ -67,7 +67,7 @@  struct _IO_FILE
 
   struct _IO_marker *_markers;
 
-  struct _IO_FILE *_chain;
+  struct _IO_FILE *_chain,**_prevchain;
 
   int _fileno;
   int _flags2;
diff --git a/libio/genops.c b/libio/genops.c
index bc45e60a09..a27f65581b 100644
--- a/libio/genops.c
+++ b/libio/genops.c
@@ -53,7 +53,6 @@  _IO_un_link (struct _IO_FILE_plus *fp)
 {
   if (fp->file._flags & _IO_LINKED)
     {
-      FILE **f;
 #ifdef _IO_MTSAFE_IO
       _IO_cleanup_region_start_noarg (flush_cleanup);
       _IO_lock_lock (list_all_lock);
@@ -62,15 +61,13 @@  _IO_un_link (struct _IO_FILE_plus *fp)
 #endif
       if (_IO_list_all == NULL)
 	;
-      else if (fp == _IO_list_all)
-	_IO_list_all = (struct _IO_FILE_plus *) _IO_list_all->file._chain;
-      else
-	for (f = &_IO_list_all->file._chain; *f; f = &(*f)->_chain)
-	  if (*f == (FILE *) fp)
-	    {
-	      *f = fp->file._chain;
-	      break;
-	    }
+      else {
+	struct _IO_FILE_plus **pr,*nx;
+	pr=(struct _IO_FILE_plus**)fp->file._prevchain;
+	nx=(struct _IO_FILE_plus*)fp->file._chain;
+	*pr=nx;
+	if (nx) nx->file._prevchain=(FILE **)pr;
+      }
       fp->file._flags &= ~_IO_LINKED;
 #ifdef _IO_MTSAFE_IO
       _IO_funlockfile ((FILE *) fp);
@@ -95,6 +92,8 @@  _IO_link_in (struct _IO_FILE_plus *fp)
       _IO_flockfile ((FILE *) fp);
 #endif
       fp->file._chain = (FILE *) _IO_list_all;
+      fp->file._prevchain = (FILE **) &_IO_list_all;
+      _IO_list_all->file._prevchain=&fp->file._chain;
       _IO_list_all = fp;
 #ifdef _IO_MTSAFE_IO
       _IO_funlockfile ((FILE *) fp);
diff --git a/libio/libioP.h b/libio/libioP.h
index 1af287b19f..5862f815bc 100644
--- a/libio/libioP.h
+++ b/libio/libioP.h
@@ -429,6 +429,9 @@  libc_hidden_proto (_IO_list_resetlock)
 extern void _IO_enable_locks (void) __THROW;
 libc_hidden_proto (_IO_enable_locks)
 
+/* Needed for proper initialization of the doubly-linked list */  
+extern void __stdfiles_init(void);
+
 /* Default jumptable functions. */
 
 extern int _IO_default_underflow (FILE *) __THROW;
@@ -904,12 +907,12 @@  extern int _IO_vscanf (const char *, va_list) __THROW;
 # ifdef _IO_USE_OLD_IO_FILE
 #  define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
        { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
-	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
+	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD,	\
 	 0, _IO_pos_BAD, 0, 0, { 0 }, &_IO_stdfile_##FD##_lock }
 # else
 #  define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
        { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
-	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
+	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD,	\
 	 0, _IO_pos_BAD, 0, 0, { 0 }, &_IO_stdfile_##FD##_lock, _IO_pos_BAD,\
 	 NULL, WDP, 0 }
 # endif
@@ -917,12 +920,12 @@  extern int _IO_vscanf (const char *, va_list) __THROW;
 # ifdef _IO_USE_OLD_IO_FILE
 #  define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
        { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
-	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
+	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD,	\
 	 0, _IO_pos_BAD }
 # else
 #  define FILEBUF_LITERAL(CHAIN, FLAGS, FD, WDP) \
        { _IO_MAGIC+_IO_LINKED+_IO_IS_FILEBUF+FLAGS, \
-	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, FD, \
+	 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (FILE *) CHAIN, NULL, FD,	\
 	 0, _IO_pos_BAD, 0, 0, { 0 }, 0, _IO_pos_BAD, \
 	 NULL, WDP, 0 }
 # endif
diff --git a/libio/stdfiles.c b/libio/stdfiles.c
index cd8eca8bf3..87c9b249df 100644
--- a/libio/stdfiles.c
+++ b/libio/stdfiles.c
@@ -54,4 +54,12 @@  DEF_STDFILE(_IO_2_1_stdout_, 1, &_IO_2_1_stdin_, _IO_NO_READS);
 DEF_STDFILE(_IO_2_1_stderr_, 2, &_IO_2_1_stdout_, _IO_NO_READS+_IO_UNBUFFERED);
 
 struct _IO_FILE_plus *_IO_list_all = &_IO_2_1_stderr_;
+
+void __stdfiles_init(void) // finish the double-linking for stdfiles as static initialization cannot
+{
+  struct _IO_FILE **f;
+
+  for(f=(struct _IO_FILE **)&_IO_list_all;(*f);f=&(**f)._chain) (**f)._prevchain=f;
+}
+
 libc_hidden_data_def (_IO_list_all)