diff mbox

[v2] Single threaded stdio optimization

Message ID 36ac5bbf-8ee0-2898-9c20-d50a39aca1ab@linaro.org
State New
Headers show

Commit Message

Adhemerval Zanella Netto July 7, 2017, 8:46 p.m. UTC
On 07/07/2017 14:30, Szabolcs Nagy wrote:
> On 07/07/17 16:02, Szabolcs Nagy wrote:
>> On 07/07/17 14:38, Szabolcs Nagy wrote:
>>> On 07/07/17 13:38, Florian Weimer wrote:
>>>> This patch breaks the attached test case.  Not all stream objects are
>>>> linked into the global list, so the locking upgrade doesn't happen for
>>>> some of them.
>>>>
>>>
>>> i thought all files need to be flushed on exit
>>> or on fflush(0), if glibc does not do that that's
>>> observably non-conforming.
>>>
>>>> The global list management is quite expensive because the list is
>>>> single-linked, so we can't just add all stream objects when not yet
>>>> running multi-threaded.
>>>>
>>>> I fear that this patch may have to be backed out again, until we can
>>>> address these issues.
>>>>
>>>
>>> i can back it out, or try to create all the
>>> problematic files with the optimization-disabling
>>> flag set (in case that's simple.. will look into
>>> it in a minute).
>>>
>>
>> it seems both changing the optimization flag or adding
>> these streams to the list are easy.
>>
>> i think glibc should just fix the open_memstream bug,
>> https://sourceware.org/bugzilla/show_bug.cgi?id=21735
>> i'll work on a patch.
>>
>> (i don't expect a large number of open/close memstream
>> operations, so it should not have a huge impact)
>>
> 
> sorry, i cannot work on this until monday,
> i hope it's ok to leave multithread open_memstream
> broken for a few days.

What about patch below to add memstream FILE objects to the fflush
list along with a size update on write operation update?  I do not like
the cast, but currently 'struct _IO_FILE_plus' and 'struct _IO_streambuf'
are essentially the same type so it should be safe. I think for 2.27 we
can think about cleaning up these definitions.

Comments

Szabolcs Nagy July 7, 2017, 9:31 p.m. UTC | #1
* Adhemerval Zanella <adhemerval.zanella@linaro.org> [2017-07-07 17:46:56 -0300]:
> On 07/07/2017 14:30, Szabolcs Nagy wrote:
> > On 07/07/17 16:02, Szabolcs Nagy wrote:
> >> On 07/07/17 14:38, Szabolcs Nagy wrote:
> >>> On 07/07/17 13:38, Florian Weimer wrote:
> >>>> This patch breaks the attached test case.  Not all stream objects are
> >>>> linked into the global list, so the locking upgrade doesn't happen for
> >>>> some of them.
> >>>>
> >>>
> >>> i thought all files need to be flushed on exit
> >>> or on fflush(0), if glibc does not do that that's
> >>> observably non-conforming.
> >>>
> >>>> The global list management is quite expensive because the list is
> >>>> single-linked, so we can't just add all stream objects when not yet
> >>>> running multi-threaded.
> >>>>
> >>>> I fear that this patch may have to be backed out again, until we can
> >>>> address these issues.
> >>>>
> >>>
> >>> i can back it out, or try to create all the
> >>> problematic files with the optimization-disabling
> >>> flag set (in case that's simple.. will look into
> >>> it in a minute).
> >>>
> >>
> >> it seems both changing the optimization flag or adding
> >> these streams to the list are easy.
> >>
> >> i think glibc should just fix the open_memstream bug,
> >> https://sourceware.org/bugzilla/show_bug.cgi?id=21735
> >> i'll work on a patch.
> >>
> >> (i don't expect a large number of open/close memstream
> >> operations, so it should not have a huge impact)
> >>
> > 
> > sorry, i cannot work on this until monday,
> > i hope it's ok to leave multithread open_memstream
> > broken for a few days.
> 
> What about patch below to add memstream FILE objects to the fflush
> list along with a size update on write operation update?  I do not like
> the cast, but currently 'struct _IO_FILE_plus' and 'struct _IO_streambuf'
> are essentially the same type so it should be safe. I think for 2.27 we
> can think about cleaning up these definitions.
> 
> diff --git a/libio/memstream.c b/libio/memstream.c
> index f83d4a5..3be5b58 100644
> --- a/libio/memstream.c
> +++ b/libio/memstream.c
> @@ -31,13 +31,14 @@ struct _IO_FILE_memstream
>  
>  static int _IO_mem_sync (_IO_FILE* fp) __THROW;
>  static void _IO_mem_finish (_IO_FILE* fp, int) __THROW;
> +static int _IO_mem_overflow (_IO_FILE *fp, int c) __THROW;
>  
>  
>  static const struct _IO_jump_t _IO_mem_jumps libio_vtable =
>  {
>    JUMP_INIT_DUMMY,
>    JUMP_INIT (finish, _IO_mem_finish),
> -  JUMP_INIT (overflow, _IO_str_overflow),
> +  JUMP_INIT (overflow, _IO_mem_overflow),
>    JUMP_INIT (underflow, _IO_str_underflow),
>    JUMP_INIT (uflow, _IO_default_uflow),
>    JUMP_INIT (pbackfail, _IO_str_pbackfail),
> @@ -87,6 +88,7 @@ __open_memstream (char **bufloc, _IO_size_t *sizeloc)
>        return NULL;
>      }
>    _IO_init_internal (&new_f->fp._sf._sbf._f, 0);
> +  _IO_link_in ((struct _IO_FILE_plus *) &new_f->fp._sf._sbf);

i think if you link_in, then you have to unlink somewhere,
but i havent tracked down the ways in which the io callbacks
handle that, in principle it should be at close time.

>    _IO_JUMPS_FILE_plus (&new_f->fp._sf._sbf) = &_IO_mem_jumps;
>    _IO_str_init_static_internal (&new_f->fp._sf, buf, _IO_BUFSIZ, buf);
>    new_f->fp._sf._sbf._f._flags &= ~_IO_USER_BUF;
> @@ -137,3 +139,14 @@ _IO_mem_finish (_IO_FILE *fp, int dummy)
>  
>    _IO_str_finish (fp, 0);
>  }
> +
> +static int
> +_IO_mem_overflow (_IO_FILE *fp, int c)
> +{
> +  int ret = _IO_str_overflow (fp, c);
> +
> +  struct _IO_FILE_memstream *mp = (struct _IO_FILE_memstream *) fp;
> +  *mp->sizeloc = fp->_IO_write_ptr - fp->_IO_write_base;
> +
> +  return ret;
> +}
Szabolcs Nagy July 10, 2017, 2:28 p.m. UTC | #2
On 07/07/17 22:31, Szabolcs Nagy wrote:
> * Adhemerval Zanella <adhemerval.zanella@linaro.org> [2017-07-07 17:46:56 -0300]:
>> On 07/07/2017 14:30, Szabolcs Nagy wrote:
>>> On 07/07/17 16:02, Szabolcs Nagy wrote:
>>>> On 07/07/17 14:38, Szabolcs Nagy wrote:
>>>>> On 07/07/17 13:38, Florian Weimer wrote:
>>>>>> This patch breaks the attached test case.  Not all stream objects are
>>>>>> linked into the global list, so the locking upgrade doesn't happen for
>>>>>> some of them.
>>>>>>
>>>>>
>>>>> i thought all files need to be flushed on exit
>>>>> or on fflush(0), if glibc does not do that that's
>>>>> observably non-conforming.
>>>>>
>>>>>> The global list management is quite expensive because the list is
>>>>>> single-linked, so we can't just add all stream objects when not yet
>>>>>> running multi-threaded.
>>>>>>
>>>>>> I fear that this patch may have to be backed out again, until we can
>>>>>> address these issues.
>>>>>>
>>>>>
>>>>> i can back it out, or try to create all the
>>>>> problematic files with the optimization-disabling
>>>>> flag set (in case that's simple.. will look into
>>>>> it in a minute).
>>>>>
>>>>
>>>> it seems both changing the optimization flag or adding
>>>> these streams to the list are easy.
>>>>
>>>> i think glibc should just fix the open_memstream bug,
>>>> https://sourceware.org/bugzilla/show_bug.cgi?id=21735
>>>> i'll work on a patch.
>>>>
>>>> (i don't expect a large number of open/close memstream
>>>> operations, so it should not have a huge impact)
>>>>
>>>
>>> sorry, i cannot work on this until monday,
>>> i hope it's ok to leave multithread open_memstream
>>> broken for a few days.
>>
>> What about patch below to add memstream FILE objects to the fflush
>> list along with a size update on write operation update?  I do not like
>> the cast, but currently 'struct _IO_FILE_plus' and 'struct _IO_streambuf'
>> are essentially the same type so it should be safe. I think for 2.27 we
>> can think about cleaning up these definitions.
>>
>> diff --git a/libio/memstream.c b/libio/memstream.c
>> index f83d4a5..3be5b58 100644
>> --- a/libio/memstream.c
>> +++ b/libio/memstream.c
>> @@ -31,13 +31,14 @@ struct _IO_FILE_memstream
>>  
>>  static int _IO_mem_sync (_IO_FILE* fp) __THROW;
>>  static void _IO_mem_finish (_IO_FILE* fp, int) __THROW;
>> +static int _IO_mem_overflow (_IO_FILE *fp, int c) __THROW;
>>  
>>  
>>  static const struct _IO_jump_t _IO_mem_jumps libio_vtable =
>>  {
>>    JUMP_INIT_DUMMY,
>>    JUMP_INIT (finish, _IO_mem_finish),
>> -  JUMP_INIT (overflow, _IO_str_overflow),
>> +  JUMP_INIT (overflow, _IO_mem_overflow),
>>    JUMP_INIT (underflow, _IO_str_underflow),
>>    JUMP_INIT (uflow, _IO_default_uflow),
>>    JUMP_INIT (pbackfail, _IO_str_pbackfail),
>> @@ -87,6 +88,7 @@ __open_memstream (char **bufloc, _IO_size_t *sizeloc)
>>        return NULL;
>>      }
>>    _IO_init_internal (&new_f->fp._sf._sbf._f, 0);
>> +  _IO_link_in ((struct _IO_FILE_plus *) &new_f->fp._sf._sbf);
> 
> i think if you link_in, then you have to unlink somewhere,
> but i havent tracked down the ways in which the io callbacks
> handle that, in principle it should be at close time.
> 

after further investigation i found that actually both
fclose and _IO_str_finish call _IO_un_link if the file
is linked.
(which is suboptimal, but my worry was not justified)

and that

_IO_flush_all calls the overflow callback
_IO_fflush calls the sync callback
_IO_fclose calls the finish callback

so all three callbacks need to set *sizeloc.
(which is suboptimal, but we should not redesign libio now)

so your patch seems to be the right fix for BZ 21735 .


>>    _IO_JUMPS_FILE_plus (&new_f->fp._sf._sbf) = &_IO_mem_jumps;
>>    _IO_str_init_static_internal (&new_f->fp._sf, buf, _IO_BUFSIZ, buf);
>>    new_f->fp._sf._sbf._f._flags &= ~_IO_USER_BUF;
>> @@ -137,3 +139,14 @@ _IO_mem_finish (_IO_FILE *fp, int dummy)
>>  
>>    _IO_str_finish (fp, 0);
>>  }
>> +
>> +static int
>> +_IO_mem_overflow (_IO_FILE *fp, int c)
>> +{
>> +  int ret = _IO_str_overflow (fp, c);
>> +
>> +  struct _IO_FILE_memstream *mp = (struct _IO_FILE_memstream *) fp;
>> +  *mp->sizeloc = fp->_IO_write_ptr - fp->_IO_write_base;
>> +
>> +  return ret;
>> +}
Adhemerval Zanella Netto July 10, 2017, 3:05 p.m. UTC | #3
On 10/07/2017 11:28, Szabolcs Nagy wrote:
> On 07/07/17 22:31, Szabolcs Nagy wrote:
>> * Adhemerval Zanella <adhemerval.zanella@linaro.org> [2017-07-07 17:46:56 -0300]:
>>> On 07/07/2017 14:30, Szabolcs Nagy wrote:
>>>> On 07/07/17 16:02, Szabolcs Nagy wrote:
>>>>> On 07/07/17 14:38, Szabolcs Nagy wrote:
>>>>>> On 07/07/17 13:38, Florian Weimer wrote:
>>>>>>> This patch breaks the attached test case.  Not all stream objects are
>>>>>>> linked into the global list, so the locking upgrade doesn't happen for
>>>>>>> some of them.
>>>>>>>
>>>>>>
>>>>>> i thought all files need to be flushed on exit
>>>>>> or on fflush(0), if glibc does not do that that's
>>>>>> observably non-conforming.
>>>>>>
>>>>>>> The global list management is quite expensive because the list is
>>>>>>> single-linked, so we can't just add all stream objects when not yet
>>>>>>> running multi-threaded.
>>>>>>>
>>>>>>> I fear that this patch may have to be backed out again, until we can
>>>>>>> address these issues.
>>>>>>>
>>>>>>
>>>>>> i can back it out, or try to create all the
>>>>>> problematic files with the optimization-disabling
>>>>>> flag set (in case that's simple.. will look into
>>>>>> it in a minute).
>>>>>>
>>>>>
>>>>> it seems both changing the optimization flag or adding
>>>>> these streams to the list are easy.
>>>>>
>>>>> i think glibc should just fix the open_memstream bug,
>>>>> https://sourceware.org/bugzilla/show_bug.cgi?id=21735
>>>>> i'll work on a patch.
>>>>>
>>>>> (i don't expect a large number of open/close memstream
>>>>> operations, so it should not have a huge impact)
>>>>>
>>>>
>>>> sorry, i cannot work on this until monday,
>>>> i hope it's ok to leave multithread open_memstream
>>>> broken for a few days.
>>>
>>> What about patch below to add memstream FILE objects to the fflush
>>> list along with a size update on write operation update?  I do not like
>>> the cast, but currently 'struct _IO_FILE_plus' and 'struct _IO_streambuf'
>>> are essentially the same type so it should be safe. I think for 2.27 we
>>> can think about cleaning up these definitions.
>>>
>>> diff --git a/libio/memstream.c b/libio/memstream.c
>>> index f83d4a5..3be5b58 100644
>>> --- a/libio/memstream.c
>>> +++ b/libio/memstream.c
>>> @@ -31,13 +31,14 @@ struct _IO_FILE_memstream
>>>  
>>>  static int _IO_mem_sync (_IO_FILE* fp) __THROW;
>>>  static void _IO_mem_finish (_IO_FILE* fp, int) __THROW;
>>> +static int _IO_mem_overflow (_IO_FILE *fp, int c) __THROW;
>>>  
>>>  
>>>  static const struct _IO_jump_t _IO_mem_jumps libio_vtable =
>>>  {
>>>    JUMP_INIT_DUMMY,
>>>    JUMP_INIT (finish, _IO_mem_finish),
>>> -  JUMP_INIT (overflow, _IO_str_overflow),
>>> +  JUMP_INIT (overflow, _IO_mem_overflow),
>>>    JUMP_INIT (underflow, _IO_str_underflow),
>>>    JUMP_INIT (uflow, _IO_default_uflow),
>>>    JUMP_INIT (pbackfail, _IO_str_pbackfail),
>>> @@ -87,6 +88,7 @@ __open_memstream (char **bufloc, _IO_size_t *sizeloc)
>>>        return NULL;
>>>      }
>>>    _IO_init_internal (&new_f->fp._sf._sbf._f, 0);
>>> +  _IO_link_in ((struct _IO_FILE_plus *) &new_f->fp._sf._sbf);
>>
>> i think if you link_in, then you have to unlink somewhere,
>> but i havent tracked down the ways in which the io callbacks
>> handle that, in principle it should be at close time.
>>
> 
> after further investigation i found that actually both
> fclose and _IO_str_finish call _IO_un_link if the file
> is linked.
> (which is suboptimal, but my worry was not justified)
> 
> and that
> 
> _IO_flush_all calls the overflow callback
> _IO_fflush calls the sync callback
> _IO_fclose calls the finish callback
> 
> so all three callbacks need to set *sizeloc.
> (which is suboptimal, but we should not redesign libio now)
> 
> so your patch seems to be the right fix for BZ 21735 .

Yes, I was about to reply with the same observations.  I am finishing the 
patch itself with a proper testcase.

> 
> 
>>>    _IO_JUMPS_FILE_plus (&new_f->fp._sf._sbf) = &_IO_mem_jumps;
>>>    _IO_str_init_static_internal (&new_f->fp._sf, buf, _IO_BUFSIZ, buf);
>>>    new_f->fp._sf._sbf._f._flags &= ~_IO_USER_BUF;
>>> @@ -137,3 +139,14 @@ _IO_mem_finish (_IO_FILE *fp, int dummy)
>>>  
>>>    _IO_str_finish (fp, 0);
>>>  }
>>> +
>>> +static int
>>> +_IO_mem_overflow (_IO_FILE *fp, int c)
>>> +{
>>> +  int ret = _IO_str_overflow (fp, c);
>>> +
>>> +  struct _IO_FILE_memstream *mp = (struct _IO_FILE_memstream *) fp;
>>> +  *mp->sizeloc = fp->_IO_write_ptr - fp->_IO_write_base;
>>> +
>>> +  return ret;
>>> +}
>
diff mbox

Patch

diff --git a/libio/memstream.c b/libio/memstream.c
index f83d4a5..3be5b58 100644
--- a/libio/memstream.c
+++ b/libio/memstream.c
@@ -31,13 +31,14 @@  struct _IO_FILE_memstream
 
 static int _IO_mem_sync (_IO_FILE* fp) __THROW;
 static void _IO_mem_finish (_IO_FILE* fp, int) __THROW;
+static int _IO_mem_overflow (_IO_FILE *fp, int c) __THROW;
 
 
 static const struct _IO_jump_t _IO_mem_jumps libio_vtable =
 {
   JUMP_INIT_DUMMY,
   JUMP_INIT (finish, _IO_mem_finish),
-  JUMP_INIT (overflow, _IO_str_overflow),
+  JUMP_INIT (overflow, _IO_mem_overflow),
   JUMP_INIT (underflow, _IO_str_underflow),
   JUMP_INIT (uflow, _IO_default_uflow),
   JUMP_INIT (pbackfail, _IO_str_pbackfail),
@@ -87,6 +88,7 @@  __open_memstream (char **bufloc, _IO_size_t *sizeloc)
       return NULL;
     }
   _IO_init_internal (&new_f->fp._sf._sbf._f, 0);
+  _IO_link_in ((struct _IO_FILE_plus *) &new_f->fp._sf._sbf);
   _IO_JUMPS_FILE_plus (&new_f->fp._sf._sbf) = &_IO_mem_jumps;
   _IO_str_init_static_internal (&new_f->fp._sf, buf, _IO_BUFSIZ, buf);
   new_f->fp._sf._sbf._f._flags &= ~_IO_USER_BUF;
@@ -137,3 +139,14 @@  _IO_mem_finish (_IO_FILE *fp, int dummy)
 
   _IO_str_finish (fp, 0);
 }
+
+static int
+_IO_mem_overflow (_IO_FILE *fp, int c)
+{
+  int ret = _IO_str_overflow (fp, c);
+
+  struct _IO_FILE_memstream *mp = (struct _IO_FILE_memstream *) fp;
+  *mp->sizeloc = fp->_IO_write_ptr - fp->_IO_write_base;
+
+  return ret;
+}