Patchwork [RFC,07/12] Store the data to send also in iovec

login
register
mail settings
Submitter Orit Wasserman
Date March 21, 2013, 9:09 a.m.
Message ID <1363856971-4601-8-git-send-email-owasserm@redhat.com>
Download mbox | patch
Permalink /patch/229602/
State New
Headers show

Comments

Orit Wasserman - March 21, 2013, 9:09 a.m.
All data is still copied into the static buffer.

Signed-off-by: Orit Wasserman <owasserm@redhat.com>
---
 savevm.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)
Paolo Bonzini - March 21, 2013, 9:56 a.m.
Il 21/03/2013 10:09, Orit Wasserman ha scritto:
> All data is still copied into the static buffer.
> 
> Signed-off-by: Orit Wasserman <owasserm@redhat.com>
> ---
>  savevm.c | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> diff --git a/savevm.c b/savevm.c
> index ec64533..d5834ca 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -114,6 +114,7 @@ void qemu_announce_self(void)
>  /* savevm/loadvm support */
>  
>  #define IO_BUF_SIZE 32768
> +#define MAX_IOV_SIZE 64

You could use IOV_MAX, or min(IOV_MAX, 64).  The 64 should be tuned on a
good 10G network...

Paolo

>  struct QEMUFile {
>      const QEMUFileOps *ops;
> @@ -129,6 +130,9 @@ struct QEMUFile {
>      int buf_size; /* 0 when writing */
>      uint8_t buf[IO_BUF_SIZE];
>  
> +    struct iovec iov[MAX_IOV_SIZE];
> +    unsigned int iovcnt;
> +
>      int last_error;
>  };
>  
> @@ -546,6 +550,7 @@ static void qemu_fflush(QEMUFile *f)
>              f->pos += f->buf_index;
>          }
>          f->buf_index = 0;
> +        f->iovcnt = 0;
>      }
>      if (ret < 0) {
>          qemu_file_set_error(f, ret);
> @@ -638,12 +643,14 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
>          if (l > size)
>              l = size;
>          memcpy(f->buf + f->buf_index, buf, l);
> +        f->iov[f->iovcnt].iov_base = f->buf + f->buf_index;
> +        f->iov[f->iovcnt++].iov_len = l;
>          f->is_write = 1;
>          f->buf_index += l;
>          f->bytes_xfer += l;
>          buf += l;
>          size -= l;
> -        if (f->buf_index >= IO_BUF_SIZE) {
> +        if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
>              qemu_fflush(f);
>              if (qemu_file_get_error(f)) {
>                  break;
> @@ -667,8 +674,10 @@ void qemu_put_byte(QEMUFile *f, int v)
>      f->buf[f->buf_index++] = v;
>      f->is_write = 1;
>      f->bytes_xfer += 1;
> +    f->iov[f->iovcnt].iov_base = f->buf + (f->buf_index - 1);
> +    f->iov[f->iovcnt++].iov_len = 1;
>  
> -    if (f->buf_index >= IO_BUF_SIZE) {
> +    if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
>          qemu_fflush(f);
>      }
>  }
>
Orit Wasserman - March 21, 2013, 11:10 a.m.
On 03/21/2013 11:56 AM, Paolo Bonzini wrote:
> Il 21/03/2013 10:09, Orit Wasserman ha scritto:
>> All data is still copied into the static buffer.
>>
>> Signed-off-by: Orit Wasserman <owasserm@redhat.com>
>> ---
>>  savevm.c | 13 +++++++++++--
>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>
>> diff --git a/savevm.c b/savevm.c
>> index ec64533..d5834ca 100644
>> --- a/savevm.c
>> +++ b/savevm.c
>> @@ -114,6 +114,7 @@ void qemu_announce_self(void)
>>  /* savevm/loadvm support */
>>  
>>  #define IO_BUF_SIZE 32768
>> +#define MAX_IOV_SIZE 64
> 
> You could use IOV_MAX, or min(IOV_MAX, 64).
Sure
 The 64 should be tuned on a
> good 10G network...

You need to remember that an iovec of 64 entries is equivalent to 32 guest pages, which is 
128K of data. This is a large TCP packet that will be fragmented even with jumbo frames.
I can make it configurable but not sure if it will be useful.
Michael, what do you think?

Orit
> 
> Paolo
> 
>>  struct QEMUFile {
>>      const QEMUFileOps *ops;
>> @@ -129,6 +130,9 @@ struct QEMUFile {
>>      int buf_size; /* 0 when writing */
>>      uint8_t buf[IO_BUF_SIZE];
>>  
>> +    struct iovec iov[MAX_IOV_SIZE];
>> +    unsigned int iovcnt;
>> +
>>      int last_error;
>>  };
>>  
>> @@ -546,6 +550,7 @@ static void qemu_fflush(QEMUFile *f)
>>              f->pos += f->buf_index;
>>          }
>>          f->buf_index = 0;
>> +        f->iovcnt = 0;
>>      }
>>      if (ret < 0) {
>>          qemu_file_set_error(f, ret);
>> @@ -638,12 +643,14 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
>>          if (l > size)
>>              l = size;
>>          memcpy(f->buf + f->buf_index, buf, l);
>> +        f->iov[f->iovcnt].iov_base = f->buf + f->buf_index;
>> +        f->iov[f->iovcnt++].iov_len = l;
>>          f->is_write = 1;
>>          f->buf_index += l;
>>          f->bytes_xfer += l;
>>          buf += l;
>>          size -= l;
>> -        if (f->buf_index >= IO_BUF_SIZE) {
>> +        if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
>>              qemu_fflush(f);
>>              if (qemu_file_get_error(f)) {
>>                  break;
>> @@ -667,8 +674,10 @@ void qemu_put_byte(QEMUFile *f, int v)
>>      f->buf[f->buf_index++] = v;
>>      f->is_write = 1;
>>      f->bytes_xfer += 1;
>> +    f->iov[f->iovcnt].iov_base = f->buf + (f->buf_index - 1);
>> +    f->iov[f->iovcnt++].iov_len = 1;
>>  
>> -    if (f->buf_index >= IO_BUF_SIZE) {
>> +    if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
>>          qemu_fflush(f);
>>      }
>>  }
>>
>
Michael S. Tsirkin - March 21, 2013, 11:14 a.m.
On Thu, Mar 21, 2013 at 01:10:40PM +0200, Orit Wasserman wrote:
> On 03/21/2013 11:56 AM, Paolo Bonzini wrote:
> > Il 21/03/2013 10:09, Orit Wasserman ha scritto:
> >> All data is still copied into the static buffer.
> >>
> >> Signed-off-by: Orit Wasserman <owasserm@redhat.com>
> >> ---
> >>  savevm.c | 13 +++++++++++--
> >>  1 file changed, 11 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/savevm.c b/savevm.c
> >> index ec64533..d5834ca 100644
> >> --- a/savevm.c
> >> +++ b/savevm.c
> >> @@ -114,6 +114,7 @@ void qemu_announce_self(void)
> >>  /* savevm/loadvm support */
> >>  
> >>  #define IO_BUF_SIZE 32768
> >> +#define MAX_IOV_SIZE 64
> > 
> > You could use IOV_MAX, or min(IOV_MAX, 64).
> Sure
>  The 64 should be tuned on a
> > good 10G network...
> 
> You need to remember that an iovec of 64 entries is equivalent to 32 guest pages, which is 
> 128K of data. This is a large TCP packet that will be fragmented even with jumbo frames.
> I can make it configurable but not sure if it will be useful.
> Michael, what do you think?
> 
> Orit

You are both right :). 64 looks like a sane value, and we can
try and tune it some more if we have the time.

> > 
> > Paolo
> > 
> >>  struct QEMUFile {
> >>      const QEMUFileOps *ops;
> >> @@ -129,6 +130,9 @@ struct QEMUFile {
> >>      int buf_size; /* 0 when writing */
> >>      uint8_t buf[IO_BUF_SIZE];
> >>  
> >> +    struct iovec iov[MAX_IOV_SIZE];
> >> +    unsigned int iovcnt;
> >> +
> >>      int last_error;
> >>  };
> >>  
> >> @@ -546,6 +550,7 @@ static void qemu_fflush(QEMUFile *f)
> >>              f->pos += f->buf_index;
> >>          }
> >>          f->buf_index = 0;
> >> +        f->iovcnt = 0;
> >>      }
> >>      if (ret < 0) {
> >>          qemu_file_set_error(f, ret);
> >> @@ -638,12 +643,14 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
> >>          if (l > size)
> >>              l = size;
> >>          memcpy(f->buf + f->buf_index, buf, l);
> >> +        f->iov[f->iovcnt].iov_base = f->buf + f->buf_index;
> >> +        f->iov[f->iovcnt++].iov_len = l;
> >>          f->is_write = 1;
> >>          f->buf_index += l;
> >>          f->bytes_xfer += l;
> >>          buf += l;
> >>          size -= l;
> >> -        if (f->buf_index >= IO_BUF_SIZE) {
> >> +        if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
> >>              qemu_fflush(f);
> >>              if (qemu_file_get_error(f)) {
> >>                  break;
> >> @@ -667,8 +674,10 @@ void qemu_put_byte(QEMUFile *f, int v)
> >>      f->buf[f->buf_index++] = v;
> >>      f->is_write = 1;
> >>      f->bytes_xfer += 1;
> >> +    f->iov[f->iovcnt].iov_base = f->buf + (f->buf_index - 1);
> >> +    f->iov[f->iovcnt++].iov_len = 1;
> >>  
> >> -    if (f->buf_index >= IO_BUF_SIZE) {
> >> +    if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
> >>          qemu_fflush(f);
> >>      }
> >>  }
> >>
> >
Orit Wasserman - March 21, 2013, 12:50 p.m.
On 03/21/2013 01:14 PM, Michael S. Tsirkin wrote:
> On Thu, Mar 21, 2013 at 01:10:40PM +0200, Orit Wasserman wrote:
>> On 03/21/2013 11:56 AM, Paolo Bonzini wrote:
>>> Il 21/03/2013 10:09, Orit Wasserman ha scritto:
>>>> All data is still copied into the static buffer.
>>>>
>>>> Signed-off-by: Orit Wasserman <owasserm@redhat.com>
>>>> ---
>>>>  savevm.c | 13 +++++++++++--
>>>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/savevm.c b/savevm.c
>>>> index ec64533..d5834ca 100644
>>>> --- a/savevm.c
>>>> +++ b/savevm.c
>>>> @@ -114,6 +114,7 @@ void qemu_announce_self(void)
>>>>  /* savevm/loadvm support */
>>>>  
>>>>  #define IO_BUF_SIZE 32768
>>>> +#define MAX_IOV_SIZE 64
>>>
>>> You could use IOV_MAX, or min(IOV_MAX, 64).
>> Sure
>>  The 64 should be tuned on a
>>> good 10G network...
>>
>> You need to remember that an iovec of 64 entries is equivalent to 32 guest pages, which is 
>> 128K of data. This is a large TCP packet that will be fragmented even with jumbo frames.
>> I can make it configurable but not sure if it will be useful.
>> Michael, what do you think?
>>
>> Orit
> 
> You are both right :). 64 looks like a sane value, and we can
> try and tune it some more if we have the time.
> 
Paolo, are you ok with leaving it MIN(IOV_MAX, 64) at the moment?
Later we can make it changeable.

Orit
>>>
>>> Paolo
>>>
>>>>  struct QEMUFile {
>>>>      const QEMUFileOps *ops;
>>>> @@ -129,6 +130,9 @@ struct QEMUFile {
>>>>      int buf_size; /* 0 when writing */
>>>>      uint8_t buf[IO_BUF_SIZE];
>>>>  
>>>> +    struct iovec iov[MAX_IOV_SIZE];
>>>> +    unsigned int iovcnt;
>>>> +
>>>>      int last_error;
>>>>  };
>>>>  
>>>> @@ -546,6 +550,7 @@ static void qemu_fflush(QEMUFile *f)
>>>>              f->pos += f->buf_index;
>>>>          }
>>>>          f->buf_index = 0;
>>>> +        f->iovcnt = 0;
>>>>      }
>>>>      if (ret < 0) {
>>>>          qemu_file_set_error(f, ret);
>>>> @@ -638,12 +643,14 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
>>>>          if (l > size)
>>>>              l = size;
>>>>          memcpy(f->buf + f->buf_index, buf, l);
>>>> +        f->iov[f->iovcnt].iov_base = f->buf + f->buf_index;
>>>> +        f->iov[f->iovcnt++].iov_len = l;
>>>>          f->is_write = 1;
>>>>          f->buf_index += l;
>>>>          f->bytes_xfer += l;
>>>>          buf += l;
>>>>          size -= l;
>>>> -        if (f->buf_index >= IO_BUF_SIZE) {
>>>> +        if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
>>>>              qemu_fflush(f);
>>>>              if (qemu_file_get_error(f)) {
>>>>                  break;
>>>> @@ -667,8 +674,10 @@ void qemu_put_byte(QEMUFile *f, int v)
>>>>      f->buf[f->buf_index++] = v;
>>>>      f->is_write = 1;
>>>>      f->bytes_xfer += 1;
>>>> +    f->iov[f->iovcnt].iov_base = f->buf + (f->buf_index - 1);
>>>> +    f->iov[f->iovcnt++].iov_len = 1;
>>>>  
>>>> -    if (f->buf_index >= IO_BUF_SIZE) {
>>>> +    if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
>>>>          qemu_fflush(f);
>>>>      }
>>>>  }
>>>>
>>>
Paolo Bonzini - March 21, 2013, 1 p.m.
Il 21/03/2013 13:50, Orit Wasserman ha scritto:
> On 03/21/2013 01:14 PM, Michael S. Tsirkin wrote:
>> On Thu, Mar 21, 2013 at 01:10:40PM +0200, Orit Wasserman wrote:
>>> On 03/21/2013 11:56 AM, Paolo Bonzini wrote:
>>>> Il 21/03/2013 10:09, Orit Wasserman ha scritto:
>>>>> All data is still copied into the static buffer.
>>>>>
>>>>> Signed-off-by: Orit Wasserman <owasserm@redhat.com>
>>>>> ---
>>>>>  savevm.c | 13 +++++++++++--
>>>>>  1 file changed, 11 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/savevm.c b/savevm.c
>>>>> index ec64533..d5834ca 100644
>>>>> --- a/savevm.c
>>>>> +++ b/savevm.c
>>>>> @@ -114,6 +114,7 @@ void qemu_announce_self(void)
>>>>>  /* savevm/loadvm support */
>>>>>  
>>>>>  #define IO_BUF_SIZE 32768
>>>>> +#define MAX_IOV_SIZE 64
>>>>
>>>> You could use IOV_MAX, or min(IOV_MAX, 64).
>
> Paolo, are you ok with leaving it MIN(IOV_MAX, 64) at the moment?

Of course! (When I use "..." it means "I don't expect you to do that..."
:)).

Paolo

Patch

diff --git a/savevm.c b/savevm.c
index ec64533..d5834ca 100644
--- a/savevm.c
+++ b/savevm.c
@@ -114,6 +114,7 @@  void qemu_announce_self(void)
 /* savevm/loadvm support */
 
 #define IO_BUF_SIZE 32768
+#define MAX_IOV_SIZE 64
 
 struct QEMUFile {
     const QEMUFileOps *ops;
@@ -129,6 +130,9 @@  struct QEMUFile {
     int buf_size; /* 0 when writing */
     uint8_t buf[IO_BUF_SIZE];
 
+    struct iovec iov[MAX_IOV_SIZE];
+    unsigned int iovcnt;
+
     int last_error;
 };
 
@@ -546,6 +550,7 @@  static void qemu_fflush(QEMUFile *f)
             f->pos += f->buf_index;
         }
         f->buf_index = 0;
+        f->iovcnt = 0;
     }
     if (ret < 0) {
         qemu_file_set_error(f, ret);
@@ -638,12 +643,14 @@  void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
         if (l > size)
             l = size;
         memcpy(f->buf + f->buf_index, buf, l);
+        f->iov[f->iovcnt].iov_base = f->buf + f->buf_index;
+        f->iov[f->iovcnt++].iov_len = l;
         f->is_write = 1;
         f->buf_index += l;
         f->bytes_xfer += l;
         buf += l;
         size -= l;
-        if (f->buf_index >= IO_BUF_SIZE) {
+        if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
             qemu_fflush(f);
             if (qemu_file_get_error(f)) {
                 break;
@@ -667,8 +674,10 @@  void qemu_put_byte(QEMUFile *f, int v)
     f->buf[f->buf_index++] = v;
     f->is_write = 1;
     f->bytes_xfer += 1;
+    f->iov[f->iovcnt].iov_base = f->buf + (f->buf_index - 1);
+    f->iov[f->iovcnt++].iov_len = 1;
 
-    if (f->buf_index >= IO_BUF_SIZE) {
+    if (f->buf_index >= IO_BUF_SIZE || f->iovcnt >= MAX_IOV_SIZE) {
         qemu_fflush(f);
     }
 }