diff mbox

[RFC,v5,07/21] virtagent: add va.getfile RPC

Message ID 1291399402-20366-8-git-send-email-mdroth@linux.vnet.ibm.com
State New
Headers show

Commit Message

Michael Roth Dec. 3, 2010, 6:03 p.m. UTC
Add RPC to retrieve a guest file. This interface is intended
for smaller reads like peeking at logs and /proc and such.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
---
 virtagent-server.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 59 insertions(+), 0 deletions(-)

Comments

Adam Litke Dec. 6, 2010, 10:06 p.m. UTC | #1
On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote:
> Add RPC to retrieve a guest file. This interface is intended
> for smaller reads like peeking at logs and /proc and such.
> 
> Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
> ---
>  virtagent-server.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 59 insertions(+), 0 deletions(-)
> 
> diff --git a/virtagent-server.c b/virtagent-server.c
> index 36fbae2..a430b58 100644
> --- a/virtagent-server.c
> +++ b/virtagent-server.c
> @@ -26,12 +26,71 @@ static bool va_enable_syslog = false; /* enable syslog'ing of RPCs */
>      syslog(LOG_INFO, "virtagent, %s", msg_buf); \
>  } while(0)
> 
> +/* RPC functions common to guest/host daemons */
> +
> +/* va_getfile(): return file contents
> + * rpc return values:
> + *   - base64-encoded file contents
> + */
> +static xmlrpc_value *va_getfile(xmlrpc_env *env,
> +                                xmlrpc_value *param,
> +                                void *user_data)
> +{
> +    const char *path;
> +    char *file_contents = NULL;
> +    char buf[VA_FILEBUF_LEN];
> +    int fd, ret, count = 0;
> +    xmlrpc_value *result = NULL;
> +
> +    /* parse argument array */
> +    xmlrpc_decompose_value(env, param, "(s)", &path);
> +    if (env->fault_occurred) {
> +        return NULL;
> +    }
> +
> +    SLOG("va_getfile(), path:%s", path);

I would log all calls (even those that would fail
xmlrpc_decompose_value(). You might catch someone trying to do something
tricky.

> +
> +    fd = open(path, O_RDONLY);
> +    if (fd == -1) {
> +        LOG("open failed: %s", strerror(errno));
> +        xmlrpc_faultf(env, "open failed: %s", strerror(errno));
> +        return NULL;
> +    }
> +
> +    while ((ret = read(fd, buf, VA_FILEBUF_LEN)) > 0) {
> +        file_contents = qemu_realloc(file_contents, count + VA_FILEBUF_LEN);
> +        memcpy(file_contents + count, buf, ret);
> +        count += ret;
> +        if (count > VA_GETFILE_MAX) {
> +            xmlrpc_faultf(env, "max file size (%d bytes) exceeded",
> +                          VA_GETFILE_MAX);
> +            goto EXIT_CLOSE_BAD;
> +        }
> +    }
> +    if (ret == -1) {
> +        LOG("read failed: %s", strerror(errno));
> +        xmlrpc_faultf(env, "read failed: %s", strerror(errno));
> +        goto EXIT_CLOSE_BAD;
> +    }
> +
> +    result = xmlrpc_build_value(env, "6", file_contents, count);
> +
> +EXIT_CLOSE_BAD:
> +    if (file_contents) {
> +        qemu_free(file_contents);
> +    }
> +    close(fd);
> +    return result;
> +}
> +
>  typedef struct RPCFunction {
>      xmlrpc_value *(*func)(xmlrpc_env *env, xmlrpc_value *param, void *unused);
>      const char *func_name;
>  } RPCFunction;
> 
>  static RPCFunction guest_functions[] = {
> +    { .func = va_getfile,
> +      .func_name = "va.getfile" },
>      { NULL, NULL }
>  };
>  static RPCFunction host_functions[] = {
Michael Roth Dec. 6, 2010, 11:23 p.m. UTC | #2
On 12/06/2010 04:06 PM, Adam Litke wrote:
> On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote:
>> Add RPC to retrieve a guest file. This interface is intended
>> for smaller reads like peeking at logs and /proc and such.
>>
>> Signed-off-by: Michael Roth<mdroth@linux.vnet.ibm.com>
>> ---
>>   virtagent-server.c |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 files changed, 59 insertions(+), 0 deletions(-)
>>
>> diff --git a/virtagent-server.c b/virtagent-server.c
>> index 36fbae2..a430b58 100644
>> --- a/virtagent-server.c
>> +++ b/virtagent-server.c
>> @@ -26,12 +26,71 @@ static bool va_enable_syslog = false; /* enable syslog'ing of RPCs */
>>       syslog(LOG_INFO, "virtagent, %s", msg_buf); \
>>   } while(0)
>>
>> +/* RPC functions common to guest/host daemons */
>> +
>> +/* va_getfile(): return file contents
>> + * rpc return values:
>> + *   - base64-encoded file contents
>> + */
>> +static xmlrpc_value *va_getfile(xmlrpc_env *env,
>> +                                xmlrpc_value *param,
>> +                                void *user_data)
>> +{
>> +    const char *path;
>> +    char *file_contents = NULL;
>> +    char buf[VA_FILEBUF_LEN];
>> +    int fd, ret, count = 0;
>> +    xmlrpc_value *result = NULL;
>> +
>> +    /* parse argument array */
>> +    xmlrpc_decompose_value(env, param, "(s)",&path);
>> +    if (env->fault_occurred) {
>> +        return NULL;
>> +    }
>> +
>> +    SLOG("va_getfile(), path:%s", path);
>
> I would log all calls (even those that would fail
> xmlrpc_decompose_value(). You might catch someone trying to do something
> tricky.
>

Agreed, I'll fix this up for the next round

>> +
>> +    fd = open(path, O_RDONLY);
>> +    if (fd == -1) {
>> +        LOG("open failed: %s", strerror(errno));
>> +        xmlrpc_faultf(env, "open failed: %s", strerror(errno));
>> +        return NULL;
>> +    }
>> +
>> +    while ((ret = read(fd, buf, VA_FILEBUF_LEN))>  0) {
>> +        file_contents = qemu_realloc(file_contents, count + VA_FILEBUF_LEN);
>> +        memcpy(file_contents + count, buf, ret);
>> +        count += ret;
>> +        if (count>  VA_GETFILE_MAX) {
>> +            xmlrpc_faultf(env, "max file size (%d bytes) exceeded",
>> +                          VA_GETFILE_MAX);
>> +            goto EXIT_CLOSE_BAD;
>> +        }
>> +    }
>> +    if (ret == -1) {
>> +        LOG("read failed: %s", strerror(errno));
>> +        xmlrpc_faultf(env, "read failed: %s", strerror(errno));
>> +        goto EXIT_CLOSE_BAD;
>> +    }
>> +
>> +    result = xmlrpc_build_value(env, "6", file_contents, count);
>> +
>> +EXIT_CLOSE_BAD:
>> +    if (file_contents) {
>> +        qemu_free(file_contents);
>> +    }
>> +    close(fd);
>> +    return result;
>> +}
>> +
>>   typedef struct RPCFunction {
>>       xmlrpc_value *(*func)(xmlrpc_env *env, xmlrpc_value *param, void *unused);
>>       const char *func_name;
>>   } RPCFunction;
>>
>>   static RPCFunction guest_functions[] = {
>> +    { .func = va_getfile,
>> +      .func_name = "va.getfile" },
>>       { NULL, NULL }
>>   };
>>   static RPCFunction host_functions[] = {
>
Jes Sorensen Dec. 7, 2010, 2:18 p.m. UTC | #3
On 12/03/10 19:03, Michael Roth wrote:
> Add RPC to retrieve a guest file. This interface is intended
> for smaller reads like peeking at logs and /proc and such.

I think you need to redesign your approach here..... see below.

In 06/21 you had:

+#define VA_GETFILE_MAX 1 << 30

> +    while ((ret = read(fd, buf, VA_FILEBUF_LEN)) > 0) {
> +        file_contents = qemu_realloc(file_contents, count + VA_FILEBUF_LEN);
> +        memcpy(file_contents + count, buf, ret);

UH OH!

realloc will do a malloc and a memcpy of the data, this is going to turn
into a really nasty malloc memcpy loop if someone tries to transfer a
large file using this method. You could end up with almost 4GB of
parallel allocations for a guest that might have been configured as a
1GB guest. This would allow the guest to effectively blow the expected
memory consumption out of the water. It's not exactly going to be fast
either :(

Maybe use a tmp file, and write data out to that as you receive it to
avoid the malloc ballooning.

Jes
Adam Litke Dec. 7, 2010, 4 p.m. UTC | #4
Hi Jes, you raise some good points and pitfalls with the current getfile
approach.  I've been thinking about an alternative and am wondering what
you (and others) think...

First off, I think we should switch to a copyfile() API that allows us
to avoid presenting the file contents to the user.  Neither the human
monitor nor the control monitor are designed to be file pagers.  Let the
user decide how to consume the data once it has been transferred.  Now
we don't need to care if the file is binary or text.

The virtagent RPC protocol is bi-directional and supports asynchronous
events.  We can use these to implement a better copyfile RPC that can
transfer larger files without wasting memory.  The host issues a
copyfile(<guest-path>, <host-path>) RPC.  The immediate result of this
call will indicate whether the guest is able to initiate the transfer.
The guest will generate a series of events (<offset>, <size>, <payload>)
until the entire contents has been transferred.  The host and guest
could negotiate the chunk size if necessary.  Once the transfer is
complete, the guest sends a final event to indicate this (<file-size>,
0).

This interface could be integrated into the monitor with a pair of
commands (va_copyfile and info va_copyfile), the former used to initiate
transfers and the latter to check on the status.

Thoughts on this?

On Tue, 2010-12-07 at 15:18 +0100, Jes Sorensen wrote:
> On 12/03/10 19:03, Michael Roth wrote:
> > Add RPC to retrieve a guest file. This interface is intended
> > for smaller reads like peeking at logs and /proc and such.
> 
> I think you need to redesign your approach here..... see below.
> 
> In 06/21 you had:
> 
> +#define VA_GETFILE_MAX 1 << 30
> 
> > +    while ((ret = read(fd, buf, VA_FILEBUF_LEN)) > 0) {
> > +        file_contents = qemu_realloc(file_contents, count + VA_FILEBUF_LEN);
> > +        memcpy(file_contents + count, buf, ret);
> 
> UH OH!
> 
> realloc will do a malloc and a memcpy of the data, this is going to turn
> into a really nasty malloc memcpy loop if someone tries to transfer a
> large file using this method. You could end up with almost 4GB of
> parallel allocations for a guest that might have been configured as a
> 1GB guest. This would allow the guest to effectively blow the expected
> memory consumption out of the water. It's not exactly going to be fast
> either :(
> 
> Maybe use a tmp file, and write data out to that as you receive it to
> avoid the malloc ballooning.
> 
> Jes
Jes Sorensen Dec. 8, 2010, 7:19 p.m. UTC | #5
On 12/07/10 17:00, Adam Litke wrote:
> Hi Jes, you raise some good points and pitfalls with the current getfile
> approach.  I've been thinking about an alternative and am wondering what
> you (and others) think...
> 
> First off, I think we should switch to a copyfile() API that allows us
> to avoid presenting the file contents to the user.  Neither the human
> monitor nor the control monitor are designed to be file pagers.  Let the
> user decide how to consume the data once it has been transferred.  Now
> we don't need to care if the file is binary or text.
> 
> The virtagent RPC protocol is bi-directional and supports asynchronous
> events.  We can use these to implement a better copyfile RPC that can
> transfer larger files without wasting memory.  The host issues a
> copyfile(<guest-path>, <host-path>) RPC.  The immediate result of this
> call will indicate whether the guest is able to initiate the transfer.
> The guest will generate a series of events (<offset>, <size>, <payload>)
> until the entire contents has been transferred.  The host and guest
> could negotiate the chunk size if necessary.  Once the transfer is
> complete, the guest sends a final event to indicate this (<file-size>,
> 0).
> 
> This interface could be integrated into the monitor with a pair of
> commands (va_copyfile and info va_copyfile), the former used to initiate
> transfers and the latter to check on the status.
> 
> Thoughts on this?

Hi Adam,

This sounds a lot safer than the current approach. Intuitively I would
think it should be the host controlling the copy, but otherwise it
sounds good. Or is there a reason why the guest should control it?

I think it is vital that we do it in a way where a copy cannot blow
QEMU's memory consumption out of the water, but the approach you suggest
seems to take care of that.

Cheers,
Jes
Adam Litke Dec. 9, 2010, 2:40 p.m. UTC | #6
On Wed, 2010-12-08 at 20:19 +0100, Jes Sorensen wrote:
> On 12/07/10 17:00, Adam Litke wrote:
> > Hi Jes, you raise some good points and pitfalls with the current getfile
> > approach.  I've been thinking about an alternative and am wondering what
> > you (and others) think...
> > 
> > First off, I think we should switch to a copyfile() API that allows us
> > to avoid presenting the file contents to the user.  Neither the human
> > monitor nor the control monitor are designed to be file pagers.  Let the
> > user decide how to consume the data once it has been transferred.  Now
> > we don't need to care if the file is binary or text.
> > 
> > The virtagent RPC protocol is bi-directional and supports asynchronous
> > events.  We can use these to implement a better copyfile RPC that can
> > transfer larger files without wasting memory.  The host issues a
> > copyfile(<guest-path>, <host-path>) RPC.  The immediate result of this
> > call will indicate whether the guest is able to initiate the transfer.
> > The guest will generate a series of events (<offset>, <size>, <payload>)
> > until the entire contents has been transferred.  The host and guest
> > could negotiate the chunk size if necessary.  Once the transfer is
> > complete, the guest sends a final event to indicate this (<file-size>,
> > 0).
> > 
> > This interface could be integrated into the monitor with a pair of
> > commands (va_copyfile and info va_copyfile), the former used to initiate
> > transfers and the latter to check on the status.
> > 
> > Thoughts on this?
> 
> Hi Adam,
> 
> This sounds a lot safer than the current approach. Intuitively I would
> think it should be the host controlling the copy, but otherwise it
> sounds good. Or is there a reason why the guest should control it?

Actually, a host-controlled interface would be both simpler to implement
(on both ends) and would concentrate control in the host (which is what
we probably want anyway).  

> I think it is vital that we do it in a way where a copy cannot blow
> QEMU's memory consumption out of the water, but the approach you suggest
> seems to take care of that.
> 
> Cheers,
> Jes
>
Michael Roth Dec. 9, 2010, 9:04 p.m. UTC | #7
On 12/09/2010 08:40 AM, Adam Litke wrote:
> On Wed, 2010-12-08 at 20:19 +0100, Jes Sorensen wrote:
>> On 12/07/10 17:00, Adam Litke wrote:
>>> Hi Jes, you raise some good points and pitfalls with the current getfile
>>> approach.  I've been thinking about an alternative and am wondering what
>>> you (and others) think...
>>>
>>> First off, I think we should switch to a copyfile() API that allows us
>>> to avoid presenting the file contents to the user.  Neither the human
>>> monitor nor the control monitor are designed to be file pagers.  Let the
>>> user decide how to consume the data once it has been transferred.  Now
>>> we don't need to care if the file is binary or text.
>>>
>>> The virtagent RPC protocol is bi-directional and supports asynchronous
>>> events.  We can use these to implement a better copyfile RPC that can
>>> transfer larger files without wasting memory.  The host issues a
>>> copyfile(<guest-path>,<host-path>) RPC.  The immediate result of this
>>> call will indicate whether the guest is able to initiate the transfer.
>>> The guest will generate a series of events (<offset>,<size>,<payload>)
>>> until the entire contents has been transferred.  The host and guest
>>> could negotiate the chunk size if necessary.  Once the transfer is
>>> complete, the guest sends a final event to indicate this (<file-size>,
>>> 0).
>>>
>>> This interface could be integrated into the monitor with a pair of
>>> commands (va_copyfile and info va_copyfile), the former used to initiate
>>> transfers and the latter to check on the status.
>>>
>>> Thoughts on this?
>>
>> Hi Adam,
>>
>> This sounds a lot safer than the current approach. Intuitively I would
>> think it should be the host controlling the copy, but otherwise it
>> sounds good. Or is there a reason why the guest should control it?
>
> Actually, a host-controlled interface would be both simpler to implement
> (on both ends) and would concentrate control in the host (which is what
> we probably want anyway).
>

I also like the host-controlled mechanism. I think the difficulty is 
about the same either way though. Both would require an FD to be kept 
open, and some mechanism to read/seek in chunks and then send it back.

I don't think this should replace the underlying RPC for viewfile 
however. I'd much rather it be an additional RPC, and we just put strict 
limits on the size of files viewfile/getfile can handle and make it 
clear that it is intended for quickly viewing text. I'll rename the 
getfile RPC to viewfile to make this a bit clearer as well.

>> I think it is vital that we do it in a way where a copy cannot blow
>> QEMU's memory consumption out of the water, but the approach you suggest
>> seems to take care of that.
>>
>> Cheers,
>> Jes
>>
>
Jes Sorensen Dec. 10, 2010, 6:38 a.m. UTC | #8
On 12/09/10 22:04, Michael Roth wrote:
> On 12/09/2010 08:40 AM, Adam Litke wrote:
>> Actually, a host-controlled interface would be both simpler to implement
>> (on both ends) and would concentrate control in the host (which is what
>> we probably want anyway).
> 
> I also like the host-controlled mechanism. I think the difficulty is
> about the same either way though. Both would require an FD to be kept
> open, and some mechanism to read/seek in chunks and then send it back.

> I don't think this should replace the underlying RPC for viewfile
> however. I'd much rather it be an additional RPC, and we just put strict
> limits on the size of files viewfile/getfile can handle and make it
> clear that it is intended for quickly viewing text. I'll rename the
> getfile RPC to viewfile to make this a bit clearer as well.

I really think the viewfile stuff needs to go away, it is a rats trap
for things that can go wrong. It would really only be useful from the
human monitor, whereas any other application will be happy to implement
it at the higher level.

Having it in the human monitor you risk messing it up by viewing a
binary file and you end up having to kill you guest to fix it.

Putting artificial limits on it means someone will eventually try to
change those.

Lets keep this at the higher level, where it IMHO belongs.

Cheers,
Jes
diff mbox

Patch

diff --git a/virtagent-server.c b/virtagent-server.c
index 36fbae2..a430b58 100644
--- a/virtagent-server.c
+++ b/virtagent-server.c
@@ -26,12 +26,71 @@  static bool va_enable_syslog = false; /* enable syslog'ing of RPCs */
     syslog(LOG_INFO, "virtagent, %s", msg_buf); \
 } while(0)
 
+/* RPC functions common to guest/host daemons */
+
+/* va_getfile(): return file contents
+ * rpc return values:
+ *   - base64-encoded file contents
+ */
+static xmlrpc_value *va_getfile(xmlrpc_env *env,
+                                xmlrpc_value *param,
+                                void *user_data)
+{
+    const char *path;
+    char *file_contents = NULL;
+    char buf[VA_FILEBUF_LEN];
+    int fd, ret, count = 0;
+    xmlrpc_value *result = NULL;
+
+    /* parse argument array */
+    xmlrpc_decompose_value(env, param, "(s)", &path);
+    if (env->fault_occurred) {
+        return NULL;
+    }
+
+    SLOG("va_getfile(), path:%s", path);
+
+    fd = open(path, O_RDONLY);
+    if (fd == -1) {
+        LOG("open failed: %s", strerror(errno));
+        xmlrpc_faultf(env, "open failed: %s", strerror(errno));
+        return NULL;
+    }
+
+    while ((ret = read(fd, buf, VA_FILEBUF_LEN)) > 0) {
+        file_contents = qemu_realloc(file_contents, count + VA_FILEBUF_LEN);
+        memcpy(file_contents + count, buf, ret);
+        count += ret;
+        if (count > VA_GETFILE_MAX) {
+            xmlrpc_faultf(env, "max file size (%d bytes) exceeded",
+                          VA_GETFILE_MAX);
+            goto EXIT_CLOSE_BAD;
+        }
+    }
+    if (ret == -1) {
+        LOG("read failed: %s", strerror(errno));
+        xmlrpc_faultf(env, "read failed: %s", strerror(errno));
+        goto EXIT_CLOSE_BAD;
+    }
+
+    result = xmlrpc_build_value(env, "6", file_contents, count);
+
+EXIT_CLOSE_BAD:
+    if (file_contents) {
+        qemu_free(file_contents);
+    }
+    close(fd);
+    return result;
+}
+
 typedef struct RPCFunction {
     xmlrpc_value *(*func)(xmlrpc_env *env, xmlrpc_value *param, void *unused);
     const char *func_name;
 } RPCFunction;
 
 static RPCFunction guest_functions[] = {
+    { .func = va_getfile,
+      .func_name = "va.getfile" },
     { NULL, NULL }
 };
 static RPCFunction host_functions[] = {