Patchwork [3/3] qga: implement qmp_guest_set_vcpus() for Linux with sysfs

login
register
mail settings
Submitter Laszlo Ersek
Date March 4, 2013, 10:19 p.m.
Message ID <1362435597-20018-4-git-send-email-lersek@redhat.com>
Download mbox | patch
Permalink /patch/224847/
State New
Headers show

Comments

Laszlo Ersek - March 4, 2013, 10:19 p.m.
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
---
 qga/commands-posix.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 51 insertions(+), 0 deletions(-)
Michael Roth - March 5, 2013, 8:09 p.m.
On Mon, Mar 04, 2013 at 11:19:57PM +0100, Laszlo Ersek wrote:
> 
> Signed-off-by: Laszlo Ersek <lersek@redhat.com>
> ---
>  qga/commands-posix.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 51 insertions(+), 0 deletions(-)
> 
> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> index d4b6bdc..1848df8 100644
> --- a/qga/commands-posix.c
> +++ b/qga/commands-posix.c
> @@ -40,6 +40,7 @@ extern char **environ;
>  #include <arpa/inet.h>
>  #include <sys/socket.h>
>  #include <net/if.h>
> +#include <inttypes.h>
> 
>  #ifdef FIFREEZE
>  #define CONFIG_FSFREEZE
> @@ -1178,7 +1179,57 @@ GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
> 
>  void qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
>  {
> +#if defined(__linux__)
> +    const GuestLogicalProcessorList *entry;
> +    Error *local_err = NULL;
> +
> +    for (entry = vcpus; local_err == NULL && entry != NULL;
> +         entry = entry->next) {
> +        const GuestLogicalProcessor *vcpu;
> +
> +        vcpu = entry->value;
> +        if (vcpu->logical_id == 0) {
> +            if (!vcpu->online) {
> +                error_setg(&local_err,
> +                           "unable to offline logical processor #0");
> +            }
> +        } else {
> +            char *buf;
> +            FILE *f;
> +
> +            buf = g_strdup_printf("/sys/devices/system/cpu/cpu%"PRId64
> +                                  "/online", vcpu->logical_id);
> +            f = fopen(buf, "r+");
> +            if (f == NULL) {
> +                error_setg_errno(&local_err, errno, "fopen(\"%s\", \"r+\")",
> +                                 buf);
> +            } else {
> +                unsigned online;
> +
> +                if (fscanf(f, "%u", &online) != 1) {
> +                    error_setg(&local_err, "failed to read or parse \"%s\"",
> +                               buf);
> +                } else if ((online != 0) != vcpu->online) {
> +                    errno = 0;
> +                    rewind(f);
> +                    if (errno != 0 ||
> +                        fprintf(f, "%u\n", (unsigned)vcpu->online) < 0) {

Similar to comments in patch 2: is this supposed to be similar to the
/sys/devices/system/cpu/{probe,release} entries currently documented?

> +                        error_setg_errno(&local_err, errno,
> +                                         "rewind()/fprintf() on \"%s\"", buf);
> +                    }
> +                }
> +
> +                if (fclose(f) == EOF && local_err == NULL) {
> +                    error_setg_errno(&local_err, errno, "fclose(\"%s\")", buf);
> +                }
> +            }
> +            g_free(buf);
> +        }
> +    }
> +    error_propagate(errp, local_err);
> +#else
>      error_set(errp, QERR_UNSUPPORTED);
> +#endif
>  }
> 
>  /* register init/cleanup routines for stateful command groups */
> -- 
> 1.7.1
>
Laszlo Ersek - March 5, 2013, 9:09 p.m.
On 03/05/13 21:09, mdroth wrote:

> Similar to comments in patch 2: is this supposed to be similar to the
> /sys/devices/system/cpu/{probe,release} entries currently documented?

I don't have those on RHEL-6, and they aren't mentioned in upstream
"Documentation/cpu-hotplug.txt".

"Documentation/ABI/testing/sysfs-devices-system-cpu" says about probe &
release:

    Description:    Dynamic addition and removal of CPU's.  This is not hotplug
                    removal, this is meant complete removal/addition of the CPU
                    from the system.

That's not my purpose here, since that would be probably even more
complex than (or the same as) real ACPI CPU hotplug / hot-unplug.

Thanks!
Laszlo
Eric Blake - March 5, 2013, 9:19 p.m.
On 03/04/2013 03:19 PM, Laszlo Ersek wrote:
> Signed-off-by: Laszlo Ersek <lersek@redhat.com>
> ---
>  qga/commands-posix.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 51 insertions(+), 0 deletions(-)
> 
> diff --git a/qga/commands-posix.c b/qga/commands-posix.c
> index d4b6bdc..1848df8 100644
> --- a/qga/commands-posix.c
> +++ b/qga/commands-posix.c
> @@ -40,6 +40,7 @@ extern char **environ;
>  #include <arpa/inet.h>
>  #include <sys/socket.h>
>  #include <net/if.h>
> +#include <inttypes.h>
>  
>  #ifdef FIFREEZE
>  #define CONFIG_FSFREEZE
> @@ -1178,7 +1179,57 @@ GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
>  
>  void qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
>  {
> +#if defined(__linux__)
> +    const GuestLogicalProcessorList *entry;
> +    Error *local_err = NULL;
> +
> +    for (entry = vcpus; local_err == NULL && entry != NULL;
> +         entry = entry->next) {
> +        const GuestLogicalProcessor *vcpu;
> +
> +        vcpu = entry->value;
> +        if (vcpu->logical_id == 0) {
> +            if (!vcpu->online) {
> +                error_setg(&local_err,
> +                           "unable to offline logical processor #0");
> +            }

This is not quite accurate; my understanding is that there are setups
where cpu0 can be offlined.  More accurate would be to reject attempts
to offline ANY cpu where the cpu/cpuNN/online file does not exist (which
will catch the fact that cpu0 must always be on for the hardware you are
testing with).

> +        } else {
> +            char *buf;
> +            FILE *f;
> +
> +            buf = g_strdup_printf("/sys/devices/system/cpu/cpu%"PRId64
> +                                  "/online", vcpu->logical_id);
> +            f = fopen(buf, "r+");
> +            if (f == NULL) {
> +                error_setg_errno(&local_err, errno, "fopen(\"%s\", \"r+\")",
> +                                 buf);

Again, if the file doesn't exist, but the user asked for online, then
this should not be an error.

> +            } else {
> +                unsigned online;
> +
> +                if (fscanf(f, "%u", &online) != 1) {
> +                    error_setg(&local_err, "failed to read or parse \"%s\"",
> +                               buf);

Does doing a scan of the file's existing contents buy us any safety?
Why not just blindly write into the file, instead of first reading it?

> +                } else if ((online != 0) != vcpu->online) {
> +                    errno = 0;
> +                    rewind(f);
> +                    if (errno != 0 ||
> +                        fprintf(f, "%u\n", (unsigned)vcpu->online) < 0) {

Do you really want to be printing NUL or \1?  I though the kernel
interface expected the literal character '0' or '1' (in ascii, \x30 or
\x31).

Why even bother with stdio buffering?  Wouldn't it be simpler to
open()/write() instead of fopen()/fprintf()?

> +                        error_setg_errno(&local_err, errno,
> +                                         "rewind()/fprintf() on \"%s\"", buf);
> +                    }
> +                }
> +
> +                if (fclose(f) == EOF && local_err == NULL) {
> +                    error_setg_errno(&local_err, errno, "fclose(\"%s\")", buf);
> +                }
> +            }
> +            g_free(buf);
> +        }
> +    }
> +    error_propagate(errp, local_err);
> +#else
>      error_set(errp, QERR_UNSUPPORTED);
> +#endif
>  }
>  
>  /* register init/cleanup routines for stateful command groups */
>
Laszlo Ersek - March 5, 2013, 11:23 p.m.
On 03/05/13 22:19, Eric Blake wrote:
> On 03/04/2013 03:19 PM, Laszlo Ersek wrote:

>> +            } else {
>> +                unsigned online;
>> +
>> +                if (fscanf(f, "%u", &online) != 1) {
>> +                    error_setg(&local_err, "failed to read or parse \"%s\"",
>> +                               buf);
> 
> Does doing a scan of the file's existing contents buy us any safety?
> Why not just blindly write into the file, instead of first reading it?

:)

For an already online CPU:

# dd of=/sys/devices/system/cpu/cpu1/online bs=1 count=1 <<<1
dd: writing `/sys/devices/system/cpu/cpu1/online': Invalid argument
[...]

In the kernel,

store_online() [drivers/base/cpu.c]
  cpu_up() [kernel/cpu.c]
    _cpu_up()

	if (cpu_online(cpu) || !cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}

This logic seems to have been present since the origin of the current
Linux repo (1da177e4 "Linux-2.6.12-rc2").

Checking the history tree at
<git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git>, the
logic dates back to the very first committed version of cpu_up():

commit c5e062079a7090891ea5cd1b23a7eab52b156b2a
Author: Rusty Russell <rusty@rustcorp.com.au>
Date:   Fri Jul 26 01:28:07 2002 -0700

    [PATCH] Hot-plug CPU Boot Changes
...

> 
>> +                } else if ((online != 0) != vcpu->online) {
>> +                    errno = 0;
>> +                    rewind(f);
>> +                    if (errno != 0 ||
>> +                        fprintf(f, "%u\n", (unsigned)vcpu->online) < 0) {
> 
> Do you really want to be printing NUL or \1?  I though the kernel
> interface expected the literal character '0' or '1' (in ascii, \x30 or
> \x31).

I'm using the %u conversion specifier, which turns the unsigned int
argument into decimal string representation. Same as %d for signed int.

Thanks for all review comments!
Laszlo
Eric Blake - March 5, 2013, 11:37 p.m.
On 03/05/2013 04:23 PM, Laszlo Ersek wrote:
> For an already online CPU:
> 
> # dd of=/sys/devices/system/cpu/cpu1/online bs=1 count=1 <<<1
> dd: writing `/sys/devices/system/cpu/cpu1/online': Invalid argument
> [...]

So we really do have to read existing state to avoid an error when the
user didn't request a state change.  Thanks for the research, and a
convincing answer :)

>>> +                    if (errno != 0 ||
>>> +                        fprintf(f, "%u\n", (unsigned)vcpu->online) < 0) {
>>
>> Do you really want to be printing NUL or \1?  I though the kernel
>> interface expected the literal character '0' or '1' (in ascii, \x30 or
>> \x31).
> 
> I'm using the %u conversion specifier, which turns the unsigned int
> argument into decimal string representation. Same as %d for signed int.

I guess I had in my mind %c instead of %u; still, I can't help but
wonder if fprintf() and buffering is overkill, compared to just doing
something like this:
 write(fd, &"01"[vcpu->online], 1);

(okay, I hope you would favor readability over my compact
representation, but you get the point).  Oh, and I guess I didn't check
whether a trailing newline is essential to the kernel interpreting the
input, so maybe it would have to be more like:

 char buf[2] = { "01"[vcpu->online], '\n' };
 write(fd, buf, 2);
Laszlo Ersek - March 6, 2013, 12:44 a.m.
On 03/06/13 00:37, Eric Blake wrote:

> I guess I had in my mind %c instead of %u; still, I can't help but
> wonder if fprintf() and buffering is overkill, compared to just doing
> something like this:
>  write(fd, &"01"[vcpu->online], 1);
> 
> (okay, I hope you would favor readability over my compact
> representation, but you get the point).

I'd be crucified on qemu-devel if I tried to pull off something like the
above :)

> Oh, and I guess I didn't check
> whether a trailing newline is essential to the kernel interpreting the
> input, so maybe it would have to be more like:
> 
>  char buf[2] = { "01"[vcpu->online], '\n' };
>  write(fd, buf, 2);

The newline is probably not important.

Anyway I'd prefer to avoid direct write()s with nbyte > 1 as I'd have to
handle partial transfers, if for nothing else than principle. (IIRC
avoiding that loop was my main motivation for stdio.)

I guess I'll use open(O_DIRECTORY) + openat("online") + read(..., 1) +
pwrite(..., 1, 0). RHEL-5 seems to support all of these (I can't find
O_SEARCH in the manual there, which is why I'll probably omit it from
open()). If people still complain then I can switch from open(directory)
+ openat("online") to lstat(directory) + open("/.../online").

Thanks
Laszlo
Paolo Bonzini - March 6, 2013, 9:57 a.m.
> On 03/06/13 00:37, Eric Blake wrote:
> 
> > I guess I had in my mind %c instead of %u; still, I can't help but
> > wonder if fprintf() and buffering is overkill, compared to just
> > doing
> > something like this:
> >  write(fd, &"01"[vcpu->online], 1);
> > 
> > (okay, I hope you would favor readability over my compact
> > representation, but you get the point).
> 
> I'd be crucified on qemu-devel if I tried to pull off something like
> the above :)

I think stdio is just fine...

Paolo
Eric Blake - March 6, 2013, 1:46 p.m.
On 03/05/2013 05:44 PM, Laszlo Ersek wrote:
> 
> Anyway I'd prefer to avoid direct write()s with nbyte > 1 as I'd have to
> handle partial transfers, if for nothing else than principle. (IIRC
> avoiding that loop was my main motivation for stdio.)

Fair enough - this code is not a hotspot, so stdio is fine.

Patch

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index d4b6bdc..1848df8 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -40,6 +40,7 @@  extern char **environ;
 #include <arpa/inet.h>
 #include <sys/socket.h>
 #include <net/if.h>
+#include <inttypes.h>
 
 #ifdef FIFREEZE
 #define CONFIG_FSFREEZE
@@ -1178,7 +1179,57 @@  GuestLogicalProcessorList *qmp_guest_get_vcpus(Error **errp)
 
 void qmp_guest_set_vcpus(GuestLogicalProcessorList *vcpus, Error **errp)
 {
+#if defined(__linux__)
+    const GuestLogicalProcessorList *entry;
+    Error *local_err = NULL;
+
+    for (entry = vcpus; local_err == NULL && entry != NULL;
+         entry = entry->next) {
+        const GuestLogicalProcessor *vcpu;
+
+        vcpu = entry->value;
+        if (vcpu->logical_id == 0) {
+            if (!vcpu->online) {
+                error_setg(&local_err,
+                           "unable to offline logical processor #0");
+            }
+        } else {
+            char *buf;
+            FILE *f;
+
+            buf = g_strdup_printf("/sys/devices/system/cpu/cpu%"PRId64
+                                  "/online", vcpu->logical_id);
+            f = fopen(buf, "r+");
+            if (f == NULL) {
+                error_setg_errno(&local_err, errno, "fopen(\"%s\", \"r+\")",
+                                 buf);
+            } else {
+                unsigned online;
+
+                if (fscanf(f, "%u", &online) != 1) {
+                    error_setg(&local_err, "failed to read or parse \"%s\"",
+                               buf);
+                } else if ((online != 0) != vcpu->online) {
+                    errno = 0;
+                    rewind(f);
+                    if (errno != 0 ||
+                        fprintf(f, "%u\n", (unsigned)vcpu->online) < 0) {
+                        error_setg_errno(&local_err, errno,
+                                         "rewind()/fprintf() on \"%s\"", buf);
+                    }
+                }
+
+                if (fclose(f) == EOF && local_err == NULL) {
+                    error_setg_errno(&local_err, errno, "fclose(\"%s\")", buf);
+                }
+            }
+            g_free(buf);
+        }
+    }
+    error_propagate(errp, local_err);
+#else
     error_set(errp, QERR_UNSUPPORTED);
+#endif
 }
 
 /* register init/cleanup routines for stateful command groups */