diff mbox series

[v3,3/4] qga: Add optional `merge-output` flag to guest-exec qapi

Message ID 1575b08b853385eeaec6159b88b8c66525baec65.1677617035.git.dxu@dxuuu.xyz
State New
Headers show
Series qga: Add optional `merge-output` flag to guest-exec QAPI | expand

Commit Message

Daniel Xu Feb. 28, 2023, 8:48 p.m. UTC
Currently, the captured output (via `capture-output`) is segregated into
separate GuestExecStatus fields (`out-data` and `err-data`). This means
that downstream consumers have no way to reassemble the captured data
back into the original stream.

This is relevant for chatty and semi-interactive (ie. read only) CLI
tools.  Such tools may deliberately interleave stdout and stderr for
visual effect. If segregated, the output becomes harder to visually
understand.

This commit adds a new optional flag to the guest-exec qapi to merge the
output streams such that consumers can have a pristine view of the
original command output.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 qga/commands.c       | 28 ++++++++++++++++++++++++++--
 qga/qapi-schema.json |  6 +++++-
 2 files changed, 31 insertions(+), 3 deletions(-)

Comments

Daniel P. Berrangé March 1, 2023, 9:03 a.m. UTC | #1
On Tue, Feb 28, 2023 at 01:48:03PM -0700, Daniel Xu wrote:
> Currently, the captured output (via `capture-output`) is segregated into
> separate GuestExecStatus fields (`out-data` and `err-data`). This means
> that downstream consumers have no way to reassemble the captured data
> back into the original stream.
> 
> This is relevant for chatty and semi-interactive (ie. read only) CLI
> tools.  Such tools may deliberately interleave stdout and stderr for
> visual effect. If segregated, the output becomes harder to visually
> understand.
> 
> This commit adds a new optional flag to the guest-exec qapi to merge the
> output streams such that consumers can have a pristine view of the
> original command output.
> 
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> ---
>  qga/commands.c       | 28 ++++++++++++++++++++++++++--
>  qga/qapi-schema.json |  6 +++++-
>  2 files changed, 31 insertions(+), 3 deletions(-)
> 
> diff --git a/qga/commands.c b/qga/commands.c
> index 172826f8f8..cfce13d034 100644
> --- a/qga/commands.c
> +++ b/qga/commands.c
> @@ -270,12 +270,26 @@ static void guest_exec_child_watch(GPid pid, gint status, gpointer data)
>      g_spawn_close_pid(pid);
>  }
>  
> -/** Reset ignored signals back to default. */
>  static void guest_exec_task_setup(gpointer data)
>  {
>  #if !defined(G_OS_WIN32)
> +    bool has_merge = *(bool *)data;
>      struct sigaction sigact;
>  
> +    if (has_merge) {
> +        /*
> +         * FIXME: When `GLIB_VERSION_MIN_REQUIRED` is bumped to 2.58+, use
> +         * g_spawn_async_with_fds() to be portable on windows. The current
> +         * logic does not work on windows b/c `GSpawnChildSetupFunc` is run
> +         * inside the parent, not the child.
> +         */
> +        if (dup2(STDOUT_FILENO, STDERR_FILENO) != 0) {
> +            slog("dup2() failed to merge stderr into stdout: %s",
> +                 strerror(errno));
> +        }
> +    }
> +
> +    /* Reset ignored signals back to default. */
>      memset(&sigact, 0, sizeof(struct sigaction));
>      sigact.sa_handler = SIG_DFL;
>  
> @@ -384,6 +398,7 @@ GuestExec *qmp_guest_exec(const char *path,
>                         bool has_env, strList *env,
>                         const char *input_data,
>                         bool has_capture_output, bool capture_output,
> +                       bool has_merge_output, bool merge_output,
>                         Error **errp)
>  {
>      GPid pid;
> @@ -397,6 +412,7 @@ GuestExec *qmp_guest_exec(const char *path,
>      GIOChannel *in_ch, *out_ch, *err_ch;
>      GSpawnFlags flags;
>      bool has_output = (has_capture_output && capture_output);
> +    bool has_merge = (has_merge_output && merge_output);
>      g_autofree uint8_t *input = NULL;
>      size_t ninput = 0;
>  
> @@ -410,6 +426,14 @@ GuestExec *qmp_guest_exec(const char *path,
>          }
>      }
>  
> +#if defined(G_OS_WIN32)
> +    /* FIXME: see comment in guest_exec_task_setup() */
> +    if (has_merge) {
> +        error_setg(errp, "merge-output unsupported on windows");
> +        return NULL;
> +    }
> +#endif
> +
>      argv = guest_exec_get_args(&arglist, true);
>      envp = has_env ? guest_exec_get_args(env, false) : NULL;
>  
> @@ -420,7 +444,7 @@ GuestExec *qmp_guest_exec(const char *path,
>      }
>  
>      ret = g_spawn_async_with_pipes(NULL, argv, envp, flags,
> -            guest_exec_task_setup, NULL, &pid, input_data ? &in_fd : NULL,
> +            guest_exec_task_setup, &has_merge, &pid, input_data ? &in_fd : NULL,
>              has_output ? &out_fd : NULL, has_output ? &err_fd : NULL, &gerr);
>      if (!ret) {
>          error_setg(errp, QERR_QGA_COMMAND_FAILED, gerr->message);
> diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
> index 796434ed34..9c2367acdf 100644
> --- a/qga/qapi-schema.json
> +++ b/qga/qapi-schema.json
> @@ -1211,6 +1211,9 @@
>  # @input-data: data to be passed to process stdin (base64 encoded)
>  # @capture-output: bool flag to enable capture of
>  #                  stdout/stderr of running process. defaults to false.
> +# @merge-output: bool flag to merge stdout/stderr of running process
> +#                into stdout. only effective if used with @capture-output.
> +#                not effective on windows guests. defaults to false. (since 8.0)
>  #
>  # Returns: PID on success.
>  #
> @@ -1218,7 +1221,8 @@
>  ##
>  { 'command': 'guest-exec',
>    'data':    { 'path': 'str', '*arg': ['str'], '*env': ['str'],
> -               '*input-data': 'str', '*capture-output': 'bool' },
> +               '*input-data': 'str', '*capture-output': 'bool',
> +               '*merge-output': 'bool' },
>    'returns': 'GuestExec' }

I feel like 'merge-output' is a somewhat specialized policy. What if
we want to capture only stderr and discard stdout, or vice versa?
IMHO, the original 'capture-output' field was poorly designed and
should have been an enum. I believe we can retrofit greater
flexibility by using an enum plus an alternate, thus:

 { 'enum': 'GuestExecCaptureOutputMode',
   'data': [ 'none', 'stdout', 'stderr', 'all' ] }

 { 'alternate': 'GuestExecCaptureOutput',
   'data': { 'flag': 'bool',
             'mode': 'GuestExecCaptureOutputMode'} }

And then change 'guest-exec':

    '*capture-output': 'GuestExecCaptureOutput'

the use of the alternate makes this backwards compatible, as we can
distinguish a JSON bool on the wire from an enum represented as a
string.

This should be easy to implement, as it just involves selectively
toggling G_SPAWN_STDOUT_TO_DEV_NULL / G_SPAWN_STDERR_TO_DEV_NULL
flags, instead of setting them both together.

With regards,
Daniel
Daniel Xu March 1, 2023, 4 p.m. UTC | #2
Hi Daniel,

On Wed, Mar 01, 2023 at 09:03:53AM +0000, Daniel P. Berrangé wrote:
> On Tue, Feb 28, 2023 at 01:48:03PM -0700, Daniel Xu wrote:
> > Currently, the captured output (via `capture-output`) is segregated into
> > separate GuestExecStatus fields (`out-data` and `err-data`). This means
> > that downstream consumers have no way to reassemble the captured data
> > back into the original stream.
> > 
> > This is relevant for chatty and semi-interactive (ie. read only) CLI
> > tools.  Such tools may deliberately interleave stdout and stderr for
> > visual effect. If segregated, the output becomes harder to visually
> > understand.
> > 
> > This commit adds a new optional flag to the guest-exec qapi to merge the
> > output streams such that consumers can have a pristine view of the
> > original command output.
> > 
> > Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> > ---
> >  qga/commands.c       | 28 ++++++++++++++++++++++++++--
> >  qga/qapi-schema.json |  6 +++++-
> >  2 files changed, 31 insertions(+), 3 deletions(-)
> > 
> > diff --git a/qga/commands.c b/qga/commands.c
> > index 172826f8f8..cfce13d034 100644
> > --- a/qga/commands.c
> > +++ b/qga/commands.c
> > @@ -270,12 +270,26 @@ static void guest_exec_child_watch(GPid pid, gint status, gpointer data)
> >      g_spawn_close_pid(pid);
> >  }
> >  
> > -/** Reset ignored signals back to default. */
> >  static void guest_exec_task_setup(gpointer data)
> >  {
> >  #if !defined(G_OS_WIN32)
> > +    bool has_merge = *(bool *)data;
> >      struct sigaction sigact;
> >  
> > +    if (has_merge) {
> > +        /*
> > +         * FIXME: When `GLIB_VERSION_MIN_REQUIRED` is bumped to 2.58+, use
> > +         * g_spawn_async_with_fds() to be portable on windows. The current
> > +         * logic does not work on windows b/c `GSpawnChildSetupFunc` is run
> > +         * inside the parent, not the child.
> > +         */
> > +        if (dup2(STDOUT_FILENO, STDERR_FILENO) != 0) {
> > +            slog("dup2() failed to merge stderr into stdout: %s",
> > +                 strerror(errno));
> > +        }
> > +    }
> > +
> > +    /* Reset ignored signals back to default. */
> >      memset(&sigact, 0, sizeof(struct sigaction));
> >      sigact.sa_handler = SIG_DFL;
> >  
> > @@ -384,6 +398,7 @@ GuestExec *qmp_guest_exec(const char *path,
> >                         bool has_env, strList *env,
> >                         const char *input_data,
> >                         bool has_capture_output, bool capture_output,
> > +                       bool has_merge_output, bool merge_output,
> >                         Error **errp)
> >  {
> >      GPid pid;
> > @@ -397,6 +412,7 @@ GuestExec *qmp_guest_exec(const char *path,
> >      GIOChannel *in_ch, *out_ch, *err_ch;
> >      GSpawnFlags flags;
> >      bool has_output = (has_capture_output && capture_output);
> > +    bool has_merge = (has_merge_output && merge_output);
> >      g_autofree uint8_t *input = NULL;
> >      size_t ninput = 0;
> >  
> > @@ -410,6 +426,14 @@ GuestExec *qmp_guest_exec(const char *path,
> >          }
> >      }
> >  
> > +#if defined(G_OS_WIN32)
> > +    /* FIXME: see comment in guest_exec_task_setup() */
> > +    if (has_merge) {
> > +        error_setg(errp, "merge-output unsupported on windows");
> > +        return NULL;
> > +    }
> > +#endif
> > +
> >      argv = guest_exec_get_args(&arglist, true);
> >      envp = has_env ? guest_exec_get_args(env, false) : NULL;
> >  
> > @@ -420,7 +444,7 @@ GuestExec *qmp_guest_exec(const char *path,
> >      }
> >  
> >      ret = g_spawn_async_with_pipes(NULL, argv, envp, flags,
> > -            guest_exec_task_setup, NULL, &pid, input_data ? &in_fd : NULL,
> > +            guest_exec_task_setup, &has_merge, &pid, input_data ? &in_fd : NULL,
> >              has_output ? &out_fd : NULL, has_output ? &err_fd : NULL, &gerr);
> >      if (!ret) {
> >          error_setg(errp, QERR_QGA_COMMAND_FAILED, gerr->message);
> > diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
> > index 796434ed34..9c2367acdf 100644
> > --- a/qga/qapi-schema.json
> > +++ b/qga/qapi-schema.json
> > @@ -1211,6 +1211,9 @@
> >  # @input-data: data to be passed to process stdin (base64 encoded)
> >  # @capture-output: bool flag to enable capture of
> >  #                  stdout/stderr of running process. defaults to false.
> > +# @merge-output: bool flag to merge stdout/stderr of running process
> > +#                into stdout. only effective if used with @capture-output.
> > +#                not effective on windows guests. defaults to false. (since 8.0)
> >  #
> >  # Returns: PID on success.
> >  #
> > @@ -1218,7 +1221,8 @@
> >  ##
> >  { 'command': 'guest-exec',
> >    'data':    { 'path': 'str', '*arg': ['str'], '*env': ['str'],
> > -               '*input-data': 'str', '*capture-output': 'bool' },
> > +               '*input-data': 'str', '*capture-output': 'bool',
> > +               '*merge-output': 'bool' },
> >    'returns': 'GuestExec' }
> 
> I feel like 'merge-output' is a somewhat specialized policy. What if
> we want to capture only stderr and discard stdout, or vice versa?
> IMHO, the original 'capture-output' field was poorly designed and
> should have been an enum. I believe we can retrofit greater
> flexibility by using an enum plus an alternate, thus:
> 
>  { 'enum': 'GuestExecCaptureOutputMode',
>    'data': [ 'none', 'stdout', 'stderr', 'all' ] }
> 
>  { 'alternate': 'GuestExecCaptureOutput',
>    'data': { 'flag': 'bool',
>              'mode': 'GuestExecCaptureOutputMode'} }
> 
> And then change 'guest-exec':
> 
>     '*capture-output': 'GuestExecCaptureOutput'
> 
> the use of the alternate makes this backwards compatible, as we can
> distinguish a JSON bool on the wire from an enum represented as a
> string.
> 
> This should be easy to implement, as it just involves selectively
> toggling G_SPAWN_STDOUT_TO_DEV_NULL / G_SPAWN_STDERR_TO_DEV_NULL
> flags, instead of setting them both together.

Thank you for taking a look. What you're describing makes sense to me.

I'll split out the first 2 sanitizer fixes in the series today. I'll
rework the rest of the patches per your suggestion, likely this weekend.

Thanks,
Daniel

[...]
diff mbox series

Patch

diff --git a/qga/commands.c b/qga/commands.c
index 172826f8f8..cfce13d034 100644
--- a/qga/commands.c
+++ b/qga/commands.c
@@ -270,12 +270,26 @@  static void guest_exec_child_watch(GPid pid, gint status, gpointer data)
     g_spawn_close_pid(pid);
 }
 
-/** Reset ignored signals back to default. */
 static void guest_exec_task_setup(gpointer data)
 {
 #if !defined(G_OS_WIN32)
+    bool has_merge = *(bool *)data;
     struct sigaction sigact;
 
+    if (has_merge) {
+        /*
+         * FIXME: When `GLIB_VERSION_MIN_REQUIRED` is bumped to 2.58+, use
+         * g_spawn_async_with_fds() to be portable on windows. The current
+         * logic does not work on windows b/c `GSpawnChildSetupFunc` is run
+         * inside the parent, not the child.
+         */
+        if (dup2(STDOUT_FILENO, STDERR_FILENO) != 0) {
+            slog("dup2() failed to merge stderr into stdout: %s",
+                 strerror(errno));
+        }
+    }
+
+    /* Reset ignored signals back to default. */
     memset(&sigact, 0, sizeof(struct sigaction));
     sigact.sa_handler = SIG_DFL;
 
@@ -384,6 +398,7 @@  GuestExec *qmp_guest_exec(const char *path,
                        bool has_env, strList *env,
                        const char *input_data,
                        bool has_capture_output, bool capture_output,
+                       bool has_merge_output, bool merge_output,
                        Error **errp)
 {
     GPid pid;
@@ -397,6 +412,7 @@  GuestExec *qmp_guest_exec(const char *path,
     GIOChannel *in_ch, *out_ch, *err_ch;
     GSpawnFlags flags;
     bool has_output = (has_capture_output && capture_output);
+    bool has_merge = (has_merge_output && merge_output);
     g_autofree uint8_t *input = NULL;
     size_t ninput = 0;
 
@@ -410,6 +426,14 @@  GuestExec *qmp_guest_exec(const char *path,
         }
     }
 
+#if defined(G_OS_WIN32)
+    /* FIXME: see comment in guest_exec_task_setup() */
+    if (has_merge) {
+        error_setg(errp, "merge-output unsupported on windows");
+        return NULL;
+    }
+#endif
+
     argv = guest_exec_get_args(&arglist, true);
     envp = has_env ? guest_exec_get_args(env, false) : NULL;
 
@@ -420,7 +444,7 @@  GuestExec *qmp_guest_exec(const char *path,
     }
 
     ret = g_spawn_async_with_pipes(NULL, argv, envp, flags,
-            guest_exec_task_setup, NULL, &pid, input_data ? &in_fd : NULL,
+            guest_exec_task_setup, &has_merge, &pid, input_data ? &in_fd : NULL,
             has_output ? &out_fd : NULL, has_output ? &err_fd : NULL, &gerr);
     if (!ret) {
         error_setg(errp, QERR_QGA_COMMAND_FAILED, gerr->message);
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index 796434ed34..9c2367acdf 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -1211,6 +1211,9 @@ 
 # @input-data: data to be passed to process stdin (base64 encoded)
 # @capture-output: bool flag to enable capture of
 #                  stdout/stderr of running process. defaults to false.
+# @merge-output: bool flag to merge stdout/stderr of running process
+#                into stdout. only effective if used with @capture-output.
+#                not effective on windows guests. defaults to false. (since 8.0)
 #
 # Returns: PID on success.
 #
@@ -1218,7 +1221,8 @@ 
 ##
 { 'command': 'guest-exec',
   'data':    { 'path': 'str', '*arg': ['str'], '*env': ['str'],
-               '*input-data': 'str', '*capture-output': 'bool' },
+               '*input-data': 'str', '*capture-output': 'bool',
+               '*merge-output': 'bool' },
   'returns': 'GuestExec' }