diff mbox series

[v1,06/14] tests/vm: Add logging of console to file.

Message ID 20200205212920.467-7-robert.foley@linaro.org
State New
Headers show
Series tests/vm: Add support for aarch64 VMs | expand

Commit Message

Robert Foley Feb. 5, 2020, 9:29 p.m. UTC
This adds logging of the char device used by the console
to a file.  The basevm.py then uses this file to read
chars from the console.
One reason to add this is to aid with debugging.
But another is because there is an issue where the QEMU
might hang if the characters from the character device
are not consumed by the script.

Signed-off-by: Robert Foley <robert.foley@linaro.org>
---
 tests/vm/basevm.py        | 48 ++++++++++++++++++++++---
 tests/vm/socket_thread.py | 73 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+), 5 deletions(-)
 create mode 100644 tests/vm/socket_thread.py

Comments

Alex Bennée Feb. 7, 2020, 5:12 p.m. UTC | #1
Robert Foley <robert.foley@linaro.org> writes:

> This adds logging of the char device used by the console
> to a file.  The basevm.py then uses this file to read
> chars from the console.
> One reason to add this is to aid with debugging.

I can certainly see an argument for saving the install log.

> But another is because there is an issue where the QEMU
> might hang if the characters from the character device
> are not consumed by the script.

I'm a little confused by this. Outputting to a file and then parsing the
file seems a bit more janky than injesting the output in the script and
then logging it.

Is this to work around the hang because the socket buffers fill up and
aren't drained?

>
> Signed-off-by: Robert Foley <robert.foley@linaro.org>
> ---
>  tests/vm/basevm.py        | 48 ++++++++++++++++++++++---
>  tests/vm/socket_thread.py | 73 +++++++++++++++++++++++++++++++++++++++
>  2 files changed, 116 insertions(+), 5 deletions(-)
>  create mode 100644 tests/vm/socket_thread.py
>
> diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py
> index a926211da8..87a484c55c 100755
> --- a/tests/vm/basevm.py
> +++ b/tests/vm/basevm.py
> @@ -31,12 +31,17 @@ import tempfile
>  import shutil
>  import multiprocessing
>  import traceback
> +from socket_thread import SocketThread
>  
>  SSH_KEY = open(os.path.join(os.path.dirname(__file__),
>                 "..", "keys", "id_rsa")).read()
>  SSH_PUB_KEY = open(os.path.join(os.path.dirname(__file__),
>                     "..", "keys", "id_rsa.pub")).read()
>  
> +class ConsoleTimeoutException(Exception):
> +    """Raise this exception when read from console times out."""
> +    pass
> +
>  class BaseVM(object):
>      GUEST_USER = "qemu"
>      GUEST_PASS = "qemupass"
> @@ -59,12 +64,18 @@ class BaseVM(object):
>      poweroff = "poweroff"
>      # enable IPv6 networking
>      ipv6 = True
> +    # This is the timeout on the wait for console bytes.
> +    socket_timeout = 120
>      # Scale up some timeouts under TCG.
>      # 4 is arbitrary, but greater than 2,
>      # since we found we need to wait more than twice as long.
>      tcg_ssh_timeout_multiplier = 4
> +    console_logfile = "console.log"

We should probably dump the log somewhere other than cwd. Given we cache
stuff in ~/.cache/qemu-vm maybe something of the form:

  ~/.cache/qemu-vm/ubuntu.aarch64.install.log

?

>      def __init__(self, debug=False, vcpus=None):
>          self._guest = None
> +        self._console_fd = None
> +        self._socket_thread = None
> +        self._console_timeout_sec = self.socket_timeout
>          self._tmpdir = os.path.realpath(tempfile.mkdtemp(prefix="vm-test-",
>                                                           suffix=".tmp",
>                                                           dir="."))
> @@ -179,6 +190,15 @@ class BaseVM(object):
>                              "-device",
>                              "virtio-blk,drive=%s,serial=%s,bootindex=1" % (name, name)]
>  
> +    def init_console(self, socket):
> +        """Init the thread to dump console to a file.
> +           Also open the file descriptor we will use to
> +           read from the console."""
> +        self._socket_thread = SocketThread(socket, self.console_logfile)
> +        self._console_fd = open(self.console_logfile, "r")
> +        self._socket_thread.start()
> +        print("console logfile is: {}".format(self.console_logfile))
> +
>      def boot(self, img, extra_args=[]):
>          args = self._args + [
>              "-device", "VGA",
> @@ -201,6 +221,7 @@ class BaseVM(object):
>              raise
>          atexit.register(self.shutdown)
>          self._guest = guest
> +        self.init_console(guest.console_socket)
>          usernet_info = guest.qmp("human-monitor-command",
>                                   command_line="info usernet")
>          self.ssh_port = None
> @@ -212,9 +233,10 @@ class BaseVM(object):
>              raise Exception("Cannot find ssh port from 'info usernet':\n%s" % \
>                              usernet_info)
>  
> -    def console_init(self, timeout = 120):
> -        vm = self._guest
> -        vm.console_socket.settimeout(timeout)
> +    def console_init(self, timeout = None):
> +        if timeout == None:
> +            timeout = self.socket_timeout
> +        self._console_timeout_sec = timeout
>  
>      def console_log(self, text):
>          for line in re.split("[\r\n]", text):
> @@ -230,13 +252,27 @@ class BaseVM(object):
>              # log console line
>              sys.stderr.write("con recv: %s\n" % line)
>  
> +    def console_recv(self, n):
> +        """Read n chars from the console_logfile being dumped to
> +           by the socket thread we created earlier."""
> +        start_time = time.time()
> +        while True:
> +            data = self._console_fd.read(1)
> +            if data != "":
> +                break
> +            time.sleep(0.1)
> +            elapsed_sec = time.time() - start_time
> +            if elapsed_sec > self._console_timeout_sec:
> +                raise ConsoleTimeoutException
> +        return data.encode('latin1')
> +

Is latin1 really the best choice here? I would expect things to be utf-8 clean.

>      def console_wait(self, expect, expectalt = None):
>          vm = self._guest
>          output = ""
>          while True:
>              try:
> -                chars = vm.console_socket.recv(1)
> -            except socket.timeout:
> +                chars = self.console_recv(1)
> +            except ConsoleTimeoutException:
>                  sys.stderr.write("console: *** read timeout ***\n")
>                  sys.stderr.write("console: waiting for: '%s'\n" % expect)
>                  if not expectalt is None:
> @@ -335,6 +371,8 @@ class BaseVM(object):
>              raise Exception("Timeout while waiting for guest ssh")
>  
>      def shutdown(self):
> +        self._socket_thread.join()
> +        self._console_fd.close()
>          self._guest.shutdown()
>      def wait(self):
>          self._guest.wait()
> diff --git a/tests/vm/socket_thread.py b/tests/vm/socket_thread.py
> new file mode 100644
> index 0000000000..6160e9163d
> --- /dev/null
> +++ b/tests/vm/socket_thread.py
> @@ -0,0 +1,73 @@
> +#!/usr/bin/env python
> +#
> +# This python module defines a thread object which
> +# reads from a socket and dumps it to a file.
> +#
> +# The main use case is for reading QEMU console char dev and
> +# dumping them to a file either for debugging or for
> +# parsing by QEMU itself.
> +#
> +# Copyright 2020 Linaro
> +#
> +# Authors:
> +#  Robert Foley <robert.foley@linaro.org>
> +#
> +# This code is licensed under the GPL version 2 or later.  See
> +# the COPYING file in the top-level directory.
> +#
> +import sys
> +import re
> +import threading
> +import time
> +import traceback
> +import gettext
> +
> +class SocketThread(threading.Thread):
> +    """ Implements the standard threading.Thread API.(start, join, etc.).
> +        dumps all characters received on socket into a file.
> +    """
> +    def __init__(self, socket, filename):
> +        super(SocketThread, self).__init__()
> +        self.alive = threading.Event()
> +        self.alive.set()
> +        self.socket = socket
> +        self.log_file = open(filename, "w")
> +        self.debug = True
> +
> +    def receive(self):
> +        """Until the user calls join, we will read chars from
> +           the socket and dump them as is to the file."""
> +        self.socket.setblocking(0)
> +        self.socket.settimeout(1.0)
> +        while self.alive.isSet():
> +            try:
> +                chars = self.socket.recv(1)
> +            except:
> +                continue
> +            output = chars.decode("latin1")
> +            self.log_file.write("{}".format(output))
> +            # Flush the file since we need the characters to be
> +            # always up to date in case someone is reading the file
> +            # waiting for some characters to show up.
> +            self.log_file.flush()
> +        self.socket.setblocking(1)
> +
> +    def run(self):
> +        """This is the main loop of the socket thread.
> +           Simply receive from the file until the user
> +           calls join."""
> +        while self.alive.isSet():
> +            try:
> +                self.receive()
> +            except Exception as e:
> +                sys.stderr.write("Exception encountered\n")
> +                traceback.print_exc()
> +                continue
> +
> +    def join(self, timeout=None):
> +        """Time to destroy the thread.
> +           Clear the event to stop the thread, and wait for
> +           it to complete."""
> +        self.alive.clear()
> +        threading.Thread.join(self, timeout)
> +        self.log_file.close()

I'm note sure about this - introducing threading into Python seems very
un-pythonic. I wonder if the python experts have any view on a better
way to achieve what we want which I think is:

  - a buffer that properly drains output from QEMU
  - which can optionally be serialised onto disk for logging

What else are we trying to achieve here?
Robert Foley Feb. 7, 2020, 10:20 p.m. UTC | #2
On Fri, 7 Feb 2020 at 12:12, Alex Bennée <alex.bennee@linaro.org> wrote:
> Robert Foley <robert.foley@linaro.org> writes:
>
> > This adds logging of the char device used by the console
> > to a file.  The basevm.py then uses this file to read
> > chars from the console.
> > One reason to add this is to aid with debugging.
>
> I can certainly see an argument for saving the install log.
>
> > But another is because there is an issue where the QEMU
> > might hang if the characters from the character device
> > are not consumed by the script.
>
> I'm a little confused by this. Outputting to a file and then parsing the
> file seems a bit more janky than injesting the output in the script and
> then logging it.
>
> Is this to work around the hang because the socket buffers fill up and
> aren't drained?

Yes, exactly.  This is to work around the hang we are seeing when we
try to use these new VMs.

> > +    console_logfile = "console.log"
>
> We should probably dump the log somewhere other than cwd. Given we cache
> stuff in ~/.cache/qemu-vm maybe something of the form:
>
>   ~/.cache/qemu-vm/ubuntu.aarch64.install.log
>
> ?

Good point, we will locate the log file there.

> > +            elapsed_sec = time.time() - start_time
> > +            if elapsed_sec > self._console_timeout_sec:
> > +                raise ConsoleTimeoutException
> > +        return data.encode('latin1')
> > +
>
> Is latin1 really the best choice here? I would expect things to be utf-8 clean.

We were trying to follow the existing code, which is using latin1.
For example, console_wait() or console_consume() are using latin1.
However on further inspection we see that console_send() is using utf-8.
We will look at changing these latin1 cases to be utf-8.
I also found a case in get_qemu_version() we will change to utf-8 also.

> > +
> > +    def join(self, timeout=None):
> > +        """Time to destroy the thread.
> > +           Clear the event to stop the thread, and wait for
> > +           it to complete."""
> > +        self.alive.clear()
> > +        threading.Thread.join(self, timeout)
> > +        self.log_file.close()
>
> I'm note sure about this - introducing threading into Python seems very
> un-pythonic. I wonder if the python experts have any view on a better
> way to achieve what we want which I think is:
>
>   - a buffer that properly drains output from QEMU
>   - which can optionally be serialised onto disk for logging
>
> What else are we trying to achieve here?

I think that covers what we are trying to achieve here.
The logging to file is a nice to have, but
the draining of the output from QEMU is the main objective here.
We will do a bit more research here to seek out a cleaner way to achieve this,
but certainly we would also be interested if any python experts have a
view on a cleaner approach.

Thanks & Regards,
-Rob
>
> --
> Alex Bennée
Robert Foley Feb. 10, 2020, 6:21 p.m. UTC | #3
On Fri, 7 Feb 2020 at 17:20, Robert Foley <robert.foley@linaro.org> wrote:
>
> On Fri, 7 Feb 2020 at 12:12, Alex Bennée <alex.bennee@linaro.org> wrote:
> > > +
> > > +    def join(self, timeout=None):
> > > +        """Time to destroy the thread.
> > > +           Clear the event to stop the thread, and wait for
> > > +           it to complete."""
> > > +        self.alive.clear()
> > > +        threading.Thread.join(self, timeout)
> > > +        self.log_file.close()
> >
> > I'm note sure about this - introducing threading into Python seems very
> > un-pythonic. I wonder if the python experts have any view on a better
> > way to achieve what we want which I think is:
> >
> >   - a buffer that properly drains output from QEMU
> >   - which can optionally be serialised onto disk for logging
> >
> > What else are we trying to achieve here?
>
> I think that covers what we are trying to achieve here.
> The logging to file is a nice to have, but
> the draining of the output from QEMU is the main objective here.
> We will do a bit more research here to seek out a cleaner way to achieve this,
> but certainly we would also be interested if any python experts have a
> view on a cleaner approach.
>
We have a few more ideas on how to make this a bit cleaner.

We could create a new class "ConsoleSocket", based on asyncore.dispatcher.
The asyncore python library allows for overriding certain functionality of
an underlying socket.
We could override the handle_read() method in order to get a callback
when data is available to be read from the console.  This is much cleaner
than us simply waiting with a read() on the socket in a thread.
Note that even asyncore requires a thread around asyncore.loop(), but we would
only be responsible for start/stop of the thread and the thread body
is defined for us (literally just asyncore.loop()).
The thread starting/stopping would be completely transparent
to the users of this new ConsoleSocket.  The thread would be created on
initialization of ConsoleSocket, and destroyed upon close() of the socket.

As the console bytes get consumed we can dump them to an in memory stream,
rather than dumping them to a file right away.
We could perhaps use an io.StringIO text stream.  This stream will get
the bytes of the console and the test will consume the bytes directly
from this stream.
In addition, we could optionally dump the bytes to a file as well.

How does this sound?  Just looking for a bit of feedback before we
head in this direction.

Thanks & Regards,
-Rob
diff mbox series

Patch

diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py
index a926211da8..87a484c55c 100755
--- a/tests/vm/basevm.py
+++ b/tests/vm/basevm.py
@@ -31,12 +31,17 @@  import tempfile
 import shutil
 import multiprocessing
 import traceback
+from socket_thread import SocketThread
 
 SSH_KEY = open(os.path.join(os.path.dirname(__file__),
                "..", "keys", "id_rsa")).read()
 SSH_PUB_KEY = open(os.path.join(os.path.dirname(__file__),
                    "..", "keys", "id_rsa.pub")).read()
 
+class ConsoleTimeoutException(Exception):
+    """Raise this exception when read from console times out."""
+    pass
+
 class BaseVM(object):
     GUEST_USER = "qemu"
     GUEST_PASS = "qemupass"
@@ -59,12 +64,18 @@  class BaseVM(object):
     poweroff = "poweroff"
     # enable IPv6 networking
     ipv6 = True
+    # This is the timeout on the wait for console bytes.
+    socket_timeout = 120
     # Scale up some timeouts under TCG.
     # 4 is arbitrary, but greater than 2,
     # since we found we need to wait more than twice as long.
     tcg_ssh_timeout_multiplier = 4
+    console_logfile = "console.log"
     def __init__(self, debug=False, vcpus=None):
         self._guest = None
+        self._console_fd = None
+        self._socket_thread = None
+        self._console_timeout_sec = self.socket_timeout
         self._tmpdir = os.path.realpath(tempfile.mkdtemp(prefix="vm-test-",
                                                          suffix=".tmp",
                                                          dir="."))
@@ -179,6 +190,15 @@  class BaseVM(object):
                             "-device",
                             "virtio-blk,drive=%s,serial=%s,bootindex=1" % (name, name)]
 
+    def init_console(self, socket):
+        """Init the thread to dump console to a file.
+           Also open the file descriptor we will use to
+           read from the console."""
+        self._socket_thread = SocketThread(socket, self.console_logfile)
+        self._console_fd = open(self.console_logfile, "r")
+        self._socket_thread.start()
+        print("console logfile is: {}".format(self.console_logfile))
+
     def boot(self, img, extra_args=[]):
         args = self._args + [
             "-device", "VGA",
@@ -201,6 +221,7 @@  class BaseVM(object):
             raise
         atexit.register(self.shutdown)
         self._guest = guest
+        self.init_console(guest.console_socket)
         usernet_info = guest.qmp("human-monitor-command",
                                  command_line="info usernet")
         self.ssh_port = None
@@ -212,9 +233,10 @@  class BaseVM(object):
             raise Exception("Cannot find ssh port from 'info usernet':\n%s" % \
                             usernet_info)
 
-    def console_init(self, timeout = 120):
-        vm = self._guest
-        vm.console_socket.settimeout(timeout)
+    def console_init(self, timeout = None):
+        if timeout == None:
+            timeout = self.socket_timeout
+        self._console_timeout_sec = timeout
 
     def console_log(self, text):
         for line in re.split("[\r\n]", text):
@@ -230,13 +252,27 @@  class BaseVM(object):
             # log console line
             sys.stderr.write("con recv: %s\n" % line)
 
+    def console_recv(self, n):
+        """Read n chars from the console_logfile being dumped to
+           by the socket thread we created earlier."""
+        start_time = time.time()
+        while True:
+            data = self._console_fd.read(1)
+            if data != "":
+                break
+            time.sleep(0.1)
+            elapsed_sec = time.time() - start_time
+            if elapsed_sec > self._console_timeout_sec:
+                raise ConsoleTimeoutException
+        return data.encode('latin1')
+
     def console_wait(self, expect, expectalt = None):
         vm = self._guest
         output = ""
         while True:
             try:
-                chars = vm.console_socket.recv(1)
-            except socket.timeout:
+                chars = self.console_recv(1)
+            except ConsoleTimeoutException:
                 sys.stderr.write("console: *** read timeout ***\n")
                 sys.stderr.write("console: waiting for: '%s'\n" % expect)
                 if not expectalt is None:
@@ -335,6 +371,8 @@  class BaseVM(object):
             raise Exception("Timeout while waiting for guest ssh")
 
     def shutdown(self):
+        self._socket_thread.join()
+        self._console_fd.close()
         self._guest.shutdown()
     def wait(self):
         self._guest.wait()
diff --git a/tests/vm/socket_thread.py b/tests/vm/socket_thread.py
new file mode 100644
index 0000000000..6160e9163d
--- /dev/null
+++ b/tests/vm/socket_thread.py
@@ -0,0 +1,73 @@ 
+#!/usr/bin/env python
+#
+# This python module defines a thread object which
+# reads from a socket and dumps it to a file.
+#
+# The main use case is for reading QEMU console char dev and
+# dumping them to a file either for debugging or for
+# parsing by QEMU itself.
+#
+# Copyright 2020 Linaro
+#
+# Authors:
+#  Robert Foley <robert.foley@linaro.org>
+#
+# This code is licensed under the GPL version 2 or later.  See
+# the COPYING file in the top-level directory.
+#
+import sys
+import re
+import threading
+import time
+import traceback
+import gettext
+
+class SocketThread(threading.Thread):
+    """ Implements the standard threading.Thread API.(start, join, etc.).
+        dumps all characters received on socket into a file.
+    """
+    def __init__(self, socket, filename):
+        super(SocketThread, self).__init__()
+        self.alive = threading.Event()
+        self.alive.set()
+        self.socket = socket
+        self.log_file = open(filename, "w")
+        self.debug = True
+
+    def receive(self):
+        """Until the user calls join, we will read chars from
+           the socket and dump them as is to the file."""
+        self.socket.setblocking(0)
+        self.socket.settimeout(1.0)
+        while self.alive.isSet():
+            try:
+                chars = self.socket.recv(1)
+            except:
+                continue
+            output = chars.decode("latin1")
+            self.log_file.write("{}".format(output))
+            # Flush the file since we need the characters to be
+            # always up to date in case someone is reading the file
+            # waiting for some characters to show up.
+            self.log_file.flush()
+        self.socket.setblocking(1)
+
+    def run(self):
+        """This is the main loop of the socket thread.
+           Simply receive from the file until the user
+           calls join."""
+        while self.alive.isSet():
+            try:
+                self.receive()
+            except Exception as e:
+                sys.stderr.write("Exception encountered\n")
+                traceback.print_exc()
+                continue
+
+    def join(self, timeout=None):
+        """Time to destroy the thread.
+           Clear the event to stop the thread, and wait for
+           it to complete."""
+        self.alive.clear()
+        threading.Thread.join(self, timeout)
+        self.log_file.close()