diff mbox

[2/3] tests: add nbd-fault-injector.py utility

Message ID 1393322998-10289-3-git-send-email-stefanha@redhat.com
State New
Headers show

Commit Message

Stefan Hajnoczi Feb. 25, 2014, 10:09 a.m. UTC
The nbd-fault-injector.py script is a special kind of NBD server.  It
throws away all writes and produces zeroes for reads.  Given a list of
fault injection rules, it can simulate NBD protocol errors and is useful
for testing NBD client error handling code paths.

See the patch for documentation.  This scripts is modelled after Kevin
Wolf <kwolf@redhat.com>'s blkdebug block driver.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/qemu-iotests/nbd-fault-injector.py | 231 +++++++++++++++++++++++++++++++
 1 file changed, 231 insertions(+)
 create mode 100755 tests/qemu-iotests/nbd-fault-injector.py

Comments

Nicholas Thomas Feb. 25, 2014, 11:45 a.m. UTC | #1
Hi,

On 25/02/14 10:09, Stefan Hajnoczi wrote:
> The nbd-fault-injector.py script is a special kind of NBD server.  It
> throws away all writes and produces zeroes for reads.  Given a list of
> fault injection rules, it can simulate NBD protocol errors and is useful
> for testing NBD client error handling code paths.
> 
> See the patch for documentation.  This scripts is modelled after Kevin
> Wolf <kwolf@redhat.com>'s blkdebug block driver.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
>  tests/qemu-iotests/nbd-fault-injector.py | 231 +++++++++++++++++++++++++++++++
>  1 file changed, 231 insertions(+)
>  create mode 100755 tests/qemu-iotests/nbd-fault-injector.py
> 
> diff --git a/tests/qemu-iotests/nbd-fault-injector.py b/tests/qemu-iotests/nbd-fault-injector.py
> new file mode 100755
> index 0000000..b4011f4
> --- /dev/null
> +++ b/tests/qemu-iotests/nbd-fault-injector.py
> @@ -0,0 +1,231 @@
> +#!/usr/bin/env python
> +# NBD server - fault injection utility
> +#
> +# Configuration file syntax:
> +#   [inject-error "disconnect-neg1"]
> +#   event=neg1
> +#   io=readwrite
> +#   when=before
> +#
> +# Note that Python's ConfigParser squashes together all sections with the same
> +# name, so give each [inject-error] a unique name.
> +#
> +# inject-error options:
> +#   event - name of the trigger event
> +#           "neg1" - first part of negotiation struct
> +#           "export" - export struct
> +#           "neg2" - second part of negotiation struct
> +#           "request" - NBD request struct
> +#           "reply" - NBD reply struct
> +#           "data" - request/reply data
> +#   io    - I/O direction that triggers this rule:
> +#           "read", "write", or "readwrite"
> +#           default: readwrite
> +#   when  - when to inject the fault relative to the I/O operation
> +#           "before" or "after"
> +#           default: before
> +#
> +# Currently the only error injection action is to terminate the server process.
> +# This resets the TCP connection and thus forces the client to handle
> +# unexpected connection termination.
> +#
> +# Other error injection actions could be added in the future.
> +#
> +# Copyright Red Hat, Inc. 2014
> +#
> +# Authors:
> +#   Stefan Hajnoczi <stefanha@redhat.com>
> +#
> +# This work is licensed under the terms of the GNU GPL, version 2 or later.
> +# See the COPYING file in the top-level directory.
> +
> +import sys
> +import socket
> +import struct
> +import collections
> +import ConfigParser
> +
> +# Protocol constants
> +NBD_CMD_READ = 0
> +NBD_CMD_WRITE = 1
> +NBD_CMD_DISC = 2
> +NBD_REQUEST_MAGIC = 0x25609513
> +NBD_REPLY_MAGIC = 0x67446698
> +NBD_PASSWD = 0x4e42444d41474943
> +NBD_OPTS_MAGIC = 0x49484156454F5054
> +NBD_OPT_EXPORT_NAME = 1 << 0
> +
> +# Protocol structs
> +neg1_struct = struct.Struct('>QQH')
> +export_tuple = collections.namedtuple('Export', 'reserved magic opt len')
> +export_struct = struct.Struct('>IQII')
> +neg2_struct = struct.Struct('>QH124x')
> +request_tuple = collections.namedtuple('Request', 'magic type handle from_ len')
> +request_struct = struct.Struct('>IIQQI')
> +reply_struct = struct.Struct('>IIQ')
> +
> +def err(msg):
> +    sys.stderr.write(msg + '\n')
> +    sys.exit(1)
> +
> +def recvall(sock, bufsize):
> +    received = 0
> +    chunks = []
> +    while received < bufsize:
> +        chunk = sock.recv(bufsize - received)
> +        if len(chunk) == 0:
> +            raise Exception('unexpected disconnect')
> +        chunks.append(chunk)
> +        received += len(chunk)
> +    return ''.join(chunks)
> +
> +class Rule(object):
> +    def __init__(self, name, event, io, when):
> +        self.name = name
> +        self.event = event
> +        self.io = io
> +        self.when = when
> +
> +    def match(self, event, io, when):
> +        if when != self.when:
> +            return False
> +        if event != self.event:
> +            return False
> +        if io != self.io and self.io != 'readwrite':
> +            return False
> +        return True
> +
> +class FaultInjectionSocket(object):
> +    def __init__(self, sock, rules):
> +        self.sock = sock
> +        self.rules = rules
> +
> +    def check(self, event, io, when):
> +        for rule in self.rules:
> +            if rule.match(event, io, when):
> +                print 'Closing connection on rule match %s' % rule.name
> +                sys.exit(0)
> +
> +    def send(self, buf, event):
> +        self.check(event, 'write', 'before')
> +        self.sock.sendall(buf)
> +        self.check(event, 'write', 'after')
> +
> +    def recv(self, bufsize, event):
> +        self.check(event, 'read', 'before')
> +        data = recvall(self.sock, bufsize)
> +        self.check(event, 'read', 'after')
> +        return data

There's a class of error I recently encountered in our out-of-tree proxy
component that only shows up if a read or write is interrupted partway
through. Perhaps you could have a "during" event here that happens after
bufsize/2 bytes is written?

I've not looked at qemu's block/nbd code recently, so I don't know if
that exercises a particular failure path.

> +    def close(self):
> +        self.sock.close()
> +
> +def negotiate(conn):
> +    '''Negotiate export with client'''
> +    # Send negotiation part 1
> +    buf = neg1_struct.pack(NBD_PASSWD, NBD_OPTS_MAGIC, 0)
> +    conn.send(buf, event='neg1')
> +
> +    # Receive export option
> +    buf = conn.recv(export_struct.size, event='export')
> +    export = export_tuple._make(export_struct.unpack(buf))
> +    assert export.magic == NBD_OPTS_MAGIC
> +    assert export.opt == NBD_OPT_EXPORT_NAME
> +    name = conn.recv(export.len, event='export-name')
> +
> +    # Send negotiation part 2
> +    buf = neg2_struct.pack(8 * 1024 * 1024 * 1024, 0) # 8 GB capacity
> +    conn.send(buf, event='neg2')

Is it worth exercising the old-style negotiation too?

> +def read_request(conn):
> +    '''Parse NBD request from client'''
> +    buf = conn.recv(request_struct.size, event='request')
> +    req = request_tuple._make(request_struct.unpack(buf))
> +    assert req.magic == NBD_REQUEST_MAGIC
> +    return req
> +
> +def write_reply(conn, error, handle):
> +    buf = reply_struct.pack(NBD_REPLY_MAGIC, error, handle)
> +    conn.send(buf, event='reply')
> +
> +def handle_connection(conn):
> +    negotiate(conn)
> +    while True:
> +        req = read_request(conn)
> +        if req.type == NBD_CMD_READ:
> +            write_reply(conn, 0, req.handle)
> +            conn.send('\0' * req.len, event='data')
> +        elif req.type == NBD_CMD_WRITE:
> +            _ = conn.recv(req.len, event='data')
> +            write_reply(conn, 0, req.handle)
> +        elif req.type == NBD_CMD_DISC:
> +            break
> +        else:
> +            print 'unrecognized command type %#02x' % req.type
> +            break
> +    conn.close()
> +
> +def run_server(sock, rules):
> +    while True:
> +        conn, _ = sock.accept()
> +        handle_connection(FaultInjectionSocket(conn, rules))
> +
> +def parse_inject_error(name, options):
> +    if 'event' not in options:
> +        err('missing \"event\" option in %s' % name)
> +    event = options['event']
> +    if event not in ('neg1', 'export', 'neg2', 'request', 'reply', 'data'):
> +        err('invalid \"event\" option value \"%s\" in %s' % (event, name))
> +    io = options.get('io', 'readwrite')
> +    if io not in ('read', 'write', 'readwrite'):
> +        err('invalid \"io\" option value \"%s\" in %s' % (io, name))
> +    when = options.get('when', 'before')
> +    if when not in ('before', 'after'):
> +        err('invalid \"when\" option value \"%s\" in %s' % (when, name))
> +    return Rule(name, event, io, when)
> +
> +def parse_config(config):
> +    rules = []
> +    for name in config.sections():
> +        if name.startswith('inject-error'):
> +            options = dict(config.items(name))
> +            rules.append(parse_inject_error(name, options))
> +        else:
> +            err('invalid config section name: %s' % name)
> +    return rules
> +
> +def load_rules(filename):
> +    config = ConfigParser.RawConfigParser()
> +    with open(filename, 'rt') as f:
> +        config.readfp(f, filename)
> +    return parse_config(config)
> +
> +def open_socket(path):
> +    '''Open a TCP or UNIX domain listen socket'''
> +    if ':' in path:
> +        host, port = path.split(':', 1)
> +        sock = socket.socket()
> +        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
> +        sock.bind((host, int(port)))
> +    else:
> +        sock = socket.socket(socket.AF_UNIX)
> +        sock.bind(path)
> +    sock.listen(0)
> +    print 'Listening on %s' % path
> +    return sock
> +
> +def usage(args):
> +    sys.stderr.write('usage: %s <tcp-port>|<unix-path> <config-file>\n' % args[0])
> +    sys.stderr.write('Run an fault injector NBD server with rules defined in a config file.\n')
> +    sys.exit(1)
> +
> +def main(args):
> +    if len(args) != 3:
> +        usage(args)
> +    sock = open_socket(args[1])
> +    rules = load_rules(args[2])
> +    run_server(sock, rules)
> +    return 0
> +
> +if __name__ == '__main__':
> +    sys.exit(main(sys.argv))
>
Stefan Hajnoczi Feb. 25, 2014, 4:25 p.m. UTC | #2
On Tue, Feb 25, 2014 at 11:45:11AM +0000, Nick Thomas wrote:
> On 25/02/14 10:09, Stefan Hajnoczi wrote:
> > +    def send(self, buf, event):
> > +        self.check(event, 'write', 'before')
> > +        self.sock.sendall(buf)
> > +        self.check(event, 'write', 'after')
> > +
> > +    def recv(self, bufsize, event):
> > +        self.check(event, 'read', 'before')
> > +        data = recvall(self.sock, bufsize)
> > +        self.check(event, 'read', 'after')
> > +        return data
> 
> There's a class of error I recently encountered in our out-of-tree proxy
> component that only shows up if a read or write is interrupted partway
> through. Perhaps you could have a "during" event here that happens after
> bufsize/2 bytes is written?
> 
> I've not looked at qemu's block/nbd code recently, so I don't know if
> that exercises a particular failure path.

Yes, it can involve different code paths in the client.  For example,
the client may receive fields in separate recv(2) syscalls so there may
be a unique path for each field.

I think the easiest approach is to turn the 'when' option into a byte
count.  The connection will be terminated after the given number of
bytes.

Then 'before' becomes an alias for 0.  'after' becomes an alias for -1,
the magic value for the entire data length.

> > +    def close(self):
> > +        self.sock.close()
> > +
> > +def negotiate(conn):
> > +    '''Negotiate export with client'''
> > +    # Send negotiation part 1
> > +    buf = neg1_struct.pack(NBD_PASSWD, NBD_OPTS_MAGIC, 0)
> > +    conn.send(buf, event='neg1')
> > +
> > +    # Receive export option
> > +    buf = conn.recv(export_struct.size, event='export')
> > +    export = export_tuple._make(export_struct.unpack(buf))
> > +    assert export.magic == NBD_OPTS_MAGIC
> > +    assert export.opt == NBD_OPT_EXPORT_NAME
> > +    name = conn.recv(export.len, event='export-name')
> > +
> > +    # Send negotiation part 2
> > +    buf = neg2_struct.pack(8 * 1024 * 1024 * 1024, 0) # 8 GB capacity
> > +    conn.send(buf, event='neg2')
> 
> Is it worth exercising the old-style negotiation too?

Yes, probably a good idea.
diff mbox

Patch

diff --git a/tests/qemu-iotests/nbd-fault-injector.py b/tests/qemu-iotests/nbd-fault-injector.py
new file mode 100755
index 0000000..b4011f4
--- /dev/null
+++ b/tests/qemu-iotests/nbd-fault-injector.py
@@ -0,0 +1,231 @@ 
+#!/usr/bin/env python
+# NBD server - fault injection utility
+#
+# Configuration file syntax:
+#   [inject-error "disconnect-neg1"]
+#   event=neg1
+#   io=readwrite
+#   when=before
+#
+# Note that Python's ConfigParser squashes together all sections with the same
+# name, so give each [inject-error] a unique name.
+#
+# inject-error options:
+#   event - name of the trigger event
+#           "neg1" - first part of negotiation struct
+#           "export" - export struct
+#           "neg2" - second part of negotiation struct
+#           "request" - NBD request struct
+#           "reply" - NBD reply struct
+#           "data" - request/reply data
+#   io    - I/O direction that triggers this rule:
+#           "read", "write", or "readwrite"
+#           default: readwrite
+#   when  - when to inject the fault relative to the I/O operation
+#           "before" or "after"
+#           default: before
+#
+# Currently the only error injection action is to terminate the server process.
+# This resets the TCP connection and thus forces the client to handle
+# unexpected connection termination.
+#
+# Other error injection actions could be added in the future.
+#
+# Copyright Red Hat, Inc. 2014
+#
+# Authors:
+#   Stefan Hajnoczi <stefanha@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or later.
+# See the COPYING file in the top-level directory.
+
+import sys
+import socket
+import struct
+import collections
+import ConfigParser
+
+# Protocol constants
+NBD_CMD_READ = 0
+NBD_CMD_WRITE = 1
+NBD_CMD_DISC = 2
+NBD_REQUEST_MAGIC = 0x25609513
+NBD_REPLY_MAGIC = 0x67446698
+NBD_PASSWD = 0x4e42444d41474943
+NBD_OPTS_MAGIC = 0x49484156454F5054
+NBD_OPT_EXPORT_NAME = 1 << 0
+
+# Protocol structs
+neg1_struct = struct.Struct('>QQH')
+export_tuple = collections.namedtuple('Export', 'reserved magic opt len')
+export_struct = struct.Struct('>IQII')
+neg2_struct = struct.Struct('>QH124x')
+request_tuple = collections.namedtuple('Request', 'magic type handle from_ len')
+request_struct = struct.Struct('>IIQQI')
+reply_struct = struct.Struct('>IIQ')
+
+def err(msg):
+    sys.stderr.write(msg + '\n')
+    sys.exit(1)
+
+def recvall(sock, bufsize):
+    received = 0
+    chunks = []
+    while received < bufsize:
+        chunk = sock.recv(bufsize - received)
+        if len(chunk) == 0:
+            raise Exception('unexpected disconnect')
+        chunks.append(chunk)
+        received += len(chunk)
+    return ''.join(chunks)
+
+class Rule(object):
+    def __init__(self, name, event, io, when):
+        self.name = name
+        self.event = event
+        self.io = io
+        self.when = when
+
+    def match(self, event, io, when):
+        if when != self.when:
+            return False
+        if event != self.event:
+            return False
+        if io != self.io and self.io != 'readwrite':
+            return False
+        return True
+
+class FaultInjectionSocket(object):
+    def __init__(self, sock, rules):
+        self.sock = sock
+        self.rules = rules
+
+    def check(self, event, io, when):
+        for rule in self.rules:
+            if rule.match(event, io, when):
+                print 'Closing connection on rule match %s' % rule.name
+                sys.exit(0)
+
+    def send(self, buf, event):
+        self.check(event, 'write', 'before')
+        self.sock.sendall(buf)
+        self.check(event, 'write', 'after')
+
+    def recv(self, bufsize, event):
+        self.check(event, 'read', 'before')
+        data = recvall(self.sock, bufsize)
+        self.check(event, 'read', 'after')
+        return data
+
+    def close(self):
+        self.sock.close()
+
+def negotiate(conn):
+    '''Negotiate export with client'''
+    # Send negotiation part 1
+    buf = neg1_struct.pack(NBD_PASSWD, NBD_OPTS_MAGIC, 0)
+    conn.send(buf, event='neg1')
+
+    # Receive export option
+    buf = conn.recv(export_struct.size, event='export')
+    export = export_tuple._make(export_struct.unpack(buf))
+    assert export.magic == NBD_OPTS_MAGIC
+    assert export.opt == NBD_OPT_EXPORT_NAME
+    name = conn.recv(export.len, event='export-name')
+
+    # Send negotiation part 2
+    buf = neg2_struct.pack(8 * 1024 * 1024 * 1024, 0) # 8 GB capacity
+    conn.send(buf, event='neg2')
+
+def read_request(conn):
+    '''Parse NBD request from client'''
+    buf = conn.recv(request_struct.size, event='request')
+    req = request_tuple._make(request_struct.unpack(buf))
+    assert req.magic == NBD_REQUEST_MAGIC
+    return req
+
+def write_reply(conn, error, handle):
+    buf = reply_struct.pack(NBD_REPLY_MAGIC, error, handle)
+    conn.send(buf, event='reply')
+
+def handle_connection(conn):
+    negotiate(conn)
+    while True:
+        req = read_request(conn)
+        if req.type == NBD_CMD_READ:
+            write_reply(conn, 0, req.handle)
+            conn.send('\0' * req.len, event='data')
+        elif req.type == NBD_CMD_WRITE:
+            _ = conn.recv(req.len, event='data')
+            write_reply(conn, 0, req.handle)
+        elif req.type == NBD_CMD_DISC:
+            break
+        else:
+            print 'unrecognized command type %#02x' % req.type
+            break
+    conn.close()
+
+def run_server(sock, rules):
+    while True:
+        conn, _ = sock.accept()
+        handle_connection(FaultInjectionSocket(conn, rules))
+
+def parse_inject_error(name, options):
+    if 'event' not in options:
+        err('missing \"event\" option in %s' % name)
+    event = options['event']
+    if event not in ('neg1', 'export', 'neg2', 'request', 'reply', 'data'):
+        err('invalid \"event\" option value \"%s\" in %s' % (event, name))
+    io = options.get('io', 'readwrite')
+    if io not in ('read', 'write', 'readwrite'):
+        err('invalid \"io\" option value \"%s\" in %s' % (io, name))
+    when = options.get('when', 'before')
+    if when not in ('before', 'after'):
+        err('invalid \"when\" option value \"%s\" in %s' % (when, name))
+    return Rule(name, event, io, when)
+
+def parse_config(config):
+    rules = []
+    for name in config.sections():
+        if name.startswith('inject-error'):
+            options = dict(config.items(name))
+            rules.append(parse_inject_error(name, options))
+        else:
+            err('invalid config section name: %s' % name)
+    return rules
+
+def load_rules(filename):
+    config = ConfigParser.RawConfigParser()
+    with open(filename, 'rt') as f:
+        config.readfp(f, filename)
+    return parse_config(config)
+
+def open_socket(path):
+    '''Open a TCP or UNIX domain listen socket'''
+    if ':' in path:
+        host, port = path.split(':', 1)
+        sock = socket.socket()
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        sock.bind((host, int(port)))
+    else:
+        sock = socket.socket(socket.AF_UNIX)
+        sock.bind(path)
+    sock.listen(0)
+    print 'Listening on %s' % path
+    return sock
+
+def usage(args):
+    sys.stderr.write('usage: %s <tcp-port>|<unix-path> <config-file>\n' % args[0])
+    sys.stderr.write('Run an fault injector NBD server with rules defined in a config file.\n')
+    sys.exit(1)
+
+def main(args):
+    if len(args) != 3:
+        usage(args)
+    sock = open_socket(args[1])
+    rules = load_rules(args[2])
+    run_server(sock, rules)
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))