[03/12] char: introduce tx queue to enable Unix style flow control

Message ID 1312208590-25502-4-git-send-email-aliguori@us.ibm.com

Commit Message

Anthony Liguori Aug. 1, 2011, 2:23 p.m. UTC
The char layer tries very hard to avoid using an intermediate buffer.  The
implication is that when the backend does a write(), the data must be passed
to the front end immediately.

Flow control is needed to handle the likely event that the front end cannot
accept the data at that moment.  We implement flow control today by allowing
front ends to register a polling function; the polling function returns
non-zero when the front end is able to receive data.

This works okay because most backends are tied to some sort of file descriptor
and our main loop allows polling to be included with file descriptor
registration.

This falls apart completely when the front end writes to the back end,
though, because the front ends (devices) have no obvious place to integrate
polling.

Short summary: we're broken by design.  A way to fix this is to eliminate
polling entirely and use a Unix-style flow control mechanism.  This involves
using an intermediate buffer and allowing registration of notifications when
the buffer either has data in it (readability) or is not full (writability).

This patch introduces a queue and uses it for front end -> back end writes.
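
In API terms, the model described above would look something like the sketch
below.  These are hypothetical declarations for illustration only; this patch
introduces just the queue itself, not the notification hooks:

    typedef struct CharQueue CharQueue;
    typedef void CharQueueNotify(void *opaque);

    /* Readability: invoked when data arrives in an empty queue. */
    void char_queue_on_readable(CharQueue *q, CharQueueNotify *cb, void *opaque);

    /* Writability: invoked when a full queue drains and has room again. */
    void char_queue_on_writable(CharQueue *q, CharQueueNotify *cb, void *opaque);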

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
---
 qemu-char.c |   64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 qemu-char.h |   12 ++++++++++++
 2 files changed, 75 insertions(+), 1 deletions(-)

Comments

Avi Kivity Aug. 4, 2011, 4:04 p.m. UTC | #1
On 08/01/2011 05:23 PM, Anthony Liguori wrote:
> [...]
>
> This falls apart completely when the front end writes to the back end,
> though, because the front ends (devices) have no obvious place to
> integrate polling.
>
> Short summary: we're broken by design.  A way to fix this is to eliminate
> polling entirely and use a Unix-style flow control mechanism.  This involves
> using an intermediate buffer and allowing registration of notifications when
> the buffer either has data in it (readability) or is not full (writability).
>

If you don't have an obvious place to integrate polling, how do you poll 
for writability?

Although, providing a reasonably sized buffer and blocking the vcpu when 
it's full is a lot better than what we have now.

Anthony Liguori Aug. 4, 2011, 4:31 p.m. UTC | #2
On 08/04/2011 11:04 AM, Avi Kivity wrote:
> On 08/01/2011 05:23 PM, Anthony Liguori wrote:
>> [...]
>
> If you don't have an obvious place to integrate polling, how do you poll
> for writability?

You poll by trying to write.  If the write fails or comes up short, you can 
register a callback to be notified when the backend becomes writable again.

> Although, providing a reasonably sized buffer and blocking the vcpu when
> it's full is a lot better than what we have now.

This series won't block the vcpu.  The devices can (and will) register 
callbacks for when the pipe is writable again.
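
Roughly, the device side would look like the following sketch.  All of the
mydev_* names and qemu_chr_fe_on_write_ready() are hypothetical, used here
purely for illustration; only qemu_chr_fe_write() exists in this series:

    #include <stdint.h>
    #include <string.h>
    #include "qemu-char.h"

    /* Hypothetical helper: would register a callback to be invoked once
     * the fe_tx queue has room again. */
    void qemu_chr_fe_on_write_ready(CharDriverState *chr,
                                    void (*cb)(void *opaque), void *opaque);

    typedef struct MyDevState {
        CharDriverState *chr;                  /* backend character device */
        uint8_t pending[MAX_CHAR_QUEUE_RING];  /* bytes the queue refused */
        int pending_len;
    } MyDevState;

    static void mydev_write_ready(void *opaque)
    {
        MyDevState *s = opaque;
        int queued = qemu_chr_fe_write(s->chr, s->pending, s->pending_len);

        /* Drop what was accepted; re-arm if anything is still pending. */
        memmove(s->pending, s->pending + queued, s->pending_len - queued);
        s->pending_len -= queued;
        if (s->pending_len > 0) {
            qemu_chr_fe_on_write_ready(s->chr, mydev_write_ready, s);
        }
    }

    static void mydev_xmit(MyDevState *s, const uint8_t *buf, int len)
    {
        int queued = qemu_chr_fe_write(s->chr, buf, len);

        if (queued < len) {
            /* Queue full: stash the remainder (no bounds check, sketch
             * only) and ask for a callback instead of polling. */
            memcpy(s->pending + s->pending_len, buf + queued, len - queued);
            s->pending_len += len - queued;
            qemu_chr_fe_on_write_ready(s->chr, mydev_write_ready, s);
        }
    }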

Regards,

Anthony Liguori


Patch

diff --git a/qemu-char.c b/qemu-char.c
index 795a3cc..3f9b32c 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -139,9 +139,71 @@  void qemu_chr_generic_open(CharDriverState *s)
     }
 }
 
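+/* Enqueue up to @size bytes into the ring; returns the number actually queued. */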
+static size_t char_queue_write(CharQueue *q, const void *data, size_t size)
+{
+    const uint8_t *ptr = data;
+    size_t i;
+
+    for (i = 0; i < size; i++) {
+        if ((q->prod - q->cons) == sizeof(q->ring)) {
+            break;
+        }
+
+        q->ring[q->prod % sizeof(q->ring)] = ptr[i];
+        q->prod++;
+    }
+
+    return i;
+}
+
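+/* Dequeue up to @size bytes from the ring; returns the number actually read. */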
+static size_t char_queue_read(CharQueue *q, void *data, size_t size)
+{
+    uint8_t *ptr = data;
+    size_t i;
+
+    for (i = 0; i < size; i++) {
+        if (q->cons == q->prod) {
+            break;
+        }
+
+        ptr[i] = q->ring[q->cons % sizeof(q->ring)];
+        q->cons++;
+    }
+
+    return i;
+}
+
+static void qemu_chr_flush_fe_tx(CharDriverState *s)
+{
+    uint8_t buf[MAX_CHAR_QUEUE_RING];
+    int len, written_len;
+
+    /* Drain the queue into a flat buffer */
+    len = char_queue_read(&s->fe_tx, buf, sizeof(buf));
+
+    written_len = s->chr_write(s, buf, len);
+    if (written_len < 0) {
+        /* Backend error: keep everything queued for a later flush. */
+        written_len = 0;
+    }
+    if (written_len < len) {
+        /* If the backend didn't accept the full write, put the unwritten
+         * data back in the queue. */
+        char_queue_write(&s->fe_tx, &buf[written_len], len - written_len);
+    }
+}
+
 int qemu_chr_fe_write(CharDriverState *s, const uint8_t *buf, int len)
 {
-    return s->chr_write(s, buf, len);
+    int ret;
+
+    ret = char_queue_write(&s->fe_tx, buf, len);
+
+    qemu_chr_flush_fe_tx(s);
+
+    return ret;
 }
 
 int qemu_chr_ioctl(CharDriverState *s, int cmd, void *arg)
diff --git a/qemu-char.h b/qemu-char.h
index bcd413c..bb9c1a7 100644
--- a/qemu-char.h
+++ b/qemu-char.h
@@ -51,6 +51,16 @@  typedef struct {
 
 typedef void IOEventHandler(void *opaque, int event);
 
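+/* Must be a power of two: prod and cons are free-running uint32_t counters. */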
+#define MAX_CHAR_QUEUE_RING 1024
+
+typedef struct CharQueue
+{
+    uint32_t prod;
+    uint32_t cons;
+    uint8_t ring[MAX_CHAR_QUEUE_RING];
+} CharQueue;
+
 struct CharDriverState {
     void (*init)(struct CharDriverState *s);
     int (*chr_write)(struct CharDriverState *s, const uint8_t *buf, int len);
@@ -75,6 +84,8 @@  struct CharDriverState {
     int opened;
     int avail_connections;
 
+    CharQueue fe_tx;
+
     QTAILQ_ENTRY(CharDriverState) next;
 };
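
The queue works because prod and cons are free-running uint32_t counters:
prod - cons gives the fill level even after the counters wrap, and the slot
index prod % sizeof(ring) stays contiguous across the wrap only because
MAX_CHAR_QUEUE_RING is a power of two (it divides 2^32).  A standalone sketch
of that arithmetic, purely for illustration:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t before = UINT32_MAX;   /* last producer index before wrap */
        uint32_t after = before + 1;    /* wraps around to 0 */

        /* Power-of-two ring: slot indices stay contiguous across the
         * wrap (slot 1023 is followed by slot 0). */
        assert((before % 1024 + 1) % 1024 == after % 1024);

        /* A non-power-of-two size breaks this: with 1000 slots the index
         * jumps from 295 straight to 0, corrupting the ring. */
        assert(before % 1000 == 295 && after % 1000 == 0);

        printf("ok\n");
        return 0;
    }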