diff mbox

[RFC,1/1] dataplane: alternative approach to locking

Message ID 1446622348-2479-1-git-send-email-den@openvz.org
State New
Headers show

Commit Message

Denis V. Lunev Nov. 4, 2015, 7:32 a.m. UTC
What about this? Is it simple enough for you keeping lock around
qemu_fopen_bdrv/qemu_fclose as suggested in patch 1?

This is not tested at all, just sent as an idea for a discussion.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
---
 block.c               | 17 +++++++++++++++++
 include/block/block.h |  2 ++
 migration/savevm.c    | 23 +++++++++++++++--------
 monitor.c             |  2 +-
 4 files changed, 35 insertions(+), 9 deletions(-)

Comments

Juan Quintela Nov. 4, 2015, 9:49 a.m. UTC | #1
"Denis V. Lunev" <den@openvz.org> wrote:
> What about this? Is it simple enough for you keeping lock around
> qemu_fopen_bdrv/qemu_fclose as suggested in patch 1?
>
> This is not tested at all, just sent as an idea for a discussion.
>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Juan Quintela <quintela@redhat.com>
> ---
>  block.c               | 17 +++++++++++++++++
>  include/block/block.h |  2 ++
>  migration/savevm.c    | 23 +++++++++++++++--------
>  monitor.c             |  2 +-
>  4 files changed, 35 insertions(+), 9 deletions(-)
>
> diff --git a/block.c b/block.c
> index 044897e..d376ec2 100644
> --- a/block.c
> +++ b/block.c
> @@ -2741,6 +2741,23 @@ BlockDriverState *bdrv_next(BlockDriverState *bs)
>      return QTAILQ_NEXT(bs, device_list);
>  }
>  
> +BlockDriverState *bdrv_next_lock(BlockDriverState *bs)
> +{
> +    if (bs != NULL) {
> +        aio_context_release(bdrv_get_aio_context(bs));
> +    }
> +    bs = bdrv_next(bs);
> +    if (bs != NULL) {
> +        aio_context_acquire(bdrv_get_aio_context(bs));
> +    }
> +    return bs;
> +}
> +
> +void bdrv_unlock(BlockDriverState *bs)
> +{
> +    aio_context_release(bdrv_get_aio_context(bs));
> +}

I think I prefer bdrv_ref/unref

And once there, having bdrv_next_lock() only removes the need to do a
bdrv_ref (or lock if you prefer).
> diff --git a/include/block/block.h b/include/block/block.h
> index 610db92..b29dd5b 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -401,6 +401,8 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
>  bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
>  BlockDriverState *bdrv_next_node(BlockDriverState *bs);
>  BlockDriverState *bdrv_next(BlockDriverState *bs);
> +BlockDriverState *bdrv_next_lock(BlockDriverState *bs);
> +void bdrv_unlock(BlockDriverState *bs);
>  int bdrv_is_encrypted(BlockDriverState *bs);
>  int bdrv_key_required(BlockDriverState *bs);
>  int bdrv_set_key(BlockDriverState *bs, const char *key);
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dbcc39a..cf06a10 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1240,8 +1240,9 @@ out:
>  static BlockDriverState *find_vmstate_bs(void)
>  {
>      BlockDriverState *bs = NULL;
> -    while ((bs = bdrv_next(bs))) {
> +    while ((bs = bdrv_next_lock(bs))) {
>          if (bdrv_can_snapshot(bs)) {
> +            bdrv_unlock(bs);

Once here, why don't we need it to return it locked?

>              return bs;
>          }


Looking at this from a thousand-foot view, I think that it is just easier to
export that function from block.c:

/*
 * Walk the devices returned by bdrv_next() and return the first one for
 * which bdrv_can_snapshot() reports true.
 *
 * Returns NULL when the iteration ends without finding such a device.
 */
BlockDriverState *bdrv_find_snapshot_bs(void)
{
    BlockDriverState *candidate;

    for (candidate = bdrv_next(NULL); candidate != NULL;
         candidate = bdrv_next(candidate)) {
        if (bdrv_can_snapshot(candidate)) {
            return candidate;
        }
    }
    return NULL;
}

or something like that?

export something like:

char *name bdrv_remove_snapshots(const char *name, Error *err)
{
    BlockDriverState *bs;
    QEMUSnapshotInfo sn1, *snapshot = &sn1;

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs) &&
            bdrv_snapshot_find(bs, snapshot, name) >= 0) {
            bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
            if (err) {
                return bdrv_get_device_name(bs);
            }
        }
    }
    return NULL;
}


And use like that:

/*
 * Delete the snapshot called @name through bdrv_remove_snapshots() and
 * report a failure on the monitor.
 *
 * @mon:  monitor that receives the error message.
 * @name: snapshot id or name to delete.
 *
 * Returns 0 when no error was reported, -1 otherwise.
 */
static int del_existing_snapshots(Monitor *mon, const char *name)
{
    Error *err = NULL;
    /* Kept separate from @name so the parameter is not shadowed. */
    const char *device;

    device = bdrv_remove_snapshots(name, &err);

    if (err) {
        monitor_printf(mon,
                       "Error while deleting snapshot on device '%s': %s\n",
                       device, error_get_pretty(err));
        /* NOTE(review): err is not released here -- confirm who owns it. */
        return -1;
    }
    return 0;
}

Yes, we go through pains to just not teach block.c about the monitor.


/*
 * Monitor handler: read the "name" entry from @qdict and delete the
 * snapshot with that name via del_existing_snapshots().
 *
 * If bdrv_find_snapshot_bs() finds no device, a message is printed on
 * @mon and nothing is deleted.
 */
void hmp_delvm(Monitor *mon, const QDict *qdict)
{
    const char *snapshot_name = qdict_get_str(qdict, "name");

    if (bdrv_find_snapshot_bs() != NULL) {
        del_existing_snapshots(mon, snapshot_name);
    } else {
        monitor_printf(mon, "No block device supports snapshots\n");
    }
}

Yes, we have changed the semantics "slightly".  The previous version of
hmp_delvm() would try to remove all the snapshots with that name from
every device.  This one removes them until it hits the first error.  I
think that the code reuse and the consistency trump the change in
semantics (really the change is only in error cases).



> @@ -1292,13 +1294,14 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>  
>      /* Verify if there is a device that doesn't support snapshots and is writable */
>      bs = NULL;
> -    while ((bs = bdrv_next(bs))) {
> +    while ((bs = bdrv_next_lock(bs))) {
>  
>          if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
>              continue;
>          }
>  
>          if (!bdrv_can_snapshot(bs)) {
> +            bdrv_unlock(bs);
>              monitor_printf(mon, "Device '%s' is writable but does not support snapshots.\n",
>                                 bdrv_get_device_name(bs));
>              return;


Export this bit of code as:

bdrv_snapshot_supported() or somesuch?
Migration code only needs a true/false value.



> @@ -1365,7 +1368,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict)
>      /* create the snapshots */
>  
>      bs1 = NULL;
> -    while ((bs1 = bdrv_next(bs1))) {
> +    while ((bs1 = bdrv_next_lock(bs1))) {
>          if (bdrv_can_snapshot(bs1)) {
>              /* Write VM state size only to the image that contains the state */
>              sn->vm_state_size = (bs == bs1 ? vm_state_size : 0);
> @@ -1436,13 +1439,14 @@ int load_vmstate(const char *name)
>      /* Verify if there is any device that doesn't support snapshots and is
>      writable and check if the requested snapshot is available too. */
>      bs = NULL;
> -    while ((bs = bdrv_next(bs))) {
> +    while ((bs = bdrv_next_lock(bs))) {
>  
>          if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
>              continue;
>          }
>  
>          if (!bdrv_can_snapshot(bs)) {
> +            bdrv_unlock(bs);
>              error_report("Device '%s' is writable but does not support snapshots.",
>                                 bdrv_get_device_name(bs));
>              return -ENOTSUP;
> @@ -1450,6 +1454,7 @@ int load_vmstate(const char *name)
>  
>          ret = bdrv_snapshot_find(bs, &sn, name);
>          if (ret < 0) {
> +            bdrv_unlock(bs);
>              error_report("Device '%s' does not have the requested snapshot '%s'",
>                             bdrv_get_device_name(bs), name);
>              return ret;


rest of code until here from bdrv_* functions is basically migration
layer asking block layer:

Pretty, pretty please, give me a device where I can take a snapshot,
or an error.

> @@ -1460,10 +1465,11 @@ int load_vmstate(const char *name)
>      bdrv_drain_all();
>  
>      bs = NULL;
> -    while ((bs = bdrv_next(bs))) {
> +    while ((bs = bdrv_next_lock(bs))) {
>          if (bdrv_can_snapshot(bs)) {
>              ret = bdrv_snapshot_goto(bs, name);
>              if (ret < 0) {
> +                bdrv_unlock(bs);
>                  error_report("Error %d while activating snapshot '%s' on '%s'",
>                               ret, name, bdrv_get_device_name(bs));
>                  return ret;
> @@ -1504,7 +1510,7 @@ void hmp_delvm(Monitor *mon, const QDict *qdict)
>      }
>  
>      bs = NULL;
> -    while ((bs = bdrv_next(bs))) {
> +    while ((bs = bdrv_next_lock(bs))) {
>          if (bdrv_can_snapshot(bs)) {
>              err = NULL;
>              bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
> @@ -1552,10 +1558,11 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
>          available = 1;
>          bs1 = NULL;
>  
> -        while ((bs1 = bdrv_next(bs1))) {
> +        while ((bs1 = bdrv_next_lock(bs1))) {
>              if (bdrv_can_snapshot(bs1) && bs1 != bs) {
>                  ret = bdrv_snapshot_find(bs1, sn_info, sn->id_str);
>                  if (ret < 0) {
> +                    bdrv_unlock(bs);
>                      available = 0;
>                      break;
>                  }

I will claim that this command belongs to the block layer: you already
have to work with the monitor, and there is nothing migration-related
in it.

Notice that the reason that I ask to move things to the block layer is
that then you can export less internal things.  I think that
bdrv_can_snapshot() is a function that shouldn't be exported.  What
users want is:  give_me_a_device_to_do_one_snapshot(), they don't care
about the details.

> diff --git a/monitor.c b/monitor.c
> index 301a143..ea1a917 100644
> --- a/monitor.c
> +++ b/monitor.c
> @@ -3374,7 +3374,7 @@ static void vm_completion(ReadLineState *rs, const char *str)
>  
>      len = strlen(str);
>      readline_set_completion_index(rs, len);
> -    while ((bs = bdrv_next(bs))) {
> +    while ((bs = bdrv_next_lock(bs))) {
>          SnapshotInfoList *snapshots, *snapshot;
>  
>          if (!bdrv_can_snapshot(bs)) {

We don't need to unlock device here?

Yes, I know, that makes things much, much more difficult :-( Instead of
a trivial patch, we got this.


Later, Juan.
Denis V. Lunev Nov. 4, 2015, 11:12 a.m. UTC | #2
On 11/04/2015 12:49 PM, Juan Quintela wrote:
> void hmp_delvm(Monitor *mon, const QDict *qdict)
> {
>      const char *name = qdict_get_str(qdict, "name");
>
>      if (!bdrv_find_snapshot_bs()) {
>          monitor_printf(mon, "No block device supports snapshots\n");
>          return;
>      }
>
>      del_existing_snapshots(mon, name);
> }
>
> Yes, we have changed the semantics "slightly".  Pervious version of
> hmp_delvm() will try to remove all the snapshots from any device with
> that name.  This one would remove them until it finds one error.  I
> think that the code reuse and the consistence trumps the change in
> semantics (really the change is only on error cases).

I think you are wrong here. You can not abort operation if one
disk does not have a snapshot assuming the following situation
- VM has one disk
- snapshot XXX is made
- 2nd disk is added
- remove XXX snapshot

Your position is understood. I'll send yet another proof of concept
in an hour.

Den
Juan Quintela Nov. 4, 2015, 12:03 p.m. UTC | #3
"Denis V. Lunev" <den-lists@parallels.com> wrote:
> On 11/04/2015 12:49 PM, Juan Quintela wrote:
>> void hmp_delvm(Monitor *mon, const QDict *qdict)
>> {
>>      const char *name = qdict_get_str(qdict, "name");
>>
>>      if (!bdrv_find_snapshot_bs()) {
>>          monitor_printf(mon, "No block device supports snapshots\n");
>>          return;
>>      }
>>
>>      del_existing_snapshots(mon, name);
>> }
>>
>> Yes, we have changed the semantics "slightly".  Pervious version of
>> hmp_delvm() will try to remove all the snapshots from any device with
>> that name.  This one would remove them until it finds one error.  I
>> think that the code reuse and the consistence trumps the change in
>> semantics (really the change is only on error cases).
>
> I think you are wrong here. You can not abort operation if one
> disk does not have a snapshot assuming the following situation
> - VM has one disk
> - snapshot XXX is made
> - 2nd disk is added
> - remove XXX snapshot

I think that my *completely* untested suggestion handled that well.

char *name bdrv_remove_snapshots(const char *name, Error *err)
{
    BlockDriverState *bs;
    QEMUSnapshotInfo sn1, *snapshot = &sn1;

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs) &&
            bdrv_snapshot_find(bs, snapshot, name) >= 0) {
            bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
            if (err) {
                return bdrv_get_device_name(bs);
            }
        }
    }
    return NULL;
}

It only stops without removing a snapshot if there is an error
deleting one snapshot.  The current code just reports that there is an
error and continues with the rest of the disks.

Notice that we are going to have problems on this operation, we have
found a disk with one snapshot with the name that we want to remove and
we have failed.


>
> Your position is understood. I'll send yet another proof of concept
> in an hour.

Thanks, Juan.
Denis V. Lunev Nov. 4, 2015, 12:07 p.m. UTC | #4
On 11/04/2015 03:03 PM, Juan Quintela wrote:
> "Denis V. Lunev" <den-lists@parallels.com> wrote:
>> On 11/04/2015 12:49 PM, Juan Quintela wrote:
>>> void hmp_delvm(Monitor *mon, const QDict *qdict)
>>> {
>>>       const char *name = qdict_get_str(qdict, "name");
>>>
>>>       if (!bdrv_find_snapshot_bs()) {
>>>           monitor_printf(mon, "No block device supports snapshots\n");
>>>           return;
>>>       }
>>>
>>>       del_existing_snapshots(mon, name);
>>> }
>>>
>>> Yes, we have changed the semantics "slightly".  Pervious version of
>>> hmp_delvm() will try to remove all the snapshots from any device with
>>> that name.  This one would remove them until it finds one error.  I
>>> think that the code reuse and the consistence trumps the change in
>>> semantics (really the change is only on error cases).
>> I think you are wrong here. You can not abort operation if one
>> disk does not have a snapshot assuming the following situation
>> - VM has one disk
>> - snapshot XXX is made
>> - 2nd disk is added
>> - remove XXX snapshot
> I think that my *completely* untested suggestion handled that well.
>
> char *name bdrv_remove_snapshots(const char *name, Error *err)
> {
>      BlockDriverState *bs;
>      QEMUSnapshotInfo sn1, *snapshot = &sn1;
>
>      bs = NULL;
>      while ((bs = bdrv_next(bs))) {
>          if (bdrv_can_snapshot(bs) &&
>              bdrv_snapshot_find(bs, snapshot, name) >= 0) {
>              bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
>              if (err) {
>                  return bdrv_get_device_name(bs);
>              }
>          }
>      }
>      return NULL;
> }
>
> It only stops without removing an snapshot if there is one error
> deleting one snapshot.  Current code just tells that there is one error
> and continues in the rest of the disks.
>
> Notice that we are going to have problems on this operation, we have
> found a disk with one snapshot with the name that we want to remove and
> we have failed.
>
>
>> Your position is understood. I'll send yet another proof of concept
>> in an hour.
> Thanks, Juan.
Yes, we should follow this way in both branches.
I like this and have done a similar thing in my RFC :)

Den
diff mbox

Patch

diff --git a/block.c b/block.c
index 044897e..d376ec2 100644
--- a/block.c
+++ b/block.c
@@ -2741,6 +2741,23 @@  BlockDriverState *bdrv_next(BlockDriverState *bs)
     return QTAILQ_NEXT(bs, device_list);
 }
 
+BlockDriverState *bdrv_next_lock(BlockDriverState *bs)
+{
+    if (bs != NULL) {
+        aio_context_release(bdrv_get_aio_context(bs));
+    }
+    bs = bdrv_next(bs);
+    if (bs != NULL) {
+        aio_context_acquire(bdrv_get_aio_context(bs));
+    }
+    return bs;
+}
+
+void bdrv_unlock(BlockDriverState *bs)
+{
+    aio_context_release(bdrv_get_aio_context(bs));
+}
+
 const char *bdrv_get_node_name(const BlockDriverState *bs)
 {
     return bs->node_name;
diff --git a/include/block/block.h b/include/block/block.h
index 610db92..b29dd5b 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -401,6 +401,8 @@  BlockDriverState *bdrv_lookup_bs(const char *device,
 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
 BlockDriverState *bdrv_next_node(BlockDriverState *bs);
 BlockDriverState *bdrv_next(BlockDriverState *bs);
+BlockDriverState *bdrv_next_lock(BlockDriverState *bs);
+void bdrv_unlock(BlockDriverState *bs);
 int bdrv_is_encrypted(BlockDriverState *bs);
 int bdrv_key_required(BlockDriverState *bs);
 int bdrv_set_key(BlockDriverState *bs, const char *key);
diff --git a/migration/savevm.c b/migration/savevm.c
index dbcc39a..cf06a10 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1240,8 +1240,9 @@  out:
 static BlockDriverState *find_vmstate_bs(void)
 {
     BlockDriverState *bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    while ((bs = bdrv_next_lock(bs))) {
         if (bdrv_can_snapshot(bs)) {
+            bdrv_unlock(bs);
             return bs;
         }
     }
@@ -1258,11 +1259,12 @@  static int del_existing_snapshots(Monitor *mon, const char *name)
     Error *err = NULL;
 
     bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    while ((bs = bdrv_next_lock(bs))) {
         if (bdrv_can_snapshot(bs) &&
             bdrv_snapshot_find(bs, snapshot, name) >= 0) {
             bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
             if (err) {
+                bdrv_unlock(bs);
                 monitor_printf(mon,
                                "Error while deleting snapshot on device '%s':"
                                " %s\n",
@@ -1292,13 +1294,14 @@  void hmp_savevm(Monitor *mon, const QDict *qdict)
 
     /* Verify if there is a device that doesn't support snapshots and is writable */
     bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    while ((bs = bdrv_next_lock(bs))) {
 
         if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
             continue;
         }
 
         if (!bdrv_can_snapshot(bs)) {
+            bdrv_unlock(bs);
             monitor_printf(mon, "Device '%s' is writable but does not support snapshots.\n",
                                bdrv_get_device_name(bs));
             return;
@@ -1365,7 +1368,7 @@  void hmp_savevm(Monitor *mon, const QDict *qdict)
     /* create the snapshots */
 
     bs1 = NULL;
-    while ((bs1 = bdrv_next(bs1))) {
+    while ((bs1 = bdrv_next_lock(bs1))) {
         if (bdrv_can_snapshot(bs1)) {
             /* Write VM state size only to the image that contains the state */
             sn->vm_state_size = (bs == bs1 ? vm_state_size : 0);
@@ -1436,13 +1439,14 @@  int load_vmstate(const char *name)
     /* Verify if there is any device that doesn't support snapshots and is
     writable and check if the requested snapshot is available too. */
     bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    while ((bs = bdrv_next_lock(bs))) {
 
         if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
             continue;
         }
 
         if (!bdrv_can_snapshot(bs)) {
+            bdrv_unlock(bs);
             error_report("Device '%s' is writable but does not support snapshots.",
                                bdrv_get_device_name(bs));
             return -ENOTSUP;
@@ -1450,6 +1454,7 @@  int load_vmstate(const char *name)
 
         ret = bdrv_snapshot_find(bs, &sn, name);
         if (ret < 0) {
+            bdrv_unlock(bs);
             error_report("Device '%s' does not have the requested snapshot '%s'",
                            bdrv_get_device_name(bs), name);
             return ret;
@@ -1460,10 +1465,11 @@  int load_vmstate(const char *name)
     bdrv_drain_all();
 
     bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    while ((bs = bdrv_next_lock(bs))) {
         if (bdrv_can_snapshot(bs)) {
             ret = bdrv_snapshot_goto(bs, name);
             if (ret < 0) {
+                bdrv_unlock(bs);
                 error_report("Error %d while activating snapshot '%s' on '%s'",
                              ret, name, bdrv_get_device_name(bs));
                 return ret;
@@ -1504,7 +1510,7 @@  void hmp_delvm(Monitor *mon, const QDict *qdict)
     }
 
     bs = NULL;
-    while ((bs = bdrv_next(bs))) {
+    while ((bs = bdrv_next_lock(bs))) {
         if (bdrv_can_snapshot(bs)) {
             err = NULL;
             bdrv_snapshot_delete_by_id_or_name(bs, name, &err);
@@ -1552,10 +1558,11 @@  void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
         available = 1;
         bs1 = NULL;
 
-        while ((bs1 = bdrv_next(bs1))) {
+        while ((bs1 = bdrv_next_lock(bs1))) {
             if (bdrv_can_snapshot(bs1) && bs1 != bs) {
                 ret = bdrv_snapshot_find(bs1, sn_info, sn->id_str);
                 if (ret < 0) {
+                    bdrv_unlock(bs);
                     available = 0;
                     break;
                 }
diff --git a/monitor.c b/monitor.c
index 301a143..ea1a917 100644
--- a/monitor.c
+++ b/monitor.c
@@ -3374,7 +3374,7 @@  static void vm_completion(ReadLineState *rs, const char *str)
 
     len = strlen(str);
     readline_set_completion_index(rs, len);
-    while ((bs = bdrv_next(bs))) {
+    while ((bs = bdrv_next_lock(bs))) {
         SnapshotInfoList *snapshots, *snapshot;
 
         if (!bdrv_can_snapshot(bs)) {