diff mbox series

[v3,2/3] KVM: keep track of running ioctls

Message ID 20221111154758.1372674-3-eesposit@redhat.com
State New
Headers show
Series KVM: allow listener to stop all vcpus before | expand

Commit Message

Emanuele Giuseppe Esposito Nov. 11, 2022, 3:47 p.m. UTC
Using the new accel-blocker API, mark where ioctls are being called
in KVM. Next, we will implement the critical section that will take
care of performing memslots modifications atomically, therefore
preventing any new ioctl from running and allowing the running ones
to finish.

Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
---
 accel/kvm/kvm-all.c | 7 +++++++
 1 file changed, 7 insertions(+)

Comments

David Hildenbrand Nov. 17, 2022, 7:27 p.m. UTC | #1
On 11.11.22 16:47, Emanuele Giuseppe Esposito wrote:
> Using the new accel-blocker API, mark where ioctls are being called
> in KVM. Next, we will implement the critical section that will take
> care of performing memslots modifications atomically, therefore
> preventing any new ioctl from running and allowing the running ones
> to finish.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>

You might want to drop that and instead mention something like "This 
patch is based on a protoype patch by David Hildenbrand".

> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> ---
>   accel/kvm/kvm-all.c | 7 +++++++
>   1 file changed, 7 insertions(+)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index f99b0becd8..ff660fd469 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
>       assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
>   
>       s->sigmask_len = 8;
> +    accel_blocker_init();
>   
>   #ifdef KVM_CAP_SET_GUEST_DEBUG
>       QTAILQ_INIT(&s->kvm_sw_breakpoints);
> @@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
>       va_end(ap);
>   
>       trace_kvm_vm_ioctl(type, arg);
> +    accel_ioctl_begin();
>       ret = ioctl(s->vmfd, type, arg);
> +    accel_ioctl_end();
>       if (ret == -1) {
>           ret = -errno;
>       }
> @@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
>       va_end(ap);
>   
>       trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
> +    accel_cpu_ioctl_begin(cpu);
>       ret = ioctl(cpu->kvm_fd, type, arg);
> +    accel_cpu_ioctl_end(cpu);
>       if (ret == -1) {
>           ret = -errno;
>       }
> @@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
>       va_end(ap);
>   
>       trace_kvm_device_ioctl(fd, type, arg);
> +    accel_ioctl_begin();
>       ret = ioctl(fd, type, arg);
> +    accel_ioctl_end();
>       if (ret == -1) {
>           ret = -errno;
>       }

I recall that I had some additional patches that tried to catch some of 
more calls:

https://lore.kernel.org/qemu-devel/20200312161217.3590-2-david@redhat.com/

https://lore.kernel.org/qemu-devel/20200312161217.3590-3-david@redhat.com/

Do they still apply? Is there more?
Emanuele Giuseppe Esposito Nov. 18, 2022, 9:53 a.m. UTC | #2
Am 17/11/2022 um 20:27 schrieb David Hildenbrand:
> On 11.11.22 16:47, Emanuele Giuseppe Esposito wrote:
>> Using the new accel-blocker API, mark where ioctls are being called
>> in KVM. Next, we will implement the critical section that will take
>> care of performing memslots modifications atomically, therefore
>> preventing any new ioctl from running and allowing the running ones
>> to finish.
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> You might want to drop that and instead mention something like "This
> patch is based on a protoype patch by David Hildenbrand".
> 
>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>> ---
>>   accel/kvm/kvm-all.c | 7 +++++++
>>   1 file changed, 7 insertions(+)
>>
>> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>> index f99b0becd8..ff660fd469 100644
>> --- a/accel/kvm/kvm-all.c
>> +++ b/accel/kvm/kvm-all.c
>> @@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
>>       assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
>>         s->sigmask_len = 8;
>> +    accel_blocker_init();
>>     #ifdef KVM_CAP_SET_GUEST_DEBUG
>>       QTAILQ_INIT(&s->kvm_sw_breakpoints);
>> @@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
>>       va_end(ap);
>>         trace_kvm_vm_ioctl(type, arg);
>> +    accel_ioctl_begin();
>>       ret = ioctl(s->vmfd, type, arg);
>> +    accel_ioctl_end();
>>       if (ret == -1) {
>>           ret = -errno;
>>       }
>> @@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
>>       va_end(ap);
>>         trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
>> +    accel_cpu_ioctl_begin(cpu);
>>       ret = ioctl(cpu->kvm_fd, type, arg);
>> +    accel_cpu_ioctl_end(cpu);
>>       if (ret == -1) {
>>           ret = -errno;
>>       }
>> @@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
>>       va_end(ap);
>>         trace_kvm_device_ioctl(fd, type, arg);
>> +    accel_ioctl_begin();
>>       ret = ioctl(fd, type, arg);
>> +    accel_ioctl_end();
>>       if (ret == -1) {
>>           ret = -errno;
>>       }
> 
> I recall that I had some additional patches that tried to catch some of
> more calls:
> 
> https://lore.kernel.org/qemu-devel/20200312161217.3590-2-david@redhat.com/
> 
> https://lore.kernel.org/qemu-devel/20200312161217.3590-3-david@redhat.com/
> 
> Do they still apply? Is there more?
> 

Apologies, what do you mean with "do they still apply"?
Looks fine to me

Emanuele
Robert Hoo Dec. 2, 2022, 6:54 a.m. UTC | #3
On Fri, 2022-11-11 at 10:47 -0500, Emanuele Giuseppe Esposito wrote:
> Using the new accel-blocker API, mark where ioctls are being called
> in KVM. Next, we will implement the critical section that will take
> care of performing memslots modifications atomically, therefore
> preventing any new ioctl from running and allowing the running ones
> to finish.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> ---
>  accel/kvm/kvm-all.c | 7 +++++++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index f99b0becd8..ff660fd469 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
>      assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
>  
>      s->sigmask_len = 8;
> +    accel_blocker_init();
>  
>  #ifdef KVM_CAP_SET_GUEST_DEBUG
>      QTAILQ_INIT(&s->kvm_sw_breakpoints);
> @@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
>      va_end(ap);
>  
>      trace_kvm_vm_ioctl(type, arg);
> +    accel_ioctl_begin();
>      ret = ioctl(s->vmfd, type, arg);
> +    accel_ioctl_end();
>      if (ret == -1) {
>          ret = -errno;
>      }
> @@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type,
> ...)
>      va_end(ap);
>  
>      trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
> +    accel_cpu_ioctl_begin(cpu);

Does this mean that kvm_region_commit() can inhibit any other vcpus
doing any ioctls?

>      ret = ioctl(cpu->kvm_fd, type, arg);
> +    accel_cpu_ioctl_end(cpu);
>      if (ret == -1) {
>          ret = -errno;
>      }
> @@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
>      va_end(ap);
>  
>      trace_kvm_device_ioctl(fd, type, arg);
> +    accel_ioctl_begin();
>      ret = ioctl(fd, type, arg);
> +    accel_ioctl_end();
>      if (ret == -1) {
>          ret = -errno;
>      }
Emanuele Giuseppe Esposito Dec. 2, 2022, 12:03 p.m. UTC | #4
Am 02/12/2022 um 07:54 schrieb Robert Hoo:
> On Fri, 2022-11-11 at 10:47 -0500, Emanuele Giuseppe Esposito wrote:
>> Using the new accel-blocker API, mark where ioctls are being called
>> in KVM. Next, we will implement the critical section that will take
>> care of performing memslots modifications atomically, therefore
>> preventing any new ioctl from running and allowing the running ones
>> to finish.
>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>> ---
>>  accel/kvm/kvm-all.c | 7 +++++++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>> index f99b0becd8..ff660fd469 100644
>> --- a/accel/kvm/kvm-all.c
>> +++ b/accel/kvm/kvm-all.c
>> @@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
>>      assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
>>  
>>      s->sigmask_len = 8;
>> +    accel_blocker_init();
>>  
>>  #ifdef KVM_CAP_SET_GUEST_DEBUG
>>      QTAILQ_INIT(&s->kvm_sw_breakpoints);
>> @@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
>>      va_end(ap);
>>  
>>      trace_kvm_vm_ioctl(type, arg);
>> +    accel_ioctl_begin();
>>      ret = ioctl(s->vmfd, type, arg);
>> +    accel_ioctl_end();
>>      if (ret == -1) {
>>          ret = -errno;
>>      }
>> @@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type,
>> ...)
>>      va_end(ap);
>>  
>>      trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
>> +    accel_cpu_ioctl_begin(cpu);
> 
> Does this mean that kvm_region_commit() can inhibit any other vcpus
> doing any ioctls?

Yes, because we must prevent any vcpu from reading memslots while we are
updating them.

> 
>>      ret = ioctl(cpu->kvm_fd, type, arg);
>> +    accel_cpu_ioctl_end(cpu);
>>      if (ret == -1) {
>>          ret = -errno;
>>      }
>> @@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
>>      va_end(ap);
>>  
>>      trace_kvm_device_ioctl(fd, type, arg);
>> +    accel_ioctl_begin();
>>      ret = ioctl(fd, type, arg);
>> +    accel_ioctl_end();
>>      if (ret == -1) {
>>          ret = -errno;
>>      }
>
Robert Hoo Dec. 2, 2022, 1:32 p.m. UTC | #5
On Fri, 2022-12-02 at 13:03 +0100, Emanuele Giuseppe Esposito wrote:
...
> > > @@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type,
> > > ...)
> > >      va_end(ap);
> > >  
> > >      trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
> > > +    accel_cpu_ioctl_begin(cpu);
> > 
> > Does this mean that kvm_region_commit() can inhibit any other vcpus
> > doing any ioctls?
> 
> Yes, because we must prevent any vcpu from reading memslots while we
> are
> updating them.
> 
But do most other vm/vcpu ioctls contend with memslot operations?
Emanuele Giuseppe Esposito Dec. 2, 2022, 2:32 p.m. UTC | #6
Am 02/12/2022 um 14:32 schrieb Robert Hoo:
> On Fri, 2022-12-02 at 13:03 +0100, Emanuele Giuseppe Esposito wrote:
> ...
>>>> @@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type,
>>>> ...)
>>>>      va_end(ap);
>>>>  
>>>>      trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
>>>> +    accel_cpu_ioctl_begin(cpu);
>>>
>>> Does this mean that kvm_region_commit() can inhibit any other vcpus
>>> doing any ioctls?
>>
>> Yes, because we must prevent any vcpu from reading memslots while we
>> are
>> updating them.
>>
> But do most other vm/vcpu ioctls contend with memslot operations?
> 

I think this is the simplest way. I agree not all ioctls contend with
memslot operations, but there are also not so many memslot operations
too. Instead of going one by one in all possible ioctls, covering all of
them is the simplest way and it covers also the case of a new ioctl
reading memslots that could be added in the future (alternatively we
would be always updating the list of ioctls to block).
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f99b0becd8..ff660fd469 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2310,6 +2310,7 @@  static int kvm_init(MachineState *ms)
     assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
 
     s->sigmask_len = 8;
+    accel_blocker_init();
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
     QTAILQ_INIT(&s->kvm_sw_breakpoints);
@@ -3014,7 +3015,9 @@  int kvm_vm_ioctl(KVMState *s, int type, ...)
     va_end(ap);
 
     trace_kvm_vm_ioctl(type, arg);
+    accel_ioctl_begin();
     ret = ioctl(s->vmfd, type, arg);
+    accel_ioctl_end();
     if (ret == -1) {
         ret = -errno;
     }
@@ -3032,7 +3035,9 @@  int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
     va_end(ap);
 
     trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
+    accel_cpu_ioctl_begin(cpu);
     ret = ioctl(cpu->kvm_fd, type, arg);
+    accel_cpu_ioctl_end(cpu);
     if (ret == -1) {
         ret = -errno;
     }
@@ -3050,7 +3055,9 @@  int kvm_device_ioctl(int fd, int type, ...)
     va_end(ap);
 
     trace_kvm_device_ioctl(fd, type, arg);
+    accel_ioctl_begin();
     ret = ioctl(fd, type, arg);
+    accel_ioctl_end();
     if (ret == -1) {
         ret = -errno;
     }