diff mbox series

[v3,17/22] virtio-pmem: prototype

Message ID 20180920103243.28474-18-david@redhat.com
State New
Headers show
Series memory-device: complete refactoring + virtio-pmem | expand

Commit Message

David Hildenbrand Sept. 20, 2018, 10:32 a.m. UTC
From: Pankaj Gupta <pagupta@redhat.com>

This is the current protoype of virtio-pmem. Support will require
machine changes for the architectures that will support it, so it will
not yet be compiled.

Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
[ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
  split up patches ]
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 hw/virtio/Makefile.objs                     |   1 +
 hw/virtio/virtio-pmem.c                     | 224 ++++++++++++++++++++
 include/hw/virtio/virtio-pmem.h             |  40 ++++
 include/standard-headers/linux/virtio_ids.h |   1 +
 qapi/misc.json                              |  26 ++-
 5 files changed, 291 insertions(+), 1 deletion(-)
 create mode 100644 hw/virtio/virtio-pmem.c
 create mode 100644 include/hw/virtio/virtio-pmem.h

Comments

Markus Armbruster Sept. 21, 2018, 8:07 a.m. UTC | #1
Quick review of just the QAPI part.

David Hildenbrand <david@redhat.com> writes:

> From: Pankaj Gupta <pagupta@redhat.com>
>
> This is the current protoype of virtio-pmem. Support will require
> machine changes for the architectures that will support it, so it will
> not yet be compiled.
>
> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
>   split up patches ]
> Signed-off-by: David Hildenbrand <david@redhat.com>
[...]
> diff --git a/qapi/misc.json b/qapi/misc.json
> index 7c36de0464..cadbca26ac 100644
> --- a/qapi/misc.json
> +++ b/qapi/misc.json
> @@ -2907,6 +2907,29 @@
>            }
>  }
>  
> +##
> +# @VirtioPMemDeviceInfo:
> +#
> +# VirtioPMem state information
> +#
> +# @id: device's ID
> +#
> +# @memaddr: physical address in memory, where device is mapped
> +#
> +# @size: size of memory that the device provides
> +#
> +# @memdev: memory backend linked with device
> +#
> +# Since: 3.1
> +##
> +{ 'struct': 'VirtioPMemDeviceInfo',
> +  'data': { '*id': 'str',
> +            'memaddr': 'size',
> +            'size': 'size',
> +            'memdev': 'str'
> +          }
> +}

This set of members is a proper subset of PCDIMMDeviceInfo's, except

* It uses 'size' instead of 'int', which is an improvement.  Improve
  PCDIMMDeviceInfo as well?

* The physical address member is called 'memaddr' instead of 'addr'.
  Why?

> +
>  ##
>  # @MemoryDeviceInfo:
>  #
> @@ -2916,7 +2939,8 @@
>  ##
>  { 'union': 'MemoryDeviceInfo',
>    'data': { 'dimm': 'PCDIMMDeviceInfo',
> -            'nvdimm': 'PCDIMMDeviceInfo'
> +            'nvdimm': 'PCDIMMDeviceInfo',
> +            'virtio-pmem': 'VirtioPMemDeviceInfo'
>            }
>  }
David Hildenbrand Sept. 21, 2018, 8:23 a.m. UTC | #2
On 21/09/2018 10:07, Markus Armbruster wrote:
> Quick review of just the QAPI part.
> 
> David Hildenbrand <david@redhat.com> writes:
> 
>> From: Pankaj Gupta <pagupta@redhat.com>
>>
>> This is the current protoype of virtio-pmem. Support will require
>> machine changes for the architectures that will support it, so it will
>> not yet be compiled.
>>
>> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
>> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
>>   split up patches ]
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> [...]
>> diff --git a/qapi/misc.json b/qapi/misc.json
>> index 7c36de0464..cadbca26ac 100644
>> --- a/qapi/misc.json
>> +++ b/qapi/misc.json
>> @@ -2907,6 +2907,29 @@
>>            }
>>  }
>>  
>> +##
>> +# @VirtioPMemDeviceInfo:
>> +#
>> +# VirtioPMem state information
>> +#
>> +# @id: device's ID
>> +#
>> +# @memaddr: physical address in memory, where device is mapped
>> +#
>> +# @size: size of memory that the device provides
>> +#
>> +# @memdev: memory backend linked with device
>> +#
>> +# Since: 3.1
>> +##
>> +{ 'struct': 'VirtioPMemDeviceInfo',
>> +  'data': { '*id': 'str',
>> +            'memaddr': 'size',
>> +            'size': 'size',
>> +            'memdev': 'str'
>> +          }
>> +}
> 
> This set of members is a proper subset of PCDIMMDeviceInfo's, except
> 
> * It uses 'size' instead of 'int', which is an improvement.  Improve
>   PCDIMMDeviceInfo as well?

That certainly makes sense.

@Pankaj do you want to take care of that?

> 
> * The physical address member is called 'memaddr' instead of 'addr'.
>   Why?
> 

Very good point. Have a look at "memory-device: add device class
function set_addr()" (patch #10).

Summary: The property name on the device will be called "memaddr", as
"addr" is already (unfortunately) used for virtio addressing, that's why
I feel like we should name it here "memaddr", too.

("addr" is too generic, a collision had to happen :( )
Pankaj Gupta Sept. 21, 2018, 8:56 a.m. UTC | #3
> 
> On 21/09/2018 10:07, Markus Armbruster wrote:
> > Quick review of just the QAPI part.
> > 
> > David Hildenbrand <david@redhat.com> writes:
> > 
> >> From: Pankaj Gupta <pagupta@redhat.com>
> >>
> >> This is the current protoype of virtio-pmem. Support will require
> >> machine changes for the architectures that will support it, so it will
> >> not yet be compiled.
> >>
> >> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
> >> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
> >>   split up patches ]
> >> Signed-off-by: David Hildenbrand <david@redhat.com>
> > [...]
> >> diff --git a/qapi/misc.json b/qapi/misc.json
> >> index 7c36de0464..cadbca26ac 100644
> >> --- a/qapi/misc.json
> >> +++ b/qapi/misc.json
> >> @@ -2907,6 +2907,29 @@
> >>            }
> >>  }
> >>  
> >> +##
> >> +# @VirtioPMemDeviceInfo:
> >> +#
> >> +# VirtioPMem state information
> >> +#
> >> +# @id: device's ID
> >> +#
> >> +# @memaddr: physical address in memory, where device is mapped
> >> +#
> >> +# @size: size of memory that the device provides
> >> +#
> >> +# @memdev: memory backend linked with device
> >> +#
> >> +# Since: 3.1
> >> +##
> >> +{ 'struct': 'VirtioPMemDeviceInfo',
> >> +  'data': { '*id': 'str',
> >> +            'memaddr': 'size',
> >> +            'size': 'size',
> >> +            'memdev': 'str'
> >> +          }
> >> +}
> > 
> > This set of members is a proper subset of PCDIMMDeviceInfo's, except
> > 
> > * It uses 'size' instead of 'int', which is an improvement.  Improve
> >   PCDIMMDeviceInfo as well?
> 
> That certainly makes sense.
> 
> @Pankaj do you want to take care of that?

Sure.

Thanks,
Pankaj

> 
> > 
> > * The physical address member is called 'memaddr' instead of 'addr'.
> >   Why?
> > 
> 
> Very good point. Have a look at "memory-device: add device class
> function set_addr()" (patch #10).
> 
> Summary: The property name on the device will be called "memaddr", as
> "addr" is already (unfortunately) used for virtio addressing, that's why
> I feel like we should name it here "memaddr", too.
> 
> ("addr" is too generic, a collision had to happen :( )
> 
> --
> 
> Thanks,
> 
> David / dhildenb
>
Markus Armbruster Sept. 21, 2018, 12:28 p.m. UTC | #4
David Hildenbrand <david@redhat.com> writes:

> On 21/09/2018 10:07, Markus Armbruster wrote:
>> Quick review of just the QAPI part.
>> 
>> David Hildenbrand <david@redhat.com> writes:
>> 
>>> From: Pankaj Gupta <pagupta@redhat.com>
>>>
>>> This is the current protoype of virtio-pmem. Support will require
>>> machine changes for the architectures that will support it, so it will
>>> not yet be compiled.
>>>
>>> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
>>> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
>>>   split up patches ]
>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>> [...]
>>> diff --git a/qapi/misc.json b/qapi/misc.json
>>> index 7c36de0464..cadbca26ac 100644
>>> --- a/qapi/misc.json
>>> +++ b/qapi/misc.json
>>> @@ -2907,6 +2907,29 @@
>>>            }
>>>  }
>>>  
>>> +##
>>> +# @VirtioPMemDeviceInfo:
>>> +#
>>> +# VirtioPMem state information
>>> +#
>>> +# @id: device's ID
>>> +#
>>> +# @memaddr: physical address in memory, where device is mapped
>>> +#
>>> +# @size: size of memory that the device provides
>>> +#
>>> +# @memdev: memory backend linked with device
>>> +#
>>> +# Since: 3.1
>>> +##
>>> +{ 'struct': 'VirtioPMemDeviceInfo',
>>> +  'data': { '*id': 'str',
>>> +            'memaddr': 'size',
>>> +            'size': 'size',
>>> +            'memdev': 'str'
>>> +          }
>>> +}
>> 
>> This set of members is a proper subset of PCDIMMDeviceInfo's, except
>> 
>> * It uses 'size' instead of 'int', which is an improvement.  Improve
>>   PCDIMMDeviceInfo as well?
>
> That certainly makes sense.
>
> @Pankaj do you want to take care of that?
>
>> 
>> * The physical address member is called 'memaddr' instead of 'addr'.
>>   Why?
>> 
>
> Very good point. Have a look at "memory-device: add device class
> function set_addr()" (patch #10).
>
> Summary: The property name on the device will be called "memaddr", as
> "addr" is already (unfortunately) used for virtio addressing, that's why
> I feel like we should name it here "memaddr", too.
>
> ("addr" is too generic, a collision had to happen :( )

Hmm.  Let's see whether I understand.

Existing device "pc-dimm" has property "addr", and it's the physical
address.

Abstract device "pci-device" has property "addr", and it's the PCI
address.

Device "virtio-pmem-pci" is a "virtio-pci" is a "pci-device".  It thus
inherits "addr".  You therefore can't name the physical address property
"addr", and name it "memaddr" instead.

There is no such clash in VirtioPMemDeviceInfo.  You could name the
member "addr" there.  But that would trade the inconsistency with
PCDIMMDeviceInfo for an inconsistency with the device property.

Is this correct?
David Hildenbrand Sept. 21, 2018, 12:32 p.m. UTC | #5
On 21/09/2018 14:28, Markus Armbruster wrote:
> David Hildenbrand <david@redhat.com> writes:
> 
>> On 21/09/2018 10:07, Markus Armbruster wrote:
>>> Quick review of just the QAPI part.
>>>
>>> David Hildenbrand <david@redhat.com> writes:
>>>
>>>> From: Pankaj Gupta <pagupta@redhat.com>
>>>>
>>>> This is the current protoype of virtio-pmem. Support will require
>>>> machine changes for the architectures that will support it, so it will
>>>> not yet be compiled.
>>>>
>>>> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
>>>> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
>>>>   split up patches ]
>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>> [...]
>>>> diff --git a/qapi/misc.json b/qapi/misc.json
>>>> index 7c36de0464..cadbca26ac 100644
>>>> --- a/qapi/misc.json
>>>> +++ b/qapi/misc.json
>>>> @@ -2907,6 +2907,29 @@
>>>>            }
>>>>  }
>>>>  
>>>> +##
>>>> +# @VirtioPMemDeviceInfo:
>>>> +#
>>>> +# VirtioPMem state information
>>>> +#
>>>> +# @id: device's ID
>>>> +#
>>>> +# @memaddr: physical address in memory, where device is mapped
>>>> +#
>>>> +# @size: size of memory that the device provides
>>>> +#
>>>> +# @memdev: memory backend linked with device
>>>> +#
>>>> +# Since: 3.1
>>>> +##
>>>> +{ 'struct': 'VirtioPMemDeviceInfo',
>>>> +  'data': { '*id': 'str',
>>>> +            'memaddr': 'size',
>>>> +            'size': 'size',
>>>> +            'memdev': 'str'
>>>> +          }
>>>> +}
>>>
>>> This set of members is a proper subset of PCDIMMDeviceInfo's, except
>>>
>>> * It uses 'size' instead of 'int', which is an improvement.  Improve
>>>   PCDIMMDeviceInfo as well?
>>
>> That certainly makes sense.
>>
>> @Pankaj do you want to take care of that?
>>
>>>
>>> * The physical address member is called 'memaddr' instead of 'addr'.
>>>   Why?
>>>
>>
>> Very good point. Have a look at "memory-device: add device class
>> function set_addr()" (patch #10).
>>
>> Summary: The property name on the device will be called "memaddr", as
>> "addr" is already (unfortunately) used for virtio addressing, that's why
>> I feel like we should name it here "memaddr", too.
>>
>> ("addr" is too generic, a collision had to happen :( )
> 
> Hmm.  Let's see whether I understand.
> 
> Existing device "pc-dimm" has property "addr", and it's the physical
> address.
> 
> Abstract device "pci-device" has property "addr", and it's the PCI
> address.
> 
> Device "virtio-pmem-pci" is a "virtio-pci" is a "pci-device".  It thus
> inherits "addr".  You therefore can't name the physical address property
> "addr", and name it "memaddr" instead.

Almost correct.

virtio-pmem-pci is a proxy of virtio-pci and aliases all properties
(that part is confusing). So all properties of virtio-pci become
properties of virtio-pmem-pci. And the user only configures
virtio-pmem-pci via the properties.

> 
> There is no such clash in VirtioPMemDeviceInfo.  You could name the
> member "addr" there.  But that would trade the inconsistency with
> PCDIMMDeviceInfo for an inconsistency with the device property.
> 
> Is this correct?
> 

Yes, I chose to name it like the property. (that felt to be the right
thing). As far as I see this is perfectly fine. It's unfortunate but we
can't do anything about it.
Markus Armbruster Sept. 21, 2018, 1:39 p.m. UTC | #6
David Hildenbrand <david@redhat.com> writes:

> On 21/09/2018 14:28, Markus Armbruster wrote:
>> David Hildenbrand <david@redhat.com> writes:
>> 
>>> On 21/09/2018 10:07, Markus Armbruster wrote:
>>>> Quick review of just the QAPI part.
>>>>
>>>> David Hildenbrand <david@redhat.com> writes:
>>>>
>>>>> From: Pankaj Gupta <pagupta@redhat.com>
>>>>>
>>>>> This is the current protoype of virtio-pmem. Support will require
>>>>> machine changes for the architectures that will support it, so it will
>>>>> not yet be compiled.
>>>>>
>>>>> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
>>>>> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
>>>>>   split up patches ]
>>>>> Signed-off-by: David Hildenbrand <david@redhat.com>
>>>> [...]
>>>>> diff --git a/qapi/misc.json b/qapi/misc.json
>>>>> index 7c36de0464..cadbca26ac 100644
>>>>> --- a/qapi/misc.json
>>>>> +++ b/qapi/misc.json
>>>>> @@ -2907,6 +2907,29 @@
>>>>>            }
>>>>>  }
>>>>>  
>>>>> +##
>>>>> +# @VirtioPMemDeviceInfo:
>>>>> +#
>>>>> +# VirtioPMem state information
>>>>> +#
>>>>> +# @id: device's ID
>>>>> +#
>>>>> +# @memaddr: physical address in memory, where device is mapped
>>>>> +#
>>>>> +# @size: size of memory that the device provides
>>>>> +#
>>>>> +# @memdev: memory backend linked with device
>>>>> +#
>>>>> +# Since: 3.1
>>>>> +##
>>>>> +{ 'struct': 'VirtioPMemDeviceInfo',
>>>>> +  'data': { '*id': 'str',
>>>>> +            'memaddr': 'size',
>>>>> +            'size': 'size',
>>>>> +            'memdev': 'str'
>>>>> +          }
>>>>> +}
>>>>
>>>> This set of members is a proper subset of PCDIMMDeviceInfo's, except
>>>>
>>>> * It uses 'size' instead of 'int', which is an improvement.  Improve
>>>>   PCDIMMDeviceInfo as well?
>>>
>>> That certainly makes sense.
>>>
>>> @Pankaj do you want to take care of that?
>>>
>>>>
>>>> * The physical address member is called 'memaddr' instead of 'addr'.
>>>>   Why?
>>>>
>>>
>>> Very good point. Have a look at "memory-device: add device class
>>> function set_addr()" (patch #10).
>>>
>>> Summary: The property name on the device will be called "memaddr", as
>>> "addr" is already (unfortunately) used for virtio addressing, that's why
>>> I feel like we should name it here "memaddr", too.
>>>
>>> ("addr" is too generic, a collision had to happen :( )
>> 
>> Hmm.  Let's see whether I understand.
>> 
>> Existing device "pc-dimm" has property "addr", and it's the physical
>> address.
>> 
>> Abstract device "pci-device" has property "addr", and it's the PCI
>> address.
>> 
>> Device "virtio-pmem-pci" is a "virtio-pci" is a "pci-device".  It thus
>> inherits "addr".  You therefore can't name the physical address property
>> "addr", and name it "memaddr" instead.
>
> Almost correct.
>
> virtio-pmem-pci is a proxy of virtio-pci and aliases all properties
> (that part is confusing). So all properties of virtio-pci become
> properties of virtio-pmem-pci. And the user only configures
> virtio-pmem-pci via the properties.
>
>> 
>> There is no such clash in VirtioPMemDeviceInfo.  You could name the
>> member "addr" there.  But that would trade the inconsistency with
>> PCDIMMDeviceInfo for an inconsistency with the device property.
>> 
>> Is this correct?
>> 
>
> Yes, I chose to name it like the property. (that felt to be the right
> thing). As far as I see this is perfectly fine.

Now work that into the commit message or perhaps a comment, please :)

>                                                 It's unfortunate but we
> can't do anything about it.

Well, if we really, really wanted to, we could: rename pc-dimm's
property, keep the old name as a deprecated alias.  Would that be better
than the inconsistency, and the code you need to work around it?  You
decide.
David Hildenbrand Sept. 21, 2018, 2:09 p.m. UTC | #7
>>>
>>> There is no such clash in VirtioPMemDeviceInfo.  You could name the
>>> member "addr" there.  But that would trade the inconsistency with
>>> PCDIMMDeviceInfo for an inconsistency with the device property.
>>>
>>> Is this correct?
>>>
>>
>> Yes, I chose to name it like the property. (that felt to be the right
>> thing). As far as I see this is perfectly fine.
> 
> Now work that into the commit message or perhaps a comment, please :)

Yes, once this get's picked up in the !RFC version of virtio-pmem, we
should highlight that (and best, split VirtioPMemDeviceInfo introduction
off from the remaining part).

> 
>>                                                 It's unfortunate but we
>> can't do anything about it.
> 
> Well, if we really, really wanted to, we could: rename pc-dimm's
> property, keep the old name as a deprecated alias.  Would that be better
> than the inconsistency, and the code you need to work around it?  You
> decide.
> 

I had the same in mind initially but considered it not useful right now.
There is not that much code to work around that (it's basically just two
functions get_addr() and set_addr() on the memory device interface), and
this way we can at least keep property setting/getting code out of
memory-device code (I don't consider properties useful when it comes to
internal interfaces).

If we ever see a problem with that, we can introduce the alias (+
optionally deprecate) later.

Thanks!
David Gibson Sept. 24, 2018, 5:45 a.m. UTC | #8
On Thu, Sep 20, 2018 at 12:32:38PM +0200, David Hildenbrand wrote:
> From: Pankaj Gupta <pagupta@redhat.com>
> 
> This is the current protoype of virtio-pmem. Support will require
> machine changes for the architectures that will support it, so it will
> not yet be compiled.
> 
> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
>   split up patches ]
> Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

Seems generally sane.  Is there a (craft?) virtio-pmem spec around to
see what this is actually trying to implement?

And one nit..

[snip]
> +static int worker_cb(void *opaque)
> +{
> +    VirtIODeviceRequest *req = opaque;
> +    int err = 0;
> +
> +    printf("\n performing flush ...");

.. I assume the plan is to remove these debugging prints..

> +    /* flush raw backing image */
> +    err = fsync(req->fd);
> +    printf("\n performed flush ...:errcode::%d", err);
> +    if (err != 0) {
> +        err = EIO;
> +    }
> +    req->resp.ret = err;
> +
> +    return 0;
> +}
David Hildenbrand Sept. 24, 2018, 8:06 a.m. UTC | #9
On 24/09/2018 07:45, David Gibson wrote:
> On Thu, Sep 20, 2018 at 12:32:38PM +0200, David Hildenbrand wrote:
>> From: Pankaj Gupta <pagupta@redhat.com>
>>
>> This is the current protoype of virtio-pmem. Support will require
>> machine changes for the architectures that will support it, so it will
>> not yet be compiled.
>>
>> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
>> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
>>   split up patches ]
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> 
> Seems generally sane.  Is there a (craft?) virtio-pmem spec around to
> see what this is actually trying to implement?

I am not aware of any. The first goal is to get something implemented
that works and doesn't break important concepts. E.g. hotplugging memory
devices was one of these concepts that Igor saw as a potential problem
for getting virtio-pmem upstream.

But maybe Pankaj already has some draft version of a spec lying around.

> 
> And one nit..
> 
> [snip]
>> +static int worker_cb(void *opaque)
>> +{
>> +    VirtIODeviceRequest *req = opaque;
>> +    int err = 0;
>> +
>> +    printf("\n performing flush ...");
> 
> .. I assume the plan is to remove these debugging prints..
> 

Yes, these are still in place for verifying that the machinery works.
Pankaj Gupta Sept. 24, 2018, 9:48 a.m. UTC | #10
> On 24/09/2018 07:45, David Gibson wrote:
> > On Thu, Sep 20, 2018 at 12:32:38PM +0200, David Hildenbrand wrote:
> >> From: Pankaj Gupta <pagupta@redhat.com>
> >>
> >> This is the current protoype of virtio-pmem. Support will require
> >> machine changes for the architectures that will support it, so it will
> >> not yet be compiled.
> >>
> >> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
> >> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
> >>   split up patches ]
> >> Signed-off-by: David Hildenbrand <david@redhat.com>
> > 
> > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > 
> > Seems generally sane.  Is there a (craft?) virtio-pmem spec around to
> > see what this is actually trying to implement?
> 
> I am not aware of any. The first goal is to get something implemented
> that works and doesn't break important concepts. E.g. hotplugging memory
> devices was one of these concepts that Igor saw as a potential problem
> for getting virtio-pmem upstream.
> 
> But maybe Pankaj already has some draft version of a spec lying around.

I have created document but no official spec. Project idea is shared here [1] 
and kernel part of series [2]. I will create a spec and share.

[1] https://www.spinics.net/lists/kvm/msg153095.html
[2] https://marc.info/?l=kvm&m=153572224719212&w=2

Thanks,
Pankaj
 
> 
> > 
> > And one nit..
> > 
> > [snip]
> >> +static int worker_cb(void *opaque)
> >> +{
> >> +    VirtIODeviceRequest *req = opaque;
> >> +    int err = 0;
> >> +
> >> +    printf("\n performing flush ...");
> > 
> > .. I assume the plan is to remove these debugging prints..
> > 
> 
> Yes, these are still in place for verifying that the machinery works.
> 
> --
> 
> Thanks,
> 
> David / dhildenb
> 
>
David Gibson Oct. 8, 2018, 2:04 a.m. UTC | #11
On Mon, Sep 24, 2018 at 05:48:14AM -0400, Pankaj Gupta wrote:
> 
> > On 24/09/2018 07:45, David Gibson wrote:
> > > On Thu, Sep 20, 2018 at 12:32:38PM +0200, David Hildenbrand wrote:
> > >> From: Pankaj Gupta <pagupta@redhat.com>
> > >>
> > >> This is the current protoype of virtio-pmem. Support will require
> > >> machine changes for the architectures that will support it, so it will
> > >> not yet be compiled.
> > >>
> > >> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
> > >> [ MemoryDevice/MemoryRegion changes, cleanups, addr property "memaddr",
> > >>   split up patches ]
> > >> Signed-off-by: David Hildenbrand <david@redhat.com>
> > > 
> > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > 
> > > Seems generally sane.  Is there a (craft?) virtio-pmem spec around to
> > > see what this is actually trying to implement?
> > 
> > I am not aware of any. The first goal is to get something implemented
> > that works and doesn't break important concepts. E.g. hotplugging memory
> > devices was one of these concepts that Igor saw as a potential problem
> > for getting virtio-pmem upstream.
> > 
> > But maybe Pankaj already has some draft version of a spec lying around.
> 
> I have created document but no official spec. Project idea is shared here [1] 
> and kernel part of series [2]. I will create a spec and share.
> 
> [1] https://www.spinics.net/lists/kvm/msg153095.html
> [2] https://marc.info/?l=kvm&m=153572224719212&w=2

Ok.  Because this creates a new interface we could be working with for
a while, I think we do need at least a semi-formal spec - and to get
some review of that spec - before we merge.
Pankaj Gupta Oct. 8, 2018, 3:50 a.m. UTC | #12
Hello David,

Thanks for the review.

> > 
> > > On 24/09/2018 07:45, David Gibson wrote:
> > > > On Thu, Sep 20, 2018 at 12:32:38PM +0200, David Hildenbrand wrote:
> > > >> From: Pankaj Gupta <pagupta@redhat.com>
> > > >>
> > > >> This is the current protoype of virtio-pmem. Support will require
> > > >> machine changes for the architectures that will support it, so it will
> > > >> not yet be compiled.
> > > >>
> > > >> Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
> > > >> [ MemoryDevice/MemoryRegion changes, cleanups, addr property
> > > >> "memaddr",
> > > >>   split up patches ]
> > > >> Signed-off-by: David Hildenbrand <david@redhat.com>
> > > > 
> > > > Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> > > > 
> > > > Seems generally sane.  Is there a (craft?) virtio-pmem spec around to
> > > > see what this is actually trying to implement?
> > > 
> > > I am not aware of any. The first goal is to get something implemented
> > > that works and doesn't break important concepts. E.g. hotplugging memory
> > > devices was one of these concepts that Igor saw as a potential problem
> > > for getting virtio-pmem upstream.
> > > 
> > > But maybe Pankaj already has some draft version of a spec lying around.
> > 
> > I have created document but no official spec. Project idea is shared here
> > [1]
> > and kernel part of series [2]. I will create a spec and share.
> > 
> > [1] https://www.spinics.net/lists/kvm/msg153095.html
> > [2] https://marc.info/?l=kvm&m=153572224719212&w=2
> 
> Ok.  Because this creates a new interface we could be working with for
> a while, I think we do need at least a semi-formal spec - and to get
> some review of that spec - before we merge.

Sure, Will create one and try to send for review this week.

Thanks,
Pankaj

> 
> --
> David Gibson			| I'll have my music baroque, and my code
> david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
> 				| _way_ _around_!
> http://www.ozlabs.org/~dgibson
>
diff mbox series

Patch

diff --git a/hw/virtio/Makefile.objs b/hw/virtio/Makefile.objs
index 1b2799cfd8..75cdd90264 100644
--- a/hw/virtio/Makefile.objs
+++ b/hw/virtio/Makefile.objs
@@ -5,6 +5,7 @@  obj-y += virtio.o
 common-obj-$(CONFIG_VIRTIO_RNG) += virtio-rng.o
 common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
 common-obj-$(CONFIG_VIRTIO_MMIO) += virtio-mmio.o
+obj-$(call land,$(CONFIG_VIRTIO_PMEM),$(CONFIG_LINUX)) += virtio-pmem.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio-balloon.o
 obj-$(CONFIG_VIRTIO_CRYPTO) += virtio-crypto.o
 obj-$(call land,$(CONFIG_VIRTIO_CRYPTO),$(CONFIG_VIRTIO_PCI)) += virtio-crypto-pci.o
diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c
new file mode 100644
index 0000000000..0f8b509f0f
--- /dev/null
+++ b/hw/virtio/virtio-pmem.c
@@ -0,0 +1,224 @@ 
+/*
+ * Virtio pmem device
+ *
+ * Copyright (C) 2018 Red Hat, Inc.
+ * Copyright (C) 2018 Pankaj Gupta <pagupta@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "hw/virtio/virtio-access.h"
+#include "hw/virtio/virtio-pmem.h"
+#include "hw/mem/memory-device.h"
+#include "block/aio.h"
+#include "block/thread-pool.h"
+
+typedef struct VirtIOPMEMresp {
+    int ret;
+} VirtIOPMEMResp;
+
+typedef struct VirtIODeviceRequest {
+    VirtQueueElement elem;
+    int fd;
+    VirtIOPMEM *pmem;
+    VirtIOPMEMResp resp;
+} VirtIODeviceRequest;
+
+static int worker_cb(void *opaque)
+{
+    VirtIODeviceRequest *req = opaque;
+    int err = 0;
+
+    printf("\n performing flush ...");
+    /* flush raw backing image */
+    err = fsync(req->fd);
+    printf("\n performed flush ...:errcode::%d", err);
+    if (err != 0) {
+        err = EIO;
+    }
+    req->resp.ret = err;
+
+    return 0;
+}
+
+static void done_cb(void *opaque, int ret)
+{
+    VirtIODeviceRequest *req = opaque;
+    int len = iov_from_buf(req->elem.in_sg, req->elem.in_num, 0,
+                              &req->resp, sizeof(VirtIOPMEMResp));
+
+    /* Callbacks are serialized, so no need to use atomic ops.  */
+    virtqueue_push(req->pmem->rq_vq, &req->elem, len);
+    virtio_notify((VirtIODevice *)req->pmem, req->pmem->rq_vq);
+    g_free(req);
+}
+
+static void virtio_pmem_flush(VirtIODevice *vdev, VirtQueue *vq)
+{
+    VirtIODeviceRequest *req;
+    VirtIOPMEM *pmem = VIRTIO_PMEM(vdev);
+    HostMemoryBackend *backend = MEMORY_BACKEND(pmem->memdev);
+    ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
+
+    req = virtqueue_pop(vq, sizeof(VirtIODeviceRequest));
+    if (!req) {
+        virtio_error(vdev, "virtio-pmem missing request data");
+        return;
+    }
+
+    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
+        virtio_error(vdev, "virtio-pmem request not proper");
+        g_free(req);
+        return;
+    }
+    req->fd = memory_region_get_fd(&backend->mr);
+    req->pmem = pmem;
+    thread_pool_submit_aio(pool, worker_cb, req, done_cb, req);
+}
+
+static void virtio_pmem_get_config(VirtIODevice *vdev, uint8_t *config)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(vdev);
+    struct virtio_pmem_config *pmemcfg = (struct virtio_pmem_config *) config;
+
+    virtio_stq_p(vdev, &pmemcfg->start, pmem->start);
+    virtio_stq_p(vdev, &pmemcfg->size, memory_region_size(&pmem->memdev->mr));
+}
+
+static uint64_t virtio_pmem_get_features(VirtIODevice *vdev, uint64_t features,
+                                        Error **errp)
+{
+    return features;
+}
+
+static void virtio_pmem_realize(DeviceState *dev, Error **errp)
+{
+    VirtIODevice   *vdev   = VIRTIO_DEVICE(dev);
+    VirtIOPMEM     *pmem   = VIRTIO_PMEM(dev);
+
+    if (!pmem->memdev) {
+        error_setg(errp, "virtio-pmem memdev not set");
+        return;
+    } else if (host_memory_backend_is_mapped(pmem->memdev)) {
+        char *path = object_get_canonical_path_component(OBJECT(pmem->memdev));
+        error_setg(errp, "can't use already busy memdev: %s", path);
+        g_free(path);
+        return;
+    }
+
+    host_memory_backend_set_mapped(pmem->memdev, true);
+    virtio_init(vdev, TYPE_VIRTIO_PMEM, VIRTIO_ID_PMEM,
+                                          sizeof(struct virtio_pmem_config));
+    pmem->rq_vq = virtio_add_queue(vdev, 128, virtio_pmem_flush);
+}
+
+static const char *virtio_pmem_md_get_device_id(const MemoryDeviceState *md)
+{
+    Object *obj = OBJECT(md);
+
+    /* always return the ID of the proxy device the user configured */
+    if (obj->parent && object_dynamic_cast(obj->parent, TYPE_DEVICE)) {
+        const DeviceState *parent_dev = DEVICE(obj->parent);
+
+        return parent_dev->id;
+    }
+    return NULL;
+}
+
+static void virtio_pmem_md_fill_device_info(const MemoryDeviceState *md,
+                                            MemoryDeviceInfo *info)
+{
+    VirtioPMemDeviceInfo *vi = g_new0(VirtioPMemDeviceInfo, 1);
+    VirtIOPMEM *pmem = VIRTIO_PMEM(md);
+    const char *id = virtio_pmem_md_get_device_id(md);
+
+    if (id) {
+        vi->has_id = true;
+        vi->id = g_strdup(id);
+    }
+
+    vi->memaddr = pmem->start;
+    vi->size = pmem->memdev ? memory_region_size(&pmem->memdev->mr) : 0;
+    vi->memdev = object_get_canonical_path(OBJECT(pmem->memdev));
+
+    info->u.virtio_pmem.data = vi;
+    info->type = MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM;
+}
+
+static uint64_t virtio_pmem_md_get_addr(const MemoryDeviceState *md)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(md);
+
+    return pmem->start;
+}
+
+static void virtio_pmem_md_set_addr(MemoryDeviceState *md, uint64_t addr,
+                                    Error **errp)
+{
+    object_property_set_uint(OBJECT(md), addr, VIRTIO_PMEM_ADDR_PROP, errp);
+}
+
+static MemoryRegion *virtio_pmem_md_get_memory_region(MemoryDeviceState *md,
+                                                      Error **errp)
+{
+    VirtIOPMEM *pmem = VIRTIO_PMEM(md);
+
+    if (!pmem->memdev) {
+        error_setg(errp, "'%s' property must be set", VIRTIO_PMEM_MEMDEV_PROP);
+        return NULL;
+    }
+
+    return &pmem->memdev->mr;
+}
+
+static Property virtio_pmem_properties[] = {
+    DEFINE_PROP_UINT64(VIRTIO_PMEM_ADDR_PROP, VirtIOPMEM, start, 0),
+    DEFINE_PROP_LINK(VIRTIO_PMEM_MEMDEV_PROP, VirtIOPMEM, memdev,
+                     TYPE_MEMORY_BACKEND, HostMemoryBackend *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void virtio_pmem_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(klass);
+
+    dc->props = virtio_pmem_properties;
+
+    vdc->realize      =  virtio_pmem_realize;
+    vdc->get_config   =  virtio_pmem_get_config;
+    vdc->get_features =  virtio_pmem_get_features;
+
+    mdc->get_addr         = virtio_pmem_md_get_addr;
+    mdc->set_addr         = virtio_pmem_md_set_addr;
+    /* for virtio-pmem plugged_size == region_size */
+    mdc->get_plugged_size = memory_device_get_region_size;
+    mdc->get_memory_region  = virtio_pmem_md_get_memory_region;
+    mdc->fill_device_info = virtio_pmem_md_fill_device_info;
+    mdc->get_device_id = virtio_pmem_md_get_device_id;
+}
+
+static TypeInfo virtio_pmem_info = {
+    .name          = TYPE_VIRTIO_PMEM,
+    .parent        = TYPE_VIRTIO_DEVICE,
+    .class_init    = virtio_pmem_class_init,
+    .instance_size = sizeof(VirtIOPMEM),
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_MEMORY_DEVICE },
+        { }
+  },
+};
+
+static void virtio_register_types(void)
+{
+    type_register_static(&virtio_pmem_info);
+}
+
+type_init(virtio_register_types)
diff --git a/include/hw/virtio/virtio-pmem.h b/include/hw/virtio/virtio-pmem.h
new file mode 100644
index 0000000000..11e549e0f1
--- /dev/null
+++ b/include/hw/virtio/virtio-pmem.h
@@ -0,0 +1,40 @@ 
+/*
+ * Virtio pmem Device
+ *
+ * Copyright Red Hat, Inc. 2018
+ * Copyright Pankaj Gupta <pagupta@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version.  See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef QEMU_VIRTIO_PMEM_H
+#define QEMU_VIRTIO_PMEM_H
+
+#include "hw/virtio/virtio.h"
+#include "sysemu/hostmem.h"
+#include "standard-headers/linux/virtio_ids.h"
+
+#define TYPE_VIRTIO_PMEM "virtio-pmem"
+
+#define VIRTIO_PMEM(obj) \
+        OBJECT_CHECK(VirtIOPMEM, (obj), TYPE_VIRTIO_PMEM)
+
+#define VIRTIO_PMEM_ADDR_PROP "memaddr"
+#define VIRTIO_PMEM_MEMDEV_PROP "memdev"
+
+/* VirtIOPMEM device structure */
+typedef struct VirtIOPMEM {
+    VirtIODevice parent_obj;
+
+    VirtQueue *rq_vq;
+    uint64_t start;
+    HostMemoryBackend *memdev;
+} VirtIOPMEM;
+
+struct virtio_pmem_config {
+    uint64_t start;
+    uint64_t size;
+};
+#endif
diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h
index 6d5c3b2d4f..346389565a 100644
--- a/include/standard-headers/linux/virtio_ids.h
+++ b/include/standard-headers/linux/virtio_ids.h
@@ -43,5 +43,6 @@ 
 #define VIRTIO_ID_INPUT        18 /* virtio input */
 #define VIRTIO_ID_VSOCK        19 /* virtio vsock transport */
 #define VIRTIO_ID_CRYPTO       20 /* virtio crypto */
+#define VIRTIO_ID_PMEM         25 /* virtio pmem */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/qapi/misc.json b/qapi/misc.json
index 7c36de0464..cadbca26ac 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -2907,6 +2907,29 @@ 
           }
 }
 
+##
+# @VirtioPMemDeviceInfo:
+#
+# VirtioPMem state information
+#
+# @id: device's ID
+#
+# @memaddr: physical address in memory, where device is mapped
+#
+# @size: size of memory that the device provides
+#
+# @memdev: memory backend linked with device
+#
+# Since: 3.1
+##
+{ 'struct': 'VirtioPMemDeviceInfo',
+  'data': { '*id': 'str',
+            'memaddr': 'size',
+            'size': 'size',
+            'memdev': 'str'
+          }
+}
+
 ##
 # @MemoryDeviceInfo:
 #
@@ -2916,7 +2939,8 @@ 
 ##
 { 'union': 'MemoryDeviceInfo',
   'data': { 'dimm': 'PCDIMMDeviceInfo',
-            'nvdimm': 'PCDIMMDeviceInfo'
+            'nvdimm': 'PCDIMMDeviceInfo',
+            'virtio-pmem': 'VirtioPMemDeviceInfo'
           }
 }