PCI: hv: support reporting serial number as slot information

Message ID 20180612164037.10672-1-sthemmin@microsoft.com
State Under Review
Delegated to: Lorenzo Pieralisi
Headers show
Series
  • PCI: hv: support reporting serial number as slot information
Related show

Commit Message

Stephen Hemminger June 12, 2018, 4:40 p.m.
The Hyper-V host API for PCI provides a unique "serial number" which
can be used as the basis for sysfs PCI slot table. This can be useful
for cases where userspace wants to find the PCI device based on
serial number.

When an SR-IOV NIC is added, the host sends an attach message
with a serial number. The kernel doesn't use the serial number, but
it is useful when doing the same thing in a userspace driver such
as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
way to find the matching PCI device.

There may be some cases where the serial number is not unique such
as when using GPU's. But the PCI slot infrastructure will handle
that.

This also shortens the network device names generated by
systemd/udev. The new names use slot (ens2) rather than
PCI address (enP2p0s2).

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
---
v2
  - retarget for current filenames in PCI next
  - remove debug log message

 drivers/pci/controller/pci-hyperv.c | 30 +++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Lorenzo Pieralisi July 4, 2018, 4:08 p.m. | #1
On Tue, Jun 12, 2018 at 09:40:37AM -0700, Stephen Hemminger wrote:
> The Hyper-V host API for PCI provides a unique "serial number" which
> can be used as the basis for sysfs PCI slot table. This can be useful
> for cases where userspace wants to find the PCI device based on
> serial number.
> 
> When an SR-IOV NIC is added, the host sends an attach message
> with a serial number. The kernel doesn't use the serial number, but
> it is useful when doing the same thing in a userspace driver such
> as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
> way to find the matching PCI device.
> 
> There may be some cases where the serial number is not unique such
> as when using GPU's. But the PCI slot infrastructure will handle
> that.
> 
> This also shortens the network device names generated by
> systemd/udev. The new names use slot (ens2) rather than
> PCI address (enP2p0s2).

Hi Stephen,

I wanted to apply this patch but wanted to make sure all HV
maintainers are in agreement first since this looks like
a significant user-space ABI change.

I would also ask Bjorn's opinion on this since he has more
insights into the slot interface history.

Thanks,
Lorenzo

> Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> ---
> v2
>   - retarget for current filenames in PCI next
>   - remove debug log message
> 
>  drivers/pci/controller/pci-hyperv.c | 30 +++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 6cc5036ac83c..4e3575716ced 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -88,6 +88,8 @@ static enum pci_protocol_version_t pci_protocol_version;
>  
>  #define STATUS_REVISION_MISMATCH 0xC0000059
>  
> +#define SLOT_NAME_SIZE 21
> +
>  /*
>   * Message Types
>   */
> @@ -493,6 +495,7 @@ struct hv_pci_dev {
>  	struct list_head list_entry;
>  	refcount_t refs;
>  	enum hv_pcichild_state state;
> +	struct pci_slot *pci_slot;
>  	struct pci_function_description desc;
>  	bool reported_missing;
>  	struct hv_pcibus_device *hbus;
> @@ -1454,6 +1457,28 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
>  	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
>  }
>  
> +static void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
> +{
> +	struct hv_pci_dev *hpdev;
> +	char name[SLOT_NAME_SIZE];
> +	unsigned long flags;
> +	int slot_nr;
> +
> +	spin_lock_irqsave(&hbus->device_list_lock, flags);
> +	list_for_each_entry(hpdev, &hbus->children, list_entry) {
> +		if (hpdev->pci_slot)
> +			continue;
> +
> +		slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
> +		snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
> +		hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr,
> +					  name, NULL);
> +		if (!hpdev->pci_slot)
> +			pr_warn("pci_create slot %s failed\n", name);
> +	}
> +	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
> +}
> +
>  /**
>   * create_root_hv_pci_bus() - Expose a new root PCI bus
>   * @hbus:	Root PCI bus, as understood by this driver
> @@ -1477,6 +1502,7 @@ static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
>  	pci_lock_rescan_remove();
>  	pci_scan_child_bus(hbus->pci_bus);
>  	pci_bus_assign_resources(hbus->pci_bus);
> +	hv_pci_assign_slots(hbus);
>  	pci_bus_add_devices(hbus->pci_bus);
>  	pci_unlock_rescan_remove();
>  	hbus->state = hv_pcibus_installed;
> @@ -1739,6 +1765,7 @@ static void pci_devices_present_work(struct work_struct *work)
>  		 */
>  		pci_lock_rescan_remove();
>  		pci_scan_child_bus(hbus->pci_bus);
> +		hv_pci_assign_slots(hbus);
>  		pci_unlock_rescan_remove();
>  		break;
>  
> @@ -1855,6 +1882,9 @@ static void hv_eject_device_work(struct work_struct *work)
>  	list_del(&hpdev->list_entry);
>  	spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags);
>  
> +	if (hpdev->pci_slot)
> +		pci_destroy_slot(hpdev->pci_slot);
> +
>  	memset(&ctxt, 0, sizeof(ctxt));
>  	ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
>  	ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
> -- 
> 2.17.1
>
Stephen Hemminger July 10, 2018, 11:11 p.m. | #2
On Wed, 4 Jul 2018 17:08:34 +0100
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:

> On Tue, Jun 12, 2018 at 09:40:37AM -0700, Stephen Hemminger wrote:
> > The Hyper-V host API for PCI provides a unique "serial number" which
> > can be used as the basis for sysfs PCI slot table. This can be useful
> > for cases where userspace wants to find the PCI device based on
> > serial number.
> > 
> > When an SR-IOV NIC is added, the host sends an attach message
> > with a serial number. The kernel doesn't use the serial number, but
> > it is useful when doing the same thing in a userspace driver such
> > as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
> > way to find the matching PCI device.
> > 
> > There may be some cases where the serial number is not unique such
> > as when using GPU's. But the PCI slot infrastructure will handle
> > that.
> > 
> > This also shortens the network device names generated by
> > systemd/udev. The new names use slot (ens2) rather than
> > PCI address (enP2p0s2).  
> 
> Hi Stephen,
> 
> I wanted to apply this patch but wanted to make sure all HV
> maintainers are in agreement first since this looks like
> a significant user-space ABI change.
> 
> I would also ask Bjorn's opinion on this since he has more
> insights into the slot interface history.
> 
> Thanks,
> Lorenzo

It adds an API, but does not change existing sysfs entries.
The potential issues are more about how that new API will cause systemd/udev
to respond. It makes Hyper-V look more like KVM and that is generally
a good thing.

The systemd developers should like this change, but it may cause some
users to have problems since in general the existing udev model does
not do persistent names on PCI devices.  But the PCI device for networking
is only used as a VF device paired with a PV device (netvsc). All network
configuration is done on the PV device; the name of the VF device doesn't
really matter.

The purpose of this was to take incremental steps to use the serial
number for matching PV and VF devices (rather than by MAC address).
There is ongoing work in DPDK (in userspace) where being able to find
a PCI device by serial number would be useful.
Bjorn Helgaas July 11, 2018, 1:49 p.m. | #3
On Wed, Jul 04, 2018 at 05:08:34PM +0100, Lorenzo Pieralisi wrote:
> On Tue, Jun 12, 2018 at 09:40:37AM -0700, Stephen Hemminger wrote:
> > The Hyper-V host API for PCI provides a unique "serial number" which
> > can be used as the basis for sysfs PCI slot table. This can be useful
> > for cases where userspace wants to find the PCI device based on
> > serial number.
> > 
> > When an SR-IOV NIC is added, the host sends an attach message
> > with a serial number. The kernel doesn't use the serial number, but
> > it is useful when doing the same thing in a userspace driver such
> > as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
> > way to find the matching PCI device.

Is this essentially a way to expose the serial number to userspace?

I confess I'm not super familiar with what the slot infrastructure
gives us, and I'm trying to figure out if it's the right way to do
this.  Would a file directly in the device directory, e.g.,

  /sys/devices/pci0000:00/0000:00:00.0/serial

be a possibility?  There is an optional PCIe Device Serial Number
capability and also an optional serial number item in the VPD.  We
don't support either in the core yet, but maybe they could all be
exposed similarly?

Or is there something you need from the slot infrastructure in
addition to the serial number exposure?

You mentioned that this patch is along the lines of what KVM does.
Can you connect the dots a little bit for me as far as how this is
implemented for KVM?  I don't see KVM-related calls to
pci_create_slot() or pci_hp_register().

> > There may be some cases where the serial number is not unique such
> > as when using GPU's. But the PCI slot infrastructure will handle
> > that.
> > 
> > This also shortens the network device names generated by
> > systemd/udev. The new names use slot (ens2) rather than
> > PCI address (enP2p0s2).

I don't recognize "enP2p0s2" as a PCI address.  Is that related to a
domain/bus/device/function address somehow, or is this some sort of
Windows-specific address?

> Hi Stephen,
> 
> I wanted to apply this patch but wanted to make sure all HV
> maintainers are in agreement first since this looks like
> a significant user-space ABI change.
> 
> I would also ask Bjorn's opinion on this since he has more
> insights into the slot interface history.
> 
> Thanks,
> Lorenzo
> 
> > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> > ---
> > v2
> >   - retarget for current filenames in PCI next
> >   - remove debug log message
> > 
> >  drivers/pci/controller/pci-hyperv.c | 30 +++++++++++++++++++++++++++++
> >  1 file changed, 30 insertions(+)
> > 
> > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> > index 6cc5036ac83c..4e3575716ced 100644
> > --- a/drivers/pci/controller/pci-hyperv.c
> > +++ b/drivers/pci/controller/pci-hyperv.c
> > @@ -88,6 +88,8 @@ static enum pci_protocol_version_t pci_protocol_version;
> >  
> >  #define STATUS_REVISION_MISMATCH 0xC0000059
> >  
> > +#define SLOT_NAME_SIZE 21
> > +
> >  /*
> >   * Message Types
> >   */
> > @@ -493,6 +495,7 @@ struct hv_pci_dev {
> >  	struct list_head list_entry;
> >  	refcount_t refs;
> >  	enum hv_pcichild_state state;
> > +	struct pci_slot *pci_slot;
> >  	struct pci_function_description desc;
> >  	bool reported_missing;
> >  	struct hv_pcibus_device *hbus;
> > @@ -1454,6 +1457,28 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
> >  	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
> >  }
> >  
> > +static void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
> > +{
> > +	struct hv_pci_dev *hpdev;
> > +	char name[SLOT_NAME_SIZE];
> > +	unsigned long flags;
> > +	int slot_nr;
> > +
> > +	spin_lock_irqsave(&hbus->device_list_lock, flags);
> > +	list_for_each_entry(hpdev, &hbus->children, list_entry) {
> > +		if (hpdev->pci_slot)
> > +			continue;
> > +
> > +		slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
> > +		snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
> > +		hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr,
> > +					  name, NULL);
> > +		if (!hpdev->pci_slot)
> > +			pr_warn("pci_create slot %s failed\n", name);

Is there any way this could be a dev_warn() so it's connected with
something?  Does hpdev->hbus->hdev->device make sense?

> > +	}
> > +	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
> > +}
> > +
> >  /**
> >   * create_root_hv_pci_bus() - Expose a new root PCI bus
> >   * @hbus:	Root PCI bus, as understood by this driver
> > @@ -1477,6 +1502,7 @@ static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
> >  	pci_lock_rescan_remove();
> >  	pci_scan_child_bus(hbus->pci_bus);
> >  	pci_bus_assign_resources(hbus->pci_bus);
> > +	hv_pci_assign_slots(hbus);
> >  	pci_bus_add_devices(hbus->pci_bus);
> >  	pci_unlock_rescan_remove();
> >  	hbus->state = hv_pcibus_installed;
> > @@ -1739,6 +1765,7 @@ static void pci_devices_present_work(struct work_struct *work)
> >  		 */
> >  		pci_lock_rescan_remove();
> >  		pci_scan_child_bus(hbus->pci_bus);
> > +		hv_pci_assign_slots(hbus);
> >  		pci_unlock_rescan_remove();
> >  		break;
> >  
> > @@ -1855,6 +1882,9 @@ static void hv_eject_device_work(struct work_struct *work)
> >  	list_del(&hpdev->list_entry);
> >  	spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags);
> >  
> > +	if (hpdev->pci_slot)
> > +		pci_destroy_slot(hpdev->pci_slot);
> > +
> >  	memset(&ctxt, 0, sizeof(ctxt));
> >  	ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
> >  	ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
> > -- 
> > 2.17.1
> >
Stephen Hemminger July 13, 2018, 9:38 p.m. | #4
On Wed, 11 Jul 2018 08:49:08 -0500
Bjorn Helgaas <helgaas@kernel.org> wrote:

> On Wed, Jul 04, 2018 at 05:08:34PM +0100, Lorenzo Pieralisi wrote:
> > On Tue, Jun 12, 2018 at 09:40:37AM -0700, Stephen Hemminger wrote:  
> > > The Hyper-V host API for PCI provides a unique "serial number" which
> > > can be used as the basis for sysfs PCI slot table. This can be useful
> > > for cases where userspace wants to find the PCI device based on
> > > serial number.
> > > 
> > > When an SR-IOV NIC is added, the host sends an attach message
> > > with a serial number. The kernel doesn't use the serial number, but
> > > it is useful when doing the same thing in a userspace driver such
> > > as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
> > > way to find the matching PCI device.  
> 
> Is this essentially a way to expose the serial number to userspace?
> 
> I confess I'm not super familiar with what the slot infrastructure
> gives us, and I'm trying to figure out if it's the right way to do
> this.  Would a file directly in the device directory, e.g.,
> 
>   /sys/devices/pci0000:00/0000:00:00.0/serial
> 
> be a possibility?  There is an optional PCIe Device Serial Number
> capability and also an optional serial number item in the VPD.  We
> don't support either in the core yet, but maybe they could all be
> exposed similarly?
> 
> Or is there something you need from the slot infrastructure in
> addition to the serial number exposure?
> 
> You mentioned that this patch is along the lines of what KVM does.
> Can you connect the dots a little bit for me as far as how this is
> implemented for KVM?  I don't see KVM-related calls to
> pci_create_slot() or pci_hp_register().
> 
> > > There may be some cases where the serial number is not unique such
> > > as when using GPU's. But the PCI slot infrastructure will handle
> > > that.
> > > 
> > > This also shortens the network device names generated by
> > > systemd/udev. The new names use slot (ens2) rather than
> > > PCI address (enP2p0s2).  
> 
> I don't recognize "enP2p0s2" as a PCI address.  Is that related to a
> domain/bus/device/function address somehow, or is this some sort of
> Windows-specific address?
> 
> > Hi Stephen,
> > 
> > I wanted to apply this patch but wanted to make sure all HV
> > maintainers are in agreement first since this looks like
> > a significant user-space ABI change.
> > 
> > I would also ask Bjorn's opinion on this since he has more
> > insights into the slot interface history.
> > 
> > Thanks,
> > Lorenzo
> >   
> > > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>

Yes, this is a way to expose serial number to userspace as well as a
way to help udev with naming.

Today, a VF device shows up as:
$ lspci -n
0002:00:02.0 0200: 15b3:1014 (rev 80)


And therefore is named: enP2p0s2 based on these rules:
See: https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L20

 * Two character prefixes based on the type of interface:
 *   en — Ethernet
 *   sl — serial line IP (slip)
 *   wl — wlan
 *   ww — wwan
 *
 * Type of names:

 *   o<index>[n<phys_port_name>|d<dev_port>]
 *                                         — on-board device index number
 *   s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
 *                                         — hotplug slot index number
 *   x<MAC>                                — MAC address
 *   [P<domain>]p<bus>s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
 *                                         — PCI geographical location

I looked into several other options:
  - put serial number into the device index number


On KVM, the ACPI emulation provides PCI slot information so that is why
the devices are named "ens3".

I looked into several other options (none of which worked well):
  * provide sysfs /index file but that wants to be handled by smbios
  * use VPD serial number but that is a GUID not an integer
  * full hotplug (but Hyper-V doesn't support real PCI hotplug)
  * new device_attribute serial, but that is not easy to hook
    into existing code, and udev won't know about it.

Open to any ideas about how to best represent this.

Thanks,
Stephen



PS: it is  nuisance that when the PCI support was moved to drivers/pci/controllers
it was not done in a way  that git correctly tracked the rename. Therefore
all the commit history was broken :-(
Lorenzo Pieralisi July 19, 2018, 4:58 p.m. | #5
On Fri, Jul 13, 2018 at 02:38:19PM -0700, Stephen Hemminger wrote:
> On Wed, 11 Jul 2018 08:49:08 -0500
> Bjorn Helgaas <helgaas@kernel.org> wrote:
> 
> > On Wed, Jul 04, 2018 at 05:08:34PM +0100, Lorenzo Pieralisi wrote:
> > > On Tue, Jun 12, 2018 at 09:40:37AM -0700, Stephen Hemminger wrote:  
> > > > The Hyper-V host API for PCI provides a unique "serial number" which
> > > > can be used as the basis for sysfs PCI slot table. This can be useful
> > > > for cases where userspace wants to find the PCI device based on
> > > > serial number.
> > > > 
> > > > When an SR-IOV NIC is added, the host sends an attach message
> > > > with a serial number. The kernel doesn't use the serial number, but
> > > > it is useful when doing the same thing in a userspace driver such
> > > > as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
> > > > way to find the matching PCI device.  
> > 
> > Is this essentially a way to expose the serial number to userspace?
> > 
> > I confess I'm not super familiar with what the slot infrastructure
> > gives us, and I'm trying to figure out if it's the right way to do
> > this.  Would a file directly in the device directory, e.g.,
> > 
> >   /sys/devices/pci0000:00/0000:00:00.0/serial
> > 
> > be a possibility?  There is an optional PCIe Device Serial Number
> > capability and also an optional serial number item in the VPD.  We
> > don't support either in the core yet, but maybe they could all be
> > exposed similarly?
> > 
> > Or is there something you need from the slot infrastructure in
> > addition to the serial number exposure?
> > 
> > You mentioned that this patch is along the lines of what KVM does.
> > Can you connect the dots a little bit for me as far as how this is
> > implemented for KVM?  I don't see KVM-related calls to
> > pci_create_slot() or pci_hp_register().
> > 
> > > > There may be some cases where the serial number is not unique such
> > > > as when using GPU's. But the PCI slot infrastructure will handle
> > > > that.
> > > > 
> > > > This also shortens the network device names generated by
> > > > systemd/udev. The new names use slot (ens2) rather than
> > > > PCI address (enP2p0s2).  
> > 
> > I don't recognize "enP2p0s2" as a PCI address.  Is that related to a
> > domain/bus/device/function address somehow, or is this some sort of
> > Windows-specific address?
> > 
> > > Hi Stephen,
> > > 
> > > I wanted to apply this patch but wanted to make sure all HV
> > > maintainers are in agreement first since this looks like
> > > a significant user-space ABI change.
> > > 
> > > I would also ask Bjorn's opinion on this since he has more
> > > insights into the slot interface history.
> > > 
> > > Thanks,
> > > Lorenzo
> > >   
> > > > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
> 
> Yes, this is a way to expose serial number to userspace as well as a
> way to help udev with naming.

When you say "to help" it means "to fix" or to bring it on a par
with KVM ? This change is Hyper-V specific so I am not really against
it, still, I am not a big fan of making a serial number look like
a slot number to please udev either.

> Today, a VF device shows up as:
> $ lspci -n
> 0002:00:02.0 0200: 15b3:1014 (rev 80)
> 
> 
> And therefore is named: enP2p0s2 based on these rules:
> See: https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L20
> 
>  * Two character prefixes based on the type of interface:
>  *   en — Ethernet
>  *   sl — serial line IP (slip)
>  *   wl — wlan
>  *   ww — wwan
>  *
>  * Type of names:
> 
>  *   o<index>[n<phys_port_name>|d<dev_port>]
>  *                                         — on-board device index number
>  *   s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
>  *                                         — hotplug slot index number
>  *   x<MAC>                                — MAC address
>  *   [P<domain>]p<bus>s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
>  *                                         — PCI geographical location
> 
> I looked into several other options:
>   - put serial number into the device index number
> 
> 
> On KVM, the ACPI emulation provides PCI slot information so that is why
> the devices are named "ens3".
> 
> I looked into several other options (none of which worked well):
>   * provide sysfs /index file but that wants to be handled by smbios
>   * use VPD serial number but that is a GUID not an integer
>   * full hotplug (but Hyper-V doesn't support real PCI hotplug)
>   * new device_attribute serial, but that is not easy to hook
>     into existing code, and udev won't know about it.
> 
> Open to any ideas about how to best represent this.

By trawling PCI patchwork I found this series that was not merged, maybe
it is a starting point to implement the latter suggestion:

https://patchwork.ozlabs.org/patch/264610/

udev won't know about it but as I say above that's not a bug
we are fixing either AFAICS.

At the end of the day it is Hyper-V maintainers call, since that's
Hyper-V specific; exposing the serial attribute is something we
can do regardless, depending on what Bjorn thinks about it and you
can use it to provide userspace with the information you need.

> Thanks,
> Stephen
> 
> 
> 
> PS: it is  nuisance that when the PCI support was moved to
> drivers/pci/controllers it was not done in a way  that git correctly
> tracked the rename. Therefore all the commit history was broken :-(

Sorry about that.

Isn't "git log --follow" enough for this purpose ?

Thanks,
Lorenzo
Bjorn Helgaas July 19, 2018, 5:28 p.m. | #6
On Fri, Jul 13, 2018 at 02:38:19PM -0700, Stephen Hemminger wrote:
> PS: it is  nuisance that when the PCI support was moved to
> drivers/pci/controllers it was not done in a way  that git correctly
> tracked the rename. Therefore all the commit history was broken :-(

Oooh, that's my fault, and I'm really sorry about that.  For future
reference, what *should* I have done?

Both the original patch [1] and 6e0832fa432e ("PCI: Collect all native
drivers under drivers/pci/controller/") *look* like they comprehend
renames, e.g.,

  diff --git a/drivers/pci/dwc/pci-exynos.c b/drivers/pci/controller/dwc/pci-exynos.c
  similarity index 100%
  rename from drivers/pci/dwc/pci-exynos.c
  rename to drivers/pci/controller/dwc/pci-exynos.c

IIRC I just applied that patch from email with "stg import", but maybe
I should have used "git mv" instead?  I did try "git mv" just now, and
"git log" doesn't show history before the rename, while
"git log --follow" does.

Anyway, sorry about breaking history; I really hate that.

Bjorn

[1] https://lkml.kernel.org/r/1527729157-201428-1-git-send-email-shawn.lin@rock-chips.com
Stephen Hemminger July 19, 2018, 5:36 p.m. | #7
On Thu, 19 Jul 2018 17:58:59 +0100
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:

> > Thanks,
> > Stephen
> > 
> > 
> > 
> > PS: it is  nuisance that when the PCI support was moved to
> > drivers/pci/controllers it was not done in a way  that git correctly
> > tracked the rename. Therefore all the commit history was broken :-(  
> 
> Sorry about that.
> 
> Isn't "git log --follow" enough for this purpose ?
> 
> Thanks,
> Lorenzo

Your right --follow works, I will figure out how to make that
the default in .gitconfig
Stephen Hemminger July 30, 2018, 7:22 p.m. | #8
On Thu, 19 Jul 2018 17:58:59 +0100
Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> wrote:

> On Fri, Jul 13, 2018 at 02:38:19PM -0700, Stephen Hemminger wrote:
> > On Wed, 11 Jul 2018 08:49:08 -0500
> > Bjorn Helgaas <helgaas@kernel.org> wrote:
> >   
> > > On Wed, Jul 04, 2018 at 05:08:34PM +0100, Lorenzo Pieralisi wrote:  
> > > > On Tue, Jun 12, 2018 at 09:40:37AM -0700, Stephen Hemminger wrote:    
> > > > > The Hyper-V host API for PCI provides a unique "serial number" which
> > > > > can be used as the basis for sysfs PCI slot table. This can be useful
> > > > > for cases where userspace wants to find the PCI device based on
> > > > > serial number.
> > > > > 
> > > > > When an SR-IOV NIC is added, the host sends an attach message
> > > > > with a serial number. The kernel doesn't use the serial number, but
> > > > > it is useful when doing the same thing in a userspace driver such
> > > > > as the DPDK. By having /sys/bus/pci/slots/N it provides a direct
> > > > > way to find the matching PCI device.    
> > > 
> > > Is this essentially a way to expose the serial number to userspace?
> > > 
> > > I confess I'm not super familiar with what the slot infrastructure
> > > gives us, and I'm trying to figure out if it's the right way to do
> > > this.  Would a file directly in the device directory, e.g.,
> > > 
> > >   /sys/devices/pci0000:00/0000:00:00.0/serial
> > > 
> > > be a possibility?  There is an optional PCIe Device Serial Number
> > > capability and also an optional serial number item in the VPD.  We
> > > don't support either in the core yet, but maybe they could all be
> > > exposed similarly?
> > > 
> > > Or is there something you need from the slot infrastructure in
> > > addition to the serial number exposure?
> > > 
> > > You mentioned that this patch is along the lines of what KVM does.
> > > Can you connect the dots a little bit for me as far as how this is
> > > implemented for KVM?  I don't see KVM-related calls to
> > > pci_create_slot() or pci_hp_register().
> > >   
> > > > > There may be some cases where the serial number is not unique such
> > > > > as when using GPU's. But the PCI slot infrastructure will handle
> > > > > that.
> > > > > 
> > > > > This also shortens the network device names generated by
> > > > > systemd/udev. The new names use slot (ens2) rather than
> > > > > PCI address (enP2p0s2).    
> > > 
> > > I don't recognize "enP2p0s2" as a PCI address.  Is that related to a
> > > domain/bus/device/function address somehow, or is this some sort of
> > > Windows-specific address?
> > >   
> > > > Hi Stephen,
> > > > 
> > > > I wanted to apply this patch but wanted to make sure all HV
> > > > maintainers are in agreement first since this looks like
> > > > a significant user-space ABI change.
> > > > 
> > > > I would also ask Bjorn's opinion on this since he has more
> > > > insights into the slot interface history.
> > > > 
> > > > Thanks,
> > > > Lorenzo
> > > >     
> > > > > Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>  
> > 
> > Yes, this is a way to expose serial number to userspace as well as a
> > way to help udev with naming.  
> 
> When you say "to help" it means "to fix" or to bring it on a par
> with KVM ? This change is Hyper-V specific so I am not really against
> it, still, I am not a big fan of making a serial number look like
> a slot number to please udev either.

It provides udev with shorter more concise value that gives better result.
Still would like something persistent and repeatable but it doesn't seem
to be guaranteed by the host.

> 
> > Today, a VF device shows up as:
> > $ lspci -n
> > 0002:00:02.0 0200: 15b3:1014 (rev 80)
> > 
> > 
> > And therefore is named: enP2p0s2 based on these rules:
> > See: https://github.com/systemd/systemd/blob/master/src/udev/udev-builtin-net_id.c#L20
> > 
> >  * Two character prefixes based on the type of interface:
> >  *   en — Ethernet
> >  *   sl — serial line IP (slip)
> >  *   wl — wlan
> >  *   ww — wwan
> >  *
> >  * Type of names:
> > 
> >  *   o<index>[n<phys_port_name>|d<dev_port>]
> >  *                                         — on-board device index number
> >  *   s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
> >  *                                         — hotplug slot index number
> >  *   x<MAC>                                — MAC address
> >  *   [P<domain>]p<bus>s<slot>[f<function>][n<phys_port_name>|d<dev_port>]
> >  *                                         — PCI geographical location
> > 
> > I looked into several other options:
> >   - put serial number into the device index number
> > 
> > 
> > On KVM, the ACPI emulation provides PCI slot information so that is why
> > the devices are named "ens3".
> > 
> > I looked into several other options (none of which worked well):
> >   * provide sysfs /index file but that wants to be handled by smbios
> >   * use VPD serial number but that is a GUID not an integer
> >   * full hotplug (but Hyper-V doesn't support real PCI hotplug)
> >   * new device_attribute serial, but that is not easy to hook
> >     into existing code, and udev won't know about it.
> > 
> > Open to any ideas about how to best represent this.  
> 
> By trawling PCI patchwork I found this series that was not merged, maybe
> it is a starting point to implement the latter suggestion:
> 
> https://patchwork.ozlabs.org/patch/264610/
> 
> udev won't know about it but as I say above that's not a bug
> we are fixing either AFAICS.

Right this isn't a bug.

The PCIe Device serial number (in patchwork) has a different meaning and
can't be used. The PCIe device serial number comes from device config space
and the serial number (i.e it is a property of the board).
The value this patch is addressing is assigned by the host (similar to ACPI slots).

On latest kernels (now that PCI domain is 32 bits), the SR-IOV VF device
is even more ugly (enP37727p0s2) that was one of the motivations for this patch.
With the patch udev will generate ens2.

Patch

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index 6cc5036ac83c..4e3575716ced 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -88,6 +88,8 @@  static enum pci_protocol_version_t pci_protocol_version;
 
 #define STATUS_REVISION_MISMATCH 0xC0000059
 
+#define SLOT_NAME_SIZE 21
+
 /*
  * Message Types
  */
@@ -493,6 +495,7 @@  struct hv_pci_dev {
 	struct list_head list_entry;
 	refcount_t refs;
 	enum hv_pcichild_state state;
+	struct pci_slot *pci_slot;
 	struct pci_function_description desc;
 	bool reported_missing;
 	struct hv_pcibus_device *hbus;
@@ -1454,6 +1457,28 @@  static void prepopulate_bars(struct hv_pcibus_device *hbus)
 	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
 }
 
+static void hv_pci_assign_slots(struct hv_pcibus_device *hbus)
+{
+	struct hv_pci_dev *hpdev;
+	char name[SLOT_NAME_SIZE];
+	unsigned long flags;
+	int slot_nr;
+
+	spin_lock_irqsave(&hbus->device_list_lock, flags);
+	list_for_each_entry(hpdev, &hbus->children, list_entry) {
+		if (hpdev->pci_slot)
+			continue;
+
+		slot_nr = PCI_SLOT(wslot_to_devfn(hpdev->desc.win_slot.slot));
+		snprintf(name, SLOT_NAME_SIZE, "%u", hpdev->desc.ser);
+		hpdev->pci_slot = pci_create_slot(hbus->pci_bus, slot_nr,
+					  name, NULL);
+		if (!hpdev->pci_slot)
+			pr_warn("pci_create slot %s failed\n", name);
+	}
+	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
+}
+
 /**
  * create_root_hv_pci_bus() - Expose a new root PCI bus
  * @hbus:	Root PCI bus, as understood by this driver
@@ -1477,6 +1502,7 @@  static int create_root_hv_pci_bus(struct hv_pcibus_device *hbus)
 	pci_lock_rescan_remove();
 	pci_scan_child_bus(hbus->pci_bus);
 	pci_bus_assign_resources(hbus->pci_bus);
+	hv_pci_assign_slots(hbus);
 	pci_bus_add_devices(hbus->pci_bus);
 	pci_unlock_rescan_remove();
 	hbus->state = hv_pcibus_installed;
@@ -1739,6 +1765,7 @@  static void pci_devices_present_work(struct work_struct *work)
 		 */
 		pci_lock_rescan_remove();
 		pci_scan_child_bus(hbus->pci_bus);
+		hv_pci_assign_slots(hbus);
 		pci_unlock_rescan_remove();
 		break;
 
@@ -1855,6 +1882,9 @@  static void hv_eject_device_work(struct work_struct *work)
 	list_del(&hpdev->list_entry);
 	spin_unlock_irqrestore(&hpdev->hbus->device_list_lock, flags);
 
+	if (hpdev->pci_slot)
+		pci_destroy_slot(hpdev->pci_slot);
+
 	memset(&ctxt, 0, sizeof(ctxt));
 	ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
 	ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;