diff mbox

Kernel 4.6.7-rt13: Intel Ethernet driver igb causes huge latencies in cyclictest

Message ID f395ea6cd1274fac83d2744121da7441@FE-MBX1012.de.bosch.com
State Not Applicable
Headers show

Commit Message

Koehrer Mathias (ETAS/ESW5) Oct. 14, 2016, 8:58 a.m. UTC
Hi Julia,
> Have you tested on a vanilla (non-RT) kernel?  I doubt there is anything RT specific
> about what you are seeing, but it might be nice to get confirmation.  Also, bisection
> would probably be easier if you confirm on a vanilla kernel.
> 
> I find it unlikely that it's a kernel config option that changed which regressed you, but
> instead was a code change to a driver.  Which driver is now the question, and the
> surface area is still big (processor mapping attributes for this region, PCI root
> complex configuration, PCI brige configuration, igb driver itself, etc.).
> 
> Big enough that I'd recommend a bisection.  It looks like a bisection between 3.18
> and 4.8 would take you about 18 tries to narrow down, assuming all goes well.
> 

I have now repeated my tests using the vanilla kernel.
There I got the very same issue.
Using kernel 4.0 is fine, however starting with kernel 4.1, the issue appears.


Here is my exact (reproducible) test description:
I applied the following patch to the kernel to get the igb trace.
This patch instruments the igb_rd32() function to measure the call
to readl() which is used to access registers of the igb NIC.


++++++++++++++++++ BEGIN PATCH ++++++++++++++++++++++++++++++++++++



++++++++++++++++++ END PATCH ++++++++++++++++++++++++++++++++++++


I build the kernel with this patch applied, rebooted the PC to run this kernel and used the 
following script for my test.

++++++++++++++++++ BEGIN SCRIPT  +++++++++++++++++++++++++++++++++
#!/bin/bash

for f in /sys/devices/system/cpu/cpu[0-9]*/cpufreq/scaling_governor ; do
    if [ -w $f ]; then
        echo "performance" > $f
    fi
done

if true; then
    rmmod igb
    modprobe igb
    ethtool -L eth2 combined 1
    ifconfig eth2 up 192.168.100.111
fi

ifconfig

mount /sys/kernel/debug

( cd /sys/kernel/debug/tracing
  echo 0 > tracing_on
  echo 0 > events/enable
  echo 1 > events/igb/enable
  echo "print-parent" > trace_options
  echo "latency-format" > trace_options
  echo 1 > tracing_on

  sleep 4
  cat trace
)
++++++++++++++++++ END SCRIPT  +++++++++++++++++++++++++++++++++

The results of this for kernel 4.0:
[...]
kworker/-1239    3...1 49699046us : igb: val: 801
kworker/-1239    3...1 49699047us : igb: val: 802
kworker/-1239    3...1 49699047us : igb: val: 801
kworker/-1239    3...1 49699048us+: igb: val: 802
kworker/-1239    3...1 49699099us : igb: val: 801
kworker/-1239    3...1 49699100us : igb: val: 802
kworker/-1239    3...1 49699100us : igb: val: 801
kworker/-1239    3...1 49699102us : igb: val: 802
kworker/-1239    3...1 49699102us : igb: val: 801
kworker/-1239    3...1 49699103us : igb: val: 802
kworker/-1239    3...1 49699103us : igb: val: 801
kworker/-1239    3...1 49699104us : igb: val: 802
kworker/-1239    3...1 49699104us : igb: val: 801
kworker/-1239    3...1 49699105us : igb: val: 802
kworker/-1239    3...1 49699105us : igb: val: 801
kworker/-1239    3...1 49699107us : igb: val: 802
kworker/-1239    3...1 49699107us : igb: val: 801
kworker/-1239    3...1 49699108us : igb: val: 802
kworker/-1239    3...1 49699108us : igb: val: 801
kworker/-1239    3...1 49699109us : igb: val: 802
kworker/-1239    3...1 49699109us : igb: val: 801
kworker/-1239    3...1 49699110us : igb: val: 802
kworker/-1239    3...1 49699110us : igb: val: 801
kworker/-1239    3...1 49699111us : igb: val: 802
kworker/-1239    3...1 49699111us : igb: val: 801
kworker/-1239    3...1 49699113us+: igb: val: 802
kworker/-1239    3...1 49699163us : igb: val: 801
kworker/-1239    3...1 49699164us : igb: val: 802
kworker/-1239    3...1 49699164us : igb: val: 801
kworker/-1239    3...1 49699166us : igb: val: 802
kworker/-1239    3...1 49699166us : igb: val: 801
kworker/-1239    3...1 49699167us : igb: val: 802
kworker/-1239    3...1 49699167us : igb: val: 801
kworker/-1239    3...1 49699168us : igb: val: 802
kworker/-1239    3...1 49699168us : igb: val: 801
kworker/-1239    3...1 49699169us : igb: val: 802
kworker/-1239    3...1 49699169us : igb: val: 801
kworker/-1239    3...1 49699170us : igb: val: 802
kworker/-1239    3...1 49699171us : igb: val: 801
kworker/-1239    3...1 49699173us : igb: val: 802
kworker/-1239    3...1 49699173us : igb: val: 801
kworker/-1239    3...1 49699174us : igb: val: 802





The results of this for kernel 4.1:
[...]
kworker/-1333    2...1 75697302us+: igb: val: 801
kworker/-1333    2...1 75697323us+: igb: val: 802
kworker/-1333    2...1 75697373us+: igb: val: 801
kworker/-1333    2...1 75697395us : igb: val: 802
kworker/-1333    2...1 75697395us+: igb: val: 801
kworker/-1333    2...1 75697415us : igb: val: 802
kworker/-1333    2...1 75697416us+: igb: val: 801
kworker/-1333    2...1 75697436us : igb: val: 802
kworker/-1333    2...1 75697436us+: igb: val: 801
kworker/-1333    2...1 75697457us : igb: val: 802
kworker/-1333    2...1 75697457us+: igb: val: 801
kworker/-1333    2...1 75697478us : igb: val: 802
kworker/-1333    2...1 75697478us+: igb: val: 801
kworker/-1333    2...1 75697499us : igb: val: 802
kworker/-1333    2...1 75697499us+: igb: val: 801
kworker/-1333    2...1 75697520us : igb: val: 802
kworker/-1333    2...1 75697520us+: igb: val: 801
kworker/-1333    2...1 75697541us : igb: val: 802
kworker/-1333    2...1 75697541us+: igb: val: 801
kworker/-1333    2...1 75697562us : igb: val: 802
kworker/-1333    2...1 75697562us+: igb: val: 801
kworker/-1333    2...1 75697583us : igb: val: 802
kworker/-1333    2...1 75697583us+: igb: val: 801
kworker/-1333    2...1 75697604us+: igb: val: 802
kworker/-1333    2...1 75697654us+: igb: val: 801
kworker/-1333    2...1 75697675us : igb: val: 802
kworker/-1333    2...1 75697675us+: igb: val: 801
kworker/-1333    2...1 75697696us : igb: val: 802
kworker/-1333    2...1 75697696us+: igb: val: 801
kworker/-1333    2...1 75697717us : igb: val: 802
kworker/-1333    2...1 75697717us+: igb: val: 801
kworker/-1333    2...1 75697738us : igb: val: 802
kworker/-1333    2...1 75697738us+: igb: val: 801
kworker/-1333    2...1 75697759us : igb: val: 802
kworker/-1333    2...1 75697759us+: igb: val: 801
kworker/-1333    2...1 75697780us : igb: val: 802
kworker/-1333    2...1 75697781us+: igb: val: 801
kworker/-1333    2...1 75697801us : igb: val: 802
kworker/-1333    2...1 75697801us+: igb: val: 801
kworker/-1333    2...1 75697822us : igb: val: 802


It can be cleary seen that with the kernel 4.1 the time for the execution of the "readl"
function is much slower! It tasks always about 21us!
In comparison to that the kernel 4.0 is fast. Here the time for "readl" is about
1-2 us.
All measurements have been done on the very same PC.
The PC is an HP EliteDesk 800 G1, 
a dual port Intel i350-T2 server adapter has been plugged in.
CPU: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
x86_64 bit mode.

Please find attached the kernel configuration for the 4.1 kernel.

I had a look at the generated .o file of igb_main.o.
However, in both cases - kernel 4.0 and kernel 4.1 - the code for the 
igb_rd32 function looks the same.
Especially the call to the readl() function is the very same.

This means, that I think that some other stuff in kernel 4.1 has changed,
which has impact on the igb accesses.

Any idea what component could cause this kind of issue?

Thanks for any feedback!

Regards

Mathias

Comments

Julia Cartwright Oct. 14, 2016, 7:55 p.m. UTC | #1
On Fri, Oct 14, 2016 at 08:58:22AM +0000, Koehrer Mathias (ETAS/ESW5) wrote:
> Hi Julia,
>
> > Have you tested on a vanilla (non-RT) kernel?  I doubt there is anything RT specific
> > about what you are seeing, but it might be nice to get confirmation.  Also, bisection
> > would probably be easier if you confirm on a vanilla kernel.
> >
> > I find it unlikely that it's a kernel config option that changed which regressed you, but
> > instead was a code change to a driver.  Which driver is now the question, and the
> > surface area is still big (processor mapping attributes for this region, PCI root
> > complex configuration, PCI brige configuration, igb driver itself, etc.).
> >
> > Big enough that I'd recommend a bisection.  It looks like a bisection between 3.18
> > and 4.8 would take you about 18 tries to narrow down, assuming all goes well.
> >
>
> I have now repeated my tests using the vanilla kernel.
> There I got the very same issue.
> Using kernel 4.0 is fine, however starting with kernel 4.1, the issue appears.

Great, thanks for confirming!  That helps narrow things down quite a
bit.

> Here is my exact (reproducible) test description:
> I applied the following patch to the kernel to get the igb trace.
> This patch instruments the igb_rd32() function to measure the call
> to readl() which is used to access registers of the igb NIC.

I took your test setup and ran it between 4.0 and 4.1 on the hardware on
my desk, which is an Atom-based board with dual I210s, however I didn't
see much difference.

However, it's a fairly simple board, with a much simpler PCI topology
than your workstation.  I'll see if I can find some other hardware to
test on.

[..]
> This means, that I think that some other stuff in kernel 4.1 has changed,
> which has impact on the igb accesses.
>
> Any idea what component could cause this kind of issue?

Can you continue your bisection using 'git bisect'?  You've already
narrowed it down between 4.0 and 4.1, so you're well on your way.

Another option might be to try to eliminate igb from the picture as
well, and try reading from another device from the same (or, perhaps
nearest) bus segment, and see if you see the same results.

   Julia
Richard Cochran Oct. 14, 2016, 10:06 p.m. UTC | #2
On Fri, Oct 14, 2016 at 08:58:22AM +0000, Koehrer Mathias (ETAS/ESW5) wrote:
> @@ -753,7 +756,9 @@ u32 igb_rd32(struct e1000_hw *hw, u32 re
>  	if (E1000_REMOVED(hw_addr))
>  		return ~value;
>  
> +        trace_igb(801);
>  	value = readl(&hw_addr[reg]);
> +        trace_igb(802);

Nothing prevents this code from being preempted between the two trace
points, and so you can't be sure whether the time delta in the trace
is caused by the PCIe read stalling or not.

Thanks,
Richard
Koehrer Mathias (ETAS/ESW5) Oct. 17, 2016, 3 p.m. UTC | #3
Hi Julia!
> > > Have you tested on a vanilla (non-RT) kernel?  I doubt there is
> > > anything RT specific about what you are seeing, but it might be nice
> > > to get confirmation.  Also, bisection would probably be easier if you confirm on a
> vanilla kernel.
> > >
> > > I find it unlikely that it's a kernel config option that changed
> > > which regressed you, but instead was a code change to a driver.
> > > Which driver is now the question, and the surface area is still big
> > > (processor mapping attributes for this region, PCI root complex configuration,
> PCI brige configuration, igb driver itself, etc.).
> > >
> > > Big enough that I'd recommend a bisection.  It looks like a
> > > bisection between 3.18 and 4.8 would take you about 18 tries to narrow down,
> assuming all goes well.
> > >
> >
> > I have now repeated my tests using the vanilla kernel.
> > There I got the very same issue.
> > Using kernel 4.0 is fine, however starting with kernel 4.1, the issue appears.
> 
> Great, thanks for confirming!  That helps narrow things down quite a bit.
> 
> > Here is my exact (reproducible) test description:
> > I applied the following patch to the kernel to get the igb trace.
> > This patch instruments the igb_rd32() function to measure the call to
> > readl() which is used to access registers of the igb NIC.
> 
> I took your test setup and ran it between 4.0 and 4.1 on the hardware on my desk,
> which is an Atom-based board with dual I210s, however I didn't see much
> difference.
> 
> However, it's a fairly simple board, with a much simpler PCI topology than your
> workstation.  I'll see if I can find some other hardware to test on.
> 
> [..]
> > This means, that I think that some other stuff in kernel 4.1 has
> > changed, which has impact on the igb accesses.
> >
> > Any idea what component could cause this kind of issue?
> 
> Can you continue your bisection using 'git bisect'?  You've already narrowed it down
> between 4.0 and 4.1, so you're well on your way.
> 

OK - done.
And finally I was successful!
The following git commit is the one that is causing the trouble!
(The full commit is in the attachment).
+++++++++++++++++++++ BEGIN +++++++++++++++++++++++++++
commit 387d37577fdd05e9472c20885464c2a53b3c945f
Author: Matthew Garrett <mjg59@coreos.com>
Date:   Tue Apr 7 11:07:00 2015 -0700

    PCI: Don't clear ASPM bits when the FADT declares it's unsupported

    Communications with a hardware vendor confirm that the expected behaviour
    on systems that set the FADT ASPM disable bit but which still grant full
    PCIe control is for the OS to leave any BIOS configuration intact and
    refuse to touch the ASPM bits.  This mimics the behaviour of Windows.

    Signed-off-by: Matthew Garrett <mjg59@coreos.com>
    Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+++++++++++++++++++++ HEADER +++++++++++++++++++++++++++

The only files that are modified by this commit are 
drivers/acpi/pci_root.c
drivers/pci/pcie/aspm.c
include/linux/pci-aspm.h

This is all generic PCIe stuff - however I do not really understand what
the changes of the commit are...

In my setup I am using a dual port igb Ethernet adapter.
This has an onboard PCIe switch and it might be that the configuration of this
PCIe switch on the Intel board is causing the trouble.

Please see also the output of "lspci -v" in the attachment.
The relevant PCI address of the NIC is 04:00.0 / 04:00.1

Any feedback on this is welcome!

Thanks

Mathias
Alexander H Duyck Oct. 17, 2016, 3:39 p.m. UTC | #4
On Mon, Oct 17, 2016 at 8:00 AM, Koehrer Mathias (ETAS/ESW5)
<mathias.koehrer@etas.com> wrote:
> Hi Julia!
>> > > Have you tested on a vanilla (non-RT) kernel?  I doubt there is
>> > > anything RT specific about what you are seeing, but it might be nice
>> > > to get confirmation.  Also, bisection would probably be easier if you confirm on a
>> vanilla kernel.
>> > >
>> > > I find it unlikely that it's a kernel config option that changed
>> > > which regressed you, but instead was a code change to a driver.
>> > > Which driver is now the question, and the surface area is still big
>> > > (processor mapping attributes for this region, PCI root complex configuration,
>> PCI brige configuration, igb driver itself, etc.).
>> > >
>> > > Big enough that I'd recommend a bisection.  It looks like a
>> > > bisection between 3.18 and 4.8 would take you about 18 tries to narrow down,
>> assuming all goes well.
>> > >
>> >
>> > I have now repeated my tests using the vanilla kernel.
>> > There I got the very same issue.
>> > Using kernel 4.0 is fine, however starting with kernel 4.1, the issue appears.
>>
>> Great, thanks for confirming!  That helps narrow things down quite a bit.
>>
>> > Here is my exact (reproducible) test description:
>> > I applied the following patch to the kernel to get the igb trace.
>> > This patch instruments the igb_rd32() function to measure the call to
>> > readl() which is used to access registers of the igb NIC.
>>
>> I took your test setup and ran it between 4.0 and 4.1 on the hardware on my desk,
>> which is an Atom-based board with dual I210s, however I didn't see much
>> difference.
>>
>> However, it's a fairly simple board, with a much simpler PCI topology than your
>> workstation.  I'll see if I can find some other hardware to test on.
>>
>> [..]
>> > This means, that I think that some other stuff in kernel 4.1 has
>> > changed, which has impact on the igb accesses.
>> >
>> > Any idea what component could cause this kind of issue?
>>
>> Can you continue your bisection using 'git bisect'?  You've already narrowed it down
>> between 4.0 and 4.1, so you're well on your way.
>>
>
> OK - done.
> And finally I was successful!
> The following git commit is the one that is causing the trouble!
> (The full commit is in the attachment).
> +++++++++++++++++++++ BEGIN +++++++++++++++++++++++++++
> commit 387d37577fdd05e9472c20885464c2a53b3c945f
> Author: Matthew Garrett <mjg59@coreos.com>
> Date:   Tue Apr 7 11:07:00 2015 -0700
>
>     PCI: Don't clear ASPM bits when the FADT declares it's unsupported
>
>     Communications with a hardware vendor confirm that the expected behaviour
>     on systems that set the FADT ASPM disable bit but which still grant full
>     PCIe control is for the OS to leave any BIOS configuration intact and
>     refuse to touch the ASPM bits.  This mimics the behaviour of Windows.
>
>     Signed-off-by: Matthew Garrett <mjg59@coreos.com>
>     Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> +++++++++++++++++++++ HEADER +++++++++++++++++++++++++++
>
> The only files that are modified by this commit are
> drivers/acpi/pci_root.c
> drivers/pci/pcie/aspm.c
> include/linux/pci-aspm.h
>
> This is all generic PCIe stuff - however I do not really understand what
> the changes of the commit are...
>
> In my setup I am using a dual port igb Ethernet adapter.
> This has an onboard PCIe switch and it might be that the configuration of this
> PCIe switch on the Intel board is causing the trouble.
>
> Please see also the output of "lspci -v" in the attachment.
> The relevant PCI address of the NIC is 04:00.0 / 04:00.1
>
> Any feedback on this is welcome!
>
> Thanks
>
> Mathias

Hi Mathias,

If you could set the output of lspci -vvv it might be more useful as
most of the configuration data isn't present in the lspci dump you had
attached.  Specifically if you could do this for the working case and
the non-working case we could verify if this issue is actually due to
the ASPM configuration on the device.

Also one thing you might try is booting your kernel with the kernel
parameter "pcie_aspm=off".  It sounds like the extra latency is likely
due to your platform enabling ASPM on the device and this in turn will
add latency if the PCIe link is disabled when you attempt to perform a
read as it takes some time to bring the PCIe link up when in L1 state.

Thanks for bisecting this.

- Alex
Julia Cartwright Oct. 17, 2016, 6:32 p.m. UTC | #5
+linux-pci

On Mon, Oct 17, 2016 at 08:39:40AM -0700, Alexander Duyck wrote:
> On Mon, Oct 17, 2016 at 8:00 AM, Koehrer Mathias (ETAS/ESW5)
> <mathias.koehrer@etas.com> wrote:
> > Hi Julia!
> >> > > Have you tested on a vanilla (non-RT) kernel?  I doubt there is
> >> > > anything RT specific about what you are seeing, but it might be nice
> >> > > to get confirmation.  Also, bisection would probably be easier if you confirm on a
> >> vanilla kernel.
> >> > >
> >> > > I find it unlikely that it's a kernel config option that changed
> >> > > which regressed you, but instead was a code change to a driver.
> >> > > Which driver is now the question, and the surface area is still big
> >> > > (processor mapping attributes for this region, PCI root complex configuration,
> >> PCI brige configuration, igb driver itself, etc.).
> >> > >
> >> > > Big enough that I'd recommend a bisection.  It looks like a
> >> > > bisection between 3.18 and 4.8 would take you about 18 tries to narrow down,
> >> assuming all goes well.
> >> > >
> >> >
> >> > I have now repeated my tests using the vanilla kernel.
> >> > There I got the very same issue.
> >> > Using kernel 4.0 is fine, however starting with kernel 4.1, the issue appears.
> >>
> >> Great, thanks for confirming!  That helps narrow things down quite a bit.
> >>
> >> > Here is my exact (reproducible) test description:
> >> > I applied the following patch to the kernel to get the igb trace.
> >> > This patch instruments the igb_rd32() function to measure the call to
> >> > readl() which is used to access registers of the igb NIC.
> >>
> >> I took your test setup and ran it between 4.0 and 4.1 on the hardware on my desk,
> >> which is an Atom-based board with dual I210s, however I didn't see much
> >> difference.
> >>
> >> However, it's a fairly simple board, with a much simpler PCI topology than your
> >> workstation.  I'll see if I can find some other hardware to test on.
> >>
> >> [..]
> >> > This means, that I think that some other stuff in kernel 4.1 has
> >> > changed, which has impact on the igb accesses.
> >> >
> >> > Any idea what component could cause this kind of issue?
> >>
> >> Can you continue your bisection using 'git bisect'?  You've already narrowed it down
> >> between 4.0 and 4.1, so you're well on your way.
> >>
> >
> > OK - done.
> > And finally I was successful!
> > The following git commit is the one that is causing the trouble!
> > (The full commit is in the attachment).
> > +++++++++++++++++++++ BEGIN +++++++++++++++++++++++++++
> > commit 387d37577fdd05e9472c20885464c2a53b3c945f
> > Author: Matthew Garrett <mjg59@coreos.com>
> > Date:   Tue Apr 7 11:07:00 2015 -0700
> >
> >     PCI: Don't clear ASPM bits when the FADT declares it's unsupported
> >
> >     Communications with a hardware vendor confirm that the expected behaviour
> >     on systems that set the FADT ASPM disable bit but which still grant full
> >     PCIe control is for the OS to leave any BIOS configuration intact and
> >     refuse to touch the ASPM bits.  This mimics the behaviour of Windows.
> >
> >     Signed-off-by: Matthew Garrett <mjg59@coreos.com>
> >     Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
> > +++++++++++++++++++++ HEADER +++++++++++++++++++++++++++
> >
> > The only files that are modified by this commit are
> > drivers/acpi/pci_root.c
> > drivers/pci/pcie/aspm.c
> > include/linux/pci-aspm.h
> >
> > This is all generic PCIe stuff - however I do not really understand what
> > the changes of the commit are...
> >
> > In my setup I am using a dual port igb Ethernet adapter.
> > This has an onboard PCIe switch and it might be that the configuration of this
> > PCIe switch on the Intel board is causing the trouble.
> >
> > Please see also the output of "lspci -v" in the attachment.
> > The relevant PCI address of the NIC is 04:00.0 / 04:00.1
> >
> > Any feedback on this is welcome!
> >
> > Thanks
> >
> > Mathias
>
> Hi Mathias,
>
> If you could set the output of lspci -vvv it might be more useful as
> most of the configuration data isn't present in the lspci dump you had
> attached.  Specifically if you could do this for the working case and
> the non-working case we could verify if this issue is actually due to
> the ASPM configuration on the device.
>
> Also one thing you might try is booting your kernel with the kernel
> parameter "pcie_aspm=off".  It sounds like the extra latency is likely
> due to your platform enabling ASPM on the device and this in turn will
> add latency if the PCIe link is disabled when you attempt to perform a
> read as it takes some time to bring the PCIe link up when in L1 state.

So if we assume it's this commit causing the regression, then it's safe
to assume that this system's BIOS is claiming to not support ASPM in the
FADT, but the BIOS is leaving ASPM configured in some way on the
relevant devices.

Also, unfortunately, taking a look at the code which handles
"pcie_aspm=off", it won't be sufficient to disable ASPM on these
this system, as disabling these states is skipped when the FADT doesn't
advertise ASPM support.

What would be needed is an option like "force", but which force
_disables_ ASPM.  "force-disable", maybe.

   Julia
Julia Cartwright Oct. 17, 2016, 6:36 p.m. UTC | #6
On Sat, Oct 15, 2016 at 12:06:33AM +0200, Richard Cochran wrote:
> On Fri, Oct 14, 2016 at 08:58:22AM +0000, Koehrer Mathias (ETAS/ESW5) wrote:
> > @@ -753,7 +756,9 @@ u32 igb_rd32(struct e1000_hw *hw, u32 re
> >  	if (E1000_REMOVED(hw_addr))
> >  		return ~value;
> >  
> > +        trace_igb(801);
> >  	value = readl(&hw_addr[reg]);
> > +        trace_igb(802);
> 
> Nothing prevents this code from being preempted between the two trace
> points, and so you can't be sure whether the time delta in the trace
> is caused by the PCIe read stalling or not.

While that is certainly the case, and would explain the most egregious
of measured latency spikes, it doesn't invalidate the test if you
consider the valuable data point(s) to be the minimum and/or median
latencies.

   Julia
Richard Cochran Oct. 17, 2016, 7:03 p.m. UTC | #7
On Mon, Oct 17, 2016 at 01:36:45PM -0500, Julia Cartwright wrote:
> While that is certainly the case, and would explain the most egregious
> of measured latency spikes, it doesn't invalidate the test if you
> consider the valuable data point(s) to be the minimum and/or median
> latencies.

Well, consider the case where an interrupt is stuck on.  That is a
possible cause, and it can be positively excluded by either disabling
local interrupts around the time stamps or by putting the vector
events into the trace.

(Doesn't matter now that bisection fingered the PCIe setup, just sayin.)

Thanks,
Richard
diff mbox

Patch

Index: linux-4.8/drivers/net/ethernet/intel/igb/trace.h
===================================================================
--- /dev/null
+++ linux-4.8/drivers/net/ethernet/intel/igb/trace.h
@@ -0,0 +1,34 @@ 
+
+#if !defined(_TRACE_IGB_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IGB_H_ 
+
+#include <linux/tracepoint.h>
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM igb
+
+
+#define _TRACE_H_
+
+
+TRACE_EVENT(igb,
+        TP_PROTO(u32 val),
+        TP_ARGS(val),
+        TP_STRUCT__entry(
+                __field(u32, val)
+        ),
+        TP_fast_assign(
+                __entry->val = val;
+        ),
+        TP_printk("val: %u",
+                   __entry->val)
+);
+
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH drivers/net/ethernet/intel/igb 
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+#include <trace/define_trace.h>
Index: linux-4.8/drivers/net/ethernet/intel/igb/Makefile
===================================================================
--- linux-4.8.orig/drivers/net/ethernet/intel/igb/Makefile
+++ linux-4.8/drivers/net/ethernet/intel/igb/Makefile
@@ -28,6 +28,7 @@ 
 #
 # Makefile for the Intel(R) 82575 PCI-Express ethernet driver
 #
+ccflags-y += -I.
 
 obj-$(CONFIG_IGB) += igb.o
 
Index: linux-4.8/drivers/net/ethernet/intel/igb/igb_main.c
===================================================================
--- linux-4.8.orig/drivers/net/ethernet/intel/igb/igb_main.c
+++ linux-4.8/drivers/net/ethernet/intel/igb/igb_main.c
@@ -57,6 +57,9 @@ 
 #include <linux/i2c.h>
 #include "igb.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 #define MAJ 5
 #define MIN 3
 #define BUILD 0
@@ -753,7 +756,9 @@  u32 igb_rd32(struct e1000_hw *hw, u32 re
 	if (E1000_REMOVED(hw_addr))
 		return ~value;
 
+        trace_igb(801);
 	value = readl(&hw_addr[reg]);
+        trace_igb(802);
 
 	/* reads should not return all F's */
 	if (!(~value) && (!reg || !(~readl(hw_addr)))) {