diff mbox series

[2/3] e1000e: Make mei_me active when e1000e is in use

Message ID 20210712133500.1126371-2-kai.heng.feng@canonical.com
State Deferred
Headers show
Series [1/3] e1000e: Separate TGP from SPT | expand

Commit Message

Kai-Heng Feng July 12, 2021, 1:34 p.m. UTC
Many users report rather sluggish RX speed on TGP I219. Since
"intel_idle.max_cstate=1" doesn't help, so it's not caused by deeper
package C-state.

A workaround that always works is to make sure mei_me is runtime active
when e1000e is in use.

The root cause is still unknown, but since many users are affected by
the issue, implment the quirk in the driver as a temporary workaround.

Also adds mei_me as soft dependency to ensure the device link can be
created if e1000e is in initramfs.

BugLink: https://bugs.launchpad.net/bugs/1927925
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213377
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213651
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 26 ++++++++++++++++++++++
 1 file changed, 26 insertions(+)

Comments

Sasha Neftin July 14, 2021, 5:39 a.m. UTC | #1
On 7/12/2021 16:34, Kai-Heng Feng wrote:
> Many users report rather sluggish RX speed on TGP I219. Since
> "intel_idle.max_cstate=1" doesn't help, so it's not caused by deeper
> package C-state.
> 
> A workaround that always works is to make sure mei_me is runtime active
> when e1000e is in use.
> 
> The root cause is still unknown, but since many users are affected by
> the issue, implment the quirk in the driver as a temporary workaround.
Hello Kai-Heng,
First - thanks for the investigation of this problem. As I know CSME/AMT 
not POR on Linux and not supported. Recently we started add support for 
CSME/AMT none provisioned version (handshake with CSME in s0ix flow - 
only CSME with s0ix will support). It is not related to rx bandwidth 
problem.
I do not know how MEI driver affect 1Gbe driver - so, I would suggest to 
involve our CSME engineer (alexander.usyskin@intel.com) and try to 
investigate this problem.
Does this problem observed on Dell systems? As I heard no reproduction 
on Intel's RVP platform.
Another question: does disable mei_me runpm solve your problem?
> 
> Also adds mei_me as soft dependency to ensure the device link can be
> created if e1000e is in initramfs.
> 
> BugLink: https://bugs.launchpad.net/bugs/1927925
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213377
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213651
> Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> ---
>   drivers/net/ethernet/intel/e1000e/netdev.c | 26 ++++++++++++++++++++++
>   1 file changed, 26 insertions(+)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> index 5835d6cf2f51..e63445a8ce12 100644
> --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> @@ -7317,6 +7317,27 @@ static const struct net_device_ops e1000e_netdev_ops = {
>   	.ndo_features_check	= passthru_features_check,
>   };
>   
> +static void e1000e_create_device_links(struct pci_dev *pdev)
> +{
> +	struct pci_dev *tgp_mei_me;
> +
> +	/* Find TGP mei_me devices and make e1000e power depend on mei_me */
> +	tgp_mei_me = pci_get_device(PCI_VENDOR_ID_INTEL, 0xa0e0, NULL);
> +	if (!tgp_mei_me) {
> +		tgp_mei_me = pci_get_device(PCI_VENDOR_ID_INTEL, 0x43e0, NULL);
> +		if (!tgp_mei_me)
> +			return;
> +	}
> +
> +	if (device_link_add(&pdev->dev, &tgp_mei_me->dev,
> +			    DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE |
> +			    DL_FLAG_AUTOREMOVE_CONSUMER))
> +		pci_info(pdev, "System and runtime PM depends on %s\n",
> +			 pci_name(tgp_mei_me));
> +
> +	pci_dev_put(tgp_mei_me);
> +}
> +
>   /**
>    * e1000_probe - Device Initialization Routine
>    * @pdev: PCI device information struct
> @@ -7645,6 +7666,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	if (pci_dev_run_wake(pdev) && hw->mac.type != e1000_pch_cnp)
>   		pm_runtime_put_noidle(&pdev->dev);
>   
> +	if (hw->mac.type == e1000_pch_tgp)
> +		e1000e_create_device_links(pdev);
> +
>   	return 0;
>   
>   err_register:
> @@ -7917,6 +7941,8 @@ static void __exit e1000_exit_module(void)
>   }
>   module_exit(e1000_exit_module);
>   
> +/* Ensure device link can be created if e1000e is in the initramfs. */
> +MODULE_SOFTDEP("pre: mei_me");
>   MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
>   MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
>   MODULE_LICENSE("GPL v2");
> 
Thanks,Sasha
Kai-Heng Feng July 14, 2021, 6:28 a.m. UTC | #2
Hi Sasha,

On Wed, Jul 14, 2021 at 1:39 PM Sasha Neftin <sasha.neftin@intel.com> wrote:
>
> On 7/12/2021 16:34, Kai-Heng Feng wrote:
> > Many users report rather sluggish RX speed on TGP I219. Since
> > "intel_idle.max_cstate=1" doesn't help, so it's not caused by deeper
> > package C-state.
> >
> > A workaround that always works is to make sure mei_me is runtime active
> > when e1000e is in use.
> >
> > The root cause is still unknown, but since many users are affected by
> > the issue, implment the quirk in the driver as a temporary workaround.
> Hello Kai-Heng,
> First - thanks for the investigation of this problem. As I know CSME/AMT
> not POR on Linux and not supported. Recently we started add support for
> CSME/AMT none provisioned version (handshake with CSME in s0ix flow -
> only CSME with s0ix will support). It is not related to rx bandwidth
> problem.

I am aware that ME is not POR under Linux, so the commit message
states clearly that the patch is just a "temporary workaround".
Not every laptop can disable ME/AMT, and I don't think asking user to
fiddle with BIOS is a good thing, hence the patch.

> I do not know how MEI driver affect 1Gbe driver - so, I would suggest to
> involve our CSME engineer (alexander.usyskin@intel.com) and try to
> investigate this problem.
> Does this problem observed on Dell systems? As I heard no reproduction
> on Intel's RVP platform.
> Another question: does disable mei_me runpm solve your problem?

Yes, disabling runpm on mei_me can workaround the issue, and that's
essentially what this patch does by adding DL_FLAG_PM_RUNTIME |
DL_FLAG_RPM_ACTIVE flag.

Kai-Heng

> >
> > Also adds mei_me as soft dependency to ensure the device link can be
> > created if e1000e is in initramfs.
> >
> > BugLink: https://bugs.launchpad.net/bugs/1927925
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213377
> > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213651
> > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> > ---
> >   drivers/net/ethernet/intel/e1000e/netdev.c | 26 ++++++++++++++++++++++
> >   1 file changed, 26 insertions(+)
> >
> > diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
> > index 5835d6cf2f51..e63445a8ce12 100644
> > --- a/drivers/net/ethernet/intel/e1000e/netdev.c
> > +++ b/drivers/net/ethernet/intel/e1000e/netdev.c
> > @@ -7317,6 +7317,27 @@ static const struct net_device_ops e1000e_netdev_ops = {
> >       .ndo_features_check     = passthru_features_check,
> >   };
> >
> > +static void e1000e_create_device_links(struct pci_dev *pdev)
> > +{
> > +     struct pci_dev *tgp_mei_me;
> > +
> > +     /* Find TGP mei_me devices and make e1000e power depend on mei_me */
> > +     tgp_mei_me = pci_get_device(PCI_VENDOR_ID_INTEL, 0xa0e0, NULL);
> > +     if (!tgp_mei_me) {
> > +             tgp_mei_me = pci_get_device(PCI_VENDOR_ID_INTEL, 0x43e0, NULL);
> > +             if (!tgp_mei_me)
> > +                     return;
> > +     }
> > +
> > +     if (device_link_add(&pdev->dev, &tgp_mei_me->dev,
> > +                         DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE |
> > +                         DL_FLAG_AUTOREMOVE_CONSUMER))
> > +             pci_info(pdev, "System and runtime PM depends on %s\n",
> > +                      pci_name(tgp_mei_me));
> > +
> > +     pci_dev_put(tgp_mei_me);
> > +}
> > +
> >   /**
> >    * e1000_probe - Device Initialization Routine
> >    * @pdev: PCI device information struct
> > @@ -7645,6 +7666,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> >       if (pci_dev_run_wake(pdev) && hw->mac.type != e1000_pch_cnp)
> >               pm_runtime_put_noidle(&pdev->dev);
> >
> > +     if (hw->mac.type == e1000_pch_tgp)
> > +             e1000e_create_device_links(pdev);
> > +
> >       return 0;
> >
> >   err_register:
> > @@ -7917,6 +7941,8 @@ static void __exit e1000_exit_module(void)
> >   }
> >   module_exit(e1000_exit_module);
> >
> > +/* Ensure device link can be created if e1000e is in the initramfs. */
> > +MODULE_SOFTDEP("pre: mei_me");
> >   MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
> >   MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
> >   MODULE_LICENSE("GPL v2");
> >
> Thanks,Sasha
Ruinskiy, Dima July 14, 2021, 9:05 a.m. UTC | #3
On 14/07/2021 9:28, Kai-Heng Feng wrote:
>> I do not know how MEI driver affect 1Gbe driver - so, I would suggest to
>> involve our CSME engineer (alexander.usyskin@intel.com) and try to
>> investigate this problem.
>> Does this problem observed on Dell systems? As I heard no reproduction
>> on Intel's RVP platform.
>> Another question: does disable mei_me runpm solve your problem?
> 
> Yes, disabling runpm on mei_me can workaround the issue, and that's
> essentially what this patch does by adding DL_FLAG_PM_RUNTIME |
> DL_FLAG_RPM_ACTIVE flag.
> 
> Kai-Heng
Hi, Kai-Heng,

If the goal of the patch is to essentially disable runpm on mei_me, then 
why is the patch touching code in the e1000e driver?

I agree with Sasha Neftin; it seems like the wrong location, and the 
wrong way to do it, even if it currently works. We need to understand 
what causes runpm of mei_me to adversely affect LAN Rx, and for this we 
need the involvement of mei_me owners.

--Dima
---------------------------------------------------------------------
Intel Israel (74) Limited

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Kai-Heng Feng July 14, 2021, 9:52 a.m. UTC | #4
On Wed, Jul 14, 2021 at 5:06 PM Ruinskiy, Dima <dima.ruinskiy@intel.com> wrote:
>
> On 14/07/2021 9:28, Kai-Heng Feng wrote:
> >> I do not know how MEI driver affect 1Gbe driver - so, I would suggest to
> >> involve our CSME engineer (alexander.usyskin@intel.com) and try to
> >> investigate this problem.
> >> Does this problem observed on Dell systems? As I heard no reproduction
> >> on Intel's RVP platform.
> >> Another question: does disable mei_me runpm solve your problem?
> >
> > Yes, disabling runpm on mei_me can workaround the issue, and that's
> > essentially what this patch does by adding DL_FLAG_PM_RUNTIME |
> > DL_FLAG_RPM_ACTIVE flag.
> >
> > Kai-Heng
> Hi, Kai-Heng,
>
> If the goal of the patch is to essentially disable runpm on mei_me, then
> why is the patch touching code in the e1000e driver?

We can put the workaround in e1000e, mei_me or as PCI quirk.
But since the bug itself manifests in e1000e, I think it's more
appropriate to put it here.

To be more specific, it doesn't disable runtime suspend on mei_me, it
makes mei_me the power supplier of e1000e.
So when e1000e can be runtime suspended (i.e. no link partner), mei_me
can also get runtime suspended too.

>
> I agree with Sasha Neftin; it seems like the wrong location, and the
> wrong way to do it, even if it currently works. We need to understand
> what causes runpm of mei_me to adversely affect LAN Rx, and for this we
> need the involvement of mei_me owners.

I think it's the right location, however I totally agree with your
other arguments.
There are many users already affected by this bug, so if a proper fix
isn't available for now, the temporary workaround can help here.

Kai-Heng

>
> --Dima
> ---------------------------------------------------------------------
> Intel Israel (74) Limited
>
> This e-mail and any attachments may contain confidential material for
> the sole use of the intended recipient(s). Any review or distribution
> by others is strictly prohibited. If you are not the intended
> recipient, please contact the sender and delete all copies.
Sasha Neftin July 18, 2021, 8:37 a.m. UTC | #5
On 7/14/2021 12:52, Kai-Heng Feng wrote:
> On Wed, Jul 14, 2021 at 5:06 PM Ruinskiy, Dima <dima.ruinskiy@intel.com> wrote:
>>
>> On 14/07/2021 9:28, Kai-Heng Feng wrote:
>>>> I do not know how MEI driver affect 1Gbe driver - so, I would suggest to
>>>> involve our CSME engineer (alexander.usyskin@intel.com) and try to
>>>> investigate this problem.
>>>> Does this problem observed on Dell systems? As I heard no reproduction
>>>> on Intel's RVP platform.
>>>> Another question: does disable mei_me runpm solve your problem?
>>>
>>> Yes, disabling runpm on mei_me can workaround the issue, and that's
>>> essentially what this patch does by adding DL_FLAG_PM_RUNTIME |
>>> DL_FLAG_RPM_ACTIVE flag.
>>>
>>> Kai-Heng
>> Hi, Kai-Heng,
>>
>> If the goal of the patch is to essentially disable runpm on mei_me, then
>> why is the patch touching code in the e1000e driver?
> 
> We can put the workaround in e1000e, mei_me or as PCI quirk.
> But since the bug itself manifests in e1000e, I think it's more
> appropriate to put it here.
> 
> To be more specific, it doesn't disable runtime suspend on mei_me, it
> makes mei_me the power supplier of e1000e.
> So when e1000e can be runtime suspended (i.e. no link partner), mei_me
> can also get runtime suspended too.
>>
>> I agree with Sasha Neftin; it seems like the wrong location, and the
>> wrong way to do it, even if it currently works. We need to understand
>> what causes runpm of mei_me to adversely affect LAN Rx, and for this we
>> need the involvement of mei_me owners.
> 
> I think it's the right location, however I totally agree with your
> other arguments.
> There are many users already affected by this bug, so if a proper fix
> isn't available for now, the temporary workaround can help here.
Hello Kai-Heng,
The temporary workaround without root cause is vague. Please, let's work 
with the manageability FW engineer (alexander.usyskin@intel.com) and 
understand the root cause.
Also, here is interfere with runpm flow of CSME - we must their inputs.
sasha
> 
> Kai-Heng
> 
>>
>> --Dima
>> ---------------------------------------------------------------------
>> Intel Israel (74) Limited
>>
>> This e-mail and any attachments may contain confidential material for
>> the sole use of the intended recipient(s). Any review or distribution
>> by others is strictly prohibited. If you are not the intended
>> recipient, please contact the sender and delete all copies.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 5835d6cf2f51..e63445a8ce12 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -7317,6 +7317,27 @@  static const struct net_device_ops e1000e_netdev_ops = {
 	.ndo_features_check	= passthru_features_check,
 };
 
+static void e1000e_create_device_links(struct pci_dev *pdev)
+{
+	struct pci_dev *tgp_mei_me;
+
+	/* Find TGP mei_me devices and make e1000e power depend on mei_me */
+	tgp_mei_me = pci_get_device(PCI_VENDOR_ID_INTEL, 0xa0e0, NULL);
+	if (!tgp_mei_me) {
+		tgp_mei_me = pci_get_device(PCI_VENDOR_ID_INTEL, 0x43e0, NULL);
+		if (!tgp_mei_me)
+			return;
+	}
+
+	if (device_link_add(&pdev->dev, &tgp_mei_me->dev,
+			    DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE |
+			    DL_FLAG_AUTOREMOVE_CONSUMER))
+		pci_info(pdev, "System and runtime PM depends on %s\n",
+			 pci_name(tgp_mei_me));
+
+	pci_dev_put(tgp_mei_me);
+}
+
 /**
  * e1000_probe - Device Initialization Routine
  * @pdev: PCI device information struct
@@ -7645,6 +7666,9 @@  static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_dev_run_wake(pdev) && hw->mac.type != e1000_pch_cnp)
 		pm_runtime_put_noidle(&pdev->dev);
 
+	if (hw->mac.type == e1000_pch_tgp)
+		e1000e_create_device_links(pdev);
+
 	return 0;
 
 err_register:
@@ -7917,6 +7941,8 @@  static void __exit e1000_exit_module(void)
 }
 module_exit(e1000_exit_module);
 
+/* Ensure device link can be created if e1000e is in the initramfs. */
+MODULE_SOFTDEP("pre: mei_me");
 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
 MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
 MODULE_LICENSE("GPL v2");