diff mbox series

[iwl-net,v1,1/1] e1000e: change usleep_range to udelay in PHY mdic access

Message ID 20240417190320.3159360-1-vitaly.lifshits@intel.com
State Under Review
Delegated to: Anthony Nguyen
Headers show
Series [iwl-net,v1,1/1] e1000e: change usleep_range to udelay in PHY mdic access | expand

Commit Message

Vitaly Lifshits April 17, 2024, 7:03 p.m. UTC
This is a partial revert of commit 6dbdd4de0362 ("e1000e: Workaround
for sporadic MDI error on Meteor Lake systems"). The referenced commit
introduced an issue on vPro systems, where disconnecting and reconnecting
the LAN cable might result in a kernel panic.

This was root caused to the usage of usleep_range in an atomic content
while trying to access the PHY. Change back the usleep_range calls to
udelay.

Fixes: 6dbdd4de0362 ("e1000e: Workaround for sporadic MDI error on Meteor Lake systems")
Co-developed-by: Sasha Neftin <sasha.neftin@intel.com>
Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
---
 drivers/net/ethernet/intel/e1000e/phy.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Comments

Ruinskiy, Dima April 24, 2024, 6:50 a.m. UTC | #1
On 17/04/2024 22:03, Lifshits, Vitaly wrote:
> This is a partial revert of commit 6dbdd4de0362 ("e1000e: Workaround
> for sporadic MDI error on Meteor Lake systems"). The referenced commit
> introduced an issue on vPro systems, where disconnecting and reconnecting
> the LAN cable might result in a kernel panic.
> 
> This was root caused to the usage of usleep_range in an atomic content
> while trying to access the PHY. Change back the usleep_range calls to
> udelay.
> 
> Fixes: 6dbdd4de0362 ("e1000e: Workaround for sporadic MDI error on Meteor Lake systems")
> Co-developed-by: Sasha Neftin <sasha.neftin@intel.com>
> Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
> Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
> ---
>   drivers/net/ethernet/intel/e1000e/phy.c | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
> index 93544f1cc2a5..f7ae0e0aa4a4 100644
> --- a/drivers/net/ethernet/intel/e1000e/phy.c
> +++ b/drivers/net/ethernet/intel/e1000e/phy.c
> @@ -157,7 +157,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
>                   * the lower time out
>                   */
>                  for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
> -                       usleep_range(50, 60);
> +                       udelay(50);
>                          mdic = er32(MDIC);
>                          if (mdic & E1000_MDIC_READY)
>                                  break;
> @@ -181,7 +181,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
>                   * reading duplicate data in the next MDIC transaction.
>                   */
>                  if (hw->mac.type == e1000_pch2lan)
> -                       usleep_range(100, 150);
> +                       udelay(100);
> 
>                  if (success) {
>                          *data = (u16)mdic;
> @@ -237,7 +237,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
>                   * the lower time out
>                   */
>                  for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
> -                       usleep_range(50, 60);
> +                       udelay(50);
>                          mdic = er32(MDIC);
>                          if (mdic & E1000_MDIC_READY)
>                                  break;
> @@ -261,7 +261,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
>                   * reading duplicate data in the next MDIC transaction.
>                   */
>                  if (hw->mac.type == e1000_pch2lan)
> -                       usleep_range(100, 150);
> +                       udelay(100);
> 
>                  if (success)
>                          return 0;
> --
> 2.34.1
> 

Tested-by: Dima Ruinskiy <dima.ruinskiy@intel.com>
Paul Menzel April 24, 2024, 8:30 a.m. UTC | #2
Dear Vitaly,


Thank you for the patch.


Am 17.04.24 um 21:03 schrieb Vitaly Lifshits:
> This is a partial revert of commit 6dbdd4de0362 ("e1000e: Workaround
> for sporadic MDI error on Meteor Lake systems"). The referenced commit
> introduced an issue on vPro systems, where disconnecting and reconnecting
> the LAN cable might result in a kernel panic.

Please paste the panic, and document the test system.

> This was root caused to the usage of usleep_range in an atomic content

conte*x*t?

> while trying to access the PHY. Change back the usleep_range calls to
> udelay.

Please explain, why this is an atomic context.

> Fixes: 6dbdd4de0362 ("e1000e: Workaround for sporadic MDI error on Meteor Lake systems")
> Co-developed-by: Sasha Neftin <sasha.neftin@intel.com>
> Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
> Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
> ---
>   drivers/net/ethernet/intel/e1000e/phy.c | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
> index 93544f1cc2a5..f7ae0e0aa4a4 100644
> --- a/drivers/net/ethernet/intel/e1000e/phy.c
> +++ b/drivers/net/ethernet/intel/e1000e/phy.c
> @@ -157,7 +157,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
>   		 * the lower time out
>   		 */
>   		for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
> -			usleep_range(50, 60);
> +			udelay(50);
>   			mdic = er32(MDIC);
>   			if (mdic & E1000_MDIC_READY)
>   				break;
> @@ -181,7 +181,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
>   		 * reading duplicate data in the next MDIC transaction.
>   		 */
>   		if (hw->mac.type == e1000_pch2lan)
> -			usleep_range(100, 150);
> +			udelay(100);
>   
>   		if (success) {
>   			*data = (u16)mdic;
> @@ -237,7 +237,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
>   		 * the lower time out
>   		 */
>   		for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
> -			usleep_range(50, 60);
> +			udelay(50);
>   			mdic = er32(MDIC);
>   			if (mdic & E1000_MDIC_READY)
>   				break;
> @@ -261,7 +261,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
>   		 * reading duplicate data in the next MDIC transaction.
>   		 */
>   		if (hw->mac.type == e1000_pch2lan)
> -			usleep_range(100, 150);
> +			udelay(100);
>   
>   		if (success)
>   			return 0;

It’d also be interested where the delay values are specified.


Kind regards,

Paul
Linux regression tracking (Thorsten Leemhuis) April 26, 2024, 6:25 a.m. UTC | #3
[CCing the maintainers and a few lists]

On 17.04.24 21:03, Vitaly Lifshits wrote:
> This is a partial revert of commit 6dbdd4de0362 ("e1000e: Workaround
> for sporadic MDI error on Meteor Lake systems"). The referenced commit
> introduced an issue on vPro systems, where disconnecting and reconnecting
> the LAN cable might result in a kernel panic.
> 
> This was root caused to the usage of usleep_range in an atomic content
> while trying to access the PHY. Change back the usleep_range calls to
> udelay.
>
> Fixes: 6dbdd4de0362 ("e1000e: Workaround for sporadic MDI error on Meteor Lake systems")

Hi everyone. What's the status here? It seems like this regression fix
did not make any progress for about a week. Which is not really ideal,
as the issue afaics causes quite a few people headaches, as a quick and
rough search indicates (there might be some false positives in here):

https://bugzilla.kernel.org/show_bug.cgi?id=218740
https://bugzilla.suse.com/show_bug.cgi?id=1223109
https://bugzilla.suse.com/show_bug.cgi?id=1222945
https://bbs.archlinux.org/viewtopic.php?id=294913
https://bbs.archlinux.org/viewtopic.php?id=294828
https://forum.manjaro.org/t/networkmanager-stability-issues-since-latest-update/159960
https://discussion.fedoraproject.org/t/kernel-6-8-5-stops-at-splash-screen/113519
https://bugzilla.redhat.com/show_bug.cgi?id=2276325
https://bugzilla.redhat.com/show_bug.cgi?id=2276852
https://www.reddit.com/r/voidlinux/comments/1c9s8ut/bug_scheduling_while_atomic/

Side note: would be nice to add these tags to the patch
description, too:

 Reported-by: Jérôme Carretero <cJ@zougloub.eu>
 Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218740
 Closes: https://lore.kernel.org/lkml/a7eb665c74b5efb5140e6979759ed243072cb24a.camel@zougloub.eu/ 

Ciao, Thorsten (wearing his 'the Linux kernel's regression tracker' hat)
--
Everything you wanna know about Linux kernel regression tracking:
https://linux-regtracking.leemhuis.info/about/#tldr
If I did something stupid, please tell me, as explained on that page.
Jiri Slaby April 30, 2024, 7:45 a.m. UTC | #4
On 17. 04. 24, 21:03, Vitaly Lifshits wrote:
> This is a partial revert of commit 6dbdd4de0362 ("e1000e: Workaround
> for sporadic MDI error on Meteor Lake systems"). The referenced commit
> introduced an issue on vPro systems, where disconnecting and reconnecting
> the LAN cable might result in a kernel panic.
> 
> This was root caused to the usage of usleep_range in an atomic content
> while trying to access the PHY. Change back the usleep_range calls to
> udelay.

FTR 
https://lore.kernel.org/all/809b5785-e65f-47f4-b52b-f9d2af0a3484@kernel.org/
elaborates on the stack trace:
e1000_watchdog_task():
    spin_lock(&adapter->stats64_lock);
    e1000e_update_stats(adapter);
    -> e1000e_update_phy_stats()
       -> e1000e_read_phy_reg_mdic()
          -> usleep_range() ----> Boom.

> Fixes: 6dbdd4de0362 ("e1000e: Workaround for sporadic MDI error on Meteor Lake systems")
> Co-developed-by: Sasha Neftin <sasha.neftin@intel.com>
> Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
> Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
> ---
>   drivers/net/ethernet/intel/e1000e/phy.c | 8 ++++----
>   1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
> index 93544f1cc2a5..f7ae0e0aa4a4 100644
> --- a/drivers/net/ethernet/intel/e1000e/phy.c
> +++ b/drivers/net/ethernet/intel/e1000e/phy.c
> @@ -157,7 +157,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
>   		 * the lower time out
>   		 */
>   		for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
> -			usleep_range(50, 60);
> +			udelay(50);
>   			mdic = er32(MDIC);
>   			if (mdic & E1000_MDIC_READY)
>   				break;
> @@ -181,7 +181,7 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
>   		 * reading duplicate data in the next MDIC transaction.
>   		 */
>   		if (hw->mac.type == e1000_pch2lan)
> -			usleep_range(100, 150);
> +			udelay(100);
>   
>   		if (success) {
>   			*data = (u16)mdic;
> @@ -237,7 +237,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
>   		 * the lower time out
>   		 */
>   		for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
> -			usleep_range(50, 60);
> +			udelay(50);
>   			mdic = er32(MDIC);
>   			if (mdic & E1000_MDIC_READY)
>   				break;
> @@ -261,7 +261,7 @@ s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
>   		 * reading duplicate data in the next MDIC transaction.
>   		 */
>   		if (hw->mac.type == e1000_pch2lan)
> -			usleep_range(100, 150);
> +			udelay(100);
>   
>   		if (success)
>   			return 0;
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index 93544f1cc2a5..f7ae0e0aa4a4 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -157,7 +157,7 @@  s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
 		 * the lower time out
 		 */
 		for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
-			usleep_range(50, 60);
+			udelay(50);
 			mdic = er32(MDIC);
 			if (mdic & E1000_MDIC_READY)
 				break;
@@ -181,7 +181,7 @@  s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
 		 * reading duplicate data in the next MDIC transaction.
 		 */
 		if (hw->mac.type == e1000_pch2lan)
-			usleep_range(100, 150);
+			udelay(100);
 
 		if (success) {
 			*data = (u16)mdic;
@@ -237,7 +237,7 @@  s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
 		 * the lower time out
 		 */
 		for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
-			usleep_range(50, 60);
+			udelay(50);
 			mdic = er32(MDIC);
 			if (mdic & E1000_MDIC_READY)
 				break;
@@ -261,7 +261,7 @@  s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
 		 * reading duplicate data in the next MDIC transaction.
 		 */
 		if (hw->mac.type == e1000_pch2lan)
-			usleep_range(100, 150);
+			udelay(100);
 
 		if (success)
 			return 0;