[2/3] e1000e: ignore status during auto-negotiation

Message ID 1546608667-25498-3-git-send-email-glogow@fbihome.de
State Changes Requested
Headers show
Series
  • e1000e driver stuck at 10Mbps after reconnection
Related show

Commit Message

Jan-Marek Glogowski Jan. 4, 2019, 1:31 p.m.
My problem is the fallback of the hardware to 10 Mbps after a
re-connect, which happens almost every time. In the broken case
the status field always has the 0x40000000 bit set.

Still, the name of the status flag is just a guess. Ignoring
the status when this bit is set solves my problem. But I have
only one notebook (I219-LM, rev 21) that exhibits the
problem. It doesn't happen on my other notebook with I219-V
(rev 21) hardware (or it's just much less likely).

Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
---
 drivers/net/ethernet/intel/e1000e/defines.h | 1 +
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
 drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

Comments

Neftin, Sasha Jan. 6, 2019, 3:28 p.m. | #1
On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
> My problem is the fallback of the hardware to 10 Mbps after a
> re-connect, which happens almost all times. In the broken case
> the status field has always the 0x40000000 bit set.
> 
> Still the naming for the status flag is just a guess. Ignoring
> the status, when this bit is set, solves my problem. But I just
> have one notebook hardware (I219-LM, rev 21), which exhibits the
> problem. It doesn't happen for my other notebook with I219-V
> (rev 21) hardware (or it's just much more unlikely).
> 
> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
> ---
>   drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>   drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>   drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>   3 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
> index fd550de..3cd9f99 100644
> --- a/drivers/net/ethernet/intel/e1000e/defines.h
> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
> @@ -221,6 +221,7 @@
>   #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init Completion by NVM */
>   #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset Asserted */
>   #define E1000_STATUS_GIO_MASTER_ENABLE	0x00080000	/* Master Req status */
> +#define E1000_STATUS_AUTONEG    0x40000000      /* in auto-negotiation */
>   
There is no such indication. Should be removed.
>   #define HALF_DUPLEX 1
>   #define FULL_DUPLEX 2
> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
> index fd59970..8588eb7 100644
> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
> @@ -1390,7 +1390,8 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>   		u16 speed;
>   		u8 duplex;
>   
> -		e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
> +		if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
> +			goto out;
>   		tipg_reg = er32(TIPG);
>   		tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>   
> diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
> index 19c816c..ada8fbb 100644
> --- a/drivers/net/ethernet/intel/e1000e/mac.c
> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed,
>   
>   	status = er32(STATUS);
>   
> +	if (status & E1000_STATUS_AUTONEG)
> +		return 1;
This is wrong. We have no AUTONEG indication in bit 30 of the E1000_STATUS 
(0x0008) register. This piece of code should be removed.
>   	if (!(status & E1000_STATUS_LU))
>   		return 1;
>   
> 
Hello Jan-Marek,
It's okay to use u8 size for the duplex indication and u16 size for the 
link indication, as you refer to in the previous patch. But using the 'autoneg 
status' is wrong. I wonder how this can solve the problem. Have you 
encountered this problem on other platforms with our devices? (I 
mean different, not similar, HW.)
Anyway, the 0x40000000 indication is not relevant to auto-negotiation.
May I ask you to repeat your experiments with ME disabled (via BIOS) and see if the same 
problem still happens?
Thanks,
Sasha
Jan-Marek Glogowski Jan. 6, 2019, 7:53 p.m. | #2
Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>> My problem is the fallback of the hardware to 10 Mbps after a
>> re-connect, which happens almost all times. In the broken case
>> the status field has always the 0x40000000 bit set.
>> 
>> Still the naming for the status flag is just a guess. Ignoring
>> the status, when this bit is set, solves my problem. But I just
>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>> problem. It doesn't happen for my other notebook with I219-V
>> (rev 21) hardware (or it's just much more unlikely).
>> 
>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>> ---
>>   drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>   drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>   drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>   3 files changed, 5 insertions(+), 1 deletion(-)
>> 
>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>b/drivers/net/ethernet/intel/e1000e/defines.h
>> index fd550de..3cd9f99 100644
>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>> @@ -221,6 +221,7 @@
>>   #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>Completion by NVM */
>>   #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>Asserted */
>>   #define E1000_STATUS_GIO_MASTER_ENABLE	0x00080000	/* Master Req
>status */
>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>auto-negotiation */
>>   
>There is no such indication. Should be removed.
>>   #define HALF_DUPLEX 1
>>   #define FULL_DUPLEX 2
>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>> index fd59970..8588eb7 100644
>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>> @@ -1390,7 +1390,8 @@ static s32
>e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>   		u16 speed;
>>   		u8 duplex;
>>   
>> -		e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>> +		if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>> +			goto out;
>>   		tipg_reg = er32(TIPG);
>>   		tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>   
>> diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>b/drivers/net/ethernet/intel/e1000e/mac.c
>> index 19c816c..ada8fbb 100644
>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>e1000_hw *hw, u16 *speed,
>>   
>>   	status = er32(STATUS);
>>   
>> +	if (status & E1000_STATUS_AUTONEG)
>> +		return 1;
>This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS 
>(0x0008) register. These code piece should be removed.
>>   	if (!(status & E1000_STATUS_LU))
>>   		return 1;
>>   
>> 
>Hello Jan-Marek,
>That's okay to use u8 size for a duplex indication and u16 size for a 
>link indication, as you refer in previous patch.
> But use the 'autoneg status' is wrong.

Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had when looking into the problem. I don't know if the device was still negotiating at this point, but this bit was set in the status register.

> I wonder how this can solve the problem. Do you 
>encountered with this problem on other platforms with our devices? (I meant different, no similar HW)

Other platforms such as Windows? I'm just doing Linux development, but I'll ask the Windows people and can check whether this problem also happens there.

I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It happens with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I didn't yet test. Google tells me they have "Intel 82579LM Gigabit" ethernet.

All of these three series are in use and we have a few hundred or even thousand of them. This problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to happen with the "new" U-series. I'm not aware of any problems like this with the older E-series HW.
And it probably just happens more often now for whatever reason.

>Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>May I ask do your experiments with ME disable (via BIOS) and see if
>same problem still happen.

Disabling ME shouldn't be a problem to test.

I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe there is an easier way to trigger the problem than re-plugging the cable all the time (maybe better to get a switch and power cycle that...).

Please tell me if there is anything else I should look for or test.

JMG
Neftin, Sasha Jan. 7, 2019, 6:32 a.m. | #3
On 1/6/2019 21:53, Jan-Marek Glogowski wrote:
> Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>> On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>>> My problem is the fallback of the hardware to 10 Mbps after a
>>> re-connect, which happens almost all times. In the broken case
>>> the status field has always the 0x40000000 bit set.
>>>
>>> Still the naming for the status flag is just a guess. Ignoring
>>> the status, when this bit is set, solves my problem. But I just
>>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>>> problem. It doesn't happen for my other notebook with I219-V
>>> (rev 21) hardware (or it's just much more unlikely).
>>>
>>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>>> ---
>>>    drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>>    drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>>    drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>>    3 files changed, 5 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>> b/drivers/net/ethernet/intel/e1000e/defines.h
>>> index fd550de..3cd9f99 100644
>>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>>> @@ -221,6 +221,7 @@
>>>    #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>> Completion by NVM */
>>>    #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>> Asserted */
>>>    #define E1000_STATUS_GIO_MASTER_ENABLE	0x00080000	/* Master Req
>> status */
>>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>> auto-negotiation */
>>>    
>> There is no such indication. Should be removed.
>>>    #define HALF_DUPLEX 1
>>>    #define FULL_DUPLEX 2
>>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>> index fd59970..8588eb7 100644
>>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>> @@ -1390,7 +1390,8 @@ static s32
>> e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>>    		u16 speed;
>>>    		u8 duplex;
>>>    
>>> -		e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>>> +		if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>>> +			goto out;
>>>    		tipg_reg = er32(TIPG);
>>>    		tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>>    
>>> diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>> b/drivers/net/ethernet/intel/e1000e/mac.c
>>> index 19c816c..ada8fbb 100644
>>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>> e1000_hw *hw, u16 *speed,
>>>    
>>>    	status = er32(STATUS);
>>>    
>>> +	if (status & E1000_STATUS_AUTONEG)
>>> +		return 1;
>> This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS
>> (0x0008) register. These code piece should be removed.
>>>    	if (!(status & E1000_STATUS_LU))
>>>    		return 1;
>>>    
>>>
>> Hello Jan-Marek,
>> That's okay to use u8 size for a duplex indication and u16 size for a
>> link indication, as you refer in previous patch.
>> But use the 'autoneg status' is wrong.
> 
> Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had when looking into the problem. I don't know if the device was still negotiating at this point, but this bit was set in the status register.
> 
>> I wonder how this can solve the problem. Do you
>> encountered with this problem on other platforms with our devices? (I meant different, no similar HW)
> 
> Other platforms as Windows? I'm just doing Linux development, but I'll ask the Windows people and can check, if this problem also happens there.
> 
> I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It happens with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I didn't yet test. Google tells me they have "Intel 82579LM Gigabit" ethernet.
> 
> All of these three series are in use and we have a few hundred or even thousand of them. This problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to happen with the "new" U-series. I'm not aware of any problems like this with the older E-series HW.
> And it probably just happens more often now for whatever reason.
> 
>> Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>> May I ask do your experiments with ME disable (via BIOS) and see if
>> same problem still happen.
> 
> Disabling ME shouldn't be a problem to test.
> 
You have mentioned that there is no problem on I219-V. The main 
difference between I219-LM and I219-V is the 'Intel Standard Manageability' 
feature. So, I suggest disabling ME and re-checking.
> I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe there is an easier way to trigger the problem then re-plugging the cable all the time (maybe better to get a switch and power cycle that...).
> 
> Please tell me if there is anything else I should look for or test.
The next step will most likely be to dump registers and try to access the 
PHY. But let's check with ME disabled as the first step.
> JMG
> 
Sasha
Jan-Marek Glogowski Jan. 7, 2019, 9 a.m. | #4
Am 07.01.19 um 07:32 schrieb Neftin, Sasha:
> On 1/6/2019 21:53, Jan-Marek Glogowski wrote:
>> Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>>> On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>>>> My problem is the fallback of the hardware to 10 Mbps after a
>>>> re-connect, which happens almost all times. In the broken case
>>>> the status field has always the 0x40000000 bit set.
>>>>
>>>> Still the naming for the status flag is just a guess. Ignoring
>>>> the status, when this bit is set, solves my problem. But I just
>>>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>>>> problem. It doesn't happen for my other notebook with I219-V
>>>> (rev 21) hardware (or it's just much more unlikely).
>>>>
>>>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>>>> ---
>>>>    drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>>>    drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>>>    drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>>>    3 files changed, 5 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>>> b/drivers/net/ethernet/intel/e1000e/defines.h
>>>> index fd550de..3cd9f99 100644
>>>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>>>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>>>> @@ -221,6 +221,7 @@
>>>>    #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>>> Completion by NVM */
>>>>    #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>>> Asserted */
>>>>    #define E1000_STATUS_GIO_MASTER_ENABLE    0x00080000    /* Master Req
>>> status */
>>>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>>> auto-negotiation */
>>>>    
>>> There is no such indication. Should be removed.
>>>>    #define HALF_DUPLEX 1
>>>>    #define FULL_DUPLEX 2
>>>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>> index fd59970..8588eb7 100644
>>>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>> @@ -1390,7 +1390,8 @@ static s32
>>> e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>>>            u16 speed;
>>>>            u8 duplex;
>>>>    -        e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>>>> +        if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>>>> +            goto out;
>>>>            tipg_reg = er32(TIPG);
>>>>            tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>>>    diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>>> b/drivers/net/ethernet/intel/e1000e/mac.c
>>>> index 19c816c..ada8fbb 100644
>>>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>>>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>>>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>>> e1000_hw *hw, u16 *speed,
>>>>           status = er32(STATUS);
>>>>    +    if (status & E1000_STATUS_AUTONEG)
>>>> +        return 1;
>>> This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS
>>> (0x0008) register. These code piece should be removed.
>>>>        if (!(status & E1000_STATUS_LU))
>>>>            return 1;
>>>>   
>>> Hello Jan-Marek,
>>> That's okay to use u8 size for a duplex indication and u16 size for a
>>> link indication, as you refer in previous patch.
>>> But use the 'autoneg status' is wrong.
>>
>> Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had
>> when looking into the problem. I don't know if the device was still negotiating at this point, but
>> this bit was set in the status register.
>>
>>> I wonder how this can solve the problem. Do you
>>> encountered with this problem on other platforms with our devices? (I meant different, no similar
>>> HW)
>>
>> Other platforms as Windows? I'm just doing Linux development, but I'll ask the Windows people and
>> can check, if this problem also happens there.
>>
>> I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It happens
>> with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I didn't yet
>> test. Google tells me they have "Intel 82579LM Gigabit" ethernet.
>>
>> All of these three series are in use and we have a few hundred or even thousand of them. This
>> problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to
>> happen with the "new" U-series. I'm not aware of any problems like this with the older E-series HW.
>> And it probably just happens more often now for whatever reason.
>>
>>> Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>>> May I ask do your experiments with ME disable (via BIOS) and see if
>>> same problem still happen.
>>
>> Disabling ME shouldn't be a problem to test.
>>
> You have mentioned that there is no problem on I219-V. The main difference between I219-LM and
> I219-V is 'Intel Standard Manageability' feature. So, I suggest to disable ME and re-check.
>> I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe
>> there is an easier way to trigger the problem then re-plugging the cable all the time (maybe
>> better to get a switch and power cycle that...).
>>
>> Please tell me if there is anything else I should look for or test.
>> Further step more likely should be dump registers and try access to a 
> PHY. But let's check ME disabled as the first step.

According to the BIOS, ME is actually disabled.
Nevertheless I selected "UnConfigure ME", which didn't change anything in the BIOS (ME
v11.8.50.3425 FWIW). I did look for vendor BIOS updates, as you think this problem might be ME
related. There is an update available.

JMG
Jan-Marek Glogowski Jan. 7, 2019, 2:15 p.m. | #5
Am 07.01.19 um 10:00 schrieb Jan-Marek Glogowski:
> 
> 
> Am 07.01.19 um 07:32 schrieb Neftin, Sasha:
>> On 1/6/2019 21:53, Jan-Marek Glogowski wrote:
>>> Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>>>> On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>>>>> My problem is the fallback of the hardware to 10 Mbps after a
>>>>> re-connect, which happens almost all times. In the broken case
>>>>> the status field has always the 0x40000000 bit set.
>>>>>
>>>>> Still the naming for the status flag is just a guess. Ignoring
>>>>> the status, when this bit is set, solves my problem. But I just
>>>>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>>>>> problem. It doesn't happen for my other notebook with I219-V
>>>>> (rev 21) hardware (or it's just much more unlikely).
>>>>>
>>>>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>>>>> ---
>>>>>    drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>>>>    drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>>>>    drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>>>>    3 files changed, 5 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>>>> b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>> index fd550de..3cd9f99 100644
>>>>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>> @@ -221,6 +221,7 @@
>>>>>    #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>>>> Completion by NVM */
>>>>>    #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>>>> Asserted */
>>>>>    #define E1000_STATUS_GIO_MASTER_ENABLE    0x00080000    /* Master Req
>>>> status */
>>>>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>>>> auto-negotiation */
>>>>>    
>>>> There is no such indication. Should be removed.
>>>>>    #define HALF_DUPLEX 1
>>>>>    #define FULL_DUPLEX 2
>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>> index fd59970..8588eb7 100644
>>>>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>> @@ -1390,7 +1390,8 @@ static s32
>>>> e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>>>>            u16 speed;
>>>>>            u8 duplex;
>>>>>    -        e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>>>>> +        if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>>>>> +            goto out;
>>>>>            tipg_reg = er32(TIPG);
>>>>>            tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>>>>    diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>>>> b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>> index 19c816c..ada8fbb 100644
>>>>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>>>> e1000_hw *hw, u16 *speed,
>>>>>           status = er32(STATUS);
>>>>>    +    if (status & E1000_STATUS_AUTONEG)
>>>>> +        return 1;
>>>> This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS
>>>> (0x0008) register. These code piece should be removed.
>>>>>        if (!(status & E1000_STATUS_LU))
>>>>>            return 1;
>>>>>   
>>>> Hello Jan-Marek,
>>>> That's okay to use u8 size for a duplex indication and u16 size for a
>>>> link indication, as you refer in previous patch.
>>>> But use the 'autoneg status' is wrong.
>>>
>>> Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had
>>> when looking into the problem. I don't know if the device was still negotiating at this point, but
>>> this bit was set in the status register.
>>>
>>>> I wonder how this can solve the problem. Do you
>>>> encountered with this problem on other platforms with our devices? (I meant different, no similar
>>>> HW)
>>>
>>> Other platforms as Windows? I'm just doing Linux development, but I'll ask the Windows people and
>>> can check, if this problem also happens there.
>>>
>>> I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It happens
>>> with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I didn't yet
>>> test. Google tells me they have "Intel 82579LM Gigabit" ethernet.
>>>
>>> All of these three series are in use and we have a few hundred or even thousand of them. This
>>> problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to
>>> happen with the "new" U-series. I'm not aware of any problems like this with the older E-series HW.
>>> And it probably just happens more often now for whatever reason.
>>>
>>>> Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>>>> May I ask do your experiments with ME disable (via BIOS) and see if
>>>> same problem still happen.
>>>
>>> Disabling ME shouldn't be a problem to test.
>>>
>> You have mentioned that there is no problem on I219-V. The main difference between I219-LM and
>> I219-V is 'Intel Standard Manageability' feature. So, I suggest to disable ME and re-check.
>>> I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe
>>> there is an easier way to trigger the problem then re-plugging the cable all the time (maybe
>>> better to get a switch and power cycle that...).
>>>
>>> Please tell me if there is anything else I should look for or test.
>>> Further step more likely should be dump registers and try access to a 
>> PHY. But let's check ME disabled as the first step.
> 
> According to the BIOS ME is actually disabled.
> Nevertheless I selected "UnConfigure ME", which didn'tr change anything in the BIOS (ME
> v11.8.50.3425 FWIW). I did look for vendor BIOS updates, as you think this problem might be ME
> related. There is an update available.

So I did the BIOS update - no changes regarding the network auto-negotiation behavior.

I also tried both of my E-Series. The old Haswell series (E7x4) also has a disabled ME and as
suspected the following HW:

00:19.0 Ethernet controller: Intel Corporation Ethernet Connection I217-LM (rev 04)
        Subsystem: Fujitsu Limited. Ethernet Connection I217-LM
        Flags: bus master, fast devsel, latency 0, IRQ 27
        Memory at f0500000 (32-bit, non-prefetchable) [size=128K]
        Memory at f053f000 (32-bit, non-prefetchable) [size=4K]
        I/O ports at 3080 [size=32]
        Capabilities: [c8] Power Management version 2
        Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
        Capabilities: [e0] PCI Advanced Features
        Kernel driver in use: e1000e
        Kernel modules: e1000e

I tried the patched module on both E-series HW and they always have the 0x40000000 bit set when
decoding the speed from the status register (always 0x40080083), either with or without the ME
available. So my patch breaks my older HW, as you probably suspected. I removed the 0x40000000 test
from the module, and they always negotiated 1000 Mbps just fine.

I've attached logs for all three notebooks with my patched module (without the 0x40000000 test) and
a debug filter for all files of the module (echo "file */e1000e-20/* +p;" >
/sys/kernel/debug/dynamic_debug/control).

My test consisted of rmmod'ing, sleep 1, insmod'ing, set debug filter + two reconnects.

So I'm basically back to square one.

How to proceed?

JMG
Neftin, Sasha Jan. 7, 2019, 3:49 p.m. | #6
On 1/7/2019 16:15, Jan-Marek Glogowski wrote:
> 
> 
> Am 07.01.19 um 10:00 schrieb Jan-Marek Glogowski:
>>
>>
>> Am 07.01.19 um 07:32 schrieb Neftin, Sasha:
>>> On 1/6/2019 21:53, Jan-Marek Glogowski wrote:
>>>> Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>>>>> On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>>>>>> My problem is the fallback of the hardware to 10 Mbps after a
>>>>>> re-connect, which happens almost all times. In the broken case
>>>>>> the status field has always the 0x40000000 bit set.
>>>>>>
>>>>>> Still the naming for the status flag is just a guess. Ignoring
>>>>>> the status, when this bit is set, solves my problem. But I just
>>>>>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>>>>>> problem. It doesn't happen for my other notebook with I219-V
>>>>>> (rev 21) hardware (or it's just much more unlikely).
>>>>>>
>>>>>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>>>>>> ---
>>>>>>     drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>>>>>     drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>>>>>     drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>>>>>     3 files changed, 5 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>> b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>> index fd550de..3cd9f99 100644
>>>>>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>> @@ -221,6 +221,7 @@
>>>>>>     #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>>>>> Completion by NVM */
>>>>>>     #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>>>>> Asserted */
>>>>>>     #define E1000_STATUS_GIO_MASTER_ENABLE    0x00080000    /* Master Req
>>>>> status */
>>>>>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>>>>> auto-negotiation */
>>>>>>     
>>>>> There is no such indication. Should be removed.
>>>>>>     #define HALF_DUPLEX 1
>>>>>>     #define FULL_DUPLEX 2
>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>> index fd59970..8588eb7 100644
>>>>>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>> @@ -1390,7 +1390,8 @@ static s32
>>>>> e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>>>>>             u16 speed;
>>>>>>             u8 duplex;
>>>>>>     -        e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>>>>>> +        if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>>>>>> +            goto out;
>>>>>>             tipg_reg = er32(TIPG);
>>>>>>             tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>>>>>     diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>> b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>> index 19c816c..ada8fbb 100644
>>>>>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>>>>> e1000_hw *hw, u16 *speed,
>>>>>>            status = er32(STATUS);
>>>>>>     +    if (status & E1000_STATUS_AUTONEG)
>>>>>> +        return 1;
>>>>> This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS
>>>>> (0x0008) register. These code piece should be removed.
>>>>>>         if (!(status & E1000_STATUS_LU))
>>>>>>             return 1;
>>>>>>    
>>>>> Hello Jan-Marek,
>>>>> That's okay to use u8 size for a duplex indication and u16 size for a
>>>>> link indication, as you refer in previous patch.
>>>>> But use the 'autoneg status' is wrong.
>>>>
>>>> Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had
>>>> when looking into the problem. I don't know if the device was still negotiating at this point, but
>>>> this bit was set in the status register.
>>>>
>>>>> I wonder how this can solve the problem. Do you
>>>>> encountered with this problem on other platforms with our devices? (I meant different, no similar
>>>>> HW)
>>>>
>>>> Other platforms as Windows? I'm just doing Linux development, but I'll ask the Windows people and
>>>> can check, if this problem also happens there.
>>>>
>>>> I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It happens
>>>> with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I didn't yet
>>>> test. Google tells me they have "Intel 82579LM Gigabit" ethernet.
>>>>
>>>> All of these three series are in use and we have a few hundred or even thousand of them. This
>>>> problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to
>>>> happen with the "new" U-series. I'm not aware of any problems like this with the older E-series HW.
>>>> And it probably just happens more often now for whatever reason.
>>>>
>>>>> Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>>>>> May I ask do your experiments with ME disable (via BIOS) and see if
>>>>> same problem still happen.
>>>>
>>>> Disabling ME shouldn't be a problem to test.
>>>>
>>> You have mentioned that there is no problem on I219-V. The main difference between I219-LM and
>>> I219-V is 'Intel Standard Manageability' feature. So, I suggest to disable ME and re-check.
>>>> I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe
>>>> there is an easier way to trigger the problem than re-plugging the cable all the time (maybe
>>>> better to get a switch and power cycle that...).
>>>>
>>>> Please tell me if there is anything else I should look for or test.
>>>> Further step more likely should be dump registers and try access to a
>>> PHY. But let's check ME disabled as the first step.
>>
>> According to the BIOS ME is actually disabled.
>> Nevertheless I selected "UnConfigure ME", which didn't change anything in the BIOS (ME
>> v11.8.50.3425 FWIW). I did look for vendor BIOS updates, as you think this problem might be ME
>> related. There is an update available.
> 
> So I did the BIOS update - no changes regarding the network auto-negotiation behavior.
> 
> I also tried both of my E-Series. The old Haswell series (E7x4) also has a disabled ME and as
> suspected the following HW:
> 
> 00:19.0 Ethernet controller: Intel Corporation Ethernet Connection I217-LM (rev 04)
>          Subsystem: Fujitsu Limited. Ethernet Connection I217-LM
>          Flags: bus master, fast devsel, latency 0, IRQ 27
>          Memory at f0500000 (32-bit, non-prefetchable) [size=128K]
>          Memory at f053f000 (32-bit, non-prefetchable) [size=4K]
>          I/O ports at 3080 [size=32]
>          Capabilities: [c8] Power Management version 2
>          Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
>          Capabilities: [e0] PCI Advanced Features
>          Kernel driver in use: e1000e
>          Kernel modules: e1000e
> 
> I tried the patched module on both E-series HW and they always have the 0x40000000 bit set when
> decoding the speed from the status register (always 0x40080083), either with or without the ME
> available. So my patch breaks my older HW, as you probably suspected. I removed the 0x40000000 test
> from the module, and they always negotiated 1000 Mbps just fine.
> 
> I've attached logs for all three notebooks with my patched module (without the  0x40000000 test) and
> a debug filter for all files of the module (echo "file */e1000e-20/* +p;" >
> /sys/kernel/debug/dynamic_debug/control).
> 
> My test consisted of rmmod'ing, sleep 1, insmod'ing, set debug filter + two reconnects.
> 
> So I'm basically back to square one.
> 
> How to proceed?
> 
ME disabled - good. How long do you wait for 1000Mbps after re-connecting
the cable? Could you please wait 5-10s and see if the link comes back
to 1000Mbps?
Unfortunately we have no such HW in our labs. I will try to ask whether our
PAE can help with more debugging if needed.
> JMG
> 
Sasha
Jan-Marek Glogowski Jan. 7, 2019, 4:37 p.m. | #7
Am 07.01.19 um 16:49 schrieb Neftin, Sasha:
> On 1/7/2019 16:15, Jan-Marek Glogowski wrote:
>>
>>
>> Am 07.01.19 um 10:00 schrieb Jan-Marek Glogowski:
>>>
>>>
>>> Am 07.01.19 um 07:32 schrieb Neftin, Sasha:
>>>> On 1/6/2019 21:53, Jan-Marek Glogowski wrote:
>>>>> Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>>>>>> On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>>>>>>> My problem is the fallback of the hardware to 10 Mbps after a
>>>>>>> re-connect, which happens almost all times. In the broken case
>>>>>>> the status field has always the 0x40000000 bit set.
>>>>>>>
>>>>>>> Still the naming for the status flag is just a guess. Ignoring
>>>>>>> the status, when this bit is set, solves my problem. But I just
>>>>>>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>>>>>>> problem. It doesn't happen for my other notebook with I219-V
>>>>>>> (rev 21) hardware (or it's just much more unlikely).
>>>>>>>
>>>>>>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>>>>>>> ---
>>>>>>>     drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>>>>>>     drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>>>>>>     drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>>>>>>     3 files changed, 5 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>> b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>> index fd550de..3cd9f99 100644
>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>> @@ -221,6 +221,7 @@
>>>>>>>     #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>>>>>> Completion by NVM */
>>>>>>>     #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>>>>>> Asserted */
>>>>>>>     #define E1000_STATUS_GIO_MASTER_ENABLE    0x00080000    /* Master Req
>>>>>> status */
>>>>>>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>>>>>> auto-negotiation */
>>>>>>>     
>>>>>> There is no such indication. Should be removed.
>>>>>>>     #define HALF_DUPLEX 1
>>>>>>>     #define FULL_DUPLEX 2
>>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>> index fd59970..8588eb7 100644
>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>> @@ -1390,7 +1390,8 @@ static s32
>>>>>> e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>>>>>>             u16 speed;
>>>>>>>             u8 duplex;
>>>>>>>     -        e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>>>>>>> +        if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>>>>>>> +            goto out;
>>>>>>>             tipg_reg = er32(TIPG);
>>>>>>>             tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>>>>>>     diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>> b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>> index 19c816c..ada8fbb 100644
>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>>>>>> e1000_hw *hw, u16 *speed,
>>>>>>>            status = er32(STATUS);
>>>>>>>     +    if (status & E1000_STATUS_AUTONEG)
>>>>>>> +        return 1;
>>>>>> This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS
>>>>>> (0x0008) register. These code piece should be removed.
>>>>>>>         if (!(status & E1000_STATUS_LU))
>>>>>>>             return 1;
>>>>>>>    
>>>>>> Hello Jan-Marek,
>>>>>> That's okay to use u8 size for a duplex indication and u16 size for a
>>>>>> link indication, as you refer in previous patch.
>>>>>> But use the 'autoneg status' is wrong.
>>>>>
>>>>> Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had
>>>>> when looking into the problem. I don't know if the device was still negotiating at this point, but
>>>>> this bit was set in the status register.
>>>>>
>>>>>> I wonder how this can solve the problem. Do you
>>>>>> encountered with this problem on other platforms with our devices? (I meant different, no similar
>>>>>> HW)
>>>>>
>>>>> Other platforms as Windows? I'm just doing Linux development, but I'll ask the Windows people and
>>>>> can check, if this problem also happens there.
>>>>>
>>>>> I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It happens
>>>>> with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I didn't yet
>>>>> test. Google tells me they have "Intel 82579LM Gigabit" ethernet.
>>>>>
>>>>> All of these three series are in use and we have a few hundred or even thousand of them. This
>>>>> problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to
>>>>> happen with the "new" U-series. I'm not aware of any problems like this with the older E-series
>>>>> HW.
>>>>> And it probably just happens more often now for whatever reason.
>>>>>
>>>>>> Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>>>>>> May I ask do your experiments with ME disable (via BIOS) and see if
>>>>>> same problem still happen.
>>>>>
>>>>> Disabling ME shouldn't be a problem to test.
>>>>>
>>>> You have mentioned that there is no problem on I219-V. The main difference between I219-LM and
>>>> I219-V is 'Intel Standard Manageability' feature. So, I suggest to disable ME and re-check.
>>>>> I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe
>>>>> there is an easier way to trigger the problem than re-plugging the cable all the time (maybe
>>>>> better to get a switch and power cycle that...).
>>>>>
>>>>> Please tell me if there is anything else I should look for or test.
>>>>> Further step more likely should be dump registers and try access to a
>>>> PHY. But let's check ME disabled as the first step.
>>>
>>> According to the BIOS ME is actually disabled.
>>> Nevertheless I selected "UnConfigure ME", which didn't change anything in the BIOS (ME
>>> v11.8.50.3425 FWIW). I did look for vendor BIOS updates, as you think this problem might be ME
>>> related. There is an update available.
>>
>> So I did the BIOS update - no changes regarding the network auto-negotiation behavior.
>>
>> I also tried both of my E-Series. The old Haswell series (E7x4) also has a disabled ME and as
>> suspected the following HW:
>>
>> 00:19.0 Ethernet controller: Intel Corporation Ethernet Connection I217-LM (rev 04)
>>          Subsystem: Fujitsu Limited. Ethernet Connection I217-LM
>>          Flags: bus master, fast devsel, latency 0, IRQ 27
>>          Memory at f0500000 (32-bit, non-prefetchable) [size=128K]
>>          Memory at f053f000 (32-bit, non-prefetchable) [size=4K]
>>          I/O ports at 3080 [size=32]
>>          Capabilities: [c8] Power Management version 2
>>          Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
>>          Capabilities: [e0] PCI Advanced Features
>>          Kernel driver in use: e1000e
>>          Kernel modules: e1000e
>>
>> I tried the patched module on both E-series HW and they always have the 0x40000000 bit set when
>> decoding the speed from the status register (always 0x40080083), either with or without the ME
>> available. So my patch breaks my older HW, as you probably suspected. I removed the 0x40000000 test
>> from the module, and they always negotiated 1000 Mbps just fine.
>>
>> I've attached logs for all three notebooks with my patched module (without the  0x40000000 test) and
>> a debug filter for all files of the module (echo "file */e1000e-20/* +p;" >
>> /sys/kernel/debug/dynamic_debug/control).
>>
>> My test consisted of rmmod'ing, sleep 1, insmod'ing, set debug filter + two reconnects.
>>
>> So I'm basically back to square one.
>>
>> How to proceed?
>>
> ME disabled - good. How long time you wait for 1000Mbps after a re connection of the cable? Could
> please, wait 5-10s and see if link back to the 1000Mbps?

From the U757 logs attached to the last mail:

[11750.669940] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
[11750.670054] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
[11750.670165] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
[11750.670166] e1000e: enp0s31f6 NIC Link is Down
[11752.925934] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
[11752.926065] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
[11752.926193] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
...
[11754.813959] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
[11754.814034] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
[11754.814106] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
...
[11768.142020] e1000e 0000:00:1f.6 enp0s31f6: status 0x40080003 => 10 Mbps, Full Duplex
...
[11768.151411] e1000e: enp0s31f6 NIC Link is Up 10 Mbps Full Duplex, Flow Control: None

Which is about 16s.

Actually, IMHO the chance of falling back to 10 Mbps is higher the longer the cable stays disconnected.
Still, reloading the module remedies this condition.

> Unfortunately we have no such HW in our labs. I will try ask if our PAE can help with more debug if
> need.

Hmmm.

JMG
Neftin, Sasha Jan. 8, 2019, 8:31 a.m. | #8
On 1/7/2019 18:37, Jan-Marek Glogowski wrote:
> 
> 
> Am 07.01.19 um 16:49 schrieb Neftin, Sasha:
>> On 1/7/2019 16:15, Jan-Marek Glogowski wrote:
>>>
>>>
>>> Am 07.01.19 um 10:00 schrieb Jan-Marek Glogowski:
>>>>
>>>>
>>>> Am 07.01.19 um 07:32 schrieb Neftin, Sasha:
>>>>> On 1/6/2019 21:53, Jan-Marek Glogowski wrote:
>>>>>> Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>>>>>>> On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>>>>>>>> My problem is the fallback of the hardware to 10 Mbps after a
>>>>>>>> re-connect, which happens almost all times. In the broken case
>>>>>>>> the status field has always the 0x40000000 bit set.
>>>>>>>>
>>>>>>>> Still the naming for the status flag is just a guess. Ignoring
>>>>>>>> the status, when this bit is set, solves my problem. But I just
>>>>>>>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>>>>>>>> problem. It doesn't happen for my other notebook with I219-V
>>>>>>>> (rev 21) hardware (or it's just much more unlikely).
>>>>>>>>
>>>>>>>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>>>>>>>> ---
>>>>>>>>      drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>>>>>>>      drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>>>>>>>      drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>>>>>>>      3 files changed, 5 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>> b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>> index fd550de..3cd9f99 100644
>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>> @@ -221,6 +221,7 @@
>>>>>>>>      #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>>>>>>> Completion by NVM */
>>>>>>>>      #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>>>>>>> Asserted */
>>>>>>>>      #define E1000_STATUS_GIO_MASTER_ENABLE    0x00080000    /* Master Req
>>>>>>> status */
>>>>>>>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>>>>>>> auto-negotiation */
>>>>>>>>      
>>>>>>> There is no such indication. Should be removed.
>>>>>>>>      #define HALF_DUPLEX 1
>>>>>>>>      #define FULL_DUPLEX 2
>>>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>> index fd59970..8588eb7 100644
>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>> @@ -1390,7 +1390,8 @@ static s32
>>>>>>> e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>>>>>>>              u16 speed;
>>>>>>>>              u8 duplex;
>>>>>>>>      -        e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>>>>>>>> +        if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>>>>>>>> +            goto out;
>>>>>>>>              tipg_reg = er32(TIPG);
>>>>>>>>              tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>>>>>>>      diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>> b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>> index 19c816c..ada8fbb 100644
>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>>>>>>> e1000_hw *hw, u16 *speed,
>>>>>>>>             status = er32(STATUS);
>>>>>>>>      +    if (status & E1000_STATUS_AUTONEG)
>>>>>>>> +        return 1;
>>>>>>> This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS
>>>>>>> (0x0008) register. These code piece should be removed.
>>>>>>>>          if (!(status & E1000_STATUS_LU))
>>>>>>>>              return 1;
>>>>>>>>     
>>>>>>> Hello Jan-Marek,
>>>>>>> That's okay to use u8 size for a duplex indication and u16 size for a
>>>>>>> link indication, as you refer in previous patch.
>>>>>>> But use the 'autoneg status' is wrong.
>>>>>>
>>>>>> Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had
>>>>>> when looking into the problem. I don't know if the device was still negotiating at this point, but
>>>>>> this bit was set in the status register.
>>>>>>
>>>>>>> I wonder how this can solve the problem. Do you
>>>>>>> encountered with this problem on other platforms with our devices? (I meant different, no similar
>>>>>>> HW)
>>>>>>
>>>>>> Other platforms as Windows? I'm just doing Linux development, but I'll ask the Windows people and
>>>>>> can check, if this problem also happens there.
>>>>>>
>>>>>> I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It happens
>>>>>> with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I didn't yet
>>>>>> test. Google tells me they have "Intel 82579LM Gigabit" ethernet.
>>>>>>
>>>>>> All of these three series are in use and we have a few hundred or even thousand of them. This
>>>>>> problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to
>>>>>> happen with the "new" U-series. I'm not aware of any problems like this with the older E-series
>>>>>> HW.
>>>>>> And it probably just happens more often now for whatever reason.
>>>>>>
>>>>>>> Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>>>>>>> May I ask do your experiments with ME disable (via BIOS) and see if
>>>>>>> same problem still happen.
>>>>>>
>>>>>> Disabling ME shouldn't be a problem to test.
>>>>>>
>>>>> You have mentioned that there is no problem on I219-V. The main difference between I219-LM and
>>>>> I219-V is 'Intel Standard Manageability' feature. So, I suggest to disable ME and re-check.
>>>>>> I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe
>>>>>> there is an easier way to trigger the problem than re-plugging the cable all the time (maybe
>>>>>> better to get a switch and power cycle that...).
>>>>>>
>>>>>> Please tell me if there is anything else I should look for or test.
>>>>>> Further step more likely should be dump registers and try access to a
>>>>> PHY. But let's check ME disabled as the first step.
>>>>
>>>> According to the BIOS ME is actually disabled.
>>>> Nevertheless I selected "UnConfigure ME", which didn't change anything in the BIOS (ME
>>>> v11.8.50.3425 FWIW). I did look for vendor BIOS updates, as you think this problem might be ME
>>>> related. There is an update available.
>>>
>>> So I did the BIOS update - no changes regarding the network auto-negotiation behavior.
>>>
>>> I also tried both of my E-Series. The old Haswell series (E7x4) also has a disabled ME and as
>>> suspected the following HW:
>>>
>>> 00:19.0 Ethernet controller: Intel Corporation Ethernet Connection I217-LM (rev 04)
>>>           Subsystem: Fujitsu Limited. Ethernet Connection I217-LM
>>>           Flags: bus master, fast devsel, latency 0, IRQ 27
>>>           Memory at f0500000 (32-bit, non-prefetchable) [size=128K]
>>>           Memory at f053f000 (32-bit, non-prefetchable) [size=4K]
>>>           I/O ports at 3080 [size=32]
>>>           Capabilities: [c8] Power Management version 2
>>>           Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
>>>           Capabilities: [e0] PCI Advanced Features
>>>           Kernel driver in use: e1000e
>>>           Kernel modules: e1000e
>>>
>>> I tried the patched module on both E-series HW and they always have the 0x40000000 bit set when
>>> decoding the speed from the status register (always 0x40080083), either with or without the ME
>>> available. So my patch breaks my older HW, as you probably suspected. I removed the 0x40000000 test
>>> from the module, and they always negotiated 1000 Mbps just fine.
>>>
>>> I've attached logs for all three notebooks with my patched module (without the  0x40000000 test) and
>>> a debug filter for all files of the module (echo "file */e1000e-20/* +p;" >
>>> /sys/kernel/debug/dynamic_debug/control).
>>>
>>> My test consisted of rmmod'ing, sleep 1, insmod'ing, set debug filter + two reconnects.
>>>
>>> So I'm basically back to square one.
>>>
>>> How to proceed?
>>>
>> ME disabled - good. How long time you wait for 1000Mbps after a re connection of the cable? Could
>> please, wait 5-10s and see if link back to the 1000Mbps?
> 
>  From the U757 logs attached to the last mail:
> 
> [11750.669940] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
> [11750.670054] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
> [11750.670165] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
> [11750.670166] e1000e: enp0s31f6 NIC Link is Down
> [11752.925934] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
> [11752.926065] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
> [11752.926193] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
> ...
> [11754.813959] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
> [11754.814034] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
> [11754.814106] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
> ...
> [11768.142020] e1000e 0000:00:1f.6 enp0s31f6: status 0x40080003 => 10 Mbps, Full Duplex
> ...
> [11768.151411] e1000e: enp0s31f6 NIC Link is Up 10 Mbps Full Duplex, Flow Control: None
> 
> Which is something about 16s.
> 
> Actually IMHO there is a larger chance to fall to 10 Mbits if you wait longer disconnected.
> Still reloading the module remedies this condition.
> 
>> Unfortunately we have no such HW in our labs. I will try ask if our PAE can help with more debug if
>> need.
> 
> Hmmm.
> 
Since you still read the 0x40000000 value in the status register, it
makes me think that ME is still working. Another thing I think you should do
is ask your vendor for the latest NVM update (or one with ME disabled, if
possible) for your HW. Since I219-V works properly, I would expect I219-LM
without ME to work too.
Let's do the following experiment on your side. Please do rmmod e1000e.ko and
bring up the machine without the driver. Then re-connect the cable a few
times and see what the link speed is. You can trust the LED
indicators.
> JMG
> 
Sasha
Jan-Marek Glogowski Jan. 8, 2019, 9:59 a.m. | #9
Am 08.01.19 um 09:31 schrieb Neftin, Sasha:
> On 1/7/2019 18:37, Jan-Marek Glogowski wrote:
>>
>>
>> Am 07.01.19 um 16:49 schrieb Neftin, Sasha:
>>> On 1/7/2019 16:15, Jan-Marek Glogowski wrote:
>>>>
>>>>
>>>> Am 07.01.19 um 10:00 schrieb Jan-Marek Glogowski:
>>>>>
>>>>>
>>>>> Am 07.01.19 um 07:32 schrieb Neftin, Sasha:
>>>>>> On 1/6/2019 21:53, Jan-Marek Glogowski wrote:
>>>>>>> Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>>>>>>>> On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>>>>>>>>> My problem is the fallback of the hardware to 10 Mbps after a
>>>>>>>>> re-connect, which happens almost all times. In the broken case
>>>>>>>>> the status field has always the 0x40000000 bit set.
>>>>>>>>>
>>>>>>>>> Still the naming for the status flag is just a guess. Ignoring
>>>>>>>>> the status, when this bit is set, solves my problem. But I just
>>>>>>>>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>>>>>>>>> problem. It doesn't happen for my other notebook with I219-V
>>>>>>>>> (rev 21) hardware (or it's just much more unlikely).
>>>>>>>>>
>>>>>>>>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>>>>>>>>> ---
>>>>>>>>>      drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>>>>>>>>      drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>>>>>>>>      drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>>>>>>>>      3 files changed, 5 insertions(+), 1 deletion(-)
>>>>>>>>>
>>>>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>> b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>>> index fd550de..3cd9f99 100644
>>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>>> @@ -221,6 +221,7 @@
>>>>>>>>>      #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>>>>>>>> Completion by NVM */
>>>>>>>>>      #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>>>>>>>> Asserted */
>>>>>>>>>      #define E1000_STATUS_GIO_MASTER_ENABLE    0x00080000    /* Master Req
>>>>>>>> status */
>>>>>>>>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>>>>>>>> auto-negotiation */
>>>>>>>>>      
>>>>>>>> There is no such indication. Should be removed.
>>>>>>>>>      #define HALF_DUPLEX 1
>>>>>>>>>      #define FULL_DUPLEX 2
>>>>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>>> index fd59970..8588eb7 100644
>>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>>> @@ -1390,7 +1390,8 @@ static s32
>>>>>>>> e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>>>>>>>>              u16 speed;
>>>>>>>>>              u8 duplex;
>>>>>>>>>      -        e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>>>>>>>>> +        if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>>>>>>>>> +            goto out;
>>>>>>>>>              tipg_reg = er32(TIPG);
>>>>>>>>>              tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>>>>>>>>      diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>> b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>>> index 19c816c..ada8fbb 100644
>>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>>>>>>>> e1000_hw *hw, u16 *speed,
>>>>>>>>>             status = er32(STATUS);
>>>>>>>>>      +    if (status & E1000_STATUS_AUTONEG)
>>>>>>>>> +        return 1;
>>>>>>>> This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS
>>>>>>>> (0x0008) register. These code piece should be removed.
>>>>>>>>>          if (!(status & E1000_STATUS_LU))
>>>>>>>>>              return 1;
>>>>>>>>>     
>>>>>>>> Hello Jan-Marek,
>>>>>>>> That's okay to use u8 size for a duplex indication and u16 size for a
>>>>>>>> link indication, as you refer in previous patch.
>>>>>>>> But use the 'autoneg status' is wrong.
>>>>>>>
>>>>>>> Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had
>>>>>>> when looking into the problem. I don't know if the device was still negotiating at this
>>>>>>> point, but
>>>>>>> this bit was set in the status register.
>>>>>>>
>>>>>>>> I wonder how this can solve the problem. Do you
>>>>>>>> encountered with this problem on other platforms with our devices? (I meant different, no
>>>>>>>> similar
>>>>>>>> HW)
>>>>>>>
>>>>>>> Other platforms as Windows? I'm just doing Linux development, but I'll ask the Windows people
>>>>>>> and
>>>>>>> can check, if this problem also happens there.
>>>>>>>
>>>>>>> I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It
>>>>>>> happens
>>>>>>> with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I
>>>>>>> didn't yet
>>>>>>> test. Google tells me they have "Intel 82579LM Gigabit" ethernet.
>>>>>>>
>>>>>>> All of these three series are in use and we have a few hundred or even thousand of them. This
>>>>>>> problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to
>>>>>>> happen with the "new" U-series. I'm not aware of any problems like this with the older E-series
>>>>>>> HW.
>>>>>>> And it probably just happens more often now for whatever reason.
>>>>>>>
>>>>>>>> Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>>>>>>>> May I ask do your experiments with ME disable (via BIOS) and see if
>>>>>>>> same problem still happen.
>>>>>>>
>>>>>>> Disabling ME shouldn't be a problem to test.
>>>>>>>
>>>>>> You have mentioned that there is no problem on I219-V. The main difference between I219-LM and
>>>>>> I219-V is 'Intel Standard Manageability' feature. So, I suggest to disable ME and re-check.
>>>>>>> I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe
>>>>>>> there is an easier way to trigger the problem then re-plugging the cable all the time (maybe
>>>>>>> better to get a switch and power cycle that...).
>>>>>>>
>>>>>>> Please tell me if there is anything else I should look for or test.
>>>>>>> Further step more likely should be dump registers and try access to a
>>>>>> PHY. But let's check ME disabled as the first step.
>>>>>
>>>>> According to the BIOS ME is actually disabled.
>>>>> Nevertheless I selected "UnConfigure ME", which didn'tr change anything in the BIOS (ME
>>>>> v11.8.50.3425 FWIW). I did look for vendor BIOS updates, as you think this problem might be ME
>>>>> related. There is an update available.
>>>>
>>>> So I did the BIOS update - no changes regarding the network auto-negotiation behavior.
>>>>
>>>> I also tried both of my E-Series. The old Haswell series (E7x4) also has a disabled ME and as
>>>> suspected the following HW:
>>>>
>>>> 00:19.0 Ethernet controller: Intel Corporation Ethernet Connection I217-LM (rev 04)
>>>>           Subsystem: Fujitsu Limited. Ethernet Connection I217-LM
>>>>           Flags: bus master, fast devsel, latency 0, IRQ 27
>>>>           Memory at f0500000 (32-bit, non-prefetchable) [size=128K]
>>>>           Memory at f053f000 (32-bit, non-prefetchable) [size=4K]
>>>>           I/O ports at 3080 [size=32]
>>>>           Capabilities: [c8] Power Management version 2
>>>>           Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
>>>>           Capabilities: [e0] PCI Advanced Features
>>>>           Kernel driver in use: e1000e
>>>>           Kernel modules: e1000e
>>>>
>>>> I tried the patched module on both E-series HW and they always have the 0x40000000 bit set when
>>>> decoding the speed from the status register (always 0x40080083), either with or without the ME
>>>> available. So my patch breaks my older HW, as you probably suspected. I removed the 0x40000000 test
>>>> from the module, and they always negotiated 1000 Mbps just fine.
>>>>
>>>> I've attached logs for all three notebooks with my patched module (without the  0x40000000 test)
>>>> and
>>>> a debug filter for all files of the module (echo "file */e1000e-20/* +p;" >
>>>> /sys/kernel/debug/dynamic_debug/control).
>>>>
>>>> My test consisted of rmmod'ing, sleep 1, insmod'ing, set debug filter + two reconnects.
>>>>
>>>> So I'm basically back to square one.
>>>>
>>>> How to proceed?
>>>>
>>> ME disabled - good. How long time you wait for 1000Mbps after a re connection of the cable? Could
>>> please, wait 5-10s and see if link back to the 1000Mbps?
>>
>>  From the U757 logs attached to the last mail:
>>
>> [11750.669940] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>> [11750.670054] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>> [11750.670165] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
>> [11750.670166] e1000e: enp0s31f6 NIC Link is Down
>> [11752.925934] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>> [11752.926065] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>> [11752.926193] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
>> ...
>> [11754.813959] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>> [11754.814034] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>> [11754.814106] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
>> ...
>> [11768.142020] e1000e 0000:00:1f.6 enp0s31f6: status 0x40080003 => 10 Mbps, Full Duplex
>> ...
>> [11768.151411] e1000e: enp0s31f6 NIC Link is Up 10 Mbps Full Duplex, Flow Control: None
>>
>> Which is something about 16s.
>>
>> Actually IMHO there is a larger chance to fall to 10 Mbits if you wait longer disconnected.
>> Still reloading the module remedies this condition.
>>
>>> Unfortunately we have no such HW in our labs. I will try ask if our PAE can help with more debug if
>>> need.
>>
>> Hmmm.
>>
> Since you still read 0x40000000 value in the status register it is causing me to think that ME
> works. Another way I think you should to go ask your vendors for last updated NVM (or with ME
> disabled if possible) for your HW. Since I219V works as properly, I expected I219-LM without ME
> works too.

From the logs I sent:

I217-LM: "status 0x40080083 => 1000 Mbps, Full Duplex"
I219-V: "status 0x40080083 => 1000 Mbps, Full Duplex"
I219-LM auto-nego ok: "status 0x80083 => 1000 Mbps, Full Duplex"
I219-LM auto-nego broken: "status 0x40080003 => 10 Mbps, Full Duplex"

According to the BIOS both LM-variants have Intel ME disabled. If I can't trust the BIOS, is there a
way to check this?

> Let's do follow experiments on your side. Please, do rmmod e1000e.ko, bring up the machine without
> the driver. Then re-connect the cable few times and see what is link's speed up. You can be trusted
> on the LED indicators.

So even if the driver (+ ethtool) indicates 10 MBits both lights are on. Same without the driver.
But wget speed is just 10 MBits, as ethtool indicates. Manually overriding to 1000 Mbps still works.

Now I directly connected the I219-V with my I219-LM.

On the I219-V I ran "ethtool -s enp0s31f6 speed n autoneg off"
These are the results on the I219-LM side:
n=1000 : green and yellow lights on. "status 0x80083 => 1000 Mbps, Full Duplex"
n=100 : green and yellow lights on. "status 0x80042 => 100 Mbps, Half Duplex"
n=10 : just green light on. "status 0x80082 => 10 Mbps, Half Duplex"

The light indicators are the same on the I219-LM without the driver.

All status output on the I219-V has the 0x40000000 bit set, the I219-LM normally never.
The I219-V always had full duplex according to ethtool and the status.

If I set both sides to auto-neg, the first negotiation is correct (both have 1000 Mbps). If I
reconnect it becomes 10 Mbps on the I219-LM side and 1000 Mbps on the I219-V side. Both lights are
still on. The I219-LM side *just* has the status bit 0x40000000 set the first time it goes to 10
Mbits (what I based my patch on without testing it on other HW).

JMG
Paul Menzel Jan. 8, 2019, 10:15 a.m. | #10
Dear Jan-Marek,


On 01/08/19 10:59, Jan-Marek Glogowski wrote:

> Am 08.01.19 um 09:31 schrieb Neftin, Sasha:
>> On 1/7/2019 18:37, Jan-Marek Glogowski wrote:

[…]

>> Since you still read 0x40000000 value in the status register it is causing me to think that ME
>> works. Another way I think you should to go ask your vendors for last updated NVM (or with ME
>> disabled if possible) for your HW. Since I219V works as properly, I expected I219-LM without ME
>> works too.
> 
> From the logs I sent:
> 
> I217-LM: "status 0x40080083 => 1000 Mbps, Full Duplex"
> I219-V: "status 0x40080083 => 1000 Mbps, Full Duplex"
> I219-LM auto-nego ok: "status 0x80083 => 1000 Mbps, Full Duplex"
> I219-LM auto-nego broken: "status 0x40080003 => 10 Mbps, Full Duplex"
> 
> According to the BIOS both LM-variants have Intel ME disabled. If I can't trust the BIOS, is there a
> way to check this?

What does intelmetool from the coreboot project show?

    $ git clone https://review.coreboot.org/coreboot.git
    $ cd coreboot
    $ cd util/intelmetool
    $ make -j
    $ sudo ./intelmetool -m


Kind regards,

Paul
Jan-Marek Glogowski Jan. 8, 2019, 10:15 a.m. | #11
Am 08.01.19 um 10:59 schrieb Jan-Marek Glogowski:
> 
> 
> Am 08.01.19 um 09:31 schrieb Neftin, Sasha:
>> On 1/7/2019 18:37, Jan-Marek Glogowski wrote:
>>>
>>>
>>> Am 07.01.19 um 16:49 schrieb Neftin, Sasha:
>>>> On 1/7/2019 16:15, Jan-Marek Glogowski wrote:
>>>>>
>>>>>
>>>>> Am 07.01.19 um 10:00 schrieb Jan-Marek Glogowski:
>>>>>>
>>>>>>
>>>>>> Am 07.01.19 um 07:32 schrieb Neftin, Sasha:
>>>>>>> On 1/6/2019 21:53, Jan-Marek Glogowski wrote:
>>>>>>>> Am 6. Januar 2019 16:28:42 MEZ schrieb "Neftin, Sasha" <sasha.neftin@intel.com>:
>>>>>>>>> On 1/4/2019 15:31, Jan-Marek Glogowski wrote:
>>>>>>>>>> My problem is the fallback of the hardware to 10 Mbps after a
>>>>>>>>>> re-connect, which happens almost all times. In the broken case
>>>>>>>>>> the status field has always the 0x40000000 bit set.
>>>>>>>>>>
>>>>>>>>>> Still the naming for the status flag is just a guess. Ignoring
>>>>>>>>>> the status, when this bit is set, solves my problem. But I just
>>>>>>>>>> have one notebook hardware (I219-LM, rev 21), which exhibits the
>>>>>>>>>> problem. It doesn't happen for my other notebook with I219-V
>>>>>>>>>> (rev 21) hardware (or it's just much more unlikely).
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
>>>>>>>>>> ---
>>>>>>>>>>      drivers/net/ethernet/intel/e1000e/defines.h | 1 +
>>>>>>>>>>      drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++-
>>>>>>>>>>      drivers/net/ethernet/intel/e1000e/mac.c     | 2 ++
>>>>>>>>>>      3 files changed, 5 insertions(+), 1 deletion(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>>> b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>>>> index fd550de..3cd9f99 100644
>>>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/defines.h
>>>>>>>>>> @@ -221,6 +221,7 @@
>>>>>>>>>>      #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init
>>>>>>>>> Completion by NVM */
>>>>>>>>>>      #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset
>>>>>>>>> Asserted */
>>>>>>>>>>      #define E1000_STATUS_GIO_MASTER_ENABLE    0x00080000    /* Master Req
>>>>>>>>> status */
>>>>>>>>>> +#define E1000_STATUS_AUTONEG    0x40000000      /* in
>>>>>>>>> auto-negotiation */
>>>>>>>>>>      
>>>>>>>>> There is no such indication. Should be removed.
>>>>>>>>>>      #define HALF_DUPLEX 1
>>>>>>>>>>      #define FULL_DUPLEX 2
>>>>>>>>>> diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>>> b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>>>> index fd59970..8588eb7 100644
>>>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
>>>>>>>>>> @@ -1390,7 +1390,8 @@ static s32
>>>>>>>>> e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
>>>>>>>>>>              u16 speed;
>>>>>>>>>>              u8 duplex;
>>>>>>>>>>      -        e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
>>>>>>>>>> +        if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
>>>>>>>>>> +            goto out;
>>>>>>>>>>              tipg_reg = er32(TIPG);
>>>>>>>>>>              tipg_reg &= ~E1000_TIPG_IPGT_MASK;
>>>>>>>>>>      diff --git a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>>> b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>>>> index 19c816c..ada8fbb 100644
>>>>>>>>>> --- a/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>>>> +++ b/drivers/net/ethernet/intel/e1000e/mac.c
>>>>>>>>>> @@ -1310,6 +1310,8 @@ s32 e1000e_get_speed_and_duplex_copper(struct
>>>>>>>>> e1000_hw *hw, u16 *speed,
>>>>>>>>>>             status = er32(STATUS);
>>>>>>>>>>      +    if (status & E1000_STATUS_AUTONEG)
>>>>>>>>>> +        return 1;
>>>>>>>>> This is wrong. We have no AUTONEG indication in bit 30 of E1000_STATUS
>>>>>>>>> (0x0008) register. These code piece should be removed.
>>>>>>>>>>          if (!(status & E1000_STATUS_LU))
>>>>>>>>>>              return 1;
>>>>>>>>>>     
>>>>>>>>> Hello Jan-Marek,
>>>>>>>>> That's okay to use u8 size for a duplex indication and u16 size for a
>>>>>>>>> link indication, as you refer in previous patch.
>>>>>>>>> But use the 'autoneg status' is wrong.
>>>>>>>>
>>>>>>>> Just as a reminder: I have no idea what this bit actually indicates. This is just a guess I had
>>>>>>>> when looking into the problem. I don't know if the device was still negotiating at this
>>>>>>>> point, but
>>>>>>>> this bit was set in the status register.
>>>>>>>>
>>>>>>>>> I wonder how this can solve the problem. Do you
>>>>>>>>> encountered with this problem on other platforms with our devices? (I meant different, no
>>>>>>>>> similar
>>>>>>>>> HW)
>>>>>>>>
>>>>>>>> Other platforms as Windows? I'm just doing Linux development, but I'll ask the Windows people
>>>>>>>> and
>>>>>>>> can check, if this problem also happens there.
>>>>>>>>
>>>>>>>> I don't see this problem with older HW (Fujitsu E7x6, also Skylake based, but I219-V). It
>>>>>>>> happens
>>>>>>>> with both of my U7x7 test notebooks. I have some older Haswell based HW (E7x4), which I
>>>>>>>> didn't yet
>>>>>>>> test. Google tells me they have "Intel 82579LM Gigabit" ethernet.
>>>>>>>>
>>>>>>>> All of these three series are in use and we have a few hundred or even thousand of them. This
>>>>>>>> problem was found during the tests for our next Ubuntu 18.04 based release. This just seems to
>>>>>>>> happen with the "new" U-series. I'm not aware of any problems like this with the older E-series
>>>>>>>> HW.
>>>>>>>> And it probably just happens more often now for whatever reason.
>>>>>>>>
>>>>>>>>> Anyway, 0x40000000 indication is not relevant to the auto-negotiation.
>>>>>>>>> May I ask do your experiments with ME disable (via BIOS) and see if
>>>>>>>>> same problem still happen.
>>>>>>>>
>>>>>>>> Disabling ME shouldn't be a problem to test.
>>>>>>>>
>>>>>>> You have mentioned that there is no problem on I219-V. The main difference between I219-LM and
>>>>>>> I219-V is 'Intel Standard Manageability' feature. So, I suggest to disable ME and re-check.
>>>>>>>> I'll continue testing all the HW tomorrow, with both our releases, and report back. And maybe
>>>>>>>> there is an easier way to trigger the problem then re-plugging the cable all the time (maybe
>>>>>>>> better to get a switch and power cycle that...).
>>>>>>>>
>>>>>>>> Please tell me if there is anything else I should look for or test.
>>>>>>>> Further step more likely should be dump registers and try access to a
>>>>>>> PHY. But let's check ME disabled as the first step.
>>>>>>
>>>>>> According to the BIOS ME is actually disabled.
>>>>>> Nevertheless I selected "UnConfigure ME", which didn'tr change anything in the BIOS (ME
>>>>>> v11.8.50.3425 FWIW). I did look for vendor BIOS updates, as you think this problem might be ME
>>>>>> related. There is an update available.
>>>>>
>>>>> So I did the BIOS update - no changes regarding the network auto-negotiation behavior.
>>>>>
>>>>> I also tried both of my E-Series. The old Haswell series (E7x4) also has a disabled ME and as
>>>>> suspected the following HW:
>>>>>
>>>>> 00:19.0 Ethernet controller: Intel Corporation Ethernet Connection I217-LM (rev 04)
>>>>>           Subsystem: Fujitsu Limited. Ethernet Connection I217-LM
>>>>>           Flags: bus master, fast devsel, latency 0, IRQ 27
>>>>>           Memory at f0500000 (32-bit, non-prefetchable) [size=128K]
>>>>>           Memory at f053f000 (32-bit, non-prefetchable) [size=4K]
>>>>>           I/O ports at 3080 [size=32]
>>>>>           Capabilities: [c8] Power Management version 2
>>>>>           Capabilities: [d0] MSI: Enable+ Count=1/1 Maskable- 64bit+
>>>>>           Capabilities: [e0] PCI Advanced Features
>>>>>           Kernel driver in use: e1000e
>>>>>           Kernel modules: e1000e
>>>>>
>>>>> I tried the patched module on both E-series HW and they always have the 0x40000000 bit set when
>>>>> decoding the speed from the status register (always 0x40080083), either with or without the ME
>>>>> available. So my patch breaks my older HW, as you probably suspected. I removed the 0x40000000 test
>>>>> from the module, and they always negotiated 1000 Mbps just fine.
>>>>>
>>>>> I've attached logs for all three notebooks with my patched module (without the  0x40000000 test)
>>>>> and
>>>>> a debug filter for all files of the module (echo "file */e1000e-20/* +p;" >
>>>>> /sys/kernel/debug/dynamic_debug/control).
>>>>>
>>>>> My test consisted of rmmod'ing, sleep 1, insmod'ing, set debug filter + two reconnects.
>>>>>
>>>>> So I'm basically back to square one.
>>>>>
>>>>> How to proceed?
>>>>>
>>>> ME disabled - good. How long time you wait for 1000Mbps after a re connection of the cable? Could
>>>> please, wait 5-10s and see if link back to the 1000Mbps?
>>>
>>>  From the U757 logs attached to the last mail:
>>>
>>> [11750.669940] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>>> [11750.670054] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>>> [11750.670165] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
>>> [11750.670166] e1000e: enp0s31f6 NIC Link is Down
>>> [11752.925934] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>>> [11752.926065] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>>> [11752.926193] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
>>> ...
>>> [11754.813959] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>>> [11754.814034] e1000e 0000:00:1f.6 enp0s31f6: reading PHY page 0 (or 0x0 shifted) reg 0x1
>>> [11754.814106] e1000e 0000:00:1f.6 enp0s31f6: ARC subsystem not valid.
>>> ...
>>> [11768.142020] e1000e 0000:00:1f.6 enp0s31f6: status 0x40080003 => 10 Mbps, Full Duplex
>>> ...
>>> [11768.151411] e1000e: enp0s31f6 NIC Link is Up 10 Mbps Full Duplex, Flow Control: None
>>>
>>> Which is something about 16s.
>>>
>>> Actually IMHO there is a larger chance to fall to 10 Mbits if you wait longer disconnected.
>>> Still reloading the module remedies this condition.
>>>
>>>> Unfortunately we have no such HW in our labs. I will try ask if our PAE can help with more debug if
>>>> need.
>>>
>>> Hmmm.
>>>
>> Since you still read 0x40000000 value in the status register it is causing me to think that ME
>> works. Another way I think you should to go ask your vendors for last updated NVM (or with ME
>> disabled if possible) for your HW. Since I219V works as properly, I expected I219-LM without ME
>> works too.
> 
> From the logs I sent:
> 
> I217-LM: "status 0x40080083 => 1000 Mbps, Full Duplex"
> I219-V: "status 0x40080083 => 1000 Mbps, Full Duplex"
> I219-LM auto-nego ok: "status 0x80083 => 1000 Mbps, Full Duplex"
> I219-LM auto-nego broken: "status 0x40080003 => 10 Mbps, Full Duplex"
> 
> According to the BIOS both LM-variants have Intel ME disabled. If I can't trust the BIOS, is there a
> way to check this?
> 
>> Let's do follow experiments on your side. Please, do rmmod e1000e.ko, bring up the machine without
>> the driver. Then re-connect the cable few times and see what is link's speed up. You can be trusted
>> on the LED indicators.
> 
> So even if the driver (+ ethtool) indicates 10 MBits both lights are on. Same without the driver.
> But wget speed is is just 10 MBits, as ethtool indicates. Manually overriding to 1000 Mbps still works.
> 
> Now I directly connected the I219-V with my I219-LM.
> 
> On the I219-V I ran "ethtool -s enp0s31f6 speed n autoneg off"
> These are the results on the I219-LM side:
> n=1000 : green and yellow lights on. "status 0x80083 => 1000 Mbps, Full Duplex"
> n=100 : green and yellow lights on. "status 0x80042 => 100 Mbps, Half Duplex"
> n=10 : just green light on. "status 0x80082 => 10 Mbps, Half Duplex"
> 
> The light indicators are the same on the I219-LM without the driver.
> 
> All status output on the I219-V has the 0x40000000 bit set, the I219-LM normally never.
> The I219-V always had full duplex according to ethtool and the status.
> 
> If I set both sides to auto-neg, the first negotiation is correct (both have 1000 Mbps). If I
> reconnect it becomes 10 Mbps on the I219-LM side and 1000 Mbps on the I219-V side. Both lights are
> still on. The I219-LM side *just* has the status bit 0x40000000 set the first time it goes to 10
> Mbits (what I based my patch on without testing it on other HW).

Re-enabling auto-negotiation also works (I219-V ethtool => I219-LM status)

1. "ethtool -s enp0s31f6 speed 100 autoneg off" => "status 0x80042 => 100 Mbps, Half Duplex"
2. "ethtool -s enp0s31f6 autoneg on" => "status 0x80083 => 1000 Mbps, Full Duplex"

according to dmesg (incl. status) and ethtool.
Until I reconnect.

JMG
Jan-Marek Glogowski Jan. 8, 2019, 11:15 a.m. | #12
Hi Paul,

Am 08.01.19 um 11:15 schrieb Paul Menzel:
> On 01/08/19 10:59, Jan-Marek Glogowski wrote:
> 
>> Am 08.01.19 um 09:31 schrieb Neftin, Sasha:
>>> On 1/7/2019 18:37, Jan-Marek Glogowski wrote:
> 
> […]
> 
>>> Since you still read 0x40000000 value in the status register it is causing me to think that ME
>>> works. Another way I think you should to go ask your vendors for last updated NVM (or with ME
>>> disabled if possible) for your HW. Since I219V works as properly, I expected I219-LM without ME
>>> works too.
>>
>> From the logs I sent:
>>
>> I217-LM: "status 0x40080083 => 1000 Mbps, Full Duplex"
>> I219-V: "status 0x40080083 => 1000 Mbps, Full Duplex"
>> I219-LM auto-nego ok: "status 0x80083 => 1000 Mbps, Full Duplex"
>> I219-LM auto-nego broken: "status 0x40080003 => 10 Mbps, Full Duplex"
>>
>> According to the BIOS both LM-variants have Intel ME disabled. If I can't trust the BIOS, is there a
>> way to check this?
> 
> What does intelmetool from the coreboot project show?
> 
>     $ git clone https://review.coreboot.org/coreboot.git
>     $ cd coreboot
>     $ cd util/intelmetoolo
>     $ make -j
>     $ sudo ./intelmetool -m


me-e736.log-norm
----------------
MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1

ME Status   : 0x90000245
ME Status 2 : 0x86110306

ME: FW Partition Table      : OK
ME: Bringup Loader Failure  : NO
ME: Firmware Init Complete  : YES
ME: Manufacturing Mode      : NO
ME: Boot Options Present    : NO
ME: Update In Progress      : NO
ME: Current Working State   : Normal
ME: Current Operation State : M0 with UMA
ME: Current Operation Mode  : Normal
ME: Error Code              : No Error
ME: Progress Phase          : Clean Moff->Mx wake
ME: Power Management Event  : Pseudo-global reset
ME: Progress Phase State    : Unknown 0x11

ME: Extend Register not valid

ME: Firmware Version 11.0.1173.0 (code) 11.0.1173.0 (recovery) 11.0.1173.0 (fitc)

ME Capability: Full Network manageability                 : OFF
ME Capability: Regular Network manageability              : OFF
ME Capability: Manageability                              : OFF
ME Capability: Small business technology                  : OFF
ME Capability: Level III manageability                    : OFF
ME Capability: IntelR Anti-Theft (AT)                     : OFF
ME Capability: IntelR Capability Licensing Service (CLS)  : ON
ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
ME Capability: ICC Over Clocking                          : ON
ME Capability: Protected Audio Video Path (PAVP)          : ON
ME Capability: IPV6                                       : OFF
ME Capability: KVM Remote Control (KVM)                   : OFF
ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
ME Capability: Virtual LAN (VLAN)                         : ON
ME Capability: TLS                                        : OFF
ME Capability: Wireless LAN (WLAN)                        : OFF


me-e754.log-norm
----------------
Bad news, you have a `QM87 Express LPC Controller` so you have ME hardware on board and you can't
control or disable it, continuing...

MEI found: [8086:8c3a] 8 Series/C220 Series Chipset Family MEI Controller #1

ME Status   : 0x1e000245
ME Status 2 : 0x60002306

ME: FW Partition Table      : OK
ME: Bringup Loader Failure  : NO
ME: Firmware Init Complete  : YES
ME: Manufacturing Mode      : NO
ME: Boot Options Present    : NO
ME: Update In Progress      : NO
ME: Current Working State   : Normal
ME: Current Operation State : M0 with UMA
ME: Current Operation Mode  : Normal
ME: Error Code              : No Error
ME: Progress Phase          : Host Communication
ME: Power Management Event  : Clean Moff->Mx wake
ME: Progress Phase State    : Host communication established

ME: Extend SHA-256: d536aea220d776c0d26baaffc9832af56871511a7e304b37783b0fe7b8929503

ME: Firmware Version 9.0.1467.22 (code) 9.0.1467.22 (recovery) 9.0.1452.21 (fitc)

ME Capability: Full Network manageability                 : OFF
ME Capability: Regular Network manageability              : OFF
ME Capability: Manageability                              : ON
ME Capability: Small business technology                  : ON
ME Capability: Level III manageability                    : OFF
ME Capability: IntelR Anti-Theft (AT)                     : ON
ME Capability: IntelR Capability Licensing Service (CLS)  : ON
ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
ME Capability: ICC Over Clocking                          : ON
ME Capability: Protected Audio Video Path (PAVP)          : ON
ME Capability: IPV6                                       : OFF
ME Capability: KVM Remote Control (KVM)                   : OFF
ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
ME Capability: Virtual LAN (VLAN)                         : ON
ME Capability: TLS                                        : ON
ME Capability: Wireless LAN (WLAN)                        : OFF


me-u727.log-norm
----------------
MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1

ME Status   : 0xa0000245
ME Status 2 : 0x89108106

ME: FW Partition Table      : OK
ME: Bringup Loader Failure  : NO
ME: Firmware Init Complete  : YES
ME: Manufacturing Mode      : NO
ME: Boot Options Present    : NO
ME: Update In Progress      : NO
ME: Current Working State   : Normal
ME: Current Operation State : M0 with UMA
ME: Current Operation Mode  : Normal
ME: Error Code              : No Error
ME: Progress Phase          : Clean Moff->Mx wake
ME: Power Management Event  : Non-power cycle reset
ME: Progress Phase State    : Unknown 0x10

ME: Extend Register not valid

ME: Firmware Version 11.6.3287.29 (code) 11.6.3287.29 (recovery) 11.6.3287.29 (fitc)

ME Capability: Full Network manageability                 : OFF
ME Capability: Regular Network manageability              : OFF
ME Capability: Manageability                              : ON
ME Capability: Small business technology                  : ON
ME Capability: Level III manageability                    : OFF
ME Capability: IntelR Anti-Theft (AT)                     : OFF
ME Capability: IntelR Capability Licensing Service (CLS)  : ON
ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
ME Capability: ICC Over Clocking                          : OFF
ME Capability: Protected Audio Video Path (PAVP)          : ON
ME Capability: IPV6                                       : OFF
ME Capability: KVM Remote Control (KVM)                   : OFF
ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
ME Capability: Virtual LAN (VLAN)                         : ON
ME Capability: TLS                                        : OFF
ME Capability: Wireless LAN (WLAN)                        : OFF


me-u757.log-norm
----------------
MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1

ME Status   : 0x90000245
ME Status 2 : 0x89108106

ME: FW Partition Table      : OK
ME: Bringup Loader Failure  : NO
ME: Firmware Init Complete  : YES
ME: Manufacturing Mode      : NO
ME: Boot Options Present    : NO
ME: Update In Progress      : NO
ME: Current Working State   : Normal
ME: Current Operation State : M0 with UMA
ME: Current Operation Mode  : Normal
ME: Error Code              : No Error
ME: Progress Phase          : Clean Moff->Mx wake
ME: Power Management Event  : Non-power cycle reset
ME: Progress Phase State    : Unknown 0x10

ME: Extend Register not valid

ME: Firmware Version 11.8.3425.50 (code) 11.8.3425.50 (recovery) 11.8.3425.50 (fitc)

ME Capability: Full Network manageability                 : ON
ME Capability: Regular Network manageability              : OFF
ME Capability: Manageability                              : ON
ME Capability: Small business technology                  : OFF
ME Capability: Level III manageability                    : OFF
ME Capability: IntelR Anti-Theft (AT)                     : OFF
ME Capability: IntelR Capability Licensing Service (CLS)  : ON
ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
ME Capability: ICC Over Clocking                          : OFF
ME Capability: Protected Audio Video Path (PAVP)          : ON
ME Capability: IPV6                                       : ON
ME Capability: KVM Remote Control (KVM)                   : ON
ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
ME Capability: Virtual LAN (VLAN)                         : ON
ME Capability: TLS                                        : ON
ME Capability: Wireless LAN (WLAN)                        : ON


What do we make of this?

I see the same problem with both u757 and u727.
No problem with either e736 or e754.

Jan-Marek
Neftin, Sasha Jan. 9, 2019, 3:07 p.m. | #13
On 1/8/2019 13:15, Jan-Marek Glogowski wrote:
> Hi Paul,
> 
> Am 08.01.19 um 11:15 schrieb Paul Menzel:
>> On 01/08/19 10:59, Jan-Marek Glogowski wrote:
>>
>>> Am 08.01.19 um 09:31 schrieb Neftin, Sasha:
>>>> On 1/7/2019 18:37, Jan-Marek Glogowski wrote:
>>
>> […]
>>
>>>> Since you still read 0x40000000 value in the status register it is causing me to think that ME
>>>> works. Another way I think you should to go ask your vendors for last updated NVM (or with ME
>>>> disabled if possible) for your HW. Since I219V works as properly, I expected I219-LM without ME
>>>> works too.
>>>
>>>  From the logs I sent:
>>>
>>> I217-LM: "status 0x40080083 => 1000 Mbps, Full Duplex"
>>> I219-V: "status 0x40080083 => 1000 Mbps, Full Duplex"
>>> I219-LM auto-nego ok: "status 0x80083 => 1000 Mbps, Full Duplex"
>>> I219-LM auto-nego broken: "status 0x40080003 => 10 Mbps, Full Duplex"
>>>
>>> According to the BIOS both LM-variants have Intel ME disabled. If I can't trust the BIOS, is there a
>>> way to check this?
>>
>> What does intelmetool from the coreboot project show?
>>
>>      $ git clone https://review.coreboot.org/coreboot.git
>>      $ cd coreboot
>>      $ cd util/intelmetoolo
>>      $ make -j
>>      $ sudo ./intelmetool -m
> 
> 
> me-e736.log-norm
> ----------------
> MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1
> 
> ME Status   : 0x90000245
> ME Status 2 : 0x86110306
> 
> ME: FW Partition Table      : OK
> ME: Bringup Loader Failure  : NO
> ME: Firmware Init Complete  : YES
> ME: Manufacturing Mode      : NO
> ME: Boot Options Present    : NO
> ME: Update In Progress      : NO
> ME: Current Working State   : Normal
> ME: Current Operation State : M0 with UMA
> ME: Current Operation Mode  : Normal
> ME: Error Code              : No Error
> ME: Progress Phase          : Clean Moff->Mx wake
> ME: Power Management Event  : Pseudo-global reset
> ME: Progress Phase State    : Unknown 0x11
> 
> ME: Extend Register not valid
> 
> ME: Firmware Version 11.0.1173.0 (code) 11.0.1173.0 (recovery) 11.0.1173.0 (fitc)
> 
> ME Capability: Full Network manageability                 : OFF
> ME Capability: Regular Network manageability              : OFF
> ME Capability: Manageability                              : OFF
> ME Capability: Small business technology                  : OFF
> ME Capability: Level III manageability                    : OFF
> ME Capability: IntelR Anti-Theft (AT)                     : OFF
> ME Capability: IntelR Capability Licensing Service (CLS)  : ON
> ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
> ME Capability: ICC Over Clocking                          : ON
> ME Capability: Protected Audio Video Path (PAVP)          : ON
> ME Capability: IPV6                                       : OFF
> ME Capability: KVM Remote Control (KVM)                   : OFF
> ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
> ME Capability: Virtual LAN (VLAN)                         : ON
> ME Capability: TLS                                        : OFF
> ME Capability: Wireless LAN (WLAN)                        : OFF
> 
> 
> me-e754.log-norm
> ----------------
> Bad news, you have a `QM87 Express LPC Controller` so you have ME hardware on board and you can't
> control or disable it, continuing...
> 
> MEI found: [8086:8c3a] 8 Series/C220 Series Chipset Family MEI Controller #1
> 
> ME Status   : 0x1e000245
> ME Status 2 : 0x60002306
> 
> ME: FW Partition Table      : OK
> ME: Bringup Loader Failure  : NO
> ME: Firmware Init Complete  : YES
> ME: Manufacturing Mode      : NO
> ME: Boot Options Present    : NO
> ME: Update In Progress      : NO
> ME: Current Working State   : Normal
> ME: Current Operation State : M0 with UMA
> ME: Current Operation Mode  : Normal
> ME: Error Code              : No Error
> ME: Progress Phase          : Host Communication
> ME: Power Management Event  : Clean Moff->Mx wake
> ME: Progress Phase State    : Host communication established
> 
> ME: Extend SHA-256: d536aea220d776c0d26baaffc9832af56871511a7e304b37783b0fe7b8929503
> 
> ME: Firmware Version 9.0.1467.22 (code) 9.0.1467.22 (recovery) 9.0.1452.21 (fitc)
> 
> ME Capability: Full Network manageability                 : OFF
> ME Capability: Regular Network manageability              : OFF
> ME Capability: Manageability                              : ON
> ME Capability: Small business technology                  : ON
> ME Capability: Level III manageability                    : OFF
> ME Capability: IntelR Anti-Theft (AT)                     : ON
> ME Capability: IntelR Capability Licensing Service (CLS)  : ON
> ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
> ME Capability: ICC Over Clocking                          : ON
> ME Capability: Protected Audio Video Path (PAVP)          : ON
> ME Capability: IPV6                                       : OFF
> ME Capability: KVM Remote Control (KVM)                   : OFF
> ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
> ME Capability: Virtual LAN (VLAN)                         : ON
> ME Capability: TLS                                        : ON
> ME Capability: Wireless LAN (WLAN)                        : OFF
> 
> 
> me-u727.log-norm
> ----------------
> MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1
> 
> ME Status   : 0xa0000245
> ME Status 2 : 0x89108106
> 
> ME: FW Partition Table      : OK
> ME: Bringup Loader Failure  : NO
> ME: Firmware Init Complete  : YES
> ME: Manufacturing Mode      : NO
> ME: Boot Options Present    : NO
> ME: Update In Progress      : NO
> ME: Current Working State   : Normal
> ME: Current Operation State : M0 with UMA
> ME: Current Operation Mode  : Normal
> ME: Error Code              : No Error
> ME: Progress Phase          : Clean Moff->Mx wake
> ME: Power Management Event  : Non-power cycle reset
> ME: Progress Phase State    : Unknown 0x10
> 
> ME: Extend Register not valid
> 
> ME: Firmware Version 11.6.3287.29 (code) 11.6.3287.29 (recovery) 11.6.3287.29 (fitc)
> 
> ME Capability: Full Network manageability                 : OFF
> ME Capability: Regular Network manageability              : OFF
> ME Capability: Manageability                              : ON
> ME Capability: Small business technology                  : ON
> ME Capability: Level III manageability                    : OFF
> ME Capability: IntelR Anti-Theft (AT)                     : OFF
> ME Capability: IntelR Capability Licensing Service (CLS)  : ON
> ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
> ME Capability: ICC Over Clocking                          : OFF
> ME Capability: Protected Audio Video Path (PAVP)          : ON
> ME Capability: IPV6                                       : OFF
> ME Capability: KVM Remote Control (KVM)                   : OFF
> ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
> ME Capability: Virtual LAN (VLAN)                         : ON
> ME Capability: TLS                                        : OFF
> ME Capability: Wireless LAN (WLAN)                        : OFF
> 
> 
> me-u757.log-norm
> ----------------
> MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1
> 
> ME Status   : 0x90000245
> ME Status 2 : 0x89108106
> 
> ME: FW Partition Table      : OK
> ME: Bringup Loader Failure  : NO
> ME: Firmware Init Complete  : YES
> ME: Manufacturing Mode      : NO
> ME: Boot Options Present    : NO
> ME: Update In Progress      : NO
> ME: Current Working State   : Normal
> ME: Current Operation State : M0 with UMA
> ME: Current Operation Mode  : Normal
> ME: Error Code              : No Error
> ME: Progress Phase          : Clean Moff->Mx wake
> ME: Power Management Event  : Non-power cycle reset
> ME: Progress Phase State    : Unknown 0x10
> 
> ME: Extend Register not valid
> 
> ME: Firmware Version 11.8.3425.50 (code) 11.8.3425.50 (recovery) 11.8.3425.50 (fitc)
> 
> ME Capability: Full Network manageability                 : ON
> ME Capability: Regular Network manageability              : OFF
> ME Capability: Manageability                              : ON
> ME Capability: Small business technology                  : OFF
> ME Capability: Level III manageability                    : OFF
> ME Capability: IntelR Anti-Theft (AT)                     : OFF
> ME Capability: IntelR Capability Licensing Service (CLS)  : ON
> ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
> ME Capability: ICC Over Clocking                          : OFF
> ME Capability: Protected Audio Video Path (PAVP)          : ON
> ME Capability: IPV6                                       : ON
> ME Capability: KVM Remote Control (KVM)                   : ON
> ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
> ME Capability: Virtual LAN (VLAN)                         : ON
> ME Capability: TLS                                        : ON
> ME Capability: Wireless LAN (WLAN)                        : ON
> 
> 
> What do we make of this?
> 
You might try to contact your HW vendor. Probably your HW was Windows OS
oriented. You may ask for a FW/NVM update without ME, or try to replace the
HW with one that has no ME.
> I see same problem with both u757 and u727.
> No problem with either e736 and e754.
> 
> Jan-Marek
> 
Sasha
Jan-Marek Glogowski Jan. 9, 2019, 5:07 p.m. | #14
Am 09.01.19 um 16:07 schrieb Neftin, Sasha:
> On 1/8/2019 13:15, Jan-Marek Glogowski wrote:
>> Hi Paul,
>>
>> Am 08.01.19 um 11:15 schrieb Paul Menzel:
>>> On 01/08/19 10:59, Jan-Marek Glogowski wrote:
>>>
>>>> Am 08.01.19 um 09:31 schrieb Neftin, Sasha:
>>>>> On 1/7/2019 18:37, Jan-Marek Glogowski wrote:
>>>
>>> […]
>>>
>>>>> Since you still read 0x40000000 value in the status register it is causing me to think that ME
>>>>> works. Another way I think you should to go ask your vendors for last updated NVM (or with ME
>>>>> disabled if possible) for your HW. Since I219V works as properly, I expected I219-LM without ME
>>>>> works too.
>>>>
>>>>  From the logs I sent:
>>>>
>>>> I217-LM: "status 0x40080083 => 1000 Mbps, Full Duplex"
>>>> I219-V: "status 0x40080083 => 1000 Mbps, Full Duplex"
>>>> I219-LM auto-nego ok: "status 0x80083 => 1000 Mbps, Full Duplex"
>>>> I219-LM auto-nego broken: "status 0x40080003 => 10 Mbps, Full Duplex"
>>>>
>>>> According to the BIOS both LM-variants have Intel ME disabled. If I can't trust the BIOS, is
>>>> there a
>>>> way to check this?
>>>
>>> What does intelmetool from the coreboot project show?
>>>
>>>      $ git clone https://review.coreboot.org/coreboot.git
>>>      $ cd coreboot
>>>      $ cd util/intelmetool
>>>      $ make -j
>>>      $ sudo ./intelmetool -m
>>
>>
>> me-e736.log-norm
>> ----------------
>> MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1
>>
>> ME Status   : 0x90000245
>> ME Status 2 : 0x86110306
>>
>> ME: FW Partition Table      : OK
>> ME: Bringup Loader Failure  : NO
>> ME: Firmware Init Complete  : YES
>> ME: Manufacturing Mode      : NO
>> ME: Boot Options Present    : NO
>> ME: Update In Progress      : NO
>> ME: Current Working State   : Normal
>> ME: Current Operation State : M0 with UMA
>> ME: Current Operation Mode  : Normal
>> ME: Error Code              : No Error
>> ME: Progress Phase          : Clean Moff->Mx wake
>> ME: Power Management Event  : Pseudo-global reset
>> ME: Progress Phase State    : Unknown 0x11
>>
>> ME: Extend Register not valid
>>
>> ME: Firmware Version 11.0.1173.0 (code) 11.0.1173.0 (recovery) 11.0.1173.0 (fitc)
>>
>> ME Capability: Full Network manageability                 : OFF
>> ME Capability: Regular Network manageability              : OFF
>> ME Capability: Manageability                              : OFF
>> ME Capability: Small business technology                  : OFF
>> ME Capability: Level III manageability                    : OFF
>> ME Capability: IntelR Anti-Theft (AT)                     : OFF
>> ME Capability: IntelR Capability Licensing Service (CLS)  : ON
>> ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
>> ME Capability: ICC Over Clocking                          : ON
>> ME Capability: Protected Audio Video Path (PAVP)          : ON
>> ME Capability: IPV6                                       : OFF
>> ME Capability: KVM Remote Control (KVM)                   : OFF
>> ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
>> ME Capability: Virtual LAN (VLAN)                         : ON
>> ME Capability: TLS                                        : OFF
>> ME Capability: Wireless LAN (WLAN)                        : OFF
>>
>>
>> me-e754.log-norm
>> ----------------
>> Bad news, you have a `QM87 Express LPC Controller` so you have ME hardware on board and you can't
>> control or disable it, continuing...
>>
>> MEI found: [8086:8c3a] 8 Series/C220 Series Chipset Family MEI Controller #1
>>
>> ME Status   : 0x1e000245
>> ME Status 2 : 0x60002306
>>
>> ME: FW Partition Table      : OK
>> ME: Bringup Loader Failure  : NO
>> ME: Firmware Init Complete  : YES
>> ME: Manufacturing Mode      : NO
>> ME: Boot Options Present    : NO
>> ME: Update In Progress      : NO
>> ME: Current Working State   : Normal
>> ME: Current Operation State : M0 with UMA
>> ME: Current Operation Mode  : Normal
>> ME: Error Code              : No Error
>> ME: Progress Phase          : Host Communication
>> ME: Power Management Event  : Clean Moff->Mx wake
>> ME: Progress Phase State    : Host communication established
>>
>> ME: Extend SHA-256: d536aea220d776c0d26baaffc9832af56871511a7e304b37783b0fe7b8929503
>>
>> ME: Firmware Version 9.0.1467.22 (code) 9.0.1467.22 (recovery) 9.0.1452.21 (fitc)
>>
>> ME Capability: Full Network manageability                 : OFF
>> ME Capability: Regular Network manageability              : OFF
>> ME Capability: Manageability                              : ON
>> ME Capability: Small business technology                  : ON
>> ME Capability: Level III manageability                    : OFF
>> ME Capability: IntelR Anti-Theft (AT)                     : ON
>> ME Capability: IntelR Capability Licensing Service (CLS)  : ON
>> ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
>> ME Capability: ICC Over Clocking                          : ON
>> ME Capability: Protected Audio Video Path (PAVP)          : ON
>> ME Capability: IPV6                                       : OFF
>> ME Capability: KVM Remote Control (KVM)                   : OFF
>> ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
>> ME Capability: Virtual LAN (VLAN)                         : ON
>> ME Capability: TLS                                        : ON
>> ME Capability: Wireless LAN (WLAN)                        : OFF
>>
>>
>> me-u727.log-norm
>> ----------------
>> MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1
>>
>> ME Status   : 0xa0000245
>> ME Status 2 : 0x89108106
>>
>> ME: FW Partition Table      : OK
>> ME: Bringup Loader Failure  : NO
>> ME: Firmware Init Complete  : YES
>> ME: Manufacturing Mode      : NO
>> ME: Boot Options Present    : NO
>> ME: Update In Progress      : NO
>> ME: Current Working State   : Normal
>> ME: Current Operation State : M0 with UMA
>> ME: Current Operation Mode  : Normal
>> ME: Error Code              : No Error
>> ME: Progress Phase          : Clean Moff->Mx wake
>> ME: Power Management Event  : Non-power cycle reset
>> ME: Progress Phase State    : Unknown 0x10
>>
>> ME: Extend Register not valid
>>
>> ME: Firmware Version 11.6.3287.29 (code) 11.6.3287.29 (recovery) 11.6.3287.29 (fitc)
>>
>> ME Capability: Full Network manageability                 : OFF
>> ME Capability: Regular Network manageability              : OFF
>> ME Capability: Manageability                              : ON
>> ME Capability: Small business technology                  : ON
>> ME Capability: Level III manageability                    : OFF
>> ME Capability: IntelR Anti-Theft (AT)                     : OFF
>> ME Capability: IntelR Capability Licensing Service (CLS)  : ON
>> ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
>> ME Capability: ICC Over Clocking                          : OFF
>> ME Capability: Protected Audio Video Path (PAVP)          : ON
>> ME Capability: IPV6                                       : OFF
>> ME Capability: KVM Remote Control (KVM)                   : OFF
>> ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
>> ME Capability: Virtual LAN (VLAN)                         : ON
>> ME Capability: TLS                                        : OFF
>> ME Capability: Wireless LAN (WLAN)                        : OFF
>>
>>
>> me-u757.log-norm
>> ----------------
>> MEI found: [8086:9d3a] Sunrise Point-LP CSME HECI #1
>>
>> ME Status   : 0x90000245
>> ME Status 2 : 0x89108106
>>
>> ME: FW Partition Table      : OK
>> ME: Bringup Loader Failure  : NO
>> ME: Firmware Init Complete  : YES
>> ME: Manufacturing Mode      : NO
>> ME: Boot Options Present    : NO
>> ME: Update In Progress      : NO
>> ME: Current Working State   : Normal
>> ME: Current Operation State : M0 with UMA
>> ME: Current Operation Mode  : Normal
>> ME: Error Code              : No Error
>> ME: Progress Phase          : Clean Moff->Mx wake
>> ME: Power Management Event  : Non-power cycle reset
>> ME: Progress Phase State    : Unknown 0x10
>>
>> ME: Extend Register not valid
>>
>> ME: Firmware Version 11.8.3425.50 (code) 11.8.3425.50 (recovery) 11.8.3425.50 (fitc)
>>
>> ME Capability: Full Network manageability                 : ON
>> ME Capability: Regular Network manageability              : OFF
>> ME Capability: Manageability                              : ON
>> ME Capability: Small business technology                  : OFF
>> ME Capability: Level III manageability                    : OFF
>> ME Capability: IntelR Anti-Theft (AT)                     : OFF
>> ME Capability: IntelR Capability Licensing Service (CLS)  : ON
>> ME Capability: IntelR Power Sharing Technology (MPC)      : OFF
>> ME Capability: ICC Over Clocking                          : OFF
>> ME Capability: Protected Audio Video Path (PAVP)          : ON
>> ME Capability: IPV6                                       : ON
>> ME Capability: KVM Remote Control (KVM)                   : ON
>> ME Capability: Outbreak Containment Heuristic (OCH)       : OFF
>> ME Capability: Virtual LAN (VLAN)                         : ON
>> ME Capability: TLS                                        : ON
>> ME Capability: Wireless LAN (WLAN)                        : ON
>>
>>
>> What do we make of this?
>>
> You might try to contact your HW vendor. Probably your HW was Windows OS oriented.You may ask for a
> FW/NVM update no ME or try to replace the HW on none ME.

What part of ME needs to be disabled?
And - as you can see - both of the e-series also have some ME settings on and work.

The first two dumps are from my E-series notebooks and they work just fine with their ME settings.
The other two dumps are from my broken U-series. I don't understand why ME should cause a problem for
U and not for E.

And Intel CPUs don't work without ME AFAIK.
And all my HW are laptops, so there is nothing I can replace.

So please tell me what kind of problems you see with the U-series settings, compared to their E
counterparts, that might need a vendor fix.

Thanks

Jan-Marek

Patch

diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index fd550de..3cd9f99 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -221,6 +221,7 @@ 
 #define E1000_STATUS_LAN_INIT_DONE 0x00000200   /* Lan Init Completion by NVM */
 #define E1000_STATUS_PHYRA      0x00000400      /* PHY Reset Asserted */
 #define E1000_STATUS_GIO_MASTER_ENABLE	0x00080000	/* Master Req status */
+#define E1000_STATUS_AUTONEG    0x40000000      /* in auto-negotiation */
 
 #define HALF_DUPLEX 1
 #define FULL_DUPLEX 2
diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index fd59970..8588eb7 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1390,7 +1390,8 @@  static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 		u16 speed;
 		u8 duplex;
 
-		e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex);
+		if (e1000e_get_speed_and_duplex_copper(hw, &speed, &duplex))
+			goto out;
 		tipg_reg = er32(TIPG);
 		tipg_reg &= ~E1000_TIPG_IPGT_MASK;
 
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index 19c816c..ada8fbb 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -1310,6 +1310,8 @@  s32 e1000e_get_speed_and_duplex_copper(struct e1000_hw *hw, u16 *speed,
 
 	status = er32(STATUS);
 
+	if (status & E1000_STATUS_AUTONEG)
+		return 1;
 	if (!(status & E1000_STATUS_LU))
 		return 1;