
[net-next,v6,4/7] ixgbevf: Add a RETA query code

Message ID 1427050887-19412-5-git-send-email-vladz@cloudius-systems.com
State Awaiting Upstream, archived
Delegated to: David Miller

Commit Message

Vlad Zolotarov March 22, 2015, 7:01 p.m. UTC
   - Added support for a new API version.
   - Added the query implementation in ixgbevf.

Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
---
New in v6:
   - Add a proper return code when an operation is blocked by PF.

New in v3:
   - Adjusted to the new interface IXGBE_VF_GET_RETA command.
   - Added a proper support for x550 devices.

New in v1 (compared to RFC):
   - Use "if-else" statement instead of a "switch-case" for a single option case
     (in ixgbevf_get_reta()).
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  4 +-
 drivers/net/ethernet/intel/ixgbevf/mbx.h          |  8 ++
 drivers/net/ethernet/intel/ixgbevf/vf.c           | 92 +++++++++++++++++++++++
 drivers/net/ethernet/intel/ixgbevf/vf.h           |  1 +
 4 files changed, 104 insertions(+), 1 deletion(-)

Comments

Kirsher, Jeffrey T March 23, 2015, 10:31 a.m. UTC | #1
On Sun, 2015-03-22 at 21:01 +0200, Vlad Zolotarov wrote:
>    - Added a new API version support.
>    - Added the query implementation in the ixgbevf.
> 
> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
> ---
> New in v6:
>    - Add a proper return code when an operation is blocked by PF.
> 
> New in v3:
>    - Adjusted to the new interface IXGBE_VF_GET_RETA command.
>    - Added a proper support for x550 devices.
> 
> New in v1 (compared to RFC):
>    - Use "if-else" statement instead of a "switch-case" for a single
> option case
>      (in ixgbevf_get_reta()).
> ---
>  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  4 +-
>  drivers/net/ethernet/intel/ixgbevf/mbx.h          |  8 ++
>  drivers/net/ethernet/intel/ixgbevf/vf.c           | 92
> +++++++++++++++++++++++
>  drivers/net/ethernet/intel/ixgbevf/vf.h           |  1 +
>  4 files changed, 104 insertions(+), 1 deletion(-)

Thanks Vlad, I have applied your patch to my next-queue tree.
Tantilov, Emil S March 23, 2015, 11:46 p.m. UTC | #2
>-----Original Message-----
>From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Vlad Zolotarov
>Sent: Sunday, March 22, 2015 12:01 PM
>
>Subject: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>
>   - Added a new API version support.
>   - Added the query implementation in the ixgbevf.
>
>Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>---
>New in v6:
>   - Add a proper return code when an operation is blocked by PF.
>
>New in v3:
>   - Adjusted to the new interface IXGBE_VF_GET_RETA command.
>   - Added a proper support for x550 devices.
>
>New in v1 (compared to RFC):
>   - Use "if-else" statement instead of a "switch-case" for a single option case
>     (in ixgbevf_get_reta()).
>---
> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  4 +-
> drivers/net/ethernet/intel/ixgbevf/mbx.h          |  8 ++
> drivers/net/ethernet/intel/ixgbevf/vf.c           | 92 +++++++++++++++++++++++
> drivers/net/ethernet/intel/ixgbevf/vf.h           |  1 +
> 4 files changed, 104 insertions(+), 1 deletion(-)
>
>diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>index 4ee15ad..4787fcf 100644
>--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>@@ -2030,7 +2030,8 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
> static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
> {
> 	struct ixgbe_hw *hw = &adapter->hw;
>-	int api[] = { ixgbe_mbox_api_11,
>+	int api[] = { ixgbe_mbox_api_12,
>+		      ixgbe_mbox_api_11,
> 		      ixgbe_mbox_api_10,
> 		      ixgbe_mbox_api_unknown };
> 	int err = 0, idx = 0;
>@@ -3712,6 +3713,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
> 
> 	switch (adapter->hw.api_version) {
> 	case ixgbe_mbox_api_11:
>+	case ixgbe_mbox_api_12:
> 		max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
> 		break;
> 	default:

You need another case for ixgbe_mbox_api_12 in ixgbevf_set_num_queues(), otherwise the driver will load with a single queue.
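
For illustration, the kind of change meant here would look roughly like
this in ixgbevf_set_num_queues() (the context lines are approximate, so
treat this as a sketch rather than the exact hunk):

	switch (hw->api_version) {
	case ixgbe_mbox_api_11:
	case ixgbe_mbox_api_12:		/* <-- the missing case */
		adapter->num_rx_queues = rss;
		adapter->num_tx_queues = rss;
	default:
		break;
	}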

Thanks,
Emil

Vlad Zolotarov March 24, 2015, 10:25 a.m. UTC | #3
On 03/24/15 01:46, Tantilov, Emil S wrote:
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Vlad Zolotarov
>> Sent: Sunday, March 22, 2015 12:01 PM
>>
>> Subject: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>
>>    - Added a new API version support.
>>    - Added the query implementation in the ixgbevf.
>>
>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>> ---
>> New in v6:
>>    - Add a proper return code when an operation is blocked by PF.
>>
>> New in v3:
>>    - Adjusted to the new interface IXGBE_VF_GET_RETA command.
>>    - Added a proper support for x550 devices.
>>
>> New in v1 (compared to RFC):
>>    - Use "if-else" statement instead of a "switch-case" for a single option case
>>      (in ixgbevf_get_reta()).
>> ---
>> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  4 +-
>> drivers/net/ethernet/intel/ixgbevf/mbx.h          |  8 ++
>> drivers/net/ethernet/intel/ixgbevf/vf.c           | 92 +++++++++++++++++++++++
>> drivers/net/ethernet/intel/ixgbevf/vf.h           |  1 +
>> 4 files changed, 104 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>> index 4ee15ad..4787fcf 100644
>> --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>> +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>> @@ -2030,7 +2030,8 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
>> static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
>> {
>> 	struct ixgbe_hw *hw = &adapter->hw;
>> -	int api[] = { ixgbe_mbox_api_11,
>> +	int api[] = { ixgbe_mbox_api_12,
>> +		      ixgbe_mbox_api_11,
>> 		      ixgbe_mbox_api_10,
>> 		      ixgbe_mbox_api_unknown };
>> 	int err = 0, idx = 0;
>> @@ -3712,6 +3713,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
>>
>> 	switch (adapter->hw.api_version) {
>> 	case ixgbe_mbox_api_11:
>> +	case ixgbe_mbox_api_12:
>> 		max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
>> 		break;
>> 	default:
> You need another case for ixgbe_mbox_api_12 in ixgbevf_set_num_queues(), otherwise the driver will load with a single queue.

Right. I missed this place since it was added after the first
series version and 'git rebase' silently integrated it... ;)

I have a question about this new code, though. Why does ixgbevf ignore
the IXGBE_VF_RX_QUEUES value that the PF returns in IXGBE_VF_GET_QUEUES?
The PF returns 1 there, by the way (see ixgbe_get_vf_queues()), despite
the fact that it configures the VF to have up to 4 queues... I do
understand why it allows up to 4 queues, but why does it return 1 for
IXGBE_VF_RX_QUEUES in IXGBE_VF_GET_QUEUES? Shouldn't it return 4 there?

Could somebody clarify this design to me, please?

>
> Thanks,
> Emil
>

Tantilov, Emil S March 24, 2015, 6:12 p.m. UTC | #4
>-----Original Message-----
>From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com] 
>Sent: Tuesday, March 24, 2015 3:26 AM
>Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>
>
>
>On 03/24/15 01:46, Tantilov, Emil S wrote:
>>> -----Original Message-----
>>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Vlad Zolotarov
>>> Sent: Sunday, March 22, 2015 12:01 PM
>>>
>>> Subject: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>>
>>>    - Added a new API version support.
>>>    - Added the query implementation in the ixgbevf.
>>>
>>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>>> ---
>>> New in v6:
>>>    - Add a proper return code when an operation is blocked by PF.
>>>
>>> New in v3:
>>>    - Adjusted to the new interface IXGBE_VF_GET_RETA command.
>>>    - Added a proper support for x550 devices.
>>>
>>> New in v1 (compared to RFC):
>>>    - Use "if-else" statement instead of a "switch-case" for a single option case
>>>      (in ixgbevf_get_reta()).
>>> ---
>>> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  4 +-
>>> drivers/net/ethernet/intel/ixgbevf/mbx.h          |  8 ++
>>> drivers/net/ethernet/intel/ixgbevf/vf.c           | 92 +++++++++++++++++++++++
>>> drivers/net/ethernet/intel/ixgbevf/vf.h           |  1 +
>>> 4 files changed, 104 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c >>>b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>>> index 4ee15ad..4787fcf 100644
>>> --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>>> +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>>> @@ -2030,7 +2030,8 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
>>> static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
>>> {
>>> 	struct ixgbe_hw *hw = &adapter->hw;
>>> -	int api[] = { ixgbe_mbox_api_11,
>>> +	int api[] = { ixgbe_mbox_api_12,
>>> +		      ixgbe_mbox_api_11,
>>> 		      ixgbe_mbox_api_10,
>>> 		      ixgbe_mbox_api_unknown };
>>> 	int err = 0, idx = 0;
>>> @@ -3712,6 +3713,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
>>>
>>> 	switch (adapter->hw.api_version) {
>>> 	case ixgbe_mbox_api_11:
>>> +	case ixgbe_mbox_api_12:
>>> 		max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
>>> 		break;
>>> 	default:
>> You need another case for ixgbe_mbox_api_12 in ixgbevf_set_num_queues(), otherwise the driver will load with a >>single queue.
>
>Right. I've missed this place since it's been added after the first 
>series version and 'git rebase' silently integrated it... ;)
>
>I have a question about this new code though. Why does ixgbevf ignores 
>IXGBE_VF_RX_QUEUES PF returns it in IXGBE_VF_GET_QUEUE?
>PF returns there 1 by the way (see ixgbe_get_vf_queues()) despite the 
>fact it configures the VF to have up to 4 queues... I do understand why 
>it allows up to 4 queues but why does it return 1 for IXGBE_VF_RX_QUEUES 
>in IXGBE_VF_GET_QUEUE? Shouldn't it return 4 there?

I'm not sure where you see this; on my setup ixgbevf_get_queues() gets 4 in msg[IXGBE_VF_RX/TX_QUEUES], which is used to set hw->mac.max_rx/tx_queues.

BTW - there are other issues with your patches. The indirection table seems to come out as all 0s and the VF driver reports link down/up when querying it.

Where is this information useful anyway - what is the use case? There is no description in your patches for why all this is needed.

Thanks,
Emil


Vlad Zolotarov March 24, 2015, 7:06 p.m. UTC | #5
On 03/24/15 20:12, Tantilov, Emil S wrote:
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Tuesday, March 24, 2015 3:26 AM
>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>
>>
>>
>> On 03/24/15 01:46, Tantilov, Emil S wrote:
>>>> -----Original Message-----
>>>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On Behalf Of Vlad Zolotarov
>>>> Sent: Sunday, March 22, 2015 12:01 PM
>>>>
>>>> Subject: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>>>
>>>>     - Added a new API version support.
>>>>     - Added the query implementation in the ixgbevf.
>>>>
>>>> Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
>>>> ---
>>>> New in v6:
>>>>     - Add a proper return code when an operation is blocked by PF.
>>>>
>>>> New in v3:
>>>>     - Adjusted to the new interface IXGBE_VF_GET_RETA command.
>>>>     - Added a proper support for x550 devices.
>>>>
>>>> New in v1 (compared to RFC):
>>>>     - Use "if-else" statement instead of a "switch-case" for a single option case
>>>>       (in ixgbevf_get_reta()).
>>>> ---
>>>> drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  4 +-
>>>> drivers/net/ethernet/intel/ixgbevf/mbx.h          |  8 ++
>>>> drivers/net/ethernet/intel/ixgbevf/vf.c           | 92 +++++++++++++++++++++++
>>>> drivers/net/ethernet/intel/ixgbevf/vf.h           |  1 +
>>>> 4 files changed, 104 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c >>>b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>>>> index 4ee15ad..4787fcf 100644
>>>> --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>>>> +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>>>> @@ -2030,7 +2030,8 @@ static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
>>>> static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
>>>> {
>>>> 	struct ixgbe_hw *hw = &adapter->hw;
>>>> -	int api[] = { ixgbe_mbox_api_11,
>>>> +	int api[] = { ixgbe_mbox_api_12,
>>>> +		      ixgbe_mbox_api_11,
>>>> 		      ixgbe_mbox_api_10,
>>>> 		      ixgbe_mbox_api_unknown };
>>>> 	int err = 0, idx = 0;
>>>> @@ -3712,6 +3713,7 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
>>>>
>>>> 	switch (adapter->hw.api_version) {
>>>> 	case ixgbe_mbox_api_11:
>>>> +	case ixgbe_mbox_api_12:
>>>> 		max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
>>>> 		break;
>>>> 	default:
>>> You need another case for ixgbe_mbox_api_12 in ixgbevf_set_num_queues(), otherwise the driver will load with a >>single queue.
>> Right. I've missed this place since it's been added after the first
>> series version and 'git rebase' silently integrated it... ;)
>>
>> I have a question about this new code though. Why does ixgbevf ignores
>> IXGBE_VF_RX_QUEUES PF returns it in IXGBE_VF_GET_QUEUE?
>> PF returns there 1 by the way (see ixgbe_get_vf_queues()) despite the
>> fact it configures the VF to have up to 4 queues... I do understand why
>> it allows up to 4 queues but why does it return 1 for IXGBE_VF_RX_QUEUES
>> in IXGBE_VF_GET_QUEUE? Shouldn't it return 4 there?
> I'm not sure where you see this, on my setup ixgbevf_get_queues() gets 4 in msg[IXGBE_VF_R/TX_QUEUES] which is used to set hw->mac.max_t/rx_queues.

Right. I misread the __ALIGN_MASK() macro.
But then I can't see where max_rx_queues is used. 
ixgbevf_set_num_queues() sets adapter->num_rx_queues, ignoring the above
value, if num_tcs is less than or equal to 1:

/* fetch queue configuration from the PF */
	err = ixgbevf_get_queues(hw, &num_tcs, &def_q);

	spin_unlock_bh(&adapter->mbx_lock);

	if (err)
		return;

	/* we need as many queues as traffic classes */
	if (num_tcs > 1) {
		adapter->num_rx_queues = num_tcs;
	} else {
		u16 rss = min_t(u16, num_online_cpus(), IXGBEVF_MAX_RSS_QUEUES);

		switch (hw->api_version) {
		case ixgbe_mbox_api_11:
		case ixgbe_mbox_api_12:
			adapter->num_rx_queues = rss;
			adapter->num_tx_queues = rss;
		default:
			break;
		}
	}

This means that if the PF returned 1 in IXGBE_VF_RX_QUEUES and you have
more than 1 CPU, you will still go and configure 2 Rx queues for the VF.
That is, unless I'm missing something again... ;)
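
If the intent is to honor whatever the PF reports there, a minimal sketch
of a possible fix (purely illustrative, not part of this series) would be
to also clamp the RSS queue count in ixgbevf_set_num_queues():

	u16 rss = min_t(u16, num_online_cpus(), IXGBEVF_MAX_RSS_QUEUES);

	/* illustrative only: also respect the PF-reported maximum */
	rss = min_t(u16, rss, hw->mac.max_rx_queues);
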
>
> BTW - there are other issues with your patches. The indirection table seems to come out as all 0s and the VF driver reports link down/up when querying it.

Worked just fine for me on x540.
What is your setup? How did you check it? Did you remember to patch the
"ip" tool and enable the querying?

>
> Where is this information useful anyway - what is the use case? There is no description in your patches for why all this is needed.

I'm not sure it's required to explain why I would want to add standard
ethtool functionality to a driver.
However, since you've asked - we are developing a software infrastructure
that needs the ability to query the VF's indirection table and RSS hash
key from inside the Guest OS (particularly on AWS) in order to be able
to open sockets in such a way that their traffic arrives on a specific CPU.

Once these patches are accepted here, we may start asking Amazon to pull
them too.

thanks,
vlad

>
> Thanks,
> Emil
>
>

Tantilov, Emil S March 24, 2015, 9:04 p.m. UTC | #6
>-----Original Message-----
>From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com] 
>Sent: Tuesday, March 24, 2015 12:06 PM
> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
> I'm not sure where you see this, on my setup ixgbevf_get_queues() gets 4 in msg[IXGBE_VF_R/TX_QUEUES] which is used to set hw->mac.max_t/rx_queues.
>
>Right. I misread the __ALIGN_MASK() macro.
>But then I can't see where max_rx_queues is used. 

It is used when stopping the Tx/Rx queues in ixgbevf_stop_hw_vf().

>ixgbevf_set_num_queues() sets adapter->num_rx_queues ignoring the above 
>value if num_tcs less or equal to 1:
>
>/* fetch queue configuration from the PF */
>	err = ixgbevf_get_queues(hw, &num_tcs, &def_q);
>
>	spin_unlock_bh(&adapter->mbx_lock);
>
>	if (err)
>		return;
>
>	/* we need as many queues as traffic classes */
>	if (num_tcs > 1) {
>		adapter->num_rx_queues = num_tcs;
>	} else {
>		u16 rss = min_t(u16, num_online_cpus(), IXGBEVF_MAX_RSS_QUEUES);
>
>		switch (hw->api_version) {
>		case ixgbe_mbox_api_11:
>		case ixgbe_mbox_api_12:
>			adapter->num_rx_queues = rss;
>			adapter->num_tx_queues = rss;
>		default:
>			break;
>		}
>	}
>
>This means that if PF returned in IXGBE_VF_RX_QUEUES 1 and if u have 
>more than 1 CPU u will still go and configure 2 Rx queues for a VF. This 
>unless I miss something again... ;)

From what I can see, vmdq->mask can only be for 4 or 8 queues, so the PF will not return 1, unless you meant something else.

The comment about the 1 queue is outdated though - I think it's a leftover from the time the VF only allowed a single queue.

>> BTW - there are other issues with your patches. The indirection table seems to come out as all 0s and the VF driver reports link >> down/up when querying it.

>Worked just fine to me on x540.
>What is your setup? How did u check it? Did u remember to patch "ip" 
>tool and enable the querying?

I have x540 as well and used the modified ip; otherwise the operation wouldn't be allowed. I will do some more debugging when I get a chance and get back to you.

>>
>> Where is this information useful anyway - what is the use case? There is no description in your patches for why all this is >>needed.
>
>I'm not sure it's required to explain why would I want to add a standard 
>ethtool functionality to a driver.
>However, since u've asked - we are developing a software infrastructure 
>that needs the ability to query the VF's indirection table and RSS hash 
>key from inside the Guest OS (particularly on AWS) in order to be able 
>to open the socket the way that its traffic would arrive to a specific CPU.

Understanding the use case can help with the implementation. Because you need to get the RSS info from the PF for MACs < x550, this opens up the possibility of abusing the mailbox (even if inadvertently) if someone decides to query that info in a loop - like we have seen happen with net-snmp.

Have you tested what happens if you run:

while true
do
	ethtool --show-rxfh-indir ethX
done

in the background while passing traffic through the VF?

Perhaps storing the RSS key and the table is a better option than having to invoke the mailbox on every read.
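
A rough sketch of what I mean (the helper signatures and the adapter
fields below are assumptions for illustration only):

	/* query once over the mailbox, e.g. when the mbox API is negotiated */
	err = ixgbevf_get_rss_key(hw, adapter->rss_key);
	if (!err)
		err = ixgbevf_get_reta(hw, adapter->rss_indir);

	/* the ethtool query path then copies adapter->rss_key and
	 * adapter->rss_indir instead of invoking the mailbox on every read
	 */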

Thanks,
Emil


Tantilov, Emil S March 24, 2015, 10:50 p.m. UTC | #7
>-----Original Message-----
>From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com] 
>Sent: Tuesday, March 24, 2015 11:41 AM
>To: Tantilov, Emil S; netdev@vger.kernel.org
>Cc: Kirsher, Jeffrey T; avi@cloudius-systems.com; gleb@cloudius-systems.com
>Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>
>
>
>>On 03/24/15 20:12, Tantilov, Emil S wrote:
>>BTW - there are other issues with your patches. The indirection table seems to come out as all 0s and the VF driver reports link down/up when querying it.
>
>Worked just fine to me on x540.
>What is your setup? How did u check it? Did u remember to patch "ip" tool and enable the querying?

The issue with the link is that you do not have proper locking when calling the mailbox, which is messing up the link check subtask. Basically, you need to protect the calls to ixgbevf_get_rss_key and get_reta with the mbx_lock, similar to how the driver does it in all other cases where the mailbox is called.
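
I.e. something along these lines (just a sketch, with the helper call
signatures assumed):

	spin_lock_bh(&adapter->mbx_lock);

	err = ixgbevf_get_rss_key(hw, key);
	if (!err)
		err = ixgbevf_get_reta(hw, indir);

	spin_unlock_bh(&adapter->mbx_lock);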

Thanks,
Emil

Vlad Zolotarov March 25, 2015, 9:27 a.m. UTC | #8
On 03/24/15 23:04, Tantilov, Emil S wrote:
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Tuesday, March 24, 2015 12:06 PM
>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>> I'm not sure where you see this, on my setup ixgbevf_get_queues() gets 4 in msg[IXGBE_VF_R/TX_QUEUES] which is used to set hw->mac.max_t/rx_queues.
>>
>> Right. I misread the __ALIGN_MASK() macro.
>> But then I can't see where max_rx_queues is used.
> It is used when stopping the Tx/Rx queues in ixgbevf_stop_hw_vf().

Of course, I meant it's not used during the adapter->num_rx_queues value 
calculation. Let's move this discussion to the appropriate patch thread.

>
>> ixgbevf_set_num_queues() sets adapter->num_rx_queues ignoring the above
>> value if num_tcs less or equal to 1:
>>
>> /* fetch queue configuration from the PF */
>> 	err = ixgbevf_get_queues(hw, &num_tcs, &def_q);
>>
>> 	spin_unlock_bh(&adapter->mbx_lock);
>>
>> 	if (err)
>> 		return;
>>
>> 	/* we need as many queues as traffic classes */
>> 	if (num_tcs > 1) {
>> 		adapter->num_rx_queues = num_tcs;
>> 	} else {
>> 		u16 rss = min_t(u16, num_online_cpus(), IXGBEVF_MAX_RSS_QUEUES);

>>
>> 		switch (hw->api_version) {
>> 		case ixgbe_mbox_api_11:
>> 		case ixgbe_mbox_api_12:
>> 			adapter->num_rx_queues = rss;
>> 			adapter->num_tx_queues = rss;
>> 		default:
>> 			break;
>> 		}
>> 	}
>>
>> This means that if PF returned in IXGBE_VF_RX_QUEUES 1 and if u have
>> more than 1 CPU u will still go and configure 2 Rx queues for a VF. This
>> unless I miss something again... ;)
>  From what I can see vmdq->mask can be only for 4 or 8 queues, so the PF will not return 1, unless you meant something else.
This is only if the PF is driven by the Linux ixgbe PF driver as it is in
the upstream tree right now. AFAIK the VF driver should be completely
decoupled from the PF driver, and all configuration decisions should be
made based on the VF-PF communication via the VF-PF channel (mailbox).
Please see my comments on the thread of your patches that added these
lines.

>
> The comment about the 1 queue is outdated though - I think it's leftover from the time the VF only allowed single queue.

Looks like it. ;)

>
>>> BTW - there are other issues with your patches. The indirection table seems to come out as all 0s and the VF driver reports link >> down/up when querying it.
>> Worked just fine to me on x540.
>> What is your setup? How did u check it? Did u remember to patch "ip"
>> tool and enable the querying?
> I have x540 as well and used the modified ip, otherwise the operation won't be allowed. I will do some more debugging when I get a chance and will get back to you.

Please make sure you use the v7 patches.

One problem that may still remain is that perhaps I should have protected
the mbox access with the adapter->mbx_lock spinlock, similarly to
ixgbevf_set_num_queues(), since I don't think ethtool ensures the atomicity
of its requests in the context of a specific device, and thus the mbox
could be trashed by ethtool operations running in parallel on different CPUs.


>
>>> Where is this information useful anyway - what is the use case? There is no description in your patches for why all this is >>needed.
>> I'm not sure it's required to explain why would I want to add a standard
>> ethtool functionality to a driver.
>> However, since u've asked - we are developing a software infrastructure
>> that needs the ability to query the VF's indirection table and RSS hash
>> key from inside the Guest OS (particularly on AWS) in order to be able
>> to open the socket the way that its traffic would arrive to a specific CPU.
> Understanding the use case can help with the implementation. Because you need to get the RSS info from the PF for macs < x550 this opens up the possibility to abuse (even if inadvertently) the mailbox if someone decided to query that info in a loop - like we have seen this happen with net-snmp.
>
> Have you tested what happens if you run:
>
> while true
> do
> 	ethtool --show-rxfh-indir ethX
> done
>
> in the background while passing traffic through the VF?
I understand your concerns, but let's start with clarifying a few things.
First, the VF driver is by definition not trusted. If it (or its user)
decides to do anything malicious (like you proposed above) that would
eventually hurt (only this) VF's performance - nobody should care.
However, the right question here would be: "How may the above use case
hurt the corresponding PF's or other VFs' performance?" And since the
mailbox operation involves quite a few MMIO writes and reads, this may
slow the PF quite a bit, and that may be a problem that should be taken
care of. However, it wasn't my patch series that introduced it. The
same problem would arise if the Guest changed the VF's MAC address in a
tight loop like the above. Namely, any VF slow path operation that
eventually causes a VF-PF channel transaction may be used to create an
attack on the PF.

This problem naturally may not be resolved at the VF level but only at the
PF level, since the VF is not a trusted component. One option could be to
allow only a specific number of mbox operations from a specific VF in a
specific time period: e.g. at most 1 operation every jiffy.
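
A minimal sketch of what such PF-side rate limiting might look like (the
field and its placement in the PF's mailbox handler are assumptions, not
existing ixgbe code):

	/* in the PF's per-VF state */
	unsigned long last_mbox_jiffies;

	/* at the top of the PF's VF message handler */
	if (time_before(jiffies, vfinfo->last_mbox_jiffies + 1))
		return -EBUSY;	/* same jiffy - NACK or drop the request */
	vfinfo->last_mbox_jiffies = jiffies;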

I don't see any code handling protection of this sort in the PF driver
at the moment. However, I may be missing some HW configuration that limits
the slow path interrupt rate, thus limiting the rate of mbox requests...

>
> Perhaps storing the RSS key and the table is better option than having to invoke the mailbox on every read.

I don't think this could work, if I understand your proposal correctly.
The only way to cache the result that would decrease the number of mbox
transactions would be to cache it in the VF. But how could I invalidate
this cache if the table content has been changed by the PF? I think the
main source of confusion here is that you assume that the PF driver is a
Linux ixgbe driver that doesn't support an indirection table change at
the moment. As I have explained above, this should not be assumed.

thanks,
vlad

>
> Thanks,
> Emil
>
>

Vlad Zolotarov March 25, 2015, 9:29 a.m. UTC | #9
On 03/25/15 00:50, Tantilov, Emil S wrote:
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Tuesday, March 24, 2015 11:41 AM
>> To: Tantilov, Emil S; netdev@vger.kernel.org
>> Cc: Kirsher, Jeffrey T; avi@cloudius-systems.com; gleb@cloudius-systems.com
>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>
>>
>>
>>> On 03/24/15 20:12, Tantilov, Emil S wrote:
>>> BTW - there are other issues with your patches. The indirection table seems to come out as all 0s and the VF driver reports link down/up when querying it.
>> Worked just fine to me on x540.
>> What is your setup? How did u check it? Did u remember to patch "ip" tool and enable the querying?
> The issue with the link is that you do not have proper locking when calling the mailbox which is messing up the link check subtask. Basically you need to protect the calls to ixgbevf_get_rss_key and get_reta with the mbx_lock similar to how the driver does it in all cases where the mailbox is called.

:D I just wrote you the same thing in a reply to your first email...
Sure. Let me fix it in v8.

thanks,
vlad

>
> Thanks,
> Emil
>

Kirsher, Jeffrey T March 25, 2015, 9:39 a.m. UTC | #10
On Wed, 2015-03-25 at 11:29 +0200, Vlad Zolotarov wrote:
> On 03/25/15 00:50, Tantilov, Emil S wrote:
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Tuesday, March 24, 2015 11:41 AM
> >> To: Tantilov, Emil S; netdev@vger.kernel.org
> >> Cc: Kirsher, Jeffrey T; avi@cloudius-systems.com;
> gleb@cloudius-systems.com
> >> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
> >>
> >>
> >>
> >>> On 03/24/15 20:12, Tantilov, Emil S wrote:
> >>> BTW - there are other issues with your patches. The indirection
> table seems to come out as all 0s and the VF driver reports link
> down/up when querying it.
> >> Worked just fine to me on x540.
> >> What is your setup? How did u check it? Did u remember to patch
> "ip" tool and enable the querying?
> > The issue with the link is that you do not have proper locking when
> calling the mailbox which is messing up the link check subtask.
> Basically you need to protect the calls to ixgbevf_get_rss_key and
> get_reta with the mbx_lock similar to how the driver does it in all
> cases where the mailbox is called.
> 
> :D Just wrote u the same thing in a reply on your first email...
> Sure. Let me fix it in v8.

Since you will be producing a v8, can you please CC
intel-wired-lan@lists.osuosl.org? It is a mailing list we just created
for all Linux kernel patches and kernel development for Intel wired
Ethernet drivers.

We also have a public patchworks project:
http://patchwork.ozlabs.org/project/intel-wired-lan/list/

I had planned on making an announcement on netdev later this week.
Vlad Zolotarov March 25, 2015, 9:41 a.m. UTC | #11
On 03/25/15 11:39, Jeff Kirsher wrote:
> On Wed, 2015-03-25 at 11:29 +0200, Vlad Zolotarov wrote:
>> On 03/25/15 00:50, Tantilov, Emil S wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Tuesday, March 24, 2015 11:41 AM
>>>> To: Tantilov, Emil S; netdev@vger.kernel.org
>>>> Cc: Kirsher, Jeffrey T; avi@cloudius-systems.com;
>> gleb@cloudius-systems.com
>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>>>
>>>>
>>>>
>>>>> On 03/24/15 20:12, Tantilov, Emil S wrote:
>>>>> BTW - there are other issues with your patches. The indirection
>> table seems to come out as all 0s and the VF driver reports link
>> down/up when querying it.
>>>> Worked just fine to me on x540.
>>>> What is your setup? How did u check it? Did u remember to patch
>> "ip" tool and enable the querying?
>>> The issue with the link is that you do not have proper locking when
>> calling the mailbox which is messing up the link check subtask.
>> Basically you need to protect the calls to ixgbevf_get_rss_key and
>> get_reta with the mbx_lock similar to how the driver does it in all
>> cases where the mailbox is called.
>>
>> :D Just wrote u the same thing in a reply on your first email...
>> Sure. Let me fix it in v8.
> Since you will be producing a v8, can you please CC
> intel-wired-lan@lists.osuosl.org it is a mailing list we just created
> for all Linux kernel patches and kernel development for Intel wired
> Ethernet drivers.
>
> We also have a public patchworks project:
> http://patchwork.ozlabs.org/project/intel-wired-lan/list/
>
> I had planned on making an announcement on netdev later this week.

Sure thing, Jeff.


Tantilov, Emil S March 25, 2015, 6:35 p.m. UTC | #12
>-----Original Message-----
>From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com] 
>Sent: Wednesday, March 25, 2015 2:28 AM
>Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code

<snip>

>> Have you tested what happens if you run:
>>
>> while true
>> do
>> 	ethtool --show-rxfh-indir ethX
>> done
>>
>> in the background while passing traffic through the VF?
>I understand your concerns but let's start with clarifying a few things. 
>First, VF driver is by definition not trusted. If it (or its user) 
>decides to do anything malicious (like u proposed above) that would 
>eventually hurt (only this) VF's performance - nobody should care. 
>However the right question here would be: "How the above use case may 
>hurt the corresponding PF or other VFs' performance?" And since the 
>mailbox operation involves quite a few MMIO writes and reads this may 
>slow the PF quite a bit and this may be a problem that should be taken 
>care of. However it wasn't my patch series that have introduced it. The 
>same problem would arise if Guest would change VF's MAC address in a 
>tight loop like above. Namely any VF slow path operation that would 
>eventually cause the VF-PF channel transaction may be used to create an 
>attack on a PF.

There are operations that can be disruptive to the VF, I am not arguing that;
the issue introduced by these patches has mostly to do with the fact that now
we can hit the mailbox more often for what is mostly static information.

Especially with ethtool we already had to deal with an issue caused by net-snmp:
https://sourceforge.net/p/e1000/mailman/message/32188362/

There, net-snmp was being too aggressive when collecting information, even though most of it was static.

>
> Perhaps storing the RSS key and the table is better option than having to invoke the mailbox on every read.

>I don't think this could work if I understand your proposal correctly. 
>The only way to cache the result that would decrease the number of mbox 
>transactions would be to cache it in the VF. But how could i invalidate 
>this cache if the table content has been changed by a PF? I think the 
>main source of a confusion here is that u assume that PF driver is a 
>Linux ixgbe driver that doesn't support an indirection table change at 
>the moment. As I have explained above - this should not be assumed.

You keep mentioning other drivers - what other driver do you mean?
All the PF drivers that enable SRIOV are maintained and supported by Intel.

For HW older than X550 we can simply not allow the RSS hash to be modified if the driver is loaded in SRIOV mode.
This way the RSS info can be read once the driver is loaded. For X550 this can all be done in the VF, so you can avoid calling the mailbox altogether.
I understand this is a bit limiting, but this is due to a HW limitation anyway (VFs do not have their own RSS config).

Thanks,
Emil


Vlad Zolotarov March 25, 2015, 8:17 p.m. UTC | #13
On 03/25/15 20:35, Tantilov, Emil S wrote:
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Wednesday, March 25, 2015 2:28 AM
>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
> <snip>
>
>>> Have you tested what happens if you run:
>>>
>>> while true
>>> do
>>> 	ethtool --show-rxfh-indir ethX
>>> done
>>>
>>> in the background while passing traffic through the VF?
>> I understand your concerns but let's start with clarifying a few things.
>> First, VF driver is by definition not trusted. If it (or its user)
>> decides to do anything malicious (like u proposed above) that would
>> eventually hurt (only this) VF's performance - nobody should care.
>> However the right question here would be: "How the above use case may
>> hurt the corresponding PF or other VFs' performance?" And since the
>> mailbox operation involves quite a few MMIO writes and reads this may
>> slow the PF quite a bit and this may be a problem that should be taken
>> care of. However it wasn't my patch series that have introduced it. The
>> same problem would arise if Guest would change VF's MAC address in a
>> tight loop like above. Namely any VF slow path operation that would
>> eventually cause the VF-PF channel transaction may be used to create an
>> attack on a PF.
> There are operations that can be disruptive to the VF I am not arguing that,
> the issue introduced by these patches has mostly to do with the fact that now
> we can hit the mailbox more often for what is mostly static information.
>
> Especially with ethtool we already had to deal with an issue caused by net-snmp:
> https://sourceforge.net/p/e1000/mailman/message/32188362/
>
> Where net-snmp was being too aggressive when collecting information, even if most of it was static.

Emil, I don't really understand what you are trying to protect against
here. If a user wants to shoot themselves in the foot, they would still
be able to do it with the other mailbox-involving operations, like a MAC
change. So what's the sense in adding useless lines?

>
>> Perhaps storing the RSS key and the table is better option than having to invoke the mailbox on every read.
>> I don't think this could work if I understand your proposal correctly.
>> The only way to cache the result that would decrease the number of mbox
>> transactions would be to cache it in the VF. But how could i invalidate
>> this cache if the table content has been changed by a PF? I think the
>> main source of a confusion here is that u assume that PF driver is a
>> Linux ixgbe driver that doesn't support an indirection table change at
>> the moment. As I have explained above - this should not be assumed.
> You keep mentioning other drivers - what other driver do you mean?
> All the PF drivers that enable SRIOV are maintained and supported by Intel.
>
> For HW older than X550 we can simply not allow the RSS hash to be modified if the driver is loaded in SRIOV mode.
> This way the RSS info can be read once the driver is loaded. For X550 this can all be done in the VF, so you can avoid calling the mailbox altogether.
> I understand this is a bit limiting, but this is due to HW limitation anyway (VFs do not have their own RSS config).

Let me remind you that the Linux, FreeBSD, XEN and DPDK PF drivers are all
open source, so you can't actually go and "not allow" things. ;) And
although Intel developers contribute most of the code, there are and will
be other contributors too, so I doubt the approach proposed above fits
the open source spirit well. ;)

The user should actually not query the indirection table and hash key
too often. And if he/she does - it should be his/her problem.
However, if, like with ixgbevf_set_num_queues(), you insist on your way
of doing this (on caching the indirection table and hash key) - then
please let me know and I will add it. Because, frankly, I care about the
PF part of this series much more than about the VF part... ;)

>
> Thanks,
> Emil
>
>

Alexander Duyck March 25, 2015, 9:04 p.m. UTC | #14
On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>
>
> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>> -----Original Message-----
>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>> <snip>
>>
>>>> Have you tested what happens if you run:
>>>>
>>>> while true
>>>> do
>>>>     ethtool --show-rxfh-indir ethX
>>>> done
>>>>
>>>> in the background while passing traffic through the VF?
>>> I understand your concerns but let's start with clarifying a few 
>>> things.
>>> First, VF driver is by definition not trusted. If it (or its user)
>>> decides to do anything malicious (like u proposed above) that would
>>> eventually hurt (only this) VF's performance - nobody should care.
>>> However the right question here would be: "How the above use case may
>>> hurt the corresponding PF or other VFs' performance?" And since the
>>> mailbox operation involves quite a few MMIO writes and reads this may
>>> slow the PF quite a bit and this may be a problem that should be taken
>>> care of. However it wasn't my patch series that have introduced it. The
>>> same problem would arise if Guest would change VF's MAC address in a
>>> tight loop like above. Namely any VF slow path operation that would
>>> eventually cause the VF-PF channel transaction may be used to create an
>>> attack on a PF.
>> There are operations that can be disruptive to the VF I am not 
>> arguing that,
>> the issue introduced by these patches has mostly to do with the fact 
>> that now
>> we can hit the mailbox more often for what is mostly static information.
>>
>> Especially with ethtool we already had to deal with an issue caused 
>> by net-snmp:
>> https://sourceforge.net/p/e1000/mailman/message/32188362/
>>
>> Where net-snmp was being too aggressive when collecting information, 
>> even if most of it was static.
>
> Emil, I don't really understand what are u trying to protect here 
> against. If a user would want to shoot him/herself in the leg - he/she 
> would still be able to do it with the other mailbox involving 
> operations like MAC change. So, what's the sense to add useless lines?
>
>>
>>> Perhaps storing the RSS key and the table is better option than 
>>> having to invoke the mailbox on every read.
>>> I don't think this could work if I understand your proposal correctly.
>>> The only way to cache the result that would decrease the number of mbox
>>> transactions would be to cache it in the VF. But how could i invalidate
>>> this cache if the table content has been changed by a PF? I think the
>>> main source of a confusion here is that u assume that PF driver is a
>>> Linux ixgbe driver that doesn't support an indirection table change at
>>> the moment. As I have explained above - this should not be assumed.
>> You keep mentioning other drivers - what other driver do you mean?
>> All the PF drivers that enable SRIOV are maintained and supported by 
>> Intel.
>>
>> For HW older than X550 we can simply not allow the RSS hash to be 
>> modified if the driver is loaded in SRIOV mode.
>> This way the RSS info can be read once the driver is loaded. For X550 
>> this can all be done in the VF, so you can avoid calling the mailbox 
>> altogether.
>> I understand this is a bit limiting, but this is due to HW limitation 
>> anyway (VFs do not have their own RSS config).
>
> Let me remind u that Linux, FreeBSD, XEN  and DPDK PF drivers are all 
> open source so u can't actually go and "not allow" things. ;) And 
> although Intel developers contribute most of the code there are and 
> will be other contributors too so I doubt the proposed above approach 
> fits the open source spirit well. ;)

Actually these drivers already support multiple OSes just fine.  The 
part where I think you are confused is that you assume they all use the 
same Mailbox API which they likely wouldn't.  I would suggest taking a 
look at ixgbe_pfvf_api_rev in mbx.h of the VF driver. Different OSes 
have different things that can be supported, so for example the 
ixgbe_mbox_api_20 is reserved for a Solaris based PF/VF combination.  I 
would suspect that FreeBSD will likely have to conform to the existing 
APIs, or report that it only supports a different version of the mailbox 
API.

>
> The user should actually not query the indirection table and a hash 
> key too often. And if he/she does - it should be his/her problem.
> However, if like with the ixgbevf_set_num_queues() u insist on your 
> way of doing this (on caching the indirection table and hash key) - 
> then please let me know and I will add it. Because, frankly, I care 
> about the PF part of this series much more than for the VF part... ;)

I would say you don't need to cache it, but for 82599 and x540 there 
isn't any need to store more than 3 bits per entry, 384b, or 12 DWORDs 
for the entire RETA of the VF since the hardware can support at most 8 
queues w/ SR-IOV.  Then you only need one message instead of 3 which 
will reduce quite a bit of the complication with all of this.
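
For illustration, packing 128 3-bit RETA entries into 12 32-bit mailbox
words could look roughly like this (a standalone sketch, not the driver's
actual message format):

/* 128 entries * 3 bits = 384 bits = 12 DWORDs */
static void pack_reta_3bit(const u8 reta[128], u32 msg[12])
{
	unsigned int i, bit;

	memset(msg, 0, 12 * sizeof(u32));
	for (i = 0; i < 128; i++) {
		bit = i * 3;
		msg[bit / 32] |= (u32)(reta[i] & 0x7) << (bit % 32);
		/* an entry may straddle a 32-bit word boundary */
		if (bit % 32 > 29)
			msg[bit / 32 + 1] |= (reta[i] & 0x7) >> (32 - bit % 32);
	}
}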

Also it might make more sense to start working on displaying this on the 
PF before you start trying to do this on the VF.  As far as I know ixgbe 
still doesn't have this functionality and it would make much more sense 
to enable that first on ixgbe before you start trying to find a way to 
feed the data to the VF.

- Alex
Tantilov, Emil S March 26, 2015, 1:09 a.m. UTC | #15
>-----Original Message-----
>From: Alexander Duyck [mailto:alexander.h.duyck@redhat.com] 
>Sent: Wednesday, March 25, 2015 2:05 PM
>Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>
>
>On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>
>>
>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>> <snip>
>>>
>>>>> Have you tested what happens if you run:
>>>>>
>>>>> while true
>>>>> do
>>>>>     ethtool --show-rxfh-indir ethX
>>>>> done
>>>>>
>>>>> in the background while passing traffic through the VF?
>>>> I understand your concerns but let's start with clarifying a few 
>>>> things.
>>>> First, VF driver is by definition not trusted. If it (or its user)
>>>> decides to do anything malicious (like u proposed above) that would
>>>> eventually hurt (only this) VF's performance - nobody should care.
>>>> However the right question here would be: "How the above use case may
>>>> hurt the corresponding PF or other VFs' performance?" And since the
>>>> mailbox operation involves quite a few MMIO writes and reads this may
>>>> slow the PF quite a bit and this may be a problem that should be taken
>>>> care of. However it wasn't my patch series that have introduced it. The
>>>> same problem would arise if Guest would change VF's MAC address in a
>>>> tight loop like above. Namely any VF slow path operation that would
>>>> eventually cause the VF-PF channel transaction may be used to create an
>>>> attack on a PF.
>>> There are operations that can be disruptive to the VF I am not 
>>> arguing that,
>>> the issue introduced by these patches has mostly to do with the fact 
>>> that now
>>> we can hit the mailbox more often for what is mostly static information.
>>>
>>> Especially with ethtool we already had to deal with an issue caused 
>>> by net-snmp:
>>> https://sourceforge.net/p/e1000/mailman/message/32188362/
>>>
>>> Where net-snmp was being too aggressive when collecting information, 
>>> even if most of it was static.
>>
>> Emil, I don't really understand what are u trying to protect here 
>> against. If a user would want to shoot him/herself in the leg - he/she 
>> would still be able to do it with the other mailbox involving 
>> operations like MAC change. So, what's the sense to add useless lines?
>>
>>>
>>>> Perhaps storing the RSS key and the table is better option than 
>>>> having to invoke the mailbox on every read.
>>>> I don't think this could work if I understand your proposal correctly.
>>>> The only way to cache the result that would decrease the number of mbox
>>>> transactions would be to cache it in the VF. But how could i invalidate
>>>> this cache if the table content has been changed by a PF? I think the
>>>> main source of a confusion here is that u assume that PF driver is a
>>>> Linux ixgbe driver that doesn't support an indirection table change at
>>>> the moment. As I have explained above - this should not be assumed.
>>> You keep mentioning other drivers - what other driver do you mean?
>>> All the PF drivers that enable SRIOV are maintained and supported by 
>>> Intel.
>>>
>>> For HW older than X550 we can simply not allow the RSS hash to be 
>>> modified if the driver is loaded in SRIOV mode.
>>> This way the RSS info can be read once the driver is loaded. For X550 
>>> this can all be done in the VF, so you can avoid calling the mailbox 
>>> altogether.
>>> I understand this is a bit limiting, but this is due to HW limitation 
>>> anyway (VFs do not have their own RSS config).
>>
>> Let me remind u that Linux, FreeBSD, XEN  and DPDK PF drivers are all 
>> open source so u can't actually go and "not allow" things. ;) And 
>> although Intel developers contribute most of the code there are and 
>> will be other contributors too so I doubt the proposed above approach 
>> fits the open source spirit well. ;)
>
>Actually these drivers already support multiple OSes just fine.  The 
>part where I think you are confused is that you assume they all use the 
>same Mailbox API which they likely wouldn't.  I would suggest taking a 
>look at ixgbe_pfvf_api_rev in mbx.h of the VF driver. Different OSes 
>have different things that can be supported, so for example the 
>ixgbe_mbox_api_20 is reserved for a Solaris based PF/VF combination.  I 
>would suspect that FreeBSD will likely have to conform to the existing 
>APIs, or report that it only supports a different version of the mailbox 
>API.
>
>> The user should actually not query the indirection table and a hash 
>> key too often. And if he/she does - it should be his/her problem.
>> However, if like with the ixgbevf_set_num_queues() u insist on your 
>> way of doing this (on caching the indirection table and hash key) - 
>> then please let me know and I will add it. Because, frankly, I care 
>> about the PF part of this series much more than for the VF part... ;)
>
>I would say you don't need to cache it, but for 82599 and x540 there 
>isn't any need to store more than 3 bits per entry, 384b, or 12 DWORDs 
>for the entire RETA of the VF since the hardware can support at most 8 
>queues w/ SR-IOV.  Then you only need one message instead of 3 which 
>will reduce quite a bit of the complication with all of this.

This sounds like a good idea to me.

Thanks Alex,
Emil

>
>Also it might make more sense to start working on displaying this on the 
>PF before you start trying to do this on the VF.  As far as I know ixgbe 
>still doesn't have this functionality and it would make much more sense 
>to enable that first on ixgbe before you start trying to find a way to 
>feed the data to the VF.
>
>- Alex
Vlad Zolotarov March 26, 2015, 9:35 a.m. UTC | #16
On 03/25/15 23:04, Alexander Duyck wrote:
>
> On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>
>>
>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>> <snip>
>>>
>>>>> Have you tested what happens if you run:
>>>>>
>>>>> while true
>>>>> do
>>>>>     ethtool --show-rxfh-indir ethX
>>>>> done
>>>>>
>>>>> in the background while passing traffic through the VF?
>>>> I understand your concerns but let's start with clarifying a few 
>>>> things.
>>>> First, VF driver is by definition not trusted. If it (or its user)
>>>> decides to do anything malicious (like u proposed above) that would
>>>> eventually hurt (only this) VF's performance - nobody should care.
>>>> However the right question here would be: "How the above use case may
>>>> hurt the corresponding PF or other VFs' performance?" And since the
>>>> mailbox operation involves quite a few MMIO writes and reads this may
>>>> slow the PF quite a bit and this may be a problem that should be taken
>>>> care of. However it wasn't my patch series that have introduced it. 
>>>> The
>>>> same problem would arise if Guest would change VF's MAC address in a
>>>> tight loop like above. Namely any VF slow path operation that would
>>>> eventually cause the VF-PF channel transaction may be used to 
>>>> create an
>>>> attack on a PF.
>>> There are operations that can be disruptive to the VF I am not 
>>> arguing that,
>>> the issue introduced by these patches has mostly to do with the fact 
>>> that now
>>> we can hit the mailbox more often for what is mostly static 
>>> information.
>>>
>>> Especially with ethtool we already had to deal with an issue caused 
>>> by net-snmp:
>>> https://sourceforge.net/p/e1000/mailman/message/32188362/
>>>
>>> Where net-snmp was being too aggressive when collecting information, 
>>> even if most of it was static.
>>
>> Emil, I don't really understand what are u trying to protect here 
>> against. If a user would want to shoot him/herself in the leg - 
>> he/she would still be able to do it with the other mailbox involving 
>> operations like MAC change. So, what's the sense to add useless lines?
>>
>>>
>>>> Perhaps storing the RSS key and the table is better option than 
>>>> having to invoke the mailbox on every read.
>>>> I don't think this could work if I understand your proposal correctly.
>>>> The only way to cache the result that would decrease the number of 
>>>> mbox
>>>> transactions would be to cache it in the VF. But how could i 
>>>> invalidate
>>>> this cache if the table content has been changed by a PF? I think the
>>>> main source of a confusion here is that u assume that PF driver is a
>>>> Linux ixgbe driver that doesn't support an indirection table change at
>>>> the moment. As I have explained above - this should not be assumed.
>>> You keep mentioning other drivers - what other driver do you mean?
>>> All the PF drivers that enable SRIOV are maintained and supported by 
>>> Intel.
>>>
>>> For HW older than X550 we can simply not allow the RSS hash to be 
>>> modified if the driver is loaded in SRIOV mode.
>>> This way the RSS info can be read once the driver is loaded. For 
>>> X550 this can all be done in the VF, so you can avoid calling the 
>>> mailbox altogether.
>>> I understand this is a bit limiting, but this is due to HW 
>>> limitation anyway (VFs do not have their own RSS config).
>>
>> Let me remind u that Linux, FreeBSD, XEN  and DPDK PF drivers are all 
>> open source so u can't actually go and "not allow" things. ;) And 
>> although Intel developers contribute most of the code there are and 
>> will be other contributors too so I doubt the proposed above approach 
>> fits the open source spirit well. ;)
>
> Actually these drivers already support multiple OSes just fine. The 
> part where I think you are confused is that you assume they all use 
> the same Mailbox API which they likely wouldn't.  I would suggest 
> taking a look at ixgbe_pfvf_api_rev in mbx.h of the VF driver. 
> Different OSes have different things that can be supported, so for 
> example the ixgbe_mbox_api_20 is reserved for a Solaris based PF/VF 
> combination.  I would suspect that FreeBSD will likely have to conform 
> to the existing APIs, or report that it only supports a different 
> version of the mailbox API.

I didn't assume a common API at all. My point was that you can't just go
and "forbid" standard things like changing the indirection table in the
open source project(s). Therefore you shouldn't assume it, and thus caching
the indirection table doesn't seem like a future-proof solution to me.


>
>>
>> The user should actually not query the indirection table and a hash 
>> key too often. And if he/she does - it should be his/her problem.
>> However, if like with the ixgbevf_set_num_queues() u insist on your 
>> way of doing this (on caching the indirection table and hash key) - 
>> then please let me know and I will add it. Because, frankly, I care 
>> about the PF part of this series much more than for the VF part... ;)
>
> I would say you don't need to cache it, but for 82599 and x540 there 
> isn't any need to store more than 3 bits per entry, 384b, or 12 DWORDs 
> for the entire RETA of the VF since the hardware can support at most 8 
> queues w/ SR-IOV.  Then you only need one message instead of 3 which 
> will reduce quite a bit of the complication with all of this.
>
> Also it might make more sense to start working on displaying this on 
> the PF before you start trying to do this on the VF.  As far as I know 
> ixgbe still doesn't have this functionality and it would make much 
> more sense to enable that first on ixgbe before you start trying to 
> find a way to feed the data to the VF.

Let's agree on the next steps:

 1. I'll reduce the series scope to 82599 and x540 devices.
 2. I'll add the same ethtool operations I've already added to the VF to
    the PF devices as well.
 3. I'll implement the compression that Alex so desperately wants... ;)
 4. I won't implement the caching of the indirection table and RSS hash
    key query results at the VF level.


Please confirm that all of you (Alex, Emil, Jeff) agree to that.
Thanks,
vlad



>
> - Alex

Alexander Duyck March 26, 2015, 2:40 p.m. UTC | #17
On 03/26/2015 02:35 AM, Vlad Zolotarov wrote:
>
>
> On 03/25/15 23:04, Alexander Duyck wrote:
>>
>> On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>>
>>>
>>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>>> -----Original Message-----
>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>>>
>>>
<snip>
>>>>
>>>>> Perhaps storing the RSS key and the table is better option than 
>>>>> having to invoke the mailbox on every read.
>>>>> I don't think this could work if I understand your proposal 
>>>>> correctly.
>>>>> The only way to cache the result that would decrease the number of 
>>>>> mbox
>>>>> transactions would be to cache it in the VF. But how could i 
>>>>> invalidate
>>>>> this cache if the table content has been changed by a PF? I think the
>>>>> main source of a confusion here is that u assume that PF driver is a
>>>>> Linux ixgbe driver that doesn't support an indirection table 
>>>>> change at
>>>>> the moment. As I have explained above - this should not be assumed.
>>>> You keep mentioning other drivers - what other driver do you mean?
>>>> All the PF drivers that enable SRIOV are maintained and supported 
>>>> by Intel.
>>>>
>>>> For HW older than X550 we can simply not allow the RSS hash to be 
>>>> modified if the driver is loaded in SRIOV mode.
>>>> This way the RSS info can be read once the driver is loaded. For 
>>>> X550 this can all be done in the VF, so you can avoid calling the 
>>>> mailbox altogether.
>>>> I understand this is a bit limiting, but this is due to HW 
>>>> limitation anyway (VFs do not have their own RSS config).
>>>
>>> Let me remind u that Linux, FreeBSD, XEN  and DPDK PF drivers are 
>>> all open source so u can't actually go and "not allow" things. ;) 
>>> And although Intel developers contribute most of the code there are 
>>> and will be other contributors too so I doubt the proposed above 
>>> approach fits the open source spirit well. ;)
>>
>> Actually these drivers already support multiple OSes just fine. The 
>> part where I think you are confused is that you assume they all use 
>> the same Mailbox API which they likely wouldn't.  I would suggest 
>> taking a look at ixgbe_pfvf_api_rev in mbx.h of the VF driver. 
>> Different OSes have different things that can be supported, so for 
>> example the ixgbe_mbox_api_20 is reserved for a Solaris based PF/VF 
>> combination.  I would suspect that FreeBSD will likely have to 
>> conform to the existing APIs, or report that it only supports a 
>> different version of the mailbox API.
>
> I didn't assume a common API at all. My point was that u can't just go 
> and "forbid" standard things like changing the indirection table in 
> the open source project(s). Therefore u shouldn't assume it and thus 
> caching the indirection table doesn't seem a future-proof solution to me.
>

The point was the current driver doesn't provide any such option. If you 
plan on adding it, then add it first and then we can talk about it.  
Otherwise it isn't a requirement for any current solution that you might 
implement.  That was the point I was trying to get across.  My advice is 
to avoid scoping in features that aren't there.  If you want to go ahead 
and add it then do so, but otherwise don't limit implementation by 
things that have not yet been added.

>
>>
>>>
>>> The user should actually not query the indirection table and a hash 
>>> key too often. And if he/she does - it should be his/her problem.
>>> However, if like with the ixgbevf_set_num_queues() u insist on your 
>>> way of doing this (on caching the indirection table and hash key) - 
>>> then please let me know and I will add it. Because, frankly, I care 
>>> about the PF part of this series much more than for the VF part... ;)
>>
>> I would say you don't need to cache it, but for 82599 and x540 there 
>> isn't any need to store more than 3 bits per entry, 384b, or 12 
>> DWORDs for the entire RETA of the VF since the hardware can support 
>> at most 8 queues w/ SR-IOV.  Then you only need one message instead 
>> of 3 which will reduce quite a bit of the complication with all of this.
>>
>> Also it might make more sense to start working on displaying this on 
>> the PF before you start trying to do this on the VF.  As far as I 
>> know ixgbe still doesn't have this functionality and it would make 
>> much more sense to enable that first on ixgbe before you start trying 
>> to find a way to feed the data to the VF.
>
> Let's agree on the next steps:
>
> 1. I'll reduce the series scope to 82599 and x540 devices.

For the mailbox implementation that is okay; my advice for the x550 would
be to just read the registers.  They should already be defined in the
driver, so it is just a matter of reversing the process used for writing
the RETA.  You should have a solid grasp of that once you implement the
solution for the PF.

> 2. I'll add the same ethtool operations I've already added to VF to PF
>    devices as well.

Yes, I would recommend PF first, and then VF.  The PF allows for a much 
simpler approach, and you will likely have a better idea of the solution 
for X550.  For the most part you can probably just copy the code already 
in igb.

> 3. I'll implement the compression the Alex so desperately wants... ;)

I've done enough of these mailbox-type setups on this hardware to
realize that you don't want to be dealing with a multi-part message.  It
will make things much easier for you to have this processed as a
single message.
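
[Editor's note: for illustration, a minimal sketch of the single-message
packing being discussed here: at 3 bits per entry the full 128-entry RETA
fits in 12 dwords, which together with the one-dword message header stays
within the 16-dword mailbox.  The helper and buffer names below are made
up for the example and are not code from this series.]

#include <linux/types.h>
#include <linux/string.h>

#define VF_RETA_ENTRIES		128	/* shared 82599/x540 table */
#define VF_RETA_PACKED_DWORDS	12	/* 128 entries * 3 bits = 384 bits */

/* Pack byte-wide RETA entries (queue indices 0..7) into 3-bit fields so
 * the whole table fits in a single mailbox message.  Illustrative only.
 */
static void pack_vf_reta(const u8 *entries, u32 *packed)
{
	unsigned int i;

	memset(packed, 0, VF_RETA_PACKED_DWORDS * sizeof(u32));

	for (i = 0; i < VF_RETA_ENTRIES; i++) {
		u32 bit = i * 3;
		u32 val = entries[i] & 0x7;

		packed[bit / 32] |= val << (bit % 32);
		/* a 3-bit field may straddle a dword boundary */
		if ((bit % 32) > 29)
			packed[bit / 32 + 1] |= val >> (32 - bit % 32);
	}
}

[The VF side would simply reverse the shifts to rebuild the byte-wide
entries after reading the reply.]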

> 4. I won't implement the caching of the indirection and RSS hash key
>    query results in the VF level.

The idea with caching is to keep the number of messages across the 
mailbox to a minimum.  Compressing it down to 3 bits per entry will help 
some, but I am not sure if that is enough to completely mitigate the 
need for caching.  For now though the caching isn't a must-have, and it 
won't be needed for x550 since it can access the table directly.  The 
main thing I would recommend keeping an eye on would be how long it will 
take to complete the messages necessary to get the ethtool info.  If 
there are any scenarios where things take more than 1 second then I 
would say we are taking too long and we would have to look into caching 
because we want to avoid holding the rtnl lock for any extended period 
of time.
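
[Editor's note: if caching ever does become necessary, one simple shape it
could take (purely a sketch with made-up structure and helper names, not
code from this series) is a copy of the table plus a jiffies timestamp, so
the mailbox is hit at most once per second no matter how often ethtool
polls.]

#include <linux/jiffies.h>
#include <linux/string.h>
#include "vf.h"

/* Illustrative cache for the 32-dword RETA read over the mailbox. */
struct vf_reta_cache {
	u32 reta[32];		/* last table read from the PF */
	unsigned long stamp;	/* jiffies of the last refresh */
	bool valid;
};

static int vf_get_reta_cached(struct ixgbe_hw *hw, struct vf_reta_cache *c,
			      u32 *reta)
{
	int err;

	/* refresh over the mailbox at most once a second */
	if (!c->valid || time_after(jiffies, c->stamp + HZ)) {
		err = ixgbevf_get_reta(hw, c->reta);
		if (err)
			return err;
		c->stamp = jiffies;
		c->valid = true;
	}

	memcpy(reta, c->reta, sizeof(c->reta));
	return 0;
}

[As noted elsewhere in the thread, this still leaves the invalidation
question open if the PF ever changes the table underneath the VF.]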

My advice is to get the PF patches in first; then we can look at trying
to get feature parity on the VF.

- Alex
Vlad Zolotarov March 26, 2015, 3:10 p.m. UTC | #18
On 03/26/15 16:40, Alexander Duyck wrote:
>
> On 03/26/2015 02:35 AM, Vlad Zolotarov wrote:
>>
>>
>> On 03/25/15 23:04, Alexander Duyck wrote:
>>>
>>> On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>>>
>>>>
>>>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>>>> -----Original Message-----
>>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>>>>
>>>>
> <snip>
>>>>>
>>>>>> Perhaps storing the RSS key and the table is better option than 
>>>>>> having to invoke the mailbox on every read.
>>>>>> I don't think this could work if I understand your proposal 
>>>>>> correctly.
>>>>>> The only way to cache the result that would decrease the number 
>>>>>> of mbox
>>>>>> transactions would be to cache it in the VF. But how could i 
>>>>>> invalidate
>>>>>> this cache if the table content has been changed by a PF? I think 
>>>>>> the
>>>>>> main source of a confusion here is that u assume that PF driver is a
>>>>>> Linux ixgbe driver that doesn't support an indirection table 
>>>>>> change at
>>>>>> the moment. As I have explained above - this should not be assumed.
>>>>> You keep mentioning other drivers - what other driver do you mean?
>>>>> All the PF drivers that enable SRIOV are maintained and supported 
>>>>> by Intel.
>>>>>
>>>>> For HW older than X550 we can simply not allow the RSS hash to be 
>>>>> modified if the driver is loaded in SRIOV mode.
>>>>> This way the RSS info can be read once the driver is loaded. For 
>>>>> X550 this can all be done in the VF, so you can avoid calling the 
>>>>> mailbox altogether.
>>>>> I understand this is a bit limiting, but this is due to HW 
>>>>> limitation anyway (VFs do not have their own RSS config).
>>>>
>>>> Let me remind u that Linux, FreeBSD, XEN  and DPDK PF drivers are 
>>>> all open source so u can't actually go and "not allow" things. ;) 
>>>> And although Intel developers contribute most of the code there are 
>>>> and will be other contributors too so I doubt the proposed above 
>>>> approach fits the open source spirit well. ;)
>>>
>>> Actually these drivers already support multiple OSes just fine. The 
>>> part where I think you are confused is that you assume they all use 
>>> the same Mailbox API which they likely wouldn't.  I would suggest 
>>> taking a look at ixgbe_pfvf_api_rev in mbx.h of the VF driver. 
>>> Different OSes have different things that can be supported, so for 
>>> example the ixgbe_mbox_api_20 is reserved for a Solaris based PF/VF 
>>> combination.  I would suspect that FreeBSD will likely have to 
>>> conform to the existing APIs, or report that it only supports a 
>>> different version of the mailbox API.
>>
>> I didn't assume a common API at all. My point was that u can't just 
>> go and "forbid" standard things like changing the indirection table 
>> in the open source project(s). Therefore u shouldn't assume it and 
>> thus caching the indirection table doesn't seem a future-proof 
>> solution to me.
>>
>
> The point was the current driver doesn't provide any such option. If 
> you plan on adding it, then add it first and then we can talk about 
> it.  Otherwise it isn't a requirement for any current solution that 
> you might implement.  That was the point I was trying to get across.  
> My advice is to avoid scoping in features that aren't there.  If you 
> want to go ahead and add it then do so, but otherwise don't limit 
> implementation by things that have not yet been added.

Let me remind you that the above was said in the context of caching.
Indirection table modification is an important tool for load balancing.
The fact that ixgbe hasn't implemented it yet doesn't seem justified by
anything other than that you just haven't got to it yet. If you don't do
it sometime soon, somebody else will. That's why IMHO caching in the VF is
not the wisest direction, since I'm not aware of a way for the PF to tell
the VF (driver) to invalidate it. All this is in the context of 82599 and
x540 devices, of course.


>
>>
>>>
>>>>
>>>> The user should actually not query the indirection table and a hash 
>>>> key too often. And if he/she does - it should be his/her problem.
>>>> However, if like with the ixgbevf_set_num_queues() u insist on your 
>>>> way of doing this (on caching the indirection table and hash key) - 
>>>> then please let me know and I will add it. Because, frankly, I care 
>>>> about the PF part of this series much more than for the VF part... ;)
>>>
>>> I would say you don't need to cache it, but for 82599 and x540 there 
>>> isn't any need to store more than 3 bits per entry, 384b, or 12 
>>> DWORDs for the entire RETA of the VF since the hardware can support 
>>> at most 8 queues w/ SR-IOV.  Then you only need one message instead 
>>> of 3 which will reduce quite a bit of the complication with all of 
>>> this.
>>>
>>> Also it might make more sense to start working on displaying this on 
>>> the PF before you start trying to do this on the VF. As far as I 
>>> know ixgbe still doesn't have this functionality and it would make 
>>> much more sense to enable that first on ixgbe before you start 
>>> trying to find a way to feed the data to the VF.
>>
>> Let's agree on the next steps:
>>
>> 1. I'll reduce the series scope to 82599 and x540 devices.
>
> For mailbox implementation that is okay, my advice for the x550 would 
> be to just read the registers.  They should already be defined in the 
> driver so it is just a matter of reversing the process for writing the 
> RETA.  You should have a solid grasp of that once you implement the 
> solution for the PF.

The problem is that I have no means to validate the solution - I have
only x540 devices (and no x550 specs). I'd prefer not to work in the dark
and to let you guys do it...

>
>> 2. I'll add the same ethtool operations I've already added to VF to PF
>>    devices as well.
>
> Yes, I would recommend PF first, and then VF.  The PF allows for a 
> much simpler approach, and you will likely have a better idea of the 
> solution for X550.  For the most part you can probably just copy the 
> code already in igb.



>
>> 3. I'll implement the compression the Alex so desperately wants... ;)
>
> I've done enough of these mailbox type setups on this hardware to 
> realize that you don't want to be dealing with a multi-part message.  
> It will make things so much easier for you to have this processed as a 
> single message.
>
>> 4. I won't implement the caching of the indirection and RSS hash key
>>    query results in the VF level.
>
> The idea with caching is to keep the number of messages across the 
> mailbox to a minimum.  Compressing it down to 3 bits per entry will 
> help some, but I am not sure if that is enough to completely mitigate 
> the need for caching.  For now though the caching isn't a must-have, 
> and it won't be needed for x550 since it can access the table 
> directly.  The main thing I would recommend keeping an eye on would be 
> how long it will take to complete the messages necessary to get the 
> ethtool info.  If there are any scenarios where things take more than 
> 1 second then I would say we are taking too long and we would have to 
> look into caching because we want to avoid holding the rtnl lock for 
> any extended period of time.

I've described the limitations the caching approach imposes above. I
suggest we follow your approach here - solve the problems when they
arrive. ;) I haven't managed to make it take anywhere near one second.

>
> My advice is get the PF patches in first, then we can look at trying 
> to get feature parity on the VF.

"feature parity on the VF"?

I'll send the patches later today. I'll incorporate the new PF code into 
the existing series.

>
> - Alex

Alexander Duyck March 26, 2015, 3:59 p.m. UTC | #19
On 03/26/2015 08:10 AM, Vlad Zolotarov wrote:
>
>
> On 03/26/15 16:40, Alexander Duyck wrote:
>>
>> On 03/26/2015 02:35 AM, Vlad Zolotarov wrote:
>>>
>>>
>>> On 03/25/15 23:04, Alexander Duyck wrote:
>>>>
>>>> On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>>>>
>>>>>
>>>>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>>>>> -----Original Message-----
>>>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>>>>>
>>>>>
>>

<snip>

>>
>>>
>>>>
>>>>>
>>>>> The user should actually not query the indirection table and a 
>>>>> hash key too often. And if he/she does - it should be his/her 
>>>>> problem.
>>>>> However, if like with the ixgbevf_set_num_queues() u insist on 
>>>>> your way of doing this (on caching the indirection table and hash 
>>>>> key) - then please let me know and I will add it. Because, 
>>>>> frankly, I care about the PF part of this series much more than 
>>>>> for the VF part... ;)
>>>>
>>>> I would say you don't need to cache it, but for 82599 and x540 
>>>> there isn't any need to store more than 3 bits per entry, 384b, or 
>>>> 12 DWORDs for the entire RETA of the VF since the hardware can 
>>>> support at most 8 queues w/ SR-IOV. Then you only need one message 
>>>> instead of 3 which will reduce quite a bit of the complication with 
>>>> all of this.
>>>>
>>>> Also it might make more sense to start working on displaying this 
>>>> on the PF before you start trying to do this on the VF. As far as I 
>>>> know ixgbe still doesn't have this functionality and it would make 
>>>> much more sense to enable that first on ixgbe before you start 
>>>> trying to find a way to feed the data to the VF.
>>>
>>> Let's agree on the next steps:
>>>
>>> 1. I'll reduce the series scope to 82599 and x540 devices.
>>
>> For mailbox implementation that is okay, my advice for the x550 would 
>> be to just read the registers.  They should already be defined in the 
>> driver so it is just a matter of reversing the process for writing 
>> the RETA.  You should have a solid grasp of that once you implement 
>> the solution for the PF.
>
> The problem is that I have no means to validate the solution - I have 
> only x540 devices (nor specs). I'd prefer not to work in darkness and 
> let u, guys, do it...

If it helps, the specs for the x540 can be found at
http://www.intel.com/content/www/us/en/network-adapters/10-gigabit-network-adapters/ethernet-x540-datasheet.html.
Section 8 has a full list of the register definitions.

All the code you need should already be stored in ixgbe_setup_reta and
ixgbe_setup_vfreta.  If you do it right you shouldn't even need to read
the hardware.  Odds are you could generate it in software, and that is
what you would be accessing on the PF.  After all, if at some point you
reset the hardware you would need to restore it from software, wouldn't
you?  And since the table is static for now, you should be able to
calculate the value without needing to read a register.  If at some
point in the future you can change the table, then it means a copy has to
be maintained in kernel memory, and at that point you could use that for
the data you would be sending to the VF.
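
[Editor's note: a rough sketch of the "generate it in software" idea; the
helper name and signature are illustrative, not the driver's.  The static
table is just the round-robin fill that ixgbe_setup_reta programs, so the
PF can hand it out without touching a register.]

/* Fill a software copy of the RETA with the usual round-robin pattern;
 * the values can then be reported via ethtool or sent to the VF without
 * reading the hardware back.
 */
static void sw_fill_default_reta(u8 *reta, unsigned int reta_entries,
				 unsigned int rss_queues)
{
	unsigned int i, j = 0;

	for (i = 0; i < reta_entries; i++) {
		reta[i] = j;
		if (++j == rss_queues)
			j = 0;
	}
}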

>>
>>> 4. I won't implement the caching of the indirection and RSS hash key
>>>    query results in the VF level.
>>
>> The idea with caching is to keep the number of messages across the 
>> mailbox to a minimum.  Compressing it down to 3 bits per entry will 
>> help some, but I am not sure if that is enough to completely mitigate 
>> the need for caching.  For now though the caching isn't a must-have, 
>> and it won't be needed for x550 since it can access the table 
>> directly.  The main thing I would recommend keeping an eye on would 
>> be how long it will take to complete the messages necessary to get 
>> the ethtool info.  If there are any scenarios where things take more 
>> than 1 second then I would say we are taking too long and we would 
>> have to look into caching because we want to avoid holding the rtnl 
>> lock for any extended period of time.
>
> I've described the limitations the caching approach imposes above. I 
> suggest we follow your approach here - solve the problems when they 
> arrive. ;) I haven't succeeded to make it take as long as even near 
> one second.
>
>>
>> My advice is get the PF patches in first, then we can look at trying 
>> to get feature parity on the VF.
>
> "feature parity on the VF"?

Get "ethtool -x/X" working on the PF, and then get it working on the 
VF.  That way it makes for very simple verification as you can inspect 
that the PF and VF tables are consistent.  As I said if nothing else you 
can probably take a look at the igb driver to see how this was done on 
the 1Gbps Intel hardware.

>
> I'll send the patches later today. I'll incorporate the new PF code 
> into the existing series.

You might want to break things out into a couple of patch series. Do
the PF driver first just to make sure you have an understanding of what
you expect; think of it as a proof of concept, if nothing else, for how
you want the VF to function. Then submit the VF series as a separate
patch set and include the mailbox API changes.

- Alex

Vlad Zolotarov March 26, 2015, 4:02 p.m. UTC | #20
On 03/26/15 17:59, Alexander Duyck wrote:
>
> On 03/26/2015 08:10 AM, Vlad Zolotarov wrote:
>>
>>
>> On 03/26/15 16:40, Alexander Duyck wrote:
>>>
>>> On 03/26/2015 02:35 AM, Vlad Zolotarov wrote:
>>>>
>>>>
>>>> On 03/25/15 23:04, Alexander Duyck wrote:
>>>>>
>>>>> On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>>>>>
>>>>>>
>>>>>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>>>>>> -----Original Message-----
>>>>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>>>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query 
>>>>>>>> code
>>>>>>>
>>>>>>
>>>
>
> <snip>
>
>>>
>>>>
>>>>>
>>>>>>
>>>>>> The user should actually not query the indirection table and a 
>>>>>> hash key too often. And if he/she does - it should be his/her 
>>>>>> problem.
>>>>>> However, if like with the ixgbevf_set_num_queues() u insist on 
>>>>>> your way of doing this (on caching the indirection table and hash 
>>>>>> key) - then please let me know and I will add it. Because, 
>>>>>> frankly, I care about the PF part of this series much more than 
>>>>>> for the VF part... ;)
>>>>>
>>>>> I would say you don't need to cache it, but for 82599 and x540 
>>>>> there isn't any need to store more than 3 bits per entry, 384b, or 
>>>>> 12 DWORDs for the entire RETA of the VF since the hardware can 
>>>>> support at most 8 queues w/ SR-IOV. Then you only need one message 
>>>>> instead of 3 which will reduce quite a bit of the complication 
>>>>> with all of this.
>>>>>
>>>>> Also it might make more sense to start working on displaying this 
>>>>> on the PF before you start trying to do this on the VF. As far as 
>>>>> I know ixgbe still doesn't have this functionality and it would 
>>>>> make much more sense to enable that first on ixgbe before you 
>>>>> start trying to find a way to feed the data to the VF.
>>>>
>>>> Let's agree on the next steps:
>>>>
>>>> 1. I'll reduce the series scope to 82599 and x540 devices.
>>>
>>> For mailbox implementation that is okay, my advice for the x550 
>>> would be to just read the registers.  They should already be defined 
>>> in the driver so it is just a matter of reversing the process for 
>>> writing the RETA.  You should have a solid grasp of that once you 
>>> implement the solution for the PF.
>>
>> The problem is that I have no means to validate the solution - I have 
>> only x540 devices (nor specs). I'd prefer not to work in darkness and 
>> let u, guys, do it...
>
> If it helps the specs for the x540 can be found at 
> http://www.intel.com/content/www/us/en/network-adapters/10-gigabit-network-adapters/ethernet-x540-datasheet.html. 
> Section 8 has a full list of the register definitions.

I meant that I don't have the x550 specs - I'm working against the x540
spec quite intensively... ;)

>
> All the code you need should already be stored in ixgbe_setup_reta and 
> ixgbe_setup_vfreta.  If you do it right you shouldn't even need to 
> read the hardware.  Odds are you could generate it in software and 
> that is what you would be accessing on the PF.  After all, if at some 
> point you reset the hardware you would need to restore it from 
> software wouldn't you, and since the table is static for now you 
> should be able to calculate the value without need of reading a 
> register.  If at some point in the future you can change the table 
> then it means a copy has to be maintained in kernel memory and at that 
> point you could use that for the data you would be sending to the VF.
>
>>>
>>>> 4. I won't implement the caching of the indirection and RSS hash key
>>>>    query results in the VF level.
>>>
>>> The idea with caching is to keep the number of messages across the 
>>> mailbox to a minimum.  Compressing it down to 3 bits per entry will 
>>> help some, but I am not sure if that is enough to completely 
>>> mitigate the need for caching.  For now though the caching isn't a 
>>> must-have, and it won't be needed for x550 since it can access the 
>>> table directly.  The main thing I would recommend keeping an eye on 
>>> would be how long it will take to complete the messages necessary to 
>>> get the ethtool info.  If there are any scenarios where things take 
>>> more than 1 second then I would say we are taking too long and we 
>>> would have to look into caching because we want to avoid holding the 
>>> rtnl lock for any extended period of time.
>>
>> I've described the limitations the caching approach imposes above. I 
>> suggest we follow your approach here - solve the problems when they 
>> arrive. ;) I haven't succeeded to make it take as long as even near 
>> one second.
>>
>>>
>>> My advice is get the PF patches in first, then we can look at trying 
>>> to get feature parity on the VF.
>>
>> "feature parity on the VF"?
>
> Get "ethtool -x/X" working on the PF, and then get it working on the 
> VF.  That way it makes for very simple verification as you can inspect 
> that the PF and VF tables are consistent.  As I said if nothing else 
> you can probably take a look at the igb driver to see how this was 
> done on the 1Gbps Intel hardware.
>
>>
>> I'll send the patches later today. I'll incorporate the new PF code 
>> into the existing series.
>
> You might want to break things out into a couple series of patches. Do 
> the PF driver first just to make sure you have an understanding of 
> what you expect, think of it as a proof of concept if nothing else for 
> how you want the VF to function, then submit the VF series as a 
> separate patch set and include the mailbox API changes.
>
> - Alex
>

Vlad Zolotarov March 26, 2015, 4:29 p.m. UTC | #21
On 03/26/15 17:59, Alexander Duyck wrote:
>
> On 03/26/2015 08:10 AM, Vlad Zolotarov wrote:
>>
>>
>> On 03/26/15 16:40, Alexander Duyck wrote:
>>>
>>> On 03/26/2015 02:35 AM, Vlad Zolotarov wrote:
>>>>
>>>>
>>>> On 03/25/15 23:04, Alexander Duyck wrote:
>>>>>
>>>>> On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>>>>>
>>>>>>
>>>>>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>>>>>> -----Original Message-----
>>>>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>>>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query 
>>>>>>>> code
>>>>>>>
>>>>>>
>>>
>
> <snip>
>
>>>
>>>>
>>>>>
>>>>>>
>>>>>> The user should actually not query the indirection table and a 
>>>>>> hash key too often. And if he/she does - it should be his/her 
>>>>>> problem.
>>>>>> However, if like with the ixgbevf_set_num_queues() u insist on 
>>>>>> your way of doing this (on caching the indirection table and hash 
>>>>>> key) - then please let me know and I will add it. Because, 
>>>>>> frankly, I care about the PF part of this series much more than 
>>>>>> for the VF part... ;)
>>>>>
>>>>> I would say you don't need to cache it, but for 82599 and x540 
>>>>> there isn't any need to store more than 3 bits per entry, 384b, or 
>>>>> 12 DWORDs for the entire RETA of the VF since the hardware can 
>>>>> support at most 8 queues w/ SR-IOV. Then you only need one message 
>>>>> instead of 3 which will reduce quite a bit of the complication 
>>>>> with all of this.
>>>>>
>>>>> Also it might make more sense to start working on displaying this 
>>>>> on the PF before you start trying to do this on the VF. As far as 
>>>>> I know ixgbe still doesn't have this functionality and it would 
>>>>> make much more sense to enable that first on ixgbe before you 
>>>>> start trying to find a way to feed the data to the VF.
>>>>
>>>> Let's agree on the next steps:
>>>>
>>>> 1. I'll reduce the series scope to 82599 and x540 devices.
>>>
>>> For mailbox implementation that is okay, my advice for the x550 
>>> would be to just read the registers.  They should already be defined 
>>> in the driver so it is just a matter of reversing the process for 
>>> writing the RETA.  You should have a solid grasp of that once you 
>>> implement the solution for the PF.
>>
>> The problem is that I have no means to validate the solution - I have 
>> only x540 devices (nor specs). I'd prefer not to work in darkness and 
>> let u, guys, do it...
>
> If it helps the specs for the x540 can be found at 
> http://www.intel.com/content/www/us/en/network-adapters/10-gigabit-network-adapters/ethernet-x540-datasheet.html. 
> Section 8 has a full list of the register definitions.
>
> All the code you need should already be stored in ixgbe_setup_reta and 
> ixgbe_setup_vfreta.  If you do it right you shouldn't even need to 
> read the hardware.  Odds are you could generate it in software and 
> that is what you would be accessing on the PF. 

I have no problem implementing the PF x550 part. I would need the
proper spec to implement the VF part, though. And since I can't get it
now, I'd rather not do it at the moment... ;)


Sure. This is what I have already implemented. The same goes for the
RSS hash key.

> After all, if at some point you reset the hardware you would need to 
> restore it from software wouldn't you, and since the table is static 
> for now you should be able to calculate the value without need of 
> reading a register.  If at some point in the future you can change the 
> table then it means a copy has to be maintained in kernel memory and 
> at that point you could use that for the data you would be sending to 
> the VF.

Right.

>
>>>
>>>> 4. I won't implement the caching of the indirection and RSS hash key
>>>>    query results in the VF level.
>>>
>>> The idea with caching is to keep the number of messages across the 
>>> mailbox to a minimum.  Compressing it down to 3 bits per entry will 
>>> help some, but I am not sure if that is enough to completely 
>>> mitigate the need for caching.  For now though the caching isn't a 
>>> must-have, and it won't be needed for x550 since it can access the 
>>> table directly.  The main thing I would recommend keeping an eye on 
>>> would be how long it will take to complete the messages necessary to 
>>> get the ethtool info.  If there are any scenarios where things take 
>>> more than 1 second then I would say we are taking too long and we 
>>> would have to look into caching because we want to avoid holding the 
>>> rtnl lock for any extended period of time.
>>
>> I've described the limitations the caching approach imposes above. I 
>> suggest we follow your approach here - solve the problems when they 
>> arrive. ;) I haven't succeeded to make it take as long as even near 
>> one second.
>>
>>>
>>> My advice is get the PF patches in first, then we can look at trying 
>>> to get feature parity on the VF.
>>
>> "feature parity on the VF"?
>
> Get "ethtool -x/X" working on the PF, and then get it working on the 
> VF.  That way it makes for very simple verification as you can inspect 
> that the PF and VF tables are consistent.  As I said if nothing else 
> you can probably take a look at the igb driver to see how this was 
> done on the 1Gbps Intel hardware.

If memory serves me well, I once wrote the ethtool -x|-X implementation
for bnx2x, so I have a general idea of what should be there. ;)
Making ethtool -X work is a bit more delicate, since a simplistic
implementation like the one you have in igb may lead to momentary
out-of-order packet arrival within the same stream - a packet that
arrives later on the wire may be served earlier, since there may still be
unhandled packets on the "old" RSS queue when it arrives. This is unless
updating the RSS indirection table registers ensures that the fast-path
queues are flushed.

The proper solution should ensure that the fast-path queues are drained
before the indirection table is updated. In any case, I'd prefer not to
implement the -X option and to concentrate on the -x option only.
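
[Editor's note: for reference, a minimal sketch of what the -x path on the
VF could look like on top of ixgbevf_get_reta().  This is not the ethtool
patch from this series; it assumes the 82599/x540 layout in which each of
the 32 returned dwords carries four byte-wide entries, and the function
names other than ixgbevf_get_reta() are illustrative.]

static u32 ixgbevf_get_rxfh_indir_size(struct net_device *netdev)
{
	return 128;	/* 82599/x540: RETA shared with the PF */
}

static int ixgbevf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
			    u8 *hfunc)
{
	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
	u32 reta[32];
	int err, i;

	if (hfunc)
		*hfunc = ETH_RSS_HASH_TOP;

	if (!indir)
		return 0;

	err = ixgbevf_get_reta(&adapter->hw, reta);
	if (err)
		return err;

	/* unpack four byte-wide entries from each dword */
	for (i = 0; i < 128; i++)
		indir[i] = (reta[i / 4] >> ((i % 4) * 8)) & 0xff;

	return 0;
}

[Hooking these up as .get_rxfh_indir_size and .get_rxfh in the VF's
ethtool_ops would then make "ethtool -x ethX" work on the VF, with -X left
unimplemented as discussed above.]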

>
>>
>> I'll send the patches later today. I'll incorporate the new PF code 
>> into the existing series.
>
> You might want to break things out into a couple series of patches. Do 
> the PF driver first just to make sure you have an understanding of 
> what you expect, think of it as a proof of concept if nothing else for 
> how you want the VF to function, then submit the VF series as a 
> separate patch set and include the mailbox API changes.

Sounds reasonable.

>
> - Alex
>

Vlad Zolotarov March 26, 2015, 6:10 p.m. UTC | #22
On 03/25/15 23:04, Alexander Duyck wrote:
>
> On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>
>>
>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>> <snip>
>>>
>>>>> Have you tested what happens if you run:
>>>>>
>>>>> while true
>>>>> do
>>>>>     ethtool --show-rxfh-indir ethX
>>>>> done
>>>>>
>>>>> in the background while passing traffic through the VF?
>>>> I understand your concerns but let's start with clarifying a few 
>>>> things.
>>>> First, VF driver is by definition not trusted. If it (or its user)
>>>> decides to do anything malicious (like u proposed above) that would
>>>> eventually hurt (only this) VF's performance - nobody should care.
>>>> However the right question here would be: "How the above use case may
>>>> hurt the corresponding PF or other VFs' performance?" And since the
>>>> mailbox operation involves quite a few MMIO writes and reads this may
>>>> slow the PF quite a bit and this may be a problem that should be taken
>>>> care of. However it wasn't my patch series that have introduced it. 
>>>> The
>>>> same problem would arise if Guest would change VF's MAC address in a
>>>> tight loop like above. Namely any VF slow path operation that would
>>>> eventually cause the VF-PF channel transaction may be used to 
>>>> create an
>>>> attack on a PF.
>>> There are operations that can be disruptive to the VF I am not 
>>> arguing that,
>>> the issue introduced by these patches has mostly to do with the fact 
>>> that now
>>> we can hit the mailbox more often for what is mostly static 
>>> information.
>>>
>>> Especially with ethtool we already had to deal with an issue caused 
>>> by net-snmp:
>>> https://sourceforge.net/p/e1000/mailman/message/32188362/
>>>
>>> Where net-snmp was being too aggressive when collecting information, 
>>> even if most of it was static.
>>
>> Emil, I don't really understand what are u trying to protect here 
>> against. If a user would want to shoot him/herself in the leg - 
>> he/she would still be able to do it with the other mailbox involving 
>> operations like MAC change. So, what's the sense to add useless lines?
>>
>>>
>>>> Perhaps storing the RSS key and the table is better option than 
>>>> having to invoke the mailbox on every read.
>>>> I don't think this could work if I understand your proposal correctly.
>>>> The only way to cache the result that would decrease the number of 
>>>> mbox
>>>> transactions would be to cache it in the VF. But how could i 
>>>> invalidate
>>>> this cache if the table content has been changed by a PF? I think the
>>>> main source of a confusion here is that u assume that PF driver is a
>>>> Linux ixgbe driver that doesn't support an indirection table change at
>>>> the moment. As I have explained above - this should not be assumed.
>>> You keep mentioning other drivers - what other driver do you mean?
>>> All the PF drivers that enable SRIOV are maintained and supported by 
>>> Intel.
>>>
>>> For HW older than X550 we can simply not allow the RSS hash to be 
>>> modified if the driver is loaded in SRIOV mode.
>>> This way the RSS info can be read once the driver is loaded. For 
>>> X550 this can all be done in the VF, so you can avoid calling the 
>>> mailbox altogether.
>>> I understand this is a bit limiting, but this is due to HW 
>>> limitation anyway (VFs do not have their own RSS config).
>>
>> Let me remind u that Linux, FreeBSD, XEN  and DPDK PF drivers are all 
>> open source so u can't actually go and "not allow" things. ;) And 
>> although Intel developers contribute most of the code there are and 
>> will be other contributors too so I doubt the proposed above approach 
>> fits the open source spirit well. ;)
>
> Actually these drivers already support multiple OSes just fine. The 
> part where I think you are confused is that you assume they all use 
> the same Mailbox API which they likely wouldn't.  I would suggest 
> taking a look at ixgbe_pfvf_api_rev in mbx.h of the VF driver. 
> Different OSes have different things that can be supported, so for 
> example the ixgbe_mbox_api_20 is reserved for a Solaris based PF/VF 
> combination.  I would suspect that FreeBSD will likely have to conform 
> to the existing APIs, or report that it only supports a different 
> version of the mailbox API.
>
>>
>> The user should actually not query the indirection table and a hash 
>> key too often. And if he/she does - it should be his/her problem.
>> However, if like with the ixgbevf_set_num_queues() u insist on your 
>> way of doing this (on caching the indirection table and hash key) - 
>> then please let me know and I will add it. Because, frankly, I care 
>> about the PF part of this series much more than for the VF part... ;)
>
> I would say you don't need to cache it, but for 82599 and x540 there 
> isn't any need to store more than 3 bits per entry, 384b, or 12 DWORDs 
> for the entire RETA of the VF since the hardware can support at most 8 
> queues w/ SR-IOV.

Isn't it 2 bits? The VF may have up to 8 queues in total, but it supports
at most 4 RSS queues (according to the MRQC.MRQE and PSRTYPE[n].RQPL
descriptions...)

> Then you only need one message instead of 3 which will reduce quite a 
> bit of the complication with all of this.
>
> Also it might make more sense to start working on displaying this on 
> the PF before you start trying to do this on the VF.  As far as I know 
> ixgbe still doesn't have this functionality and it would make much 
> more sense to enable that first on ixgbe before you start trying to 
> find a way to feed the data to the VF.
>
> - Alex

Alexander Duyck March 26, 2015, 6:25 p.m. UTC | #23
On 03/26/2015 11:10 AM, Vlad Zolotarov wrote:
>
>
> On 03/25/15 23:04, Alexander Duyck wrote:
>>
>> On 03/25/2015 01:17 PM, Vlad Zolotarov wrote:
>>>
>>>
>>> On 03/25/15 20:35, Tantilov, Emil S wrote:
>>>>> -----Original Message-----
>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>> Sent: Wednesday, March 25, 2015 2:28 AM
>>>>> Subject: Re: [PATCH net-next v6 4/7] ixgbevf: Add a RETA query code
>>>> <snip>
>>>>
>>>>>> Have you tested what happens if you run:
>>>>>>
>>>>>> while true
>>>>>> do
>>>>>>     ethtool --show-rxfh-indir ethX
>>>>>> done
>>>>>>
>>>>>> in the background while passing traffic through the VF?
>>>>> I understand your concerns but let's start with clarifying a few 
>>>>> things.
>>>>> First, VF driver is by definition not trusted. If it (or its user)
>>>>> decides to do anything malicious (like u proposed above) that would
>>>>> eventually hurt (only this) VF's performance - nobody should care.
>>>>> However the right question here would be: "How the above use case may
>>>>> hurt the corresponding PF or other VFs' performance?" And since the
>>>>> mailbox operation involves quite a few MMIO writes and reads this may
>>>>> slow the PF quite a bit and this may be a problem that should be 
>>>>> taken
>>>>> care of. However it wasn't my patch series that have introduced 
>>>>> it. The
>>>>> same problem would arise if Guest would change VF's MAC address in a
>>>>> tight loop like above. Namely any VF slow path operation that would
>>>>> eventually cause the VF-PF channel transaction may be used to 
>>>>> create an
>>>>> attack on a PF.
>>>> There are operations that can be disruptive to the VF I am not 
>>>> arguing that,
>>>> the issue introduced by these patches has mostly to do with the 
>>>> fact that now
>>>> we can hit the mailbox more often for what is mostly static 
>>>> information.
>>>>
>>>> Especially with ethtool we already had to deal with an issue caused 
>>>> by net-snmp:
>>>> https://sourceforge.net/p/e1000/mailman/message/32188362/
>>>>
>>>> Where net-snmp was being too aggressive when collecting 
>>>> information, even if most of it was static.
>>>
>>> Emil, I don't really understand what are u trying to protect here 
>>> against. If a user would want to shoot him/herself in the leg - 
>>> he/she would still be able to do it with the other mailbox involving 
>>> operations like MAC change. So, what's the sense to add useless lines?
>>>
>>>>
>>>>> Perhaps storing the RSS key and the table is better option than 
>>>>> having to invoke the mailbox on every read.
>>>>> I don't think this could work if I understand your proposal 
>>>>> correctly.
>>>>> The only way to cache the result that would decrease the number of 
>>>>> mbox
>>>>> transactions would be to cache it in the VF. But how could i 
>>>>> invalidate
>>>>> this cache if the table content has been changed by a PF? I think the
>>>>> main source of a confusion here is that u assume that PF driver is a
>>>>> Linux ixgbe driver that doesn't support an indirection table 
>>>>> change at
>>>>> the moment. As I have explained above - this should not be assumed.
>>>> You keep mentioning other drivers - what other driver do you mean?
>>>> All the PF drivers that enable SRIOV are maintained and supported 
>>>> by Intel.
>>>>
>>>> For HW older than X550 we can simply not allow the RSS hash to be 
>>>> modified if the driver is loaded in SRIOV mode.
>>>> This way the RSS info can be read once the driver is loaded. For 
>>>> X550 this can all be done in the VF, so you can avoid calling the 
>>>> mailbox altogether.
>>>> I understand this is a bit limiting, but this is due to HW 
>>>> limitation anyway (VFs do not have their own RSS config).
>>>
>>> Let me remind u that Linux, FreeBSD, XEN  and DPDK PF drivers are 
>>> all open source so u can't actually go and "not allow" things. ;) 
>>> And although Intel developers contribute most of the code there are 
>>> and will be other contributors too so I doubt the proposed above 
>>> approach fits the open source spirit well. ;)
>>
>> Actually these drivers already support multiple OSes just fine. The 
>> part where I think you are confused is that you assume they all use 
>> the same Mailbox API which they likely wouldn't.  I would suggest 
>> taking a look at ixgbe_pfvf_api_rev in mbx.h of the VF driver. 
>> Different OSes have different things that can be supported, so for 
>> example the ixgbe_mbox_api_20 is reserved for a Solaris based PF/VF 
>> combination.  I would suspect that FreeBSD will likely have to 
>> conform to the existing APIs, or report that it only supports a 
>> different version of the mailbox API.
>>
>>>
>>> The user should actually not query the indirection table and a hash 
>>> key too often. And if he/she does - it should be his/her problem.
>>> However, if like with the ixgbevf_set_num_queues() u insist on your 
>>> way of doing this (on caching the indirection table and hash key) - 
>>> then please let me know and I will add it. Because, frankly, I care 
>>> about the PF part of this series much more than for the VF part... ;)
>>
>> I would say you don't need to cache it, but for 82599 and x540 there 
>> isn't any need to store more than 3 bits per entry, 384b, or 12 
>> DWORDs for the entire RETA of the VF since the hardware can support 
>> at most 8 queues w/ SR-IOV.
>
> Isn't it 2 bits? VF may have up to 8 queues in total but it supports 
> up to 4 RSS queues (according to MRQC.MRQE and PSRTYPE[n].RQPL 
> description...

You might be right.  I was thinking that there was a mode with 16 pools,
each with 8 RSS queues per pool; however, it looks like that mode is
Virtualization w/ DCB only.

- Alex

diff mbox

Patch

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 4ee15ad..4787fcf 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2030,7 +2030,8 @@  static void ixgbevf_init_last_counter_stats(struct ixgbevf_adapter *adapter)
 static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
-	int api[] = { ixgbe_mbox_api_11,
+	int api[] = { ixgbe_mbox_api_12,
+		      ixgbe_mbox_api_11,
 		      ixgbe_mbox_api_10,
 		      ixgbe_mbox_api_unknown };
 	int err = 0, idx = 0;
@@ -3712,6 +3713,7 @@  static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
 
 	switch (adapter->hw.api_version) {
 	case ixgbe_mbox_api_11:
+	case ixgbe_mbox_api_12:
 		max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
 		break;
 	default:
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h
index 6253e93..7342554 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.h
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h
@@ -83,6 +83,7 @@  enum ixgbe_pfvf_api_rev {
 	ixgbe_mbox_api_10,	/* API version 1.0, linux/freebsd VF driver */
 	ixgbe_mbox_api_20,	/* API version 2.0, solaris Phase1 VF driver */
 	ixgbe_mbox_api_11,	/* API version 1.1, linux/freebsd VF driver */
+	ixgbe_mbox_api_12,	/* API version 1.2, linux/freebsd VF driver */
 	/* This value should always be last */
 	ixgbe_mbox_api_unknown,	/* indicates that API version is not known */
 };
@@ -107,6 +108,13 @@  enum ixgbe_pfvf_api_rev {
 #define IXGBE_VF_TRANS_VLAN	3 /* Indication of port VLAN */
 #define IXGBE_VF_DEF_QUEUE	4 /* Default queue offset */
 
+/* mailbox API, version 1.2 VF requests */
+#define IXGBE_VF_GET_RETA	0x0a	/* VF request for RETA */
+
+/* GET_RETA request data indices within the mailbox */
+#define IXGBE_VF_RETA_SZ        1	/* Number of RETA DWs to bring */
+#define IXGBE_VF_RETA_OFFSET    2	/* Offset in RETA */
+
 /* length of permanent address message returned from PF */
 #define IXGBE_VF_PERMADDR_MSG_LEN	4
 /* word in permanent address message with the current multicast type */
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c
index 2614fd3..f31e097 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.c
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c
@@ -256,6 +256,97 @@  static s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr)
 	return ret_val;
 }
 
+static inline int _ixgbevf_get_reta(struct ixgbe_hw *hw, u32 *msgbuf,
+				    u32 *reta, u32 reta_offset_dw, u32 dwords)
+{
+	int err;
+
+	msgbuf[0]                    = IXGBE_VF_GET_RETA;
+	msgbuf[IXGBE_VF_RETA_SZ]     = dwords;
+	msgbuf[IXGBE_VF_RETA_OFFSET] = reta_offset_dw;
+
+	err = hw->mbx.ops.write_posted(hw, msgbuf, 3);
+
+	if (err)
+		return err;
+
+	err = hw->mbx.ops.read_posted(hw, msgbuf, 1 + dwords);
+
+	if (err)
+		return err;
+
+	msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS;
+
+	/* If the operation has been refused by a PF return -EPERM */
+	if (msgbuf[0] == (IXGBE_VF_GET_RETA | IXGBE_VT_MSGTYPE_NACK))
+		return -EPERM;
+
+	/* If we didn't get an ACK there must have been
+	 * some sort of mailbox error so we should treat it
+	 * as such.
+	 */
+	if (msgbuf[0] != (IXGBE_VF_GET_RETA | IXGBE_VT_MSGTYPE_ACK))
+		return IXGBE_ERR_MBX;
+
+	memcpy(reta + reta_offset_dw, msgbuf + 1, 4 * dwords);
+
+	return 0;
+}
+
+/**
+ * ixgbevf_get_reta - get the RSS redirection table (RETA) contents.
+ * @hw: pointer to the HW structure
+ * @reta: buffer to fill with RETA contents.
+ *
+ * The "reta" buffer should be big enough to contain 32 registers.
+ *
+ * Returns: 0 on success.
+ *          if API doesn't support this operation - (-EPERM).
+ */
+int ixgbevf_get_reta(struct ixgbe_hw *hw, u32 *reta)
+{
+	int err;
+	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
+
+	/* Return an error if the API doesn't support RETA querying. */
+	if (hw->api_version != ixgbe_mbox_api_12)
+		return -EPERM;
+
+	/* x550 devices have a separate RETA for each VF: 64 bytes each.
+	 *
+	 * We'll get it in 2 steps due to mailbox size limitation - we can bring
+	 * up to 15 dwords every time. Therefore we'll bring 12 and 4 dwords.
+	 *
+	 * Older devices share a RETA table with the PF: 128 bytes.
+	 *
+	 * For them we do it in 3 steps, bringing 12, 12 and 8 dwords in each
+	 * step correspondingly.
+	 */
+
+	/* RETA[0..11] */
+	err = _ixgbevf_get_reta(hw, msgbuf, reta, 0, 12);
+	if (err)
+		return err;
+
+	if (hw->mac.type >= ixgbe_mac_X550_vf) {
+		/* RETA[12..15] */
+		err = _ixgbevf_get_reta(hw, msgbuf, reta, 12, 4);
+		if (err)
+			return err;
+
+	} else {
+		/* RETA[12..23] */
+		err = _ixgbevf_get_reta(hw, msgbuf, reta, 12, 12);
+		if (err)
+			return err;
+
+		/* RETA[24..31] */
+		err = _ixgbevf_get_reta(hw, msgbuf, reta, 24, 8);
+	}
+
+	return err;
+}
+
 /**
  *  ixgbevf_set_rar_vf - set device MAC address
  *  @hw: pointer to hardware structure
@@ -545,6 +636,7 @@  int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
 	/* do nothing if API doesn't support ixgbevf_get_queues */
 	switch (hw->api_version) {
 	case ixgbe_mbox_api_11:
+	case ixgbe_mbox_api_12:
 		break;
 	default:
 		return 0;
diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h
index 6688250..89bc88e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/vf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.h
@@ -210,4 +210,5 @@  void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size);
 int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api);
 int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs,
 		       unsigned int *default_tc);
+int ixgbevf_get_reta(struct ixgbe_hw *hw, u32 *reta);
 #endif /* __IXGBE_VF_H__ */