diff mbox series

[net-next,v3,3/5] net: Enable Tx queue selection based on Rx queues

Message ID 152818788079.20862.16138764303449301927.stgit@anamdev.jf.intel.com
State Deferred, archived
Delegated to: David Miller
Headers show
Series Symmetric queue selection using XPS for Rx queues | expand

Commit Message

Nambiar, Amritha June 5, 2018, 8:38 a.m. UTC
This patch adds support to pick Tx queue based on the Rx queue(s) map
configuration set by the admin through the sysfs attribute
for each Tx queue. If the user configuration for receive queue(s) map
does not apply, then the Tx queue selection falls back to CPU(s) map
based selection and finally to hashing.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 include/net/busy_poll.h |    3 ++
 include/net/sock.h      |   14 +++++++++++
 net/core/dev.c          |   60 ++++++++++++++++++++++++++++++++---------------
 net/core/sock.c         |    4 +++
 net/ipv4/tcp_input.c    |    3 ++
 5 files changed, 65 insertions(+), 19 deletions(-)

Comments

Willem de Bruijn June 6, 2018, 6:56 p.m. UTC | #1
On Tue, Jun 5, 2018 at 4:38 AM, Amritha Nambiar
<amritha.nambiar@intel.com> wrote:
> This patch adds support to pick Tx queue based on the Rx queue(s) map
> configuration set by the admin through the sysfs attribute
> for each Tx queue. If the user configuration for receive queue(s) map
> does not apply, then the Tx queue selection falls back to CPU(s) map
> based selection and finally to hashing.
>
> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
> ---

>  int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
>
> @@ -5574,6 +5575,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
>         if (skb) {
>                 icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
>                 security_inet_conn_established(sk, skb);
> +               sk_mark_napi_id(sk, skb);
>         }

This and the call below should be in a standalone patch, as the mark
changes are not rxq-xps specific. Is the additional earlier marking really
required?

I would separate out the entire rxq caching with sk_rx_queue_mapping from
the refactoring of get_xps_queue. The two changes are quite independent.

>         tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
> @@ -6402,6 +6404,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>         tcp_rsk(req)->snt_isn = isn;
>         tcp_rsk(req)->txhash = net_tx_rndhash();
>         tcp_openreq_init_rwin(req, sk, dst);
> +       sk_mark_rx_queue(req_to_sk(req), skb);
Samudrala, Sridhar June 6, 2018, 7:08 p.m. UTC | #2
On 6/6/2018 11:56 AM, Willem de Bruijn wrote:
> On Tue, Jun 5, 2018 at 4:38 AM, Amritha Nambiar
> <amritha.nambiar@intel.com> wrote:
>> This patch adds support to pick Tx queue based on the Rx queue(s) map
>> configuration set by the admin through the sysfs attribute
>> for each Tx queue. If the user configuration for receive queue(s) map
>> does not apply, then the Tx queue selection falls back to CPU(s) map
>> based selection and finally to hashing.
>>
>> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>> ---
>>   int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
>>
>> @@ -5574,6 +5575,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
>>          if (skb) {
>>                  icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
>>                  security_inet_conn_established(sk, skb);
>> +               sk_mark_napi_id(sk, skb);
>>          }
> This and the call below should be in a standalone patch, as the mark
> changes are not rxq-xps specific. Is the additional earlier marking really
> required?

The additional earlier marking in tcp_finish_connect() allows a client app to do
SO_INCOMING_NAPI_ID after a connect() call to get the right queue association
for a socket.

The marking in tcp_conn_request() allows syn-ack to go on the right tx-queue
associated with the queue on which syn is received.


>
> I would separate out the entire rxq caching with sk_rx_queue_mapping from
> the refactoring of get_xps_queue. The two changes are quite independent.
>
>>          tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
>> @@ -6402,6 +6404,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
>>          tcp_rsk(req)->snt_isn = isn;
>>          tcp_rsk(req)->txhash = net_tx_rndhash();
>>          tcp_openreq_init_rwin(req, sk, dst);
>> +       sk_mark_rx_queue(req_to_sk(req), skb);
Willem de Bruijn June 6, 2018, 7:13 p.m. UTC | #3
On Wed, Jun 6, 2018 at 3:08 PM, Samudrala, Sridhar
<sridhar.samudrala@intel.com> wrote:
>
> On 6/6/2018 11:56 AM, Willem de Bruijn wrote:
>>
>> On Tue, Jun 5, 2018 at 4:38 AM, Amritha Nambiar
>> <amritha.nambiar@intel.com> wrote:
>>>
>>> This patch adds support to pick Tx queue based on the Rx queue(s) map
>>> configuration set by the admin through the sysfs attribute
>>> for each Tx queue. If the user configuration for receive queue(s) map
>>> does not apply, then the Tx queue selection falls back to CPU(s) map
>>> based selection and finally to hashing.
>>>
>>> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
>>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>>> ---
>>>   int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
>>>
>>> @@ -5574,6 +5575,7 @@ void tcp_finish_connect(struct sock *sk, struct
>>> sk_buff *skb)
>>>          if (skb) {
>>>                  icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
>>>                  security_inet_conn_established(sk, skb);
>>> +               sk_mark_napi_id(sk, skb);
>>>          }
>>
>> This and the call below should be in a standalone patch, as the mark
>> changes are not rxq-xps specific. Is the additional earlier marking really
>> required?
>
>
> The additional earlier marking in tcp_finish_connect() allows a client app
> to do
> SO_INCOMING_NAPI_ID after a connect() call to get the right queue
> association
> for a socket.
>
> The marking in tcp_conn_request() allows syn-ack to go on the right tx-queue
> associated with the queue on which syn is received.

I understand the intent. My question really is whether it is needed.
Marking has been slightly lossy in this regard in the past, not
necessarily as an oversight. I don't mean to make that call here,
but it's worth discussion and its own patch.
Nambiar, Amritha June 6, 2018, 10:52 p.m. UTC | #4
On 6/5/2018 10:57 AM, Tom Herbert wrote:
> 
> 
> On Tue, Jun 5, 2018 at 1:38 AM, Amritha Nambiar
> <amritha.nambiar@intel.com <mailto:amritha.nambiar@intel.com>> wrote:
> 
>     This patch adds support to pick Tx queue based on the Rx queue(s) map
>     configuration set by the admin through the sysfs attribute
>     for each Tx queue. If the user configuration for receive queue(s) map
>     does not apply, then the Tx queue selection falls back to CPU(s) map
>     based selection and finally to hashing.
> 
>     Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com
>     <mailto:amritha.nambiar@intel.com>>
>     Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com
>     <mailto:sridhar.samudrala@intel.com>>
>     ---
>      include/net/busy_poll.h |    3 ++
>      include/net/sock.h      |   14 +++++++++++
>      net/core/dev.c          |   60
>     ++++++++++++++++++++++++++++++++---------------
>      net/core/sock.c         |    4 +++
>      net/ipv4/tcp_input.c    |    3 ++
>      5 files changed, 65 insertions(+), 19 deletions(-)
> 
>     diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
>     index 71c72a9..fc4fb68 100644
>     --- a/include/net/busy_poll.h
>     +++ b/include/net/busy_poll.h
>     @@ -136,6 +136,9 @@ static inline void sk_mark_napi_id(struct sock
>     *sk, const struct sk_buff *skb)
>      #ifdef CONFIG_NET_RX_BUSY_POLL
>             sk->sk_napi_id = skb->napi_id;
>      #endif
>     +#ifdef CONFIG_XPS
>     +       sk->sk_rx_queue_mapping = skb_get_rx_queue(skb);
>     +#endif
>      }
> 
>      /* variant used for unconnected sockets */
>     diff --git a/include/net/sock.h b/include/net/sock.h
>     index 4f7c584..12313653 100644
>     --- a/include/net/sock.h
>     +++ b/include/net/sock.h
>     @@ -139,6 +139,7 @@ typedef __u64 __bitwise __addrpair;
>       *     @skc_node: main hash linkage for various protocol lookup tables
>       *     @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
>       *     @skc_tx_queue_mapping: tx queue number for this connection
>     + *     @skc_rx_queue_mapping: rx queue number for this connection
>       *     @skc_flags: place holder for sk_flags
>       *             %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
>       *             %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
>     @@ -215,6 +216,9 @@ struct sock_common {
>                     struct hlist_nulls_node skc_nulls_node;
>             };
>             int                     skc_tx_queue_mapping;
>     +#ifdef CONFIG_XPS
>     +       int                     skc_rx_queue_mapping;
> 
> 
> This is still expensive cost to be adding an int field into sock_common
> for a relatively rare use case. Maybe there should be a CONFIG_XPS_RQS?
> Or maybe skc_tx_queue_mapping and skc_rx_queue_mapping could be shorts
> (so maximum queue mapping would then be 2^16-2).

Thanks for the review, Tom. I will fix up the code incorporating all
your feedback in the next version (v4). I could have a new config option
CONFIG_XPS_RXQS that would be default off, in addition to the CONFIG_XPS
option that's already there. With changing the 'skc_tx_queue_mapping' to
short, my concern is that the change would become extensive; there are a
lot of places where this gets filled with int or u32 values.

> 
>     +#endif
>             union {
>                     int             skc_incoming_cpu;
>                     u32             skc_rcv_wnd;
>     @@ -326,6 +330,9 @@ struct sock {
>      #define sk_nulls_node          __sk_common.skc_nulls_node
>      #define sk_refcnt              __sk_common.skc_refcnt
>      #define sk_tx_queue_mapping    __sk_common.skc_tx_queue_mapping
>     +#ifdef CONFIG_XPS
>     +#define sk_rx_queue_mapping    __sk_common.skc_rx_queue_mapping
>     +#endif
> 
>      #define sk_dontcopy_begin      __sk_common.skc_dontcopy_begin
>      #define sk_dontcopy_end                __sk_common.skc_dontcopy_end
>     @@ -1696,6 +1703,13 @@ static inline int sk_tx_queue_get(const
>     struct sock *sk)
>             return sk ? sk->sk_tx_queue_mapping : -1;
>      }
> 
>     +static inline void sk_mark_rx_queue(struct sock *sk, struct sk_buff
>     *skb)
>     +{
>     +#ifdef CONFIG_XPS
>     +       sk->sk_rx_queue_mapping = skb_get_rx_queue(skb);
>     +#endif
>     +}
>     +
>      static inline void sk_set_socket(struct sock *sk, struct socket *sock)
>      {
>             sk_tx_queue_clear(sk);
>     diff --git a/net/core/dev.c b/net/core/dev.c
>     index bba755f..1880e6c 100644
>     --- a/net/core/dev.c
>     +++ b/net/core/dev.c
>     @@ -3479,36 +3479,58 @@ sch_handle_egress(struct sk_buff *skb, int
>     *ret, struct net_device *dev)
>      }
>      #endif /* CONFIG_NET_EGRESS */
> 
>     -static inline int get_xps_queue(struct net_device *dev, struct
>     sk_buff *skb)
>     +#ifdef CONFIG_XPS
>     +static int __get_xps_queue_idx(struct net_device *dev, struct
>     sk_buff *skb,
>     +                              struct xps_dev_maps *dev_maps,
>     unsigned int tci)
>     +{
>     +       struct xps_map *map;
>     +       int queue_index = -1;
>     +
>     +       if (dev->num_tc) {
>     +               tci *= dev->num_tc;
>     +               tci += netdev_get_prio_tc_map(dev, skb->priority);
>     +       }
>     +
>     +       map = rcu_dereference(dev_maps->attr_map[tci]);
>     +       if (map) {
>     +               if (map->len == 1)
>     +                       queue_index = map->queues[0];
>     +               else
>     +                       queue_index = map->queues[reciprocal_scale(
>     +                                               skb_get_hash(skb),
>     map->len)];
>     +               if (unlikely(queue_index >= dev->real_num_tx_queues))
>     +                       queue_index = -1;
>     +       }
>     +       return queue_index;
>     +}
>     +#endif
>     +
>     +static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
>      {
>      #ifdef CONFIG_XPS
>             struct xps_dev_maps *dev_maps;
>     -       struct xps_map *map;
>     +       struct sock *sk = skb->sk;
>             int queue_index = -1;
>     +       unsigned int tci = 0;
> 
>             if (!static_key_false(&xps_needed))
>                     return -1;
> 
>     +       if (sk && sk->sk_rx_queue_mapping <= dev->num_rx_queues)
>     +               tci = sk->sk_rx_queue_mapping;
> 
> 
> This is only be needed if xps_rxqs_map is not null so it should be in
> the block below.
>  
> 
>     +
>             rcu_read_lock();
>     -       dev_maps = rcu_dereference(dev->xps_cpus_map);
>     -       if (dev_maps) {
>     -               unsigned int tci = skb->sender_cpu - 1;
>     +       dev_maps = rcu_dereference(dev->xps_rxqs_map);
>     +       if (dev_maps)
>     +               queue_index = __get_xps_queue_idx(dev, skb,
>     dev_maps, tci);
> 
>     -               if (dev->num_tc) {
>     -                       tci *= dev->num_tc;
>     -                       tci += netdev_get_prio_tc_map(dev,
>     skb->priority);
>     -               }
> 
>     -               map = rcu_dereference(dev_maps->attr_map[tci]);
>     -               if (map) {
>     -                       if (map->len == 1)
>     -                               queue_index = map->queues[0];
>     -                       else
>     -                               queue_index =
>     map->queues[reciprocal_scale(skb_get_hash(skb),
>     -                                                                   
>           map->len)];
>     -                       if (unlikely(queue_index >=
>     dev->real_num_tx_queues))
>     -                               queue_index = -1;
>     -               }
>     +       if (queue_index < 0) {
>     +               tci = skb->sender_cpu - 1;
>     +               dev_maps = rcu_dereference(dev->xps_cpus_map);
>     +               if (dev_maps)
>     +                       queue_index = __get_xps_queue_idx(dev, skb,
>     dev_maps,
>     +                                                         tci);
>             }
>             rcu_read_unlock();
> 
>     diff --git a/net/core/sock.c b/net/core/sock.c
>     index 435a0ba..3c10d31 100644
>     --- a/net/core/sock.c
>     +++ b/net/core/sock.c
>     @@ -2824,6 +2824,10 @@ void sock_init_data(struct socket *sock,
>     struct sock *sk)
>             sk->sk_pacing_rate = ~0U;
>             sk->sk_pacing_shift = 10;
>             sk->sk_incoming_cpu = -1;
>     +
>     +#ifdef CONFIG_XPS
>     +       sk->sk_rx_queue_mapping = -1;
>     +#endif
>             /*
>              * Before updating sk_refcnt, we must commit prior changes
>     to memory
>              * (Documentation/RCU/rculist_nulls.txt for details)
>     diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
>     index d5ffb57..cc69f75 100644
>     --- a/net/ipv4/tcp_input.c
>     +++ b/net/ipv4/tcp_input.c
>     @@ -78,6 +78,7 @@
>      #include <linux/errqueue.h>
>      #include <trace/events/tcp.h>
>      #include <linux/static_key.h>
>     +#include <net/busy_poll.h>
> 
>      int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
> 
>     @@ -5574,6 +5575,7 @@ void tcp_finish_connect(struct sock *sk,
>     struct sk_buff *skb)
>             if (skb) {
>                     icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
>                     security_inet_conn_established(sk, skb);
>     +               sk_mark_napi_id(sk, skb);
>             }
> 
>             tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
>     @@ -6402,6 +6404,7 @@ int tcp_conn_request(struct request_sock_ops
>     *rsk_ops,
>             tcp_rsk(req)->snt_isn = isn;
>             tcp_rsk(req)->txhash = net_tx_rndhash();
>             tcp_openreq_init_rwin(req, sk, dst);
>     +       sk_mark_rx_queue(req_to_sk(req), skb);
>             if (!want_cookie) {
>                     tcp_reqsk_record_syn(sk, req, skb);
>                     fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
> 
>
Nambiar, Amritha June 6, 2018, 11:02 p.m. UTC | #5
On 6/6/2018 12:13 PM, Willem de Bruijn wrote:
> On Wed, Jun 6, 2018 at 3:08 PM, Samudrala, Sridhar
> <sridhar.samudrala@intel.com> wrote:
>>
>> On 6/6/2018 11:56 AM, Willem de Bruijn wrote:
>>>
>>> On Tue, Jun 5, 2018 at 4:38 AM, Amritha Nambiar
>>> <amritha.nambiar@intel.com> wrote:
>>>>
>>>> This patch adds support to pick Tx queue based on the Rx queue(s) map
>>>> configuration set by the admin through the sysfs attribute
>>>> for each Tx queue. If the user configuration for receive queue(s) map
>>>> does not apply, then the Tx queue selection falls back to CPU(s) map
>>>> based selection and finally to hashing.
>>>>
>>>> Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
>>>> Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
>>>> ---
>>>>   int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
>>>>
>>>> @@ -5574,6 +5575,7 @@ void tcp_finish_connect(struct sock *sk, struct
>>>> sk_buff *skb)
>>>>          if (skb) {
>>>>                  icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
>>>>                  security_inet_conn_established(sk, skb);
>>>> +               sk_mark_napi_id(sk, skb);
>>>>          }
>>>
>>> This and the call below should be in a standalone patch, as the mark
>>> changes are not rxq-xps specific. Is the additional earlier marking really
>>> required?
>>
>>
>> The additional earlier marking in tcp_finish_connect() allows a client app
>> to do
>> SO_INCOMING_NAPI_ID after a connect() call to get the right queue
>> association
>> for a socket.
>>
>> The marking in tcp_conn_request() allows syn-ack to go on the right tx-queue
>> associated with the queue on which syn is received.
> 
> I understand the intent. My question really is whether it is needed.
> Marking has been slightly lossy in this regard in the past, not
> necessarily as an oversight. I don't mean to make that call here,
> but it's worth discussion and its own patch.
> 
Will separate this out into a standalone patch in v4.
diff mbox series

Patch

diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 71c72a9..fc4fb68 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -136,6 +136,9 @@  static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id = skb->napi_id;
 #endif
+#ifdef CONFIG_XPS
+	sk->sk_rx_queue_mapping = skb_get_rx_queue(skb);
+#endif
 }
 
 /* variant used for unconnected sockets */
diff --git a/include/net/sock.h b/include/net/sock.h
index 4f7c584..12313653 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -139,6 +139,7 @@  typedef __u64 __bitwise __addrpair;
  *	@skc_node: main hash linkage for various protocol lookup tables
  *	@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol
  *	@skc_tx_queue_mapping: tx queue number for this connection
+ *	@skc_rx_queue_mapping: rx queue number for this connection
  *	@skc_flags: place holder for sk_flags
  *		%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
  *		%SO_OOBINLINE settings, %SO_TIMESTAMPING settings
@@ -215,6 +216,9 @@  struct sock_common {
 		struct hlist_nulls_node skc_nulls_node;
 	};
 	int			skc_tx_queue_mapping;
+#ifdef CONFIG_XPS
+	int			skc_rx_queue_mapping;
+#endif
 	union {
 		int		skc_incoming_cpu;
 		u32		skc_rcv_wnd;
@@ -326,6 +330,9 @@  struct sock {
 #define sk_nulls_node		__sk_common.skc_nulls_node
 #define sk_refcnt		__sk_common.skc_refcnt
 #define sk_tx_queue_mapping	__sk_common.skc_tx_queue_mapping
+#ifdef CONFIG_XPS
+#define sk_rx_queue_mapping	__sk_common.skc_rx_queue_mapping
+#endif
 
 #define sk_dontcopy_begin	__sk_common.skc_dontcopy_begin
 #define sk_dontcopy_end		__sk_common.skc_dontcopy_end
@@ -1696,6 +1703,13 @@  static inline int sk_tx_queue_get(const struct sock *sk)
 	return sk ? sk->sk_tx_queue_mapping : -1;
 }
 
+static inline void sk_mark_rx_queue(struct sock *sk, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+	sk->sk_rx_queue_mapping = skb_get_rx_queue(skb);
+#endif
+}
+
 static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 {
 	sk_tx_queue_clear(sk);
diff --git a/net/core/dev.c b/net/core/dev.c
index bba755f..1880e6c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3479,36 +3479,58 @@  sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 }
 #endif /* CONFIG_NET_EGRESS */
 
-static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+#ifdef CONFIG_XPS
+static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
+			       struct xps_dev_maps *dev_maps, unsigned int tci)
+{
+	struct xps_map *map;
+	int queue_index = -1;
+
+	if (dev->num_tc) {
+		tci *= dev->num_tc;
+		tci += netdev_get_prio_tc_map(dev, skb->priority);
+	}
+
+	map = rcu_dereference(dev_maps->attr_map[tci]);
+	if (map) {
+		if (map->len == 1)
+			queue_index = map->queues[0];
+		else
+			queue_index = map->queues[reciprocal_scale(
+						skb_get_hash(skb), map->len)];
+		if (unlikely(queue_index >= dev->real_num_tx_queues))
+			queue_index = -1;
+	}
+	return queue_index;
+}
+#endif
+
+static int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_XPS
 	struct xps_dev_maps *dev_maps;
-	struct xps_map *map;
+	struct sock *sk = skb->sk;
 	int queue_index = -1;
+	unsigned int tci = 0;
 
 	if (!static_key_false(&xps_needed))
 		return -1;
 
+	if (sk && sk->sk_rx_queue_mapping <= dev->num_rx_queues)
+		tci = sk->sk_rx_queue_mapping;
+
 	rcu_read_lock();
-	dev_maps = rcu_dereference(dev->xps_cpus_map);
-	if (dev_maps) {
-		unsigned int tci = skb->sender_cpu - 1;
+	dev_maps = rcu_dereference(dev->xps_rxqs_map);
+	if (dev_maps)
+		queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci);
 
-		if (dev->num_tc) {
-			tci *= dev->num_tc;
-			tci += netdev_get_prio_tc_map(dev, skb->priority);
-		}
 
-		map = rcu_dereference(dev_maps->attr_map[tci]);
-		if (map) {
-			if (map->len == 1)
-				queue_index = map->queues[0];
-			else
-				queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
-									   map->len)];
-			if (unlikely(queue_index >= dev->real_num_tx_queues))
-				queue_index = -1;
-		}
+	if (queue_index < 0) {
+		tci = skb->sender_cpu - 1;
+		dev_maps = rcu_dereference(dev->xps_cpus_map);
+		if (dev_maps)
+			queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
+							  tci);
 	}
 	rcu_read_unlock();
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 435a0ba..3c10d31 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2824,6 +2824,10 @@  void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_pacing_rate = ~0U;
 	sk->sk_pacing_shift = 10;
 	sk->sk_incoming_cpu = -1;
+
+#ifdef CONFIG_XPS
+	sk->sk_rx_queue_mapping = -1;
+#endif
 	/*
 	 * Before updating sk_refcnt, we must commit prior changes to memory
 	 * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d5ffb57..cc69f75 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,6 +78,7 @@ 
 #include <linux/errqueue.h>
 #include <trace/events/tcp.h>
 #include <linux/static_key.h>
+#include <net/busy_poll.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
 
@@ -5574,6 +5575,7 @@  void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
 	if (skb) {
 		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
 		security_inet_conn_established(sk, skb);
+		sk_mark_napi_id(sk, skb);
 	}
 
 	tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
@@ -6402,6 +6404,7 @@  int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->txhash = net_tx_rndhash();
 	tcp_openreq_init_rwin(req, sk, dst);
+	sk_mark_rx_queue(req_to_sk(req), skb);
 	if (!want_cookie) {
 		tcp_reqsk_record_syn(sk, req, skb);
 		fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);