Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/1.2/patches/808464/?format=api
{ "id": 808464, "url": "http://patchwork.ozlabs.org/api/1.2/patches/808464/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/patch/1504222032-6337-1-git-send-email-sridhar.samudrala@intel.com/", "project": { "id": 7, "url": "http://patchwork.ozlabs.org/api/1.2/projects/7/?format=api", "name": "Linux network development", "link_name": "netdev", "list_id": "netdev.vger.kernel.org", "list_email": "netdev@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<1504222032-6337-1-git-send-email-sridhar.samudrala@intel.com>", "list_archive_url": null, "date": "2017-08-31T23:27:12", "name": "[RFC] net: Introduce a socket option to enable picking tx queue based on rx queue.", "commit_ref": null, "pull_url": null, "state": "rfc", "archived": true, "hash": "2c8790ad20af5cb428ab44d352e7553c6a1f3b0f", "submitter": { "id": 65219, "url": "http://patchwork.ozlabs.org/api/1.2/people/65219/?format=api", "name": "Samudrala, Sridhar", "email": "sridhar.samudrala@intel.com" }, "delegate": { "id": 34, "url": "http://patchwork.ozlabs.org/api/1.2/users/34/?format=api", "username": "davem", "first_name": "David", "last_name": "Miller", "email": "davem@davemloft.net" }, "mbox": "http://patchwork.ozlabs.org/project/netdev/patch/1504222032-6337-1-git-send-email-sridhar.samudrala@intel.com/mbox/", "series": [ { "id": 922, "url": "http://patchwork.ozlabs.org/api/1.2/series/922/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/list/?series=922", "date": "2017-08-31T23:27:12", "name": "[RFC] net: Introduce a socket option to enable picking tx queue based on rx queue.", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/922/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/808464/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/808464/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<netdev-owner@vger.kernel.org>", "X-Original-To": "patchwork-incoming@ozlabs.org", "Delivered-To": "patchwork-incoming@ozlabs.org", "Authentication-Results": "ozlabs.org;\n\tspf=none (mailfrom) smtp.mailfrom=vger.kernel.org\n\t(client-ip=209.132.180.67; helo=vger.kernel.org;\n\tenvelope-from=netdev-owner@vger.kernel.org;\n\treceiver=<UNKNOWN>)", "Received": [ "from vger.kernel.org (vger.kernel.org [209.132.180.67])\n\tby ozlabs.org (Postfix) with ESMTP id 3xjz6s4VH1z9s7p\n\tfor <patchwork-incoming@ozlabs.org>;\n\tFri, 1 Sep 2017 09:27:16 +1000 (AEST)", "(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S1751581AbdHaX1O (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);\n\tThu, 31 Aug 2017 19:27:14 -0400", "from mga11.intel.com ([192.55.52.93]:42537 \"EHLO mga11.intel.com\"\n\trhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP\n\tid S1751307AbdHaX1N (ORCPT <rfc822;netdev@vger.kernel.org>);\n\tThu, 31 Aug 2017 19:27:13 -0400", "from fmsmga005.fm.intel.com ([10.253.24.32])\n\tby fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t31 Aug 2017 16:27:12 -0700", "from arch-p20.jf.intel.com ([10.166.186.60])\n\tby fmsmga005.fm.intel.com with ESMTP; 31 Aug 2017 16:27:12 -0700" ], "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"5.41,455,1498546800\"; d=\"scan'208\";a=\"146275449\"", "From": "Sridhar Samudrala <sridhar.samudrala@intel.com>", "To": "alexander.h.duyck@intel.com, netdev@vger.kernel.org", "Subject": "[RFC PATCH] net: Introduce a socket option to enable picking tx\n\tqueue based on rx queue.", "Date": "Thu, 31 Aug 2017 16:27:12 -0700", "Message-Id": "<1504222032-6337-1-git-send-email-sridhar.samudrala@intel.com>", "X-Mailer": "git-send-email 1.8.3.1", "Sender": "netdev-owner@vger.kernel.org", "Precedence": "bulk", "List-ID": "<netdev.vger.kernel.org>", "X-Mailing-List": "netdev@vger.kernel.org" }, "content": "This patch introduces a new socket option SO_SYMMETRIC_QUEUES that can be used\nto enable symmetric tx and rx queues on a socket.\n\nThis option is specifically useful for epoll based multi threaded workloads\nwhere each thread handles packets received on a single RX queue . In this model,\nwe have noticed that it helps to send the packets on the same TX queue\ncorresponding to the queue-pair associated with the RX queue specifically when\nbusy poll is enabled with epoll().\n\nTwo new fields are added to struct sock_common to cache the last rx ifindex and\nthe rx queue in the receive path of an SKB. __netdev_pick_tx() returns the cached\nrx queue when this option is enabled and the TX is happening on the same device.\n\nSigned-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>\n---\n include/net/request_sock.h | 1 +\n include/net/sock.h | 17 +++++++++++++++++\n include/uapi/asm-generic/socket.h | 2 ++\n net/core/dev.c | 8 +++++++-\n net/core/sock.c | 10 ++++++++++\n net/ipv4/tcp_input.c | 1 +\n net/ipv4/tcp_ipv4.c | 1 +\n net/ipv4/tcp_minisocks.c | 1 +\n 8 files changed, 40 insertions(+), 1 deletion(-)", "diff": "diff --git a/include/net/request_sock.h b/include/net/request_sock.h\nindex 23e2205..c3bc12e 100644\n--- a/include/net/request_sock.h\n+++ b/include/net/request_sock.h\n@@ -100,6 +100,7 @@ static inline struct sock *req_to_sk(struct request_sock *req)\n \treq_to_sk(req)->sk_prot = sk_listener->sk_prot;\n \tsk_node_init(&req_to_sk(req)->sk_node);\n \tsk_tx_queue_clear(req_to_sk(req));\n+\treq_to_sk(req)->sk_symmetric_queues = sk_listener->sk_symmetric_queues;\n \treq->saved_syn = NULL;\n \trefcount_set(&req->rsk_refcnt, 0);\n \ndiff --git a/include/net/sock.h b/include/net/sock.h\nindex 03a3625..3421809 100644\n--- a/include/net/sock.h\n+++ b/include/net/sock.h\n@@ -138,11 +138,14 @@ void SOCK_DEBUG(const struct sock *sk, const char *msg, ...)\n *\t@skc_node: main hash linkage for various protocol lookup tables\n *\t@skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol\n *\t@skc_tx_queue_mapping: tx queue number for this connection\n+ *\t@skc_rx_queue_mapping: rx queue number for this connection\n+ *\t@skc_rx_ifindex: rx ifindex for this connection\n *\t@skc_flags: place holder for sk_flags\n *\t\t%SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,\n *\t\t%SO_OOBINLINE settings, %SO_TIMESTAMPING settings\n *\t@skc_incoming_cpu: record/match cpu processing incoming packets\n *\t@skc_refcnt: reference count\n+ *\t@skc_symmetric_queues: symmetric tx/rx queues\n *\n *\tThis is the minimal network layer representation of sockets, the header\n *\tfor struct sock and struct inet_timewait_sock.\n@@ -177,6 +180,7 @@ struct sock_common {\n \tunsigned char\t\tskc_reuseport:1;\n \tunsigned char\t\tskc_ipv6only:1;\n \tunsigned char\t\tskc_net_refcnt:1;\n+\tunsigned char\t\tskc_symmetric_queues:1;\n \tint\t\t\tskc_bound_dev_if;\n \tunion {\n \t\tstruct hlist_node\tskc_bind_node;\n@@ -214,6 +218,8 @@ struct sock_common {\n \t\tstruct hlist_nulls_node skc_nulls_node;\n \t};\n \tint\t\t\tskc_tx_queue_mapping;\n+\tint\t\t\tskc_rx_queue_mapping;\n+\tint\t\t\tskc_rx_ifindex;\n \tunion {\n \t\tint\t\tskc_incoming_cpu;\n \t\tu32\t\tskc_rcv_wnd;\n@@ -324,6 +330,8 @@ struct sock {\n #define sk_nulls_node\t\t__sk_common.skc_nulls_node\n #define sk_refcnt\t\t__sk_common.skc_refcnt\n #define sk_tx_queue_mapping\t__sk_common.skc_tx_queue_mapping\n+#define sk_rx_queue_mapping\t__sk_common.skc_rx_queue_mapping\n+#define sk_rx_ifindex\t\t__sk_common.skc_rx_ifindex\n \n #define sk_dontcopy_begin\t__sk_common.skc_dontcopy_begin\n #define sk_dontcopy_end\t\t__sk_common.skc_dontcopy_end\n@@ -340,6 +348,7 @@ struct sock {\n #define sk_reuseport\t\t__sk_common.skc_reuseport\n #define sk_ipv6only\t\t__sk_common.skc_ipv6only\n #define sk_net_refcnt\t\t__sk_common.skc_net_refcnt\n+#define sk_symmetric_queues\t__sk_common.skc_symmetric_queues\n #define sk_bound_dev_if\t\t__sk_common.skc_bound_dev_if\n #define sk_bind_node\t\t__sk_common.skc_bind_node\n #define sk_prot\t\t\t__sk_common.skc_prot\n@@ -1676,6 +1685,14 @@ static inline int sk_tx_queue_get(const struct sock *sk)\n \treturn sk ? sk->sk_tx_queue_mapping : -1;\n }\n \n+static inline void sk_mark_rx_queue(struct sock *sk, struct sk_buff *skb)\n+{\n+\tif (sk->sk_symmetric_queues) {\n+\t\tsk->sk_rx_ifindex = skb->skb_iif;\n+\t\tsk->sk_rx_queue_mapping = skb_get_rx_queue(skb);\n+\t}\n+}\n+\n static inline void sk_set_socket(struct sock *sk, struct socket *sock)\n {\n \tsk_tx_queue_clear(sk);\ndiff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h\nindex e47c9e4..f6b416e 100644\n--- a/include/uapi/asm-generic/socket.h\n+++ b/include/uapi/asm-generic/socket.h\n@@ -106,4 +106,6 @@\n \n #define SO_ZEROCOPY\t\t60\n \n+#define SO_SYMMETRIC_QUEUES\t61\n+\n #endif /* __ASM_GENERIC_SOCKET_H */\ndiff --git a/net/core/dev.c b/net/core/dev.c\nindex 270b547..d96cda8 100644\n--- a/net/core/dev.c\n+++ b/net/core/dev.c\n@@ -3322,7 +3322,13 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)\n \n \tif (queue_index < 0 || skb->ooo_okay ||\n \t queue_index >= dev->real_num_tx_queues) {\n-\t\tint new_index = get_xps_queue(dev, skb);\n+\t\tint new_index = -1;\n+\n+\t\tif (sk && sk->sk_symmetric_queues && dev->ifindex == sk->sk_rx_ifindex)\n+\t\t\tnew_index = sk->sk_rx_queue_mapping;\n+\n+\t\tif (new_index < 0 || new_index >= dev->real_num_tx_queues)\n+\t\t\tnew_index = get_xps_queue(dev, skb);\n \n \t\tif (new_index < 0)\n \t\t\tnew_index = skb_tx_hash(dev, skb);\ndiff --git a/net/core/sock.c b/net/core/sock.c\nindex 9b7b6bb..3876cce 100644\n--- a/net/core/sock.c\n+++ b/net/core/sock.c\n@@ -1059,6 +1059,10 @@ int sock_setsockopt(struct socket *sock, int level, int optname,\n \t\t\tsock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);\n \t\tbreak;\n \n+\tcase SO_SYMMETRIC_QUEUES:\n+\t\tsk->sk_symmetric_queues = valbool;\n+\t\tbreak;\n+\n \tdefault:\n \t\tret = -ENOPROTOOPT;\n \t\tbreak;\n@@ -1391,6 +1395,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,\n \t\tv.val = sock_flag(sk, SOCK_ZEROCOPY);\n \t\tbreak;\n \n+\tcase SO_SYMMETRIC_QUEUES:\n+\t\tv.val = sk->sk_symmetric_queues;\n+\t\tbreak;\n+\n \tdefault:\n \t\t/* We implement the SO_SNDLOWAT etc to not be settable\n \t\t * (1003.1g 7).\n@@ -2738,6 +2746,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)\n \tsk->sk_max_pacing_rate = ~0U;\n \tsk->sk_pacing_rate = ~0U;\n \tsk->sk_incoming_cpu = -1;\n+\tsk->sk_rx_ifindex = -1;\n+\tsk->sk_rx_queue_mapping = -1;\n \t/*\n \t * Before updating sk_refcnt, we must commit prior changes to memory\n \t * (Documentation/RCU/rculist_nulls.txt for details)\ndiff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c\nindex c5d7656..12381e0 100644\n--- a/net/ipv4/tcp_input.c\n+++ b/net/ipv4/tcp_input.c\n@@ -6356,6 +6356,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,\n \ttcp_rsk(req)->snt_isn = isn;\n \ttcp_rsk(req)->txhash = net_tx_rndhash();\n \ttcp_openreq_init_rwin(req, sk, dst);\n+\tsk_mark_rx_queue(req_to_sk(req), skb);\n \tif (!want_cookie) {\n \t\ttcp_reqsk_record_syn(sk, req, skb);\n \t\tfastopen_sk = tcp_try_fastopen(sk, skb, req, &foc);\ndiff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c\nindex a63486a..82f9af4 100644\n--- a/net/ipv4/tcp_ipv4.c\n+++ b/net/ipv4/tcp_ipv4.c\n@@ -1450,6 +1450,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)\n \n \t\tsock_rps_save_rxhash(sk, skb);\n \t\tsk_mark_napi_id(sk, skb);\n+\t\tsk_mark_rx_queue(sk, skb);\n \t\tif (dst) {\n \t\t\tif (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||\n \t\t\t !dst->ops->check(dst, 0)) {\ndiff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c\nindex 188a6f3..2b5efd5 100644\n--- a/net/ipv4/tcp_minisocks.c\n+++ b/net/ipv4/tcp_minisocks.c\n@@ -809,6 +809,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,\n \n \t/* record NAPI ID of child */\n \tsk_mark_napi_id(child, skb);\n+\tsk_mark_rx_queue(child, skb);\n \n \ttcp_segs_in(tcp_sk(child), skb);\n \tif (!sock_owned_by_user(child)) {\n", "prefixes": [ "RFC" ] }