Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/807776/?format=api
{ "id": 807776, "url": "http://patchwork.ozlabs.org/api/patches/807776/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/patch/20170830172458.18544-3-fw@strlen.de/", "project": { "id": 7, "url": "http://patchwork.ozlabs.org/api/projects/7/?format=api", "name": "Linux network development", "link_name": "netdev", "list_id": "netdev.vger.kernel.org", "list_email": "netdev@vger.kernel.org", "web_url": null, "scm_url": null, "webscm_url": null, "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20170830172458.18544-3-fw@strlen.de>", "list_archive_url": null, "date": "2017-08-30T17:24:58", "name": "[net-next,2/2] tcp: Revert \"tcp: remove header prediction\"", "commit_ref": null, "pull_url": null, "state": "accepted", "archived": true, "hash": "a8e9d4012a322f12f862a6361a478dec9780d779", "submitter": { "id": 1025, "url": "http://patchwork.ozlabs.org/api/people/1025/?format=api", "name": "Florian Westphal", "email": "fw@strlen.de" }, "delegate": { "id": 34, "url": "http://patchwork.ozlabs.org/api/users/34/?format=api", "username": "davem", "first_name": "David", "last_name": "Miller", "email": "davem@davemloft.net" }, "mbox": "http://patchwork.ozlabs.org/project/netdev/patch/20170830172458.18544-3-fw@strlen.de/mbox/", "series": [ { "id": 665, "url": "http://patchwork.ozlabs.org/api/series/665/?format=api", "web_url": "http://patchwork.ozlabs.org/project/netdev/list/?series=665", "date": "2017-08-30T17:24:56", "name": "tcp: re-add header prediction", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/665/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/807776/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/807776/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<netdev-owner@vger.kernel.org>", "X-Original-To": "patchwork-incoming@ozlabs.org", "Delivered-To": "patchwork-incoming@ozlabs.org", "Authentication-Results": "ozlabs.org;\n\tspf=none (mailfrom) smtp.mailfrom=vger.kernel.org\n\t(client-ip=209.132.180.67; helo=vger.kernel.org;\n\tenvelope-from=netdev-owner@vger.kernel.org;\n\treceiver=<UNKNOWN>)", "Received": [ "from vger.kernel.org (vger.kernel.org [209.132.180.67])\n\tby ozlabs.org (Postfix) with ESMTP id 3xjC766FgWz9sN7\n\tfor <patchwork-incoming@ozlabs.org>;\n\tThu, 31 Aug 2017 03:24:50 +1000 (AEST)", "(majordomo@vger.kernel.org) by vger.kernel.org via listexpand\n\tid S1752160AbdH3RYt (ORCPT <rfc822;patchwork-incoming@ozlabs.org>);\n\tWed, 30 Aug 2017 13:24:49 -0400", "from Chamillionaire.breakpoint.cc ([146.0.238.67]:35766 \"EHLO\n\tChamillionaire.breakpoint.cc\" rhost-flags-OK-OK-OK-OK)\n\tby vger.kernel.org with ESMTP id S1751955AbdH3RYq (ORCPT\n\t<rfc822;netdev@vger.kernel.org>); Wed, 30 Aug 2017 13:24:46 -0400", "from fw by Chamillionaire.breakpoint.cc with local (Exim 4.84_2)\n\t(envelope-from <fw@breakpoint.cc>)\n\tid 1dn6gj-0007WP-Nr; Wed, 30 Aug 2017 19:22:01 +0200" ], "From": "Florian Westphal <fw@strlen.de>", "To": "<netdev@vger.kernel.org>", "Cc": "edumazet@google.com, Florian Westphal <fw@strlen.de>", "Subject": "[PATCH net-next 2/2] tcp: Revert \"tcp: remove header prediction\"", "Date": "Wed, 30 Aug 2017 19:24:58 +0200", "Message-Id": "<20170830172458.18544-3-fw@strlen.de>", "X-Mailer": "git-send-email 2.13.0", "In-Reply-To": "<20170830172458.18544-1-fw@strlen.de>", "References": "<20170830172458.18544-1-fw@strlen.de>", "Sender": "netdev-owner@vger.kernel.org", "Precedence": "bulk", "List-ID": "<netdev.vger.kernel.org>", "X-Mailing-List": "netdev@vger.kernel.org" }, "content": "This reverts commit 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78.\n\nEric Dumazet says:\n We found at Google a significant regression caused by\n 45f119bf936b1f9f546a0b139c5b56f9bb2bdc78 tcp: remove header prediction\n\n In typical RPC (TCP_RR), when a TCP socket receives data, we now call\n tcp_ack() while we used to not call it.\n\n This touches enough cache lines to cause a slowdown.\n\nso problem does not seem to be HP removal itself but the tcp_ack()\ncall. Therefore, it might be possible to remove HP after all, provided\none finds a way to elide tcp_ack for most cases.\n\nReported-by: Eric Dumazet <edumazet@google.com>\nSigned-off-by: Florian Westphal <fw@strlen.de>\n---\n include/linux/tcp.h | 6 ++\n include/net/tcp.h | 23 ++++++\n include/uapi/linux/snmp.h | 2 +\n net/ipv4/proc.c | 2 +\n net/ipv4/tcp.c | 4 +-\n net/ipv4/tcp_input.c | 188 ++++++++++++++++++++++++++++++++++++++++++++--\n net/ipv4/tcp_minisocks.c | 2 +\n net/ipv4/tcp_output.c | 2 +\n 8 files changed, 223 insertions(+), 6 deletions(-)", "diff": "diff --git a/include/linux/tcp.h b/include/linux/tcp.h\nindex 267164a1d559..4aa40ef02d32 100644\n--- a/include/linux/tcp.h\n+++ b/include/linux/tcp.h\n@@ -148,6 +148,12 @@ struct tcp_sock {\n \tu16\tgso_segs;\t/* Max number of segs per GSO packet\t*/\n \n /*\n+ *\tHeader prediction flags\n+ *\t0x5?10 << 16 + snd_wnd in net byte order\n+ */\n+\t__be32\tpred_flags;\n+\n+/*\n *\tRFC793 variables by their proper names. This means you can\n *\tread the code and the spec side by side (and laugh ...)\n *\tSee RFC793 and RFC1122. The RFC writes these in capitals.\ndiff --git a/include/net/tcp.h b/include/net/tcp.h\nindex c546d13ffbca..9c3db054e47f 100644\n--- a/include/net/tcp.h\n+++ b/include/net/tcp.h\n@@ -634,6 +634,29 @@ static inline u32 __tcp_set_rto(const struct tcp_sock *tp)\n \treturn usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);\n }\n \n+static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)\n+{\n+\ttp->pred_flags = htonl((tp->tcp_header_len << 26) |\n+\t\t\t ntohl(TCP_FLAG_ACK) |\n+\t\t\t snd_wnd);\n+}\n+\n+static inline void tcp_fast_path_on(struct tcp_sock *tp)\n+{\n+\t__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);\n+}\n+\n+static inline void tcp_fast_path_check(struct sock *sk)\n+{\n+\tstruct tcp_sock *tp = tcp_sk(sk);\n+\n+\tif (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&\n+\t tp->rcv_wnd &&\n+\t atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&\n+\t !tp->urg_data)\n+\t\ttcp_fast_path_on(tp);\n+}\n+\n /* Compute the actual rto_min value */\n static inline u32 tcp_rto_min(struct sock *sk)\n {\ndiff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h\nindex b3f346fb9fe3..758f12b58541 100644\n--- a/include/uapi/linux/snmp.h\n+++ b/include/uapi/linux/snmp.h\n@@ -184,7 +184,9 @@ enum\n \tLINUX_MIB_DELAYEDACKLOST,\t\t/* DelayedACKLost */\n \tLINUX_MIB_LISTENOVERFLOWS,\t\t/* ListenOverflows */\n \tLINUX_MIB_LISTENDROPS,\t\t\t/* ListenDrops */\n+\tLINUX_MIB_TCPHPHITS,\t\t\t/* TCPHPHits */\n \tLINUX_MIB_TCPPUREACKS,\t\t\t/* TCPPureAcks */\n+\tLINUX_MIB_TCPHPACKS,\t\t\t/* TCPHPAcks */\n \tLINUX_MIB_TCPRENORECOVERY,\t\t/* TCPRenoRecovery */\n \tLINUX_MIB_TCPSACKRECOVERY,\t\t/* TCPSackRecovery */\n \tLINUX_MIB_TCPSACKRENEGING,\t\t/* TCPSACKReneging */\ndiff --git a/net/ipv4/proc.c b/net/ipv4/proc.c\nindex b6d3fe03feb3..127153f1ed8a 100644\n--- a/net/ipv4/proc.c\n+++ b/net/ipv4/proc.c\n@@ -206,7 +206,9 @@ static const struct snmp_mib snmp4_net_list[] = {\n \tSNMP_MIB_ITEM(\"DelayedACKLost\", LINUX_MIB_DELAYEDACKLOST),\n \tSNMP_MIB_ITEM(\"ListenOverflows\", LINUX_MIB_LISTENOVERFLOWS),\n \tSNMP_MIB_ITEM(\"ListenDrops\", LINUX_MIB_LISTENDROPS),\n+\tSNMP_MIB_ITEM(\"TCPHPHits\", LINUX_MIB_TCPHPHITS),\n \tSNMP_MIB_ITEM(\"TCPPureAcks\", LINUX_MIB_TCPPUREACKS),\n+\tSNMP_MIB_ITEM(\"TCPHPAcks\", LINUX_MIB_TCPHPACKS),\n \tSNMP_MIB_ITEM(\"TCPRenoRecovery\", LINUX_MIB_TCPRENORECOVERY),\n \tSNMP_MIB_ITEM(\"TCPSackRecovery\", LINUX_MIB_TCPSACKRECOVERY),\n \tSNMP_MIB_ITEM(\"TCPSACKReneging\", LINUX_MIB_TCPSACKRENEGING),\ndiff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c\nindex 566083ee2654..21ca2df274c5 100644\n--- a/net/ipv4/tcp.c\n+++ b/net/ipv4/tcp.c\n@@ -1963,8 +1963,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,\n \t\ttcp_rcv_space_adjust(sk);\n \n skip_copy:\n-\t\tif (tp->urg_data && after(tp->copied_seq, tp->urg_seq))\n+\t\tif (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) {\n \t\t\ttp->urg_data = 0;\n+\t\t\ttcp_fast_path_check(sk);\n+\t\t}\n \t\tif (used + offset < skb->len)\n \t\t\tcontinue;\n \ndiff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c\nindex a0e436366d31..c5d7656beeee 100644\n--- a/net/ipv4/tcp_input.c\n+++ b/net/ipv4/tcp_input.c\n@@ -103,6 +103,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;\n #define FLAG_DATA_SACKED\t0x20 /* New SACK.\t\t\t\t*/\n #define FLAG_ECE\t\t0x40 /* ECE in this ACK\t\t\t\t*/\n #define FLAG_LOST_RETRANS\t0x80 /* This ACK marks some retransmission lost */\n+#define FLAG_SLOWPATH\t\t0x100 /* Do not skip RFC checks for window update.*/\n #define FLAG_ORIG_SACK_ACKED\t0x200 /* Never retransmitted data are (s)acked\t*/\n #define FLAG_SND_UNA_ADVANCED\t0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */\n #define FLAG_DSACKING_ACK\t0x800 /* SACK blocks contained D-SACK info */\n@@ -3371,6 +3372,12 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32\n \t\tif (tp->snd_wnd != nwin) {\n \t\t\ttp->snd_wnd = nwin;\n \n+\t\t\t/* Note, it is the only place, where\n+\t\t\t * fast path is recovered for sending TCP.\n+\t\t\t */\n+\t\t\ttp->pred_flags = 0;\n+\t\t\ttcp_fast_path_check(sk);\n+\n \t\t\tif (tcp_send_head(sk))\n \t\t\t\ttcp_slow_start_after_idle_check(sk);\n \n@@ -3592,7 +3599,19 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)\n \tif (flag & FLAG_UPDATE_TS_RECENT)\n \t\ttcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);\n \n-\t{\n+\tif (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {\n+\t\t/* Window is constant, pure forward advance.\n+\t\t * No more checks are required.\n+\t\t * Note, we use the fact that SND.UNA>=SND.WL2.\n+\t\t */\n+\t\ttcp_update_wl(tp, ack_seq);\n+\t\ttcp_snd_una_update(tp, ack);\n+\t\tflag |= FLAG_WIN_UPDATE;\n+\n+\t\ttcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);\n+\n+\t\tNET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);\n+\t} else {\n \t\tu32 ack_ev_flags = CA_ACK_SLOWPATH;\n \n \t\tif (ack_seq != TCP_SKB_CB(skb)->end_seq)\n@@ -4407,6 +4426,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)\n \tif (TCP_SKB_CB(skb)->has_rxtstamp)\n \t\tTCP_SKB_CB(skb)->swtstamp = skb->tstamp;\n \n+\t/* Disable header prediction. */\n+\ttp->pred_flags = 0;\n \tinet_csk_schedule_ack(sk);\n \n \tNET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);\n@@ -4647,6 +4668,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)\n \t\tif (tp->rx_opt.num_sacks)\n \t\t\ttcp_sack_remove(tp);\n \n+\t\ttcp_fast_path_check(sk);\n+\n \t\tif (eaten > 0)\n \t\t\tkfree_skb_partial(skb, fragstolen);\n \t\tif (!sock_flag(sk, SOCK_DEAD))\n@@ -4972,6 +4995,7 @@ static int tcp_prune_queue(struct sock *sk)\n \tNET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);\n \n \t/* Massive buffer overcommit. */\n+\ttp->pred_flags = 0;\n \treturn -1;\n }\n \n@@ -5143,6 +5167,9 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)\n \n \ttp->urg_data = TCP_URG_NOTYET;\n \ttp->urg_seq = ptr;\n+\n+\t/* Disable header prediction. */\n+\ttp->pred_flags = 0;\n }\n \n /* This is the 'fast' part of urgent handling. */\n@@ -5301,6 +5328,26 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,\n \n /*\n *\tTCP receive function for the ESTABLISHED state.\n+ *\n+ *\tIt is split into a fast path and a slow path. The fast path is\n+ * \tdisabled when:\n+ *\t- A zero window was announced from us - zero window probing\n+ * is only handled properly in the slow path.\n+ *\t- Out of order segments arrived.\n+ *\t- Urgent data is expected.\n+ *\t- There is no buffer space left\n+ *\t- Unexpected TCP flags/window values/header lengths are received\n+ *\t (detected by checking the TCP header against pred_flags)\n+ *\t- Data is sent in both directions. Fast path only supports pure senders\n+ *\t or pure receivers (this means either the sequence number or the ack\n+ *\t value must stay constant)\n+ *\t- Unexpected TCP option.\n+ *\n+ *\tWhen these conditions are not satisfied it drops into a standard\n+ *\treceive procedure patterned after RFC793 to handle all cases.\n+ *\tThe first three cases are guaranteed by proper pred_flags setting,\n+ *\tthe rest is checked inline. Fast processing is turned on in\n+ *\ttcp_data_queue when everything is OK.\n */\n void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,\n \t\t\t const struct tcphdr *th)\n@@ -5311,19 +5358,144 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,\n \ttcp_mstamp_refresh(tp);\n \tif (unlikely(!sk->sk_rx_dst))\n \t\tinet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);\n+\t/*\n+\t *\tHeader prediction.\n+\t *\tThe code loosely follows the one in the famous\n+\t *\t\"30 instruction TCP receive\" Van Jacobson mail.\n+\t *\n+\t *\tVan's trick is to deposit buffers into socket queue\n+\t *\ton a device interrupt, to call tcp_recv function\n+\t *\ton the receive process context and checksum and copy\n+\t *\tthe buffer to user space. smart...\n+\t *\n+\t *\tOur current scheme is not silly either but we take the\n+\t *\textra cost of the net_bh soft interrupt processing...\n+\t *\tWe do checksum and copy also but from device to kernel.\n+\t */\n \n \ttp->rx_opt.saw_tstamp = 0;\n \n+\t/*\tpred_flags is 0xS?10 << 16 + snd_wnd\n+\t *\tif header_prediction is to be made\n+\t *\t'S' will always be tp->tcp_header_len >> 2\n+\t *\t'?' will be 0 for the fast path, otherwise pred_flags is 0 to\n+\t * turn it off\t(when there are holes in the receive\n+\t *\t space for instance)\n+\t *\tPSH flag is ignored.\n+\t */\n+\n+\tif ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&\n+\t TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&\n+\t !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {\n+\t\tint tcp_header_len = tp->tcp_header_len;\n+\n+\t\t/* Timestamp header prediction: tcp_header_len\n+\t\t * is automatically equal to th->doff*4 due to pred_flags\n+\t\t * match.\n+\t\t */\n+\n+\t\t/* Check timestamp */\n+\t\tif (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {\n+\t\t\t/* No? Slow path! */\n+\t\t\tif (!tcp_parse_aligned_timestamp(tp, th))\n+\t\t\t\tgoto slow_path;\n+\n+\t\t\t/* If PAWS failed, check it more carefully in slow path */\n+\t\t\tif ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)\n+\t\t\t\tgoto slow_path;\n+\n+\t\t\t/* DO NOT update ts_recent here, if checksum fails\n+\t\t\t * and timestamp was corrupted part, it will result\n+\t\t\t * in a hung connection since we will drop all\n+\t\t\t * future packets due to the PAWS test.\n+\t\t\t */\n+\t\t}\n+\n+\t\tif (len <= tcp_header_len) {\n+\t\t\t/* Bulk data transfer: sender */\n+\t\t\tif (len == tcp_header_len) {\n+\t\t\t\t/* Predicted packet is in window by definition.\n+\t\t\t\t * seq == rcv_nxt and rcv_wup <= rcv_nxt.\n+\t\t\t\t * Hence, check seq<=rcv_wup reduces to:\n+\t\t\t\t */\n+\t\t\t\tif (tcp_header_len ==\n+\t\t\t\t (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&\n+\t\t\t\t tp->rcv_nxt == tp->rcv_wup)\n+\t\t\t\t\ttcp_store_ts_recent(tp);\n+\n+\t\t\t\t/* We know that such packets are checksummed\n+\t\t\t\t * on entry.\n+\t\t\t\t */\n+\t\t\t\ttcp_ack(sk, skb, 0);\n+\t\t\t\t__kfree_skb(skb);\n+\t\t\t\ttcp_data_snd_check(sk);\n+\t\t\t\treturn;\n+\t\t\t} else { /* Header too small */\n+\t\t\t\tTCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);\n+\t\t\t\tgoto discard;\n+\t\t\t}\n+\t\t} else {\n+\t\t\tint eaten = 0;\n+\t\t\tbool fragstolen = false;\n+\n+\t\t\tif (tcp_checksum_complete(skb))\n+\t\t\t\tgoto csum_error;\n+\n+\t\t\tif ((int)skb->truesize > sk->sk_forward_alloc)\n+\t\t\t\tgoto step5;\n+\n+\t\t\t/* Predicted packet is in window by definition.\n+\t\t\t * seq == rcv_nxt and rcv_wup <= rcv_nxt.\n+\t\t\t * Hence, check seq<=rcv_wup reduces to:\n+\t\t\t */\n+\t\t\tif (tcp_header_len ==\n+\t\t\t (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&\n+\t\t\t tp->rcv_nxt == tp->rcv_wup)\n+\t\t\t\ttcp_store_ts_recent(tp);\n+\n+\t\t\ttcp_rcv_rtt_measure_ts(sk, skb);\n+\n+\t\t\tNET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);\n+\n+\t\t\t/* Bulk data transfer: receiver */\n+\t\t\teaten = tcp_queue_rcv(sk, skb, tcp_header_len,\n+\t\t\t\t\t &fragstolen);\n+\n+\t\t\ttcp_event_data_recv(sk, skb);\n+\n+\t\t\tif (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {\n+\t\t\t\t/* Well, only one small jumplet in fast path... */\n+\t\t\t\ttcp_ack(sk, skb, FLAG_DATA);\n+\t\t\t\ttcp_data_snd_check(sk);\n+\t\t\t\tif (!inet_csk_ack_scheduled(sk))\n+\t\t\t\t\tgoto no_ack;\n+\t\t\t}\n+\n+\t\t\t__tcp_ack_snd_check(sk, 0);\n+no_ack:\n+\t\t\tif (eaten)\n+\t\t\t\tkfree_skb_partial(skb, fragstolen);\n+\t\t\tsk->sk_data_ready(sk);\n+\t\t\treturn;\n+\t\t}\n+\t}\n+\n+slow_path:\n \tif (len < (th->doff << 2) || tcp_checksum_complete(skb))\n \t\tgoto csum_error;\n \n \tif (!th->ack && !th->rst && !th->syn)\n \t\tgoto discard;\n \n+\t/*\n+\t *\tStandard slow path.\n+\t */\n+\n \tif (!tcp_validate_incoming(sk, skb, th, 1))\n \t\treturn;\n \n-\tif (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0)\n+step5:\n+\tif (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)\n \t\tgoto discard;\n \n \ttcp_rcv_rtt_measure_ts(sk, skb);\n@@ -5376,6 +5548,11 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)\n \n \tif (sock_flag(sk, SOCK_KEEPOPEN))\n \t\tinet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));\n+\n+\tif (!tp->rx_opt.snd_wscale)\n+\t\t__tcp_fast_path_on(tp, tp->snd_wnd);\n+\telse\n+\t\ttp->pred_flags = 0;\n }\n \n static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,\n@@ -5504,7 +5681,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,\n \t\ttcp_ecn_rcv_synack(tp, th);\n \n \t\ttcp_init_wl(tp, TCP_SKB_CB(skb)->seq);\n-\t\ttcp_ack(sk, skb, 0);\n+\t\ttcp_ack(sk, skb, FLAG_SLOWPATH);\n \n \t\t/* Ok.. it's good. Set up sequence numbers and\n \t\t * move to established.\n@@ -5740,8 +5917,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)\n \t\treturn 0;\n \n \t/* step 5: check the ACK field */\n-\n-\tacceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT |\n+\tacceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |\n+\t\t\t\t FLAG_UPDATE_TS_RECENT |\n \t\t\t\t FLAG_NO_CHALLENGE_ACK) > 0;\n \n \tif (!acceptable) {\n@@ -5809,6 +5986,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)\n \t\ttp->lsndtime = tcp_jiffies32;\n \n \t\ttcp_initialize_rcv_mss(sk);\n+\t\ttcp_fast_path_on(tp);\n \t\tbreak;\n \n \tcase TCP_FIN_WAIT1: {\ndiff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c\nindex 1537b87c657f..188a6f31356d 100644\n--- a/net/ipv4/tcp_minisocks.c\n+++ b/net/ipv4/tcp_minisocks.c\n@@ -436,6 +436,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,\n \t\tstruct tcp_sock *newtp = tcp_sk(newsk);\n \n \t\t/* Now setup tcp_sock */\n+\t\tnewtp->pred_flags = 0;\n+\n \t\tnewtp->rcv_wup = newtp->copied_seq =\n \t\tnewtp->rcv_nxt = treq->rcv_isn + 1;\n \t\tnewtp->segs_in = 1;\ndiff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c\nindex 3e0d19631534..5b6690d05abb 100644\n--- a/net/ipv4/tcp_output.c\n+++ b/net/ipv4/tcp_output.c\n@@ -295,7 +295,9 @@ static u16 tcp_select_window(struct sock *sk)\n \t/* RFC1323 scaling applied */\n \tnew_win >>= tp->rx_opt.rcv_wscale;\n \n+\t/* If we advertise zero window, disable fast path. */\n \tif (new_win == 0) {\n+\t\ttp->pred_flags = 0;\n \t\tif (old_win)\n \t\t\tNET_INC_STATS(sock_net(sk),\n \t\t\t\t LINUX_MIB_TCPTOZEROWINDOWADV);\n", "prefixes": [ "net-next", "2/2" ] }