Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/2228723/?format=api
{ "id": 2228723, "url": "http://patchwork.ozlabs.org/api/patches/2228723/?format=api", "web_url": "http://patchwork.ozlabs.org/project/openvswitch/patch/20260427091153.3210301-3-amorenoz@redhat.com/", "project": { "id": 47, "url": "http://patchwork.ozlabs.org/api/projects/47/?format=api", "name": "Open vSwitch", "link_name": "openvswitch", "list_id": "ovs-dev.openvswitch.org", "list_email": "ovs-dev@openvswitch.org", "web_url": "http://openvswitch.org/", "scm_url": "git@github.com:openvswitch/ovs.git", "webscm_url": "https://github.com/openvswitch/ovs", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<20260427091153.3210301-3-amorenoz@redhat.com>", "list_archive_url": null, "date": "2026-04-27T09:11:48", "name": "[ovs-dev,net-next,v3,2/2] net: openvswitch: decouple flow_table from ovs_mutex", "commit_ref": null, "pull_url": null, "state": "new", "archived": false, "hash": "e910dc77484af036ffcf3349057754cdc96e9780", "submitter": { "id": 77477, "url": "http://patchwork.ozlabs.org/api/people/77477/?format=api", "name": "Adrian Moreno", "email": "amorenoz@redhat.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/openvswitch/patch/20260427091153.3210301-3-amorenoz@redhat.com/mbox/", "series": [ { "id": 501610, "url": "http://patchwork.ozlabs.org/api/series/501610/?format=api", "web_url": "http://patchwork.ozlabs.org/project/openvswitch/list/?series=501610", "date": "2026-04-27T09:11:46", "name": "Decouple flow operations from RTNL", "version": 3, "mbox": "http://patchwork.ozlabs.org/series/501610/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/2228723/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/2228723/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<ovs-dev-bounces@openvswitch.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "dev@openvswitch.org" ], "Delivered-To": [ "patchwork-incoming@legolas.ozlabs.org", "ovs-dev@lists.linuxfoundation.org" ], "Authentication-Results": [ "legolas.ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (1024-bit key;\n unprotected) header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256\n header.s=mimecast20190719 header.b=UyKTTL47;\n\tdkim-atps=neutral", "legolas.ozlabs.org;\n spf=pass (sender SPF authorized) smtp.mailfrom=openvswitch.org\n (client-ip=2605:bc80:3010::137; helo=smtp4.osuosl.org;\n envelope-from=ovs-dev-bounces@openvswitch.org; receiver=patchwork.ozlabs.org)", "smtp4.osuosl.org;\n\tdkim=fail reason=\"signature verification failed\" (1024-bit key)\n header.d=redhat.com header.i=@redhat.com header.a=rsa-sha256\n header.s=mimecast20190719 header.b=UyKTTL47", "smtp4.osuosl.org; dmarc=pass (p=quarantine dis=none)\n header.from=redhat.com" ], "Received": [ "from smtp4.osuosl.org (smtp4.osuosl.org [IPv6:2605:bc80:3010::137])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\t key-exchange x25519 server-signature ECDSA (secp384r1) server-digest SHA384)\n\t(No client certificate requested)\n\tby legolas.ozlabs.org (Postfix) with ESMTPS id 4g3yWp0b6Lz1xvV\n\tfor <incoming@patchwork.ozlabs.org>; Mon, 27 Apr 2026 19:12:38 +1000 (AEST)", "from localhost (localhost [127.0.0.1])\n\tby smtp4.osuosl.org (Postfix) with ESMTP id 6674542F61;\n\tMon, 27 Apr 2026 09:12:36 +0000 (UTC)", "from smtp4.osuosl.org ([127.0.0.1])\n by localhost (smtp4.osuosl.org [127.0.0.1]) (amavis, port 10024) with ESMTP\n id P5wZZ8rAsy3S; Mon, 27 Apr 2026 09:12:31 +0000 (UTC)", "from lists.linuxfoundation.org (lf-lists.osuosl.org\n [IPv6:2605:bc80:3010:104::8cd3:938])\n\tby smtp4.osuosl.org (Postfix) with ESMTPS id E25F342F72;\n\tMon, 27 Apr 2026 09:12:30 +0000 (UTC)", "from lf-lists.osuosl.org (localhost [127.0.0.1])\n\tby lists.linuxfoundation.org (Postfix) with ESMTP id BD169C058F;\n\tMon, 27 Apr 2026 09:12:30 +0000 (UTC)", "from smtp4.osuosl.org (smtp4.osuosl.org [IPv6:2605:bc80:3010::137])\n by lists.linuxfoundation.org (Postfix) with ESMTP id 2C45FC04FB\n for <dev@openvswitch.org>; Mon, 27 Apr 2026 09:12:30 +0000 (UTC)", "from localhost (localhost [127.0.0.1])\n by smtp4.osuosl.org (Postfix) with ESMTP id 0C77242F58\n for <dev@openvswitch.org>; Mon, 27 Apr 2026 09:12:25 +0000 (UTC)", "from smtp4.osuosl.org ([127.0.0.1])\n by localhost (smtp4.osuosl.org [127.0.0.1]) (amavis, port 10024) with ESMTP\n id nC_-KVEFlgRq for <dev@openvswitch.org>;\n Mon, 27 Apr 2026 09:12:23 +0000 (UTC)", "from us-smtp-delivery-124.mimecast.com\n (us-smtp-delivery-124.mimecast.com [170.10.129.124])\n by smtp4.osuosl.org (Postfix) with ESMTPS id D4C7C42F5A\n for <dev@openvswitch.org>; Mon, 27 Apr 2026 09:12:22 +0000 (UTC)", "from mx-prod-mc-05.mail-002.prod.us-west-2.aws.redhat.com\n (ec2-54-186-198-63.us-west-2.compute.amazonaws.com [54.186.198.63]) by\n relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3,\n cipher=TLS_AES_256_GCM_SHA384) id us-mta-567-ehW92uRnP7uf8eUjzlpw-A-1; Mon,\n 27 Apr 2026 05:12:19 -0400", "from mx-prod-int-08.mail-002.prod.us-west-2.aws.redhat.com\n (mx-prod-int-08.mail-002.prod.us-west-2.aws.redhat.com [10.30.177.111])\n (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest\n SHA256)\n (No client certificate requested)\n by mx-prod-mc-05.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTPS\n id 385F7195608D; Mon, 27 Apr 2026 09:12:18 +0000 (UTC)", "from antares.redhat.com (unknown [10.44.33.10])\n by mx-prod-int-08.mail-002.prod.us-west-2.aws.redhat.com (Postfix) with ESMTP\n id F02AB1800446; Mon, 27 Apr 2026 09:12:11 +0000 (UTC)" ], "X-Virus-Scanned": [ "amavis at osuosl.org", "amavis at osuosl.org" ], "X-Comment": "SPF check N/A for local connections -\n client-ip=2605:bc80:3010:104::8cd3:938; helo=lists.linuxfoundation.org;\n envelope-from=ovs-dev-bounces@openvswitch.org; receiver=<UNKNOWN> ", "DKIM-Filter": [ "OpenDKIM Filter v2.11.0 smtp4.osuosl.org E25F342F72", "OpenDKIM Filter v2.11.0 smtp4.osuosl.org D4C7C42F5A" ], "Received-SPF": "Pass (mailfrom) identity=mailfrom; client-ip=170.10.129.124;\n helo=us-smtp-delivery-124.mimecast.com; envelope-from=amorenoz@redhat.com;\n receiver=<UNKNOWN>", "DMARC-Filter": "OpenDMARC Filter v1.4.2 smtp4.osuosl.org D4C7C42F5A", "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n s=mimecast20190719; t=1777281141;\n h=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n to:to:cc:cc:mime-version:mime-version:content-type:content-type:\n content-transfer-encoding:content-transfer-encoding:\n in-reply-to:in-reply-to:references:references;\n bh=fwKBGjfV1r+fQUBVUmNS5djQl//HV1ZBg8Tyat/NBj4=;\n b=UyKTTL47h/x0LNdPUq+r9xRgrUZkY9ap2jv/oCggLYoWvM+uPL5k/ZgBsYzjPTPoxCmRDO\n SrcMHQZWlUqdY0sZHVBV+3dk0CRZMZ5FDtlk/v+HD+rdQnGzxIB/6vqap+pC1QxlQSQOee\n uSlt3BIVGnWmZqy+FaqVukK9ZbPT9M8=", "X-MC-Unique": "ehW92uRnP7uf8eUjzlpw-A-1", "X-Mimecast-MFC-AGG-ID": "ehW92uRnP7uf8eUjzlpw-A_1777281138", "To": "netdev@vger.kernel.org", "Date": "Mon, 27 Apr 2026 11:11:48 +0200", "Message-ID": "<20260427091153.3210301-3-amorenoz@redhat.com>", "In-Reply-To": "<20260427091153.3210301-1-amorenoz@redhat.com>", "References": "<20260427091153.3210301-1-amorenoz@redhat.com>", "MIME-Version": "1.0", "X-Scanned-By": "MIMEDefang 3.4.1 on 10.30.177.111", "X-Mimecast-MFC-PROC-ID": "_0U044V9NtTBPtSjKKn9a5htenWOBGVjXb--94fRO5Y_1777281138", "X-Mimecast-Originator": "redhat.com", "Subject": "[ovs-dev] [PATCH net-next v3 2/2] net: openvswitch: decouple\n flow_table from ovs_mutex", "X-BeenThere": "ovs-dev@openvswitch.org", "X-Mailman-Version": "2.1.30", "Precedence": "list", "List-Id": "<ovs-dev.openvswitch.org>", "List-Unsubscribe": "<https://mail.openvswitch.org/mailman/options/ovs-dev>,\n <mailto:ovs-dev-request@openvswitch.org?subject=unsubscribe>", "List-Archive": "<http://mail.openvswitch.org/pipermail/ovs-dev/>", "List-Post": "<mailto:ovs-dev@openvswitch.org>", "List-Help": "<mailto:ovs-dev-request@openvswitch.org?subject=help>", "List-Subscribe": "<https://mail.openvswitch.org/mailman/listinfo/ovs-dev>,\n <mailto:ovs-dev-request@openvswitch.org?subject=subscribe>", "From": "Adrian Moreno via dev <ovs-dev@openvswitch.org>", "Reply-To": "Adrian Moreno <amorenoz@redhat.com>", "Cc": "\"open list:OPENVSWITCH\" <dev@openvswitch.org>,\n open list <linux-kernel@vger.kernel.org>, Ilya Maximets <i.maximets@ovn.org>,\n Eric Dumazet <edumazet@google.com>, Simon Horman <horms@kernel.org>,\n Jakub Kicinski <kuba@kernel.org>, pabeni@redhat.com,\n \"David S. Miller\" <davem@davemloft.net>", "Content-Type": "text/plain; charset=\"us-ascii\"", "Content-Transfer-Encoding": "7bit", "Errors-To": "ovs-dev-bounces@openvswitch.org", "Sender": "\"dev\" <ovs-dev-bounces@openvswitch.org>" }, "content": "In order to protect flow operations from RTNL contention, this patch\ndecouples flow_table modifications from ovs_mutex by means of the\nfollowing:\n\n1 - Create a new mutex inside the flow_table that protects it from\nconcurrent modifications.\nPutting the mutex inside flow_table makes it easier to consume for\nfunctions inside flow_table.c that do not currently take pointers to the\ndatapath.\nSome function signatures need to be changed to accept flow_table so that\nlockdep checks can be performed.\n\n2 - Create a reference count to temporarily extend rcu protection from\nthe datapath to the flow_table.\nOne reference is held by the datapath, the other is temporarily\nincreased during flow modifications.\n\nSigned-off-by: Adrian Moreno <amorenoz@redhat.com>\n---\n net/openvswitch/datapath.c | 230 ++++++++++++++++++++++-------------\n net/openvswitch/flow.c | 13 +-\n net/openvswitch/flow.h | 9 +-\n net/openvswitch/flow_table.c | 173 ++++++++++++++++----------\n net/openvswitch/flow_table.h | 53 +++++++-\n 5 files changed, 318 insertions(+), 160 deletions(-)", "diff": "diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c\nindex b2243ba866a6..0495114012ba 100644\n--- a/net/openvswitch/datapath.c\n+++ b/net/openvswitch/datapath.c\n@@ -88,13 +88,17 @@ static void ovs_notify(struct genl_family *family,\n * DOC: Locking:\n *\n * All writes e.g. Writes to device state (add/remove datapath, port, set\n- * operations on vports, etc.), Writes to other state (flow table\n- * modifications, set miscellaneous datapath parameters, etc.) are protected\n- * by ovs_lock.\n+ * operations on vports, etc.) and writes to other datapath parameters\n+ * are protected by ovs_lock.\n+ *\n+ * Writes to the flow table are NOT protected by ovs_lock. Instead, a per-table\n+ * mutex and reference count are used (see comment above \"struct flow_table\"\n+ * definition). On some few occasions, the per-flow table mutex is nested\n+ * inside ovs_mutex.\n *\n * Reads are protected by RCU.\n *\n- * There are a few special cases (mostly stats) that have their own\n+ * There are a few other special cases (mostly stats) that have their own\n * synchronization but they nest under all of above and don't interact with\n * each other.\n *\n@@ -759,16 +763,19 @@ static struct genl_family dp_packet_genl_family __ro_after_init = {\n static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,\n \t\t\t struct ovs_dp_megaflow_stats *mega_stats)\n {\n-\tstruct flow_table *table = ovsl_dereference(dp->table);\n+\tstruct flow_table *table;\n \tint i;\n \n \tmemset(mega_stats, 0, sizeof(*mega_stats));\n \tmemset(stats, 0, sizeof(*stats));\n \n+\trcu_read_lock();\n+\ttable = rcu_dereference(dp->table);\n \tif (table) {\n \t\tstats->n_flows = ovs_flow_tbl_count(table);\n \t\tmega_stats->n_masks = ovs_flow_tbl_num_masks(table);\n \t}\n+\trcu_read_unlock();\n \n \tstats->n_hit = stats->n_missed = stats->n_lost = 0;\n \n@@ -840,15 +847,16 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,\n \t\t+ nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */\n }\n \n-/* Called with ovs_mutex or RCU read lock. */\n+/* Called with table->lock or RCU read lock. */\n static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,\n+\t\t\t\t const struct flow_table *table,\n \t\t\t\t struct sk_buff *skb)\n {\n \tstruct ovs_flow_stats stats;\n \t__be16 tcp_flags;\n \tunsigned long used;\n \n-\tovs_flow_stats_get(flow, &stats, &used, &tcp_flags);\n+\tovs_flow_stats_get(flow, table, &stats, &used, &tcp_flags);\n \n \tif (used &&\n \t nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),\n@@ -868,8 +876,9 @@ static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,\n \treturn 0;\n }\n \n-/* Called with ovs_mutex or RCU read lock. */\n+/* Called with RCU read lock or table->lock held. */\n static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,\n+\t\t\t\t const struct flow_table *table,\n \t\t\t\t struct sk_buff *skb, int skb_orig_len)\n {\n \tstruct nlattr *start;\n@@ -889,7 +898,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,\n \tif (start) {\n \t\tconst struct sw_flow_actions *sf_acts;\n \n-\t\tsf_acts = rcu_dereference_ovsl(flow->sf_acts);\n+\t\tsf_acts = rcu_dereference_ovs_tbl(flow->sf_acts, table);\n \t\terr = ovs_nla_put_actions(sf_acts->actions,\n \t\t\t\t\t sf_acts->actions_len, skb);\n \n@@ -908,8 +917,10 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,\n \treturn 0;\n }\n \n-/* Called with ovs_mutex or RCU read lock. */\n-static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,\n+/* Called with table->lock or RCU read lock. */\n+static int ovs_flow_cmd_fill_info(const struct sw_flow *flow,\n+\t\t\t\t const struct flow_table *table,\n+\t\t\t\t int dp_ifindex,\n \t\t\t\t struct sk_buff *skb, u32 portid,\n \t\t\t\t u32 seq, u32 flags, u8 cmd, u32 ufid_flags)\n {\n@@ -940,12 +951,12 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,\n \t\t\tgoto error;\n \t}\n \n-\terr = ovs_flow_cmd_fill_stats(flow, skb);\n+\terr = ovs_flow_cmd_fill_stats(flow, table, skb);\n \tif (err)\n \t\tgoto error;\n \n \tif (should_fill_actions(ufid_flags)) {\n-\t\terr = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);\n+\t\terr = ovs_flow_cmd_fill_actions(flow, table, skb, skb_orig_len);\n \t\tif (err)\n \t\t\tgoto error;\n \t}\n@@ -979,8 +990,9 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act\n \treturn skb;\n }\n \n-/* Called with ovs_mutex. */\n+/* Called with table->lock. */\n static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,\n+\t\t\t\t\t const struct flow_table *table,\n \t\t\t\t\t int dp_ifindex,\n \t\t\t\t\t struct genl_info *info, u8 cmd,\n \t\t\t\t\t bool always, u32 ufid_flags)\n@@ -988,12 +1000,12 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,\n \tstruct sk_buff *skb;\n \tint retval;\n \n-\tskb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),\n+\tskb = ovs_flow_cmd_alloc_info(ovs_tbl_dereference(flow->sf_acts, table),\n \t\t\t\t &flow->id, info, always, ufid_flags);\n \tif (IS_ERR_OR_NULL(skb))\n \t\treturn skb;\n \n-\tretval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,\n+\tretval = ovs_flow_cmd_fill_info(flow, table, dp_ifindex, skb,\n \t\t\t\t\tinfo->snd_portid, info->snd_seq, 0,\n \t\t\t\t\tcmd, ufid_flags);\n \tif (WARN_ON_ONCE(retval < 0)) {\n@@ -1076,17 +1088,25 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)\n \t\tgoto err_kfree_acts;\n \t}\n \n-\tovs_lock();\n+\trcu_read_lock();\n \tdp = get_dp(net, ovs_header->dp_ifindex);\n \tif (unlikely(!dp)) {\n \t\terror = -ENODEV;\n-\t\tgoto err_unlock_ovs;\n+\t\trcu_read_unlock();\n+\t\tgoto err_kfree_reply;\n \t}\n-\ttable = ovsl_dereference(dp->table);\n-\tif (!table) {\n+\ttable = rcu_dereference(dp->table);\n+\tif (!table || !ovs_flow_tbl_get(table)) {\n \t\terror = -ENODEV;\n-\t\tgoto err_unlock_ovs;\n+\t\trcu_read_unlock();\n+\t\tgoto err_kfree_reply;\n \t}\n+\trcu_read_unlock();\n+\n+\t/* It is safe to dereference \"table\" after leaving rcu read-protected\n+\t * region because it's pinned by refcount.\n+\t */\n+\tmutex_lock(&table->lock);\n \n \t/* Check if this is a duplicate flow */\n \tif (ovs_identifier_is_ufid(&new_flow->id))\n@@ -1100,11 +1120,11 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)\n \t\terror = ovs_flow_tbl_insert(table, new_flow, &mask);\n \t\tif (unlikely(error)) {\n \t\t\tacts = NULL;\n-\t\t\tgoto err_unlock_ovs;\n+\t\t\tgoto err_unlock_tbl;\n \t\t}\n \n \t\tif (unlikely(reply)) {\n-\t\t\terror = ovs_flow_cmd_fill_info(new_flow,\n+\t\t\terror = ovs_flow_cmd_fill_info(new_flow, table,\n \t\t\t\t\t\t ovs_header->dp_ifindex,\n \t\t\t\t\t\t reply, info->snd_portid,\n \t\t\t\t\t\t info->snd_seq, 0,\n@@ -1112,7 +1132,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)\n \t\t\t\t\t\t ufid_flags);\n \t\t\tBUG_ON(error < 0);\n \t\t}\n-\t\tovs_unlock();\n+\t\tmutex_unlock(&table->lock);\n+\t\tovs_flow_tbl_put(table);\n \t} else {\n \t\tstruct sw_flow_actions *old_acts;\n \n@@ -1125,7 +1146,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)\n \t\tif (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE\n \t\t\t\t\t\t\t | NLM_F_EXCL))) {\n \t\t\terror = -EEXIST;\n-\t\t\tgoto err_unlock_ovs;\n+\t\t\tgoto err_unlock_tbl;\n \t\t}\n \t\t/* The flow identifier has to be the same for flow updates.\n \t\t * Look for any overlapping flow.\n@@ -1138,15 +1159,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)\n \t\t\t\tflow = NULL;\n \t\t\tif (!flow) {\n \t\t\t\terror = -ENOENT;\n-\t\t\t\tgoto err_unlock_ovs;\n+\t\t\t\tgoto err_unlock_tbl;\n \t\t\t}\n \t\t}\n \t\t/* Update actions. */\n-\t\told_acts = ovsl_dereference(flow->sf_acts);\n+\t\told_acts = ovs_tbl_dereference(flow->sf_acts, table);\n \t\trcu_assign_pointer(flow->sf_acts, acts);\n \n \t\tif (unlikely(reply)) {\n-\t\t\terror = ovs_flow_cmd_fill_info(flow,\n+\t\t\terror = ovs_flow_cmd_fill_info(flow, table,\n \t\t\t\t\t\t ovs_header->dp_ifindex,\n \t\t\t\t\t\t reply, info->snd_portid,\n \t\t\t\t\t\t info->snd_seq, 0,\n@@ -1154,7 +1175,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)\n \t\t\t\t\t\t ufid_flags);\n \t\t\tBUG_ON(error < 0);\n \t\t}\n-\t\tovs_unlock();\n+\t\tmutex_unlock(&table->lock);\n+\t\tovs_flow_tbl_put(table);\n \n \t\tovs_nla_free_flow_actions_rcu(old_acts);\n \t\tovs_flow_free(new_flow, false);\n@@ -1166,8 +1188,10 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)\n \tkfree(key);\n \treturn 0;\n \n-err_unlock_ovs:\n-\tovs_unlock();\n+err_unlock_tbl:\n+\tmutex_unlock(&table->lock);\n+\tovs_flow_tbl_put(table);\n+err_kfree_reply:\n \tkfree_skb(reply);\n err_kfree_acts:\n \tovs_nla_free_flow_actions(acts);\n@@ -1296,17 +1320,26 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)\n \t\t}\n \t}\n \n-\tovs_lock();\n+\trcu_read_lock();\n \tdp = get_dp(net, ovs_header->dp_ifindex);\n \tif (unlikely(!dp)) {\n \t\terror = -ENODEV;\n-\t\tgoto err_unlock_ovs;\n+\t\trcu_read_unlock();\n+\t\tgoto err_free_reply;\n \t}\n-\ttable = ovsl_dereference(dp->table);\n-\tif (!table) {\n+\ttable = rcu_dereference(dp->table);\n+\tif (!table || !ovs_flow_tbl_get(table)) {\n+\t\trcu_read_unlock();\n \t\terror = -ENODEV;\n-\t\tgoto err_unlock_ovs;\n+\t\tgoto err_free_reply;\n \t}\n+\trcu_read_unlock();\n+\n+\t/* It is safe to dereference \"table\" after leaving rcu read-protected\n+\t * region because it's pinned by refcount.\n+\t */\n+\tmutex_lock(&table->lock);\n+\n \t/* Check that the flow exists. */\n \tif (ufid_present)\n \t\tflow = ovs_flow_tbl_lookup_ufid(table, &sfid);\n@@ -1314,16 +1347,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)\n \t\tflow = ovs_flow_tbl_lookup_exact(table, &match);\n \tif (unlikely(!flow)) {\n \t\terror = -ENOENT;\n-\t\tgoto err_unlock_ovs;\n+\t\tgoto err_unlock_tbl;\n \t}\n \n \t/* Update actions, if present. */\n \tif (likely(acts)) {\n-\t\told_acts = ovsl_dereference(flow->sf_acts);\n+\t\told_acts = ovs_tbl_dereference(flow->sf_acts, table);\n \t\trcu_assign_pointer(flow->sf_acts, acts);\n \n \t\tif (unlikely(reply)) {\n-\t\t\terror = ovs_flow_cmd_fill_info(flow,\n+\t\t\terror = ovs_flow_cmd_fill_info(flow, table,\n \t\t\t\t\t\t ovs_header->dp_ifindex,\n \t\t\t\t\t\t reply, info->snd_portid,\n \t\t\t\t\t\t info->snd_seq, 0,\n@@ -1333,20 +1366,22 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)\n \t\t}\n \t} else {\n \t\t/* Could not alloc without acts before locking. */\n-\t\treply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,\n+\t\treply = ovs_flow_cmd_build_info(flow, table,\n+\t\t\t\t\t\tovs_header->dp_ifindex,\n \t\t\t\t\t\tinfo, OVS_FLOW_CMD_SET, false,\n \t\t\t\t\t\tufid_flags);\n \n \t\tif (IS_ERR(reply)) {\n \t\t\terror = PTR_ERR(reply);\n-\t\t\tgoto err_unlock_ovs;\n+\t\t\tgoto err_unlock_tbl;\n \t\t}\n \t}\n \n \t/* Clear stats. */\n \tif (a[OVS_FLOW_ATTR_CLEAR])\n-\t\tovs_flow_stats_clear(flow);\n-\tovs_unlock();\n+\t\tovs_flow_stats_clear(flow, table);\n+\tmutex_unlock(&table->lock);\n+\tovs_flow_tbl_put(table);\n \n \tif (reply)\n \t\tovs_notify(&dp_flow_genl_family, reply, info);\n@@ -1355,8 +1390,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)\n \n \treturn 0;\n \n-err_unlock_ovs:\n-\tovs_unlock();\n+err_unlock_tbl:\n+\tmutex_unlock(&table->lock);\n+\tovs_flow_tbl_put(table);\n+err_free_reply:\n \tkfree_skb(reply);\n err_kfree_acts:\n \tovs_nla_free_flow_actions(acts);\n@@ -1394,17 +1431,24 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)\n \tif (err)\n \t\treturn err;\n \n-\tovs_lock();\n+\trcu_read_lock();\n \tdp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);\n \tif (!dp) {\n-\t\terr = -ENODEV;\n-\t\tgoto unlock;\n+\t\trcu_read_unlock();\n+\t\treturn -ENODEV;\n \t}\n-\ttable = ovsl_dereference(dp->table);\n-\tif (!table) {\n-\t\terr = -ENODEV;\n-\t\tgoto unlock;\n+\ttable = rcu_dereference(dp->table);\n+\tif (!table || !ovs_flow_tbl_get(table)) {\n+\t\trcu_read_unlock();\n+\t\treturn -ENODEV;\n \t}\n+\trcu_read_unlock();\n+\n+\t/* It is safe to dereference \"table\" after leaving rcu read-protected\n+\t * region because it's pinned by refcount.\n+\t */\n+\tmutex_lock(&table->lock);\n+\n \n \tif (ufid_present)\n \t\tflow = ovs_flow_tbl_lookup_ufid(table, &ufid);\n@@ -1415,17 +1459,20 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)\n \t\tgoto unlock;\n \t}\n \n-\treply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,\n-\t\t\t\t\tOVS_FLOW_CMD_GET, true, ufid_flags);\n+\treply = ovs_flow_cmd_build_info(flow, table, ovs_header->dp_ifindex,\n+\t\t\t\t\tinfo, OVS_FLOW_CMD_GET, true,\n+\t\t\t\t\tufid_flags);\n \tif (IS_ERR(reply)) {\n \t\terr = PTR_ERR(reply);\n \t\tgoto unlock;\n \t}\n \n-\tovs_unlock();\n+\tmutex_unlock(&table->lock);\n+\tovs_flow_tbl_put(table);\n \treturn genlmsg_reply(reply, info);\n unlock:\n-\tovs_unlock();\n+\tmutex_unlock(&table->lock);\n+\tovs_flow_tbl_put(table);\n \treturn err;\n }\n \n@@ -1455,17 +1502,24 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)\n \t\t\treturn err;\n \t}\n \n-\tovs_lock();\n+\trcu_read_lock();\n \tdp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);\n \tif (unlikely(!dp)) {\n-\t\terr = -ENODEV;\n-\t\tgoto unlock;\n+\t\trcu_read_unlock();\n+\t\treturn -ENODEV;\n \t}\n-\ttable = ovsl_dereference(dp->table);\n-\tif (!table) {\n-\t\terr = -ENODEV;\n-\t\tgoto unlock;\n+\ttable = rcu_dereference(dp->table);\n+\tif (!table || !ovs_flow_tbl_get(table)) {\n+\t\trcu_read_unlock();\n+\t\treturn -ENODEV;\n \t}\n+\trcu_read_unlock();\n+\n+\t/* It is safe to dereference \"table\" after leaving rcu read-protected\n+\t * region because it's pinned by refcount.\n+\t */\n+\tmutex_lock(&table->lock);\n+\n \n \tif (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {\n \t\terr = ovs_flow_tbl_flush(table);\n@@ -1482,14 +1536,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)\n \t}\n \n \tovs_flow_tbl_remove(table, flow);\n-\tovs_unlock();\n+\tmutex_unlock(&table->lock);\n \n \treply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,\n \t\t\t\t\t&flow->id, info, false, ufid_flags);\n \tif (likely(reply)) {\n \t\tif (!IS_ERR(reply)) {\n \t\t\trcu_read_lock();\t/*To keep RCU checker happy. */\n-\t\t\terr = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,\n+\t\t\terr = ovs_flow_cmd_fill_info(flow, table,\n+\t\t\t\t\t\t ovs_header->dp_ifindex,\n \t\t\t\t\t\t reply, info->snd_portid,\n \t\t\t\t\t\t info->snd_seq, 0,\n \t\t\t\t\t\t OVS_FLOW_CMD_DEL,\n@@ -1508,10 +1563,12 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)\n \t}\n \n out_free:\n+\tovs_flow_tbl_put(table);\n \tovs_flow_free(flow, true);\n \treturn 0;\n unlock:\n-\tovs_unlock();\n+\tmutex_unlock(&table->lock);\n+\tovs_flow_tbl_put(table);\n \treturn err;\n }\n \n@@ -1537,7 +1594,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)\n \t\trcu_read_unlock();\n \t\treturn -ENODEV;\n \t}\n-\ttable = rcu_dereference_ovsl(dp->table);\n+\ttable = rcu_dereference(dp->table);\n \tif (!table) {\n \t\trcu_read_unlock();\n \t\treturn -ENODEV;\n@@ -1554,8 +1611,8 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)\n \t\tif (!flow)\n \t\t\tbreak;\n \n-\t\tif (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,\n-\t\t\t\t\t NETLINK_CB(cb->skb).portid,\n+\t\tif (ovs_flow_cmd_fill_info(flow, table, ovs_header->dp_ifindex,\n+\t\t\t\t\t skb, NETLINK_CB(cb->skb).portid,\n \t\t\t\t\t cb->nlh->nlmsg_seq, NLM_F_MULTI,\n \t\t\t\t\t OVS_FLOW_CMD_GET, ufid_flags) < 0)\n \t\t\tbreak;\n@@ -1642,10 +1699,6 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,\n \tstruct flow_table *table;\n \tint err, pids_len;\n \n-\ttable = ovsl_dereference(dp->table);\n-\tif (!table)\n-\t\treturn -ENODEV;\n-\n \tovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,\n \t\t\t\t flags, cmd);\n \tif (!ovs_header)\n@@ -1670,8 +1723,12 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,\n \tif (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))\n \t\tgoto nla_put_failure;\n \n-\tif (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,\n-\t\t\tovs_flow_tbl_masks_cache_size(table)))\n+\trcu_read_lock();\n+\ttable = rcu_dereference(dp->table);\n+\terr = table ? nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,\n+\t\t\t\t ovs_flow_tbl_masks_cache_size(table)) : 0;\n+\trcu_read_unlock();\n+\tif (err)\n \t\tgoto nla_put_failure;\n \n \tif (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU && pids) {\n@@ -1809,7 +1866,9 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])\n \t\t\treturn -ENODEV;\n \n \t\tcache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);\n+\t\tmutex_lock(&table->lock);\n \t\terr = ovs_flow_tbl_masks_cache_resize(table, cache_size);\n+\t\tmutex_unlock(&table->lock);\n \t\tif (err)\n \t\t\treturn err;\n \t}\n@@ -1960,7 +2019,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)\n err_destroy_stats:\n \tfree_percpu(dp->stats_percpu);\n err_destroy_table:\n-\tcall_rcu(&table->rcu, ovs_flow_tbl_destroy_rcu);\n+\tovs_flow_tbl_put(table);\n err_destroy_dp:\n \tkfree(dp);\n err_destroy_reply:\n@@ -1972,7 +2031,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)\n /* Called with ovs_mutex. */\n static void __dp_destroy(struct datapath *dp)\n {\n-\tstruct flow_table *table = ovsl_dereference(dp->table);\n+\tstruct flow_table *table = rcu_dereference_protected(dp->table,\n+\t\t\t\t\tlockdep_ovsl_is_held());\n \tint i;\n \n \tif (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)\n@@ -1994,16 +2054,11 @@ static void __dp_destroy(struct datapath *dp)\n \t */\n \tovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));\n \n-\t/* Flush sw_flow in the tables. RCU cb only releases resource\n-\t * such as dp, ports and tables. That may avoid some issues\n-\t * such as RCU usage warning.\n-\t */\n-\ttable_instance_flow_flush(table, ovsl_dereference(table->ti),\n-\t\t\t\t ovsl_dereference(table->ufid_ti));\n+\trcu_assign_pointer(dp->table, NULL);\n+\tovs_flow_tbl_put(table);\n \n-\t/* RCU destroy the ports, meters and flow tables. */\n+\t/* RCU destroy the ports and meters. */\n \tcall_rcu(&dp->rcu, destroy_dp_rcu);\n-\tcall_rcu(&table->rcu, ovs_flow_tbl_destroy_rcu);\n }\n \n static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)\n@@ -2616,9 +2671,12 @@ static void ovs_dp_masks_rebalance(struct work_struct *work)\n \tovs_lock();\n \tlist_for_each_entry(dp, &ovs_net->dps, list_node) {\n \t\ttable = ovsl_dereference(dp->table);\n-\t\tif (!table)\n+\t\tif (!table || !ovs_flow_tbl_get(table))\n \t\t\tcontinue;\n+\t\tmutex_lock(&table->lock);\n \t\tovs_flow_masks_rebalance(table);\n+\t\tmutex_unlock(&table->lock);\n+\t\tovs_flow_tbl_put(table);\n \t}\n \tovs_unlock();\n \ndiff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c\nindex 66366982f604..0a748cf20f53 100644\n--- a/net/openvswitch/flow.c\n+++ b/net/openvswitch/flow.c\n@@ -124,8 +124,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,\n \tspin_unlock(&stats->lock);\n }\n \n-/* Must be called with rcu_read_lock or ovs_mutex. */\n+/* Must be called with rcu_read_lock or table->lock held. */\n void ovs_flow_stats_get(const struct sw_flow *flow,\n+\t\t\tconst struct flow_table *table,\n \t\t\tstruct ovs_flow_stats *ovs_stats,\n \t\t\tunsigned long *used, __be16 *tcp_flags)\n {\n@@ -136,7 +137,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow,\n \tmemset(ovs_stats, 0, sizeof(*ovs_stats));\n \n \tfor_each_cpu(cpu, flow->cpu_used_mask) {\n-\t\tstruct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);\n+\t\tstruct sw_flow_stats *stats =\n+\t\t\trcu_dereference_ovs_tbl(flow->stats[cpu], table);\n \n \t\tif (stats) {\n \t\t\t/* Local CPU may write on non-local stats, so we must\n@@ -153,13 +155,14 @@ void ovs_flow_stats_get(const struct sw_flow *flow,\n \t}\n }\n \n-/* Called with ovs_mutex. */\n-void ovs_flow_stats_clear(struct sw_flow *flow)\n+/* Called with table->lock held. */\n+void ovs_flow_stats_clear(struct sw_flow *flow, struct flow_table *table)\n {\n \tunsigned int cpu;\n \n \tfor_each_cpu(cpu, flow->cpu_used_mask) {\n-\t\tstruct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);\n+\t\tstruct sw_flow_stats *stats =\n+\t\t\tovs_tbl_dereference(flow->stats[cpu], table);\n \n \t\tif (stats) {\n \t\t\tspin_lock_bh(&stats->lock);\ndiff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h\nindex b5711aff6e76..e05ed6796e4e 100644\n--- a/net/openvswitch/flow.h\n+++ b/net/openvswitch/flow.h\n@@ -23,6 +23,7 @@\n #include <net/dst_metadata.h>\n #include <net/nsh.h>\n \n+struct flow_table;\n struct sk_buff;\n \n enum sw_flow_mac_proto {\n@@ -280,9 +281,11 @@ static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid)\n \n void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags,\n \t\t\t const struct sk_buff *);\n-void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *,\n-\t\t\tunsigned long *used, __be16 *tcp_flags);\n-void ovs_flow_stats_clear(struct sw_flow *);\n+void ovs_flow_stats_get(const struct sw_flow *flow,\n+\t\t\tconst struct flow_table *table,\n+\t\t\tstruct ovs_flow_stats *stats, unsigned long *used,\n+\t\t\t__be16 *tcp_flags);\n+void ovs_flow_stats_clear(struct sw_flow *flow, struct flow_table *table);\n u64 ovs_flow_used_time(unsigned long flow_jiffies);\n \n int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);\ndiff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c\nindex 3b7518e3394d..3934873a44c3 100644\n--- a/net/openvswitch/flow_table.c\n+++ b/net/openvswitch/flow_table.c\n@@ -45,6 +45,16 @@\n static struct kmem_cache *flow_cache;\n struct kmem_cache *flow_stats_cache __read_mostly;\n \n+#ifdef CONFIG_LOCKDEP\n+int lockdep_ovs_tbl_is_held(const struct flow_table *table)\n+{\n+\tif (debug_locks)\n+\t\treturn lockdep_is_held(&table->lock);\n+\telse\n+\t\treturn 1;\n+}\n+#endif\n+\n static u16 range_n_bytes(const struct sw_flow_key_range *range)\n {\n \treturn range->end - range->start;\n@@ -102,7 +112,7 @@ struct sw_flow *ovs_flow_alloc(void)\n \n int ovs_flow_tbl_count(const struct flow_table *table)\n {\n-\treturn table->count;\n+\treturn READ_ONCE(table->count);\n }\n \n static void flow_free(struct sw_flow *flow)\n@@ -249,12 +259,12 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)\n \tif (!new)\n \t\treturn -ENOMEM;\n \n-\told = ovsl_dereference(tbl->mask_array);\n+\told = ovs_tbl_dereference(tbl->mask_array, tbl);\n \tif (old) {\n \t\tint i;\n \n \t\tfor (i = 0; i < old->max; i++) {\n-\t\t\tif (ovsl_dereference(old->masks[i]))\n+\t\t\tif (ovs_tbl_dereference(old->masks[i], tbl))\n \t\t\t\tnew->masks[new->count++] = old->masks[i];\n \t\t}\n \t\tcall_rcu(&old->rcu, mask_array_rcu_cb);\n@@ -268,7 +278,7 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)\n static int tbl_mask_array_add_mask(struct flow_table *tbl,\n \t\t\t\t struct sw_flow_mask *new)\n {\n-\tstruct mask_array *ma = ovsl_dereference(tbl->mask_array);\n+\tstruct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);\n \tint err, ma_count = READ_ONCE(ma->count);\n \n \tif (ma_count >= ma->max) {\n@@ -277,7 +287,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,\n \t\tif (err)\n \t\t\treturn err;\n \n-\t\tma = ovsl_dereference(tbl->mask_array);\n+\t\tma = ovs_tbl_dereference(tbl->mask_array, tbl);\n \t} else {\n \t\t/* On every add or delete we need to reset the counters so\n \t\t * every new mask gets a fair chance of being prioritized.\n@@ -285,7 +295,7 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,\n \t\ttbl_mask_array_reset_counters(ma);\n \t}\n \n-\tBUG_ON(ovsl_dereference(ma->masks[ma_count]));\n+\tWARN_ON_ONCE(ovs_tbl_dereference(ma->masks[ma_count], tbl));\n \n \trcu_assign_pointer(ma->masks[ma_count], new);\n \tWRITE_ONCE(ma->count, ma_count + 1);\n@@ -296,12 +306,12 @@ static int tbl_mask_array_add_mask(struct flow_table *tbl,\n static void tbl_mask_array_del_mask(struct flow_table *tbl,\n \t\t\t\t struct sw_flow_mask *mask)\n {\n-\tstruct mask_array *ma = ovsl_dereference(tbl->mask_array);\n+\tstruct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);\n \tint i, ma_count = READ_ONCE(ma->count);\n \n \t/* Remove the deleted mask pointers from the array */\n \tfor (i = 0; i < ma_count; i++) {\n-\t\tif (mask == ovsl_dereference(ma->masks[i]))\n+\t\tif (mask == ovs_tbl_dereference(ma->masks[i], tbl))\n \t\t\tgoto found;\n \t}\n \n@@ -329,10 +339,10 @@ static void tbl_mask_array_del_mask(struct flow_table *tbl,\n static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)\n {\n \tif (mask) {\n-\t\t/* ovs-lock is required to protect mask-refcount and\n+\t\t/* table lock is required to protect mask-refcount and\n \t\t * mask list.\n \t\t */\n-\t\tASSERT_OVSL();\n+\t\tASSERT_OVS_TBL(tbl);\n \t\tBUG_ON(!mask->ref_count);\n \t\tmask->ref_count--;\n \n@@ -386,7 +396,8 @@ static struct mask_cache *tbl_mask_cache_alloc(u32 size)\n }\n int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size)\n {\n-\tstruct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);\n+\tstruct mask_cache *mc = rcu_dereference_ovs_tbl(table->mask_cache,\n+\t\t\t\t\t\t\ttable);\n \tstruct mask_cache *new;\n \n \tif (size == mc->cache_size)\n@@ -416,6 +427,10 @@ struct flow_table *ovs_flow_tbl_alloc(void)\n \ttable = kzalloc_obj(*table, GFP_KERNEL);\n \tif (!table)\n \t\treturn ERR_PTR(-ENOMEM);\n+\n+\tmutex_init(&table->lock);\n+\trefcount_set(&table->refcnt, 1);\n+\n \tmc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES);\n \tif (!mc)\n \t\tgoto free_table;\n@@ -448,6 +463,7 @@ struct flow_table *ovs_flow_tbl_alloc(void)\n free_mask_cache:\n \t__mask_cache_destroy(mc);\n free_table:\n+\tmutex_destroy(&table->lock);\n \tkfree(table);\n \treturn ERR_PTR(-ENOMEM);\n }\n@@ -466,7 +482,7 @@ static void table_instance_flow_free(struct flow_table *table,\n \t\t\t\t struct sw_flow *flow)\n {\n \thlist_del_rcu(&flow->flow_table.node[ti->node_ver]);\n-\ttable->count--;\n+\tWRITE_ONCE(table->count, table->count - 1);\n \n \tif (ovs_identifier_is_ufid(&flow->id)) {\n \t\thlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);\n@@ -476,10 +492,10 @@ static void table_instance_flow_free(struct flow_table *table,\n \tflow_mask_remove(table, flow->mask);\n }\n \n-/* Must be called with OVS mutex held. */\n-void table_instance_flow_flush(struct flow_table *table,\n-\t\t\t struct table_instance *ti,\n-\t\t\t struct table_instance *ufid_ti)\n+/* Must be called with table mutex held. */\n+static void table_instance_flow_flush(struct flow_table *table,\n+\t\t\t\t struct table_instance *ti,\n+\t\t\t\t struct table_instance *ufid_ti)\n {\n \tint i;\n \n@@ -499,7 +515,7 @@ void table_instance_flow_flush(struct flow_table *table,\n \n \tif (WARN_ON(table->count != 0 ||\n \t\t table->ufid_count != 0)) {\n-\t\ttable->count = 0;\n+\t\tWRITE_ONCE(table->count, 0);\n \t\ttable->ufid_count = 0;\n \t}\n }\n@@ -512,7 +528,7 @@ static void table_instance_destroy(struct table_instance *ti,\n }\n \n /* No need for locking this function is called from RCU callback. */\n-void ovs_flow_tbl_destroy_rcu(struct rcu_head *rcu)\n+static void ovs_flow_tbl_destroy_rcu(struct rcu_head *rcu)\n {\n \tstruct flow_table *table = container_of(rcu, struct flow_table, rcu);\n \n@@ -524,9 +540,22 @@ void ovs_flow_tbl_destroy_rcu(struct rcu_head *rcu)\n \tcall_rcu(&mc->rcu, mask_cache_rcu_cb);\n \tcall_rcu(&ma->rcu, mask_array_rcu_cb);\n \ttable_instance_destroy(ti, ufid_ti);\n+\tmutex_destroy(&table->lock);\n \tkfree(table);\n }\n \n+void ovs_flow_tbl_put(struct flow_table *table)\n+{\n+\tif (refcount_dec_and_test(&table->refcnt)) {\n+\t\tmutex_lock(&table->lock);\n+\t\ttable_instance_flow_flush(table,\n+\t\t\t\t\t ovs_tbl_dereference(table->ti, table),\n+\t\t\t\t\t ovs_tbl_dereference(table->ufid_ti, table));\n+\t\tmutex_unlock(&table->lock);\n+\t\tcall_rcu(&table->rcu, ovs_flow_tbl_destroy_rcu);\n+\t}\n+}\n+\n struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,\n \t\t\t\t u32 *bucket, u32 *last)\n {\n@@ -578,7 +607,8 @@ static void ufid_table_instance_insert(struct table_instance *ti,\n \thlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head);\n }\n \n-static void flow_table_copy_flows(struct table_instance *old,\n+static void flow_table_copy_flows(struct flow_table *table,\n+\t\t\t\t struct table_instance *old,\n \t\t\t\t struct table_instance *new, bool ufid)\n {\n \tint old_ver;\n@@ -595,17 +625,18 @@ static void flow_table_copy_flows(struct table_instance *old,\n \t\tif (ufid)\n \t\t\thlist_for_each_entry_rcu(flow, head,\n \t\t\t\t\t\t ufid_table.node[old_ver],\n-\t\t\t\t\t\t lockdep_ovsl_is_held())\n+\t\t\t\t\t\t lockdep_ovs_tbl_is_held(table))\n \t\t\t\tufid_table_instance_insert(new, flow);\n \t\telse\n \t\t\thlist_for_each_entry_rcu(flow, head,\n \t\t\t\t\t\t flow_table.node[old_ver],\n-\t\t\t\t\t\t lockdep_ovsl_is_held())\n+\t\t\t\t\t\t lockdep_ovs_tbl_is_held(table))\n \t\t\t\ttable_instance_insert(new, flow);\n \t}\n }\n \n-static struct table_instance *table_instance_rehash(struct table_instance *ti,\n+static struct table_instance *table_instance_rehash(struct flow_table *table,\n+\t\t\t\t\t\t struct table_instance *ti,\n \t\t\t\t\t\t int n_buckets, bool ufid)\n {\n \tstruct table_instance *new_ti;\n@@ -614,16 +645,19 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti,\n \tif (!new_ti)\n \t\treturn NULL;\n \n-\tflow_table_copy_flows(ti, new_ti, ufid);\n+\tflow_table_copy_flows(table, ti, new_ti, ufid);\n \n \treturn new_ti;\n }\n \n+/* Must be called with flow_table->lock held. */\n int ovs_flow_tbl_flush(struct flow_table *flow_table)\n {\n \tstruct table_instance *old_ti, *new_ti;\n \tstruct table_instance *old_ufid_ti, *new_ufid_ti;\n \n+\tASSERT_OVS_TBL(flow_table);\n+\n \tnew_ti = table_instance_alloc(TBL_MIN_BUCKETS);\n \tif (!new_ti)\n \t\treturn -ENOMEM;\n@@ -631,8 +665,8 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)\n \tif (!new_ufid_ti)\n \t\tgoto err_free_ti;\n \n-\told_ti = ovsl_dereference(flow_table->ti);\n-\told_ufid_ti = ovsl_dereference(flow_table->ufid_ti);\n+\told_ti = ovs_tbl_dereference(flow_table->ti, flow_table);\n+\told_ufid_ti = ovs_tbl_dereference(flow_table->ufid_ti, flow_table);\n \n \trcu_assign_pointer(flow_table->ti, new_ti);\n \trcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti);\n@@ -700,7 +734,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,\n \treturn cmp_key(flow->id.unmasked_key, key, key_start, key_end);\n }\n \n-static struct sw_flow *masked_flow_lookup(struct table_instance *ti,\n+static struct sw_flow *masked_flow_lookup(struct flow_table *tbl,\n+\t\t\t\t\t struct table_instance *ti,\n \t\t\t\t\t const struct sw_flow_key *unmasked,\n \t\t\t\t\t const struct sw_flow_mask *mask,\n \t\t\t\t\t u32 *n_mask_hit)\n@@ -716,7 +751,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,\n \t(*n_mask_hit)++;\n \n \thlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver],\n-\t\t\t\t lockdep_ovsl_is_held()) {\n+\t\t\t\t lockdep_ovs_tbl_is_held(tbl)) {\n \t\tif (flow->mask == mask && flow->flow_table.hash == hash &&\n \t\t flow_cmp_masked_key(flow, &masked_key, &mask->range))\n \t\t\treturn flow;\n@@ -743,9 +778,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,\n \tint i;\n \n \tif (likely(*index < ma->max)) {\n-\t\tmask = rcu_dereference_ovsl(ma->masks[*index]);\n+\t\tmask = rcu_dereference_ovs_tbl(ma->masks[*index], tbl);\n \t\tif (mask) {\n-\t\t\tflow = masked_flow_lookup(ti, key, mask, n_mask_hit);\n+\t\t\tflow = masked_flow_lookup(tbl, ti, key, mask, n_mask_hit);\n \t\t\tif (flow) {\n \t\t\t\tu64_stats_update_begin(&stats->syncp);\n \t\t\t\tstats->usage_cntrs[*index]++;\n@@ -761,11 +796,11 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,\n \t\tif (i == *index)\n \t\t\tcontinue;\n \n-\t\tmask = rcu_dereference_ovsl(ma->masks[i]);\n+\t\tmask = rcu_dereference_ovs_tbl(ma->masks[i], tbl);\n \t\tif (unlikely(!mask))\n \t\t\tbreak;\n \n-\t\tflow = masked_flow_lookup(ti, key, mask, n_mask_hit);\n+\t\tflow = masked_flow_lookup(tbl, ti, key, mask, n_mask_hit);\n \t\tif (flow) { /* Found */\n \t\t\t*index = i;\n \t\t\tu64_stats_update_begin(&stats->syncp);\n@@ -852,8 +887,8 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,\n struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,\n \t\t\t\t const struct sw_flow_key *key)\n {\n-\tstruct table_instance *ti = rcu_dereference_ovsl(tbl->ti);\n-\tstruct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);\n+\tstruct table_instance *ti = rcu_dereference_ovs_tbl(tbl->ti, tbl);\n+\tstruct mask_array *ma = rcu_dereference_ovs_tbl(tbl->mask_array, tbl);\n \tu32 __always_unused n_mask_hit;\n \tu32 __always_unused n_cache_hit;\n \tstruct sw_flow *flow;\n@@ -872,21 +907,22 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,\n struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,\n \t\t\t\t\t const struct sw_flow_match *match)\n {\n-\tstruct mask_array *ma = ovsl_dereference(tbl->mask_array);\n+\tstruct mask_array *ma = ovs_tbl_dereference(tbl->mask_array, tbl);\n \tint i;\n \n-\t/* Always called under ovs-mutex. */\n+\t/* Always called under tbl->lock. */\n \tfor (i = 0; i < ma->max; i++) {\n-\t\tstruct table_instance *ti = rcu_dereference_ovsl(tbl->ti);\n+\t\tstruct table_instance *ti =\n+\t\t\t\trcu_dereference_ovs_tbl(tbl->ti, tbl);\n \t\tu32 __always_unused n_mask_hit;\n \t\tstruct sw_flow_mask *mask;\n \t\tstruct sw_flow *flow;\n \n-\t\tmask = ovsl_dereference(ma->masks[i]);\n+\t\tmask = ovs_tbl_dereference(ma->masks[i], tbl);\n \t\tif (!mask)\n \t\t\tcontinue;\n \n-\t\tflow = masked_flow_lookup(ti, match->key, mask, &n_mask_hit);\n+\t\tflow = masked_flow_lookup(tbl, ti, match->key, mask, &n_mask_hit);\n \t\tif (flow && ovs_identifier_is_key(&flow->id) &&\n \t\t ovs_flow_cmp_unmasked_key(flow, match)) {\n \t\t\treturn flow;\n@@ -922,7 +958,7 @@ bool ovs_flow_cmp(const struct sw_flow *flow,\n struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,\n \t\t\t\t\t const struct sw_flow_id *ufid)\n {\n-\tstruct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti);\n+\tstruct table_instance *ti = rcu_dereference_ovs_tbl(tbl->ufid_ti, tbl);\n \tstruct sw_flow *flow;\n \tstruct hlist_head *head;\n \tu32 hash;\n@@ -930,7 +966,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,\n \thash = ufid_hash(ufid);\n \thead = find_bucket(ti, hash);\n \thlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver],\n-\t\t\t\t lockdep_ovsl_is_held()) {\n+\t\t\t\t lockdep_ovs_tbl_is_held(tbl)) {\n \t\tif (flow->ufid_table.hash == hash &&\n \t\t ovs_flow_cmp_ufid(flow, ufid))\n \t\t\treturn flow;\n@@ -940,28 +976,33 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,\n \n int ovs_flow_tbl_num_masks(const struct flow_table *table)\n {\n-\tstruct mask_array *ma = rcu_dereference_ovsl(table->mask_array);\n+\tstruct mask_array *ma = rcu_dereference_ovs_tbl(table->mask_array,\n+\t\t\t\t\t\t\ttable);\n \treturn READ_ONCE(ma->count);\n }\n \n u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table)\n {\n-\tstruct mask_cache *mc = rcu_dereference_ovsl(table->mask_cache);\n+\tstruct mask_cache *mc = rcu_dereference_ovs_tbl(table->mask_cache,\n+\t\t\t\t\t\t\ttable);\n \n \treturn READ_ONCE(mc->cache_size);\n }\n \n-static struct table_instance *table_instance_expand(struct table_instance *ti,\n+static struct table_instance *table_instance_expand(struct flow_table *table,\n+\t\t\t\t\t\t struct table_instance *ti,\n \t\t\t\t\t\t bool ufid)\n {\n-\treturn table_instance_rehash(ti, ti->n_buckets * 2, ufid);\n+\treturn table_instance_rehash(table, ti, ti->n_buckets * 2, ufid);\n }\n \n-/* Must be called with OVS mutex held. */\n+/* Must be called with table mutex held. */\n void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)\n {\n-\tstruct table_instance *ti = ovsl_dereference(table->ti);\n-\tstruct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);\n+\tstruct table_instance *ti = ovs_tbl_dereference(table->ti,\n+\t\t\t\t\t\t\ttable);\n+\tstruct table_instance *ufid_ti = ovs_tbl_dereference(table->ufid_ti,\n+\t\t\t\t\t\t\t table);\n \n \tBUG_ON(table->count == 0);\n \ttable_instance_flow_free(table, ti, ufid_ti, flow);\n@@ -995,10 +1036,10 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,\n \tstruct mask_array *ma;\n \tint i;\n \n-\tma = ovsl_dereference(tbl->mask_array);\n+\tma = ovs_tbl_dereference(tbl->mask_array, tbl);\n \tfor (i = 0; i < ma->max; i++) {\n \t\tstruct sw_flow_mask *t;\n-\t\tt = ovsl_dereference(ma->masks[i]);\n+\t\tt = ovs_tbl_dereference(ma->masks[i], tbl);\n \n \t\tif (t && mask_equal(mask, t))\n \t\t\treturn t;\n@@ -1036,22 +1077,25 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,\n \treturn 0;\n }\n \n-/* Must be called with OVS mutex held. */\n+/* Must be called with table mutex held. */\n static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)\n {\n \tstruct table_instance *new_ti = NULL;\n \tstruct table_instance *ti;\n \n+\tASSERT_OVS_TBL(table);\n+\n \tflow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range);\n-\tti = ovsl_dereference(table->ti);\n+\tti = ovs_tbl_dereference(table->ti, table);\n \ttable_instance_insert(ti, flow);\n-\ttable->count++;\n+\tWRITE_ONCE(table->count, table->count + 1);\n \n \t/* Expand table, if necessary, to make room. */\n \tif (table->count > ti->n_buckets)\n-\t\tnew_ti = table_instance_expand(ti, false);\n+\t\tnew_ti = table_instance_expand(table, ti, false);\n \telse if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL))\n-\t\tnew_ti = table_instance_rehash(ti, ti->n_buckets, false);\n+\t\tnew_ti = table_instance_rehash(table, ti, ti->n_buckets,\n+\t\t\t\t\t false);\n \n \tif (new_ti) {\n \t\trcu_assign_pointer(table->ti, new_ti);\n@@ -1060,13 +1104,15 @@ static void flow_key_insert(struct flow_table *table, struct sw_flow *flow)\n \t}\n }\n \n-/* Must be called with OVS mutex held. */\n+/* Must be called with table mutex held. */\n static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)\n {\n \tstruct table_instance *ti;\n \n+\tASSERT_OVS_TBL(table);\n+\n \tflow->ufid_table.hash = ufid_hash(&flow->id);\n-\tti = ovsl_dereference(table->ufid_ti);\n+\tti = ovs_tbl_dereference(table->ufid_ti, table);\n \tufid_table_instance_insert(ti, flow);\n \ttable->ufid_count++;\n \n@@ -1074,7 +1120,7 @@ static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)\n \tif (table->ufid_count > ti->n_buckets) {\n \t\tstruct table_instance *new_ti;\n \n-\t\tnew_ti = table_instance_expand(ti, true);\n+\t\tnew_ti = table_instance_expand(table, ti, true);\n \t\tif (new_ti) {\n \t\t\trcu_assign_pointer(table->ufid_ti, new_ti);\n \t\t\tcall_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);\n@@ -1082,12 +1128,14 @@ static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow)\n \t}\n }\n \n-/* Must be called with OVS mutex held. */\n+/* Must be called with table mutex held. */\n int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,\n \t\t\tconst struct sw_flow_mask *mask)\n {\n \tint err;\n \n+\tASSERT_OVS_TBL(table);\n+\n \terr = flow_mask_insert(table, flow, mask);\n \tif (err)\n \t\treturn err;\n@@ -1106,10 +1154,11 @@ static int compare_mask_and_count(const void *a, const void *b)\n \treturn (s64)mc_b->counter - (s64)mc_a->counter;\n }\n \n-/* Must be called with OVS mutex held. */\n+/* Must be called with table->lock held. */\n void ovs_flow_masks_rebalance(struct flow_table *table)\n {\n-\tstruct mask_array *ma = rcu_dereference_ovsl(table->mask_array);\n+\tstruct mask_array *ma = rcu_dereference_ovs_tbl(table->mask_array,\n+\t\t\t\t\t\t\ttable);\n \tstruct mask_count *masks_and_count;\n \tstruct mask_array *new;\n \tint masks_entries = 0;\n@@ -1124,7 +1173,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)\n \t\tstruct sw_flow_mask *mask;\n \t\tint cpu;\n \n-\t\tmask = rcu_dereference_ovsl(ma->masks[i]);\n+\t\tmask = rcu_dereference_ovs_tbl(ma->masks[i], table);\n \t\tif (unlikely(!mask))\n \t\t\tbreak;\n \n@@ -1178,7 +1227,7 @@ void ovs_flow_masks_rebalance(struct flow_table *table)\n \tfor (i = 0; i < masks_entries; i++) {\n \t\tint index = masks_and_count[i].index;\n \n-\t\tif (ovsl_dereference(ma->masks[index]))\n+\t\tif (ovs_tbl_dereference(ma->masks[index], table))\n \t\t\tnew->masks[new->count++] = ma->masks[index];\n \t}\n \ndiff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h\nindex 6211bcc72655..1b5242a97813 100644\n--- a/net/openvswitch/flow_table.h\n+++ b/net/openvswitch/flow_table.h\n@@ -59,7 +59,31 @@ struct table_instance {\n \tu32 hash_seed;\n };\n \n+/* Locking:\n+ *\n+ * flow_table is _not_ protected by ovs_lock (see comment above ovs_mutex\n+ * in datapath.c).\n+ *\n+ * All writes to flow_table are protected by the embedded \"lock\".\n+ * In order to ensure datapath destruction does not trigger the destruction\n+ * of the flow_table, \"refcnt\" is used. Therefore, writers must:\n+ * 1 - Enter rcu read-protected section\n+ * 2 - Increase \"table->refcnt\"\n+ * 3 - Leave rcu read-protected section (to avoid using mutexes inside rcu)\n+ * 4 - Lock \"table->lock\"\n+ * 5 - Perform modifications\n+ * 6 - Release \"table->lock\"\n+ * 7 - Decrease \"table->refcnt\"\n+ *\n+ * Reads are protected by RCU.\n+ *\n+ * Note with this schema, it's possible that a flow operation is performed on a\n+ * flow_table that is about to be freed.\n+ */\n struct flow_table {\n+\t/* Locks flow table writes. */\n+\tstruct mutex lock;\n+\trefcount_t refcnt;\n \tstruct rcu_head rcu;\n \tstruct table_instance __rcu *ti;\n \tstruct table_instance __rcu *ufid_ti;\n@@ -72,6 +96,26 @@ struct flow_table {\n \n extern struct kmem_cache *flow_stats_cache;\n \n+#ifdef CONFIG_LOCKDEP\n+int lockdep_ovs_tbl_is_held(const struct flow_table *table);\n+#else\n+static inline int lockdep_ovs_tbl_is_held(const struct flow_table *table\n+\t\t\t\t\t __always_unused)\n+{\n+\treturn 1;\n+}\n+#endif\n+\n+#define ASSERT_OVS_TBL(tbl) WARN_ON(!lockdep_ovs_tbl_is_held(tbl))\n+\n+/* Lock-protected update-allowed dereferences.*/\n+#define ovs_tbl_dereference(p, tbl)\t\\\n+\trcu_dereference_protected(p, lockdep_ovs_tbl_is_held(tbl))\n+\n+/* Read dereferences can be protected by either RCU, table lock. */\n+#define rcu_dereference_ovs_tbl(p, tbl) \\\n+\trcu_dereference_check(p, lockdep_ovs_tbl_is_held(tbl))\n+\n int ovs_flow_init(void);\n void ovs_flow_exit(void);\n \n@@ -79,7 +123,11 @@ struct sw_flow *ovs_flow_alloc(void);\n void ovs_flow_free(struct sw_flow *, bool deferred);\n \n struct flow_table *ovs_flow_tbl_alloc(void);\n-void ovs_flow_tbl_destroy_rcu(struct rcu_head *table);\n+void ovs_flow_tbl_put(struct flow_table *table);\n+static inline bool ovs_flow_tbl_get(struct flow_table *table)\n+{\n+\treturn refcount_inc_not_zero(&table->refcnt);\n+}\n int ovs_flow_tbl_count(const struct flow_table *table);\n int ovs_flow_tbl_flush(struct flow_table *flow_table);\n \n@@ -109,8 +157,5 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,\n \t\t bool full, const struct sw_flow_mask *mask);\n \n void ovs_flow_masks_rebalance(struct flow_table *table);\n-void table_instance_flow_flush(struct flow_table *table,\n-\t\t\t struct table_instance *ti,\n-\t\t\t struct table_instance *ufid_ti);\n \n #endif /* flow_table.h */\n", "prefixes": [ "ovs-dev", "net-next", "v3", "2/2" ] }