Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/817519/?format=api
{ "id": 817519, "url": "http://patchwork.ozlabs.org/api/patches/817519/?format=api", "web_url": "http://patchwork.ozlabs.org/project/openvswitch/patch/1506088063-20920-3-git-send-email-billy.o.mahony@intel.com/", "project": { "id": 47, "url": "http://patchwork.ozlabs.org/api/projects/47/?format=api", "name": "Open vSwitch", "link_name": "openvswitch", "list_id": "ovs-dev.openvswitch.org", "list_email": "ovs-dev@openvswitch.org", "web_url": "http://openvswitch.org/", "scm_url": "git@github.com:openvswitch/ovs.git", "webscm_url": "https://github.com/openvswitch/ovs", "list_archive_url": "", "list_archive_url_format": "", "commit_url_format": "" }, "msgid": "<1506088063-20920-3-git-send-email-billy.o.mahony@intel.com>", "list_archive_url": null, "date": "2017-09-22T13:47:43", "name": "[ovs-dev,RFC,2/2] dpif-netdev: RFC EMC load-shedding", "commit_ref": null, "pull_url": null, "state": "rfc", "archived": false, "hash": "13b286bb173debb18a67628cd067b16984fc28f1", "submitter": { "id": 71090, "url": "http://patchwork.ozlabs.org/api/people/71090/?format=api", "name": "Billy O'Mahony", "email": "billy.o.mahony@intel.com" }, "delegate": null, "mbox": "http://patchwork.ozlabs.org/project/openvswitch/patch/1506088063-20920-3-git-send-email-billy.o.mahony@intel.com/mbox/", "series": [ { "id": 4631, "url": "http://patchwork.ozlabs.org/api/series/4631/?format=api", "web_url": "http://patchwork.ozlabs.org/project/openvswitch/list/?series=4631", "date": "2017-09-22T13:47:41", "name": "EMC load-shedding", "version": 1, "mbox": "http://patchwork.ozlabs.org/series/4631/mbox/" } ], "comments": "http://patchwork.ozlabs.org/api/patches/817519/comments/", "check": "pending", "checks": "http://patchwork.ozlabs.org/api/patches/817519/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<ovs-dev-bounces@openvswitch.org>", "X-Original-To": [ "incoming@patchwork.ozlabs.org", "dev@openvswitch.org" ], "Delivered-To": [ "patchwork-incoming@bilbo.ozlabs.org", 
"ovs-dev@mail.linuxfoundation.org" ], "Authentication-Results": "ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=openvswitch.org\n\t(client-ip=140.211.169.12; helo=mail.linuxfoundation.org;\n\tenvelope-from=ovs-dev-bounces@openvswitch.org;\n\treceiver=<UNKNOWN>)", "Received": [ "from mail.linuxfoundation.org (mail.linuxfoundation.org\n\t[140.211.169.12])\n\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256\n\tbits)) (No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 3xzFFs4bgRz9sRW\n\tfor <incoming@patchwork.ozlabs.org>;\n\tFri, 22 Sep 2017 23:49:21 +1000 (AEST)", "from mail.linux-foundation.org (localhost [127.0.0.1])\n\tby mail.linuxfoundation.org (Postfix) with ESMTP id A09CFBD3;\n\tFri, 22 Sep 2017 13:48:00 +0000 (UTC)", "from smtp1.linuxfoundation.org (smtp1.linux-foundation.org\n\t[172.17.192.35])\n\tby mail.linuxfoundation.org (Postfix) with ESMTPS id CBB47BC8\n\tfor <dev@openvswitch.org>; Fri, 22 Sep 2017 13:47:57 +0000 (UTC)", "from mga03.intel.com (mga03.intel.com [134.134.136.65])\n\tby smtp1.linuxfoundation.org (Postfix) with ESMTPS id 4B694157\n\tfor <dev@openvswitch.org>; Fri, 22 Sep 2017 13:47:57 +0000 (UTC)", "from fmsmga006.fm.intel.com ([10.253.24.20])\n\tby orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t22 Sep 2017 06:47:57 -0700", "from sivswdev01.ir.intel.com (HELO localhost.localdomain)\n\t([10.237.217.45])\n\tby fmsmga006.fm.intel.com with ESMTP; 22 Sep 2017 06:47:55 -0700" ], "X-Greylist": "domain auto-whitelisted by SQLgrey-1.7.6", "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"5.42,427,1500966000\"; d=\"scan'208\";a=\"154870344\"", "From": "Billy O'Mahony <billy.o.mahony@intel.com>", "To": "dev@openvswitch.org", "Date": "Fri, 22 Sep 2017 14:47:43 +0100", "Message-Id": "<1506088063-20920-3-git-send-email-billy.o.mahony@intel.com>", "X-Mailer": "git-send-email 1.7.0.7", "In-Reply-To": "<1506088063-20920-1-git-send-email-billy.o.mahony@intel.com>", "References": 
"<1506088063-20920-1-git-send-email-billy.o.mahony@intel.com>", "X-Spam-Status": "No, score=-5.0 required=5.0 tests=RCVD_IN_DNSWL_HI,\n\tRP_MATCHES_RCVD autolearn=disabled version=3.3.1", "X-Spam-Checker-Version": "SpamAssassin 3.3.1 (2010-03-16) on\n\tsmtp1.linux-foundation.org", "Cc": "i.maximets@samsung.com", "Subject": "[ovs-dev] [RFC 2/2] dpif-netdev: RFC EMC load-shedding", "X-BeenThere": "ovs-dev@openvswitch.org", "X-Mailman-Version": "2.1.12", "Precedence": "list", "List-Id": "<ovs-dev.openvswitch.org>", "List-Unsubscribe": "<https://mail.openvswitch.org/mailman/options/ovs-dev>,\n\t<mailto:ovs-dev-request@openvswitch.org?subject=unsubscribe>", "List-Archive": "<http://mail.openvswitch.org/pipermail/ovs-dev/>", "List-Post": "<mailto:ovs-dev@openvswitch.org>", "List-Help": "<mailto:ovs-dev-request@openvswitch.org?subject=help>", "List-Subscribe": "<https://mail.openvswitch.org/mailman/listinfo/ovs-dev>,\n\t<mailto:ovs-dev-request@openvswitch.org?subject=subscribe>", "MIME-Version": "1.0", "Content-Type": "text/plain; charset=\"us-ascii\"", "Content-Transfer-Encoding": "7bit", "Sender": "ovs-dev-bounces@openvswitch.org", "Errors-To": "ovs-dev-bounces@openvswitch.org" }, "content": "When EMC hit rate goes down start shedding load from the EMC.\n---\n lib/dpif-netdev.c | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++++--\n 1 file changed, 103 insertions(+), 4 deletions(-)", "diff": "diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c\nindex e3a5590..f77e79a 100644\n--- a/lib/dpif-netdev.c\n+++ b/lib/dpif-netdev.c\n@@ -158,6 +158,13 @@ struct netdev_flow_key {\n #define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX / \\\n DEFAULT_EM_FLOW_INSERT_INV_PROB)\n \n+struct emc_shed_state {\n+ unsigned long long last_hit_cnt;\n+ unsigned long long last_miss_cnt;\n+ unsigned long long last_skip_cnt;\n+ uint32_t shed_threshold;\n+};\n+\n struct emc_entry {\n struct dp_netdev_flow *flow;\n struct netdev_flow_key key; /* key.hash used for emc hash value. 
*/\n@@ -166,6 +173,7 @@ struct emc_entry {\n struct emc_cache {\n struct emc_entry entries[EM_FLOW_HASH_ENTRIES];\n int sweep_idx; /* For emc_cache_slow_sweep(). */\n+ struct emc_shed_state emc_shed_state;\n };\n \n /* Iterate in the exact match cache through every entry that might contain a\n@@ -337,6 +345,7 @@ enum dp_stat_type {\n DP_STAT_LOST, /* Packets not passed up to the client. */\n DP_STAT_LOOKUP_HIT, /* Number of subtable lookups for flow table\n hits */\n+ DP_STAT_EXACT_SKIPPED, /* Packets where EMC lookup skipped */\n DP_N_STATS\n };\n \n@@ -733,6 +742,10 @@ emc_cache_init(struct emc_cache *flow_cache)\n int i;\n \n flow_cache->sweep_idx = 0;\n+ flow_cache->emc_shed_state.last_hit_cnt = 0;\n+ flow_cache->emc_shed_state.last_miss_cnt = 0;\n+ flow_cache->emc_shed_state.last_skip_cnt = 0;\n+ flow_cache->emc_shed_state.shed_threshold = 0;\n for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {\n flow_cache->entries[i].flow = NULL;\n flow_cache->entries[i].key.hash = 0;\n@@ -749,6 +762,10 @@ emc_cache_uninit(struct emc_cache *flow_cache)\n for (i = 0; i < ARRAY_SIZE(flow_cache->entries); i++) {\n emc_clear_entry(&flow_cache->entries[i]);\n }\n+ flow_cache->emc_shed_state.last_hit_cnt = 0;\n+ flow_cache->emc_shed_state.last_miss_cnt = 0;\n+ flow_cache->emc_shed_state.last_skip_cnt = 0;\n+ flow_cache->emc_shed_state.shed_threshold = 0;\n }\n \n /* Check and clear dead flow references slowly (one entry at each\n@@ -839,11 +856,28 @@ pmd_info_show_stats(struct ds *reply,\n }\n ds_put_cstr(reply, \":\\n\");\n \n+ /* XXX some added items added here are for debug */\n ds_put_format(reply,\n \"\\temc hits:%llu\\n\\tmegaflow hits:%llu\\n\"\n+ \"\\tshed thresh:0x%08X\\n\"\n+ \"\\temc skips:%llu\\n\"\n+ \"\\temc hit rate (nett) :%llu%%\\n\"\n+ \"\\temc hit rate (gross):%llu%%\\n\"\n \"\\tavg. 
subtable lookups per hit:%.2f\\n\"\n \"\\tmiss:%llu\\n\\tlost:%llu\\n\",\n stats[DP_STAT_EXACT_HIT], stats[DP_STAT_MASKED_HIT],\n+ pmd->flow_cache.emc_shed_state.shed_threshold,\n+ stats[DP_STAT_EXACT_SKIPPED],\n+ (stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT] -\n+ stats[DP_STAT_EXACT_SKIPPED])\n+ ? ((stats[DP_STAT_EXACT_HIT] * 100) /\n+ (stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT] -\n+ stats[DP_STAT_EXACT_SKIPPED]))\n+ : 0,\n+ (stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT])\n+ ? ((stats[DP_STAT_EXACT_HIT] * 100) /\n+ (stats[DP_STAT_EXACT_HIT] + stats[DP_STAT_MASKED_HIT]))\n+ : 0,\n stats[DP_STAT_MASKED_HIT] > 0\n ? (1.0*stats[DP_STAT_LOOKUP_HIT])/stats[DP_STAT_MASKED_HIT]\n : 0,\n@@ -1470,6 +1504,8 @@ dpif_netdev_get_stats(const struct dpif *dpif, struct dpif_dp_stats *stats)\n stats->n_hit += n;\n atomic_read_relaxed(&pmd->stats.n[DP_STAT_EXACT_HIT], &n);\n stats->n_hit += n;\n+ atomic_read_relaxed(&pmd->stats.n[DP_STAT_EXACT_SKIPPED], &n);\n+ stats->n_hit += n;\n atomic_read_relaxed(&pmd->stats.n[DP_STAT_MISS], &n);\n stats->n_missed += n;\n atomic_read_relaxed(&pmd->stats.n[DP_STAT_LOST], &n);\n@@ -4849,6 +4885,54 @@ dp_netdev_queue_batches(struct dp_packet *pkt,\n packet_batch_per_flow_update(batch, pkt, mf);\n }\n \n+#define SHED_ADJ_INTERVAL_PKTS (3e6)\n+#define SHED_ADJ_QUANTUM (0x10000000)\n+#define SHED_THRESH_MAX (SHED_ADJ_QUANTUM + \\\n+ (SHED_ADJ_QUANTUM << 1) + \\\n+ (SHED_ADJ_QUANTUM << 2) + \\\n+ (SHED_ADJ_QUANTUM << 3))\n+/* XXX use cost of EMC lookup & miss in cycles to replace hard bounds */\n+#define SHED_HIT_RATE_LOWER_PC (50)\n+#define SHED_HIT_RATE_UPPER_PC (70)\n+\n+\n+static inline void\n+adjust_emc_shedding (struct dp_netdev_pmd_thread *pmd)\n+{\n+ struct emc_cache *emc = &pmd->flow_cache;\n+ unsigned long long emc_hit_cnt = pmd->stats.n[DP_STAT_EXACT_HIT] -\n+ emc->emc_shed_state.last_hit_cnt;\n+ unsigned long long emc_miss_cnt = pmd->stats.n[DP_STAT_MASKED_HIT] -\n+ emc->emc_shed_state.last_miss_cnt;\n+\n+ if 
(emc_hit_cnt + emc_miss_cnt > SHED_ADJ_INTERVAL_PKTS) {\n+ /* XXX protect against counter wrap around */\n+ unsigned long long emc_skip_cnt = pmd->stats.n[DP_STAT_EXACT_SKIPPED] -\n+ emc->emc_shed_state.last_skip_cnt;\n+ unsigned long long emc_offered_cnt =\n+ emc_hit_cnt + emc_miss_cnt - emc_skip_cnt;\n+\n+ unsigned int hit_rate_pc = (emc_hit_cnt * 100) / emc_offered_cnt;\n+\n+ emc->emc_shed_state.last_hit_cnt = pmd->stats.n[DP_STAT_EXACT_HIT];\n+ emc->emc_shed_state.last_miss_cnt = pmd->stats.n[DP_STAT_MASKED_HIT];\n+ emc->emc_shed_state.last_skip_cnt =\n+ pmd->stats.n[DP_STAT_EXACT_SKIPPED];\n+\n+ /* As hit rate goes down shed thresh goes up (more is shed from EMC) */\n+ /* XXX consider increment more if further out of bounds */\n+ if (hit_rate_pc > SHED_HIT_RATE_UPPER_PC && \\\n+ emc->emc_shed_state.shed_threshold >= SHED_ADJ_QUANTUM) {\n+ emc->emc_shed_state.shed_threshold -= SHED_ADJ_QUANTUM;\n+ } else if (hit_rate_pc < SHED_HIT_RATE_LOWER_PC && \\\n+ emc->emc_shed_state.shed_threshold < SHED_THRESH_MAX) {\n+ emc->emc_shed_state.shed_threshold += SHED_ADJ_QUANTUM;\n+ }\n+ }\n+}\n+\n+\n+\n /* Try to process all ('cnt') the 'packets' using only the exact match cache\n * 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the\n * miniflow is copied into 'keys' and the packet pointer is moved at the\n@@ -4869,7 +4953,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,\n {\n struct emc_cache *flow_cache = &pmd->flow_cache;\n struct netdev_flow_key *key = &keys[0];\n- size_t n_missed = 0, n_dropped = 0;\n+ size_t n_missed = 0, n_dropped = 0, n_skipped = 0;\n struct dp_packet *packet;\n const size_t size = dp_packet_batch_size(packets_);\n uint32_t cur_min;\n@@ -4900,8 +4984,17 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,\n key->len = 0; /* Not computed yet. */\n key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf);\n \n+ adjust_emc_shedding(pmd);\n+\n /* If EMC is disabled skip emc_lookup */\n- flow = (cur_min == 0) ? 
NULL: emc_lookup(flow_cache, key);\n+ if ((key->hash > flow_cache->emc_shed_state.shed_threshold) &&\n+ cur_min) {\n+ flow = emc_lookup(flow_cache, key);\n+ } else {\n+ flow = NULL;\n+ n_skipped++;\n+ }\n+\n if (OVS_LIKELY(flow)) {\n dp_netdev_queue_batches(packet, flow, &key->mf, batches,\n n_batches);\n@@ -4916,6 +5009,8 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,\n }\n }\n \n+ dp_netdev_count_packet(pmd, DP_STAT_EXACT_SKIPPED,\n+ n_skipped);\n dp_netdev_count_packet(pmd, DP_STAT_EXACT_HIT,\n size - n_dropped - n_missed);\n \n@@ -4986,7 +5081,9 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,\n add_actions->size);\n }\n ovs_mutex_unlock(&pmd->flow_mutex);\n- emc_probabilistic_insert(pmd, key, netdev_flow);\n+ if (key->hash > pmd->flow_cache.emc_shed_state.shed_threshold) {\n+ emc_probabilistic_insert(pmd, key, netdev_flow);\n+ }\n }\n }\n \n@@ -5079,7 +5176,9 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,\n \n flow = dp_netdev_flow_cast(rules[i]);\n \n- emc_probabilistic_insert(pmd, &keys[i], flow);\n+ if (keys[i].hash > pmd->flow_cache.emc_shed_state.shed_threshold) {\n+ emc_probabilistic_insert(pmd, &keys[i], flow);\n+ }\n dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);\n }\n \n", "prefixes": [ "ovs-dev", "RFC", "2/2" ] }