From patchwork Wed Sep 12 17:48:59 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Anand Kumar X-Patchwork-Id: 969158 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=openvswitch.org (client-ip=140.211.169.12; helo=mail.linuxfoundation.org; envelope-from=ovs-dev-bounces@openvswitch.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=vmware.com Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 429Tmw4XCWz9s4V for ; Thu, 13 Sep 2018 03:49:20 +1000 (AEST) Received: from mail.linux-foundation.org (localhost [127.0.0.1]) by mail.linuxfoundation.org (Postfix) with ESMTP id 96DA3EDF; Wed, 12 Sep 2018 17:49:18 +0000 (UTC) X-Original-To: dev@openvswitch.org Delivered-To: ovs-dev@mail.linuxfoundation.org Received: from smtp1.linuxfoundation.org (smtp1.linux-foundation.org [172.17.192.35]) by mail.linuxfoundation.org (Postfix) with ESMTPS id F2943E51 for ; Wed, 12 Sep 2018 17:49:16 +0000 (UTC) X-Greylist: domain auto-whitelisted by SQLgrey-1.7.6 Received: from EX13-EDG-OU-001.vmware.com (ex13-edg-ou-001.vmware.com [208.91.0.189]) by smtp1.linuxfoundation.org (Postfix) with ESMTPS id D84677CD for ; Wed, 12 Sep 2018 17:49:14 +0000 (UTC) Received: from sc9-mailhost2.vmware.com (10.113.161.72) by EX13-EDG-OU-001.vmware.com (10.113.208.155) with Microsoft SMTP Server id 15.0.1156.6; Wed, 12 Sep 2018 10:48:59 -0700 Received: from WIN-ANAND1.vmware.com (win-anand1.prom.eng.vmware.com [10.33.79.212]) by sc9-mailhost2.vmware.com (Postfix) with ESMTP id BFAACB0574; Wed, 12 Sep 2018 13:49:13 -0400 (EDT) From: Anand Kumar To: Date: Wed, 12 Sep 2018 10:48:59 -0700 Message-ID: 
<20180912174859.7468-1-kumaranand@vmware.com> X-Mailer: git-send-email 2.9.3.windows.1 MIME-Version: 1.0 Received-SPF: None (EX13-EDG-OU-001.vmware.com: kumaranand@vmware.com does not designate permitted sender hosts) X-Spam-Status: No, score=-4.2 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on smtp1.linux-foundation.org Subject: [ovs-dev] [PATCH v3] datapath-windows: Add support to configure ct zone limits X-BeenThere: ovs-dev@openvswitch.org X-Mailman-Version: 2.1.12 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: ovs-dev-bounces@openvswitch.org Errors-To: ovs-dev-bounces@openvswitch.org This patch implements limiting conntrack entries per zone using dpctl commands. Example: ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3 ovs-appctl dpctl/ct-del-limits zone=4 ovs-appctl dpctl/ct-get-limits zone=1,2,3 - Also update the netlink-socket.c to support netlink family 'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit. Signed-off-by: Anand Kumar --- v2->v3: - Change loop index variable from UINT16 to UINT32 v1->v2: - Use spinlock to guard against multiple access. - Use Interlock api to update zone counters. - Address review comments. 
--- datapath-windows/include/OvsDpInterfaceExt.h | 1 + datapath-windows/ovsext/Conntrack.c | 167 ++++++++++++++++++++++++++- datapath-windows/ovsext/Conntrack.h | 12 ++ datapath-windows/ovsext/Datapath.c | 34 +++++- lib/netlink-socket.c | 5 + 5 files changed, 216 insertions(+), 3 deletions(-) diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h index db91c3e..5fd8000 100644 --- a/datapath-windows/include/OvsDpInterfaceExt.h +++ b/datapath-windows/include/OvsDpInterfaceExt.h @@ -72,6 +72,7 @@ */ #define OVS_WIN_NL_CT_FAMILY_ID (NLMSG_MIN_TYPE + 7) +#define OVS_WIN_NL_CTLIMIT_FAMILY_ID (NLMSG_MIN_TYPE + 8) #define OVS_WIN_NL_INVALID_MCGRP_ID 0 #define OVS_WIN_NL_MCGRP_START_ID 100 diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index dd16602..537f1d8 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -27,13 +27,17 @@ #define WINDOWS_TICK 10000000 #define SEC_TO_UNIX_EPOCH 11644473600LL #define SEC_TO_NANOSEC 1000000000LL +#define CT_MAX_ZONE (UINT16_MAX + 1) KSTART_ROUTINE OvsConntrackEntryCleaner; static PLIST_ENTRY ovsConntrackTable; static OVS_CT_THREAD_CTX ctThreadCtx; static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL; +static NDIS_SPIN_LOCK ovsCtZoneLock; +static POVS_CT_ZONE_INFO zoneInfo = NULL; extern POVS_SWITCH_CONTEXT gOvsSwitchContext; static ULONG ctTotalEntries; +static ULONG defaultCtLimit; static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple); static __inline NDIS_STATUS @@ -94,6 +98,20 @@ OvsInitConntrack(POVS_SWITCH_CONTEXT context) ZwClose(threadHandle); threadHandle = NULL; + zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) * + CT_MAX_ZONE, OVS_CT_POOL_TAG); + if (zoneInfo == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto freeBucketLock; + } + + NdisAllocateSpinLock(&ovsCtZoneLock); + defaultCtLimit = CT_MAX_ENTRIES; + for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { + 
zoneInfo[i].entries = 0; + zoneInfo[i].limit = defaultCtLimit; + } + status = OvsNatInit(); if (status != STATUS_SUCCESS) { @@ -149,6 +167,25 @@ OvsCleanupConntrack(VOID) OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG); ovsCtBucketLock = NULL; OvsNatCleanup(); + NdisFreeSpinLock(&ovsCtZoneLock); + if (zoneInfo) { + OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG); + } +} + +VOID +OvsCtSetZoneLimit(int zone, ULONG value) { + NdisAcquireSpinLock(&ovsCtZoneLock); + if (zone == -1) { + /* Set default limit for all zones. */ + defaultCtLimit = value; + for (UINT32 i = 0; i < CT_MAX_ZONE; i++) { + zoneInfo[i].limit = value; + } + } else { + zoneInfo[(UINT16)zone].limit = value; + } + NdisReleaseSpinLock(&ovsCtZoneLock); } /* @@ -263,6 +300,7 @@ OvsCtAddEntry(POVS_CT_ENTRY entry, &entry->link); NdisInterlockedIncrement((PLONG)&ctTotalEntries); + NdisInterlockedIncrement((PLONG)&zoneInfo[ctx->key.zone].entries); NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockState); return TRUE; } @@ -437,6 +475,7 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) if (entry->natInfo.natAction) { OvsNatDeleteKey(&entry->key); } + NdisInterlockedDecrement((PLONG)&zoneInfo[entry->key.zone].entries); OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE); RemoveEntryList(&entry->link); OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql); @@ -877,12 +916,16 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, &entryCreated); } else { - if (commit && ctTotalEntries >= CT_MAX_ENTRIES) { + if (commit && (ctTotalEntries >= CT_MAX_ENTRIES || + zoneInfo[ctx.key.zone].entries >= zoneInfo[ctx.key.zone].limit)) { /* Don't proceed with processing if the max limit has been hit. * This blocks only new entries from being created and doesn't * affect existing connections. 
*/ - OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries); + OVS_LOG_ERROR("Conntrack Limit hit: zone(%u), zoneLimit(%lu)," + "zoneEntries(%lu), ctTotalEntries(%lu)", + zone, zoneInfo[ctx.key.zone].limit, + zoneInfo[ctx.key.zone].entries, ctTotalEntries); return NDIS_STATUS_RESOURCES; } /* If no matching entry was found, create one and add New state */ @@ -1783,4 +1826,124 @@ OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, return STATUS_SUCCESS; } +static NTSTATUS +OvsCreateNlMsgFromCtLimit(POVS_MESSAGE msgIn, + PVOID outBuffer, + UINT32 outBufLen, + PCHAR attr, + UINT32 numAttrs, + int dpIfIndex) +{ + NTSTATUS status = STATUS_SUCCESS; + NL_BUFFER nlBuffer; + PNL_MSG_HDR nlMsg; + PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg); + + NlBufInit(&nlBuffer, outBuffer, outBufLen); + + if (!NlFillOvsMsg(&nlBuffer, msgIn->nlMsg.nlmsgType, NLM_F_MULTI, + msgIn->nlMsg.nlmsgSeq, msgIn->nlMsg.nlmsgPid, + msgIn->genlMsg.cmd, msgIn->genlMsg.version, + dpIfIndex)) { + return STATUS_INVALID_BUFFER_SIZE; + } + + if (genlMsgHdr->cmd == OVS_CT_LIMIT_CMD_GET && numAttrs) { + POVS_CT_ZONE_LIMIT zoneLimitAttr = (POVS_CT_ZONE_LIMIT) attr; + UINT32 offset = NlMsgStartNested(&nlBuffer, OVS_CT_LIMIT_ATTR_ZONE_LIMIT); + if (!offset) { + /* Starting the nested attribute failed. 
*/ + status = STATUS_INVALID_BUFFER_SIZE; + goto done; + } + + /* Insert OVS_CT_ZONE_LIMIT attributes.*/ + for (UINT32 i = 0; i < numAttrs; i++) { + if (zoneLimitAttr) { + zoneLimitAttr->limit = zoneInfo[(UINT16)zoneLimitAttr->zone_id].limit; + zoneLimitAttr->count = zoneInfo[(UINT16)zoneLimitAttr->zone_id].entries; + if (zoneLimitAttr->zone_id == -1) { + zoneLimitAttr->limit = defaultCtLimit; + } + NlMsgPutTail(&nlBuffer, (const PCHAR)zoneLimitAttr, + sizeof(OVS_CT_ZONE_LIMIT)); + } else { + status = STATUS_INVALID_PARAMETER; + break; + } + zoneLimitAttr = (POVS_CT_ZONE_LIMIT)((PCHAR) zoneLimitAttr + + sizeof(OVS_CT_ZONE_LIMIT)); + } + NlMsgEndNested(&nlBuffer, offset); + } + +done: + nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuffer, 0, 0); + nlMsg->nlmsgLen = NlBufSize(&nlBuffer); + + return status; +} + +NTSTATUS +OvsCtLimitHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, + UINT32 *replyLen) +{ + NTSTATUS status; + POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer; + POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer; + PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg); + PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg); + POVS_HDR ovsHdr = &(msgIn->ovsHdr); + PCHAR attr = NULL; + UINT32 numAttrs = 0; + UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN; + + static const NL_POLICY ovsCtLimitPolicy[] = { + [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED, .optional = TRUE } + }; + PNL_ATTR nlAttrs[ARRAY_SIZE(ovsCtLimitPolicy)]; + + if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr), + ovsCtLimitPolicy, ARRAY_SIZE(ovsCtLimitPolicy), + nlAttrs, ARRAY_SIZE(nlAttrs))) + != TRUE) { + OVS_LOG_ERROR("Attr Parsing failed for msg: %p", nlMsgHdr); + return STATUS_INVALID_PARAMETER; + } + + if (nlAttrs[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) { + numAttrs = NlAttrGetSize(nlAttrs[OVS_CT_LIMIT_ATTR_ZONE_LIMIT])/sizeof(OVS_CT_ZONE_LIMIT); + attr = NlAttrGet(nlAttrs[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]); + } + + if (genlMsgHdr->cmd == OVS_CT_LIMIT_CMD_SET || + genlMsgHdr->cmd == 
OVS_CT_LIMIT_CMD_DEL) { + POVS_CT_ZONE_LIMIT zoneLimitAttr = (POVS_CT_ZONE_LIMIT)attr; + for (UINT32 i = 0; i < numAttrs; i++) { + /* Parse zone limit attributes. */ + if (zoneLimitAttr) { + if (genlMsgHdr->cmd == OVS_CT_LIMIT_CMD_DEL) { + zoneLimitAttr->limit = CT_MAX_ENTRIES; + } + OvsCtSetZoneLimit(zoneLimitAttr->zone_id, zoneLimitAttr->limit); + } else { + OVS_LOG_ERROR("Failed to get zone limit attribute at index(%u)," + " numAttrs(%u)", i, numAttrs); + return STATUS_INVALID_PARAMETER; + } + zoneLimitAttr = (POVS_CT_ZONE_LIMIT)((PCHAR) zoneLimitAttr + + sizeof(OVS_CT_ZONE_LIMIT)); + } + } + + /* Output buffer has been validated while validating transact dev op. */ + ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut); + status = OvsCreateNlMsgFromCtLimit(msgIn, msgOut, + usrParamsCtx->outputLength, + attr, numAttrs, ovsHdr->dp_ifindex); + *replyLen = msgOut->nlMsg.nlmsgLen; + + return status; +} + #pragma warning(pop) diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h index d4152b3..4678ed0 100644 --- a/datapath-windows/ovsext/Conntrack.h +++ b/datapath-windows/ovsext/Conntrack.h @@ -132,6 +132,18 @@ typedef struct OvsConntrackKeyLookupCtx { BOOLEAN related; } OvsConntrackKeyLookupCtx; +/* Per zone structure. 
*/ +typedef struct _OVS_CT_ZONE_INFO { + ULONG limit; + ULONG entries; +} OVS_CT_ZONE_INFO, *POVS_CT_ZONE_INFO; + +typedef struct _OVS_CT_ZONE_LIMIT { + int zone_id; + ULONG limit; + ULONG count; +} OVS_CT_ZONE_LIMIT, *POVS_CT_ZONE_LIMIT; + #define CT_MAX_ENTRIES 1 << 21 #define CT_HASH_TABLE_SIZE ((UINT32)1 << 10) #define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1) diff --git a/datapath-windows/ovsext/Datapath.c b/datapath-windows/ovsext/Datapath.c index 34ef2b4..fa99484 100644 --- a/datapath-windows/ovsext/Datapath.c +++ b/datapath-windows/ovsext/Datapath.c @@ -99,7 +99,8 @@ NetlinkCmdHandler OvsGetNetdevCmdHandler, OvsSubscribePacketCmdHandler, OvsReadPacketCmdHandler, OvsCtDeleteCmdHandler, - OvsCtDumpCmdHandler; + OvsCtDumpCmdHandler, + OvsCtLimitHandler; static NTSTATUS HandleGetDpTransaction(POVS_USER_PARAMS_CONTEXT usrParamsCtx, UINT32 *replyLen); @@ -324,6 +325,34 @@ NETLINK_FAMILY nlNetdevFamilyOps = { .opsCount = ARRAY_SIZE(nlNetdevFamilyCmdOps) }; + +/* Netlink conntrack limit family. 
*/ +NETLINK_CMD nlCtLimitFamilyCmdOps[] = { + { .cmd = OVS_CT_LIMIT_CMD_GET, + .handler = OvsCtLimitHandler, + .supportedDevOp = OVS_TRANSACTION_DEV_OP, + .validateDpIndex = FALSE + }, + { .cmd = OVS_CT_LIMIT_CMD_SET, + .handler = OvsCtLimitHandler, + .supportedDevOp = OVS_TRANSACTION_DEV_OP, + .validateDpIndex = FALSE + }, + { .cmd = OVS_CT_LIMIT_CMD_DEL, + .handler = OvsCtLimitHandler, + .supportedDevOp = OVS_TRANSACTION_DEV_OP, + .validateDpIndex = FALSE + }, +}; + +NETLINK_FAMILY nlCtLimitFamilyOps = { + .name = OVS_CT_LIMIT_FAMILY, + .id = OVS_WIN_NL_CTLIMIT_FAMILY_ID, + .version = OVS_CT_LIMIT_VERSION, + .maxAttr = OVS_CT_LIMIT_ATTR_MAX, + .cmds = nlCtLimitFamilyCmdOps, + .opsCount = ARRAY_SIZE(nlCtLimitFamilyCmdOps) +}; static NTSTATUS MapIrpOutputBuffer(PIRP irp, UINT32 bufferLength, UINT32 requiredLength, @@ -941,6 +970,9 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject, case OVS_WIN_NL_NETDEV_FAMILY_ID: nlFamilyOps = &nlNetdevFamilyOps; break; + case OVS_WIN_NL_CTLIMIT_FAMILY_ID: + nlFamilyOps = &nlCtLimitFamilyOps; + break; default: status = STATUS_INVALID_PARAMETER; goto done; diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 5234ca3..47077e9 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -1571,6 +1571,11 @@ do_lookup_genl_family(const char *name, struct nlattr **attrs, family_name = OVS_WIN_NETDEV_FAMILY; family_version = OVS_WIN_NETDEV_VERSION; family_attrmax = OVS_WIN_NETDEV_ATTR_MAX; + } else if (!strcmp(name, OVS_CT_LIMIT_FAMILY)) { + family_id = OVS_WIN_NL_CTLIMIT_FAMILY_ID; + family_name = OVS_CT_LIMIT_FAMILY; + family_version = OVS_CT_LIMIT_VERSION; + family_attrmax = OVS_CT_LIMIT_ATTR_MAX; } else { ofpbuf_delete(reply); return EINVAL;