[ovs-dev,v3] datapath-windows: Add support to configure ct zone limits

Message ID 20180912174859.7468-1-kumaranand@vmware.com
State Changes Requested
Headers show
Series
  • [ovs-dev,v3] datapath-windows: Add support to configure ct zone limits
Related show

Commit Message

Anand Kumar Sept. 12, 2018, 5:48 p.m.
This patch implements limiting conntrack entries
per zone using dpctl commands.

Example:
ovs-appctl dpctl/ct-set-limits default=5 zone=1,limit=2 zone=1,limit=3
ovs-appctl dpct/ct-del-limits zone=4
ovs-appctl dpct/ct-get-limits zone=1,2,3

- Also update the netlink-socket.c to support netlink family
  'OVS_WIN_NL_CTLIMIT_FAMILY_ID' for conntrack zone limit.

Signed-off-by: Anand Kumar <kumaranand@vmware.com>
---
v2->v3:
  - Change loop index variable from UINT16 to UINT32
v1->v2:
  - Use spinlock to guard against multiple access.
  - Use Interlock api to update zone counters.
  - Address review comments.
---
 datapath-windows/include/OvsDpInterfaceExt.h |   1 +
 datapath-windows/ovsext/Conntrack.c          | 167 ++++++++++++++++++++++++++-
 datapath-windows/ovsext/Conntrack.h          |  12 ++
 datapath-windows/ovsext/Datapath.c           |  34 +++++-
 lib/netlink-socket.c                         |   5 +
 5 files changed, 216 insertions(+), 3 deletions(-)

Comments

Ben Pfaff Sept. 18, 2018, 8:28 a.m. | #1
I didn't really review this but a glance at it showed one possible
issue.

Missing () around definition here:
> +#define CT_MAX_ZONE UINT16_MAX + 1

Bang?
> +    zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) *
> +                                        CT_MAX_ZONE, OVS_CT_POOL_TAG);

Thanks,

Ben.
Anand Kumar Sept. 19, 2018, 6:42 p.m. | #2
Hi Ben,

Thanks for the review. I think this bug got introduced when addressing review comments.
I have fixed it and sent out a v4.

Regards,
Anand Kumar

On 9/18/18, 1:28 AM, "Ben Pfaff" <blp@ovn.org> wrote:

    I didn't really review this but a glance at it showed one possible
    issue.
    
    Missing () around definition here:
    > +#define CT_MAX_ZONE UINT16_MAX + 1
    
    Bang?
    > +    zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) *
    > +                                        CT_MAX_ZONE, OVS_CT_POOL_TAG);
    
    Thanks,
    
    Ben.

Patch

diff --git a/datapath-windows/include/OvsDpInterfaceExt.h b/datapath-windows/include/OvsDpInterfaceExt.h
index db91c3e..5fd8000 100644
--- a/datapath-windows/include/OvsDpInterfaceExt.h
+++ b/datapath-windows/include/OvsDpInterfaceExt.h
@@ -72,6 +72,7 @@ 
  */
 
 #define OVS_WIN_NL_CT_FAMILY_ID              (NLMSG_MIN_TYPE + 7)
+#define OVS_WIN_NL_CTLIMIT_FAMILY_ID         (NLMSG_MIN_TYPE + 8)
 
 #define OVS_WIN_NL_INVALID_MCGRP_ID          0
 #define OVS_WIN_NL_MCGRP_START_ID            100
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index dd16602..537f1d8 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -27,13 +27,17 @@ 
 #define WINDOWS_TICK 10000000
 #define SEC_TO_UNIX_EPOCH 11644473600LL
 #define SEC_TO_NANOSEC 1000000000LL
+#define CT_MAX_ZONE UINT16_MAX + 1
 
 KSTART_ROUTINE OvsConntrackEntryCleaner;
 static PLIST_ENTRY ovsConntrackTable;
 static OVS_CT_THREAD_CTX ctThreadCtx;
 static PNDIS_RW_LOCK_EX *ovsCtBucketLock = NULL;
+static NDIS_SPIN_LOCK ovsCtZoneLock;
+static POVS_CT_ZONE_INFO zoneInfo = NULL;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
 static ULONG ctTotalEntries;
+static ULONG defaultCtLimit;
 
 static __inline OvsCtFlush(UINT16 zone, struct ovs_key_ct_tuple_ipv4 *tuple);
 static __inline NDIS_STATUS
@@ -94,6 +98,20 @@  OvsInitConntrack(POVS_SWITCH_CONTEXT context)
     ZwClose(threadHandle);
     threadHandle = NULL;
 
+    zoneInfo = OvsAllocateMemoryWithTag(sizeof(OVS_CT_ZONE_INFO) *
+                                        CT_MAX_ZONE, OVS_CT_POOL_TAG);
+    if (zoneInfo == NULL) {
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto freeBucketLock;
+    }
+
+    NdisAllocateSpinLock(&ovsCtZoneLock);
+    defaultCtLimit = CT_MAX_ENTRIES;
+    for (UINT32 i = 0; i < CT_MAX_ZONE; i++) {
+        zoneInfo[i].entries = 0;
+        zoneInfo[i].limit = defaultCtLimit;
+    }
+
     status = OvsNatInit();
 
     if (status != STATUS_SUCCESS) {
@@ -149,6 +167,25 @@  OvsCleanupConntrack(VOID)
     OvsFreeMemoryWithTag(ovsCtBucketLock, OVS_CT_POOL_TAG);
     ovsCtBucketLock = NULL;
     OvsNatCleanup();
+    NdisFreeSpinLock(&ovsCtZoneLock);
+    if (zoneInfo) {
+        OvsFreeMemoryWithTag(zoneInfo, OVS_CT_POOL_TAG);
+    }
+}
+
+VOID
+OvsCtSetZoneLimit(int zone, ULONG value) {
+    NdisAcquireSpinLock(&ovsCtZoneLock);
+    if (zone == -1) {
+        /* Set default limit for all zones. */
+        defaultCtLimit = value;
+        for (UINT32 i = 0; i < CT_MAX_ZONE; i++) {
+            zoneInfo[i].limit = value;
+        }
+    } else {
+        zoneInfo[(UINT16)zone].limit = value;
+    }
+    NdisReleaseSpinLock(&ovsCtZoneLock);
 }
 
 /*
@@ -263,6 +300,7 @@  OvsCtAddEntry(POVS_CT_ENTRY entry,
                    &entry->link);
 
     NdisInterlockedIncrement((PLONG)&ctTotalEntries);
+    NdisInterlockedIncrement((PLONG)&zoneInfo[ctx->key.zone].entries);
     NdisReleaseRWLock(ovsCtBucketLock[bucketIdx], &lockState);
     return TRUE;
 }
@@ -437,6 +475,7 @@  OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
         if (entry->natInfo.natAction) {
             OvsNatDeleteKey(&entry->key);
         }
+        NdisInterlockedDecrement((PLONG)&zoneInfo[entry->key.zone].entries);
         OvsPostCtEventEntry(entry, OVS_EVENT_CT_DELETE);
         RemoveEntryList(&entry->link);
         OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
@@ -877,12 +916,16 @@  OvsCtExecute_(OvsForwardingContext *fwdCtx,
                                          &entryCreated);
 
     } else {
-        if (commit && ctTotalEntries >= CT_MAX_ENTRIES) {
+        if (commit && (ctTotalEntries >= CT_MAX_ENTRIES ||
+            zoneInfo[ctx.key.zone].entries >= zoneInfo[ctx.key.zone].limit)) {
             /* Don't proceed with processing if the max limit has been hit.
              * This blocks only new entries from being created and doesn't
              * affect existing connections.
              */
-            OVS_LOG_ERROR("Conntrack Limit hit: %lu", ctTotalEntries);
+            OVS_LOG_ERROR("Conntrack Limit hit: zone(%u), zoneLimit(%lu),"
+                          "zoneEntries(%lu), ctTotalEntries(%lu)",
+                           zone, zoneInfo[ctx.key.zone].limit,
+                           zoneInfo[ctx.key.zone].entries, ctTotalEntries);
             return NDIS_STATUS_RESOURCES;
         }
         /* If no matching entry was found, create one and add New state */
@@ -1783,4 +1826,124 @@  OvsCtDumpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
     return STATUS_SUCCESS;
 }
 
+static NTSTATUS
+OvsCreateNlMsgFromCtLimit(POVS_MESSAGE msgIn,
+                          PVOID outBuffer,
+                          UINT32 outBufLen,
+                          PCHAR attr,
+                          UINT32 numAttrs,
+                          int dpIfIndex)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    NL_BUFFER nlBuffer;
+    PNL_MSG_HDR nlMsg;
+    PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
+
+    NlBufInit(&nlBuffer, outBuffer, outBufLen);
+
+    if (!NlFillOvsMsg(&nlBuffer, msgIn->nlMsg.nlmsgType, NLM_F_MULTI,
+                      msgIn->nlMsg.nlmsgSeq, msgIn->nlMsg.nlmsgPid,
+                      msgIn->genlMsg.cmd, msgIn->genlMsg.version,
+                      dpIfIndex)) {
+        return STATUS_INVALID_BUFFER_SIZE;
+    }
+
+    if (genlMsgHdr->cmd == OVS_CT_LIMIT_CMD_GET && numAttrs) {
+        POVS_CT_ZONE_LIMIT zoneLimitAttr = (POVS_CT_ZONE_LIMIT) attr;
+        UINT32 offset = NlMsgStartNested(&nlBuffer, OVS_CT_LIMIT_ATTR_ZONE_LIMIT);
+        if (!offset) {
+            /* Starting the nested attribute failed. */
+            status = STATUS_INVALID_BUFFER_SIZE;
+            goto done;
+        }
+
+        /* Insert OVS_CT_ZONE_LIMIT attributes.*/
+        for (UINT32 i = 0; i < numAttrs; i++) {
+            if (zoneLimitAttr) {
+                zoneLimitAttr->limit = zoneInfo[zoneLimitAttr->zone_id].limit;
+                zoneLimitAttr->count = zoneInfo[zoneLimitAttr->zone_id].entries;
+                if (zoneLimitAttr->zone_id == -1) {
+                    zoneLimitAttr->limit = defaultCtLimit;
+                }
+                NlMsgPutTail(&nlBuffer, (const PCHAR)zoneLimitAttr,
+                             sizeof(OVS_CT_ZONE_LIMIT));
+            } else {
+                status = STATUS_INVALID_PARAMETER;
+                break;
+            }
+            zoneLimitAttr = (POVS_CT_ZONE_LIMIT)((PCHAR) zoneLimitAttr +
+                                sizeof(OVS_CT_ZONE_LIMIT));
+        }
+        NlMsgEndNested(&nlBuffer, offset);
+    }
+
+done:
+    nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuffer, 0, 0);
+    nlMsg->nlmsgLen = NlBufSize(&nlBuffer);
+
+    return status;
+}
+
+NTSTATUS
+OvsCtLimitHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                  UINT32 *replyLen)
+{
+    NTSTATUS status;
+    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+    POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
+    PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
+    PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
+    POVS_HDR ovsHdr = &(msgIn->ovsHdr);
+    PCHAR attr = NULL;
+    UINT32 numAttrs = 0;
+    UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
+
+    static const NL_POLICY ovsCtLimitPolicy[] = {
+        [OVS_CT_LIMIT_ATTR_ZONE_LIMIT] = { .type = NL_A_NESTED, .optional = TRUE }
+    };
+    PNL_ATTR nlAttrs[ARRAY_SIZE(ovsCtLimitPolicy)];
+
+    if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
+                     ovsCtLimitPolicy, ARRAY_SIZE(ovsCtLimitPolicy),
+                     nlAttrs, ARRAY_SIZE(nlAttrs)))
+                     != TRUE) {
+        OVS_LOG_ERROR("Attr Parsing failed for msg: %p", nlMsgHdr);
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    if (nlAttrs[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]) {
+        numAttrs = NlAttrGetSize(nlAttrs[OVS_CT_LIMIT_ATTR_ZONE_LIMIT])/sizeof(OVS_CT_ZONE_LIMIT);
+        attr = NlAttrGet(nlAttrs[OVS_CT_LIMIT_ATTR_ZONE_LIMIT]);
+    }
+
+    if (genlMsgHdr->cmd == OVS_CT_LIMIT_CMD_SET ||
+        genlMsgHdr->cmd == OVS_CT_LIMIT_CMD_DEL) {
+        POVS_CT_ZONE_LIMIT zoneLimitAttr = (POVS_CT_ZONE_LIMIT)attr;
+        for (UINT32 i = 0; i < numAttrs; i++) {
+            /* Parse zone limit attributes. */
+            if (zoneLimitAttr) {
+                if (genlMsgHdr->cmd == OVS_CT_LIMIT_CMD_DEL) {
+                    zoneLimitAttr->limit = CT_MAX_ENTRIES;
+                }
+                OvsCtSetZoneLimit(zoneLimitAttr->zone_id, zoneLimitAttr->limit);
+            } else {
+                OVS_LOG_ERROR("Failed to get zone limit attribute at index(%u),"
+                              " numAttrs(%u)", i, numAttrs);
+                return STATUS_INVALID_PARAMETER;
+            }
+            zoneLimitAttr = (POVS_CT_ZONE_LIMIT)((PCHAR) zoneLimitAttr +
+                                sizeof(OVS_CT_ZONE_LIMIT));
+        }
+    }
+
+    /* Output buffer has been validated while validating transact dev op. */
+    ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
+    status = OvsCreateNlMsgFromCtLimit(msgIn, msgOut,
+                                       usrParamsCtx->outputLength,
+                                       attr, numAttrs, ovsHdr->dp_ifindex);
+    *replyLen = msgOut->nlMsg.nlmsgLen;
+
+    return status;
+}
+
 #pragma warning(pop)
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
index d4152b3..4678ed0 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -132,6 +132,18 @@  typedef struct OvsConntrackKeyLookupCtx {
     BOOLEAN         related;
 } OvsConntrackKeyLookupCtx;
 
+/* Per zone strucuture. */
+typedef struct _OVS_CT_ZONE_INFO {
+    ULONG limit;
+    ULONG entries;
+} OVS_CT_ZONE_INFO, *POVS_CT_ZONE_INFO;
+
+typedef struct _OVS_CT_ZONE_LIMIT {
+    int zone_id;
+    ULONG limit;
+    ULONG count;
+} OVS_CT_ZONE_LIMIT, *POVS_CT_ZONE_LIMIT;
+
 #define CT_MAX_ENTRIES 1 << 21
 #define CT_HASH_TABLE_SIZE ((UINT32)1 << 10)
 #define CT_HASH_TABLE_MASK (CT_HASH_TABLE_SIZE - 1)
diff --git a/datapath-windows/ovsext/Datapath.c b/datapath-windows/ovsext/Datapath.c
index 34ef2b4..fa99484 100644
--- a/datapath-windows/ovsext/Datapath.c
+++ b/datapath-windows/ovsext/Datapath.c
@@ -99,7 +99,8 @@  NetlinkCmdHandler        OvsGetNetdevCmdHandler,
                          OvsSubscribePacketCmdHandler,
                          OvsReadPacketCmdHandler,
                          OvsCtDeleteCmdHandler,
-                         OvsCtDumpCmdHandler;
+                         OvsCtDumpCmdHandler,
+                         OvsCtLimitHandler;
 
 static NTSTATUS HandleGetDpTransaction(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
                                        UINT32 *replyLen);
@@ -324,6 +325,34 @@  NETLINK_FAMILY nlNetdevFamilyOps = {
     .opsCount = ARRAY_SIZE(nlNetdevFamilyCmdOps)
 };
 
+
+/* Netlink conntrack limit family. */
+NETLINK_CMD nlCtLimitFamilyCmdOps[] = {
+    { .cmd = OVS_CT_LIMIT_CMD_GET,
+      .handler = OvsCtLimitHandler,
+      .supportedDevOp = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = FALSE
+    },
+    { .cmd = OVS_CT_LIMIT_CMD_SET,
+      .handler = OvsCtLimitHandler,
+      .supportedDevOp = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = FALSE
+    },
+    { .cmd = OVS_CT_LIMIT_CMD_DEL,
+      .handler = OvsCtLimitHandler,
+      .supportedDevOp = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = FALSE
+    },
+};
+
+NETLINK_FAMILY nlCtLimitFamilyOps = {
+    .name     = OVS_CT_LIMIT_FAMILY,
+    .id       = OVS_WIN_NL_CTLIMIT_FAMILY_ID,
+    .version  = OVS_CT_LIMIT_VERSION,
+    .maxAttr  = OVS_CT_LIMIT_ATTR_MAX,
+    .cmds     = nlCtLimitFamilyCmdOps,
+    .opsCount = ARRAY_SIZE(nlCtLimitFamilyCmdOps)
+};
 static NTSTATUS MapIrpOutputBuffer(PIRP irp,
                                    UINT32 bufferLength,
                                    UINT32 requiredLength,
@@ -941,6 +970,9 @@  OvsDeviceControl(PDEVICE_OBJECT deviceObject,
     case OVS_WIN_NL_NETDEV_FAMILY_ID:
         nlFamilyOps = &nlNetdevFamilyOps;
         break;
+    case OVS_WIN_NL_CTLIMIT_FAMILY_ID:
+        nlFamilyOps = &nlCtLimitFamilyOps;
+        break;
     default:
         status = STATUS_INVALID_PARAMETER;
         goto done;
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 5234ca3..47077e9 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -1571,6 +1571,11 @@  do_lookup_genl_family(const char *name, struct nlattr **attrs,
         family_name = OVS_WIN_NETDEV_FAMILY;
         family_version = OVS_WIN_NETDEV_VERSION;
         family_attrmax = OVS_WIN_NETDEV_ATTR_MAX;
+    } else if (!strcmp(name, OVS_CT_LIMIT_FAMILY)) {
+        family_id = OVS_WIN_NL_CTLIMIT_FAMILY_ID;
+        family_name = OVS_CT_LIMIT_FAMILY;
+        family_version = OVS_CT_LIMIT_VERSION;
+        family_attrmax = OVS_CT_LIMIT_ATTR_MAX;
     } else {
         ofpbuf_delete(reply);
         return EINVAL;