diff mbox

[v2] linux: Signal datapath that unaligned Netlink message can be received

Message ID a960865f3ad68da1016f35085c1d84126e92f0a5.1385814128.git.tgraf@redhat.com
State Awaiting Upstream, archived
Delegated to: David Miller
Headers show

Commit Message

Thomas Graf Nov. 30, 2013, 12:25 p.m. UTC
Following commit (''netlink: Do not enforce alignment of last Netlink
attribute''), signal the ability to receive unaligned Netlink messages
to the datapath to enable utilization of zerocopy optimizations.

Signed-off-by: Thomas Graf <tgraf@redhat.com>
---
V2: - Only provide OVS_DP_ATTR_USER_FEATURES on OVS_DP_CMD_NEW

 include/linux/openvswitch.h | 15 ++++++++++++++-
 lib/dpif-linux.c            |  6 ++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

Comments

Jesse Gross Dec. 17, 2013, 1:23 a.m. UTC | #1
On Sat, Nov 30, 2013 at 4:25 AM, Thomas Graf <tgraf@redhat.com> wrote:
> diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
> index 6c482d0..2d8a1aa 100644
> --- a/lib/dpif-linux.c
> +++ b/lib/dpif-linux.c
> @@ -73,6 +73,7 @@ struct dpif_linux_dp {
>      /* Attributes. */
>      const char *name;                  /* OVS_DP_ATTR_NAME. */
>      const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
> +    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
>      struct ovs_dp_stats stats;         /* OVS_DP_ATTR_STATS. */
>      struct ovs_dp_megaflow_stats megaflow_stats;
>                                         /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
> @@ -228,6 +229,7 @@ dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
>          dp_request.cmd = OVS_DP_CMD_NEW;
>          upcall_pid = 0;
>          dp_request.upcall_pid = &upcall_pid;
> +        dp_request.user_features |= OVS_DP_F_UNALIGNED;
>      } else {
>          dp_request.cmd = OVS_DP_CMD_GET;
>      }

Does this handle the case where we are opening an existing datapath
and need to update the features?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Graf Dec. 17, 2013, 9:52 a.m. UTC | #2
On 12/17/2013 02:23 AM, Jesse Gross wrote:
> On Sat, Nov 30, 2013 at 4:25 AM, Thomas Graf <tgraf@redhat.com> wrote:
>> diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
>> index 6c482d0..2d8a1aa 100644
>> --- a/lib/dpif-linux.c
>> +++ b/lib/dpif-linux.c
>> @@ -73,6 +73,7 @@ struct dpif_linux_dp {
>>       /* Attributes. */
>>       const char *name;                  /* OVS_DP_ATTR_NAME. */
>>       const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
>> +    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
>>       struct ovs_dp_stats stats;         /* OVS_DP_ATTR_STATS. */
>>       struct ovs_dp_megaflow_stats megaflow_stats;
>>                                          /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
>> @@ -228,6 +229,7 @@ dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
>>           dp_request.cmd = OVS_DP_CMD_NEW;
>>           upcall_pid = 0;
>>           dp_request.upcall_pid = &upcall_pid;
>> +        dp_request.user_features |= OVS_DP_F_UNALIGNED;
>>       } else {
>>           dp_request.cmd = OVS_DP_CMD_GET;
>>       }
>
> Does this handle the case where we are opening an existing datapath
> and need to update the features?

The patch (''openvswitch: Drop user features if old user space attempted
to create datapath'') takes care of resetting the features for the
downgrade case. The upgrade case with a persistent datapath object is
currently not implemented. The datapath object needs to be recreated.

Defining the NLM_F_REPLACE semantics is non trivial if we want to do
more than just update the settings. I will propose this in a follow up
patch.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross Dec. 17, 2013, 5:49 p.m. UTC | #3
On Tue, Dec 17, 2013 at 1:52 AM, Thomas Graf <tgraf@redhat.com> wrote:
> On 12/17/2013 02:23 AM, Jesse Gross wrote:
>>
>> On Sat, Nov 30, 2013 at 4:25 AM, Thomas Graf <tgraf@redhat.com> wrote:
>>>
>>> diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
>>> index 6c482d0..2d8a1aa 100644
>>> --- a/lib/dpif-linux.c
>>> +++ b/lib/dpif-linux.c
>>> @@ -73,6 +73,7 @@ struct dpif_linux_dp {
>>>       /* Attributes. */
>>>       const char *name;                  /* OVS_DP_ATTR_NAME. */
>>>       const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
>>> +    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
>>>       struct ovs_dp_stats stats;         /* OVS_DP_ATTR_STATS. */
>>>       struct ovs_dp_megaflow_stats megaflow_stats;
>>>                                          /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
>>> @@ -228,6 +229,7 @@ dpif_linux_open(const struct dpif_class *class
>>> OVS_UNUSED, const char *name,
>>>           dp_request.cmd = OVS_DP_CMD_NEW;
>>>           upcall_pid = 0;
>>>           dp_request.upcall_pid = &upcall_pid;
>>> +        dp_request.user_features |= OVS_DP_F_UNALIGNED;
>>>       } else {
>>>           dp_request.cmd = OVS_DP_CMD_GET;
>>>       }
>>
>>
>> Does this handle the case where we are opening an existing datapath
>> and need to update the features?
>
>
> The patch (''openvswitch: Drop user features if old user space attempted
> to create datapath'') takes care of resetting the features for the
> downgrade case. The upgrade case with a persistent datapath object is
> currently not implemented. The datapath object needs to be recreated.

I think there's also a potential downgrade issue if we add a new
feature to the list of capabilities - it won't automatically reset
since userspace is now using v2 of the netlink protocol. Obviously,
this isn't an issue yet but it we should make sure that it is
addressed before there is a release.

> Defining the NLM_F_REPLACE semantics is non trivial if we want to do
> more than just update the settings. I will propose this in a follow up
> patch.

Couldn't userspace just issue an OVS_DP_CMD_SET on start?
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Graf Dec. 17, 2013, 6:08 p.m. UTC | #4
On 12/17/2013 06:49 PM, Jesse Gross wrote:
> I think there's also a potential downgrade issue if we add a new
> feature to the list of capabilities - it won't automatically reset
> since userspace is now using v2 of the netlink protocol. Obviously,
> this isn't an issue yet but it we should make sure that it is
> addressed before there is a release.

>> Defining the NLM_F_REPLACE semantics is non trivial if we want to do
>> more than just update the settings. I will propose this in a follow up
>> patch.
>
> Couldn't userspace just issue an OVS_DP_CMD_SET on start?

Right, that works as well but introduces a small race compared to
NLM_F_REPLACE which would be atomic. I think we can live with that.

I will send a v3 of this patch with dpif-linux changed to issue
OVS_DP_CMD_SET first and fall back to OVS_DP_CMD_NEW if no DP exists.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jesse Gross Dec. 17, 2013, 7:21 p.m. UTC | #5
On Tue, Dec 17, 2013 at 10:08 AM, Thomas Graf <tgraf@redhat.com> wrote:
> On 12/17/2013 06:49 PM, Jesse Gross wrote:
>>
>> I think there's also a potential downgrade issue if we add a new
>> feature to the list of capabilities - it won't automatically reset
>> since userspace is now using v2 of the netlink protocol. Obviously,
>> this isn't an issue yet but it we should make sure that it is
>> addressed before there is a release.
>
>
>>> Defining the NLM_F_REPLACE semantics is non trivial if we want to do
>>> more than just update the settings. I will propose this in a follow up
>>> patch.
>>
>>
>> Couldn't userspace just issue an OVS_DP_CMD_SET on start?
>
>
> Right, that works as well but introduces a small race compared to
> NLM_F_REPLACE which would be atomic. I think we can live with that.
>
> I will send a v3 of this patch with dpif-linux changed to issue
> OVS_DP_CMD_SET first and fall back to OVS_DP_CMD_NEW if no DP exists.

That sounds good to me. I have a hard time imagining a case where the
race condition would matter at all since we are still in the process
of starting up and therefore shouldn't be processing packets yet.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index b429201..80266b1 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -60,7 +60,16 @@  struct ovs_header {
 
 #define OVS_DATAPATH_FAMILY  "ovs_datapath"
 #define OVS_DATAPATH_MCGROUP "ovs_datapath"
-#define OVS_DATAPATH_VERSION 0x1
+
+/**
+ * V2:
+ *   - API users are expected to provide OVS_DP_ATTR_USER_FEATURES
+ *     when creating or updating the datapath.
+ */
+#define OVS_DATAPATH_VERSION 2
+
+/* First OVS datapath version to support features */
+#define OVS_DP_VER_FEATURES 2
 
 enum ovs_datapath_cmd {
 	OVS_DP_CMD_UNSPEC,
@@ -95,6 +104,7 @@  enum ovs_datapath_attr {
 	OVS_DP_ATTR_UPCALL_PID,		/* Netlink PID to receive upcalls */
 	OVS_DP_ATTR_STATS,		/* struct ovs_dp_stats */
 	OVS_DP_ATTR_MEGAFLOW_STATS,	/* struct ovs_dp_megaflow_stats */
+	OVS_DP_ATTR_USER_FEATURES,	/* OVS_DP_F_*  */
 	__OVS_DP_ATTR_MAX
 };
 
@@ -126,6 +136,9 @@  struct ovs_vport_stats {
 	__u64   tx_dropped;		/* no space available in linux  */
 };
 
+/* Allow last Netlink attribute to be unaligned */
+#define OVS_DP_F_UNALIGNED (1 << 0)
+
 /* Fixed logical ports. */
 #define OVSP_LOCAL      ((__u32)0)
 
diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 6c482d0..2d8a1aa 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -73,6 +73,7 @@  struct dpif_linux_dp {
     /* Attributes. */
     const char *name;                  /* OVS_DP_ATTR_NAME. */
     const uint32_t *upcall_pid;        /* OVS_DP_ATTR_UPCALL_PID. */
+    uint32_t user_features;            /* OVS_DP_ATTR_USER_FEATURES */
     struct ovs_dp_stats stats;         /* OVS_DP_ATTR_STATS. */
     struct ovs_dp_megaflow_stats megaflow_stats;
                                        /* OVS_DP_ATTR_MEGAFLOW_STATS.*/
@@ -228,6 +229,7 @@  dpif_linux_open(const struct dpif_class *class OVS_UNUSED, const char *name,
         dp_request.cmd = OVS_DP_CMD_NEW;
         upcall_pid = 0;
         dp_request.upcall_pid = &upcall_pid;
+        dp_request.user_features |= OVS_DP_F_UNALIGNED;
     } else {
         dp_request.cmd = OVS_DP_CMD_GET;
     }
@@ -1839,6 +1841,10 @@  dpif_linux_dp_to_ofpbuf(const struct dpif_linux_dp *dp, struct ofpbuf *buf)
         nl_msg_put_u32(buf, OVS_DP_ATTR_UPCALL_PID, *dp->upcall_pid);
     }
 
+    if (dp->user_features) {
+        nl_msg_put_u32(buf, OVS_DP_ATTR_USER_FEATURES, dp->user_features);
+    }
+
     /* Skip OVS_DP_ATTR_STATS since we never have a reason to serialize it. */
 }