diff mbox

[ovs-dev,v2,09/22] datapath: Refactor labels initialization.

Message ID CAPWQB7Grtv6=KH26d9CncYHb_Lyzgy2io3ge+bP7HScGTkazGA@mail.gmail.com
State Not Applicable
Headers show

Commit Message

Joe Stringer March 3, 2017, 9:44 p.m. UTC
On 3/03/2017 10:37, "Jarno Rajahalme" <jarno@ovn.org> wrote:


On Mar 2, 2017, at 5:26 PM, Joe Stringer <joe@ovn.org> wrote:

On 28 February 2017 at 17:17, Jarno Rajahalme <jarno@ovn.org> wrote:

Upstream commit:

   Refactoring conntrack labels initialization makes changes in later
   patches easier to review.

   Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
   Acked-by: Pravin B Shelar <pshelar@ovn.org>
   Acked-by: Joe Stringer <joe@ovn.org>
   Signed-off-by: David S. Miller <davem@davemloft.net>

Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
---
datapath/conntrack.c | 113 ++++++++++++++++++++++++++++++
---------------------
1 file changed, 66 insertions(+), 47 deletions(-)

       if (ct->mark != new_mark) {
               ct->mark = new_mark;
@@ -270,56 +263,71 @@ static int ovs_ct_set_mark(struct sk_buff *skb,
struct sw_flow_key *key,
#endif
}

-static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
-                            const struct ovs_key_ct_labels *labels,
-                            const struct ovs_key_ct_labels *mask)
+static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct)
{
-       enum ip_conntrack_info ctinfo;
       struct nf_conn_labels *cl;
-       struct nf_conn *ct;
-
-       /* The connection could be invalid, in which case set_label is
no-op.*/
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct)
-               return 0;

       cl = nf_ct_labels_find(ct);
       if (!cl) {
               nf_ct_labels_ext_add(ct);
               cl = nf_ct_labels_find(ct);
       }
+       if (cl && ovs_ct_get_labels_len(cl) < OVS_CT_LABELS_LEN)
+               return NULL;


The above two lines were not introduced in the upstream code. Do you
intend to introduce them?


Should have mentioned this in a commit message or in a comment. The
inclusion of this test is intentional, and the rationale is that it might
be possible that the kernel is configured with too little space for labels.
However, it is possible that the way OVS kernel module initializes the
number of words in labels for older kernels already takes care of this, do
you have a take on this?


When we compile the out of tree module for a particular kernel, this
information should be available. I don't think that we try to support
compiling against one kernel with one definition of the labels length, then
allow that same module to run on another kernel with a different
definition. So it should be fine to omit so long as there are still the
compile time checks.


  Jarno

For my current working tree for review/build/test, I will drop these lines.

+       return cl;
+}
+
+/* Initialize labels for a new, yet to be committed conntrack entry.  Note
that
+ * since the new connection is not yet confirmed, and thus no-one else has
+ * access to it's labels, we simply write them over.
+ */
+static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key,
+                             const struct ovs_key_ct_labels *labels,
+                             const struct ovs_key_ct_labels *mask)
+{
+       struct nf_conn_labels *cl;
+       u32 *dst;
+       int i;

-       if (!cl || ovs_ct_get_labels_len(cl) < OVS_CT_LABELS_LEN)
+       cl = ovs_ct_get_conn_labels(ct);
+       if (!cl)
               return -ENOSPC;

-       if (nf_ct_is_confirmed(ct)) {
-               /* Triggers a change event, which makes sense only for
-                * confirmed connections.
-                */
-               int err = nf_connlabels_replace(ct, labels->ct_labels_32,
-                                               mask->ct_labels_32,
-                                               OVS_CT_LABELS_LEN_32);
-               if (err)
-                       return err;
-       } else {
-               u32 *dst = (u32 *)cl->bits;
-               const u32 *msk = mask->ct_labels_32;
-               const u32 *lbl = labels->ct_labels_32;
-               int i;
+       dst = (u32 *)cl->bits;
+       for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
+               dst[i] = (dst[i] & ~mask->ct_labels_32[i]) |
+                       (labels->ct_labels_32[i] & mask->ct_labels_32[i]);

-               /* No-one else has access to the non-confirmed entry, copy
-                * labels over, keeping any bits we are not explicitly
setting.
-                */
-               for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
-                       dst[i] = (dst[i] & ~msk[i]) | (lbl[i] & msk[i]);
+       /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the
+        * IPCT_LABEL bit it set in the event cache.
+        */
+       nf_conntrack_event_cache(IPCT_LABEL, ct);

-               /* Labels are included in the IPCTNL_MSG_CT_NEW event only
if
-                * the IPCT_LABEL bit it set in the event cache.
-                */
-               nf_conntrack_event_cache(IPCT_LABEL, ct);
-       }
+       memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
+
+       return 0;
+}
+
+static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
+                            const struct ovs_key_ct_labels *labels,
+                            const struct ovs_key_ct_labels *mask)
+{
+       struct nf_conn_labels *cl;
+       int err;
+
+       cl = ovs_ct_get_conn_labels(ct);
+       if (!cl)
+               return -ENOSPC;
+
+       err = nf_connlabels_replace(ct, labels->ct_labels_32,
+                                   mask->ct_labels_32,
+                                   OVS_CT_LABELS_LEN_32);
+       if (err)
+               return err;
+
+       memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);

-       ovs_ct_get_labels(ct, &key->ct.labels);
       return 0;
}

@@ -926,25 +934,36 @@ static int ovs_ct_commit(struct net *net, struct
sw_flow_key *key,
                        const struct ovs_conntrack_info *info,
                        struct sk_buff *skb)
{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
       int err;

       err = __ovs_ct_lookup(net, key, info, skb);
       if (err)
               return err;

+       /* The connection could be invalid, in which case this is a no-op.*/
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct)
+               return 0;
+
       /* Apply changes before confirming the connection so that the initial
        * conntrack NEW netlink event carries the values given in the CT
        * action.
        */
       if (info->mark.mask) {
-               err = ovs_ct_set_mark(skb, key, info->mark.value,
+               err = ovs_ct_set_mark(ct, key, info->mark.value,
                                     info->mark.mask);
               if (err)
                       return err;
       }
       if (labels_nonzero(&info->labels.mask)) {
-               err = ovs_ct_set_labels(skb, key, &info->labels.value,
-                                       &info->labels.mask);
+               if (!nf_ct_is_confirmed(ct))
+                       err = ovs_ct_init_labels(ct, key,
&info->labels.value,
+                                                &info->labels.mask);
+               else
+                       err = ovs_ct_set_labels(ct, key,
&info->labels.value,
+                                               &info->labels.mask);
               if (err)
                       return err;
       }
--
2.1.4

Comments

Jarno Rajahalme March 4, 2017, 12:04 a.m. UTC | #1
> On Mar 3, 2017, at 1:44 PM, Joe Stringer <joe@ovn.org> wrote:
> 
> 
> 
> On 3/03/2017 10:37, "Jarno Rajahalme" <jarno@ovn.org <mailto:jarno@ovn.org>> wrote:
> 
>> On Mar 2, 2017, at 5:26 PM, Joe Stringer <joe@ovn.org <mailto:joe@ovn.org>> wrote:
>> 
>> On 28 February 2017 at 17:17, Jarno Rajahalme <jarno@ovn.org <mailto:jarno@ovn.org>> wrote:
>>> Upstream commit:
>>> 
>>>    Refactoring conntrack labels initialization makes changes in later
>>>    patches easier to review.
>>> 
>>>    Signed-off-by: Jarno Rajahalme <jarno@ovn.org <mailto:jarno@ovn.org>>
>>>    Acked-by: Pravin B Shelar <pshelar@ovn.org <mailto:pshelar@ovn.org>>
>>>    Acked-by: Joe Stringer <joe@ovn.org <mailto:joe@ovn.org>>
>>>    Signed-off-by: David S. Miller <davem@davemloft.net <mailto:davem@davemloft.net>>
>>> 
>>> Signed-off-by: Jarno Rajahalme <jarno@ovn.org <mailto:jarno@ovn.org>>
>>> ---
>>> datapath/conntrack.c | 113 ++++++++++++++++++++++++++++++---------------------
>>> 1 file changed, 66 insertions(+), 47 deletions(-)
>>> 
>>> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
>>> index dacf34c..adc4315 100644
>>> --- a/datapath/conntrack.c
>>> +++ b/datapath/conntrack.c
>>> @@ -243,19 +243,12 @@ int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
>>>        return 0;
>>> }
>>> 
>>> -static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
>>> +static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
>>>                           u32 ct_mark, u32 mask)
>>> {
>>> #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
>>> -       enum ip_conntrack_info ctinfo;
>>> -       struct nf_conn *ct;
>>>        u32 new_mark;
>>> 
>>> -       /* The connection could be invalid, in which case set_mark is no-op. */
>>> -       ct = nf_ct_get(skb, &ctinfo);
>>> -       if (!ct)
>>> -               return 0;
>>> -
>>>        new_mark = ct_mark | (ct->mark & ~(mask));
>>>        if (ct->mark != new_mark) {
>>>                ct->mark = new_mark;
>>> @@ -270,56 +263,71 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
>>> #endif
>>> }
>>> 
>>> -static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
>>> -                            const struct ovs_key_ct_labels *labels,
>>> -                            const struct ovs_key_ct_labels *mask)
>>> +static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct)
>>> {
>>> -       enum ip_conntrack_info ctinfo;
>>>        struct nf_conn_labels *cl;
>>> -       struct nf_conn *ct;
>>> -
>>> -       /* The connection could be invalid, in which case set_label is no-op.*/
>>> -       ct = nf_ct_get(skb, &ctinfo);
>>> -       if (!ct)
>>> -               return 0;
>>> 
>>>        cl = nf_ct_labels_find(ct);
>>>        if (!cl) {
>>>                nf_ct_labels_ext_add(ct);
>>>                cl = nf_ct_labels_find(ct);
>>>        }
>>> +       if (cl && ovs_ct_get_labels_len(cl) < OVS_CT_LABELS_LEN)
>>> +               return NULL;
>> 
>> The above two lines were not introduced in the upstream code. Do you
>> intend to introduce them?
>> 
> 
> Should have mentioned this in a commit message or in a comment. The inclusion of this test is intentional, and the rationale is that it might be possible that the kernel is configured with too little space for labels. However, it is possible that the way OVS kernel module initializes the number of words in labels for older kernels already takes care of this, do you have a take on this?
> 
> When we compile the out of tree module for a particular kernel, this information should be available. I don't think that we try to support compiling against one kernel with one definition of the labels length, then allow that same module to run on another kernel with a different definition. So it should be fine to omit so long as there are still the compile time checks.
> 

But my understanding is that the compile time checks only apply to newer kernels, where the available storage for labels is a compile time configuration, rather than a run-time number of words.

  Jarno

> 
>   Jarno
> 
>> For my current working tree for review/build/test, I will drop these lines.
>> 
>>> +       return cl;
>>> +}
>>> +
>>> +/* Initialize labels for a new, yet to be committed conntrack entry.  Note that
>>> + * since the new connection is not yet confirmed, and thus no-one else has
>>> + * access to it's labels, we simply write them over.
>>> + */
>>> +static int ovs_ct_init_labels(struct nf_conn *ct, struct sw_flow_key *key,
>>> +                             const struct ovs_key_ct_labels *labels,
>>> +                             const struct ovs_key_ct_labels *mask)
>>> +{
>>> +       struct nf_conn_labels *cl;
>>> +       u32 *dst;
>>> +       int i;
>>> 
>>> -       if (!cl || ovs_ct_get_labels_len(cl) < OVS_CT_LABELS_LEN)
>>> +       cl = ovs_ct_get_conn_labels(ct);
>>> +       if (!cl)
>>>                return -ENOSPC;
>>> 
>>> -       if (nf_ct_is_confirmed(ct)) {
>>> -               /* Triggers a change event, which makes sense only for
>>> -                * confirmed connections.
>>> -                */
>>> -               int err = nf_connlabels_replace(ct, labels->ct_labels_32,
>>> -                                               mask->ct_labels_32,
>>> -                                               OVS_CT_LABELS_LEN_32);
>>> -               if (err)
>>> -                       return err;
>>> -       } else {
>>> -               u32 *dst = (u32 *)cl->bits;
>>> -               const u32 *msk = mask->ct_labels_32;
>>> -               const u32 *lbl = labels->ct_labels_32;
>>> -               int i;
>>> +       dst = (u32 *)cl->bits;
>>> +       for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
>>> +               dst[i] = (dst[i] & ~mask->ct_labels_32[i]) |
>>> +                       (labels->ct_labels_32[i] & mask->ct_labels_32[i]);
>>> 
>>> -               /* No-one else has access to the non-confirmed entry, copy
>>> -                * labels over, keeping any bits we are not explicitly setting.
>>> -                */
>>> -               for (i = 0; i < OVS_CT_LABELS_LEN_32; i++)
>>> -                       dst[i] = (dst[i] & ~msk[i]) | (lbl[i] & msk[i]);
>>> +       /* Labels are included in the IPCTNL_MSG_CT_NEW event only if the
>>> +        * IPCT_LABEL bit it set in the event cache.
>>> +        */
>>> +       nf_conntrack_event_cache(IPCT_LABEL, ct);
>>> 
>>> -               /* Labels are included in the IPCTNL_MSG_CT_NEW event only if
>>> -                * the IPCT_LABEL bit it set in the event cache.
>>> -                */
>>> -               nf_conntrack_event_cache(IPCT_LABEL, ct);
>>> -       }
>>> +       memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
>>> +                            const struct ovs_key_ct_labels *labels,
>>> +                            const struct ovs_key_ct_labels *mask)
>>> +{
>>> +       struct nf_conn_labels *cl;
>>> +       int err;
>>> +
>>> +       cl = ovs_ct_get_conn_labels(ct);
>>> +       if (!cl)
>>> +               return -ENOSPC;
>>> +
>>> +       err = nf_connlabels_replace(ct, labels->ct_labels_32,
>>> +                                   mask->ct_labels_32,
>>> +                                   OVS_CT_LABELS_LEN_32);
>>> +       if (err)
>>> +               return err;
>>> +
>>> +       memcpy(&key->ct.labels, cl->bits, OVS_CT_LABELS_LEN);
>>> 
>>> -       ovs_ct_get_labels(ct, &key->ct.labels);
>>>        return 0;
>>> }
>>> 
>>> @@ -926,25 +934,36 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
>>>                         const struct ovs_conntrack_info *info,
>>>                         struct sk_buff *skb)
>>> {
>>> +       enum ip_conntrack_info ctinfo;
>>> +       struct nf_conn *ct;
>>>        int err;
>>> 
>>>        err = __ovs_ct_lookup(net, key, info, skb);
>>>        if (err)
>>>                return err;
>>> 
>>> +       /* The connection could be invalid, in which case this is a no-op.*/
>>> +       ct = nf_ct_get(skb, &ctinfo);
>>> +       if (!ct)
>>> +               return 0;
>>> +
>>>        /* Apply changes before confirming the connection so that the initial
>>>         * conntrack NEW netlink event carries the values given in the CT
>>>         * action.
>>>         */
>>>        if (info->mark.mask) {
>>> -               err = ovs_ct_set_mark(skb, key, info->mark.value,
>>> +               err = ovs_ct_set_mark(ct, key, info->mark.value,
>>>                                      info->mark.mask);
>>>                if (err)
>>>                        return err;
>>>        }
>>>        if (labels_nonzero(&info->labels.mask)) {
>>> -               err = ovs_ct_set_labels(skb, key, &info->labels.value,
>>> -                                       &info->labels.mask);
>>> +               if (!nf_ct_is_confirmed(ct))
>>> +                       err = ovs_ct_init_labels(ct, key, &info->labels.value,
>>> +                                                &info->labels.mask);
>>> +               else
>>> +                       err = ovs_ct_set_labels(ct, key, &info->labels.value,
>>> +                                               &info->labels.mask);
>>>                if (err)
>>>                        return err;
>>>        }
>>> --
>>> 2.1.4
>>> 
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
Joe Stringer March 4, 2017, 12:47 a.m. UTC | #2
On 3 March 2017 at 16:04, Jarno Rajahalme <jarno@ovn.org> wrote:
>
> On Mar 3, 2017, at 1:44 PM, Joe Stringer <joe@ovn.org> wrote:
>
>
>
> On 3/03/2017 10:37, "Jarno Rajahalme" <jarno@ovn.org> wrote:
>
>
> On Mar 2, 2017, at 5:26 PM, Joe Stringer <joe@ovn.org> wrote:
>
> On 28 February 2017 at 17:17, Jarno Rajahalme <jarno@ovn.org> wrote:
>
> Upstream commit:
>
>    Refactoring conntrack labels initialization makes changes in later
>    patches easier to review.
>
>    Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
>    Acked-by: Pravin B Shelar <pshelar@ovn.org>
>    Acked-by: Joe Stringer <joe@ovn.org>
>    Signed-off-by: David S. Miller <davem@davemloft.net>
>
> Signed-off-by: Jarno Rajahalme <jarno@ovn.org>
> ---
> datapath/conntrack.c | 113
> ++++++++++++++++++++++++++++++---------------------
> 1 file changed, 66 insertions(+), 47 deletions(-)
>
> diff --git a/datapath/conntrack.c b/datapath/conntrack.c
> index dacf34c..adc4315 100644
> --- a/datapath/conntrack.c
> +++ b/datapath/conntrack.c
> @@ -243,19 +243,12 @@ int ovs_ct_put_key(const struct sw_flow_key *key,
> struct sk_buff *skb)
>        return 0;
> }
>
> -static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
> +static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
>                           u32 ct_mark, u32 mask)
> {
> #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
> -       enum ip_conntrack_info ctinfo;
> -       struct nf_conn *ct;
>        u32 new_mark;
>
> -       /* The connection could be invalid, in which case set_mark is no-op.
> */
> -       ct = nf_ct_get(skb, &ctinfo);
> -       if (!ct)
> -               return 0;
> -
>        new_mark = ct_mark | (ct->mark & ~(mask));
>        if (ct->mark != new_mark) {
>                ct->mark = new_mark;
> @@ -270,56 +263,71 @@ static int ovs_ct_set_mark(struct sk_buff *skb, struct
> sw_flow_key *key,
> #endif
> }
>
> -static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
> -                            const struct ovs_key_ct_labels *labels,
> -                            const struct ovs_key_ct_labels *mask)
> +static struct nf_conn_labels *ovs_ct_get_conn_labels(struct nf_conn *ct)
> {
> -       enum ip_conntrack_info ctinfo;
>        struct nf_conn_labels *cl;
> -       struct nf_conn *ct;
> -
> -       /* The connection could be invalid, in which case set_label is
> no-op.*/
> -       ct = nf_ct_get(skb, &ctinfo);
> -       if (!ct)
> -               return 0;
>
>        cl = nf_ct_labels_find(ct);
>        if (!cl) {
>                nf_ct_labels_ext_add(ct);
>                cl = nf_ct_labels_find(ct);
>        }
> +       if (cl && ovs_ct_get_labels_len(cl) < OVS_CT_LABELS_LEN)
> +               return NULL;
>
>
> The above two lines were not introduced in the upstream code. Do you
> intend to introduce them?
>
>
> Should have mentioned this in a commit message or in a comment. The
> inclusion of this test is intentional, and the rationale is that it might be
> possible that the kernel is configured with too little space for labels.
> However, it is possible that the way OVS kernel module initializes the
> number of words in labels for older kernels already takes care of this, do
> you have a take on this?
>
>
> When we compile the out of tree module for a particular kernel, this
> information should be available. I don't think that we try to support
> compiling against one kernel with one definition of the labels length, then
> allow that same module to run on another kernel with a different definition.
> So it should be fine to omit so long as there are still the compile time
> checks.
>
>
> But my understanding is that the compile time checks only apply to newer
> kernels, where the available storage for labels is a compile time
> configuration, rather than a run-time number of words.

OK, there's a couple of pieces here:

The compile time checks check that the connlabels support up to the
size that OVS needs.

Kernels up up until 3.15 do not define the NF_CT_LABELS_MAX_SIZE in
the headers, but allows labels up until 128 bits.
Kernels 3.15 up until 4.9 define the above, allowing labels up until 128 bits.
Kernels 4.9 and above also allow up to 128 bits.

Then runtime:

Kernels up until 4.9 will dynamically change the labels length to the
largest of all users within the namespace.
Kernels 4.9 and above will always set the labels length to 128 bits -
so when the labels extension is added to the conntrack entry, it's
always big enough.

At module load time, openvswitch attempts to set connlabel 127 (via
nf_connlabels_get()) to ensure that connlabels extensions are added to
conntarck entries with enough space to satisfy the OVS CT_LABELS API.

There's a small possibility that someone configures an older kernel
with a lower number of labels, runs connections through, then loads
the OVS module, then OVS looks up an existing CT entry and finds that
entry with a lower labels size. If we want to cover this, then perhaps
we can shift the two lines that we're discussing into the
implementation of nf_ct_labels_find() in the compat code, activated by
a check that the kernel hasn't yet included upstream commit
23014011ba4209a086931ff402eac1c41abbe456.
diff mbox

Patch

diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index dacf34c..adc4315 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -243,19 +243,12 @@  int ovs_ct_put_key(const struct sw_flow_key *key,
struct sk_buff *skb)
       return 0;
}

-static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
+static int ovs_ct_set_mark(struct nf_conn *ct, struct sw_flow_key *key,
                          u32 ct_mark, u32 mask)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
-       enum ip_conntrack_info ctinfo;
-       struct nf_conn *ct;
       u32 new_mark;

-       /* The connection could be invalid, in which case set_mark is
no-op. */
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct)
-               return 0;
-
       new_mark = ct_mark | (ct->mark & ~(mask));