diff mbox series

[ovs-dev,net-next,v4,05/10] net: openvswitch: optimize flow-mask looking up

Message ID 1571135440-24313-6-git-send-email-xiangxia.m.yue@gmail.com
State Changes Requested
Headers show
Series optimize openvswitch flow looking up | expand

Commit Message

Tonghao Zhang Oct. 15, 2019, 10:30 a.m. UTC
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

The full looking up on flow table traverses all mask array.
If mask-array is too large, the number of invalid flow-mask
increase, performance will be drop.

One bad case, for example: M means flow-mask is valid and NULL
of flow-mask means deleted.

+-------------------------------------------+
| M | NULL | ...                  | NULL | M|
+-------------------------------------------+

In that case, without this patch, openvswitch will traverses all
mask array, because there will be one flow-mask in the tail. This
patch changes the way of flow-mask inserting and deleting, and the
mask array will be keep as below: there is not a NULL hole. In the
fast path, we can "break" "for" (not "continue") in flow_lookup
when we get a NULL flow-mask.

         "break"
            v
+-------------------------------------------+
| M | M |  NULL |...           | NULL | NULL|
+-------------------------------------------+

This patch don't optimize slow or control path, still using ma->max
to traverse. Slow path:
* tbl_mask_array_realloc
* ovs_flow_tbl_lookup_exact
* flow_mask_find

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Tested-by: Greg Rose <gvrose8192@gmail.com>
---
 net/openvswitch/flow_table.c | 101 ++++++++++++++++++++++---------------------
 1 file changed, 52 insertions(+), 49 deletions(-)

Comments

William Tu Oct. 18, 2019, 11:26 p.m. UTC | #1
On Wed, Oct 16, 2019 at 5:54 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> The full looking up on flow table traverses all mask array.
> If mask-array is too large, the number of invalid flow-mask
> increase, performance will be drop.
>
> One bad case, for example: M means flow-mask is valid and NULL
> of flow-mask means deleted.
>
> +-------------------------------------------+
> | M | NULL | ...                  | NULL | M|
> +-------------------------------------------+
>
> In that case, without this patch, openvswitch will traverses all
> mask array, because there will be one flow-mask in the tail. This
> patch changes the way of flow-mask inserting and deleting, and the
> mask array will be keep as below: there is not a NULL hole. In the
> fast path, we can "break" "for" (not "continue") in flow_lookup
> when we get a NULL flow-mask.
>
>          "break"
>             v
> +-------------------------------------------+
> | M | M |  NULL |...           | NULL | NULL|
> +-------------------------------------------+
>
> This patch don't optimize slow or control path, still using ma->max
> to traverse. Slow path:
> * tbl_mask_array_realloc
> * ovs_flow_tbl_lookup_exact
> * flow_mask_find


Hi Tonghao,

Does this improve performance? After all, the original code simply
check whether the mask is NULL, then goto next mask.

However, with your patch,  isn't this invalidated the mask cache entry which
point to the "M" you swap to the front? See my commands inline.

>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> Tested-by: Greg Rose <gvrose8192@gmail.com>
> ---
>  net/openvswitch/flow_table.c | 101 ++++++++++++++++++++++---------------------
>  1 file changed, 52 insertions(+), 49 deletions(-)
>
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index 8d4f50d..a10d421 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -538,7 +538,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>
>                 mask = rcu_dereference_ovsl(ma->masks[i]);
>                 if (!mask)
> -                       continue;
> +                       break;
>
>                 flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
>                 if (flow) { /* Found */
> @@ -695,7 +695,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
>  int ovs_flow_tbl_num_masks(const struct flow_table *table)
>  {
>         struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
> -       return ma->count;
> +       return READ_ONCE(ma->count);
>  }
>
>  static struct table_instance *table_instance_expand(struct table_instance *ti,
> @@ -704,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
>         return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
>  }
>
> -static void tbl_mask_array_delete_mask(struct mask_array *ma,
> -                                      struct sw_flow_mask *mask)
> +static void tbl_mask_array_del_mask(struct flow_table *tbl,
> +                                   struct sw_flow_mask *mask)
>  {
> -       int i;
> +       struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> +       int i, ma_count = READ_ONCE(ma->count);
>
>         /* Remove the deleted mask pointers from the array */
> -       for (i = 0; i < ma->max; i++) {
> -               if (mask == ovsl_dereference(ma->masks[i])) {
> -                       RCU_INIT_POINTER(ma->masks[i], NULL);
> -                       ma->count--;
> -                       kfree_rcu(mask, rcu);
> -                       return;
> -               }
> +       for (i = 0; i < ma_count; i++) {
> +               if (mask == ovsl_dereference(ma->masks[i]))
> +                       goto found;
>         }
> +
>         BUG();
> +       return;
> +
> +found:
> +       WRITE_ONCE(ma->count, ma_count -1);
> +
> +       rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
> +       RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);

So when you swap the ma->masks[ma_count -1], the mask cache entry
who's 'mask_index == ma_count' become all invalid?

Regards,
William

<snip>
Tonghao Zhang Oct. 21, 2019, 4:51 a.m. UTC | #2
On Sat, Oct 19, 2019 at 7:27 AM William Tu <u9012063@gmail.com> wrote:
>
> On Wed, Oct 16, 2019 at 5:54 AM <xiangxia.m.yue@gmail.com> wrote:
> >
> > From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> >
> > The full looking up on flow table traverses all mask array.
> > If mask-array is too large, the number of invalid flow-mask
> > increase, performance will be drop.
> >
> > One bad case, for example: M means flow-mask is valid and NULL
> > of flow-mask means deleted.
> >
> > +-------------------------------------------+
> > | M | NULL | ...                  | NULL | M|
> > +-------------------------------------------+
> >
> > In that case, without this patch, openvswitch will traverses all
> > mask array, because there will be one flow-mask in the tail. This
> > patch changes the way of flow-mask inserting and deleting, and the
> > mask array will be keep as below: there is not a NULL hole. In the
> > fast path, we can "break" "for" (not "continue") in flow_lookup
> > when we get a NULL flow-mask.
> >
> >          "break"
> >             v
> > +-------------------------------------------+
> > | M | M |  NULL |...           | NULL | NULL|
> > +-------------------------------------------+
> >
> > This patch don't optimize slow or control path, still using ma->max
> > to traverse. Slow path:
> > * tbl_mask_array_realloc
> > * ovs_flow_tbl_lookup_exact
> > * flow_mask_find
>
>
> Hi Tonghao,
>
> Does this improve performance? After all, the original code simply
> check whether the mask is NULL, then goto next mask.
I tested the performance, but I disable the mask cache, and use the
dpdk-pktgen to generate packets:
The test ovs flow:
ovs-dpctl add-dp system@ovs-system
ovs-dpctl add-if system@ovs-system eth6
ovs-dpctl add-if system@ovs-system eth7

for m in $(seq 1 100 | xargs printf '%.2x\n'); do
       ovs-dpctl add-flow ovs-system
"in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)"
2
done

ovs-dpctl add-flow ovs-system
"in_port(1),eth(dst=98:03:9b:6e:4a:f5/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)"
2
ovs-dpctl add-flow ovs-system
"in_port(2),eth(dst=98:03:9b:6e:4a:f4/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)"
1

for m in $(seq 101 160 | xargs printf '%.2x\n'); do
        ovs-dpctl add-flow ovs-system
"in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)"
2
done

for m in $(seq 1 100 | xargs printf '%.2x\n'); do
         ovs-dpctl del-flow ovs-system
"in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)"
done

Without this patch: 982481pps (64B)
With this patch: 1112495 pps (64B), about 13% improve

> However, with your patch,  isn't this invalidated the mask cache entry which
> point to the "M" you swap to the front? See my commands inline.
>
> >
> > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > Tested-by: Greg Rose <gvrose8192@gmail.com>
> > ---
> >  net/openvswitch/flow_table.c | 101 ++++++++++++++++++++++---------------------
> >  1 file changed, 52 insertions(+), 49 deletions(-)
> >
> > diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> > index 8d4f50d..a10d421 100644
> > --- a/net/openvswitch/flow_table.c
> > +++ b/net/openvswitch/flow_table.c
> > @@ -538,7 +538,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
> >
> >                 mask = rcu_dereference_ovsl(ma->masks[i]);
> >                 if (!mask)
> > -                       continue;
> > +                       break;
> >
> >                 flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
> >                 if (flow) { /* Found */
> > @@ -695,7 +695,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
> >  int ovs_flow_tbl_num_masks(const struct flow_table *table)
> >  {
> >         struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
> > -       return ma->count;
> > +       return READ_ONCE(ma->count);
> >  }
> >
> >  static struct table_instance *table_instance_expand(struct table_instance *ti,
> > @@ -704,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
> >         return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
> >  }
> >
> > -static void tbl_mask_array_delete_mask(struct mask_array *ma,
> > -                                      struct sw_flow_mask *mask)
> > +static void tbl_mask_array_del_mask(struct flow_table *tbl,
> > +                                   struct sw_flow_mask *mask)
> >  {
> > -       int i;
> > +       struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> > +       int i, ma_count = READ_ONCE(ma->count);
> >
> >         /* Remove the deleted mask pointers from the array */
> > -       for (i = 0; i < ma->max; i++) {
> > -               if (mask == ovsl_dereference(ma->masks[i])) {
> > -                       RCU_INIT_POINTER(ma->masks[i], NULL);
> > -                       ma->count--;
> > -                       kfree_rcu(mask, rcu);
> > -                       return;
> > -               }
> > +       for (i = 0; i < ma_count; i++) {
> > +               if (mask == ovsl_dereference(ma->masks[i]))
> > +                       goto found;
> >         }
> > +
> >         BUG();
> > +       return;
> > +
> > +found:
> > +       WRITE_ONCE(ma->count, ma_count -1);
> > +
> > +       rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
> > +       RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
>
> So when you swap the ma->masks[ma_count -1], the mask cache entry
> who's 'mask_index == ma_count' become all invalid?
Yes, a little tricky.
> Regards,
> William
>
> <snip>
William Tu Oct. 21, 2019, 5:58 p.m. UTC | #3
> > Hi Tonghao,
> >
> > Does this improve performance? After all, the original code simply
> > check whether the mask is NULL, then goto next mask.
> I tested the performance, but I disable the mask cache, and use the
> dpdk-pktgen to generate packets:
> The test ovs flow:
> ovs-dpctl add-dp system@ovs-system
> ovs-dpctl add-if system@ovs-system eth6
> ovs-dpctl add-if system@ovs-system eth7
>
> for m in $(seq 1 100 | xargs printf '%.2x\n'); do
>        ovs-dpctl add-flow ovs-system
> "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)"
> 2
> done
>
> ovs-dpctl add-flow ovs-system
> "in_port(1),eth(dst=98:03:9b:6e:4a:f5/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)"
> 2
> ovs-dpctl add-flow ovs-system
> "in_port(2),eth(dst=98:03:9b:6e:4a:f4/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)"
> 1
>
> for m in $(seq 101 160 | xargs printf '%.2x\n'); do
>         ovs-dpctl add-flow ovs-system
> "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)"
> 2
> done
>
> for m in $(seq 1 100 | xargs printf '%.2x\n'); do
>          ovs-dpctl del-flow ovs-system
> "in_port(1),eth(dst=00:$m:00:00:00:00/ff:ff:ff:ff:ff:$m),eth_type(0x0800),ipv4(frag=no)"
> done
>
> Without this patch: 982481pps (64B)
> With this patch: 1112495 pps (64B), about 13% improve
>

Hi Tonghao,

Thanks for doing the measurement.
Based on the result (skipping 100 NULL mask lookup with 13% improvement),
and with additional overhead of mask cache being invalidate while
refilling these 100
gap, I'd argue that this patch is not necessary. But let's wait for
others comments.

Regards,
William

> > However, with your patch,  isn't this invalidated the mask cache entry which
> > point to the "M" you swap to the front? See my commands inline.
> >
> > >
> > > Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> > > Tested-by: Greg Rose <gvrose8192@gmail.com>
> > > ---

<snip>

> > >  static struct table_instance *table_instance_expand(struct table_instance *ti,
> > > @@ -704,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
> > >         return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
> > >  }
> > >
> > > -static void tbl_mask_array_delete_mask(struct mask_array *ma,
> > > -                                      struct sw_flow_mask *mask)
> > > +static void tbl_mask_array_del_mask(struct flow_table *tbl,
> > > +                                   struct sw_flow_mask *mask)
> > >  {
> > > -       int i;
> > > +       struct mask_array *ma = ovsl_dereference(tbl->mask_array);
> > > +       int i, ma_count = READ_ONCE(ma->count);
> > >
> > >         /* Remove the deleted mask pointers from the array */
> > > -       for (i = 0; i < ma->max; i++) {
> > > -               if (mask == ovsl_dereference(ma->masks[i])) {
> > > -                       RCU_INIT_POINTER(ma->masks[i], NULL);
> > > -                       ma->count--;
> > > -                       kfree_rcu(mask, rcu);
> > > -                       return;
> > > -               }
> > > +       for (i = 0; i < ma_count; i++) {
> > > +               if (mask == ovsl_dereference(ma->masks[i]))
> > > +                       goto found;
> > >         }
> > > +
> > >         BUG();
> > > +       return;
> > > +
> > > +found:
> > > +       WRITE_ONCE(ma->count, ma_count -1);
> > > +
> > > +       rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
> > > +       RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
> >
> > So when you swap the ma->masks[ma_count -1], the mask cache entry
> > who's 'mask_index == ma_count' become all invalid?
> Yes, a little tricky.
> > Regards,
> > William
> >
diff mbox series

Patch

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 8d4f50d..a10d421 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -538,7 +538,7 @@  static struct sw_flow *flow_lookup(struct flow_table *tbl,
 
 		mask = rcu_dereference_ovsl(ma->masks[i]);
 		if (!mask)
-			continue;
+			break;
 
 		flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
 		if (flow) { /* Found */
@@ -695,7 +695,7 @@  struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl,
 int ovs_flow_tbl_num_masks(const struct flow_table *table)
 {
 	struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
-	return ma->count;
+	return READ_ONCE(ma->count);
 }
 
 static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -704,21 +704,33 @@  static struct table_instance *table_instance_expand(struct table_instance *ti,
 	return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
 }
 
-static void tbl_mask_array_delete_mask(struct mask_array *ma,
-				       struct sw_flow_mask *mask)
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+				    struct sw_flow_mask *mask)
 {
-	int i;
+	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+	int i, ma_count = READ_ONCE(ma->count);
 
 	/* Remove the deleted mask pointers from the array */
-	for (i = 0; i < ma->max; i++) {
-		if (mask == ovsl_dereference(ma->masks[i])) {
-			RCU_INIT_POINTER(ma->masks[i], NULL);
-			ma->count--;
-			kfree_rcu(mask, rcu);
-			return;
-		}
+	for (i = 0; i < ma_count; i++) {
+		if (mask == ovsl_dereference(ma->masks[i]))
+			goto found;
 	}
+
 	BUG();
+	return;
+
+found:
+	WRITE_ONCE(ma->count, ma_count -1);
+
+	rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+	RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+	kfree_rcu(mask, rcu);
+
+	/* Shrink the mask array if necessary. */
+	if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+	    ma_count <= (ma->max / 3))
+		tbl_mask_array_realloc(tbl, ma->max / 2);
 }
 
 /* Remove 'mask' from the mask list, if it is not needed any more. */
@@ -732,17 +744,8 @@  static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 		BUG_ON(!mask->ref_count);
 		mask->ref_count--;
 
-		if (!mask->ref_count) {
-			struct mask_array *ma;
-
-			ma = ovsl_dereference(tbl->mask_array);
-			tbl_mask_array_delete_mask(ma, mask);
-
-			/* Shrink the mask array if necessary. */
-			if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
-			    ma->count <= (ma->max / 3))
-				tbl_mask_array_realloc(tbl, ma->max / 2);
-		}
+		if (!mask->ref_count)
+			tbl_mask_array_del_mask(tbl, mask);
 	}
 }
 
@@ -806,6 +809,29 @@  static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
 	return NULL;
 }
 
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+				   struct sw_flow_mask *new)
+{
+	struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+	int err, ma_count = READ_ONCE(ma->count);
+
+	if (ma_count >= ma->max) {
+		err = tbl_mask_array_realloc(tbl, ma->max +
+					      MASK_ARRAY_SIZE_MIN);
+		if (err)
+			return err;
+
+		ma = ovsl_dereference(tbl->mask_array);
+	}
+
+	BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+	rcu_assign_pointer(ma->masks[ma_count], new);
+	WRITE_ONCE(ma->count, ma_count +1);
+
+	return 0;
+}
+
 /* Add 'mask' into the mask list, if it is not already there. */
 static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 			    const struct sw_flow_mask *new)
@@ -814,9 +840,6 @@  static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 
 	mask = flow_mask_find(tbl, new);
 	if (!mask) {
-		struct mask_array *ma;
-		int i;
-
 		/* Allocate a new mask if none exsits. */
 		mask = mask_alloc();
 		if (!mask)
@@ -825,29 +848,9 @@  static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
 		mask->range = new->range;
 
 		/* Add mask to mask-list. */
-		ma = ovsl_dereference(tbl->mask_array);
-		if (ma->count >= ma->max) {
-			int err;
-
-			err = tbl_mask_array_realloc(tbl, ma->max +
-						     MASK_ARRAY_SIZE_MIN);
-			if (err) {
-				kfree(mask);
-				return err;
-			}
-
-			ma = ovsl_dereference(tbl->mask_array);
-		}
-
-		for (i = 0; i < ma->max; i++) {
-			const struct sw_flow_mask *t;
-
-			t = ovsl_dereference(ma->masks[i]);
-			if (!t) {
-				rcu_assign_pointer(ma->masks[i], mask);
-				ma->count++;
-				break;
-			}
+		if (tbl_mask_array_add_mask(tbl, mask)) {
+			kfree(mask);
+			return -ENOMEM;
 		}
 	} else {
 		BUG_ON(!mask->ref_count);