diff mbox

[ovs-dev,v1] ovn: Fix BFD error config on gateway

Message ID 20170816064748.7972-1-sysugaozhenyu@gmail.com
State Superseded
Delegated to: Russell Bryant
Headers show

Commit Message

Gao Zhenyu Aug. 16, 2017, 6:47 a.m. UTC
The bfd_calculate_chassis function calculates gateway's peer
datapaths to figure our which chassis should be add in bfd_chassis.
But in most circumstance, a gateway's peer datapath has NO chassis.
So it may disable BFD on some tunnel ports. Then a port on a remote
ovs cannot send packet out because it believes all remote gateway are
down.

This patch will go though whole graph and visit all datapath's port
which has connect with gateway.

Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
---
 ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 84 insertions(+), 18 deletions(-)

Comments

Anil Venkata Aug. 17, 2017, 5:21 p.m. UTC | #1
Hi Zhenyu Gao

Is it possible for you to add a test case for this scenario. This test on
the master code( without your patch) should fail, and your patch should
make the test pass.

Thanks
Anil

On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com>
wrote:

> The bfd_calculate_chassis function calculates gateway's peer
> datapaths to figure our which chassis should be add in bfd_chassis.
> But in most circumstance, a gateway's peer datapath has NO chassis.
> So it may disable BFD on some tunnel ports. Then a port on a remote
> ovs cannot send packet out because it believes all remote gateway are
> down.
>
> This patch will go though whole graph and visit all datapath's port
> which has connect with gateway.
>
> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
> ---
>  ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++
> ++++++++++++---------
>  1 file changed, 84 insertions(+), 18 deletions(-)
>
> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c
> index d1448b1..dccfc98 100644
> --- a/ovn/controller/bfd.c
> +++ b/ovn/controller/bfd.c
> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct
> ovsrec_bridge *br_int,
>      }
>  }
>
> +struct local_datapath_node {
> +    struct ovs_list node;
> +    struct local_datapath *dp;
> +};
> +
> +static void
> +bfd_travel_gw_related_chassis(struct local_datapath *dp,
> +                              const struct hmap *local_datapaths,
> +                              struct ovsdb_idl_index_cursor *cursor,
> +                              struct sbrec_port_binding *lpval,
> +                              struct sset *bfd_chassis)
> +{
> +    struct ovs_list dp_list;
> +    const struct sbrec_port_binding *pb;
> +    struct sset visited_dp = SSET_INITIALIZER(&visited_dp);
> +    const char *dp_key;
> +    struct local_datapath_node *dp_binding;
> +
> +    if (!(dp_key = smap_get(&dp->datapath->external_ids,
> "logical-router")) &&
> +        !(dp_key = smap_get(&dp->datapath->external_ids,
> "logical-switch"))) {
> +        VLOG_INFO("datapath has no uuid, cannot travel graph");
> +        return;
> +    }
> +
> +    sset_add(&visited_dp, dp_key);
> +
> +    ovs_list_init(&dp_list);
> +    dp_binding = xmalloc(sizeof *dp_binding);
> +    dp_binding->dp = dp;
> +    ovs_list_push_back(&dp_list, &dp_binding->node);
> +
> +    /*
> +     * Go through whole graph to figure out all chassis which may deliver
> +     * packetsto gateway. */
> +    do {
> +        dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list),
> +                                  struct local_datapath_node, node);
> +        dp = dp_binding->dp;
> +        free(dp_binding);
> +        for (size_t i = 0; i < dp->n_peer_dps; i++) {
> +            const struct sbrec_datapath_binding *pdp = dp->peer_dps[i];
> +            if (!pdp) {
> +                continue;
> +            }
> +
> +            if (!(dp_key = smap_get(&pdp->external_ids,
> "logical-router")) &&
> +                !(dp_key = smap_get(&pdp->external_ids,
> "logical-switch"))) {
> +                continue;
> +            }
> +
> +            if (sset_contains(&visited_dp, dp_key)) {
> +                continue;
> +            }
> +
> +            sset_add(&visited_dp, dp_key);
> +
> +            struct hmap_node *node = hmap_first_with_hash(local_
> datapaths,
> +
> pdp->tunnel_key);
> +            if (!node) {
> +                continue;
> +            }
> +
> +            dp_binding = xmalloc(sizeof *dp_binding);
> +            dp_binding->dp = CONTAINER_OF(node, struct local_datapath,
> +                                          hmap_node);
> +            ovs_list_push_back(&dp_list, &dp_binding->node);
> +
> +            sbrec_port_binding_index_set_datapath(lpval, pdp);
> +            SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) {
> +                if (pb->chassis) {
> +                    const char *chassis_name = pb->chassis->name;
> +                    if (chassis_name) {
> +                        sset_add(bfd_chassis, chassis_name);
> +                    }
> +                }
> +            }
> +        }
> +    } while (!ovs_list_is_empty(&dp_list));
> +
> +    sset_destroy(&visited_dp);
> +}
> +
>  static void
>  bfd_calculate_chassis(struct controller_ctx *ctx,
>                        const struct sbrec_chassis *our_chassis,
> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx,
>              }
>          }
>          if (our_chassis_is_gw_for_dp) {
> -            for (size_t i = 0; i < dp->n_peer_dps; i++) {
> -                const struct sbrec_datapath_binding *pdp =
> dp->peer_dps[i];
> -                if (!pdp) {
> -                    continue;
> -                }
> -
> -                sbrec_port_binding_index_set_datapath(lpval, pdp);
> -                SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
> -                    if (pb->chassis) {
> -                        /* Gateway node has to enable bfd to all nodes
> hosting
> -                         * connected network ports */
> -                        const char *chassis_name = pb->chassis->name;
> -                        if (chassis_name) {
> -                            sset_add(bfd_chassis, chassis_name);
> -                        }
> -                    }
> -                }
> -            }
> +            bfd_travel_gw_related_chassis(dp, local_datapaths, &cursor,
> +                                          lpval, bfd_chassis);
>          }
>      }
>      sbrec_port_binding_index_destroy_row(lpval);
> --
> 1.8.3.1
>
>
Gao Zhenyu Aug. 18, 2017, 1:24 a.m. UTC | #2
Thanks for the suggestion.  A testcase would be add in ovn testings. But I
am not familiar with ovn test and busy on other stuff now.
Since this issue affects many consumers who try to use HA gateways. I
prefer to push this fix in ovs master first, then we have more time to make
a testcase for it.

Thanks
Zhenyu Gao

2017-08-18 1:21 GMT+08:00 Anil Venkata <anilvenkata@redhat.com>:

> Hi Zhenyu Gao
>
> Is it possible for you to add a test case for this scenario. This test on
> the master code( without your patch) should fail, and your patch should
> make the test pass.
>
> Thanks
> Anil
>
> On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com>
> wrote:
>
>> The bfd_calculate_chassis function calculates gateway's peer
>> datapaths to figure our which chassis should be add in bfd_chassis.
>> But in most circumstance, a gateway's peer datapath has NO chassis.
>> So it may disable BFD on some tunnel ports. Then a port on a remote
>> ovs cannot send packet out because it believes all remote gateway are
>> down.
>>
>> This patch will go though whole graph and visit all datapath's port
>> which has connect with gateway.
>>
>> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
>> ---
>>  ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++
>> ++++++++++++---------
>>  1 file changed, 84 insertions(+), 18 deletions(-)
>>
>> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c
>> index d1448b1..dccfc98 100644
>> --- a/ovn/controller/bfd.c
>> +++ b/ovn/controller/bfd.c
>> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct
>> ovsrec_bridge *br_int,
>>      }
>>  }
>>
>> +struct local_datapath_node {
>> +    struct ovs_list node;
>> +    struct local_datapath *dp;
>> +};
>> +
>> +static void
>> +bfd_travel_gw_related_chassis(struct local_datapath *dp,
>> +                              const struct hmap *local_datapaths,
>> +                              struct ovsdb_idl_index_cursor *cursor,
>> +                              struct sbrec_port_binding *lpval,
>> +                              struct sset *bfd_chassis)
>> +{
>> +    struct ovs_list dp_list;
>> +    const struct sbrec_port_binding *pb;
>> +    struct sset visited_dp = SSET_INITIALIZER(&visited_dp);
>> +    const char *dp_key;
>> +    struct local_datapath_node *dp_binding;
>> +
>> +    if (!(dp_key = smap_get(&dp->datapath->external_ids,
>> "logical-router")) &&
>> +        !(dp_key = smap_get(&dp->datapath->external_ids,
>> "logical-switch"))) {
>> +        VLOG_INFO("datapath has no uuid, cannot travel graph");
>> +        return;
>> +    }
>> +
>> +    sset_add(&visited_dp, dp_key);
>> +
>> +    ovs_list_init(&dp_list);
>> +    dp_binding = xmalloc(sizeof *dp_binding);
>> +    dp_binding->dp = dp;
>> +    ovs_list_push_back(&dp_list, &dp_binding->node);
>> +
>> +    /*
>> +     * Go through whole graph to figure out all chassis which may deliver
>> +     * packetsto gateway. */
>> +    do {
>> +        dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list),
>> +                                  struct local_datapath_node, node);
>> +        dp = dp_binding->dp;
>> +        free(dp_binding);
>> +        for (size_t i = 0; i < dp->n_peer_dps; i++) {
>> +            const struct sbrec_datapath_binding *pdp = dp->peer_dps[i];
>> +            if (!pdp) {
>> +                continue;
>> +            }
>> +
>> +            if (!(dp_key = smap_get(&pdp->external_ids,
>> "logical-router")) &&
>> +                !(dp_key = smap_get(&pdp->external_ids,
>> "logical-switch"))) {
>> +                continue;
>> +            }
>> +
>> +            if (sset_contains(&visited_dp, dp_key)) {
>> +                continue;
>> +            }
>> +
>> +            sset_add(&visited_dp, dp_key);
>> +
>> +            struct hmap_node *node = hmap_first_with_hash(local_dat
>> apaths,
>> +
>> pdp->tunnel_key);
>> +            if (!node) {
>> +                continue;
>> +            }
>> +
>> +            dp_binding = xmalloc(sizeof *dp_binding);
>> +            dp_binding->dp = CONTAINER_OF(node, struct local_datapath,
>> +                                          hmap_node);
>> +            ovs_list_push_back(&dp_list, &dp_binding->node);
>> +
>> +            sbrec_port_binding_index_set_datapath(lpval, pdp);
>> +            SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) {
>> +                if (pb->chassis) {
>> +                    const char *chassis_name = pb->chassis->name;
>> +                    if (chassis_name) {
>> +                        sset_add(bfd_chassis, chassis_name);
>> +                    }
>> +                }
>> +            }
>> +        }
>> +    } while (!ovs_list_is_empty(&dp_list));
>> +
>> +    sset_destroy(&visited_dp);
>> +}
>> +
>>  static void
>>  bfd_calculate_chassis(struct controller_ctx *ctx,
>>                        const struct sbrec_chassis *our_chassis,
>> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx,
>>              }
>>          }
>>          if (our_chassis_is_gw_for_dp) {
>> -            for (size_t i = 0; i < dp->n_peer_dps; i++) {
>> -                const struct sbrec_datapath_binding *pdp =
>> dp->peer_dps[i];
>> -                if (!pdp) {
>> -                    continue;
>> -                }
>> -
>> -                sbrec_port_binding_index_set_datapath(lpval, pdp);
>> -                SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
>> -                    if (pb->chassis) {
>> -                        /* Gateway node has to enable bfd to all nodes
>> hosting
>> -                         * connected network ports */
>> -                        const char *chassis_name = pb->chassis->name;
>> -                        if (chassis_name) {
>> -                            sset_add(bfd_chassis, chassis_name);
>> -                        }
>> -                    }
>> -                }
>> -            }
>> +            bfd_travel_gw_related_chassis(dp, local_datapaths, &cursor,
>> +                                          lpval, bfd_chassis);
>>          }
>>      }
>>      sbrec_port_binding_index_destroy_row(lpval);
>> --
>> 1.8.3.1
>>
>>
>
Miguel Angel Ajo Aug. 18, 2017, 6:55 a.m. UTC | #3
On Fri, Aug 18, 2017 at 3:24 AM, Gao Zhenyu <sysugaozhenyu@gmail.com> wrote:

> Thanks for the suggestion.  A testcase would be add in ovn testings. But I
> am not familiar with ovn test and busy on other stuff now.
> Since this issue affects many consumers who try to use HA gateways. I
> prefer to push this fix in ovs master first, then we have more time to make
> a testcase for it.
>


I know your feeling Gao, but, IMO it's better if we put the tests and code
together, it's has a couple of benefits:

1) Your expected behaviour can more easily be understood from the patch
itself.

2) Your behaviour changes are locked in, because if later patches break
them the test will fail.


Anil, I'll be away for the next two weeks, but could you give Gao a hand to
write the tests quickly?

Then we need to backport this to 2.8.0


>
> Thanks
> Zhenyu Gao
>
> 2017-08-18 1:21 GMT+08:00 Anil Venkata <anilvenkata@redhat.com>:
>
>> Hi Zhenyu Gao
>>
>> Is it possible for you to add a test case for this scenario. This test on
>> the master code( without your patch) should fail, and your patch should
>> make the test pass.
>>
>> Thanks
>> Anil
>>
>> On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com>
>> wrote:
>>
>>> The bfd_calculate_chassis function calculates gateway's peer
>>> datapaths to figure our which chassis should be add in bfd_chassis.
>>> But in most circumstance, a gateway's peer datapath has NO chassis.
>>> So it may disable BFD on some tunnel ports. Then a port on a remote
>>> ovs cannot send packet out because it believes all remote gateway are
>>> down.
>>>
>>> This patch will go though whole graph and visit all datapath's port
>>> which has connect with gateway.
>>>
>>> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
>>> ---
>>>  ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++
>>> ++++++++++++---------
>>>  1 file changed, 84 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c
>>> index d1448b1..dccfc98 100644
>>> --- a/ovn/controller/bfd.c
>>> +++ b/ovn/controller/bfd.c
>>> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct
>>> ovsrec_bridge *br_int,
>>>      }
>>>  }
>>>
>>> +struct local_datapath_node {
>>> +    struct ovs_list node;
>>> +    struct local_datapath *dp;
>>> +};
>>> +
>>> +static void
>>> +bfd_travel_gw_related_chassis(struct local_datapath *dp,
>>> +                              const struct hmap *local_datapaths,
>>> +                              struct ovsdb_idl_index_cursor *cursor,
>>> +                              struct sbrec_port_binding *lpval,
>>> +                              struct sset *bfd_chassis)
>>> +{
>>> +    struct ovs_list dp_list;
>>> +    const struct sbrec_port_binding *pb;
>>> +    struct sset visited_dp = SSET_INITIALIZER(&visited_dp);
>>> +    const char *dp_key;
>>> +    struct local_datapath_node *dp_binding;
>>> +
>>> +    if (!(dp_key = smap_get(&dp->datapath->external_ids,
>>> "logical-router")) &&
>>> +        !(dp_key = smap_get(&dp->datapath->external_ids,
>>> "logical-switch"))) {
>>> +        VLOG_INFO("datapath has no uuid, cannot travel graph");
>>> +        return;
>>> +    }
>>> +
>>> +    sset_add(&visited_dp, dp_key);
>>> +
>>> +    ovs_list_init(&dp_list);
>>> +    dp_binding = xmalloc(sizeof *dp_binding);
>>> +    dp_binding->dp = dp;
>>> +    ovs_list_push_back(&dp_list, &dp_binding->node);
>>> +
>>> +    /*
>>> +     * Go through whole graph to figure out all chassis which may
>>> deliver
>>> +     * packetsto gateway. */
>>> +    do {
>>> +        dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list),
>>> +                                  struct local_datapath_node, node);
>>> +        dp = dp_binding->dp;
>>> +        free(dp_binding);
>>> +        for (size_t i = 0; i < dp->n_peer_dps; i++) {
>>> +            const struct sbrec_datapath_binding *pdp = dp->peer_dps[i];
>>> +            if (!pdp) {
>>> +                continue;
>>> +            }
>>> +
>>> +            if (!(dp_key = smap_get(&pdp->external_ids,
>>> "logical-router")) &&
>>> +                !(dp_key = smap_get(&pdp->external_ids,
>>> "logical-switch"))) {
>>> +                continue;
>>> +            }
>>> +
>>> +            if (sset_contains(&visited_dp, dp_key)) {
>>> +                continue;
>>> +            }
>>> +
>>> +            sset_add(&visited_dp, dp_key);
>>> +
>>> +            struct hmap_node *node = hmap_first_with_hash(local_dat
>>> apaths,
>>> +
>>> pdp->tunnel_key);
>>> +            if (!node) {
>>> +                continue;
>>> +            }
>>> +
>>> +            dp_binding = xmalloc(sizeof *dp_binding);
>>> +            dp_binding->dp = CONTAINER_OF(node, struct local_datapath,
>>> +                                          hmap_node);
>>> +            ovs_list_push_back(&dp_list, &dp_binding->node);
>>> +
>>> +            sbrec_port_binding_index_set_datapath(lpval, pdp);
>>> +            SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) {
>>> +                if (pb->chassis) {
>>> +                    const char *chassis_name = pb->chassis->name;
>>> +                    if (chassis_name) {
>>> +                        sset_add(bfd_chassis, chassis_name);
>>> +                    }
>>> +                }
>>> +            }
>>> +        }
>>> +    } while (!ovs_list_is_empty(&dp_list));
>>> +
>>> +    sset_destroy(&visited_dp);
>>> +}
>>> +
>>>  static void
>>>  bfd_calculate_chassis(struct controller_ctx *ctx,
>>>                        const struct sbrec_chassis *our_chassis,
>>> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx,
>>>              }
>>>          }
>>>          if (our_chassis_is_gw_for_dp) {
>>> -            for (size_t i = 0; i < dp->n_peer_dps; i++) {
>>> -                const struct sbrec_datapath_binding *pdp =
>>> dp->peer_dps[i];
>>> -                if (!pdp) {
>>> -                    continue;
>>> -                }
>>> -
>>> -                sbrec_port_binding_index_set_datapath(lpval, pdp);
>>> -                SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval)
>>> {
>>> -                    if (pb->chassis) {
>>> -                        /* Gateway node has to enable bfd to all nodes
>>> hosting
>>> -                         * connected network ports */
>>> -                        const char *chassis_name = pb->chassis->name;
>>> -                        if (chassis_name) {
>>> -                            sset_add(bfd_chassis, chassis_name);
>>> -                        }
>>> -                    }
>>> -                }
>>> -            }
>>> +            bfd_travel_gw_related_chassis(dp, local_datapaths, &cursor,
>>> +                                          lpval, bfd_chassis);
>>>          }
>>>      }
>>>      sbrec_port_binding_index_destroy_row(lpval);
>>> --
>>> 1.8.3.1
>>>
>>>
>>
>
Gao Zhenyu Aug. 18, 2017, 7:15 a.m. UTC | #4
Thanks for the comments.
OK, I see. I will revise it and add testing in ovn tests, but it may be in
next week.

Thanks
Zhenyu Gao

2017-08-18 14:55 GMT+08:00 Miguel Angel Ajo Pelayo <majopela@redhat.com>:

>
>
> On Fri, Aug 18, 2017 at 3:24 AM, Gao Zhenyu <sysugaozhenyu@gmail.com>
> wrote:
>
>> Thanks for the suggestion.  A testcase would be add in ovn testings. But
>> I am not familiar with ovn test and busy on other stuff now.
>> Since this issue affects many consumers who try to use HA gateways. I
>> prefer to push this fix in ovs master first, then we have more time to make
>> a testcase for it.
>>
>
>
> I know your feeling Gao, but, IMO it's better if we put the tests and code
> together, it's has a couple of benefits:
>
> 1) Your expected behaviour can more easily be understood from the patch
> itself.
>
> 2) Your behaviour changes are locked in, because if later patches break
> them the test will fail.
>
>
> Anil, I'll be away for the next two weeks, but could you give Gao a hand
> to write the tests quickly?
>
> Then we need to backport this to 2.8.0
>
>
>>
>> Thanks
>> Zhenyu Gao
>>
>> 2017-08-18 1:21 GMT+08:00 Anil Venkata <anilvenkata@redhat.com>:
>>
>>> Hi Zhenyu Gao
>>>
>>> Is it possible for you to add a test case for this scenario. This test
>>> on the master code( without your patch) should fail, and your patch should
>>> make the test pass.
>>>
>>> Thanks
>>> Anil
>>>
>>> On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com>
>>> wrote:
>>>
>>>> The bfd_calculate_chassis function calculates gateway's peer
>>>> datapaths to figure our which chassis should be add in bfd_chassis.
>>>> But in most circumstance, a gateway's peer datapath has NO chassis.
>>>> So it may disable BFD on some tunnel ports. Then a port on a remote
>>>> ovs cannot send packet out because it believes all remote gateway are
>>>> down.
>>>>
>>>> This patch will go though whole graph and visit all datapath's port
>>>> which has connect with gateway.
>>>>
>>>> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
>>>> ---
>>>>  ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++
>>>> ++++++++++++---------
>>>>  1 file changed, 84 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c
>>>> index d1448b1..dccfc98 100644
>>>> --- a/ovn/controller/bfd.c
>>>> +++ b/ovn/controller/bfd.c
>>>> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct
>>>> ovsrec_bridge *br_int,
>>>>      }
>>>>  }
>>>>
>>>> +struct local_datapath_node {
>>>> +    struct ovs_list node;
>>>> +    struct local_datapath *dp;
>>>> +};
>>>> +
>>>> +static void
>>>> +bfd_travel_gw_related_chassis(struct local_datapath *dp,
>>>> +                              const struct hmap *local_datapaths,
>>>> +                              struct ovsdb_idl_index_cursor *cursor,
>>>> +                              struct sbrec_port_binding *lpval,
>>>> +                              struct sset *bfd_chassis)
>>>> +{
>>>> +    struct ovs_list dp_list;
>>>> +    const struct sbrec_port_binding *pb;
>>>> +    struct sset visited_dp = SSET_INITIALIZER(&visited_dp);
>>>> +    const char *dp_key;
>>>> +    struct local_datapath_node *dp_binding;
>>>> +
>>>> +    if (!(dp_key = smap_get(&dp->datapath->external_ids,
>>>> "logical-router")) &&
>>>> +        !(dp_key = smap_get(&dp->datapath->external_ids,
>>>> "logical-switch"))) {
>>>> +        VLOG_INFO("datapath has no uuid, cannot travel graph");
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    sset_add(&visited_dp, dp_key);
>>>> +
>>>> +    ovs_list_init(&dp_list);
>>>> +    dp_binding = xmalloc(sizeof *dp_binding);
>>>> +    dp_binding->dp = dp;
>>>> +    ovs_list_push_back(&dp_list, &dp_binding->node);
>>>> +
>>>> +    /*
>>>> +     * Go through whole graph to figure out all chassis which may
>>>> deliver
>>>> +     * packetsto gateway. */
>>>> +    do {
>>>> +        dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list),
>>>> +                                  struct local_datapath_node, node);
>>>> +        dp = dp_binding->dp;
>>>> +        free(dp_binding);
>>>> +        for (size_t i = 0; i < dp->n_peer_dps; i++) {
>>>> +            const struct sbrec_datapath_binding *pdp = dp->peer_dps[i];
>>>> +            if (!pdp) {
>>>> +                continue;
>>>> +            }
>>>> +
>>>> +            if (!(dp_key = smap_get(&pdp->external_ids,
>>>> "logical-router")) &&
>>>> +                !(dp_key = smap_get(&pdp->external_ids,
>>>> "logical-switch"))) {
>>>> +                continue;
>>>> +            }
>>>> +
>>>> +            if (sset_contains(&visited_dp, dp_key)) {
>>>> +                continue;
>>>> +            }
>>>> +
>>>> +            sset_add(&visited_dp, dp_key);
>>>> +
>>>> +            struct hmap_node *node = hmap_first_with_hash(local_dat
>>>> apaths,
>>>> +
>>>> pdp->tunnel_key);
>>>> +            if (!node) {
>>>> +                continue;
>>>> +            }
>>>> +
>>>> +            dp_binding = xmalloc(sizeof *dp_binding);
>>>> +            dp_binding->dp = CONTAINER_OF(node, struct local_datapath,
>>>> +                                          hmap_node);
>>>> +            ovs_list_push_back(&dp_list, &dp_binding->node);
>>>> +
>>>> +            sbrec_port_binding_index_set_datapath(lpval, pdp);
>>>> +            SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) {
>>>> +                if (pb->chassis) {
>>>> +                    const char *chassis_name = pb->chassis->name;
>>>> +                    if (chassis_name) {
>>>> +                        sset_add(bfd_chassis, chassis_name);
>>>> +                    }
>>>> +                }
>>>> +            }
>>>> +        }
>>>> +    } while (!ovs_list_is_empty(&dp_list));
>>>> +
>>>> +    sset_destroy(&visited_dp);
>>>> +}
>>>> +
>>>>  static void
>>>>  bfd_calculate_chassis(struct controller_ctx *ctx,
>>>>                        const struct sbrec_chassis *our_chassis,
>>>> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx,
>>>>              }
>>>>          }
>>>>          if (our_chassis_is_gw_for_dp) {
>>>> -            for (size_t i = 0; i < dp->n_peer_dps; i++) {
>>>> -                const struct sbrec_datapath_binding *pdp =
>>>> dp->peer_dps[i];
>>>> -                if (!pdp) {
>>>> -                    continue;
>>>> -                }
>>>> -
>>>> -                sbrec_port_binding_index_set_datapath(lpval, pdp);
>>>> -                SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor,
>>>> lpval) {
>>>> -                    if (pb->chassis) {
>>>> -                        /* Gateway node has to enable bfd to all nodes
>>>> hosting
>>>> -                         * connected network ports */
>>>> -                        const char *chassis_name = pb->chassis->name;
>>>> -                        if (chassis_name) {
>>>> -                            sset_add(bfd_chassis, chassis_name);
>>>> -                        }
>>>> -                    }
>>>> -                }
>>>> -            }
>>>> +            bfd_travel_gw_related_chassis(dp, local_datapaths,
>>>> &cursor,
>>>> +                                          lpval, bfd_chassis);
>>>>          }
>>>>      }
>>>>      sbrec_port_binding_index_destroy_row(lpval);
>>>> --
>>>> 1.8.3.1
>>>>
>>>>
>>>
>>
>
Gao Zhenyu Aug. 20, 2017, 2:40 p.m. UTC | #5
Please take a look at version 2: https://patchwork.ozlabs.org/patch/803731/
:)

Thanks
Zhenyu Gao

2017-08-18 15:15 GMT+08:00 Gao Zhenyu <sysugaozhenyu@gmail.com>:

> Thanks for the comments.
> OK, I see. I will revise it and add testing in ovn tests, but it may be in
> next week.
>
> Thanks
> Zhenyu Gao
>
> 2017-08-18 14:55 GMT+08:00 Miguel Angel Ajo Pelayo <majopela@redhat.com>:
>
>>
>>
>> On Fri, Aug 18, 2017 at 3:24 AM, Gao Zhenyu <sysugaozhenyu@gmail.com>
>> wrote:
>>
>>> Thanks for the suggestion.  A testcase would be add in ovn testings. But
>>> I am not familiar with ovn test and busy on other stuff now.
>>> Since this issue affects many consumers who try to use HA gateways. I
>>> prefer to push this fix in ovs master first, then we have more time to make
>>> a testcase for it.
>>>
>>
>>
>> I know your feeling Gao, but, IMO it's better if we put the tests and
>> code together, it's has a couple of benefits:
>>
>> 1) Your expected behaviour can more easily be understood from the patch
>> itself.
>>
>> 2) Your behaviour changes are locked in, because if later patches break
>> them the test will fail.
>>
>>
>> Anil, I'll be away for the next two weeks, but could you give Gao a hand
>> to write the tests quickly?
>>
>> Then we need to backport this to 2.8.0
>>
>>
>>>
>>> Thanks
>>> Zhenyu Gao
>>>
>>> 2017-08-18 1:21 GMT+08:00 Anil Venkata <anilvenkata@redhat.com>:
>>>
>>>> Hi Zhenyu Gao
>>>>
>>>> Is it possible for you to add a test case for this scenario. This test
>>>> on the master code( without your patch) should fail, and your patch should
>>>> make the test pass.
>>>>
>>>> Thanks
>>>> Anil
>>>>
>>>> On Wed, Aug 16, 2017 at 12:17 PM, Zhenyu Gao <sysugaozhenyu@gmail.com>
>>>> wrote:
>>>>
>>>>> The bfd_calculate_chassis function calculates gateway's peer
>>>>> datapaths to figure our which chassis should be add in bfd_chassis.
>>>>> But in most circumstance, a gateway's peer datapath has NO chassis.
>>>>> So it may disable BFD on some tunnel ports. Then a port on a remote
>>>>> ovs cannot send packet out because it believes all remote gateway are
>>>>> down.
>>>>>
>>>>> This patch will go though whole graph and visit all datapath's port
>>>>> which has connect with gateway.
>>>>>
>>>>> Signed-off-by: Zhenyu Gao <sysugaozhenyu@gmail.com>
>>>>> ---
>>>>>  ovn/controller/bfd.c | 102 ++++++++++++++++++++++++++++++
>>>>> ++++++++++++---------
>>>>>  1 file changed, 84 insertions(+), 18 deletions(-)
>>>>>
>>>>> diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c
>>>>> index d1448b1..dccfc98 100644
>>>>> --- a/ovn/controller/bfd.c
>>>>> +++ b/ovn/controller/bfd.c
>>>>> @@ -100,6 +100,88 @@ bfd_calculate_active_tunnels(const struct
>>>>> ovsrec_bridge *br_int,
>>>>>      }
>>>>>  }
>>>>>
>>>>> +struct local_datapath_node {
>>>>> +    struct ovs_list node;
>>>>> +    struct local_datapath *dp;
>>>>> +};
>>>>> +
>>>>> +static void
>>>>> +bfd_travel_gw_related_chassis(struct local_datapath *dp,
>>>>> +                              const struct hmap *local_datapaths,
>>>>> +                              struct ovsdb_idl_index_cursor *cursor,
>>>>> +                              struct sbrec_port_binding *lpval,
>>>>> +                              struct sset *bfd_chassis)
>>>>> +{
>>>>> +    struct ovs_list dp_list;
>>>>> +    const struct sbrec_port_binding *pb;
>>>>> +    struct sset visited_dp = SSET_INITIALIZER(&visited_dp);
>>>>> +    const char *dp_key;
>>>>> +    struct local_datapath_node *dp_binding;
>>>>> +
>>>>> +    if (!(dp_key = smap_get(&dp->datapath->external_ids,
>>>>> "logical-router")) &&
>>>>> +        !(dp_key = smap_get(&dp->datapath->external_ids,
>>>>> "logical-switch"))) {
>>>>> +        VLOG_INFO("datapath has no uuid, cannot travel graph");
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    sset_add(&visited_dp, dp_key);
>>>>> +
>>>>> +    ovs_list_init(&dp_list);
>>>>> +    dp_binding = xmalloc(sizeof *dp_binding);
>>>>> +    dp_binding->dp = dp;
>>>>> +    ovs_list_push_back(&dp_list, &dp_binding->node);
>>>>> +
>>>>> +    /*
>>>>> +     * Go through whole graph to figure out all chassis which may
>>>>> deliver
>>>>> +     * packetsto gateway. */
>>>>> +    do {
>>>>> +        dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list),
>>>>> +                                  struct local_datapath_node, node);
>>>>> +        dp = dp_binding->dp;
>>>>> +        free(dp_binding);
>>>>> +        for (size_t i = 0; i < dp->n_peer_dps; i++) {
>>>>> +            const struct sbrec_datapath_binding *pdp =
>>>>> dp->peer_dps[i];
>>>>> +            if (!pdp) {
>>>>> +                continue;
>>>>> +            }
>>>>> +
>>>>> +            if (!(dp_key = smap_get(&pdp->external_ids,
>>>>> "logical-router")) &&
>>>>> +                !(dp_key = smap_get(&pdp->external_ids,
>>>>> "logical-switch"))) {
>>>>> +                continue;
>>>>> +            }
>>>>> +
>>>>> +            if (sset_contains(&visited_dp, dp_key)) {
>>>>> +                continue;
>>>>> +            }
>>>>> +
>>>>> +            sset_add(&visited_dp, dp_key);
>>>>> +
>>>>> +            struct hmap_node *node = hmap_first_with_hash(local_dat
>>>>> apaths,
>>>>> +
>>>>> pdp->tunnel_key);
>>>>> +            if (!node) {
>>>>> +                continue;
>>>>> +            }
>>>>> +
>>>>> +            dp_binding = xmalloc(sizeof *dp_binding);
>>>>> +            dp_binding->dp = CONTAINER_OF(node, struct local_datapath,
>>>>> +                                          hmap_node);
>>>>> +            ovs_list_push_back(&dp_list, &dp_binding->node);
>>>>> +
>>>>> +            sbrec_port_binding_index_set_datapath(lpval, pdp);
>>>>> +            SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) {
>>>>> +                if (pb->chassis) {
>>>>> +                    const char *chassis_name = pb->chassis->name;
>>>>> +                    if (chassis_name) {
>>>>> +                        sset_add(bfd_chassis, chassis_name);
>>>>> +                    }
>>>>> +                }
>>>>> +            }
>>>>> +        }
>>>>> +    } while (!ovs_list_is_empty(&dp_list));
>>>>> +
>>>>> +    sset_destroy(&visited_dp);
>>>>> +}
>>>>> +
>>>>>  static void
>>>>>  bfd_calculate_chassis(struct controller_ctx *ctx,
>>>>>                        const struct sbrec_chassis *our_chassis,
>>>>> @@ -155,24 +237,8 @@ bfd_calculate_chassis(struct controller_ctx *ctx,
>>>>>              }
>>>>>          }
>>>>>          if (our_chassis_is_gw_for_dp) {
>>>>> -            for (size_t i = 0; i < dp->n_peer_dps; i++) {
>>>>> -                const struct sbrec_datapath_binding *pdp =
>>>>> dp->peer_dps[i];
>>>>> -                if (!pdp) {
>>>>> -                    continue;
>>>>> -                }
>>>>> -
>>>>> -                sbrec_port_binding_index_set_datapath(lpval, pdp);
>>>>> -                SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor,
>>>>> lpval) {
>>>>> -                    if (pb->chassis) {
>>>>> -                        /* Gateway node has to enable bfd to all
>>>>> nodes hosting
>>>>> -                         * connected network ports */
>>>>> -                        const char *chassis_name = pb->chassis->name;
>>>>> -                        if (chassis_name) {
>>>>> -                            sset_add(bfd_chassis, chassis_name);
>>>>> -                        }
>>>>> -                    }
>>>>> -                }
>>>>> -            }
>>>>> +            bfd_travel_gw_related_chassis(dp, local_datapaths,
>>>>> &cursor,
>>>>> +                                          lpval, bfd_chassis);
>>>>>          }
>>>>>      }
>>>>>      sbrec_port_binding_index_destroy_row(lpval);
>>>>> --
>>>>> 1.8.3.1
>>>>>
>>>>>
>>>>
>>>
>>
>
diff mbox

Patch

diff --git a/ovn/controller/bfd.c b/ovn/controller/bfd.c
index d1448b1..dccfc98 100644
--- a/ovn/controller/bfd.c
+++ b/ovn/controller/bfd.c
@@ -100,6 +100,88 @@  bfd_calculate_active_tunnels(const struct ovsrec_bridge *br_int,
     }
 }
 
+struct local_datapath_node {
+    struct ovs_list node;
+    struct local_datapath *dp;
+};
+
+static void
+bfd_travel_gw_related_chassis(struct local_datapath *dp,
+                              const struct hmap *local_datapaths,
+                              struct ovsdb_idl_index_cursor *cursor,
+                              struct sbrec_port_binding *lpval,
+                              struct sset *bfd_chassis)
+{
+    struct ovs_list dp_list;
+    const struct sbrec_port_binding *pb;
+    struct sset visited_dp = SSET_INITIALIZER(&visited_dp);
+    const char *dp_key;
+    struct local_datapath_node *dp_binding;
+
+    if (!(dp_key = smap_get(&dp->datapath->external_ids, "logical-router")) &&
+        !(dp_key = smap_get(&dp->datapath->external_ids, "logical-switch"))) {
+        VLOG_INFO("datapath has no uuid, cannot travel graph");
+        return;
+    }
+
+    sset_add(&visited_dp, dp_key);
+
+    ovs_list_init(&dp_list);
+    dp_binding = xmalloc(sizeof *dp_binding);
+    dp_binding->dp = dp;
+    ovs_list_push_back(&dp_list, &dp_binding->node);
+
+    /*
+     * Go through whole graph to figure out all chassis which may deliver
+     * packetsto gateway. */
+    do {
+        dp_binding = CONTAINER_OF(ovs_list_pop_front(&dp_list),
+                                  struct local_datapath_node, node);
+        dp = dp_binding->dp;
+        free(dp_binding);
+        for (size_t i = 0; i < dp->n_peer_dps; i++) {
+            const struct sbrec_datapath_binding *pdp = dp->peer_dps[i];
+            if (!pdp) {
+                continue;
+            }
+
+            if (!(dp_key = smap_get(&pdp->external_ids, "logical-router")) &&
+                !(dp_key = smap_get(&pdp->external_ids, "logical-switch"))) {
+                continue;
+            }
+
+            if (sset_contains(&visited_dp, dp_key)) {
+                continue;
+            }
+
+            sset_add(&visited_dp, dp_key);
+
+            struct hmap_node *node = hmap_first_with_hash(local_datapaths,
+                                                          pdp->tunnel_key);
+            if (!node) {
+                continue;
+            }
+
+            dp_binding = xmalloc(sizeof *dp_binding);
+            dp_binding->dp = CONTAINER_OF(node, struct local_datapath,
+                                          hmap_node);
+            ovs_list_push_back(&dp_list, &dp_binding->node);
+
+            sbrec_port_binding_index_set_datapath(lpval, pdp);
+            SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, cursor, lpval) {
+                if (pb->chassis) {
+                    const char *chassis_name = pb->chassis->name;
+                    if (chassis_name) {
+                        sset_add(bfd_chassis, chassis_name);
+                    }
+                }
+            }
+        }
+    } while (!ovs_list_is_empty(&dp_list));
+
+    sset_destroy(&visited_dp);
+}
+
 static void
 bfd_calculate_chassis(struct controller_ctx *ctx,
                       const struct sbrec_chassis *our_chassis,
@@ -155,24 +237,8 @@  bfd_calculate_chassis(struct controller_ctx *ctx,
             }
         }
         if (our_chassis_is_gw_for_dp) {
-            for (size_t i = 0; i < dp->n_peer_dps; i++) {
-                const struct sbrec_datapath_binding *pdp = dp->peer_dps[i];
-                if (!pdp) {
-                    continue;
-                }
-
-                sbrec_port_binding_index_set_datapath(lpval, pdp);
-                SBREC_PORT_BINDING_FOR_EACH_EQUAL (pb, &cursor, lpval) {
-                    if (pb->chassis) {
-                        /* Gateway node has to enable bfd to all nodes hosting
-                         * connected network ports */
-                        const char *chassis_name = pb->chassis->name;
-                        if (chassis_name) {
-                            sset_add(bfd_chassis, chassis_name);
-                        }
-                    }
-                }
-            }
+            bfd_travel_gw_related_chassis(dp, local_datapaths, &cursor,
+                                          lpval, bfd_chassis);
         }
     }
     sbrec_port_binding_index_destroy_row(lpval);