diff mbox series

[ovs-dev,v8,2/5] northd: make connected routes have higher priority than static

Message ID 20211113094353.17690-3-odivlad@gmail.com
State Superseded, archived
Headers show
Series Add multiple routing tables support to Logical Routers | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Vladislav Odintsov Nov. 13, 2021, 9:43 a.m. UTC
With this patch routes to connected networks have higher
priority than static routes with same ip_prefix.

This brings commonly-used behaviour for routes lookup order:
1: longest prefix match
2: metric

The metric has next lookup order:
1: connected routes
2: static routes

Earlier static and connected routes with same ip_prefix had
the same priority, so it was impossible to predict which one
is used for routing decision.

Each route's prefix length has its own 'slot' in lflow prios.
Now prefix length space is calculated using next information:
to calculate route's priority prefixlen multiplied by 3
+ route origin offset (0 - source-based route; 1 - directly-
connected route; 2 - static route).

Also, enlarge prio for generic records in lr_in_ip_routing stage
by 10000.

Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
---
 northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
 northd/ovn-northd.8.xml | 12 +++++-----
 tests/ovn-northd.at     |  8 +++----
 3 files changed, 45 insertions(+), 25 deletions(-)

Comments

Numan Siddique Nov. 17, 2021, 2:17 a.m. UTC | #1
On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad@gmail.com> wrote:
>
> With this patch routes to connected networks have higher
> priority than static routes with same ip_prefix.
>
> This brings commonly-used behaviour for routes lookup order:
> 1: longest prefix match
> 2: metric
>
> The metric has next lookup order:
> 1: connected routes
> 2: static routes
>
> Earlier static and connected routes with same ip_prefix had
> the same priority, so it was impossible to predict which one
> is used for routing decision.
>
> Each route's prefix length has its own 'slot' in lflow prios.
> Now prefix length space is calculated using next information:
> to calculate route's priority prefixlen multiplied by 3
> + route origin offset (0 - source-based route; 1 - directly-
> connected route; 2 - static route).
>
> Also, enlarge prio for generic records in lr_in_ip_routing stage
> by 10000.
>
> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>

Hi Vladislav,

Thanks for the patch.  Overall it looks good to me.  I've one comment.
Looks like the documentation updated in ovn-northd.8.xml is not accurate.

This patch modifies the flows in the lr_in_ip_routing stage but this
patch doesn't update the documentation.
Also the patch updates the documentation for the flow in lr_in_defrag
stage, which seems not correct.

Can you please update the documentation accurately ?

Numan

> ---
>  northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
>  northd/ovn-northd.8.xml | 12 +++++-----
>  tests/ovn-northd.at     |  8 +++----
>  3 files changed, 45 insertions(+), 25 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index 1e8a3457c..0d513f039 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -305,6 +305,15 @@ enum ovn_stage {
>   *
>   */
>
> +/*
> + * Route offsets implement logic to prioritize traffic for routes with
> + * same ip_prefix values:
> + *  -  connected route overrides static one;
> + *  -  static route overrides connected route. */
> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> +#define ROUTE_PRIO_OFFSET_STATIC 1
> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> +
>  /* Returns an "enum ovn_stage" built from the arguments. */
>  static enum ovn_stage
>  ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>      struct in6_addr prefix;
>      unsigned int plen;
>      bool is_src_route;
> +    const char *origin;
>      uint16_t route_count;
>      struct ovs_list route_list; /* Contains ecmp_route_list_node */
>  };
> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>      eg->prefix = route->prefix;
>      eg->plen = route->plen;
>      eg->is_src_route = route->is_src_route;
> +    eg->origin = smap_get_def(&route->route->options, "origin", "");
>      ovs_list_init(&eg->route_list);
>      ecmp_groups_add_route(eg, route);
>
> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
>  static void
>  build_route_match(const struct ovn_port *op_inport, const char *network_s,
>                    int plen, bool is_src_route, bool is_ipv4, struct ds *match,
> -                  uint16_t *priority)
> +                  uint16_t *priority, int ofs)
>  {
>      const char *dir;
>      /* The priority here is calculated to implement longest-prefix-match
>       * routing. */
>      if (is_src_route) {
>          dir = "src";
> -        *priority = plen * 2;
> +        ofs = 0;
>      } else {
>          dir = "dst";
> -        *priority = (plen * 2) + 1;
>      }
>
> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
> +
>      if (op_inport) {
>          ds_put_format(match, "inport == %s && ", op_inport->json_key);
>      }
> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
>                    out_port->lrp_networks.ea_s,
>                    IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>                    port_ip, out_port->json_key);
> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
>                             ds_cstr(&match), ds_cstr(&actions),
>                             &st_route->header_);
>
> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>      struct ds route_match = DS_EMPTY_INITIALIZER;
>
>      char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>      build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
> -                      &route_match, &priority);
> +                      &route_match, &priority, ofs);
>      free(prefix_s);
>
>      struct ds actions = DS_EMPTY_INITIALIZER;
> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>            const struct ovn_port *op, const char *lrp_addr_s,
>            const char *network_s, int plen, const char *gateway,
>            bool is_src_route, const struct ovsdb_idl_row *stage_hint,
> -          bool is_discard_route)
> +          bool is_discard_route, int ofs)
>  {
>      bool is_ipv4 = strchr(network_s, '.') ? true : false;
>      struct ds match = DS_EMPTY_INITIALIZER;
> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>          }
>      }
>      build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
> -                      &match, &priority);
> +                      &match, &priority, ofs);
>
>      struct ds common_actions = DS_EMPTY_INITIALIZER;
>      struct ds actions = DS_EMPTY_INITIALIZER;
> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>          }
>      }
>
> +    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
> +                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
> +                                              : ROUTE_PRIO_OFFSET_STATIC;
> +
>      char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
>      add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
>                lrp_addr_s, prefix_s, route_->plen, route->nexthop,
> -              route_->is_src_route, &route->header_, route_->is_discard_route);
> +              route_->is_src_route, &route->header_, route_->is_discard_route,
> +              ofs);
>
>      free(prefix_s);
>  }
> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>              add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
>                        op->lrp_networks.ipv4_addrs[i].network_s,
>                        op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
> -                      &op->nbrp->header_, false);
> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>          }
>
>          for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>              add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
>                        op->lrp_networks.ipv6_addrs[i].network_s,
>                        op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
> -                      &op->nbrp->header_, false);
> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>          }
>      } else if (lsp_is_router(op->nbsp)) {
>          struct ovn_port *peer = ovn_port_get_peer(ports, op);
> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>                                peer->lrp_networks.ipv4_addrs[0].addr_s,
>                                laddrs->ipv4_addrs[k].network_s,
>                                laddrs->ipv4_addrs[k].plen, NULL, false,
> -                              &peer->nbrp->header_, false);
> +                              &peer->nbrp->header_, false,
> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>                  }
>              }
>          }
> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>          /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>           * i.e., router solicitation and router advertisement.
>           */
> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>                        "nd_rs || nd_ra", "drop;");
>          if (!od->mcast_info.rtr.relay) {
>              return;
> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>              }
>              ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
>                            igmp_group->mcgroup.name);
> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
>                            ds_cstr(match), ds_cstr(actions));
>          }
>
> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>           * ports. Otherwise drop any multicast traffic.
>           */
>          if (od->mcast_info.rtr.flood_static) {
> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>                            "ip4.mcast || ip6.mcast",
>                            "clone { "
>                                  "outport = \""MC_STATIC"\"; "
> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>                                  "next; "
>                            "};");
>          } else {
> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>                            "ip4.mcast || ip6.mcast", "drop;");
>          }
>      }
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index fb67395e3..4f3a9d5e3 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -2945,12 +2945,12 @@ icmp6 {
>
>      <p>
>        If ECMP routes with symmetric reply are configured in the
> -      <code>OVN_Northbound</code> database for a gateway router, a priority-300
> -      flow is added for each router port on which symmetric replies are
> -      configured. The matching logic for these ports essentially reverses the
> -      configured logic of the ECMP route. So for instance, a route with a
> -      destination routing policy will instead match if the source IP address
> -      matches the static route's prefix. The flow uses the action
> +      <code>OVN_Northbound</code> database for a gateway router, a
> +      priority-10300 flow is added for each router port on which symmetric
> +      replies are configured. The matching logic for these ports essentially
> +      reverses the configured logic of the ECMP route. So for instance, a route
> +      with a destination routing policy will instead match if the source IP
> +      address matches the static route's prefix. The flow uses the action
>        <code>ct_next</code> to send IP packets to the connection tracker for
>        packet de-fragmentation and tracking before sending it to the next table.
>      </p>
> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> index 85b47a18f..3c1a97f73 100644
> --- a/tests/ovn-northd.at
> +++ b/tests/ovn-northd.at
> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>  ])
>  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>    table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>  ])
>  AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>    table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>  ovn-sbctl dump-flows lr0 > lr0flows
>
>  AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>  ])
>
>  check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
>
>  ovn-sbctl dump-flows lr0 > lr0flows
>  AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>  ])
>
>  AT_CLEANUP
> --
> 2.30.0
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Vladislav Odintsov Nov. 17, 2021, 10:51 a.m. UTC | #2
Hi Numan,

Thanks for the review.
Sure I will fix this. Should I wait for more comments or that’s all and I can send v9?

Regards,
Vladislav Odintsov

> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
> 
> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>> 
>> With this patch routes to connected networks have higher
>> priority than static routes with same ip_prefix.
>> 
>> This brings commonly-used behaviour for routes lookup order:
>> 1: longest prefix match
>> 2: metric
>> 
>> The metric has next lookup order:
>> 1: connected routes
>> 2: static routes
>> 
>> Earlier static and connected routes with same ip_prefix had
>> the same priority, so it was impossible to predict which one
>> is used for routing decision.
>> 
>> Each route's prefix length has its own 'slot' in lflow prios.
>> Now prefix length space is calculated using next information:
>> to calculate route's priority prefixlen multiplied by 3
>> + route origin offset (0 - source-based route; 1 - directly-
>> connected route; 2 - static route).
>> 
>> Also, enlarge prio for generic records in lr_in_ip_routing stage
>> by 10000.
>> 
>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> 
> Hi Vladislav,
> 
> Thanks for the patch.  Overall it looks good to me.  I've one comment.
> Looks like the documentation updated in ovn-northd.8.xml is not accurate.
> 
> This patch modifies the flows in the lr_in_ip_routing stage but this
> patch doesn't update the documentation.
> Also the patch updates the documentation for the flow in lr_in_defrag
> stage, which seems not correct.
> 
> Can you please update the documentation accurately ?
> 
> Numan
> 
>> ---
>> northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
>> northd/ovn-northd.8.xml | 12 +++++-----
>> tests/ovn-northd.at     |  8 +++----
>> 3 files changed, 45 insertions(+), 25 deletions(-)
>> 
>> diff --git a/northd/northd.c b/northd/northd.c
>> index 1e8a3457c..0d513f039 100644
>> --- a/northd/northd.c
>> +++ b/northd/northd.c
>> @@ -305,6 +305,15 @@ enum ovn_stage {
>>  *
>>  */
>> 
>> +/*
>> + * Route offsets implement logic to prioritize traffic for routes with
>> + * same ip_prefix values:
>> + *  -  connected route overrides static one;
>> + *  -  static route overrides connected route. */
>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>> +
>> /* Returns an "enum ovn_stage" built from the arguments. */
>> static enum ovn_stage
>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>>     struct in6_addr prefix;
>>     unsigned int plen;
>>     bool is_src_route;
>> +    const char *origin;
>>     uint16_t route_count;
>>     struct ovs_list route_list; /* Contains ecmp_route_list_node */
>> };
>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>>     eg->prefix = route->prefix;
>>     eg->plen = route->plen;
>>     eg->is_src_route = route->is_src_route;
>> +    eg->origin = smap_get_def(&route->route->options, "origin", "");
>>     ovs_list_init(&eg->route_list);
>>     ecmp_groups_add_route(eg, route);
>> 
>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
>> static void
>> build_route_match(const struct ovn_port *op_inport, const char *network_s,
>>                   int plen, bool is_src_route, bool is_ipv4, struct ds *match,
>> -                  uint16_t *priority)
>> +                  uint16_t *priority, int ofs)
>> {
>>     const char *dir;
>>     /* The priority here is calculated to implement longest-prefix-match
>>      * routing. */
>>     if (is_src_route) {
>>         dir = "src";
>> -        *priority = plen * 2;
>> +        ofs = 0;
>>     } else {
>>         dir = "dst";
>> -        *priority = (plen * 2) + 1;
>>     }
>> 
>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
>> +
>>     if (op_inport) {
>>         ds_put_format(match, "inport == %s && ", op_inport->json_key);
>>     }
>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
>>                   out_port->lrp_networks.ea_s,
>>                   IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>>                   port_ip, out_port->json_key);
>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
>>                            ds_cstr(&match), ds_cstr(&actions),
>>                            &st_route->header_);
>> 
>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>     struct ds route_match = DS_EMPTY_INITIALIZER;
>> 
>>     char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>>     build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
>> -                      &route_match, &priority);
>> +                      &route_match, &priority, ofs);
>>     free(prefix_s);
>> 
>>     struct ds actions = DS_EMPTY_INITIALIZER;
>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>           const struct ovn_port *op, const char *lrp_addr_s,
>>           const char *network_s, int plen, const char *gateway,
>>           bool is_src_route, const struct ovsdb_idl_row *stage_hint,
>> -          bool is_discard_route)
>> +          bool is_discard_route, int ofs)
>> {
>>     bool is_ipv4 = strchr(network_s, '.') ? true : false;
>>     struct ds match = DS_EMPTY_INITIALIZER;
>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>         }
>>     }
>>     build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
>> -                      &match, &priority);
>> +                      &match, &priority, ofs);
>> 
>>     struct ds common_actions = DS_EMPTY_INITIALIZER;
>>     struct ds actions = DS_EMPTY_INITIALIZER;
>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>         }
>>     }
>> 
>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
>> +                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
>> +                                              : ROUTE_PRIO_OFFSET_STATIC;
>> +
>>     char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
>>     add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
>>               lrp_addr_s, prefix_s, route_->plen, route->nexthop,
>> -              route_->is_src_route, &route->header_, route_->is_discard_route);
>> +              route_->is_src_route, &route->header_, route_->is_discard_route,
>> +              ofs);
>> 
>>     free(prefix_s);
>> }
>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>>             add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
>>                       op->lrp_networks.ipv4_addrs[i].network_s,
>>                       op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
>> -                      &op->nbrp->header_, false);
>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>         }
>> 
>>         for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>             add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
>>                       op->lrp_networks.ipv6_addrs[i].network_s,
>>                       op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
>> -                      &op->nbrp->header_, false);
>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>         }
>>     } else if (lsp_is_router(op->nbsp)) {
>>         struct ovn_port *peer = ovn_port_get_peer(ports, op);
>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>>                               peer->lrp_networks.ipv4_addrs[0].addr_s,
>>                               laddrs->ipv4_addrs[k].network_s,
>>                               laddrs->ipv4_addrs[k].plen, NULL, false,
>> -                              &peer->nbrp->header_, false);
>> +                              &peer->nbrp->header_, false,
>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>>                 }
>>             }
>>         }
>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>>         /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>>          * i.e., router solicitation and router advertisement.
>>          */
>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>>                       "nd_rs || nd_ra", "drop;");
>>         if (!od->mcast_info.rtr.relay) {
>>             return;
>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>>             }
>>             ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
>>                           igmp_group->mcgroup.name);
>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
>>                           ds_cstr(match), ds_cstr(actions));
>>         }
>> 
>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>>          * ports. Otherwise drop any multicast traffic.
>>          */
>>         if (od->mcast_info.rtr.flood_static) {
>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>                           "ip4.mcast || ip6.mcast",
>>                           "clone { "
>>                                 "outport = \""MC_STATIC"\"; "
>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>>                                 "next; "
>>                           "};");
>>         } else {
>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>                           "ip4.mcast || ip6.mcast", "drop;");
>>         }
>>     }
>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>> index fb67395e3..4f3a9d5e3 100644
>> --- a/northd/ovn-northd.8.xml
>> +++ b/northd/ovn-northd.8.xml
>> @@ -2945,12 +2945,12 @@ icmp6 {
>> 
>>     <p>
>>       If ECMP routes with symmetric reply are configured in the
>> -      <code>OVN_Northbound</code> database for a gateway router, a priority-300
>> -      flow is added for each router port on which symmetric replies are
>> -      configured. The matching logic for these ports essentially reverses the
>> -      configured logic of the ECMP route. So for instance, a route with a
>> -      destination routing policy will instead match if the source IP address
>> -      matches the static route's prefix. The flow uses the action
>> +      <code>OVN_Northbound</code> database for a gateway router, a
>> +      priority-10300 flow is added for each router port on which symmetric
>> +      replies are configured. The matching logic for these ports essentially
>> +      reverses the configured logic of the ECMP route. So for instance, a route
>> +      with a destination routing policy will instead match if the source IP
>> +      address matches the static route's prefix. The flow uses the action
>>       <code>ct_next</code> to send IP packets to the connection tracker for
>>       packet de-fragmentation and tracking before sending it to the next table.
>>     </p>
>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
>> index 85b47a18f..3c1a97f73 100644
>> --- a/tests/ovn-northd.at
>> +++ b/tests/ovn-northd.at
>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>> 
>> ovn-sbctl dump-flows lr0 > lr0flows
>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>> ])
>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>   table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>> 
>> ovn-sbctl dump-flows lr0 > lr0flows
>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>> ])
>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>   table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>> ovn-sbctl dump-flows lr0 > lr0flows
>> 
>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>> ])
>> 
>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
>> 
>> ovn-sbctl dump-flows lr0 > lr0flows
>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>> ])
>> 
>> AT_CLEANUP
>> --
>> 2.30.0
>> 
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
Vladislav Odintsov Nov. 17, 2021, 2:23 p.m. UTC | #3
Two additions:

1. Regarding documentation for flow in lr_in_defrag section:

It seems to me that documentation for it is written in a wrong section (lr_in_defrag).
Since the flow is installed in lr_in_ip_routing, it should be documented there.
I’ll move it if you don’t mind.


2. Documentation for other flow changes was added, but I’ve committed it to a wrong patch (#3).
I’ll move documentation update between patches.

Regards,
Vladislav Odintsov

> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com> wrote:
> 
> Hi Numan,
> 
> Thanks for the review.
> Sure I will fix this. Should I wait for more comments or that’s all and I can send v9?
> 
> Regards,
> Vladislav Odintsov
> 
>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
>> 
>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>> 
>>> With this patch routes to connected networks have higher
>>> priority than static routes with same ip_prefix.
>>> 
>>> This brings commonly-used behaviour for routes lookup order:
>>> 1: longest prefix match
>>> 2: metric
>>> 
>>> The metric has next lookup order:
>>> 1: connected routes
>>> 2: static routes
>>> 
>>> Earlier static and connected routes with same ip_prefix had
>>> the same priority, so it was impossible to predict which one
>>> is used for routing decision.
>>> 
>>> Each route's prefix length has its own 'slot' in lflow prios.
>>> Now prefix length space is calculated using next information:
>>> to calculate route's priority prefixlen multiplied by 3
>>> + route origin offset (0 - source-based route; 1 - directly-
>>> connected route; 2 - static route).
>>> 
>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
>>> by 10000.
>>> 
>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>> 
>> Hi Vladislav,
>> 
>> Thanks for the patch.  Overall it looks good to me.  I've one comment.
>> Looks like the documentation updated in ovn-northd.8.xml is not accurate.
>> 
>> This patch modifies the flows in the lr_in_ip_routing stage but this
>> patch doesn't update the documentation.
>> Also the patch updates the documentation for the flow in lr_in_defrag
>> stage, which seems not correct.
>> 
>> Can you please update the documentation accurately ?
>> 
>> Numan
>> 
>>> ---
>>> northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
>>> northd/ovn-northd.8.xml | 12 +++++-----
>>> tests/ovn-northd.at     |  8 +++----
>>> 3 files changed, 45 insertions(+), 25 deletions(-)
>>> 
>>> diff --git a/northd/northd.c b/northd/northd.c
>>> index 1e8a3457c..0d513f039 100644
>>> --- a/northd/northd.c
>>> +++ b/northd/northd.c
>>> @@ -305,6 +305,15 @@ enum ovn_stage {
>>> *
>>> */
>>> 
>>> +/*
>>> + * Route offsets implement logic to prioritize traffic for routes with
>>> + * same ip_prefix values:
>>> + *  -  connected route overrides static one;
>>> + *  -  static route overrides connected route. */
>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>> +
>>> /* Returns an "enum ovn_stage" built from the arguments. */
>>> static enum ovn_stage
>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>>>    struct in6_addr prefix;
>>>    unsigned int plen;
>>>    bool is_src_route;
>>> +    const char *origin;
>>>    uint16_t route_count;
>>>    struct ovs_list route_list; /* Contains ecmp_route_list_node */
>>> };
>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>>>    eg->prefix = route->prefix;
>>>    eg->plen = route->plen;
>>>    eg->is_src_route = route->is_src_route;
>>> +    eg->origin = smap_get_def(&route->route->options, "origin", "");
>>>    ovs_list_init(&eg->route_list);
>>>    ecmp_groups_add_route(eg, route);
>>> 
>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
>>> static void
>>> build_route_match(const struct ovn_port *op_inport, const char *network_s,
>>>                  int plen, bool is_src_route, bool is_ipv4, struct ds *match,
>>> -                  uint16_t *priority)
>>> +                  uint16_t *priority, int ofs)
>>> {
>>>    const char *dir;
>>>    /* The priority here is calculated to implement longest-prefix-match
>>>     * routing. */
>>>    if (is_src_route) {
>>>        dir = "src";
>>> -        *priority = plen * 2;
>>> +        ofs = 0;
>>>    } else {
>>>        dir = "dst";
>>> -        *priority = (plen * 2) + 1;
>>>    }
>>> 
>>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
>>> +
>>>    if (op_inport) {
>>>        ds_put_format(match, "inport == %s && ", op_inport->json_key);
>>>    }
>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
>>>                  out_port->lrp_networks.ea_s,
>>>                  IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>>>                  port_ip, out_port->json_key);
>>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
>>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
>>>                           ds_cstr(&match), ds_cstr(&actions),
>>>                           &st_route->header_);
>>> 
>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>>    struct ds route_match = DS_EMPTY_INITIALIZER;
>>> 
>>>    char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
>>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
>>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>>>    build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
>>> -                      &route_match, &priority);
>>> +                      &route_match, &priority, ofs);
>>>    free(prefix_s);
>>> 
>>>    struct ds actions = DS_EMPTY_INITIALIZER;
>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>>          const struct ovn_port *op, const char *lrp_addr_s,
>>>          const char *network_s, int plen, const char *gateway,
>>>          bool is_src_route, const struct ovsdb_idl_row *stage_hint,
>>> -          bool is_discard_route)
>>> +          bool is_discard_route, int ofs)
>>> {
>>>    bool is_ipv4 = strchr(network_s, '.') ? true : false;
>>>    struct ds match = DS_EMPTY_INITIALIZER;
>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>>        }
>>>    }
>>>    build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
>>> -                      &match, &priority);
>>> +                      &match, &priority, ofs);
>>> 
>>>    struct ds common_actions = DS_EMPTY_INITIALIZER;
>>>    struct ds actions = DS_EMPTY_INITIALIZER;
>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>>        }
>>>    }
>>> 
>>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
>>> +                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
>>> +                                              : ROUTE_PRIO_OFFSET_STATIC;
>>> +
>>>    char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
>>>    add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
>>>              lrp_addr_s, prefix_s, route_->plen, route->nexthop,
>>> -              route_->is_src_route, &route->header_, route_->is_discard_route);
>>> +              route_->is_src_route, &route->header_, route_->is_discard_route,
>>> +              ofs);
>>> 
>>>    free(prefix_s);
>>> }
>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>>>            add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
>>>                      op->lrp_networks.ipv4_addrs[i].network_s,
>>>                      op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
>>> -                      &op->nbrp->header_, false);
>>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>>        }
>>> 
>>>        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>>            add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
>>>                      op->lrp_networks.ipv6_addrs[i].network_s,
>>>                      op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
>>> -                      &op->nbrp->header_, false);
>>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>>        }
>>>    } else if (lsp_is_router(op->nbsp)) {
>>>        struct ovn_port *peer = ovn_port_get_peer(ports, op);
>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>>>                              peer->lrp_networks.ipv4_addrs[0].addr_s,
>>>                              laddrs->ipv4_addrs[k].network_s,
>>>                              laddrs->ipv4_addrs[k].plen, NULL, false,
>>> -                              &peer->nbrp->header_, false);
>>> +                              &peer->nbrp->header_, false,
>>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>>>                }
>>>            }
>>>        }
>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>        /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>>>         * i.e., router solicitation and router advertisement.
>>>         */
>>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
>>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>>>                      "nd_rs || nd_ra", "drop;");
>>>        if (!od->mcast_info.rtr.relay) {
>>>            return;
>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>            }
>>>            ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
>>>                          igmp_group->mcgroup.name);
>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
>>>                          ds_cstr(match), ds_cstr(actions));
>>>        }
>>> 
>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>         * ports. Otherwise drop any multicast traffic.
>>>         */
>>>        if (od->mcast_info.rtr.flood_static) {
>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>>                          "ip4.mcast || ip6.mcast",
>>>                          "clone { "
>>>                                "outport = \""MC_STATIC"\"; "
>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>                                "next; "
>>>                          "};");
>>>        } else {
>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>>                          "ip4.mcast || ip6.mcast", "drop;");
>>>        }
>>>    }
>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>>> index fb67395e3..4f3a9d5e3 100644
>>> --- a/northd/ovn-northd.8.xml
>>> +++ b/northd/ovn-northd.8.xml
>>> @@ -2945,12 +2945,12 @@ icmp6 {
>>> 
>>>    <p>
>>>      If ECMP routes with symmetric reply are configured in the
>>> -      <code>OVN_Northbound</code> database for a gateway router, a priority-300
>>> -      flow is added for each router port on which symmetric replies are
>>> -      configured. The matching logic for these ports essentially reverses the
>>> -      configured logic of the ECMP route. So for instance, a route with a
>>> -      destination routing policy will instead match if the source IP address
>>> -      matches the static route's prefix. The flow uses the action
>>> +      <code>OVN_Northbound</code> database for a gateway router, a
>>> +      priority-10300 flow is added for each router port on which symmetric
>>> +      replies are configured. The matching logic for these ports essentially
>>> +      reverses the configured logic of the ECMP route. So for instance, a route
>>> +      with a destination routing policy will instead match if the source IP
>>> +      address matches the static route's prefix. The flow uses the action
>>>      <code>ct_next</code> to send IP packets to the connection tracker for
>>>      packet de-fragmentation and tracking before sending it to the next table.
>>>    </p>
>>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
>>> index 85b47a18f..3c1a97f73 100644
>>> --- a/tests/ovn-northd.at
>>> +++ b/tests/ovn-northd.at
>>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>> 
>>> ovn-sbctl dump-flows lr0 > lr0flows
>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>> ])
>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>>  table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>> 
>>> ovn-sbctl dump-flows lr0 > lr0flows
>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>> ])
>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>>  table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>>> ovn-sbctl dump-flows lr0 > lr0flows
>>> 
>>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>> ])
>>> 
>>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
>>> 
>>> ovn-sbctl dump-flows lr0 > lr0flows
>>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>> ])
>>> 
>>> AT_CLEANUP
>>> --
>>> 2.30.0
>>> 
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Numan Siddique Nov. 17, 2021, 5:57 p.m. UTC | #4
On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com> wrote:
>
> Two additions:
>
> 1. Regarding documentation for flow in lr_in_defrag section:
>
> It seems to me that documentation for it is written in a wrong section (lr_in_defrag).
> Since the flow is installed in lr_in_ip_routing, it should be documented there.
> I’ll move it if you don’t mind.

Sure.  Thanks.  I'd suggest having a separate patch for fixing the
documentation.
>
>
> 2. Documentation for other flow changes was added, but I’ve committed it to a wrong patch (#3).
> I’ll move documentation update between patches.

Ack.

Thanks
Numan

>
> Regards,
> Vladislav Odintsov
>
> > On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com> wrote:
> >
> > Hi Numan,
> >
> > Thanks for the review.
> > Sure I will fix this. Should I wait for more comments or that’s all and I can send v9?
> >
> > Regards,
> > Vladislav Odintsov
> >
> >> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
> >>
> >> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
> >>>
> >>> With this patch routes to connected networks have higher
> >>> priority than static routes with same ip_prefix.
> >>>
> >>> This brings commonly-used behaviour for routes lookup order:
> >>> 1: longest prefix match
> >>> 2: metric
> >>>
> >>> The metric has next lookup order:
> >>> 1: connected routes
> >>> 2: static routes
> >>>
> >>> Earlier static and connected routes with same ip_prefix had
> >>> the same priority, so it was impossible to predict which one
> >>> is used for routing decision.
> >>>
> >>> Each route's prefix length has its own 'slot' in lflow prios.
> >>> Now prefix length space is calculated using next information:
> >>> to calculate route's priority prefixlen multiplied by 3
> >>> + route origin offset (0 - source-based route; 1 - directly-
> >>> connected route; 2 - static route).
> >>>
> >>> Also, enlarge prio for generic records in lr_in_ip_routing stage
> >>> by 10000.
> >>>
> >>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> >>
> >> Hi Vladislav,
> >>
> >> Thanks for the patch.  Overall it looks good to me.  I've one comment.
> >> Looks like the documentation updated in ovn-northd.8.xml is not accurate.
> >>
> >> This patch modifies the flows in the lr_in_ip_routing stage but this
> >> patch doesn't update the documentation.
> >> Also the patch updates the documentation for the flow in lr_in_defrag
> >> stage, which seems not correct.
> >>
> >> Can you please update the documentation accurately ?
> >>
> >> Numan
> >>
> >>> ---
> >>> northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
> >>> northd/ovn-northd.8.xml | 12 +++++-----
> >>> tests/ovn-northd.at     |  8 +++----
> >>> 3 files changed, 45 insertions(+), 25 deletions(-)
> >>>
> >>> diff --git a/northd/northd.c b/northd/northd.c
> >>> index 1e8a3457c..0d513f039 100644
> >>> --- a/northd/northd.c
> >>> +++ b/northd/northd.c
> >>> @@ -305,6 +305,15 @@ enum ovn_stage {
> >>> *
> >>> */
> >>>
> >>> +/*
> >>> + * Route offsets implement logic to prioritize traffic for routes with
> >>> + * same ip_prefix values:
> >>> + *  -  connected route overrides static one;
> >>> + *  -  static route overrides connected route. */
> >>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> >>> +#define ROUTE_PRIO_OFFSET_STATIC 1
> >>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> >>> +
> >>> /* Returns an "enum ovn_stage" built from the arguments. */
> >>> static enum ovn_stage
> >>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
> >>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
> >>>    struct in6_addr prefix;
> >>>    unsigned int plen;
> >>>    bool is_src_route;
> >>> +    const char *origin;
> >>>    uint16_t route_count;
> >>>    struct ovs_list route_list; /* Contains ecmp_route_list_node */
> >>> };
> >>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
> >>>    eg->prefix = route->prefix;
> >>>    eg->plen = route->plen;
> >>>    eg->is_src_route = route->is_src_route;
> >>> +    eg->origin = smap_get_def(&route->route->options, "origin", "");
> >>>    ovs_list_init(&eg->route_list);
> >>>    ecmp_groups_add_route(eg, route);
> >>>
> >>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
> >>> static void
> >>> build_route_match(const struct ovn_port *op_inport, const char *network_s,
> >>>                  int plen, bool is_src_route, bool is_ipv4, struct ds *match,
> >>> -                  uint16_t *priority)
> >>> +                  uint16_t *priority, int ofs)
> >>> {
> >>>    const char *dir;
> >>>    /* The priority here is calculated to implement longest-prefix-match
> >>>     * routing. */
> >>>    if (is_src_route) {
> >>>        dir = "src";
> >>> -        *priority = plen * 2;
> >>> +        ofs = 0;
> >>>    } else {
> >>>        dir = "dst";
> >>> -        *priority = (plen * 2) + 1;
> >>>    }
> >>>
> >>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
> >>> +
> >>>    if (op_inport) {
> >>>        ds_put_format(match, "inport == %s && ", op_inport->json_key);
> >>>    }
> >>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
> >>>                  out_port->lrp_networks.ea_s,
> >>>                  IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
> >>>                  port_ip, out_port->json_key);
> >>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
> >>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
> >>>                           ds_cstr(&match), ds_cstr(&actions),
> >>>                           &st_route->header_);
> >>>
> >>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
> >>>    struct ds route_match = DS_EMPTY_INITIALIZER;
> >>>
> >>>    char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
> >>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
> >>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
> >>>    build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
> >>> -                      &route_match, &priority);
> >>> +                      &route_match, &priority, ofs);
> >>>    free(prefix_s);
> >>>
> >>>    struct ds actions = DS_EMPTY_INITIALIZER;
> >>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
> >>>          const struct ovn_port *op, const char *lrp_addr_s,
> >>>          const char *network_s, int plen, const char *gateway,
> >>>          bool is_src_route, const struct ovsdb_idl_row *stage_hint,
> >>> -          bool is_discard_route)
> >>> +          bool is_discard_route, int ofs)
> >>> {
> >>>    bool is_ipv4 = strchr(network_s, '.') ? true : false;
> >>>    struct ds match = DS_EMPTY_INITIALIZER;
> >>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
> >>>        }
> >>>    }
> >>>    build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
> >>> -                      &match, &priority);
> >>> +                      &match, &priority, ofs);
> >>>
> >>>    struct ds common_actions = DS_EMPTY_INITIALIZER;
> >>>    struct ds actions = DS_EMPTY_INITIALIZER;
> >>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
> >>>        }
> >>>    }
> >>>
> >>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
> >>> +                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
> >>> +                                              : ROUTE_PRIO_OFFSET_STATIC;
> >>> +
> >>>    char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
> >>>    add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
> >>>              lrp_addr_s, prefix_s, route_->plen, route->nexthop,
> >>> -              route_->is_src_route, &route->header_, route_->is_discard_route);
> >>> +              route_->is_src_route, &route->header_, route_->is_discard_route,
> >>> +              ofs);
> >>>
> >>>    free(prefix_s);
> >>> }
> >>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
> >>>            add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
> >>>                      op->lrp_networks.ipv4_addrs[i].network_s,
> >>>                      op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
> >>> -                      &op->nbrp->header_, false);
> >>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
> >>>        }
> >>>
> >>>        for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> >>>            add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
> >>>                      op->lrp_networks.ipv6_addrs[i].network_s,
> >>>                      op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
> >>> -                      &op->nbrp->header_, false);
> >>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
> >>>        }
> >>>    } else if (lsp_is_router(op->nbsp)) {
> >>>        struct ovn_port *peer = ovn_port_get_peer(ports, op);
> >>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
> >>>                              peer->lrp_networks.ipv4_addrs[0].addr_s,
> >>>                              laddrs->ipv4_addrs[k].network_s,
> >>>                              laddrs->ipv4_addrs[k].plen, NULL, false,
> >>> -                              &peer->nbrp->header_, false);
> >>> +                              &peer->nbrp->header_, false,
> >>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
> >>>                }
> >>>            }
> >>>        }
> >>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>        /* Drop IPv6 multicast traffic that shouldn't be forwarded,
> >>>         * i.e., router solicitation and router advertisement.
> >>>         */
> >>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
> >>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
> >>>                      "nd_rs || nd_ra", "drop;");
> >>>        if (!od->mcast_info.rtr.relay) {
> >>>            return;
> >>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>            }
> >>>            ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
> >>>                          igmp_group->mcgroup.name);
> >>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
> >>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
> >>>                          ds_cstr(match), ds_cstr(actions));
> >>>        }
> >>>
> >>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>         * ports. Otherwise drop any multicast traffic.
> >>>         */
> >>>        if (od->mcast_info.rtr.flood_static) {
> >>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
> >>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
> >>>                          "ip4.mcast || ip6.mcast",
> >>>                          "clone { "
> >>>                                "outport = \""MC_STATIC"\"; "
> >>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>                                "next; "
> >>>                          "};");
> >>>        } else {
> >>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
> >>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
> >>>                          "ip4.mcast || ip6.mcast", "drop;");
> >>>        }
> >>>    }
> >>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> >>> index fb67395e3..4f3a9d5e3 100644
> >>> --- a/northd/ovn-northd.8.xml
> >>> +++ b/northd/ovn-northd.8.xml
> >>> @@ -2945,12 +2945,12 @@ icmp6 {
> >>>
> >>>    <p>
> >>>      If ECMP routes with symmetric reply are configured in the
> >>> -      <code>OVN_Northbound</code> database for a gateway router, a priority-300
> >>> -      flow is added for each router port on which symmetric replies are
> >>> -      configured. The matching logic for these ports essentially reverses the
> >>> -      configured logic of the ECMP route. So for instance, a route with a
> >>> -      destination routing policy will instead match if the source IP address
> >>> -      matches the static route's prefix. The flow uses the action
> >>> +      <code>OVN_Northbound</code> database for a gateway router, a
> >>> +      priority-10300 flow is added for each router port on which symmetric
> >>> +      replies are configured. The matching logic for these ports essentially
> >>> +      reverses the configured logic of the ECMP route. So for instance, a route
> >>> +      with a destination routing policy will instead match if the source IP
> >>> +      address matches the static route's prefix. The flow uses the action
> >>>      <code>ct_next</code> to send IP packets to the connection tracker for
> >>>      packet de-fragmentation and tracking before sending it to the next table.
> >>>    </p>
> >>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> >>> index 85b47a18f..3c1a97f73 100644
> >>> --- a/tests/ovn-northd.at
> >>> +++ b/tests/ovn-northd.at
> >>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
> >>>
> >>> ovn-sbctl dump-flows lr0 > lr0flows
> >>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> >>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> >>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> >>> ])
> >>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
> >>>  table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
> >>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
> >>>
> >>> ovn-sbctl dump-flows lr0 > lr0flows
> >>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> >>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> >>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> >>> ])
> >>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
> >>>  table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
> >>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
> >>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>
> >>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> >>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> >>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> >>> ])
> >>>
> >>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
> >>>
> >>> ovn-sbctl dump-flows lr0 > lr0flows
> >>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> >>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> >>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> >>> ])
> >>>
> >>> AT_CLEANUP
> >>> --
> >>> 2.30.0
> >>>
> >>> _______________________________________________
> >>> dev mailing list
> >>> dev@openvswitch.org <mailto:dev@openvswitch.org>
> >>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Vladislav Odintsov Nov. 17, 2021, 8:38 p.m. UTC | #5
I’ve submitted a patch [1] with my findings.
Also, if no comments for other my patches from this patch series, I can submit a new version.
Should I?

1: https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/

Regards,
Vladislav Odintsov

> On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org> wrote:
> 
> On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>> 
>> Two additions:
>> 
>> 1. Regarding documentation for flow in lr_in_defrag section:
>> 
>> It seems to me that documentation for it is written in a wrong section (lr_in_defrag).
>> Since the flow is installed in lr_in_ip_routing, it should be documented there.
>> I’ll move it if you don’t mind.
> 
> Sure.  Thanks.  I'd suggest having a separate patch for fixing the
> documentation.
>> 
>> 
>> 2. Documentation for other flow changes was added, but I’ve committed it to a wrong patch (#3).
>> I’ll move documentation update between patches.
> 
> Ack.
> 
> Thanks
> Numan
> 
>> 
>> Regards,
>> Vladislav Odintsov
>> 
>>> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com> wrote:
>>> 
>>> Hi Numan,
>>> 
>>> Thanks for the review.
>>> Sure I will fix this. Should I wait for more comments or that’s all and I can send v9?
>>> 
>>> Regards,
>>> Vladislav Odintsov
>>> 
>>>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
>>>> 
>>>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>>>> 
>>>>> With this patch routes to connected networks have higher
>>>>> priority than static routes with same ip_prefix.
>>>>> 
>>>>> This brings commonly-used behaviour for routes lookup order:
>>>>> 1: longest prefix match
>>>>> 2: metric
>>>>> 
>>>>> The metric has next lookup order:
>>>>> 1: connected routes
>>>>> 2: static routes
>>>>> 
>>>>> Earlier static and connected routes with same ip_prefix had
>>>>> the same priority, so it was impossible to predict which one
>>>>> is used for routing decision.
>>>>> 
>>>>> Each route's prefix length has its own 'slot' in lflow prios.
>>>>> Now prefix length space is calculated using next information:
>>>>> to calculate route's priority prefixlen multiplied by 3
>>>>> + route origin offset (0 - source-based route; 1 - directly-
>>>>> connected route; 2 - static route).
>>>>> 
>>>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
>>>>> by 10000.
>>>>> 
>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>>> 
>>>> Hi Vladislav,
>>>> 
>>>> Thanks for the patch.  Overall it looks good to me.  I've one comment.
>>>> Looks like the documentation updated in ovn-northd.8.xml is not accurate.
>>>> 
>>>> This patch modifies the flows in the lr_in_ip_routing stage but this
>>>> patch doesn't update the documentation.
>>>> Also the patch updates the documentation for the flow in lr_in_defrag
>>>> stage, which seems not correct.
>>>> 
>>>> Can you please update the documentation accurately ?
>>>> 
>>>> Numan
>>>> 
>>>>> ---
>>>>> northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
>>>>> northd/ovn-northd.8.xml | 12 +++++-----
>>>>> tests/ovn-northd.at     |  8 +++----
>>>>> 3 files changed, 45 insertions(+), 25 deletions(-)
>>>>> 
>>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>>> index 1e8a3457c..0d513f039 100644
>>>>> --- a/northd/northd.c
>>>>> +++ b/northd/northd.c
>>>>> @@ -305,6 +305,15 @@ enum ovn_stage {
>>>>> *
>>>>> */
>>>>> 
>>>>> +/*
>>>>> + * Route offsets implement logic to prioritize traffic for routes with
>>>>> + * same ip_prefix values:
>>>>> + *  -  connected route overrides static one;
>>>>> + *  -  static route overrides connected route. */
>>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>>>> +
>>>>> /* Returns an "enum ovn_stage" built from the arguments. */
>>>>> static enum ovn_stage
>>>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
>>>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>>>>>   struct in6_addr prefix;
>>>>>   unsigned int plen;
>>>>>   bool is_src_route;
>>>>> +    const char *origin;
>>>>>   uint16_t route_count;
>>>>>   struct ovs_list route_list; /* Contains ecmp_route_list_node */
>>>>> };
>>>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>>>>>   eg->prefix = route->prefix;
>>>>>   eg->plen = route->plen;
>>>>>   eg->is_src_route = route->is_src_route;
>>>>> +    eg->origin = smap_get_def(&route->route->options, "origin", "");
>>>>>   ovs_list_init(&eg->route_list);
>>>>>   ecmp_groups_add_route(eg, route);
>>>>> 
>>>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
>>>>> static void
>>>>> build_route_match(const struct ovn_port *op_inport, const char *network_s,
>>>>>                 int plen, bool is_src_route, bool is_ipv4, struct ds *match,
>>>>> -                  uint16_t *priority)
>>>>> +                  uint16_t *priority, int ofs)
>>>>> {
>>>>>   const char *dir;
>>>>>   /* The priority here is calculated to implement longest-prefix-match
>>>>>    * routing. */
>>>>>   if (is_src_route) {
>>>>>       dir = "src";
>>>>> -        *priority = plen * 2;
>>>>> +        ofs = 0;
>>>>>   } else {
>>>>>       dir = "dst";
>>>>> -        *priority = (plen * 2) + 1;
>>>>>   }
>>>>> 
>>>>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
>>>>> +
>>>>>   if (op_inport) {
>>>>>       ds_put_format(match, "inport == %s && ", op_inport->json_key);
>>>>>   }
>>>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
>>>>>                 out_port->lrp_networks.ea_s,
>>>>>                 IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>>>>>                 port_ip, out_port->json_key);
>>>>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
>>>>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
>>>>>                          ds_cstr(&match), ds_cstr(&actions),
>>>>>                          &st_route->header_);
>>>>> 
>>>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>>>>   struct ds route_match = DS_EMPTY_INITIALIZER;
>>>>> 
>>>>>   char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
>>>>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
>>>>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>>>>>   build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
>>>>> -                      &route_match, &priority);
>>>>> +                      &route_match, &priority, ofs);
>>>>>   free(prefix_s);
>>>>> 
>>>>>   struct ds actions = DS_EMPTY_INITIALIZER;
>>>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>>>>         const struct ovn_port *op, const char *lrp_addr_s,
>>>>>         const char *network_s, int plen, const char *gateway,
>>>>>         bool is_src_route, const struct ovsdb_idl_row *stage_hint,
>>>>> -          bool is_discard_route)
>>>>> +          bool is_discard_route, int ofs)
>>>>> {
>>>>>   bool is_ipv4 = strchr(network_s, '.') ? true : false;
>>>>>   struct ds match = DS_EMPTY_INITIALIZER;
>>>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>>>>       }
>>>>>   }
>>>>>   build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
>>>>> -                      &match, &priority);
>>>>> +                      &match, &priority, ofs);
>>>>> 
>>>>>   struct ds common_actions = DS_EMPTY_INITIALIZER;
>>>>>   struct ds actions = DS_EMPTY_INITIALIZER;
>>>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>>>>       }
>>>>>   }
>>>>> 
>>>>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
>>>>> +                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
>>>>> +                                              : ROUTE_PRIO_OFFSET_STATIC;
>>>>> +
>>>>>   char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
>>>>>   add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
>>>>>             lrp_addr_s, prefix_s, route_->plen, route->nexthop,
>>>>> -              route_->is_src_route, &route->header_, route_->is_discard_route);
>>>>> +              route_->is_src_route, &route->header_, route_->is_discard_route,
>>>>> +              ofs);
>>>>> 
>>>>>   free(prefix_s);
>>>>> }
>>>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>>>>>           add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
>>>>>                     op->lrp_networks.ipv4_addrs[i].network_s,
>>>>>                     op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
>>>>> -                      &op->nbrp->header_, false);
>>>>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>       }
>>>>> 
>>>>>       for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>>>>           add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
>>>>>                     op->lrp_networks.ipv6_addrs[i].network_s,
>>>>>                     op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
>>>>> -                      &op->nbrp->header_, false);
>>>>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>       }
>>>>>   } else if (lsp_is_router(op->nbsp)) {
>>>>>       struct ovn_port *peer = ovn_port_get_peer(ports, op);
>>>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>>>>>                             peer->lrp_networks.ipv4_addrs[0].addr_s,
>>>>>                             laddrs->ipv4_addrs[k].network_s,
>>>>>                             laddrs->ipv4_addrs[k].plen, NULL, false,
>>>>> -                              &peer->nbrp->header_, false);
>>>>> +                              &peer->nbrp->header_, false,
>>>>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>               }
>>>>>           }
>>>>>       }
>>>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>       /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>>>>>        * i.e., router solicitation and router advertisement.
>>>>>        */
>>>>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
>>>>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>>>>>                     "nd_rs || nd_ra", "drop;");
>>>>>       if (!od->mcast_info.rtr.relay) {
>>>>>           return;
>>>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>           }
>>>>>           ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
>>>>>                         igmp_group->mcgroup.name);
>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
>>>>>                         ds_cstr(match), ds_cstr(actions));
>>>>>       }
>>>>> 
>>>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>        * ports. Otherwise drop any multicast traffic.
>>>>>        */
>>>>>       if (od->mcast_info.rtr.flood_static) {
>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>>>>                         "ip4.mcast || ip6.mcast",
>>>>>                         "clone { "
>>>>>                               "outport = \""MC_STATIC"\"; "
>>>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>                               "next; "
>>>>>                         "};");
>>>>>       } else {
>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>>>>                         "ip4.mcast || ip6.mcast", "drop;");
>>>>>       }
>>>>>   }
>>>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>>>>> index fb67395e3..4f3a9d5e3 100644
>>>>> --- a/northd/ovn-northd.8.xml
>>>>> +++ b/northd/ovn-northd.8.xml
>>>>> @@ -2945,12 +2945,12 @@ icmp6 {
>>>>> 
>>>>>   <p>
>>>>>     If ECMP routes with symmetric reply are configured in the
>>>>> -      <code>OVN_Northbound</code> database for a gateway router, a priority-300
>>>>> -      flow is added for each router port on which symmetric replies are
>>>>> -      configured. The matching logic for these ports essentially reverses the
>>>>> -      configured logic of the ECMP route. So for instance, a route with a
>>>>> -      destination routing policy will instead match if the source IP address
>>>>> -      matches the static route's prefix. The flow uses the action
>>>>> +      <code>OVN_Northbound</code> database for a gateway router, a
>>>>> +      priority-10300 flow is added for each router port on which symmetric
>>>>> +      replies are configured. The matching logic for these ports essentially
>>>>> +      reverses the configured logic of the ECMP route. So for instance, a route
>>>>> +      with a destination routing policy will instead match if the source IP
>>>>> +      address matches the static route's prefix. The flow uses the action
>>>>>     <code>ct_next</code> to send IP packets to the connection tracker for
>>>>>     packet de-fragmentation and tracking before sending it to the next table.
>>>>>   </p>
>>>>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
>>>>> index 85b47a18f..3c1a97f73 100644
>>>>> --- a/tests/ovn-northd.at
>>>>> +++ b/tests/ovn-northd.at
>>>>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>>>> 
>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>>>> ])
>>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>> table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>>>>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>>>> 
>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>>>> ])
>>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>> table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>>>>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>> 
>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>>>> ])
>>>>> 
>>>>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
>>>>> 
>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>>>> ])
>>>>> 
>>>>> AT_CLEANUP
>>>>> --
>>>>> 2.30.0
>>>>> 
>>>>> _______________________________________________
>>>>> dev mailing list
>>>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> 
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org <mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
Numan Siddique Nov. 17, 2021, 9:05 p.m. UTC | #6
On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com> wrote:
>
> I’ve submitted a patch [1] with my findings.
> Also, if no comments for other my patches from this patch series, I can submit a new version.
> Should I?

No comments from my side.  Perhaps you can submit another version.

Numan

>
> 1: https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
>
> Regards,
> Vladislav Odintsov
>
> > On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org> wrote:
> >
> > On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
> >>
> >> Two additions:
> >>
> >> 1. Regarding documentation for flow in lr_in_defrag section:
> >>
> >> It seems to me that documentation for it is written in a wrong section (lr_in_defrag).
> >> Since the flow is installed in lr_in_ip_routing, it should be documented there.
> >> I’ll move it if you don’t mind.
> >
> > Sure.  Thanks.  I'd suggest having a separate patch for fixing the
> > documentation.
> >>
> >>
> >> 2. Documentation for other flow changes was added, but I’ve committed it to a wrong patch (#3).
> >> I’ll move documentation update between patches.
> >
> > Ack.
> >
> > Thanks
> > Numan
> >
> >>
> >> Regards,
> >> Vladislav Odintsov
> >>
> >>> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com> wrote:
> >>>
> >>> Hi Numan,
> >>>
> >>> Thanks for the review.
> >>> Sure I will fix this. Should I wait for more comments or that’s all and I can send v9?
> >>>
> >>> Regards,
> >>> Vladislav Odintsov
> >>>
> >>>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
> >>>>
> >>>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
> >>>>>
> >>>>> With this patch routes to connected networks have higher
> >>>>> priority than static routes with same ip_prefix.
> >>>>>
> >>>>> This brings commonly-used behaviour for routes lookup order:
> >>>>> 1: longest prefix match
> >>>>> 2: metric
> >>>>>
> >>>>> The metric has next lookup order:
> >>>>> 1: connected routes
> >>>>> 2: static routes
> >>>>>
> >>>>> Earlier static and connected routes with same ip_prefix had
> >>>>> the same priority, so it was impossible to predict which one
> >>>>> is used for routing decision.
> >>>>>
> >>>>> Each route's prefix length has its own 'slot' in lflow prios.
> >>>>> Now prefix length space is calculated using next information:
> >>>>> to calculate route's priority prefixlen multiplied by 3
> >>>>> + route origin offset (0 - source-based route; 1 - directly-
> >>>>> connected route; 2 - static route).
> >>>>>
> >>>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
> >>>>> by 10000.
> >>>>>
> >>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> >>>>
> >>>> Hi Vladislav,
> >>>>
> >>>> Thanks for the patch.  Overall it looks good to me.  I've one comment.
> >>>> Looks like the documentation updated in ovn-northd.8.xml is not accurate.
> >>>>
> >>>> This patch modifies the flows in the lr_in_ip_routing stage but this
> >>>> patch doesn't update the documentation.
> >>>> Also the patch updates the documentation for the flow in lr_in_defrag
> >>>> stage, which seems not correct.
> >>>>
> >>>> Can you please update the documentation accurately ?
> >>>>
> >>>> Numan
> >>>>
> >>>>> ---
> >>>>> northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
> >>>>> northd/ovn-northd.8.xml | 12 +++++-----
> >>>>> tests/ovn-northd.at     |  8 +++----
> >>>>> 3 files changed, 45 insertions(+), 25 deletions(-)
> >>>>>
> >>>>> diff --git a/northd/northd.c b/northd/northd.c
> >>>>> index 1e8a3457c..0d513f039 100644
> >>>>> --- a/northd/northd.c
> >>>>> +++ b/northd/northd.c
> >>>>> @@ -305,6 +305,15 @@ enum ovn_stage {
> >>>>> *
> >>>>> */
> >>>>>
> >>>>> +/*
> >>>>> + * Route offsets implement logic to prioritize traffic for routes with
> >>>>> + * same ip_prefix values:
> >>>>> + *  -  connected route overrides static one;
> >>>>> + *  -  static route overrides connected route. */
> >>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> >>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
> >>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> >>>>> +
> >>>>> /* Returns an "enum ovn_stage" built from the arguments. */
> >>>>> static enum ovn_stage
> >>>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
> >>>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
> >>>>>   struct in6_addr prefix;
> >>>>>   unsigned int plen;
> >>>>>   bool is_src_route;
> >>>>> +    const char *origin;
> >>>>>   uint16_t route_count;
> >>>>>   struct ovs_list route_list; /* Contains ecmp_route_list_node */
> >>>>> };
> >>>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
> >>>>>   eg->prefix = route->prefix;
> >>>>>   eg->plen = route->plen;
> >>>>>   eg->is_src_route = route->is_src_route;
> >>>>> +    eg->origin = smap_get_def(&route->route->options, "origin", "");
> >>>>>   ovs_list_init(&eg->route_list);
> >>>>>   ecmp_groups_add_route(eg, route);
> >>>>>
> >>>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
> >>>>> static void
> >>>>> build_route_match(const struct ovn_port *op_inport, const char *network_s,
> >>>>>                 int plen, bool is_src_route, bool is_ipv4, struct ds *match,
> >>>>> -                  uint16_t *priority)
> >>>>> +                  uint16_t *priority, int ofs)
> >>>>> {
> >>>>>   const char *dir;
> >>>>>   /* The priority here is calculated to implement longest-prefix-match
> >>>>>    * routing. */
> >>>>>   if (is_src_route) {
> >>>>>       dir = "src";
> >>>>> -        *priority = plen * 2;
> >>>>> +        ofs = 0;
> >>>>>   } else {
> >>>>>       dir = "dst";
> >>>>> -        *priority = (plen * 2) + 1;
> >>>>>   }
> >>>>>
> >>>>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
> >>>>> +
> >>>>>   if (op_inport) {
> >>>>>       ds_put_format(match, "inport == %s && ", op_inport->json_key);
> >>>>>   }
> >>>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
> >>>>>                 out_port->lrp_networks.ea_s,
> >>>>>                 IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
> >>>>>                 port_ip, out_port->json_key);
> >>>>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
> >>>>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
> >>>>>                          ds_cstr(&match), ds_cstr(&actions),
> >>>>>                          &st_route->header_);
> >>>>>
> >>>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
> >>>>>   struct ds route_match = DS_EMPTY_INITIALIZER;
> >>>>>
> >>>>>   char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
> >>>>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
> >>>>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
> >>>>>   build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
> >>>>> -                      &route_match, &priority);
> >>>>> +                      &route_match, &priority, ofs);
> >>>>>   free(prefix_s);
> >>>>>
> >>>>>   struct ds actions = DS_EMPTY_INITIALIZER;
> >>>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
> >>>>>         const struct ovn_port *op, const char *lrp_addr_s,
> >>>>>         const char *network_s, int plen, const char *gateway,
> >>>>>         bool is_src_route, const struct ovsdb_idl_row *stage_hint,
> >>>>> -          bool is_discard_route)
> >>>>> +          bool is_discard_route, int ofs)
> >>>>> {
> >>>>>   bool is_ipv4 = strchr(network_s, '.') ? true : false;
> >>>>>   struct ds match = DS_EMPTY_INITIALIZER;
> >>>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
> >>>>>       }
> >>>>>   }
> >>>>>   build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
> >>>>> -                      &match, &priority);
> >>>>> +                      &match, &priority, ofs);
> >>>>>
> >>>>>   struct ds common_actions = DS_EMPTY_INITIALIZER;
> >>>>>   struct ds actions = DS_EMPTY_INITIALIZER;
> >>>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
> >>>>>       }
> >>>>>   }
> >>>>>
> >>>>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
> >>>>> +                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
> >>>>> +                                              : ROUTE_PRIO_OFFSET_STATIC;
> >>>>> +
> >>>>>   char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
> >>>>>   add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
> >>>>>             lrp_addr_s, prefix_s, route_->plen, route->nexthop,
> >>>>> -              route_->is_src_route, &route->header_, route_->is_discard_route);
> >>>>> +              route_->is_src_route, &route->header_, route_->is_discard_route,
> >>>>> +              ofs);
> >>>>>
> >>>>>   free(prefix_s);
> >>>>> }
> >>>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
> >>>>>           add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
> >>>>>                     op->lrp_networks.ipv4_addrs[i].network_s,
> >>>>>                     op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
> >>>>> -                      &op->nbrp->header_, false);
> >>>>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
> >>>>>       }
> >>>>>
> >>>>>       for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> >>>>>           add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
> >>>>>                     op->lrp_networks.ipv6_addrs[i].network_s,
> >>>>>                     op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
> >>>>> -                      &op->nbrp->header_, false);
> >>>>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
> >>>>>       }
> >>>>>   } else if (lsp_is_router(op->nbsp)) {
> >>>>>       struct ovn_port *peer = ovn_port_get_peer(ports, op);
> >>>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
> >>>>>                             peer->lrp_networks.ipv4_addrs[0].addr_s,
> >>>>>                             laddrs->ipv4_addrs[k].network_s,
> >>>>>                             laddrs->ipv4_addrs[k].plen, NULL, false,
> >>>>> -                              &peer->nbrp->header_, false);
> >>>>> +                              &peer->nbrp->header_, false,
> >>>>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
> >>>>>               }
> >>>>>           }
> >>>>>       }
> >>>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>>>       /* Drop IPv6 multicast traffic that shouldn't be forwarded,
> >>>>>        * i.e., router solicitation and router advertisement.
> >>>>>        */
> >>>>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
> >>>>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
> >>>>>                     "nd_rs || nd_ra", "drop;");
> >>>>>       if (!od->mcast_info.rtr.relay) {
> >>>>>           return;
> >>>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>>>           }
> >>>>>           ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
> >>>>>                         igmp_group->mcgroup.name);
> >>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
> >>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
> >>>>>                         ds_cstr(match), ds_cstr(actions));
> >>>>>       }
> >>>>>
> >>>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>>>        * ports. Otherwise drop any multicast traffic.
> >>>>>        */
> >>>>>       if (od->mcast_info.rtr.flood_static) {
> >>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
> >>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
> >>>>>                         "ip4.mcast || ip6.mcast",
> >>>>>                         "clone { "
> >>>>>                               "outport = \""MC_STATIC"\"; "
> >>>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>>>                               "next; "
> >>>>>                         "};");
> >>>>>       } else {
> >>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
> >>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
> >>>>>                         "ip4.mcast || ip6.mcast", "drop;");
> >>>>>       }
> >>>>>   }
> >>>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> >>>>> index fb67395e3..4f3a9d5e3 100644
> >>>>> --- a/northd/ovn-northd.8.xml
> >>>>> +++ b/northd/ovn-northd.8.xml
> >>>>> @@ -2945,12 +2945,12 @@ icmp6 {
> >>>>>
> >>>>>   <p>
> >>>>>     If ECMP routes with symmetric reply are configured in the
> >>>>> -      <code>OVN_Northbound</code> database for a gateway router, a priority-300
> >>>>> -      flow is added for each router port on which symmetric replies are
> >>>>> -      configured. The matching logic for these ports essentially reverses the
> >>>>> -      configured logic of the ECMP route. So for instance, a route with a
> >>>>> -      destination routing policy will instead match if the source IP address
> >>>>> -      matches the static route's prefix. The flow uses the action
> >>>>> +      <code>OVN_Northbound</code> database for a gateway router, a
> >>>>> +      priority-10300 flow is added for each router port on which symmetric
> >>>>> +      replies are configured. The matching logic for these ports essentially
> >>>>> +      reverses the configured logic of the ECMP route. So for instance, a route
> >>>>> +      with a destination routing policy will instead match if the source IP
> >>>>> +      address matches the static route's prefix. The flow uses the action
> >>>>>     <code>ct_next</code> to send IP packets to the connection tracker for
> >>>>>     packet de-fragmentation and tracking before sending it to the next table.
> >>>>>   </p>
> >>>>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> >>>>> index 85b47a18f..3c1a97f73 100644
> >>>>> --- a/tests/ovn-northd.at
> >>>>> +++ b/tests/ovn-northd.at
> >>>>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
> >>>>>
> >>>>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> >>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> >>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> >>>>> ])
> >>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
> >>>>> table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
> >>>>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
> >>>>>
> >>>>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> >>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> >>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
> >>>>> ])
> >>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
> >>>>> table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
> >>>>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
> >>>>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>>>
> >>>>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> >>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> >>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> >>>>> ])
> >>>>>
> >>>>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
> >>>>>
> >>>>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
> >>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> >>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
> >>>>> ])
> >>>>>
> >>>>> AT_CLEANUP
> >>>>> --
> >>>>> 2.30.0
> >>>>>
> >>>>> _______________________________________________
> >>>>> dev mailing list
> >>>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
> >>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> >>> _______________________________________________
> >>> dev mailing list
> >>> dev@openvswitch.org
> >>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org <mailto:dev@openvswitch.org>
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org <mailto:dev@openvswitch.org>
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Vladislav Odintsov Nov. 18, 2021, 6:10 a.m. UTC | #7
Great, thanks.

Hi @Han,

I’d like you to look at the patch series too. Would you have time on it?
If yes, could you redirect me on terms please.

Thanks.

Regards,
Vladislav Odintsov

> On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org> wrote:
> 
> On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>> 
>> I’ve submitted a patch [1] with my findings.
>> Also, if no comments for other my patches from this patch series, I can submit a new version.
>> Should I?
> 
> No comments from my side.  Perhaps you can submit another version.
> 
> Numan
> 
>> 
>> 1: https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/ <https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/>
>> 
>> Regards,
>> Vladislav Odintsov
>> 
>>> On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org <mailto:numans@ovn.org>> wrote:
>>> 
>>> On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com> <mailto:odivlad@gmail.com <mailto:odivlad@gmail.com>>> wrote:
>>>> 
>>>> Two additions:
>>>> 
>>>> 1. Regarding documentation for flow in lr_in_defrag section:
>>>> 
>>>> It seems to me that documentation for it is written in a wrong section (lr_in_defrag).
>>>> Since the flow is installed in lr_in_ip_routing, it should be documented there.
>>>> I’ll move it if you don’t mind.
>>> 
>>> Sure.  Thanks.  I'd suggest having a separate patch for fixing the
>>> documentation.
>>>> 
>>>> 
>>>> 2. Documentation for other flow changes was added, but I’ve committed it to a wrong patch (#3).
>>>> I’ll move documentation update between patches.
>>> 
>>> Ack.
>>> 
>>> Thanks
>>> Numan
>>> 
>>>> 
>>>> Regards,
>>>> Vladislav Odintsov
>>>> 
>>>>> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com> wrote:
>>>>> 
>>>>> Hi Numan,
>>>>> 
>>>>> Thanks for the review.
>>>>> Sure I will fix this. Should I wait for more comments or that’s all and I can send v9?
>>>>> 
>>>>> Regards,
>>>>> Vladislav Odintsov
>>>>> 
>>>>>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
>>>>>> 
>>>>>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>>>>>> 
>>>>>>> With this patch routes to connected networks have higher
>>>>>>> priority than static routes with same ip_prefix.
>>>>>>> 
>>>>>>> This brings commonly-used behaviour for routes lookup order:
>>>>>>> 1: longest prefix match
>>>>>>> 2: metric
>>>>>>> 
>>>>>>> The metric has next lookup order:
>>>>>>> 1: connected routes
>>>>>>> 2: static routes
>>>>>>> 
>>>>>>> Earlier static and connected routes with same ip_prefix had
>>>>>>> the same priority, so it was impossible to predict which one
>>>>>>> is used for routing decision.
>>>>>>> 
>>>>>>> Each route's prefix length has its own 'slot' in lflow prios.
>>>>>>> Now prefix length space is calculated using next information:
>>>>>>> to calculate route's priority prefixlen multiplied by 3
>>>>>>> + route origin offset (0 - source-based route; 1 - directly-
>>>>>>> connected route; 2 - static route).
>>>>>>> 
>>>>>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
>>>>>>> by 10000.
>>>>>>> 
>>>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>>>>> 
>>>>>> Hi Vladislav,
>>>>>> 
>>>>>> Thanks for the patch.  Overall it looks good to me.  I've one comment.
>>>>>> Looks like the documentation updated in ovn-northd.8.xml is not accurate.
>>>>>> 
>>>>>> This patch modifies the flows in the lr_in_ip_routing stage but this
>>>>>> patch doesn't update the documentation.
>>>>>> Also the patch updates the documentation for the flow in lr_in_defrag
>>>>>> stage, which seems not correct.
>>>>>> 
>>>>>> Can you please update the documentation accurately ?
>>>>>> 
>>>>>> Numan
>>>>>> 
>>>>>>> ---
>>>>>>> northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
>>>>>>> northd/ovn-northd.8.xml | 12 +++++-----
>>>>>>> tests/ovn-northd.at     |  8 +++----
>>>>>>> 3 files changed, 45 insertions(+), 25 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>>>>> index 1e8a3457c..0d513f039 100644
>>>>>>> --- a/northd/northd.c
>>>>>>> +++ b/northd/northd.c
>>>>>>> @@ -305,6 +305,15 @@ enum ovn_stage {
>>>>>>> *
>>>>>>> */
>>>>>>> 
>>>>>>> +/*
>>>>>>> + * Route offsets implement logic to prioritize traffic for routes with
>>>>>>> + * same ip_prefix values:
>>>>>>> + *  -  connected route overrides static one;
>>>>>>> + *  -  static route overrides connected route. */
>>>>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>>>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>>>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>>>>>> +
>>>>>>> /* Returns an "enum ovn_stage" built from the arguments. */
>>>>>>> static enum ovn_stage
>>>>>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
>>>>>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>>>>>>>  struct in6_addr prefix;
>>>>>>>  unsigned int plen;
>>>>>>>  bool is_src_route;
>>>>>>> +    const char *origin;
>>>>>>>  uint16_t route_count;
>>>>>>>  struct ovs_list route_list; /* Contains ecmp_route_list_node */
>>>>>>> };
>>>>>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>>>>>>>  eg->prefix = route->prefix;
>>>>>>>  eg->plen = route->plen;
>>>>>>>  eg->is_src_route = route->is_src_route;
>>>>>>> +    eg->origin = smap_get_def(&route->route->options, "origin", "");
>>>>>>>  ovs_list_init(&eg->route_list);
>>>>>>>  ecmp_groups_add_route(eg, route);
>>>>>>> 
>>>>>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
>>>>>>> static void
>>>>>>> build_route_match(const struct ovn_port *op_inport, const char *network_s,
>>>>>>>                int plen, bool is_src_route, bool is_ipv4, struct ds *match,
>>>>>>> -                  uint16_t *priority)
>>>>>>> +                  uint16_t *priority, int ofs)
>>>>>>> {
>>>>>>>  const char *dir;
>>>>>>>  /* The priority here is calculated to implement longest-prefix-match
>>>>>>>   * routing. */
>>>>>>>  if (is_src_route) {
>>>>>>>      dir = "src";
>>>>>>> -        *priority = plen * 2;
>>>>>>> +        ofs = 0;
>>>>>>>  } else {
>>>>>>>      dir = "dst";
>>>>>>> -        *priority = (plen * 2) + 1;
>>>>>>>  }
>>>>>>> 
>>>>>>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
>>>>>>> +
>>>>>>>  if (op_inport) {
>>>>>>>      ds_put_format(match, "inport == %s && ", op_inport->json_key);
>>>>>>>  }
>>>>>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
>>>>>>>                out_port->lrp_networks.ea_s,
>>>>>>>                IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>>>>>>>                port_ip, out_port->json_key);
>>>>>>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
>>>>>>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
>>>>>>>                         ds_cstr(&match), ds_cstr(&actions),
>>>>>>>                         &st_route->header_);
>>>>>>> 
>>>>>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>>>>>>  struct ds route_match = DS_EMPTY_INITIALIZER;
>>>>>>> 
>>>>>>>  char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
>>>>>>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
>>>>>>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>>>>>>>  build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
>>>>>>> -                      &route_match, &priority);
>>>>>>> +                      &route_match, &priority, ofs);
>>>>>>>  free(prefix_s);
>>>>>>> 
>>>>>>>  struct ds actions = DS_EMPTY_INITIALIZER;
>>>>>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>>>>>>        const struct ovn_port *op, const char *lrp_addr_s,
>>>>>>>        const char *network_s, int plen, const char *gateway,
>>>>>>>        bool is_src_route, const struct ovsdb_idl_row *stage_hint,
>>>>>>> -          bool is_discard_route)
>>>>>>> +          bool is_discard_route, int ofs)
>>>>>>> {
>>>>>>>  bool is_ipv4 = strchr(network_s, '.') ? true : false;
>>>>>>>  struct ds match = DS_EMPTY_INITIALIZER;
>>>>>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
>>>>>>>      }
>>>>>>>  }
>>>>>>>  build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
>>>>>>> -                      &match, &priority);
>>>>>>> +                      &match, &priority, ofs);
>>>>>>> 
>>>>>>>  struct ds common_actions = DS_EMPTY_INITIALIZER;
>>>>>>>  struct ds actions = DS_EMPTY_INITIALIZER;
>>>>>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
>>>>>>>      }
>>>>>>>  }
>>>>>>> 
>>>>>>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
>>>>>>> +                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
>>>>>>> +                                              : ROUTE_PRIO_OFFSET_STATIC;
>>>>>>> +
>>>>>>>  char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
>>>>>>>  add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
>>>>>>>            lrp_addr_s, prefix_s, route_->plen, route->nexthop,
>>>>>>> -              route_->is_src_route, &route->header_, route_->is_discard_route);
>>>>>>> +              route_->is_src_route, &route->header_, route_->is_discard_route,
>>>>>>> +              ofs);
>>>>>>> 
>>>>>>>  free(prefix_s);
>>>>>>> }
>>>>>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>>>>>>>          add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
>>>>>>>                    op->lrp_networks.ipv4_addrs[i].network_s,
>>>>>>>                    op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
>>>>>>> -                      &op->nbrp->header_, false);
>>>>>>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>>>      }
>>>>>>> 
>>>>>>>      for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>>>>>>          add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
>>>>>>>                    op->lrp_networks.ipv6_addrs[i].network_s,
>>>>>>>                    op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
>>>>>>> -                      &op->nbrp->header_, false);
>>>>>>> +                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>>>      }
>>>>>>>  } else if (lsp_is_router(op->nbsp)) {
>>>>>>>      struct ovn_port *peer = ovn_port_get_peer(ports, op);
>>>>>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>>>>>>>                            peer->lrp_networks.ipv4_addrs[0].addr_s,
>>>>>>>                            laddrs->ipv4_addrs[k].network_s,
>>>>>>>                            laddrs->ipv4_addrs[k].plen, NULL, false,
>>>>>>> -                              &peer->nbrp->header_, false);
>>>>>>> +                              &peer->nbrp->header_, false,
>>>>>>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>>>              }
>>>>>>>          }
>>>>>>>      }
>>>>>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>>>      /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>>>>>>>       * i.e., router solicitation and router advertisement.
>>>>>>>       */
>>>>>>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
>>>>>>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>>>>>>>                    "nd_rs || nd_ra", "drop;");
>>>>>>>      if (!od->mcast_info.rtr.relay) {
>>>>>>>          return;
>>>>>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>>>          }
>>>>>>>          ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
>>>>>>>                        igmp_group->mcgroup.name);
>>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
>>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
>>>>>>>                        ds_cstr(match), ds_cstr(actions));
>>>>>>>      }
>>>>>>> 
>>>>>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>>>       * ports. Otherwise drop any multicast traffic.
>>>>>>>       */
>>>>>>>      if (od->mcast_info.rtr.flood_static) {
>>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>>>>>>                        "ip4.mcast || ip6.mcast",
>>>>>>>                        "clone { "
>>>>>>>                              "outport = \""MC_STATIC"\"; "
>>>>>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>>>                              "next; "
>>>>>>>                        "};");
>>>>>>>      } else {
>>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
>>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
>>>>>>>                        "ip4.mcast || ip6.mcast", "drop;");
>>>>>>>      }
>>>>>>>  }
>>>>>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>>>>>>> index fb67395e3..4f3a9d5e3 100644
>>>>>>> --- a/northd/ovn-northd.8.xml
>>>>>>> +++ b/northd/ovn-northd.8.xml
>>>>>>> @@ -2945,12 +2945,12 @@ icmp6 {
>>>>>>> 
>>>>>>>  <p>
>>>>>>>    If ECMP routes with symmetric reply are configured in the
>>>>>>> -      <code>OVN_Northbound</code> database for a gateway router, a priority-300
>>>>>>> -      flow is added for each router port on which symmetric replies are
>>>>>>> -      configured. The matching logic for these ports essentially reverses the
>>>>>>> -      configured logic of the ECMP route. So for instance, a route with a
>>>>>>> -      destination routing policy will instead match if the source IP address
>>>>>>> -      matches the static route's prefix. The flow uses the action
>>>>>>> +      <code>OVN_Northbound</code> database for a gateway router, a
>>>>>>> +      priority-10300 flow is added for each router port on which symmetric
>>>>>>> +      replies are configured. The matching logic for these ports essentially
>>>>>>> +      reverses the configured logic of the ECMP route. So for instance, a route
>>>>>>> +      with a destination routing policy will instead match if the source IP
>>>>>>> +      address matches the static route's prefix. The flow uses the action
>>>>>>>    <code>ct_next</code> to send IP packets to the connection tracker for
>>>>>>>    packet de-fragmentation and tracking before sending it to the next table.
>>>>>>>  </p>
>>>>>>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
>>>>>>> index 85b47a18f..3c1a97f73 100644
>>>>>>> --- a/tests/ovn-northd.at
>>>>>>> +++ b/tests/ovn-northd.at
>>>>>>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>>>>>> 
>>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>>>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>>>>>> ])
>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>>>> table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>>>>>>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>>>>>> 
>>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>>>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
>>>>>>> ])
>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>>>> table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
>>>>>>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
>>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>>>> 
>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>>>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>>>>>> ])
>>>>>>> 
>>>>>>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
>>>>>>> 
>>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
>>>>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>>>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
>>>>>>> ])
>>>>>>> 
>>>>>>> AT_CLEANUP
>>>>>>> --
>>>>>>> 2.30.0
>>>>>>> 
>>>>>>> _______________________________________________
>>>>>>> dev mailing list
>>>>>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>>>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
>>>>> _______________________________________________
>>>>> dev mailing list
>>>>> dev@openvswitch.org
>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>>> 
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev@openvswitch.org <mailto:dev@openvswitch.org> <mailto:dev@openvswitch.org <mailto:dev@openvswitch.org>>
>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev> <https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>>
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org <mailto:dev@openvswitch.org> <mailto:dev@openvswitch.org <mailto:dev@openvswitch.org>>
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev> <https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org <mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
Odintsov Vladislav Nov. 18, 2021, 6:13 a.m. UTC | #8
+ Han

Regards,
Vladislav Odintsov

On 18 Nov 2021, at 09:10, Vladislav Odintsov <odivlad@gmail.com<mailto:odivlad@gmail.com>> wrote:

Great, thanks.

Hi @Han,

I’d like you to look at the patch series too. Would you have time on it?
If yes, could you redirect me on terms please.

Thanks.

Regards,
Vladislav Odintsov

On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>> wrote:

On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com<mailto:odivlad@gmail.com> <mailto:odivlad@gmail.com>> wrote:

I’ve submitted a patch [1] with my findings.
Also, if no comments for other my patches from this patch series, I can submit a new version.
Should I?

No comments from my side.  Perhaps you can submit another version.

Numan


1: https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/ <https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/>

Regards,
Vladislav Odintsov

On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org<mailto:numans@ovn.org> <mailto:numans@ovn.org>> wrote:

On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com<mailto:odivlad@gmail.com> <mailto:odivlad@gmail.com> <mailto:odivlad@gmail.com <mailto:odivlad@gmail.com>>> wrote:

Two additions:

1. Regarding documentation for flow in lr_in_defrag section:

It seems to me that documentation for it is written in a wrong section (lr_in_defrag).
Since the flow is installed in lr_in_ip_routing, it should be documented there.
I’ll move it if you don’t mind.

Sure.  Thanks.  I'd suggest having a separate patch for fixing the
documentation.


2. Documentation for other flow changes was added, but I’ve committed it to a wrong patch (#3).
I’ll move documentation update between patches.

Ack.

Thanks
Numan


Regards,
Vladislav Odintsov

On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com<mailto:odivlad@gmail.com>> wrote:

Hi Numan,

Thanks for the review.
Sure I will fix this. Should I wait for more comments or that’s all and I can send v9?

Regards,
Vladislav Odintsov

On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org<mailto:numans@ovn.org>> wrote:

On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <odivlad@gmail.com<mailto:odivlad@gmail.com> <mailto:odivlad@gmail.com>> wrote:

With this patch routes to connected networks have higher
priority than static routes with same ip_prefix.

This brings commonly-used behaviour for routes lookup order:
1: longest prefix match
2: metric

The metric has next lookup order:
1: connected routes
2: static routes

Earlier static and connected routes with same ip_prefix had
the same priority, so it was impossible to predict which one
is used for routing decision.

Each route's prefix length has its own 'slot' in lflow prios.
Now prefix length space is calculated using next information:
to calculate route's priority prefixlen multiplied by 3
+ route origin offset (0 - source-based route; 1 - directly-
connected route; 2 - static route).

Also, enlarge prio for generic records in lr_in_ip_routing stage
by 10000.

Signed-off-by: Vladislav Odintsov <odivlad@gmail.com<mailto:odivlad@gmail.com>>

Hi Vladislav,

Thanks for the patch.  Overall it looks good to me.  I've one comment.
Looks like the documentation updated in ovn-northd.8.xml is not accurate.

This patch modifies the flows in the lr_in_ip_routing stage but this
patch doesn't update the documentation.
Also the patch updates the documentation for the flow in lr_in_defrag
stage, which seems not correct.

Can you please update the documentation accurately ?

Numan

---
northd/northd.c         | 50 ++++++++++++++++++++++++++++-------------
northd/ovn-northd.8.xml | 12 +++++-----
tests/ovn-northd.at<http://ovn-northd.at>     |  8 +++----
3 files changed, 45 insertions(+), 25 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 1e8a3457c..0d513f039 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -305,6 +305,15 @@ enum ovn_stage {
*
*/

+/*
+ * Route offsets implement logic to prioritize traffic for routes with
+ * same ip_prefix values:
+ *  -  connected route overrides static one;
+ *  -  static route overrides connected route. */
+#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
+#define ROUTE_PRIO_OFFSET_STATIC 1
+#define ROUTE_PRIO_OFFSET_CONNECTED 2
+
/* Returns an "enum ovn_stage" built from the arguments. */
static enum ovn_stage
ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
@@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
struct in6_addr prefix;
unsigned int plen;
bool is_src_route;
+    const char *origin;
uint16_t route_count;
struct ovs_list route_list; /* Contains ecmp_route_list_node */
};
@@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
eg->prefix = route->prefix;
eg->plen = route->plen;
eg->is_src_route = route->is_src_route;
+    eg->origin = smap_get_def(&route->route->options, "origin", "");
ovs_list_init(&eg->route_list);
ecmp_groups_add_route(eg, route);

@@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
static void
build_route_match(const struct ovn_port *op_inport, const char *network_s,
              int plen, bool is_src_route, bool is_ipv4, struct ds *match,
-                  uint16_t *priority)
+                  uint16_t *priority, int ofs)
{
const char *dir;
/* The priority here is calculated to implement longest-prefix-match
 * routing. */
if (is_src_route) {
    dir = "src";
-        *priority = plen * 2;
+        ofs = 0;
} else {
    dir = "dst";
-        *priority = (plen * 2) + 1;
}

+    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
+
if (op_inport) {
    ds_put_format(match, "inport == %s && ", op_inport->json_key);
}
@@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap *lflows,
              out_port->lrp_networks.ea_s,
              IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
              port_ip, out_port->json_key);
-    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
+    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
                       ds_cstr(&match), ds_cstr(&actions),
                       &st_route->header_);

@@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
struct ds route_match = DS_EMPTY_INITIALIZER;

char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
+    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
+        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
-                      &route_match, &priority);
+                      &route_match, &priority, ofs);
free(prefix_s);

struct ds actions = DS_EMPTY_INITIALIZER;
@@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
      const struct ovn_port *op, const char *lrp_addr_s,
      const char *network_s, int plen, const char *gateway,
      bool is_src_route, const struct ovsdb_idl_row *stage_hint,
-          bool is_discard_route)
+          bool is_discard_route, int ofs)
{
bool is_ipv4 = strchr(network_s, '.') ? true : false;
struct ds match = DS_EMPTY_INITIALIZER;
@@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct ovn_datapath *od,
    }
}
build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
-                      &match, &priority);
+                      &match, &priority, ofs);

struct ds common_actions = DS_EMPTY_INITIALIZER;
struct ds actions = DS_EMPTY_INITIALIZER;
@@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
    }
}

+    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
+                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
+                                              : ROUTE_PRIO_OFFSET_STATIC;
+
char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
          lrp_addr_s, prefix_s, route_->plen, route->nexthop,
-              route_->is_src_route, &route->header_, route_->is_discard_route);
+              route_->is_src_route, &route->header_, route_->is_discard_route,
+              ofs);

free(prefix_s);
}
@@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
        add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
                  op->lrp_networks.ipv4_addrs[i].network_s,
                  op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
-                      &op->nbrp->header_, false);
+                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
    }

    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
        add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
                  op->lrp_networks.ipv6_addrs[i].network_s,
                  op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
-                      &op->nbrp->header_, false);
+                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
    }
} else if (lsp_is_router(op->nbsp)) {
    struct ovn_port *peer = ovn_port_get_peer(ports, op);
@@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
                          peer->lrp_networks.ipv4_addrs[0].addr_s,
                          laddrs->ipv4_addrs[k].network_s,
                          laddrs->ipv4_addrs[k].plen, NULL, false,
-                              &peer->nbrp->header_, false);
+                              &peer->nbrp->header_, false,
+                              ROUTE_PRIO_OFFSET_CONNECTED);
            }
        }
    }
@@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
    /* Drop IPv6 multicast traffic that shouldn't be forwarded,
     * i.e., router solicitation and router advertisement.
     */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
                  "nd_rs || nd_ra", "drop;");
    if (!od->mcast_info.rtr.relay) {
        return;
@@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
        }
        ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
                      igmp_group->mcgroup.name);
-            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
                      ds_cstr(match), ds_cstr(actions));
    }

@@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
     * ports. Otherwise drop any multicast traffic.
     */
    if (od->mcast_info.rtr.flood_static) {
-            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
                      "ip4.mcast || ip6.mcast",
                      "clone { "
                            "outport = \""MC_STATIC"\"; "
@@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
                            "next; "
                      "};");
    } else {
-            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
                      "ip4.mcast || ip6.mcast", "drop;");
    }
}
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index fb67395e3..4f3a9d5e3 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -2945,12 +2945,12 @@ icmp6 {

<p>
  If ECMP routes with symmetric reply are configured in the
-      <code>OVN_Northbound</code> database for a gateway router, a priority-300
-      flow is added for each router port on which symmetric replies are
-      configured. The matching logic for these ports essentially reverses the
-      configured logic of the ECMP route. So for instance, a route with a
-      destination routing policy will instead match if the source IP address
-      matches the static route's prefix. The flow uses the action
+      <code>OVN_Northbound</code> database for a gateway router, a
+      priority-10300 flow is added for each router port on which symmetric
+      replies are configured. The matching logic for these ports essentially
+      reverses the configured logic of the ECMP route. So for instance, a route
+      with a destination routing policy will instead match if the source IP
+      address matches the static route's prefix. The flow uses the action
  <code>ct_next</code> to send IP packets to the connection tracker for
  packet de-fragmentation and tracking before sending it to the next table.
</p>
diff --git a/tests/ovn-northd.at<http://ovn-northd.at> b/tests/ovn-northd.at<http://ovn-northd.at>
index 85b47a18f..3c1a97f73 100644
--- a/tests/ovn-northd.at<http://ovn-northd.at>
+++ b/tests/ovn-northd.at<http://ovn-northd.at>
@@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16

ovn-sbctl dump-flows lr0 > lr0flows
AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
-  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
+  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
])
AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
@@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16

ovn-sbctl dump-flows lr0 > lr0flows
AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
-  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
+  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
])
AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
@@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
ovn-sbctl dump-flows lr0 > lr0flows

AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
-  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
])

check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public

ovn-sbctl dump-flows lr0 > lr0flows
AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
-  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
])

AT_CLEANUP
--
2.30.0
Han Zhou Nov. 18, 2021, 7:26 a.m. UTC | #9
On Wed, Nov 17, 2021 at 10:10 PM Vladislav Odintsov <odivlad@gmail.com>
wrote:
>
> Great, thanks.
>
> Hi @Han,
>
> I’d like you to look at the patch series too. Would you have time on it?
> If yes, could you redirect me on terms please.

Hi Vladislav,

Thanks for adding me. I am sorry that I don't think I will have enough time
for a detailed review for this series until Nov 29. Not sure if you can
wait that long, but I don't think my review is mandatory if Numan is
reviewing all the patches in detail.

I have a quick comment though, regarding the priority offset. It is
mentioned in the commit message:
> >>>>>>> Each route's prefix length has its own 'slot' in lflow prios.
> >>>>>>> Now prefix length space is calculated using next information:
> >>>>>>> to calculate route's priority prefixlen multiplied by 3
> >>>>>>> + route origin offset (0 - source-based route; 1 - directly-
> >>>>>>> connected route; 2 - static route).

But in the code, 2 is for connected, and 1 is for static:
> >>>>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> >>>>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
> >>>>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2

I wonder which one is your intent? I'd let the static route have higher
priority, because otherwise why would the user add the static route at all?
But this is more of a question than a suggestion. Is there any *standard*
behavior or similar thing that we can refer from e.g. AWS?

Thanks,
Han

>
> Thanks.
>
> Regards,
> Vladislav Odintsov
>
> > On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org> wrote:
> >
> > On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com
<mailto:odivlad@gmail.com>> wrote:
> >>
> >> I’ve submitted a patch [1] with my findings.
> >> Also, if no comments for other my patches from this patch series, I
can submit a new version.
> >> Should I?
> >
> > No comments from my side.  Perhaps you can submit another version.
> >
> > Numan
> >
> >>
> >> 1:
https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
<
https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
>
> >>
> >> Regards,
> >> Vladislav Odintsov
> >>
> >>> On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org <mailto:
numans@ovn.org>> wrote:
> >>>
> >>> On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com
<mailto:odivlad@gmail.com> <mailto:odivlad@gmail.com <mailto:
odivlad@gmail.com>>> wrote:
> >>>>
> >>>> Two additions:
> >>>>
> >>>> 1. Regarding documentation for flow in lr_in_defrag section:
> >>>>
> >>>> It seems to me that documentation for it is written in a wrong
section (lr_in_defrag).
> >>>> Since the flow is installed in lr_in_ip_routing, it should be
documented there.
> >>>> I’ll move it if you don’t mind.
> >>>
> >>> Sure.  Thanks.  I'd suggest having a separate patch for fixing the
> >>> documentation.
> >>>>
> >>>>
> >>>> 2. Documentation for other flow changes was added, but I’ve
committed it to a wrong patch (#3).
> >>>> I’ll move documentation update between patches.
> >>>
> >>> Ack.
> >>>
> >>> Thanks
> >>> Numan
> >>>
> >>>>
> >>>> Regards,
> >>>> Vladislav Odintsov
> >>>>
> >>>>> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com>
wrote:
> >>>>>
> >>>>> Hi Numan,
> >>>>>
> >>>>> Thanks for the review.
> >>>>> Sure I will fix this. Should I wait for more comments or that’s all
and I can send v9?
> >>>>>
> >>>>> Regards,
> >>>>> Vladislav Odintsov
> >>>>>
> >>>>>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
> >>>>>>
> >>>>>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <
odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
> >>>>>>>
> >>>>>>> With this patch routes to connected networks have higher
> >>>>>>> priority than static routes with same ip_prefix.
> >>>>>>>
> >>>>>>> This brings commonly-used behaviour for routes lookup order:
> >>>>>>> 1: longest prefix match
> >>>>>>> 2: metric
> >>>>>>>
> >>>>>>> The metric has next lookup order:
> >>>>>>> 1: connected routes
> >>>>>>> 2: static routes
> >>>>>>>
> >>>>>>> Earlier static and connected routes with same ip_prefix had
> >>>>>>> the same priority, so it was impossible to predict which one
> >>>>>>> is used for routing decision.
> >>>>>>>
> >>>>>>> Each route's prefix length has its own 'slot' in lflow prios.
> >>>>>>> Now prefix length space is calculated using next information:
> >>>>>>> to calculate route's priority prefixlen multiplied by 3
> >>>>>>> + route origin offset (0 - source-based route; 1 - directly-
> >>>>>>> connected route; 2 - static route).
> >>>>>>>
> >>>>>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
> >>>>>>> by 10000.
> >>>>>>>
> >>>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> >>>>>>
> >>>>>> Hi Vladislav,
> >>>>>>
> >>>>>> Thanks for the patch.  Overall it looks good to me.  I've one
comment.
> >>>>>> Looks like the documentation updated in ovn-northd.8.xml is not
accurate.
> >>>>>>
> >>>>>> This patch modifies the flows in the lr_in_ip_routing stage but
this
> >>>>>> patch doesn't update the documentation.
> >>>>>> Also the patch updates the documentation for the flow in
lr_in_defrag
> >>>>>> stage, which seems not correct.
> >>>>>>
> >>>>>> Can you please update the documentation accurately ?
> >>>>>>
> >>>>>> Numan
> >>>>>>
> >>>>>>> ---
> >>>>>>> northd/northd.c         | 50
++++++++++++++++++++++++++++-------------
> >>>>>>> northd/ovn-northd.8.xml | 12 +++++-----
> >>>>>>> tests/ovn-northd.at     |  8 +++----
> >>>>>>> 3 files changed, 45 insertions(+), 25 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/northd/northd.c b/northd/northd.c
> >>>>>>> index 1e8a3457c..0d513f039 100644
> >>>>>>> --- a/northd/northd.c
> >>>>>>> +++ b/northd/northd.c
> >>>>>>> @@ -305,6 +305,15 @@ enum ovn_stage {
> >>>>>>> *
> >>>>>>> */
> >>>>>>>
> >>>>>>> +/*
> >>>>>>> + * Route offsets implement logic to prioritize traffic for
routes with
> >>>>>>> + * same ip_prefix values:
> >>>>>>> + *  -  connected route overrides static one;
> >>>>>>> + *  -  static route overrides connected route. */
> >>>>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> >>>>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
> >>>>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> >>>>>>> +
> >>>>>>> /* Returns an "enum ovn_stage" built from the arguments. */
> >>>>>>> static enum ovn_stage
> >>>>>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline
pipeline,
> >>>>>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
> >>>>>>>  struct in6_addr prefix;
> >>>>>>>  unsigned int plen;
> >>>>>>>  bool is_src_route;
> >>>>>>> +    const char *origin;
> >>>>>>>  uint16_t route_count;
> >>>>>>>  struct ovs_list route_list; /* Contains ecmp_route_list_node */
> >>>>>>> };
> >>>>>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
> >>>>>>>  eg->prefix = route->prefix;
> >>>>>>>  eg->plen = route->plen;
> >>>>>>>  eg->is_src_route = route->is_src_route;
> >>>>>>> +    eg->origin = smap_get_def(&route->route->options, "origin",
"");
> >>>>>>>  ovs_list_init(&eg->route_list);
> >>>>>>>  ecmp_groups_add_route(eg, route);
> >>>>>>>
> >>>>>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct
in6_addr *prefix, unsigned int plen)
> >>>>>>> static void
> >>>>>>> build_route_match(const struct ovn_port *op_inport, const char
*network_s,
> >>>>>>>                int plen, bool is_src_route, bool is_ipv4, struct
ds *match,
> >>>>>>> -                  uint16_t *priority)
> >>>>>>> +                  uint16_t *priority, int ofs)
> >>>>>>> {
> >>>>>>>  const char *dir;
> >>>>>>>  /* The priority here is calculated to implement
longest-prefix-match
> >>>>>>>   * routing. */
> >>>>>>>  if (is_src_route) {
> >>>>>>>      dir = "src";
> >>>>>>> -        *priority = plen * 2;
> >>>>>>> +        ofs = 0;
> >>>>>>>  } else {
> >>>>>>>      dir = "dst";
> >>>>>>> -        *priority = (plen * 2) + 1;
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
> >>>>>>> +
> >>>>>>>  if (op_inport) {
> >>>>>>>      ds_put_format(match, "inport == %s && ",
op_inport->json_key);
> >>>>>>>  }
> >>>>>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap
*lflows,
> >>>>>>>                out_port->lrp_networks.ea_s,
> >>>>>>>                IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
> >>>>>>>                port_ip, out_port->json_key);
> >>>>>>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
300,
> >>>>>>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
10300,
> >>>>>>>                         ds_cstr(&match), ds_cstr(&actions),
> >>>>>>>                         &st_route->header_);
> >>>>>>>
> >>>>>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows,
struct ovn_datapath *od,
> >>>>>>>  struct ds route_match = DS_EMPTY_INITIALIZER;
> >>>>>>>
> >>>>>>>  char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
> >>>>>>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
> >>>>>>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
> >>>>>>>  build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route,
is_ipv4,
> >>>>>>> -                      &route_match, &priority);
> >>>>>>> +                      &route_match, &priority, ofs);
> >>>>>>>  free(prefix_s);
> >>>>>>>
> >>>>>>>  struct ds actions = DS_EMPTY_INITIALIZER;
> >>>>>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct
ovn_datapath *od,
> >>>>>>>        const struct ovn_port *op, const char *lrp_addr_s,
> >>>>>>>        const char *network_s, int plen, const char *gateway,
> >>>>>>>        bool is_src_route, const struct ovsdb_idl_row *stage_hint,
> >>>>>>> -          bool is_discard_route)
> >>>>>>> +          bool is_discard_route, int ofs)
> >>>>>>> {
> >>>>>>>  bool is_ipv4 = strchr(network_s, '.') ? true : false;
> >>>>>>>  struct ds match = DS_EMPTY_INITIALIZER;
> >>>>>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct
ovn_datapath *od,
> >>>>>>>      }
> >>>>>>>  }
> >>>>>>>  build_route_match(op_inport, network_s, plen, is_src_route,
is_ipv4,
> >>>>>>> -                      &match, &priority);
> >>>>>>> +                      &match, &priority, ofs);
> >>>>>>>
> >>>>>>>  struct ds common_actions = DS_EMPTY_INITIALIZER;
> >>>>>>>  struct ds actions = DS_EMPTY_INITIALIZER;
> >>>>>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap
*lflows, struct ovn_datapath *od,
> >>>>>>>      }
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin",
""),
> >>>>>>> +                      ROUTE_ORIGIN_CONNECTED) ?
ROUTE_PRIO_OFFSET_CONNECTED
> >>>>>>> +                                              :
ROUTE_PRIO_OFFSET_STATIC;
> >>>>>>> +
> >>>>>>>  char *prefix_s = build_route_prefix_s(&route_->prefix,
route_->plen);
> >>>>>>>  add_route(lflows, route_->is_discard_route ? od : out_port->od,
out_port,
> >>>>>>>            lrp_addr_s, prefix_s, route_->plen, route->nexthop,
> >>>>>>> -              route_->is_src_route, &route->header_,
route_->is_discard_route);
> >>>>>>> +              route_->is_src_route, &route->header_,
route_->is_discard_route,
> >>>>>>> +              ofs);
> >>>>>>>
> >>>>>>>  free(prefix_s);
> >>>>>>> }
> >>>>>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
> >>>>>>>          add_route(lflows, op->od, op,
op->lrp_networks.ipv4_addrs[i].addr_s,
> >>>>>>>                    op->lrp_networks.ipv4_addrs[i].network_s,
> >>>>>>>                    op->lrp_networks.ipv4_addrs[i].plen, NULL,
false,
> >>>>>>> -                      &op->nbrp->header_, false);
> >>>>>>> +                      &op->nbrp->header_, false,
ROUTE_PRIO_OFFSET_CONNECTED);
> >>>>>>>      }
> >>>>>>>
> >>>>>>>      for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> >>>>>>>          add_route(lflows, op->od, op,
op->lrp_networks.ipv6_addrs[i].addr_s,
> >>>>>>>                    op->lrp_networks.ipv6_addrs[i].network_s,
> >>>>>>>                    op->lrp_networks.ipv6_addrs[i].plen, NULL,
false,
> >>>>>>> -                      &op->nbrp->header_, false);
> >>>>>>> +                      &op->nbrp->header_, false,
ROUTE_PRIO_OFFSET_CONNECTED);
> >>>>>>>      }
> >>>>>>>  } else if (lsp_is_router(op->nbsp)) {
> >>>>>>>      struct ovn_port *peer = ovn_port_get_peer(ports, op);
> >>>>>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
> >>>>>>>
 peer->lrp_networks.ipv4_addrs[0].addr_s,
> >>>>>>>                            laddrs->ipv4_addrs[k].network_s,
> >>>>>>>                            laddrs->ipv4_addrs[k].plen, NULL,
false,
> >>>>>>> -                              &peer->nbrp->header_, false);
> >>>>>>> +                              &peer->nbrp->header_, false,
> >>>>>>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
> >>>>>>>              }
> >>>>>>>          }
> >>>>>>>      }
> >>>>>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>>>>>      /* Drop IPv6 multicast traffic that shouldn't be forwarded,
> >>>>>>>       * i.e., router solicitation and router advertisement.
> >>>>>>>       */
> >>>>>>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
> >>>>>>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
> >>>>>>>                    "nd_rs || nd_ra", "drop;");
> >>>>>>>      if (!od->mcast_info.rtr.relay) {
> >>>>>>>          return;
> >>>>>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>>>>>          }
> >>>>>>>          ds_put_format(actions, "outport = \"%s\"; ip.ttl--;
next;",
> >>>>>>>                        igmp_group->mcgroup.name);
> >>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
500,
> >>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
10500,
> >>>>>>>                        ds_cstr(match), ds_cstr(actions));
> >>>>>>>      }
> >>>>>>>
> >>>>>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>>>>>       * ports. Otherwise drop any multicast traffic.
> >>>>>>>       */
> >>>>>>>      if (od->mcast_info.rtr.flood_static) {
> >>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
450,
> >>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
10450,
> >>>>>>>                        "ip4.mcast || ip6.mcast",
> >>>>>>>                        "clone { "
> >>>>>>>                              "outport = \""MC_STATIC"\"; "
> >>>>>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>>>>>                              "next; "
> >>>>>>>                        "};");
> >>>>>>>      } else {
> >>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
450,
> >>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
10450,
> >>>>>>>                        "ip4.mcast || ip6.mcast", "drop;");
> >>>>>>>      }
> >>>>>>>  }
> >>>>>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> >>>>>>> index fb67395e3..4f3a9d5e3 100644
> >>>>>>> --- a/northd/ovn-northd.8.xml
> >>>>>>> +++ b/northd/ovn-northd.8.xml
> >>>>>>> @@ -2945,12 +2945,12 @@ icmp6 {
> >>>>>>>
> >>>>>>>  <p>
> >>>>>>>    If ECMP routes with symmetric reply are configured in the
> >>>>>>> -      <code>OVN_Northbound</code> database for a gateway router,
a priority-300
> >>>>>>> -      flow is added for each router port on which symmetric
replies are
> >>>>>>> -      configured. The matching logic for these ports essentially
reverses the
> >>>>>>> -      configured logic of the ECMP route. So for instance, a
route with a
> >>>>>>> -      destination routing policy will instead match if the
source IP address
> >>>>>>> -      matches the static route's prefix. The flow uses the action
> >>>>>>> +      <code>OVN_Northbound</code> database for a gateway router,
a
> >>>>>>> +      priority-10300 flow is added for each router port on which
symmetric
> >>>>>>> +      replies are configured. The matching logic for these ports
essentially
> >>>>>>> +      reverses the configured logic of the ECMP route. So for
instance, a route
> >>>>>>> +      with a destination routing policy will instead match if
the source IP
> >>>>>>> +      address matches the static route's prefix. The flow uses
the action
> >>>>>>>    <code>ct_next</code> to send IP packets to the connection
tracker for
> >>>>>>>    packet de-fragmentation and tracking before sending it to the
next table.
> >>>>>>>  </p>
> >>>>>>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
> >>>>>>> index 85b47a18f..3c1a97f73 100644
> >>>>>>> --- a/tests/ovn-northd.at
> >>>>>>> +++ b/tests/ovn-northd.at
> >>>>>>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb
--ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
> >>>>>>>
> >>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed
's/table=../table=??/' | sort], [0], [dnl
> >>>>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst
== 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
reg8[[16..31]] = select(1, 2);)
> >>>>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst
== 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
reg8[[16..31]] = select(1, 2);)
> >>>>>>> ])
> >>>>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed
's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort],
[0], [dnl
> >>>>>>> table=??(lr_in_ip_routing_ecmp), priority=100  ,
match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 =
192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport =
"lr0-public"; next;)
> >>>>>>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb
--ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
> >>>>>>>
> >>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed
's/table=../table=??/' | sort], [0], [dnl
> >>>>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst
== 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
reg8[[16..31]] = select(1, 2);)
> >>>>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst
== 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
reg8[[16..31]] = select(1, 2);)
> >>>>>>> ])
> >>>>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed
's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort],
[0], [dnl
> >>>>>>> table=??(lr_in_ip_routing_ecmp), priority=100  ,
match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 =
192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport =
"lr0-public"; next;)
> >>>>>>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add
lr0 1.0.0.0/24 192.168.0.10
> >>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>>>>>
> >>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed
's/table=../table=??/' | sort], [0], [dnl
> >>>>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst
== 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10;
reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
flags.loopback = 1; next;)
> >>>>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst
== 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10;
reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
flags.loopback = 1; next;)
> >>>>>>> ])
> >>>>>>>
> >>>>>>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
> >>>>>>>
> >>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
> >>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed
's/table=../table=??/' | sort], [0], [dnl
> >>>>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst
== 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 =
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
flags.loopback = 1; next;)
> >>>>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst
== 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 =
192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
flags.loopback = 1; next;)
> >>>>>>> ])
> >>>>>>>
> >>>>>>> AT_CLEANUP
> >>>>>>> --
> >>>>>>> 2.30.0
> >>>>>>>
> >>>>>>> _______________________________________________
> >>>>>>> dev mailing list
> >>>>>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
> >>>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> >>>>> _______________________________________________
> >>>>> dev mailing list
> >>>>> dev@openvswitch.org
> >>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>>>
> >>>> _______________________________________________
> >>>> dev mailing list
> >>>> dev@openvswitch.org <mailto:dev@openvswitch.org> <mailto:
dev@openvswitch.org <mailto:dev@openvswitch.org>>
> >>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev> <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev>>
> >>> _______________________________________________
> >>> dev mailing list
> >>> dev@openvswitch.org <mailto:dev@openvswitch.org> <mailto:
dev@openvswitch.org <mailto:dev@openvswitch.org>>
> >>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev> <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev>>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org <mailto:dev@openvswitch.org>
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org <mailto:dev@openvswitch.org>
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Vladislav Odintsov Nov. 18, 2021, 7:53 a.m. UTC | #10
Thanks, Han.
Please see inline.

Regards,
Vladislav Odintsov

> On 18 Nov 2021, at 10:26, Han Zhou <zhouhan@gmail.com> wrote:
> 
> On Wed, Nov 17, 2021 at 10:10 PM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>>
> wrote:
>> 
>> Great, thanks.
>> 
>> Hi @Han,
>> 
>> I’d like you to look at the patch series too. Would you have time on it?
>> If yes, could you redirect me on terms please.
> 
> Hi Vladislav,
> 
> Thanks for adding me. I am sorry that I don't think I will have enough time
> for a detailed review for this series until Nov 29. Not sure if you can
> wait that long, but I don't think my review is mandatory if Numan is
> reviewing all the patches in detail.
> 

It’s okay from my side to wait for Dec 2-3.

> I have a quick comment though, regarding the priority offset. It is
> mentioned in the commit message:
>>>>>>>>> Each route's prefix length has its own 'slot' in lflow prios.
>>>>>>>>> Now prefix length space is calculated using next information:
>>>>>>>>> to calculate route's priority prefixlen multiplied by 3
>>>>>>>>> + route origin offset (0 - source-based route; 1 - directly-
>>>>>>>>> connected route; 2 - static route).

> But in the code, 2 is for connected, and 1 is for static:
>>>>>>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>>>>>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>>>>>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> 
> I wonder which one is your intent? I'd let the static route have higher
> priority, because otherwise why would the user add the static route at all?
> But this is more of a question than a suggestion. Is there any *standard*
> behavior or similar thing that we can refer from e.g. AWS?
> 

It’s a typo in commit message. I’ll fix that in v9.

It is done to support well-known behaviour, where directly-connected routes take precedence over static routes for same CIDR.

To support AWS feature, where user can override "subnet" route (think, "connected") with a static route, additional work is needed.
It’s not what I’m currently working on, but I thought about such use case and it seems that it can be easily supported by adding ability to add Logical_Router_Static_Route with some "override" flag, which ensures this static route would be installed with the highest priority.

So, if no objections here or other comments for now I’ll send v9.

> Thanks,
> Han
> 
>> 
>> Thanks.
>> 
>> Regards,
>> Vladislav Odintsov
>> 
>>> On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org> wrote:
>>> 
>>> On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com
> <mailto:odivlad@gmail.com>> wrote:
>>>> 
>>>> I’ve submitted a patch [1] with my findings.
>>>> Also, if no comments for other my patches from this patch series, I
> can submit a new version.
>>>> Should I?
>>> 
>>> No comments from my side.  Perhaps you can submit another version.
>>> 
>>> Numan
>>> 
>>>> 
>>>> 1:
> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
> <
> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
>> 
>>>> 
>>>> Regards,
>>>> Vladislav Odintsov
>>>> 
>>>>> On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org <mailto:
> numans@ovn.org>> wrote:
>>>>> 
>>>>> On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com
> <mailto:odivlad@gmail.com> <mailto:odivlad@gmail.com <mailto:
> odivlad@gmail.com>>> wrote:
>>>>>> 
>>>>>> Two additions:
>>>>>> 
>>>>>> 1. Regarding documentation for flow in lr_in_defrag section:
>>>>>> 
>>>>>> It seems to me that documentation for it is written in a wrong
> section (lr_in_defrag).
>>>>>> Since the flow is installed in lr_in_ip_routing, it should be
> documented there.
>>>>>> I’ll move it if you don’t mind.
>>>>> 
>>>>> Sure.  Thanks.  I'd suggest having a separate patch for fixing the
>>>>> documentation.
>>>>>> 
>>>>>> 
>>>>>> 2. Documentation for other flow changes was added, but I’ve
> committed it to a wrong patch (#3).
>>>>>> I’ll move documentation update between patches.
>>>>> 
>>>>> Ack.
>>>>> 
>>>>> Thanks
>>>>> Numan
>>>>> 
>>>>>> 
>>>>>> Regards,
>>>>>> Vladislav Odintsov
>>>>>> 
>>>>>>> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com>
> wrote:
>>>>>>> 
>>>>>>> Hi Numan,
>>>>>>> 
>>>>>>> Thanks for the review.
>>>>>>> Sure I will fix this. Should I wait for more comments or that’s all
> and I can send v9?
>>>>>>> 
>>>>>>> Regards,
>>>>>>> Vladislav Odintsov
>>>>>>> 
>>>>>>>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
>>>>>>>> 
>>>>>>>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <
> odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>>>>>>>>> 
>>>>>>>>> With this patch routes to connected networks have higher
>>>>>>>>> priority than static routes with same ip_prefix.
>>>>>>>>> 
>>>>>>>>> This brings commonly-used behaviour for routes lookup order:
>>>>>>>>> 1: longest prefix match
>>>>>>>>> 2: metric
>>>>>>>>> 
>>>>>>>>> The metric has next lookup order:
>>>>>>>>> 1: connected routes
>>>>>>>>> 2: static routes
>>>>>>>>> 
>>>>>>>>> Earlier static and connected routes with same ip_prefix had
>>>>>>>>> the same priority, so it was impossible to predict which one
>>>>>>>>> is used for routing decision.
>>>>>>>>> 
>>>>>>>>> Each route's prefix length has its own 'slot' in lflow prios.
>>>>>>>>> Now prefix length space is calculated using next information:
>>>>>>>>> to calculate route's priority prefixlen multiplied by 3
>>>>>>>>> + route origin offset (0 - source-based route; 1 - directly-
>>>>>>>>> connected route; 2 - static route).
>>>>>>>>> 
>>>>>>>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
>>>>>>>>> by 10000.
>>>>>>>>> 
>>>>>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>>>>>>> 
>>>>>>>> Hi Vladislav,
>>>>>>>> 
>>>>>>>> Thanks for the patch.  Overall it looks good to me.  I've one
> comment.
>>>>>>>> Looks like the documentation updated in ovn-northd.8.xml is not
> accurate.
>>>>>>>> 
>>>>>>>> This patch modifies the flows in the lr_in_ip_routing stage but
> this
>>>>>>>> patch doesn't update the documentation.
>>>>>>>> Also the patch updates the documentation for the flow in
> lr_in_defrag
>>>>>>>> stage, which seems not correct.
>>>>>>>> 
>>>>>>>> Can you please update the documentation accurately ?
>>>>>>>> 
>>>>>>>> Numan
>>>>>>>> 
>>>>>>>>> ---
>>>>>>>>> northd/northd.c         | 50
> ++++++++++++++++++++++++++++-------------
>>>>>>>>> northd/ovn-northd.8.xml | 12 +++++-----
>>>>>>>>> tests/ovn-northd.at     |  8 +++----
>>>>>>>>> 3 files changed, 45 insertions(+), 25 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>>>>>>> index 1e8a3457c..0d513f039 100644
>>>>>>>>> --- a/northd/northd.c
>>>>>>>>> +++ b/northd/northd.c
>>>>>>>>> @@ -305,6 +305,15 @@ enum ovn_stage {
>>>>>>>>> *
>>>>>>>>> */
>>>>>>>>> 
>>>>>>>>> +/*
>>>>>>>>> + * Route offsets implement logic to prioritize traffic for
> routes with
>>>>>>>>> + * same ip_prefix values:
>>>>>>>>> + *  -  connected route overrides static one;
>>>>>>>>> + *  -  static route overrides connected route. */
>>>>>>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>>>>>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>>>>>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>>>>>>>> +
>>>>>>>>> /* Returns an "enum ovn_stage" built from the arguments. */
>>>>>>>>> static enum ovn_stage
>>>>>>>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline
> pipeline,
>>>>>>>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>>>>>>>>> struct in6_addr prefix;
>>>>>>>>> unsigned int plen;
>>>>>>>>> bool is_src_route;
>>>>>>>>> +    const char *origin;
>>>>>>>>> uint16_t route_count;
>>>>>>>>> struct ovs_list route_list; /* Contains ecmp_route_list_node */
>>>>>>>>> };
>>>>>>>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>>>>>>>>> eg->prefix = route->prefix;
>>>>>>>>> eg->plen = route->plen;
>>>>>>>>> eg->is_src_route = route->is_src_route;
>>>>>>>>> +    eg->origin = smap_get_def(&route->route->options, "origin",
> "");
>>>>>>>>> ovs_list_init(&eg->route_list);
>>>>>>>>> ecmp_groups_add_route(eg, route);
>>>>>>>>> 
>>>>>>>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct
> in6_addr *prefix, unsigned int plen)
>>>>>>>>> static void
>>>>>>>>> build_route_match(const struct ovn_port *op_inport, const char
> *network_s,
>>>>>>>>>               int plen, bool is_src_route, bool is_ipv4, struct
> ds *match,
>>>>>>>>> -                  uint16_t *priority)
>>>>>>>>> +                  uint16_t *priority, int ofs)
>>>>>>>>> {
>>>>>>>>> const char *dir;
>>>>>>>>> /* The priority here is calculated to implement
> longest-prefix-match
>>>>>>>>>  * routing. */
>>>>>>>>> if (is_src_route) {
>>>>>>>>>     dir = "src";
>>>>>>>>> -        *priority = plen * 2;
>>>>>>>>> +        ofs = 0;
>>>>>>>>> } else {
>>>>>>>>>     dir = "dst";
>>>>>>>>> -        *priority = (plen * 2) + 1;
>>>>>>>>> }
>>>>>>>>> 
>>>>>>>>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
>>>>>>>>> +
>>>>>>>>> if (op_inport) {
>>>>>>>>>     ds_put_format(match, "inport == %s && ",
> op_inport->json_key);
>>>>>>>>> }
>>>>>>>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap
> *lflows,
>>>>>>>>>               out_port->lrp_networks.ea_s,
>>>>>>>>>               IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>>>>>>>>>               port_ip, out_port->json_key);
>>>>>>>>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
> 300,
>>>>>>>>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
> 10300,
>>>>>>>>>                        ds_cstr(&match), ds_cstr(&actions),
>>>>>>>>>                        &st_route->header_);
>>>>>>>>> 
>>>>>>>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows,
> struct ovn_datapath *od,
>>>>>>>>> struct ds route_match = DS_EMPTY_INITIALIZER;
>>>>>>>>> 
>>>>>>>>> char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
>>>>>>>>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
>>>>>>>>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>>>>>>>>> build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route,
> is_ipv4,
>>>>>>>>> -                      &route_match, &priority);
>>>>>>>>> +                      &route_match, &priority, ofs);
>>>>>>>>> free(prefix_s);
>>>>>>>>> 
>>>>>>>>> struct ds actions = DS_EMPTY_INITIALIZER;
>>>>>>>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct
> ovn_datapath *od,
>>>>>>>>>       const struct ovn_port *op, const char *lrp_addr_s,
>>>>>>>>>       const char *network_s, int plen, const char *gateway,
>>>>>>>>>       bool is_src_route, const struct ovsdb_idl_row *stage_hint,
>>>>>>>>> -          bool is_discard_route)
>>>>>>>>> +          bool is_discard_route, int ofs)
>>>>>>>>> {
>>>>>>>>> bool is_ipv4 = strchr(network_s, '.') ? true : false;
>>>>>>>>> struct ds match = DS_EMPTY_INITIALIZER;
>>>>>>>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct
> ovn_datapath *od,
>>>>>>>>>     }
>>>>>>>>> }
>>>>>>>>> build_route_match(op_inport, network_s, plen, is_src_route,
> is_ipv4,
>>>>>>>>> -                      &match, &priority);
>>>>>>>>> +                      &match, &priority, ofs);
>>>>>>>>> 
>>>>>>>>> struct ds common_actions = DS_EMPTY_INITIALIZER;
>>>>>>>>> struct ds actions = DS_EMPTY_INITIALIZER;
>>>>>>>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap
> *lflows, struct ovn_datapath *od,
>>>>>>>>>     }
>>>>>>>>> }
>>>>>>>>> 
>>>>>>>>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin",
> ""),
>>>>>>>>> +                      ROUTE_ORIGIN_CONNECTED) ?
> ROUTE_PRIO_OFFSET_CONNECTED
>>>>>>>>> +                                              :
> ROUTE_PRIO_OFFSET_STATIC;
>>>>>>>>> +
>>>>>>>>> char *prefix_s = build_route_prefix_s(&route_->prefix,
> route_->plen);
>>>>>>>>> add_route(lflows, route_->is_discard_route ? od : out_port->od,
> out_port,
>>>>>>>>>           lrp_addr_s, prefix_s, route_->plen, route->nexthop,
>>>>>>>>> -              route_->is_src_route, &route->header_,
> route_->is_discard_route);
>>>>>>>>> +              route_->is_src_route, &route->header_,
> route_->is_discard_route,
>>>>>>>>> +              ofs);
>>>>>>>>> 
>>>>>>>>> free(prefix_s);
>>>>>>>>> }
>>>>>>>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>>>>>>>>>         add_route(lflows, op->od, op,
> op->lrp_networks.ipv4_addrs[i].addr_s,
>>>>>>>>>                   op->lrp_networks.ipv4_addrs[i].network_s,
>>>>>>>>>                   op->lrp_networks.ipv4_addrs[i].plen, NULL,
> false,
>>>>>>>>> -                      &op->nbrp->header_, false);
>>>>>>>>> +                      &op->nbrp->header_, false,
> ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>>>>>     }
>>>>>>>>> 
>>>>>>>>>     for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>>>>>>>>         add_route(lflows, op->od, op,
> op->lrp_networks.ipv6_addrs[i].addr_s,
>>>>>>>>>                   op->lrp_networks.ipv6_addrs[i].network_s,
>>>>>>>>>                   op->lrp_networks.ipv6_addrs[i].plen, NULL,
> false,
>>>>>>>>> -                      &op->nbrp->header_, false);
>>>>>>>>> +                      &op->nbrp->header_, false,
> ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>>>>>     }
>>>>>>>>> } else if (lsp_is_router(op->nbsp)) {
>>>>>>>>>     struct ovn_port *peer = ovn_port_get_peer(ports, op);
>>>>>>>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>>>>>>>>> 
> peer->lrp_networks.ipv4_addrs[0].addr_s,
>>>>>>>>>                           laddrs->ipv4_addrs[k].network_s,
>>>>>>>>>                           laddrs->ipv4_addrs[k].plen, NULL,
> false,
>>>>>>>>> -                              &peer->nbrp->header_, false);
>>>>>>>>> +                              &peer->nbrp->header_, false,
>>>>>>>>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>>>>>             }
>>>>>>>>>         }
>>>>>>>>>     }
>>>>>>>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>>>>>     /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>>>>>>>>>      * i.e., router solicitation and router advertisement.
>>>>>>>>>      */
>>>>>>>>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
>>>>>>>>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>>>>>>>>>                   "nd_rs || nd_ra", "drop;");
>>>>>>>>>     if (!od->mcast_info.rtr.relay) {
>>>>>>>>>         return;
>>>>>>>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>>>>>         }
>>>>>>>>>         ds_put_format(actions, "outport = \"%s\"; ip.ttl--;
> next;",
>>>>>>>>>                       igmp_group->mcgroup.name);
>>>>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> 500,
>>>>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> 10500,
>>>>>>>>>                       ds_cstr(match), ds_cstr(actions));
>>>>>>>>>     }
>>>>>>>>> 
>>>>>>>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>>>>>      * ports. Otherwise drop any multicast traffic.
>>>>>>>>>      */
>>>>>>>>>     if (od->mcast_info.rtr.flood_static) {
>>>>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> 450,
>>>>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> 10450,
>>>>>>>>>                       "ip4.mcast || ip6.mcast",
>>>>>>>>>                       "clone { "
>>>>>>>>>                             "outport = \""MC_STATIC"\"; "
>>>>>>>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>>>>>                             "next; "
>>>>>>>>>                       "};");
>>>>>>>>>     } else {
>>>>>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> 450,
>>>>>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> 10450,
>>>>>>>>>                       "ip4.mcast || ip6.mcast", "drop;");
>>>>>>>>>     }
>>>>>>>>> }
>>>>>>>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>>>>>>>>> index fb67395e3..4f3a9d5e3 100644
>>>>>>>>> --- a/northd/ovn-northd.8.xml
>>>>>>>>> +++ b/northd/ovn-northd.8.xml
>>>>>>>>> @@ -2945,12 +2945,12 @@ icmp6 {
>>>>>>>>> 
>>>>>>>>> <p>
>>>>>>>>>   If ECMP routes with symmetric reply are configured in the
>>>>>>>>> -      <code>OVN_Northbound</code> database for a gateway router,
> a priority-300
>>>>>>>>> -      flow is added for each router port on which symmetric
> replies are
>>>>>>>>> -      configured. The matching logic for these ports essentially
> reverses the
>>>>>>>>> -      configured logic of the ECMP route. So for instance, a
> route with a
>>>>>>>>> -      destination routing policy will instead match if the
> source IP address
>>>>>>>>> -      matches the static route's prefix. The flow uses the action
>>>>>>>>> +      <code>OVN_Northbound</code> database for a gateway router,
> a
>>>>>>>>> +      priority-10300 flow is added for each router port on which
> symmetric
>>>>>>>>> +      replies are configured. The matching logic for these ports
> essentially
>>>>>>>>> +      reverses the configured logic of the ECMP route. So for
> instance, a route
>>>>>>>>> +      with a destination routing policy will instead match if
> the source IP
>>>>>>>>> +      address matches the static route's prefix. The flow uses
> the action
>>>>>>>>>   <code>ct_next</code> to send IP packets to the connection
> tracker for
>>>>>>>>>   packet de-fragmentation and tracking before sending it to the
> next table.
>>>>>>>>> </p>
>>>>>>>>> diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
>>>>>>>>> index 85b47a18f..3c1a97f73 100644
>>>>>>>>> --- a/tests/ovn-northd.at
>>>>>>>>> +++ b/tests/ovn-northd.at
>>>>>>>>> @@ -5430,7 +5430,7 @@ check ovn-nbctl --wait=sb
> --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>>>>>>>> 
>>>>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed
> 's/table=../table=??/' | sort], [0], [dnl
>>>>>>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst
> == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
> reg8[[16..31]] = select(1, 2);)
>>>>>>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst
> == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
> reg8[[16..31]] = select(1, 2);)
>>>>>>>>> ])
>>>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed
> 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort],
> [0], [dnl
>>>>>>>>> table=??(lr_in_ip_routing_ecmp), priority=100  ,
> match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 =
> 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport =
> "lr0-public"; next;)
>>>>>>>>> @@ -5443,7 +5443,7 @@ check ovn-nbctl --wait=sb
> --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
>>>>>>>>> 
>>>>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed
> 's/table=../table=??/' | sort], [0], [dnl
>>>>>>>>> -  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst
> == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
> reg8[[16..31]] = select(1, 2);)
>>>>>>>>> +  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst
> == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1;
> reg8[[16..31]] = select(1, 2);)
>>>>>>>>> ])
>>>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed
> 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort],
> [0], [dnl
>>>>>>>>> table=??(lr_in_ip_routing_ecmp), priority=100  ,
> match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 =
> 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport =
> "lr0-public"; next;)
>>>>>>>>> @@ -5458,14 +5458,14 @@ check ovn-nbctl --wait=sb lr-route-add
> lr0 1.0.0.0/24 192.168.0.10
>>>>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>>>>>> 
>>>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed
> 's/table=../table=??/' | sort], [0], [dnl
>>>>>>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst
> == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10;
> reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
> flags.loopback = 1; next;)
>>>>>>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst
> == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10;
> reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
> flags.loopback = 1; next;)
>>>>>>>>> ])
>>>>>>>>> 
>>>>>>>>> check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
>>>>>>>>> 
>>>>>>>>> ovn-sbctl dump-flows lr0 > lr0flows
>>>>>>>>> AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed
> 's/table=../table=??/' | sort], [0], [dnl
>>>>>>>>> -  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst
> == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 =
> 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
> flags.loopback = 1; next;)
>>>>>>>>> +  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst
> == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 =
> 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public";
> flags.loopback = 1; next;)
>>>>>>>>> ])
>>>>>>>>> 
>>>>>>>>> AT_CLEANUP
>>>>>>>>> --
>>>>>>>>> 2.30.0
>>>>>>>>> 
>>>>>>>>> _______________________________________________
>>>>>>>>> dev mailing list
>>>>>>>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>>>>>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
>>>>>>> _______________________________________________
>>>>>>> dev mailing list
>>>>>>> dev@openvswitch.org
>>>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>>>>> 
>>>>>> _______________________________________________
>>>>>> dev mailing list
>>>>>> dev@openvswitch.org <mailto:dev@openvswitch.org> <mailto:
> dev@openvswitch.org <mailto:dev@openvswitch.org>>
>>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev> <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev>>
>>>>> _______________________________________________
>>>>> dev mailing list
>>>>> dev@openvswitch.org <mailto:dev@openvswitch.org> <mailto:
> dev@openvswitch.org <mailto:dev@openvswitch.org>>
>>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev> <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org <mailto:dev@openvswitch.org>
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org <mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
Han Zhou Nov. 18, 2021, 7:57 a.m. UTC | #11
Sounds good to me.

On Wed, Nov 17, 2021 at 11:53 PM Vladislav Odintsov <odivlad@gmail.com>
wrote:

> Thanks, Han.
> Please see inline.
>
> Regards,
> Vladislav Odintsov
>
> On 18 Nov 2021, at 10:26, Han Zhou <zhouhan@gmail.com> wrote:
>
> On Wed, Nov 17, 2021 at 10:10 PM Vladislav Odintsov <odivlad@gmail.com>
> wrote:
>
>
> Great, thanks.
>
> Hi @Han,
>
> I’d like you to look at the patch series too. Would you have time on it?
> If yes, could you redirect me on terms please.
>
>
> Hi Vladislav,
>
> Thanks for adding me. I am sorry that I don't think I will have enough time
> for a detailed review for this series until Nov 29. Not sure if you can
> wait that long, but I don't think my review is mandatory if Numan is
> reviewing all the patches in detail.
>
>
> It’s okay from my side to wait for Dec 2-3.
>
> I have a quick comment though, regarding the priority offset. It is
> mentioned in the commit message:
>
> Each route's prefix length has its own 'slot' in lflow prios.
> Now prefix length space is calculated using next information:
> to calculate route's priority prefixlen multiplied by 3
> + route origin offset (0 - source-based route; 1 - directly-
> connected route; 2 - static route).
>
>
> But in the code, 2 is for connected, and 1 is for static:
>
> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> +#define ROUTE_PRIO_OFFSET_STATIC 1
> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>
>
> I wonder which one is your intent? I'd let the static route have higher
> priority, because otherwise why would the user add the static route at all?
> But this is more of a question than a suggestion. Is there any *standard*
> behavior or similar thing that we can refer from e.g. AWS?
>
>
> It’s a typo in commit message. I’ll fix that in v9.
>
> It is done to support well-known behaviour, where directly-connected
> routes take precedence over static routes for same CIDR.
>
> To support AWS feature, where user can override "subnet" route (think,
> "connected") with a static route, additional work is needed.
> It’s not what I’m currently working on, but I thought about such use case
> and it seems that it can be easily supported by adding ability to add
> Logical_Router_Static_Route with some "override" flag, which ensures this
> static route would be installed with the highest priority.
>
> So, if no objections here or other comments for now I’ll send v9.
>
> Thanks,
> Han
>
>
> Thanks.
>
> Regards,
> Vladislav Odintsov
>
> On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org> wrote:
>
> On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com
>
> <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
>
>
> I’ve submitted a patch [1] with my findings.
> Also, if no comments for other my patches from this patch series, I
>
> can submit a new version.
>
> Should I?
>
>
> No comments from my side.  Perhaps you can submit another version.
>
> Numan
>
>
> 1:
>
>
> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
> <
>
> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
>
>
>
> Regards,
> Vladislav Odintsov
>
> On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org <mailto:
>
> numans@ovn.org>> wrote:
>
>
> On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com
>
> <mailto:odivlad@gmail.com <odivlad@gmail.com>> <mailto:odivlad@gmail.com
> <odivlad@gmail.com> <mailto:
> odivlad@gmail.com>>> wrote:
>
>
> Two additions:
>
> 1. Regarding documentation for flow in lr_in_defrag section:
>
> It seems to me that documentation for it is written in a wrong
>
> section (lr_in_defrag).
>
> Since the flow is installed in lr_in_ip_routing, it should be
>
> documented there.
>
> I’ll move it if you don’t mind.
>
>
> Sure.  Thanks.  I'd suggest having a separate patch for fixing the
> documentation.
>
>
>
> 2. Documentation for other flow changes was added, but I’ve
>
> committed it to a wrong patch (#3).
>
> I’ll move documentation update between patches.
>
>
> Ack.
>
> Thanks
> Numan
>
>
> Regards,
> Vladislav Odintsov
>
> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com>
>
> wrote:
>
>
> Hi Numan,
>
> Thanks for the review.
> Sure I will fix this. Should I wait for more comments or that’s all
>
> and I can send v9?
>
>
> Regards,
> Vladislav Odintsov
>
> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
>
> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <
>
> odivlad@gmail.com <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
>
>
> With this patch routes to connected networks have higher
> priority than static routes with same ip_prefix.
>
> This brings commonly-used behaviour for routes lookup order:
> 1: longest prefix match
> 2: metric
>
> The metric has next lookup order:
> 1: connected routes
> 2: static routes
>
> Earlier static and connected routes with same ip_prefix had
> the same priority, so it was impossible to predict which one
> is used for routing decision.
>
> Each route's prefix length has its own 'slot' in lflow prios.
> Now prefix length space is calculated using next information:
> to calculate route's priority prefixlen multiplied by 3
> + route origin offset (0 - source-based route; 1 - directly-
> connected route; 2 - static route).
>
> Also, enlarge prio for generic records in lr_in_ip_routing stage
> by 10000.
>
> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>
>
> Hi Vladislav,
>
> Thanks for the patch.  Overall it looks good to me.  I've one
>
> comment.
>
> Looks like the documentation updated in ovn-northd.8.xml is not
>
> accurate.
>
>
> This patch modifies the flows in the lr_in_ip_routing stage but
>
> this
>
> patch doesn't update the documentation.
> Also the patch updates the documentation for the flow in
>
> lr_in_defrag
>
> stage, which seems not correct.
>
> Can you please update the documentation accurately ?
>
> Numan
>
> ---
> northd/northd.c         | 50
>
> ++++++++++++++++++++++++++++-------------
>
> northd/ovn-northd.8.xml | 12 +++++-----
> tests/ovn-northd.at     |  8 +++----
> 3 files changed, 45 insertions(+), 25 deletions(-)
>
> diff --git a/northd/northd.c b/northd/northd.c
> index 1e8a3457c..0d513f039 100644
> --- a/northd/northd.c
> +++ b/northd/northd.c
> @@ -305,6 +305,15 @@ enum ovn_stage {
> *
> */
>
> +/*
> + * Route offsets implement logic to prioritize traffic for
>
> routes with
>
> + * same ip_prefix values:
> + *  -  connected route overrides static one;
> + *  -  static route overrides connected route. */
> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> +#define ROUTE_PRIO_OFFSET_STATIC 1
> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> +
> /* Returns an "enum ovn_stage" built from the arguments. */
> static enum ovn_stage
> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline
>
> pipeline,
>
> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
> struct in6_addr prefix;
> unsigned int plen;
> bool is_src_route;
> +    const char *origin;
> uint16_t route_count;
> struct ovs_list route_list; /* Contains ecmp_route_list_node */
> };
> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
> eg->prefix = route->prefix;
> eg->plen = route->plen;
> eg->is_src_route = route->is_src_route;
> +    eg->origin = smap_get_def(&route->route->options, "origin",
>
> "");
>
> ovs_list_init(&eg->route_list);
> ecmp_groups_add_route(eg, route);
>
> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct
>
> in6_addr *prefix, unsigned int plen)
>
> static void
> build_route_match(const struct ovn_port *op_inport, const char
>
> *network_s,
>
>               int plen, bool is_src_route, bool is_ipv4, struct
>
> ds *match,
>
> -                  uint16_t *priority)
> +                  uint16_t *priority, int ofs)
> {
> const char *dir;
> /* The priority here is calculated to implement
>
> longest-prefix-match
>
>  * routing. */
> if (is_src_route) {
>     dir = "src";
> -        *priority = plen * 2;
> +        ofs = 0;
> } else {
>     dir = "dst";
> -        *priority = (plen * 2) + 1;
> }
>
> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
> +
> if (op_inport) {
>     ds_put_format(match, "inport == %s && ",
>
> op_inport->json_key);
>
> }
> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap
>
> *lflows,
>
>               out_port->lrp_networks.ea_s,
>               IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>               port_ip, out_port->json_key);
> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
>
> 300,
>
> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
>
> 10300,
>
>                        ds_cstr(&match), ds_cstr(&actions),
>                        &st_route->header_);
>
> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows,
>
> struct ovn_datapath *od,
>
> struct ds route_match = DS_EMPTY_INITIALIZER;
>
> char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
> build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route,
>
> is_ipv4,
>
> -                      &route_match, &priority);
> +                      &route_match, &priority, ofs);
> free(prefix_s);
>
> struct ds actions = DS_EMPTY_INITIALIZER;
> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct
>
> ovn_datapath *od,
>
>       const struct ovn_port *op, const char *lrp_addr_s,
>       const char *network_s, int plen, const char *gateway,
>       bool is_src_route, const struct ovsdb_idl_row *stage_hint,
> -          bool is_discard_route)
> +          bool is_discard_route, int ofs)
> {
> bool is_ipv4 = strchr(network_s, '.') ? true : false;
> struct ds match = DS_EMPTY_INITIALIZER;
> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct
>
> ovn_datapath *od,
>
>     }
> }
> build_route_match(op_inport, network_s, plen, is_src_route,
>
> is_ipv4,
>
> -                      &match, &priority);
> +                      &match, &priority, ofs);
>
> struct ds common_actions = DS_EMPTY_INITIALIZER;
> struct ds actions = DS_EMPTY_INITIALIZER;
> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap
>
> *lflows, struct ovn_datapath *od,
>
>     }
> }
>
> +    int ofs = !strcmp(smap_get_def(&route->options, "origin",
>
> ""),
>
> +                      ROUTE_ORIGIN_CONNECTED) ?
>
> ROUTE_PRIO_OFFSET_CONNECTED
>
> +                                              :
>
> ROUTE_PRIO_OFFSET_STATIC;
>
> +
> char *prefix_s = build_route_prefix_s(&route_->prefix,
>
> route_->plen);
>
> add_route(lflows, route_->is_discard_route ? od : out_port->od,
>
> out_port,
>
>           lrp_addr_s, prefix_s, route_->plen, route->nexthop,
> -              route_->is_src_route, &route->header_,
>
> route_->is_discard_route);
>
> +              route_->is_src_route, &route->header_,
>
> route_->is_discard_route,
>
> +              ofs);
>
> free(prefix_s);
> }
> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>         add_route(lflows, op->od, op,
>
> op->lrp_networks.ipv4_addrs[i].addr_s,
>
>                   op->lrp_networks.ipv4_addrs[i].network_s,
>                   op->lrp_networks.ipv4_addrs[i].plen, NULL,
>
> false,
>
> -                      &op->nbrp->header_, false);
> +                      &op->nbrp->header_, false,
>
> ROUTE_PRIO_OFFSET_CONNECTED);
>
>     }
>
>     for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>         add_route(lflows, op->od, op,
>
> op->lrp_networks.ipv6_addrs[i].addr_s,
>
>                   op->lrp_networks.ipv6_addrs[i].network_s,
>                   op->lrp_networks.ipv6_addrs[i].plen, NULL,
>
> false,
>
> -                      &op->nbrp->header_, false);
> +                      &op->nbrp->header_, false,
>
> ROUTE_PRIO_OFFSET_CONNECTED);
>
>     }
> } else if (lsp_is_router(op->nbsp)) {
>     struct ovn_port *peer = ovn_port_get_peer(ports, op);
> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>
> peer->lrp_networks.ipv4_addrs[0].addr_s,
>
>                           laddrs->ipv4_addrs[k].network_s,
>                           laddrs->ipv4_addrs[k].plen, NULL,
>
> false,
>
> -                              &peer->nbrp->header_, false);
> +                              &peer->nbrp->header_, false,
> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>             }
>         }
>     }
> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>     /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>      * i.e., router solicitation and router advertisement.
>      */
> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>                   "nd_rs || nd_ra", "drop;");
>     if (!od->mcast_info.rtr.relay) {
>         return;
> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>         }
>         ds_put_format(actions, "outport = \"%s\"; ip.ttl--;
>
> next;",
>
>                       igmp_group->mcgroup.name);
> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>
> 500,
>
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>
> 10500,
>
>                       ds_cstr(match), ds_cstr(actions));
>     }
>
> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>      * ports. Otherwise drop any multicast traffic.
>      */
>     if (od->mcast_info.rtr.flood_static) {
> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>
> 450,
>
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>
> 10450,
>
>                       "ip4.mcast || ip6.mcast",
>                       "clone { "
>                             "outport = \""MC_STATIC"\"; "
> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>                             "next; "
>                       "};");
>     } else {
> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>
> 450,
>
> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>
> 10450,
>
>                       "ip4.mcast || ip6.mcast", "drop;");
>     }
> }
> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> index fb67395e3..4f3a9d5e3 100644
> --- a/northd/ovn-northd.8.xml
> +++ b/northd/ovn-northd.8.xml
> @@ -2945,12 +2945,12 @@ icmp6 {
>
> <p>
>   If ECMP routes with symmetric reply are configured in the
> -      <code>OVN_Northbound</code> database for a gateway router,
>
> a priority-300
>
> -      flow is added for each router port on which symmetric
>
> replies are
>
> -      configured. The matching logic for these ports essentially
>
> reverses the
>
> -      configured logic of the ECMP route. So for instance, a
>
> route with a
>
> -      destination routing policy will instead match if the
>
> source IP address
>
> -      matches the static route's prefix. The flow uses the action
> +      <code>OVN_Northbound</code> database for a gateway router,
>
> a
>
> +      priority-10300 flow is added for each router port on which
>
> symmetric
>
> +      replies are configured. The matching logic for these ports
>
> essentially
>
> +      reverses the configured logic of the ECMP route. So for
>
> instance, a route
>
> +      with a destination routing policy will instead match if
>
> the source IP
>
>
Numan Siddique Nov. 18, 2021, 4:50 p.m. UTC | #12
Hi Vladislav,

Once v9 is ready,  please submit them. I'll take a look.

Thanks
Numan


On Thu, Nov 18, 2021 at 2:58 AM Han Zhou <zhouhan@gmail.com> wrote:
>
> Sounds good to me.
>
> On Wed, Nov 17, 2021 at 11:53 PM Vladislav Odintsov <odivlad@gmail.com>
> wrote:
>
> > Thanks, Han.
> > Please see inline.
> >
> > Regards,
> > Vladislav Odintsov
> >
> > On 18 Nov 2021, at 10:26, Han Zhou <zhouhan@gmail.com> wrote:
> >
> > On Wed, Nov 17, 2021 at 10:10 PM Vladislav Odintsov <odivlad@gmail.com>
> > wrote:
> >
> >
> > Great, thanks.
> >
> > Hi @Han,
> >
> > I’d like you to look at the patch series too. Would you have time on it?
> > If yes, could you redirect me on terms please.
> >
> >
> > Hi Vladislav,
> >
> > Thanks for adding me. I am sorry that I don't think I will have enough time
> > for a detailed review for this series until Nov 29. Not sure if you can
> > wait that long, but I don't think my review is mandatory if Numan is
> > reviewing all the patches in detail.
> >
> >
> > It’s okay from my side to wait for Dec 2-3.
> >
> > I have a quick comment though, regarding the priority offset. It is
> > mentioned in the commit message:
> >
> > Each route's prefix length has its own 'slot' in lflow prios.
> > Now prefix length space is calculated using next information:
> > to calculate route's priority prefixlen multiplied by 3
> > + route origin offset (0 - source-based route; 1 - directly-
> > connected route; 2 - static route).
> >
> >
> > But in the code, 2 is for connected, and 1 is for static:
> >
> > +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> > +#define ROUTE_PRIO_OFFSET_STATIC 1
> > +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> >
> >
> > I wonder which one is your intent? I'd let the static route have higher
> > priority, because otherwise why would the user add the static route at all?
> > But this is more of a question than a suggestion. Is there any *standard*
> > behavior or similar thing that we can refer from e.g. AWS?
> >
> >
> > It’s a typo in commit message. I’ll fix that in v9.
> >
> > It is done to support well-known behaviour, where directly-connected
> > routes take precedence over static routes for same CIDR.
> >
> > To support AWS feature, where user can override "subnet" route (think,
> > "connected") with a static route, additional work is needed.
> > It’s not what I’m currently working on, but I thought about such use case
> > and it seems that it can be easily supported by adding ability to add
> > Logical_Router_Static_Route with some "override" flag, which ensures this
> > static route would be installed with the highest priority.
> >
> > So, if no objections here or other comments for now I’ll send v9.
> >
> > Thanks,
> > Han
> >
> >
> > Thanks.
> >
> > Regards,
> > Vladislav Odintsov
> >
> > On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org> wrote:
> >
> > On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com
> >
> > <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
> >
> >
> > I’ve submitted a patch [1] with my findings.
> > Also, if no comments for other my patches from this patch series, I
> >
> > can submit a new version.
> >
> > Should I?
> >
> >
> > No comments from my side.  Perhaps you can submit another version.
> >
> > Numan
> >
> >
> > 1:
> >
> >
> > https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
> > <
> >
> > https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
> >
> >
> >
> > Regards,
> > Vladislav Odintsov
> >
> > On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org <mailto:
> >
> > numans@ovn.org>> wrote:
> >
> >
> > On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com
> >
> > <mailto:odivlad@gmail.com <odivlad@gmail.com>> <mailto:odivlad@gmail.com
> > <odivlad@gmail.com> <mailto:
> > odivlad@gmail.com>>> wrote:
> >
> >
> > Two additions:
> >
> > 1. Regarding documentation for flow in lr_in_defrag section:
> >
> > It seems to me that documentation for it is written in a wrong
> >
> > section (lr_in_defrag).
> >
> > Since the flow is installed in lr_in_ip_routing, it should be
> >
> > documented there.
> >
> > I’ll move it if you don’t mind.
> >
> >
> > Sure.  Thanks.  I'd suggest having a separate patch for fixing the
> > documentation.
> >
> >
> >
> > 2. Documentation for other flow changes was added, but I’ve
> >
> > committed it to a wrong patch (#3).
> >
> > I’ll move documentation update between patches.
> >
> >
> > Ack.
> >
> > Thanks
> > Numan
> >
> >
> > Regards,
> > Vladislav Odintsov
> >
> > On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com>
> >
> > wrote:
> >
> >
> > Hi Numan,
> >
> > Thanks for the review.
> > Sure I will fix this. Should I wait for more comments or that’s all
> >
> > and I can send v9?
> >
> >
> > Regards,
> > Vladislav Odintsov
> >
> > On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
> >
> > On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <
> >
> > odivlad@gmail.com <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
> >
> >
> > With this patch routes to connected networks have higher
> > priority than static routes with same ip_prefix.
> >
> > This brings commonly-used behaviour for routes lookup order:
> > 1: longest prefix match
> > 2: metric
> >
> > The metric has next lookup order:
> > 1: connected routes
> > 2: static routes
> >
> > Earlier static and connected routes with same ip_prefix had
> > the same priority, so it was impossible to predict which one
> > is used for routing decision.
> >
> > Each route's prefix length has its own 'slot' in lflow prios.
> > Now prefix length space is calculated using next information:
> > to calculate route's priority prefixlen multiplied by 3
> > + route origin offset (0 - source-based route; 1 - directly-
> > connected route; 2 - static route).
> >
> > Also, enlarge prio for generic records in lr_in_ip_routing stage
> > by 10000.
> >
> > Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> >
> >
> > Hi Vladislav,
> >
> > Thanks for the patch.  Overall it looks good to me.  I've one
> >
> > comment.
> >
> > Looks like the documentation updated in ovn-northd.8.xml is not
> >
> > accurate.
> >
> >
> > This patch modifies the flows in the lr_in_ip_routing stage but
> >
> > this
> >
> > patch doesn't update the documentation.
> > Also the patch updates the documentation for the flow in
> >
> > lr_in_defrag
> >
> > stage, which seems not correct.
> >
> > Can you please update the documentation accurately ?
> >
> > Numan
> >
> > ---
> > northd/northd.c         | 50
> >
> > ++++++++++++++++++++++++++++-------------
> >
> > northd/ovn-northd.8.xml | 12 +++++-----
> > tests/ovn-northd.at     |  8 +++----
> > 3 files changed, 45 insertions(+), 25 deletions(-)
> >
> > diff --git a/northd/northd.c b/northd/northd.c
> > index 1e8a3457c..0d513f039 100644
> > --- a/northd/northd.c
> > +++ b/northd/northd.c
> > @@ -305,6 +305,15 @@ enum ovn_stage {
> > *
> > */
> >
> > +/*
> > + * Route offsets implement logic to prioritize traffic for
> >
> > routes with
> >
> > + * same ip_prefix values:
> > + *  -  connected route overrides static one;
> > + *  -  static route overrides connected route. */
> > +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> > +#define ROUTE_PRIO_OFFSET_STATIC 1
> > +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> > +
> > /* Returns an "enum ovn_stage" built from the arguments. */
> > static enum ovn_stage
> > ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline
> >
> > pipeline,
> >
> > @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
> > struct in6_addr prefix;
> > unsigned int plen;
> > bool is_src_route;
> > +    const char *origin;
> > uint16_t route_count;
> > struct ovs_list route_list; /* Contains ecmp_route_list_node */
> > };
> > @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
> > eg->prefix = route->prefix;
> > eg->plen = route->plen;
> > eg->is_src_route = route->is_src_route;
> > +    eg->origin = smap_get_def(&route->route->options, "origin",
> >
> > "");
> >
> > ovs_list_init(&eg->route_list);
> > ecmp_groups_add_route(eg, route);
> >
> > @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct
> >
> > in6_addr *prefix, unsigned int plen)
> >
> > static void
> > build_route_match(const struct ovn_port *op_inport, const char
> >
> > *network_s,
> >
> >               int plen, bool is_src_route, bool is_ipv4, struct
> >
> > ds *match,
> >
> > -                  uint16_t *priority)
> > +                  uint16_t *priority, int ofs)
> > {
> > const char *dir;
> > /* The priority here is calculated to implement
> >
> > longest-prefix-match
> >
> >  * routing. */
> > if (is_src_route) {
> >     dir = "src";
> > -        *priority = plen * 2;
> > +        ofs = 0;
> > } else {
> >     dir = "dst";
> > -        *priority = (plen * 2) + 1;
> > }
> >
> > +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
> > +
> > if (op_inport) {
> >     ds_put_format(match, "inport == %s && ",
> >
> > op_inport->json_key);
> >
> > }
> > @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap
> >
> > *lflows,
> >
> >               out_port->lrp_networks.ea_s,
> >               IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
> >               port_ip, out_port->json_key);
> > -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >
> > 300,
> >
> > +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >
> > 10300,
> >
> >                        ds_cstr(&match), ds_cstr(&actions),
> >                        &st_route->header_);
> >
> > @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows,
> >
> > struct ovn_datapath *od,
> >
> > struct ds route_match = DS_EMPTY_INITIALIZER;
> >
> > char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
> > +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
> > +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
> > build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route,
> >
> > is_ipv4,
> >
> > -                      &route_match, &priority);
> > +                      &route_match, &priority, ofs);
> > free(prefix_s);
> >
> > struct ds actions = DS_EMPTY_INITIALIZER;
> > @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct
> >
> > ovn_datapath *od,
> >
> >       const struct ovn_port *op, const char *lrp_addr_s,
> >       const char *network_s, int plen, const char *gateway,
> >       bool is_src_route, const struct ovsdb_idl_row *stage_hint,
> > -          bool is_discard_route)
> > +          bool is_discard_route, int ofs)
> > {
> > bool is_ipv4 = strchr(network_s, '.') ? true : false;
> > struct ds match = DS_EMPTY_INITIALIZER;
> > @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct
> >
> > ovn_datapath *od,
> >
> >     }
> > }
> > build_route_match(op_inport, network_s, plen, is_src_route,
> >
> > is_ipv4,
> >
> > -                      &match, &priority);
> > +                      &match, &priority, ofs);
> >
> > struct ds common_actions = DS_EMPTY_INITIALIZER;
> > struct ds actions = DS_EMPTY_INITIALIZER;
> > @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap
> >
> > *lflows, struct ovn_datapath *od,
> >
> >     }
> > }
> >
> > +    int ofs = !strcmp(smap_get_def(&route->options, "origin",
> >
> > ""),
> >
> > +                      ROUTE_ORIGIN_CONNECTED) ?
> >
> > ROUTE_PRIO_OFFSET_CONNECTED
> >
> > +                                              :
> >
> > ROUTE_PRIO_OFFSET_STATIC;
> >
> > +
> > char *prefix_s = build_route_prefix_s(&route_->prefix,
> >
> > route_->plen);
> >
> > add_route(lflows, route_->is_discard_route ? od : out_port->od,
> >
> > out_port,
> >
> >           lrp_addr_s, prefix_s, route_->plen, route->nexthop,
> > -              route_->is_src_route, &route->header_,
> >
> > route_->is_discard_route);
> >
> > +              route_->is_src_route, &route->header_,
> >
> > route_->is_discard_route,
> >
> > +              ofs);
> >
> > free(prefix_s);
> > }
> > @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
> >         add_route(lflows, op->od, op,
> >
> > op->lrp_networks.ipv4_addrs[i].addr_s,
> >
> >                   op->lrp_networks.ipv4_addrs[i].network_s,
> >                   op->lrp_networks.ipv4_addrs[i].plen, NULL,
> >
> > false,
> >
> > -                      &op->nbrp->header_, false);
> > +                      &op->nbrp->header_, false,
> >
> > ROUTE_PRIO_OFFSET_CONNECTED);
> >
> >     }
> >
> >     for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> >         add_route(lflows, op->od, op,
> >
> > op->lrp_networks.ipv6_addrs[i].addr_s,
> >
> >                   op->lrp_networks.ipv6_addrs[i].network_s,
> >                   op->lrp_networks.ipv6_addrs[i].plen, NULL,
> >
> > false,
> >
> > -                      &op->nbrp->header_, false);
> > +                      &op->nbrp->header_, false,
> >
> > ROUTE_PRIO_OFFSET_CONNECTED);
> >
> >     }
> > } else if (lsp_is_router(op->nbsp)) {
> >     struct ovn_port *peer = ovn_port_get_peer(ports, op);
> > @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
> >
> > peer->lrp_networks.ipv4_addrs[0].addr_s,
> >
> >                           laddrs->ipv4_addrs[k].network_s,
> >                           laddrs->ipv4_addrs[k].plen, NULL,
> >
> > false,
> >
> > -                              &peer->nbrp->header_, false);
> > +                              &peer->nbrp->header_, false,
> > +                              ROUTE_PRIO_OFFSET_CONNECTED);
> >             }
> >         }
> >     }
> > @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
> >     /* Drop IPv6 multicast traffic that shouldn't be forwarded,
> >      * i.e., router solicitation and router advertisement.
> >      */
> > -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
> > +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
> >                   "nd_rs || nd_ra", "drop;");
> >     if (!od->mcast_info.rtr.relay) {
> >         return;
> > @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
> >         }
> >         ds_put_format(actions, "outport = \"%s\"; ip.ttl--;
> >
> > next;",
> >
> >                       igmp_group->mcgroup.name);
> > -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >
> > 500,
> >
> > +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >
> > 10500,
> >
> >                       ds_cstr(match), ds_cstr(actions));
> >     }
> >
> > @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
> >      * ports. Otherwise drop any multicast traffic.
> >      */
> >     if (od->mcast_info.rtr.flood_static) {
> > -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >
> > 450,
> >
> > +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >
> > 10450,
> >
> >                       "ip4.mcast || ip6.mcast",
> >                       "clone { "
> >                             "outport = \""MC_STATIC"\"; "
> > @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
> >                             "next; "
> >                       "};");
> >     } else {
> > -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >
> > 450,
> >
> > +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >
> > 10450,
> >
> >                       "ip4.mcast || ip6.mcast", "drop;");
> >     }
> > }
> > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> > index fb67395e3..4f3a9d5e3 100644
> > --- a/northd/ovn-northd.8.xml
> > +++ b/northd/ovn-northd.8.xml
> > @@ -2945,12 +2945,12 @@ icmp6 {
> >
> > <p>
> >   If ECMP routes with symmetric reply are configured in the
> > -      <code>OVN_Northbound</code> database for a gateway router,
> >
> > a priority-300
> >
> > -      flow is added for each router port on which symmetric
> >
> > replies are
> >
> > -      configured. The matching logic for these ports essentially
> >
> > reverses the
> >
> > -      configured logic of the ECMP route. So for instance, a
> >
> > route with a
> >
> > -      destination routing policy will instead match if the
> >
> > source IP address
> >
> > -      matches the static route's prefix. The flow uses the action
> > +      <code>OVN_Northbound</code> database for a gateway router,
> >
> > a
> >
> > +      priority-10300 flow is added for each router port on which
> >
> > symmetric
> >
> > +      replies are configured. The matching logic for these ports
> >
> > essentially
> >
> > +      reverses the configured logic of the ECMP route. So for
> >
> > instance, a route
> >
> > +      with a destination routing policy will instead match if
> >
> > the source IP
> >
> >
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Vladislav Odintsov Nov. 19, 2021, 1:12 p.m. UTC | #13
Hi Numan,

yes, it’s ready. But I’ve based it on the commit from this patch [1].
Can you please take a look on it and apply if it’s okay.
Then I can send patch series without conflicting changes.

Or I can send it with that patch as a part of patchset.

1: https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/

Regards,
Vladislav Odintsov

> On 18 Nov 2021, at 19:50, Numan Siddique <numans@ovn.org> wrote:
> 
> Hi Vladislav,
> 
> Once v9 is ready,  please submit them. I'll take a look.
> 
> Thanks
> Numan
> 
> 
> On Thu, Nov 18, 2021 at 2:58 AM Han Zhou <zhouhan@gmail.com> wrote:
>> 
>> Sounds good to me.
>> 
>> On Wed, Nov 17, 2021 at 11:53 PM Vladislav Odintsov <odivlad@gmail.com>
>> wrote:
>> 
>>> Thanks, Han.
>>> Please see inline.
>>> 
>>> Regards,
>>> Vladislav Odintsov
>>> 
>>> On 18 Nov 2021, at 10:26, Han Zhou <zhouhan@gmail.com> wrote:
>>> 
>>> On Wed, Nov 17, 2021 at 10:10 PM Vladislav Odintsov <odivlad@gmail.com>
>>> wrote:
>>> 
>>> 
>>> Great, thanks.
>>> 
>>> Hi @Han,
>>> 
>>> I’d like you to look at the patch series too. Would you have time on it?
>>> If yes, could you redirect me on terms please.
>>> 
>>> 
>>> Hi Vladislav,
>>> 
>>> Thanks for adding me. I am sorry that I don't think I will have enough time
>>> for a detailed review for this series until Nov 29. Not sure if you can
>>> wait that long, but I don't think my review is mandatory if Numan is
>>> reviewing all the patches in detail.
>>> 
>>> 
>>> It’s okay from my side to wait for Dec 2-3.
>>> 
>>> I have a quick comment though, regarding the priority offset. It is
>>> mentioned in the commit message:
>>> 
>>> Each route's prefix length has its own 'slot' in lflow prios.
>>> Now prefix length space is calculated using next information:
>>> to calculate route's priority prefixlen multiplied by 3
>>> + route origin offset (0 - source-based route; 1 - directly-
>>> connected route; 2 - static route).
>>> 
>>> 
>>> But in the code, 2 is for connected, and 1 is for static:
>>> 
>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>> 
>>> 
>>> I wonder which one is your intent? I'd let the static route have higher
>>> priority, because otherwise why would the user add the static route at all?
>>> But this is more of a question than a suggestion. Is there any *standard*
>>> behavior or similar thing that we can refer from e.g. AWS?
>>> 
>>> 
>>> It’s a typo in commit message. I’ll fix that in v9.
>>> 
>>> It is done to support well-known behaviour, where directly-connected
>>> routes take precedence over static routes for same CIDR.
>>> 
>>> To support AWS feature, where user can override "subnet" route (think,
>>> "connected") with a static route, additional work is needed.
>>> It’s not what I’m currently working on, but I thought about such use case
>>> and it seems that it can be easily supported by adding ability to add
>>> Logical_Router_Static_Route with some "override" flag, which ensures this
>>> static route would be installed with the highest priority.
>>> 
>>> So, if no objections here or other comments for now I’ll send v9.
>>> 
>>> Thanks,
>>> Han
>>> 
>>> 
>>> Thanks.
>>> 
>>> Regards,
>>> Vladislav Odintsov
>>> 
>>> On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org> wrote:
>>> 
>>> On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com
>>> 
>>> <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
>>> 
>>> 
>>> I’ve submitted a patch [1] with my findings.
>>> Also, if no comments for other my patches from this patch series, I
>>> 
>>> can submit a new version.
>>> 
>>> Should I?
>>> 
>>> 
>>> No comments from my side.  Perhaps you can submit another version.
>>> 
>>> Numan
>>> 
>>> 
>>> 1:
>>> 
>>> 
>>> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
>>> <
>>> 
>>> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
>>> 
>>> 
>>> 
>>> Regards,
>>> Vladislav Odintsov
>>> 
>>> On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org <mailto:
>>> 
>>> numans@ovn.org>> wrote:
>>> 
>>> 
>>> On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com
>>> 
>>> <mailto:odivlad@gmail.com <odivlad@gmail.com>> <mailto:odivlad@gmail.com
>>> <odivlad@gmail.com> <mailto:
>>> odivlad@gmail.com>>> wrote:
>>> 
>>> 
>>> Two additions:
>>> 
>>> 1. Regarding documentation for flow in lr_in_defrag section:
>>> 
>>> It seems to me that documentation for it is written in a wrong
>>> 
>>> section (lr_in_defrag).
>>> 
>>> Since the flow is installed in lr_in_ip_routing, it should be
>>> 
>>> documented there.
>>> 
>>> I’ll move it if you don’t mind.
>>> 
>>> 
>>> Sure.  Thanks.  I'd suggest having a separate patch for fixing the
>>> documentation.
>>> 
>>> 
>>> 
>>> 2. Documentation for other flow changes was added, but I’ve
>>> 
>>> committed it to a wrong patch (#3).
>>> 
>>> I’ll move documentation update between patches.
>>> 
>>> 
>>> Ack.
>>> 
>>> Thanks
>>> Numan
>>> 
>>> 
>>> Regards,
>>> Vladislav Odintsov
>>> 
>>> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com>
>>> 
>>> wrote:
>>> 
>>> 
>>> Hi Numan,
>>> 
>>> Thanks for the review.
>>> Sure I will fix this. Should I wait for more comments or that’s all
>>> 
>>> and I can send v9?
>>> 
>>> 
>>> Regards,
>>> Vladislav Odintsov
>>> 
>>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
>>> 
>>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <
>>> 
>>> odivlad@gmail.com <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
>>> 
>>> 
>>> With this patch routes to connected networks have higher
>>> priority than static routes with same ip_prefix.
>>> 
>>> This brings commonly-used behaviour for routes lookup order:
>>> 1: longest prefix match
>>> 2: metric
>>> 
>>> The metric has next lookup order:
>>> 1: connected routes
>>> 2: static routes
>>> 
>>> Earlier static and connected routes with same ip_prefix had
>>> the same priority, so it was impossible to predict which one
>>> is used for routing decision.
>>> 
>>> Each route's prefix length has its own 'slot' in lflow prios.
>>> Now prefix length space is calculated using next information:
>>> to calculate route's priority prefixlen multiplied by 3
>>> + route origin offset (0 - source-based route; 1 - directly-
>>> connected route; 2 - static route).
>>> 
>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
>>> by 10000.
>>> 
>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>> 
>>> 
>>> Hi Vladislav,
>>> 
>>> Thanks for the patch.  Overall it looks good to me.  I've one
>>> 
>>> comment.
>>> 
>>> Looks like the documentation updated in ovn-northd.8.xml is not
>>> 
>>> accurate.
>>> 
>>> 
>>> This patch modifies the flows in the lr_in_ip_routing stage but
>>> 
>>> this
>>> 
>>> patch doesn't update the documentation.
>>> Also the patch updates the documentation for the flow in
>>> 
>>> lr_in_defrag
>>> 
>>> stage, which seems not correct.
>>> 
>>> Can you please update the documentation accurately ?
>>> 
>>> Numan
>>> 
>>> ---
>>> northd/northd.c         | 50
>>> 
>>> ++++++++++++++++++++++++++++-------------
>>> 
>>> northd/ovn-northd.8.xml | 12 +++++-----
>>> tests/ovn-northd.at     |  8 +++----
>>> 3 files changed, 45 insertions(+), 25 deletions(-)
>>> 
>>> diff --git a/northd/northd.c b/northd/northd.c
>>> index 1e8a3457c..0d513f039 100644
>>> --- a/northd/northd.c
>>> +++ b/northd/northd.c
>>> @@ -305,6 +305,15 @@ enum ovn_stage {
>>> *
>>> */
>>> 
>>> +/*
>>> + * Route offsets implement logic to prioritize traffic for
>>> 
>>> routes with
>>> 
>>> + * same ip_prefix values:
>>> + *  -  connected route overrides static one;
>>> + *  -  static route overrides connected route. */
>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>> +
>>> /* Returns an "enum ovn_stage" built from the arguments. */
>>> static enum ovn_stage
>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline
>>> 
>>> pipeline,
>>> 
>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>>> struct in6_addr prefix;
>>> unsigned int plen;
>>> bool is_src_route;
>>> +    const char *origin;
>>> uint16_t route_count;
>>> struct ovs_list route_list; /* Contains ecmp_route_list_node */
>>> };
>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>>> eg->prefix = route->prefix;
>>> eg->plen = route->plen;
>>> eg->is_src_route = route->is_src_route;
>>> +    eg->origin = smap_get_def(&route->route->options, "origin",
>>> 
>>> "");
>>> 
>>> ovs_list_init(&eg->route_list);
>>> ecmp_groups_add_route(eg, route);
>>> 
>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct
>>> 
>>> in6_addr *prefix, unsigned int plen)
>>> 
>>> static void
>>> build_route_match(const struct ovn_port *op_inport, const char
>>> 
>>> *network_s,
>>> 
>>>              int plen, bool is_src_route, bool is_ipv4, struct
>>> 
>>> ds *match,
>>> 
>>> -                  uint16_t *priority)
>>> +                  uint16_t *priority, int ofs)
>>> {
>>> const char *dir;
>>> /* The priority here is calculated to implement
>>> 
>>> longest-prefix-match
>>> 
>>> * routing. */
>>> if (is_src_route) {
>>>    dir = "src";
>>> -        *priority = plen * 2;
>>> +        ofs = 0;
>>> } else {
>>>    dir = "dst";
>>> -        *priority = (plen * 2) + 1;
>>> }
>>> 
>>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
>>> +
>>> if (op_inport) {
>>>    ds_put_format(match, "inport == %s && ",
>>> 
>>> op_inport->json_key);
>>> 
>>> }
>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap
>>> 
>>> *lflows,
>>> 
>>>              out_port->lrp_networks.ea_s,
>>>              IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>>>              port_ip, out_port->json_key);
>>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>> 
>>> 300,
>>> 
>>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>> 
>>> 10300,
>>> 
>>>                       ds_cstr(&match), ds_cstr(&actions),
>>>                       &st_route->header_);
>>> 
>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows,
>>> 
>>> struct ovn_datapath *od,
>>> 
>>> struct ds route_match = DS_EMPTY_INITIALIZER;
>>> 
>>> char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
>>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
>>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>>> build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route,
>>> 
>>> is_ipv4,
>>> 
>>> -                      &route_match, &priority);
>>> +                      &route_match, &priority, ofs);
>>> free(prefix_s);
>>> 
>>> struct ds actions = DS_EMPTY_INITIALIZER;
>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct
>>> 
>>> ovn_datapath *od,
>>> 
>>>      const struct ovn_port *op, const char *lrp_addr_s,
>>>      const char *network_s, int plen, const char *gateway,
>>>      bool is_src_route, const struct ovsdb_idl_row *stage_hint,
>>> -          bool is_discard_route)
>>> +          bool is_discard_route, int ofs)
>>> {
>>> bool is_ipv4 = strchr(network_s, '.') ? true : false;
>>> struct ds match = DS_EMPTY_INITIALIZER;
>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct
>>> 
>>> ovn_datapath *od,
>>> 
>>>    }
>>> }
>>> build_route_match(op_inport, network_s, plen, is_src_route,
>>> 
>>> is_ipv4,
>>> 
>>> -                      &match, &priority);
>>> +                      &match, &priority, ofs);
>>> 
>>> struct ds common_actions = DS_EMPTY_INITIALIZER;
>>> struct ds actions = DS_EMPTY_INITIALIZER;
>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap
>>> 
>>> *lflows, struct ovn_datapath *od,
>>> 
>>>    }
>>> }
>>> 
>>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin",
>>> 
>>> ""),
>>> 
>>> +                      ROUTE_ORIGIN_CONNECTED) ?
>>> 
>>> ROUTE_PRIO_OFFSET_CONNECTED
>>> 
>>> +                                              :
>>> 
>>> ROUTE_PRIO_OFFSET_STATIC;
>>> 
>>> +
>>> char *prefix_s = build_route_prefix_s(&route_->prefix,
>>> 
>>> route_->plen);
>>> 
>>> add_route(lflows, route_->is_discard_route ? od : out_port->od,
>>> 
>>> out_port,
>>> 
>>>          lrp_addr_s, prefix_s, route_->plen, route->nexthop,
>>> -              route_->is_src_route, &route->header_,
>>> 
>>> route_->is_discard_route);
>>> 
>>> +              route_->is_src_route, &route->header_,
>>> 
>>> route_->is_discard_route,
>>> 
>>> +              ofs);
>>> 
>>> free(prefix_s);
>>> }
>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>>>        add_route(lflows, op->od, op,
>>> 
>>> op->lrp_networks.ipv4_addrs[i].addr_s,
>>> 
>>>                  op->lrp_networks.ipv4_addrs[i].network_s,
>>>                  op->lrp_networks.ipv4_addrs[i].plen, NULL,
>>> 
>>> false,
>>> 
>>> -                      &op->nbrp->header_, false);
>>> +                      &op->nbrp->header_, false,
>>> 
>>> ROUTE_PRIO_OFFSET_CONNECTED);
>>> 
>>>    }
>>> 
>>>    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>>        add_route(lflows, op->od, op,
>>> 
>>> op->lrp_networks.ipv6_addrs[i].addr_s,
>>> 
>>>                  op->lrp_networks.ipv6_addrs[i].network_s,
>>>                  op->lrp_networks.ipv6_addrs[i].plen, NULL,
>>> 
>>> false,
>>> 
>>> -                      &op->nbrp->header_, false);
>>> +                      &op->nbrp->header_, false,
>>> 
>>> ROUTE_PRIO_OFFSET_CONNECTED);
>>> 
>>>    }
>>> } else if (lsp_is_router(op->nbsp)) {
>>>    struct ovn_port *peer = ovn_port_get_peer(ports, op);
>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>>> 
>>> peer->lrp_networks.ipv4_addrs[0].addr_s,
>>> 
>>>                          laddrs->ipv4_addrs[k].network_s,
>>>                          laddrs->ipv4_addrs[k].plen, NULL,
>>> 
>>> false,
>>> 
>>> -                              &peer->nbrp->header_, false);
>>> +                              &peer->nbrp->header_, false,
>>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>>>            }
>>>        }
>>>    }
>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>    /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>>>     * i.e., router solicitation and router advertisement.
>>>     */
>>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
>>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>>>                  "nd_rs || nd_ra", "drop;");
>>>    if (!od->mcast_info.rtr.relay) {
>>>        return;
>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>        }
>>>        ds_put_format(actions, "outport = \"%s\"; ip.ttl--;
>>> 
>>> next;",
>>> 
>>>                      igmp_group->mcgroup.name);
>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>> 
>>> 500,
>>> 
>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>> 
>>> 10500,
>>> 
>>>                      ds_cstr(match), ds_cstr(actions));
>>>    }
>>> 
>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>     * ports. Otherwise drop any multicast traffic.
>>>     */
>>>    if (od->mcast_info.rtr.flood_static) {
>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>> 
>>> 450,
>>> 
>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>> 
>>> 10450,
>>> 
>>>                      "ip4.mcast || ip6.mcast",
>>>                      "clone { "
>>>                            "outport = \""MC_STATIC"\"; "
>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>                            "next; "
>>>                      "};");
>>>    } else {
>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>> 
>>> 450,
>>> 
>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>> 
>>> 10450,
>>> 
>>>                      "ip4.mcast || ip6.mcast", "drop;");
>>>    }
>>> }
>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>>> index fb67395e3..4f3a9d5e3 100644
>>> --- a/northd/ovn-northd.8.xml
>>> +++ b/northd/ovn-northd.8.xml
>>> @@ -2945,12 +2945,12 @@ icmp6 {
>>> 
>>> <p>
>>>  If ECMP routes with symmetric reply are configured in the
>>> -      <code>OVN_Northbound</code> database for a gateway router,
>>> 
>>> a priority-300
>>> 
>>> -      flow is added for each router port on which symmetric
>>> 
>>> replies are
>>> 
>>> -      configured. The matching logic for these ports essentially
>>> 
>>> reverses the
>>> 
>>> -      configured logic of the ECMP route. So for instance, a
>>> 
>>> route with a
>>> 
>>> -      destination routing policy will instead match if the
>>> 
>>> source IP address
>>> 
>>> -      matches the static route's prefix. The flow uses the action
>>> +      <code>OVN_Northbound</code> database for a gateway router,
>>> 
>>> a
>>> 
>>> +      priority-10300 flow is added for each router port on which
>>> 
>>> symmetric
>>> 
>>> +      replies are configured. The matching logic for these ports
>>> 
>>> essentially
>>> 
>>> +      reverses the configured logic of the ECMP route. So for
>>> 
>>> instance, a route
>>> 
>>> +      with a destination routing policy will instead match if
>>> 
>>> the source IP
>>> 
>>> 
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Numan Siddique Nov. 19, 2021, 2:55 p.m. UTC | #14
On Fri, Nov 19, 2021 at 8:12 AM Vladislav Odintsov <odivlad@gmail.com> wrote:
>
> Hi Numan,
>
> yes, it’s ready. But I’ve based it on the commit from this patch [1].
> Can you please take a look on it and apply if it’s okay.
> Then I can send patch series without conflicting changes.
>
> Or I can send it with that patch as a part of patchset.
>
> 1: https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/

I'd suggest if you can include [1] as part of the patchset.

Numan

>
> Regards,
> Vladislav Odintsov
>
> > On 18 Nov 2021, at 19:50, Numan Siddique <numans@ovn.org> wrote:
> >
> > Hi Vladislav,
> >
> > Once v9 is ready,  please submit them. I'll take a look.
> >
> > Thanks
> > Numan
> >
> >
> > On Thu, Nov 18, 2021 at 2:58 AM Han Zhou <zhouhan@gmail.com> wrote:
> >>
> >> Sounds good to me.
> >>
> >> On Wed, Nov 17, 2021 at 11:53 PM Vladislav Odintsov <odivlad@gmail.com>
> >> wrote:
> >>
> >>> Thanks, Han.
> >>> Please see inline.
> >>>
> >>> Regards,
> >>> Vladislav Odintsov
> >>>
> >>> On 18 Nov 2021, at 10:26, Han Zhou <zhouhan@gmail.com> wrote:
> >>>
> >>> On Wed, Nov 17, 2021 at 10:10 PM Vladislav Odintsov <odivlad@gmail.com>
> >>> wrote:
> >>>
> >>>
> >>> Great, thanks.
> >>>
> >>> Hi @Han,
> >>>
> >>> I’d like you to look at the patch series too. Would you have time on it?
> >>> If yes, could you redirect me on terms please.
> >>>
> >>>
> >>> Hi Vladislav,
> >>>
> >>> Thanks for adding me. I am sorry that I don't think I will have enough time
> >>> for a detailed review for this series until Nov 29. Not sure if you can
> >>> wait that long, but I don't think my review is mandatory if Numan is
> >>> reviewing all the patches in detail.
> >>>
> >>>
> >>> It’s okay from my side to wait for Dec 2-3.
> >>>
> >>> I have a quick comment though, regarding the priority offset. It is
> >>> mentioned in the commit message:
> >>>
> >>> Each route's prefix length has its own 'slot' in lflow prios.
> >>> Now prefix length space is calculated using next information:
> >>> to calculate route's priority prefixlen multiplied by 3
> >>> + route origin offset (0 - source-based route; 1 - directly-
> >>> connected route; 2 - static route).
> >>>
> >>>
> >>> But in the code, 2 is for connected, and 1 is for static:
> >>>
> >>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> >>> +#define ROUTE_PRIO_OFFSET_STATIC 1
> >>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> >>>
> >>>
> >>> I wonder which one is your intent? I'd let the static route have higher
> >>> priority, because otherwise why would the user add the static route at all?
> >>> But this is more of a question than a suggestion. Is there any *standard*
> >>> behavior or similar thing that we can refer from e.g. AWS?
> >>>
> >>>
> >>> It’s a typo in commit message. I’ll fix that in v9.
> >>>
> >>> It is done to support well-known behaviour, where directly-connected
> >>> routes take precedence over static routes for same CIDR.
> >>>
> >>> To support AWS feature, where user can override "subnet" route (think,
> >>> "connected") with a static route, additional work is needed.
> >>> It’s not what I’m currently working on, but I thought about such use case
> >>> and it seems that it can be easily supported by adding ability to add
> >>> Logical_Router_Static_Route with some "override" flag, which ensures this
> >>> static route would be installed with the highest priority.
> >>>
> >>> So, if no objections here or other comments for now I’ll send v9.
> >>>
> >>> Thanks,
> >>> Han
> >>>
> >>>
> >>> Thanks.
> >>>
> >>> Regards,
> >>> Vladislav Odintsov
> >>>
> >>> On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org> wrote:
> >>>
> >>> On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com
> >>>
> >>> <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
> >>>
> >>>
> >>> I’ve submitted a patch [1] with my findings.
> >>> Also, if no comments for other my patches from this patch series, I
> >>>
> >>> can submit a new version.
> >>>
> >>> Should I?
> >>>
> >>>
> >>> No comments from my side.  Perhaps you can submit another version.
> >>>
> >>> Numan
> >>>
> >>>
> >>> 1:
> >>>
> >>>
> >>> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
> >>> <
> >>>
> >>> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
> >>>
> >>>
> >>>
> >>> Regards,
> >>> Vladislav Odintsov
> >>>
> >>> On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org <mailto:
> >>>
> >>> numans@ovn.org>> wrote:
> >>>
> >>>
> >>> On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com
> >>>
> >>> <mailto:odivlad@gmail.com <odivlad@gmail.com>> <mailto:odivlad@gmail.com
> >>> <odivlad@gmail.com> <mailto:
> >>> odivlad@gmail.com>>> wrote:
> >>>
> >>>
> >>> Two additions:
> >>>
> >>> 1. Regarding documentation for flow in lr_in_defrag section:
> >>>
> >>> It seems to me that documentation for it is written in a wrong
> >>>
> >>> section (lr_in_defrag).
> >>>
> >>> Since the flow is installed in lr_in_ip_routing, it should be
> >>>
> >>> documented there.
> >>>
> >>> I’ll move it if you don’t mind.
> >>>
> >>>
> >>> Sure.  Thanks.  I'd suggest having a separate patch for fixing the
> >>> documentation.
> >>>
> >>>
> >>>
> >>> 2. Documentation for other flow changes was added, but I’ve
> >>>
> >>> committed it to a wrong patch (#3).
> >>>
> >>> I’ll move documentation update between patches.
> >>>
> >>>
> >>> Ack.
> >>>
> >>> Thanks
> >>> Numan
> >>>
> >>>
> >>> Regards,
> >>> Vladislav Odintsov
> >>>
> >>> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com>
> >>>
> >>> wrote:
> >>>
> >>>
> >>> Hi Numan,
> >>>
> >>> Thanks for the review.
> >>> Sure I will fix this. Should I wait for more comments or that’s all
> >>>
> >>> and I can send v9?
> >>>
> >>>
> >>> Regards,
> >>> Vladislav Odintsov
> >>>
> >>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
> >>>
> >>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <
> >>>
> >>> odivlad@gmail.com <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
> >>>
> >>>
> >>> With this patch routes to connected networks have higher
> >>> priority than static routes with same ip_prefix.
> >>>
> >>> This brings commonly-used behaviour for routes lookup order:
> >>> 1: longest prefix match
> >>> 2: metric
> >>>
> >>> The metric has next lookup order:
> >>> 1: connected routes
> >>> 2: static routes
> >>>
> >>> Earlier static and connected routes with same ip_prefix had
> >>> the same priority, so it was impossible to predict which one
> >>> is used for routing decision.
> >>>
> >>> Each route's prefix length has its own 'slot' in lflow prios.
> >>> Now prefix length space is calculated using next information:
> >>> to calculate route's priority prefixlen multiplied by 3
> >>> + route origin offset (0 - source-based route; 1 - directly-
> >>> connected route; 2 - static route).
> >>>
> >>> Also, enlarge prio for generic records in lr_in_ip_routing stage
> >>> by 10000.
> >>>
> >>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
> >>>
> >>>
> >>> Hi Vladislav,
> >>>
> >>> Thanks for the patch.  Overall it looks good to me.  I've one
> >>>
> >>> comment.
> >>>
> >>> Looks like the documentation updated in ovn-northd.8.xml is not
> >>>
> >>> accurate.
> >>>
> >>>
> >>> This patch modifies the flows in the lr_in_ip_routing stage but
> >>>
> >>> this
> >>>
> >>> patch doesn't update the documentation.
> >>> Also the patch updates the documentation for the flow in
> >>>
> >>> lr_in_defrag
> >>>
> >>> stage, which seems not correct.
> >>>
> >>> Can you please update the documentation accurately ?
> >>>
> >>> Numan
> >>>
> >>> ---
> >>> northd/northd.c         | 50
> >>>
> >>> ++++++++++++++++++++++++++++-------------
> >>>
> >>> northd/ovn-northd.8.xml | 12 +++++-----
> >>> tests/ovn-northd.at     |  8 +++----
> >>> 3 files changed, 45 insertions(+), 25 deletions(-)
> >>>
> >>> diff --git a/northd/northd.c b/northd/northd.c
> >>> index 1e8a3457c..0d513f039 100644
> >>> --- a/northd/northd.c
> >>> +++ b/northd/northd.c
> >>> @@ -305,6 +305,15 @@ enum ovn_stage {
> >>> *
> >>> */
> >>>
> >>> +/*
> >>> + * Route offsets implement logic to prioritize traffic for
> >>>
> >>> routes with
> >>>
> >>> + * same ip_prefix values:
> >>> + *  -  connected route overrides static one;
> >>> + *  -  static route overrides connected route. */
> >>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
> >>> +#define ROUTE_PRIO_OFFSET_STATIC 1
> >>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
> >>> +
> >>> /* Returns an "enum ovn_stage" built from the arguments. */
> >>> static enum ovn_stage
> >>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline
> >>>
> >>> pipeline,
> >>>
> >>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
> >>> struct in6_addr prefix;
> >>> unsigned int plen;
> >>> bool is_src_route;
> >>> +    const char *origin;
> >>> uint16_t route_count;
> >>> struct ovs_list route_list; /* Contains ecmp_route_list_node */
> >>> };
> >>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
> >>> eg->prefix = route->prefix;
> >>> eg->plen = route->plen;
> >>> eg->is_src_route = route->is_src_route;
> >>> +    eg->origin = smap_get_def(&route->route->options, "origin",
> >>>
> >>> "");
> >>>
> >>> ovs_list_init(&eg->route_list);
> >>> ecmp_groups_add_route(eg, route);
> >>>
> >>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct
> >>>
> >>> in6_addr *prefix, unsigned int plen)
> >>>
> >>> static void
> >>> build_route_match(const struct ovn_port *op_inport, const char
> >>>
> >>> *network_s,
> >>>
> >>>              int plen, bool is_src_route, bool is_ipv4, struct
> >>>
> >>> ds *match,
> >>>
> >>> -                  uint16_t *priority)
> >>> +                  uint16_t *priority, int ofs)
> >>> {
> >>> const char *dir;
> >>> /* The priority here is calculated to implement
> >>>
> >>> longest-prefix-match
> >>>
> >>> * routing. */
> >>> if (is_src_route) {
> >>>    dir = "src";
> >>> -        *priority = plen * 2;
> >>> +        ofs = 0;
> >>> } else {
> >>>    dir = "dst";
> >>> -        *priority = (plen * 2) + 1;
> >>> }
> >>>
> >>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
> >>> +
> >>> if (op_inport) {
> >>>    ds_put_format(match, "inport == %s && ",
> >>>
> >>> op_inport->json_key);
> >>>
> >>> }
> >>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap
> >>>
> >>> *lflows,
> >>>
> >>>              out_port->lrp_networks.ea_s,
> >>>              IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
> >>>              port_ip, out_port->json_key);
> >>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >>>
> >>> 300,
> >>>
> >>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >>>
> >>> 10300,
> >>>
> >>>                       ds_cstr(&match), ds_cstr(&actions),
> >>>                       &st_route->header_);
> >>>
> >>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows,
> >>>
> >>> struct ovn_datapath *od,
> >>>
> >>> struct ds route_match = DS_EMPTY_INITIALIZER;
> >>>
> >>> char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
> >>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
> >>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
> >>> build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route,
> >>>
> >>> is_ipv4,
> >>>
> >>> -                      &route_match, &priority);
> >>> +                      &route_match, &priority, ofs);
> >>> free(prefix_s);
> >>>
> >>> struct ds actions = DS_EMPTY_INITIALIZER;
> >>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct
> >>>
> >>> ovn_datapath *od,
> >>>
> >>>      const struct ovn_port *op, const char *lrp_addr_s,
> >>>      const char *network_s, int plen, const char *gateway,
> >>>      bool is_src_route, const struct ovsdb_idl_row *stage_hint,
> >>> -          bool is_discard_route)
> >>> +          bool is_discard_route, int ofs)
> >>> {
> >>> bool is_ipv4 = strchr(network_s, '.') ? true : false;
> >>> struct ds match = DS_EMPTY_INITIALIZER;
> >>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct
> >>>
> >>> ovn_datapath *od,
> >>>
> >>>    }
> >>> }
> >>> build_route_match(op_inport, network_s, plen, is_src_route,
> >>>
> >>> is_ipv4,
> >>>
> >>> -                      &match, &priority);
> >>> +                      &match, &priority, ofs);
> >>>
> >>> struct ds common_actions = DS_EMPTY_INITIALIZER;
> >>> struct ds actions = DS_EMPTY_INITIALIZER;
> >>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap
> >>>
> >>> *lflows, struct ovn_datapath *od,
> >>>
> >>>    }
> >>> }
> >>>
> >>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin",
> >>>
> >>> ""),
> >>>
> >>> +                      ROUTE_ORIGIN_CONNECTED) ?
> >>>
> >>> ROUTE_PRIO_OFFSET_CONNECTED
> >>>
> >>> +                                              :
> >>>
> >>> ROUTE_PRIO_OFFSET_STATIC;
> >>>
> >>> +
> >>> char *prefix_s = build_route_prefix_s(&route_->prefix,
> >>>
> >>> route_->plen);
> >>>
> >>> add_route(lflows, route_->is_discard_route ? od : out_port->od,
> >>>
> >>> out_port,
> >>>
> >>>          lrp_addr_s, prefix_s, route_->plen, route->nexthop,
> >>> -              route_->is_src_route, &route->header_,
> >>>
> >>> route_->is_discard_route);
> >>>
> >>> +              route_->is_src_route, &route->header_,
> >>>
> >>> route_->is_discard_route,
> >>>
> >>> +              ofs);
> >>>
> >>> free(prefix_s);
> >>> }
> >>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
> >>>        add_route(lflows, op->od, op,
> >>>
> >>> op->lrp_networks.ipv4_addrs[i].addr_s,
> >>>
> >>>                  op->lrp_networks.ipv4_addrs[i].network_s,
> >>>                  op->lrp_networks.ipv4_addrs[i].plen, NULL,
> >>>
> >>> false,
> >>>
> >>> -                      &op->nbrp->header_, false);
> >>> +                      &op->nbrp->header_, false,
> >>>
> >>> ROUTE_PRIO_OFFSET_CONNECTED);
> >>>
> >>>    }
> >>>
> >>>    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> >>>        add_route(lflows, op->od, op,
> >>>
> >>> op->lrp_networks.ipv6_addrs[i].addr_s,
> >>>
> >>>                  op->lrp_networks.ipv6_addrs[i].network_s,
> >>>                  op->lrp_networks.ipv6_addrs[i].plen, NULL,
> >>>
> >>> false,
> >>>
> >>> -                      &op->nbrp->header_, false);
> >>> +                      &op->nbrp->header_, false,
> >>>
> >>> ROUTE_PRIO_OFFSET_CONNECTED);
> >>>
> >>>    }
> >>> } else if (lsp_is_router(op->nbsp)) {
> >>>    struct ovn_port *peer = ovn_port_get_peer(ports, op);
> >>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
> >>>
> >>> peer->lrp_networks.ipv4_addrs[0].addr_s,
> >>>
> >>>                          laddrs->ipv4_addrs[k].network_s,
> >>>                          laddrs->ipv4_addrs[k].plen, NULL,
> >>>
> >>> false,
> >>>
> >>> -                              &peer->nbrp->header_, false);
> >>> +                              &peer->nbrp->header_, false,
> >>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
> >>>            }
> >>>        }
> >>>    }
> >>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>    /* Drop IPv6 multicast traffic that shouldn't be forwarded,
> >>>     * i.e., router solicitation and router advertisement.
> >>>     */
> >>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
> >>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
> >>>                  "nd_rs || nd_ra", "drop;");
> >>>    if (!od->mcast_info.rtr.relay) {
> >>>        return;
> >>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>        }
> >>>        ds_put_format(actions, "outport = \"%s\"; ip.ttl--;
> >>>
> >>> next;",
> >>>
> >>>                      igmp_group->mcgroup.name);
> >>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >>>
> >>> 500,
> >>>
> >>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >>>
> >>> 10500,
> >>>
> >>>                      ds_cstr(match), ds_cstr(actions));
> >>>    }
> >>>
> >>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>     * ports. Otherwise drop any multicast traffic.
> >>>     */
> >>>    if (od->mcast_info.rtr.flood_static) {
> >>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >>>
> >>> 450,
> >>>
> >>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >>>
> >>> 10450,
> >>>
> >>>                      "ip4.mcast || ip6.mcast",
> >>>                      "clone { "
> >>>                            "outport = \""MC_STATIC"\"; "
> >>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
> >>>                            "next; "
> >>>                      "};");
> >>>    } else {
> >>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >>>
> >>> 450,
> >>>
> >>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
> >>>
> >>> 10450,
> >>>
> >>>                      "ip4.mcast || ip6.mcast", "drop;");
> >>>    }
> >>> }
> >>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
> >>> index fb67395e3..4f3a9d5e3 100644
> >>> --- a/northd/ovn-northd.8.xml
> >>> +++ b/northd/ovn-northd.8.xml
> >>> @@ -2945,12 +2945,12 @@ icmp6 {
> >>>
> >>> <p>
> >>>  If ECMP routes with symmetric reply are configured in the
> >>> -      <code>OVN_Northbound</code> database for a gateway router,
> >>>
> >>> a priority-300
> >>>
> >>> -      flow is added for each router port on which symmetric
> >>>
> >>> replies are
> >>>
> >>> -      configured. The matching logic for these ports essentially
> >>>
> >>> reverses the
> >>>
> >>> -      configured logic of the ECMP route. So for instance, a
> >>>
> >>> route with a
> >>>
> >>> -      destination routing policy will instead match if the
> >>>
> >>> source IP address
> >>>
> >>> -      matches the static route's prefix. The flow uses the action
> >>> +      <code>OVN_Northbound</code> database for a gateway router,
> >>>
> >>> a
> >>>
> >>> +      priority-10300 flow is added for each router port on which
> >>>
> >>> symmetric
> >>>
> >>> +      replies are configured. The matching logic for these ports
> >>>
> >>> essentially
> >>>
> >>> +      reverses the configured logic of the ECMP route. So for
> >>>
> >>> instance, a route
> >>>
> >>> +      with a destination routing policy will instead match if
> >>>
> >>> the source IP
> >>>
> >>>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Vladislav Odintsov Nov. 19, 2021, 4:10 p.m. UTC | #15
Done: https://patchwork.ozlabs.org/project/ovn/patch/20211119160721.60612-2-odivlad@gmail.com/

Regards,
Vladislav Odintsov

> On 19 Nov 2021, at 17:55, Numan Siddique <numans@ovn.org> wrote:
> 
> On Fri, Nov 19, 2021 at 8:12 AM Vladislav Odintsov <odivlad@gmail.com <mailto:odivlad@gmail.com>> wrote:
>> 
>> Hi Numan,
>> 
>> yes, it’s ready. But I’ve based it on the commit from this patch [1].
>> Can you please take a look on it and apply if it’s okay.
>> Then I can send patch series without conflicting changes.
>> 
>> Or I can send it with that patch as a part of patchset.
>> 
>> 1: https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/ <https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/>
> 
> I'd suggest if you can include [1] as part of the patchset.
> 
> Numan
> 
>> 
>> Regards,
>> Vladislav Odintsov
>> 
>>> On 18 Nov 2021, at 19:50, Numan Siddique <numans@ovn.org> wrote:
>>> 
>>> Hi Vladislav,
>>> 
>>> Once v9 is ready,  please submit them. I'll take a look.
>>> 
>>> Thanks
>>> Numan
>>> 
>>> 
>>> On Thu, Nov 18, 2021 at 2:58 AM Han Zhou <zhouhan@gmail.com> wrote:
>>>> 
>>>> Sounds good to me.
>>>> 
>>>> On Wed, Nov 17, 2021 at 11:53 PM Vladislav Odintsov <odivlad@gmail.com>
>>>> wrote:
>>>> 
>>>>> Thanks, Han.
>>>>> Please see inline.
>>>>> 
>>>>> Regards,
>>>>> Vladislav Odintsov
>>>>> 
>>>>> On 18 Nov 2021, at 10:26, Han Zhou <zhouhan@gmail.com> wrote:
>>>>> 
>>>>> On Wed, Nov 17, 2021 at 10:10 PM Vladislav Odintsov <odivlad@gmail.com>
>>>>> wrote:
>>>>> 
>>>>> 
>>>>> Great, thanks.
>>>>> 
>>>>> Hi @Han,
>>>>> 
>>>>> I’d like you to look at the patch series too. Would you have time on it?
>>>>> If yes, could you redirect me on terms please.
>>>>> 
>>>>> 
>>>>> Hi Vladislav,
>>>>> 
>>>>> Thanks for adding me. I am sorry that I don't think I will have enough time
>>>>> for a detailed review for this series until Nov 29. Not sure if you can
>>>>> wait that long, but I don't think my review is mandatory if Numan is
>>>>> reviewing all the patches in detail.
>>>>> 
>>>>> 
>>>>> It’s okay from my side to wait for Dec 2-3.
>>>>> 
>>>>> I have a quick comment though, regarding the priority offset. It is
>>>>> mentioned in the commit message:
>>>>> 
>>>>> Each route's prefix length has its own 'slot' in lflow prios.
>>>>> Now prefix length space is calculated using next information:
>>>>> to calculate route's priority prefixlen multiplied by 3
>>>>> + route origin offset (0 - source-based route; 1 - directly-
>>>>> connected route; 2 - static route).
>>>>> 
>>>>> 
>>>>> But in the code, 2 is for connected, and 1 is for static:
>>>>> 
>>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>>>> 
>>>>> 
>>>>> I wonder which one is your intent? I'd let the static route have higher
>>>>> priority, because otherwise why would the user add the static route at all?
>>>>> But this is more of a question than a suggestion. Is there any *standard*
>>>>> behavior or similar thing that we can refer from e.g. AWS?
>>>>> 
>>>>> 
>>>>> It’s a typo in commit message. I’ll fix that in v9.
>>>>> 
>>>>> It is done to support well-known behaviour, where directly-connected
>>>>> routes take precedence over static routes for same CIDR.
>>>>> 
>>>>> To support AWS feature, where user can override "subnet" route (think,
>>>>> "connected") with a static route, additional work is needed.
>>>>> It’s not what I’m currently working on, but I thought about such use case
>>>>> and it seems that it can be easily supported by adding ability to add
>>>>> Logical_Router_Static_Route with some "override" flag, which ensures this
>>>>> static route would be installed with the highest priority.
>>>>> 
>>>>> So, if no objections here or other comments for now I’ll send v9.
>>>>> 
>>>>> Thanks,
>>>>> Han
>>>>> 
>>>>> 
>>>>> Thanks.
>>>>> 
>>>>> Regards,
>>>>> Vladislav Odintsov
>>>>> 
>>>>> On 18 Nov 2021, at 00:05, Numan Siddique <numans@ovn.org> wrote:
>>>>> 
>>>>> On Wed, Nov 17, 2021 at 3:38 PM Vladislav Odintsov <odivlad@gmail.com
>>>>> 
>>>>> <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
>>>>> 
>>>>> 
>>>>> I’ve submitted a patch [1] with my findings.
>>>>> Also, if no comments for other my patches from this patch series, I
>>>>> 
>>>>> can submit a new version.
>>>>> 
>>>>> Should I?
>>>>> 
>>>>> 
>>>>> No comments from my side.  Perhaps you can submit another version.
>>>>> 
>>>>> Numan
>>>>> 
>>>>> 
>>>>> 1:
>>>>> 
>>>>> 
>>>>> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
>>>>> <
>>>>> 
>>>>> https://patchwork.ozlabs.org/project/ovn/patch/20211117203545.46142-1-odivlad@gmail.com/
>>>>> 
>>>>> 
>>>>> 
>>>>> Regards,
>>>>> Vladislav Odintsov
>>>>> 
>>>>> On 17 Nov 2021, at 20:57, Numan Siddique <numans@ovn.org <mailto:
>>>>> 
>>>>> numans@ovn.org>> wrote:
>>>>> 
>>>>> 
>>>>> On Wed, Nov 17, 2021 at 9:24 AM Vladislav Odintsov <odivlad@gmail.com
>>>>> 
>>>>> <mailto:odivlad@gmail.com <odivlad@gmail.com>> <mailto:odivlad@gmail.com
>>>>> <odivlad@gmail.com> <mailto:
>>>>> odivlad@gmail.com>>> wrote:
>>>>> 
>>>>> 
>>>>> Two additions:
>>>>> 
>>>>> 1. Regarding documentation for flow in lr_in_defrag section:
>>>>> 
>>>>> It seems to me that documentation for it is written in a wrong
>>>>> 
>>>>> section (lr_in_defrag).
>>>>> 
>>>>> Since the flow is installed in lr_in_ip_routing, it should be
>>>>> 
>>>>> documented there.
>>>>> 
>>>>> I’ll move it if you don’t mind.
>>>>> 
>>>>> 
>>>>> Sure.  Thanks.  I'd suggest having a separate patch for fixing the
>>>>> documentation.
>>>>> 
>>>>> 
>>>>> 
>>>>> 2. Documentation for other flow changes was added, but I’ve
>>>>> 
>>>>> committed it to a wrong patch (#3).
>>>>> 
>>>>> I’ll move documentation update between patches.
>>>>> 
>>>>> 
>>>>> Ack.
>>>>> 
>>>>> Thanks
>>>>> Numan
>>>>> 
>>>>> 
>>>>> Regards,
>>>>> Vladislav Odintsov
>>>>> 
>>>>> On 17 Nov 2021, at 13:51, Vladislav Odintsov <odivlad@gmail.com>
>>>>> 
>>>>> wrote:
>>>>> 
>>>>> 
>>>>> Hi Numan,
>>>>> 
>>>>> Thanks for the review.
>>>>> Sure I will fix this. Should I wait for more comments or that’s all
>>>>> 
>>>>> and I can send v9?
>>>>> 
>>>>> 
>>>>> Regards,
>>>>> Vladislav Odintsov
>>>>> 
>>>>> On 17 Nov 2021, at 05:17, Numan Siddique <numans@ovn.org> wrote:
>>>>> 
>>>>> On Sat, Nov 13, 2021 at 4:44 AM Vladislav Odintsov <
>>>>> 
>>>>> odivlad@gmail.com <mailto:odivlad@gmail.com <odivlad@gmail.com>>> wrote:
>>>>> 
>>>>> 
>>>>> With this patch routes to connected networks have higher
>>>>> priority than static routes with same ip_prefix.
>>>>> 
>>>>> This brings commonly-used behaviour for routes lookup order:
>>>>> 1: longest prefix match
>>>>> 2: metric
>>>>> 
>>>>> The metric has next lookup order:
>>>>> 1: connected routes
>>>>> 2: static routes
>>>>> 
>>>>> Earlier static and connected routes with same ip_prefix had
>>>>> the same priority, so it was impossible to predict which one
>>>>> is used for routing decision.
>>>>> 
>>>>> Each route's prefix length has its own 'slot' in lflow prios.
>>>>> Now prefix length space is calculated using next information:
>>>>> to calculate route's priority prefixlen multiplied by 3
>>>>> + route origin offset (0 - source-based route; 1 - directly-
>>>>> connected route; 2 - static route).
>>>>> 
>>>>> Also, enlarge prio for generic records in lr_in_ip_routing stage
>>>>> by 10000.
>>>>> 
>>>>> Signed-off-by: Vladislav Odintsov <odivlad@gmail.com>
>>>>> 
>>>>> 
>>>>> Hi Vladislav,
>>>>> 
>>>>> Thanks for the patch.  Overall it looks good to me.  I've one
>>>>> 
>>>>> comment.
>>>>> 
>>>>> Looks like the documentation updated in ovn-northd.8.xml is not
>>>>> 
>>>>> accurate.
>>>>> 
>>>>> 
>>>>> This patch modifies the flows in the lr_in_ip_routing stage but
>>>>> 
>>>>> this
>>>>> 
>>>>> patch doesn't update the documentation.
>>>>> Also the patch updates the documentation for the flow in
>>>>> 
>>>>> lr_in_defrag
>>>>> 
>>>>> stage, which seems not correct.
>>>>> 
>>>>> Can you please update the documentation accurately ?
>>>>> 
>>>>> Numan
>>>>> 
>>>>> ---
>>>>> northd/northd.c         | 50
>>>>> 
>>>>> ++++++++++++++++++++++++++++-------------
>>>>> 
>>>>> northd/ovn-northd.8.xml | 12 +++++-----
>>>>> tests/ovn-northd.at     |  8 +++----
>>>>> 3 files changed, 45 insertions(+), 25 deletions(-)
>>>>> 
>>>>> diff --git a/northd/northd.c b/northd/northd.c
>>>>> index 1e8a3457c..0d513f039 100644
>>>>> --- a/northd/northd.c
>>>>> +++ b/northd/northd.c
>>>>> @@ -305,6 +305,15 @@ enum ovn_stage {
>>>>> *
>>>>> */
>>>>> 
>>>>> +/*
>>>>> + * Route offsets implement logic to prioritize traffic for
>>>>> 
>>>>> routes with
>>>>> 
>>>>> + * same ip_prefix values:
>>>>> + *  -  connected route overrides static one;
>>>>> + *  -  static route overrides connected route. */
>>>>> +#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
>>>>> +#define ROUTE_PRIO_OFFSET_STATIC 1
>>>>> +#define ROUTE_PRIO_OFFSET_CONNECTED 2
>>>>> +
>>>>> /* Returns an "enum ovn_stage" built from the arguments. */
>>>>> static enum ovn_stage
>>>>> ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline
>>>>> 
>>>>> pipeline,
>>>>> 
>>>>> @@ -8782,6 +8791,7 @@ struct ecmp_groups_node {
>>>>> struct in6_addr prefix;
>>>>> unsigned int plen;
>>>>> bool is_src_route;
>>>>> +    const char *origin;
>>>>> uint16_t route_count;
>>>>> struct ovs_list route_list; /* Contains ecmp_route_list_node */
>>>>> };
>>>>> @@ -8819,6 +8829,7 @@ ecmp_groups_add(struct hmap *ecmp_groups,
>>>>> eg->prefix = route->prefix;
>>>>> eg->plen = route->plen;
>>>>> eg->is_src_route = route->is_src_route;
>>>>> +    eg->origin = smap_get_def(&route->route->options, "origin",
>>>>> 
>>>>> "");
>>>>> 
>>>>> ovs_list_init(&eg->route_list);
>>>>> ecmp_groups_add_route(eg, route);
>>>>> 
>>>>> @@ -8919,19 +8930,20 @@ build_route_prefix_s(const struct
>>>>> 
>>>>> in6_addr *prefix, unsigned int plen)
>>>>> 
>>>>> static void
>>>>> build_route_match(const struct ovn_port *op_inport, const char
>>>>> 
>>>>> *network_s,
>>>>> 
>>>>>             int plen, bool is_src_route, bool is_ipv4, struct
>>>>> 
>>>>> ds *match,
>>>>> 
>>>>> -                  uint16_t *priority)
>>>>> +                  uint16_t *priority, int ofs)
>>>>> {
>>>>> const char *dir;
>>>>> /* The priority here is calculated to implement
>>>>> 
>>>>> longest-prefix-match
>>>>> 
>>>>> * routing. */
>>>>> if (is_src_route) {
>>>>>   dir = "src";
>>>>> -        *priority = plen * 2;
>>>>> +        ofs = 0;
>>>>> } else {
>>>>>   dir = "dst";
>>>>> -        *priority = (plen * 2) + 1;
>>>>> }
>>>>> 
>>>>> +    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
>>>>> +
>>>>> if (op_inport) {
>>>>>   ds_put_format(match, "inport == %s && ",
>>>>> 
>>>>> op_inport->json_key);
>>>>> 
>>>>> }
>>>>> @@ -9073,7 +9085,7 @@ add_ecmp_symmetric_reply_flows(struct hmap
>>>>> 
>>>>> *lflows,
>>>>> 
>>>>>             out_port->lrp_networks.ea_s,
>>>>>             IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
>>>>>             port_ip, out_port->json_key);
>>>>> -    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>>>> 
>>>>> 300,
>>>>> 
>>>>> +    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>>>> 
>>>>> 10300,
>>>>> 
>>>>>                      ds_cstr(&match), ds_cstr(&actions),
>>>>>                      &st_route->header_);
>>>>> 
>>>>> @@ -9103,8 +9115,10 @@ build_ecmp_route_flow(struct hmap *lflows,
>>>>> 
>>>>> struct ovn_datapath *od,
>>>>> 
>>>>> struct ds route_match = DS_EMPTY_INITIALIZER;
>>>>> 
>>>>> char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
>>>>> +    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
>>>>> +        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
>>>>> build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route,
>>>>> 
>>>>> is_ipv4,
>>>>> 
>>>>> -                      &route_match, &priority);
>>>>> +                      &route_match, &priority, ofs);
>>>>> free(prefix_s);
>>>>> 
>>>>> struct ds actions = DS_EMPTY_INITIALIZER;
>>>>> @@ -9180,7 +9194,7 @@ add_route(struct hmap *lflows, struct
>>>>> 
>>>>> ovn_datapath *od,
>>>>> 
>>>>>     const struct ovn_port *op, const char *lrp_addr_s,
>>>>>     const char *network_s, int plen, const char *gateway,
>>>>>     bool is_src_route, const struct ovsdb_idl_row *stage_hint,
>>>>> -          bool is_discard_route)
>>>>> +          bool is_discard_route, int ofs)
>>>>> {
>>>>> bool is_ipv4 = strchr(network_s, '.') ? true : false;
>>>>> struct ds match = DS_EMPTY_INITIALIZER;
>>>>> @@ -9196,7 +9210,7 @@ add_route(struct hmap *lflows, struct
>>>>> 
>>>>> ovn_datapath *od,
>>>>> 
>>>>>   }
>>>>> }
>>>>> build_route_match(op_inport, network_s, plen, is_src_route,
>>>>> 
>>>>> is_ipv4,
>>>>> 
>>>>> -                      &match, &priority);
>>>>> +                      &match, &priority, ofs);
>>>>> 
>>>>> struct ds common_actions = DS_EMPTY_INITIALIZER;
>>>>> struct ds actions = DS_EMPTY_INITIALIZER;
>>>>> @@ -9256,10 +9270,15 @@ build_static_route_flow(struct hmap
>>>>> 
>>>>> *lflows, struct ovn_datapath *od,
>>>>> 
>>>>>   }
>>>>> }
>>>>> 
>>>>> +    int ofs = !strcmp(smap_get_def(&route->options, "origin",
>>>>> 
>>>>> ""),
>>>>> 
>>>>> +                      ROUTE_ORIGIN_CONNECTED) ?
>>>>> 
>>>>> ROUTE_PRIO_OFFSET_CONNECTED
>>>>> 
>>>>> +                                              :
>>>>> 
>>>>> ROUTE_PRIO_OFFSET_STATIC;
>>>>> 
>>>>> +
>>>>> char *prefix_s = build_route_prefix_s(&route_->prefix,
>>>>> 
>>>>> route_->plen);
>>>>> 
>>>>> add_route(lflows, route_->is_discard_route ? od : out_port->od,
>>>>> 
>>>>> out_port,
>>>>> 
>>>>>         lrp_addr_s, prefix_s, route_->plen, route->nexthop,
>>>>> -              route_->is_src_route, &route->header_,
>>>>> 
>>>>> route_->is_discard_route);
>>>>> 
>>>>> +              route_->is_src_route, &route->header_,
>>>>> 
>>>>> route_->is_discard_route,
>>>>> 
>>>>> +              ofs);
>>>>> 
>>>>> free(prefix_s);
>>>>> }
>>>>> @@ -10672,14 +10691,14 @@ build_ip_routing_flows_for_lrouter_port(
>>>>>       add_route(lflows, op->od, op,
>>>>> 
>>>>> op->lrp_networks.ipv4_addrs[i].addr_s,
>>>>> 
>>>>>                 op->lrp_networks.ipv4_addrs[i].network_s,
>>>>>                 op->lrp_networks.ipv4_addrs[i].plen, NULL,
>>>>> 
>>>>> false,
>>>>> 
>>>>> -                      &op->nbrp->header_, false);
>>>>> +                      &op->nbrp->header_, false,
>>>>> 
>>>>> ROUTE_PRIO_OFFSET_CONNECTED);
>>>>> 
>>>>>   }
>>>>> 
>>>>>   for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>>>>>       add_route(lflows, op->od, op,
>>>>> 
>>>>> op->lrp_networks.ipv6_addrs[i].addr_s,
>>>>> 
>>>>>                 op->lrp_networks.ipv6_addrs[i].network_s,
>>>>>                 op->lrp_networks.ipv6_addrs[i].plen, NULL,
>>>>> 
>>>>> false,
>>>>> 
>>>>> -                      &op->nbrp->header_, false);
>>>>> +                      &op->nbrp->header_, false,
>>>>> 
>>>>> ROUTE_PRIO_OFFSET_CONNECTED);
>>>>> 
>>>>>   }
>>>>> } else if (lsp_is_router(op->nbsp)) {
>>>>>   struct ovn_port *peer = ovn_port_get_peer(ports, op);
>>>>> @@ -10702,7 +10721,8 @@ build_ip_routing_flows_for_lrouter_port(
>>>>> 
>>>>> peer->lrp_networks.ipv4_addrs[0].addr_s,
>>>>> 
>>>>>                         laddrs->ipv4_addrs[k].network_s,
>>>>>                         laddrs->ipv4_addrs[k].plen, NULL,
>>>>> 
>>>>> false,
>>>>> 
>>>>> -                              &peer->nbrp->header_, false);
>>>>> +                              &peer->nbrp->header_, false,
>>>>> +                              ROUTE_PRIO_OFFSET_CONNECTED);
>>>>>           }
>>>>>       }
>>>>>   }
>>>>> @@ -10773,7 +10793,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>   /* Drop IPv6 multicast traffic that shouldn't be forwarded,
>>>>>    * i.e., router solicitation and router advertisement.
>>>>>    */
>>>>> -        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
>>>>> +        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
>>>>>                 "nd_rs || nd_ra", "drop;");
>>>>>   if (!od->mcast_info.rtr.relay) {
>>>>>       return;
>>>>> @@ -10801,7 +10821,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>       }
>>>>>       ds_put_format(actions, "outport = \"%s\"; ip.ttl--;
>>>>> 
>>>>> next;",
>>>>> 
>>>>>                     igmp_group->mcgroup.name);
>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>>>> 
>>>>> 500,
>>>>> 
>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>>>> 
>>>>> 10500,
>>>>> 
>>>>>                     ds_cstr(match), ds_cstr(actions));
>>>>>   }
>>>>> 
>>>>> @@ -10809,7 +10829,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>    * ports. Otherwise drop any multicast traffic.
>>>>>    */
>>>>>   if (od->mcast_info.rtr.flood_static) {
>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>>>> 
>>>>> 450,
>>>>> 
>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>>>> 
>>>>> 10450,
>>>>> 
>>>>>                     "ip4.mcast || ip6.mcast",
>>>>>                     "clone { "
>>>>>                           "outport = \""MC_STATIC"\"; "
>>>>> @@ -10817,7 +10837,7 @@ build_mcast_lookup_flows_for_lrouter(
>>>>>                           "next; "
>>>>>                     "};");
>>>>>   } else {
>>>>> -            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>>>> 
>>>>> 450,
>>>>> 
>>>>> +            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
>>>>> 
>>>>> 10450,
>>>>> 
>>>>>                     "ip4.mcast || ip6.mcast", "drop;");
>>>>>   }
>>>>> }
>>>>> diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
>>>>> index fb67395e3..4f3a9d5e3 100644
>>>>> --- a/northd/ovn-northd.8.xml
>>>>> +++ b/northd/ovn-northd.8.xml
>>>>> @@ -2945,12 +2945,12 @@ icmp6 {
>>>>> 
>>>>> <p>
>>>>> If ECMP routes with symmetric reply are configured in the
>>>>> -      <code>OVN_Northbound</code> database for a gateway router,
>>>>> 
>>>>> a priority-300
>>>>> 
>>>>> -      flow is added for each router port on which symmetric
>>>>> 
>>>>> replies are
>>>>> 
>>>>> -      configured. The matching logic for these ports essentially
>>>>> 
>>>>> reverses the
>>>>> 
>>>>> -      configured logic of the ECMP route. So for instance, a
>>>>> 
>>>>> route with a
>>>>> 
>>>>> -      destination routing policy will instead match if the
>>>>> 
>>>>> source IP address
>>>>> 
>>>>> -      matches the static route's prefix. The flow uses the action
>>>>> +      <code>OVN_Northbound</code> database for a gateway router,
>>>>> 
>>>>> a
>>>>> 
>>>>> +      priority-10300 flow is added for each router port on which
>>>>> 
>>>>> symmetric
>>>>> 
>>>>> +      replies are configured. The matching logic for these ports
>>>>> 
>>>>> essentially
>>>>> 
>>>>> +      reverses the configured logic of the ECMP route. So for
>>>>> 
>>>>> instance, a route
>>>>> 
>>>>> +      with a destination routing policy will instead match if
>>>>> 
>>>>> the source IP
>>>>> 
>>>>> 
>>>> _______________________________________________
>>>> dev mailing list
>>>> dev@openvswitch.org
>>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>> _______________________________________________
>>> dev mailing list
>>> dev@openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>> 
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org <mailto:dev@openvswitch.org>
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev <https://mail.openvswitch.org/mailman/listinfo/ovs-dev>
diff mbox series

Patch

diff --git a/northd/northd.c b/northd/northd.c
index 1e8a3457c..0d513f039 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -305,6 +305,15 @@  enum ovn_stage {
  *
  */
 
+/*
+ * Route offsets implement logic to prioritize traffic for routes with
+ * same ip_prefix values:
+ *  -  connected route overrides static one;
+ *  -  static route overrides connected route. */
+#define ROUTE_PRIO_OFFSET_MULTIPLIER 3
+#define ROUTE_PRIO_OFFSET_STATIC 1
+#define ROUTE_PRIO_OFFSET_CONNECTED 2
+
 /* Returns an "enum ovn_stage" built from the arguments. */
 static enum ovn_stage
 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
@@ -8782,6 +8791,7 @@  struct ecmp_groups_node {
     struct in6_addr prefix;
     unsigned int plen;
     bool is_src_route;
+    const char *origin;
     uint16_t route_count;
     struct ovs_list route_list; /* Contains ecmp_route_list_node */
 };
@@ -8819,6 +8829,7 @@  ecmp_groups_add(struct hmap *ecmp_groups,
     eg->prefix = route->prefix;
     eg->plen = route->plen;
     eg->is_src_route = route->is_src_route;
+    eg->origin = smap_get_def(&route->route->options, "origin", "");
     ovs_list_init(&eg->route_list);
     ecmp_groups_add_route(eg, route);
 
@@ -8919,19 +8930,20 @@  build_route_prefix_s(const struct in6_addr *prefix, unsigned int plen)
 static void
 build_route_match(const struct ovn_port *op_inport, const char *network_s,
                   int plen, bool is_src_route, bool is_ipv4, struct ds *match,
-                  uint16_t *priority)
+                  uint16_t *priority, int ofs)
 {
     const char *dir;
     /* The priority here is calculated to implement longest-prefix-match
      * routing. */
     if (is_src_route) {
         dir = "src";
-        *priority = plen * 2;
+        ofs = 0;
     } else {
         dir = "dst";
-        *priority = (plen * 2) + 1;
     }
 
+    *priority = (plen * ROUTE_PRIO_OFFSET_MULTIPLIER) + ofs;
+
     if (op_inport) {
         ds_put_format(match, "inport == %s && ", op_inport->json_key);
     }
@@ -9073,7 +9085,7 @@  add_ecmp_symmetric_reply_flows(struct hmap *lflows,
                   out_port->lrp_networks.ea_s,
                   IN6_IS_ADDR_V4MAPPED(&route->prefix) ? "" : "xx",
                   port_ip, out_port->json_key);
-    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 300,
+    ovn_lflow_add_with_hint(lflows, od, S_ROUTER_IN_IP_ROUTING, 10300,
                            ds_cstr(&match), ds_cstr(&actions),
                            &st_route->header_);
 
@@ -9103,8 +9115,10 @@  build_ecmp_route_flow(struct hmap *lflows, struct ovn_datapath *od,
     struct ds route_match = DS_EMPTY_INITIALIZER;
 
     char *prefix_s = build_route_prefix_s(&eg->prefix, eg->plen);
+    int ofs = !strcmp(eg->origin, ROUTE_ORIGIN_CONNECTED) ?
+        ROUTE_PRIO_OFFSET_CONNECTED: ROUTE_PRIO_OFFSET_STATIC;
     build_route_match(NULL, prefix_s, eg->plen, eg->is_src_route, is_ipv4,
-                      &route_match, &priority);
+                      &route_match, &priority, ofs);
     free(prefix_s);
 
     struct ds actions = DS_EMPTY_INITIALIZER;
@@ -9180,7 +9194,7 @@  add_route(struct hmap *lflows, struct ovn_datapath *od,
           const struct ovn_port *op, const char *lrp_addr_s,
           const char *network_s, int plen, const char *gateway,
           bool is_src_route, const struct ovsdb_idl_row *stage_hint,
-          bool is_discard_route)
+          bool is_discard_route, int ofs)
 {
     bool is_ipv4 = strchr(network_s, '.') ? true : false;
     struct ds match = DS_EMPTY_INITIALIZER;
@@ -9196,7 +9210,7 @@  add_route(struct hmap *lflows, struct ovn_datapath *od,
         }
     }
     build_route_match(op_inport, network_s, plen, is_src_route, is_ipv4,
-                      &match, &priority);
+                      &match, &priority, ofs);
 
     struct ds common_actions = DS_EMPTY_INITIALIZER;
     struct ds actions = DS_EMPTY_INITIALIZER;
@@ -9256,10 +9270,15 @@  build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
         }
     }
 
+    int ofs = !strcmp(smap_get_def(&route->options, "origin", ""),
+                      ROUTE_ORIGIN_CONNECTED) ? ROUTE_PRIO_OFFSET_CONNECTED
+                                              : ROUTE_PRIO_OFFSET_STATIC;
+
     char *prefix_s = build_route_prefix_s(&route_->prefix, route_->plen);
     add_route(lflows, route_->is_discard_route ? od : out_port->od, out_port,
               lrp_addr_s, prefix_s, route_->plen, route->nexthop,
-              route_->is_src_route, &route->header_, route_->is_discard_route);
+              route_->is_src_route, &route->header_, route_->is_discard_route,
+              ofs);
 
     free(prefix_s);
 }
@@ -10672,14 +10691,14 @@  build_ip_routing_flows_for_lrouter_port(
             add_route(lflows, op->od, op, op->lrp_networks.ipv4_addrs[i].addr_s,
                       op->lrp_networks.ipv4_addrs[i].network_s,
                       op->lrp_networks.ipv4_addrs[i].plen, NULL, false,
-                      &op->nbrp->header_, false);
+                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
         }
 
         for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
             add_route(lflows, op->od, op, op->lrp_networks.ipv6_addrs[i].addr_s,
                       op->lrp_networks.ipv6_addrs[i].network_s,
                       op->lrp_networks.ipv6_addrs[i].plen, NULL, false,
-                      &op->nbrp->header_, false);
+                      &op->nbrp->header_, false, ROUTE_PRIO_OFFSET_CONNECTED);
         }
     } else if (lsp_is_router(op->nbsp)) {
         struct ovn_port *peer = ovn_port_get_peer(ports, op);
@@ -10702,7 +10721,8 @@  build_ip_routing_flows_for_lrouter_port(
                               peer->lrp_networks.ipv4_addrs[0].addr_s,
                               laddrs->ipv4_addrs[k].network_s,
                               laddrs->ipv4_addrs[k].plen, NULL, false,
-                              &peer->nbrp->header_, false);
+                              &peer->nbrp->header_, false,
+                              ROUTE_PRIO_OFFSET_CONNECTED);
                 }
             }
         }
@@ -10773,7 +10793,7 @@  build_mcast_lookup_flows_for_lrouter(
         /* Drop IPv6 multicast traffic that shouldn't be forwarded,
          * i.e., router solicitation and router advertisement.
          */
-        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 550,
+        ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10550,
                       "nd_rs || nd_ra", "drop;");
         if (!od->mcast_info.rtr.relay) {
             return;
@@ -10801,7 +10821,7 @@  build_mcast_lookup_flows_for_lrouter(
             }
             ds_put_format(actions, "outport = \"%s\"; ip.ttl--; next;",
                           igmp_group->mcgroup.name);
-            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 500,
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10500,
                           ds_cstr(match), ds_cstr(actions));
         }
 
@@ -10809,7 +10829,7 @@  build_mcast_lookup_flows_for_lrouter(
          * ports. Otherwise drop any multicast traffic.
          */
         if (od->mcast_info.rtr.flood_static) {
-            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
                           "ip4.mcast || ip6.mcast",
                           "clone { "
                                 "outport = \""MC_STATIC"\"; "
@@ -10817,7 +10837,7 @@  build_mcast_lookup_flows_for_lrouter(
                                 "next; "
                           "};");
         } else {
-            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 450,
+            ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING, 10450,
                           "ip4.mcast || ip6.mcast", "drop;");
         }
     }
diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index fb67395e3..4f3a9d5e3 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -2945,12 +2945,12 @@  icmp6 {
 
     <p>
       If ECMP routes with symmetric reply are configured in the
-      <code>OVN_Northbound</code> database for a gateway router, a priority-300
-      flow is added for each router port on which symmetric replies are
-      configured. The matching logic for these ports essentially reverses the
-      configured logic of the ECMP route. So for instance, a route with a
-      destination routing policy will instead match if the source IP address
-      matches the static route's prefix. The flow uses the action
+      <code>OVN_Northbound</code> database for a gateway router, a
+      priority-10300 flow is added for each router port on which symmetric
+      replies are configured. The matching logic for these ports essentially
+      reverses the configured logic of the ECMP route. So for instance, a route
+      with a destination routing policy will instead match if the source IP
+      address matches the static route's prefix. The flow uses the action
       <code>ct_next</code> to send IP packets to the connection tracker for
       packet de-fragmentation and tracking before sending it to the next table.
     </p>
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 85b47a18f..3c1a97f73 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -5430,7 +5430,7 @@  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
-  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
+  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
 ])
 AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
   table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
@@ -5443,7 +5443,7 @@  check ovn-nbctl --wait=sb --ecmp-symmetric-reply lr-route-add lr0 1.0.0.1 192.16
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*select" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
-  table=??(lr_in_ip_routing   ), priority=65   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
+  table=??(lr_in_ip_routing   ), priority=97   , match=(ip4.dst == 1.0.0.1/32), action=(ip.ttl--; flags.loopback = 1; reg8[[0..15]] = 1; reg8[[16..31]] = select(1, 2);)
 ])
 AT_CHECK([grep -e "lr_in_ip_routing_ecmp" lr0flows | sed 's/192\.168\.0\..0/192.168.0.??/' | sed 's/table=../table=??/' | sort], [0], [dnl
   table=??(lr_in_ip_routing_ecmp), priority=100  , match=(reg8[[0..15]] == 1 && reg8[[16..31]] == 1), action=(reg0 = 192.168.0.??; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; next;)
@@ -5458,14 +5458,14 @@  check ovn-nbctl --wait=sb lr-route-add lr0 1.0.0.0/24 192.168.0.10
 ovn-sbctl dump-flows lr0 > lr0flows
 
 AT_CHECK([grep -e "lr_in_ip_routing.*192.168.0.10" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
-  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 1.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = 192.168.0.10; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
 ])
 
 check ovn-nbctl --wait=sb lr-route-add lr0 2.0.0.0/24 lr0-public
 
 ovn-sbctl dump-flows lr0 > lr0flows
 AT_CHECK([grep -e "lr_in_ip_routing.*2.0.0.0" lr0flows | sed 's/table=../table=??/' | sort], [0], [dnl
-  table=??(lr_in_ip_routing   ), priority=49   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
+  table=??(lr_in_ip_routing   ), priority=73   , match=(ip4.dst == 2.0.0.0/24), action=(ip.ttl--; reg8[[0..15]] = 0; reg0 = ip4.dst; reg1 = 192.168.0.1; eth.src = 00:00:20:20:12:13; outport = "lr0-public"; flags.loopback = 1; next;)
 ])
 
 AT_CLEANUP