diff mbox series

[ovs-dev] netdev-offload-tc: Only install recirc flows if the parent is present.

Message ID 4ff966dc90760c53c8f72dc446583322d5421a60.1733243048.git.echaudro@redhat.com
State Changes Requested
Delegated to: Ilya Maximets
Headers show
Series [ovs-dev] netdev-offload-tc: Only install recirc flows if the parent is present. | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed

Commit Message

Eelco Chaudron Dec. 3, 2024, 4:24 p.m. UTC
When flows are added through TC, only the match and actions are
verified to determine if they can be handled by TC. If they can,
the TC flow is installed.

However, when the flow is a continuation of a previously recirculated flow,
it can happen that the flow performing the recirculation is installed
in the kernel. This may occur, for example, if it includes an action that
cannot be handled by TC.

If the kernel module has the first flow but not the second one (missing
because it is programmed in TC), the flow is sent to userspace via an
upcall.

This patch tracks which recirculation goto actions are handled by TC.
A matching TC rule is installed only if the corresponding recirculation
ID is confirmed to be handled by TC.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/netdev-offload-tc.c          | 51 ++++++++++++++++++++++++++++++--
 tests/system-offloads-traffic.at | 45 ++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+), 2 deletions(-)

Comments

Roi Dayan Dec. 4, 2024, 9:33 a.m. UTC | #1
On 03/12/2024 18:24, Eelco Chaudron wrote:
> When flows are added through TC, only the match and actions are
> verified to determine if they can be handled by TC. If they can,
> the TC flow is installed.
> 
> However, when the flow is a continuation of a previously recirculated flow,
> it can happen that the flow performing the recirculation is installed
> in the kernel. This may occur, for example, if it includes an action that
> cannot be handled by TC.
> 
> If the kernel module has the first flow but not the second one (missing
> because it is programmed in TC), the flow is sent to userspace via an
> upcall.
> 
> This patch tracks which recirculation goto actions are handled by TC.
> A matching TC rule is installed only if the corresponding recirculation
> ID is confirmed to be handled by TC.
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  lib/netdev-offload-tc.c          | 51 ++++++++++++++++++++++++++++++--
>  tests/system-offloads-traffic.at | 45 ++++++++++++++++++++++++++++
>  2 files changed, 94 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 74e60d0f7..eb7c0886e 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -19,6 +19,7 @@
>  #include <errno.h>
>  #include <linux/if_ether.h>
>  
> +#include "ccmap.h"
>  #include "dpif.h"
>  #include "hash.h"
>  #include "id-pool.h"
> @@ -78,6 +79,10 @@ struct policer_node {
>      uint32_t police_idx;
>  };
>  
> +/* ccmap and protective mutex for counting recirculation id (chain) usage. */
> +static struct ovs_mutex used_chains_mutex = OVS_MUTEX_INITIALIZER;
> +static struct ccmap used_chains OVS_GUARDED;
> +
>  /* Protects below meter police ids pool. */
>  static struct ovs_mutex meter_police_ids_mutex = OVS_MUTEX_INITIALIZER;
>  static struct id_pool *meter_police_ids OVS_GUARDED_BY(meter_police_ids_mutex);
> @@ -204,6 +209,10 @@ static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
>   * @adjust_stats: When flow gets updated with new actions, we need to adjust
>   *                the reported stats to include previous values as the hardware
>   *                rule is removed and re-added. This stats copy is used for it.
> + * @chain_goto: If a TC jump action exists for the flow, the target chain it
> + *              jumps to is stored here.  Only a single goto action is stored,
> + *              as TC supports only one goto action per flow (there is no
> + *              return mechanism).
>   */
>  struct ufid_tc_data {
>      struct hmap_node ufid_to_tc_node;
> @@ -212,6 +221,7 @@ struct ufid_tc_data {
>      struct tcf_id id;
>      struct netdev *netdev;
>      struct dpif_flow_stats adjust_stats;
> +    uint32_t chain_goto;
>  };
>  
>  static void
> @@ -233,6 +243,13 @@ del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
>      hmap_remove(&ufid_to_tc, &data->ufid_to_tc_node);
>      hmap_remove(&tc_to_ufid, &data->tc_to_ufid_node);
>      netdev_close(data->netdev);
> +
> +    if (data->chain_goto) {
> +        ovs_mutex_lock(&used_chains_mutex);
> +        ccmap_dec(&used_chains, data->chain_goto);
> +        ovs_mutex_unlock(&used_chains_mutex);
> +    }
> +
>      free(data);
>  }
>  
> @@ -288,7 +305,8 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid,
>  /* Add ufid entry to ufid_to_tc hashmap. */
>  static void
>  add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
> -                    struct tcf_id *id, struct dpif_flow_stats *stats)
> +                    struct tcf_id *id, struct dpif_flow_stats *stats,
> +                    uint32_t chain_goto)
>  {
>      struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
>      size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
> @@ -300,6 +318,7 @@ add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
>      new_data->ufid = *ufid;
>      new_data->id = *id;
>      new_data->netdev = netdev_ref(netdev);
> +    new_data->chain_goto = chain_goto;
>      if (stats) {
>          new_data->adjust_stats = *stats;
>      }
> @@ -2261,6 +2280,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>      struct flow_tnl *tnl_mask = &mask->tunnel;
>      struct dpif_flow_stats adjust_stats;
>      bool recirc_act = false;
> +    uint32_t chain_goto = 0;
>      uint32_t block_id = 0;
>      struct tcf_id id;
>      uint32_t chain;
> @@ -2280,6 +2300,17 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>      chain = key->recirc_id;
>      mask->recirc_id = 0;
>  
> +    if (chain) {
> +        /* If we match on a recirculation ID, we must ensure the previous
> +         * flow is also in the TC datapath; otherwise, the entry is useless,
> +         * as the related packets will be handled by upcalls. */
> +        if (!ccmap_find(&used_chains, chain)) {
> +            VLOG_DBG_RL(&rl, "match for chain %u failed due to non-existing "
> +                        "goto chain action", chain);
> +            return EOPNOTSUPP;
> +        }
> +    }
> +
>      if (flow_tnl_dst_is_set(&key->tunnel) ||
>          flow_tnl_src_is_set(&key->tunnel)) {
>          VLOG_DBG_RL(&rl,
> @@ -2594,6 +2625,20 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>          return EOPNOTSUPP;
>      }
>  
> +    if (recirc_act) {
> +        struct tc_action *action = flower.actions;
> +
> +        for (int i = 0; i < flower.action_count; i++, action++) {
> +            if (action->type == TC_ACT_GOTO && action->chain) {
> +                chain_goto = action->chain;
> +                ovs_mutex_lock(&used_chains_mutex);
> +                ccmap_inc(&used_chains, chain_goto);
> +                ovs_mutex_unlock(&used_chains_mutex);
> +                break;
> +            }
> +        }
> +    }
> +
>      memset(&adjust_stats, 0, sizeof adjust_stats);
>      if (get_ufid_tc_mapping(ufid, &id) == 0) {
>          VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
> @@ -2619,7 +2664,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>              memset(stats, 0, sizeof *stats);
>              netdev_tc_adjust_stats(stats, &adjust_stats);
>          }
> -        add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats);
> +        add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats, chain_goto);
>      }
>  
>      return err;
> @@ -3096,6 +3141,8 @@ netdev_tc_init_flow_api(struct netdev *netdev)
>      tc_add_del_qdisc(ifindex, false, 0, hook);
>  
>      if (ovsthread_once_start(&once)) {
> +        ccmap_init(&used_chains);
> +
>          probe_tc_block_support(ifindex);
>          /* Need to re-fetch block id as it depends on feature availability. */
>          block_id = get_block_id_from_netdev(netdev);
> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
> index 78c6f5d7e..f890ced68 100644
> --- a/tests/system-offloads-traffic.at
> +++ b/tests/system-offloads-traffic.at
> @@ -1015,5 +1015,50 @@ AT_CHECK(
>    [grep -q -F "set(tunnel(dst=172.31.1.1,ttl=64,tp_dst=4789,flags(csum)))" \
>     stdout])
>  
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
> +AT_SETUP([offloads - split kernel vs offload datapath rules])
> +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +
> +ADD_VETH(p1, at_ns0, br0, "10.0.0.1/24")
> +ADD_VETH(p2, at_ns1, br0, "10.0.0.2/24")
> +
> +AT_CHECK([ovs-vsctl -- set interface ovs-p1 ofport_request=1 \
> +                    -- set interface ovs-p2 ofport_request=2])
> +
> +AT_DATA([groups.txt], [dnl
> +group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=2)
> +])
> +AT_DATA([flows.txt], [dnl
> +table=0,arp,actions=NORMAL
> +table=0,priority=100,cookie=0x12345678,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=resubmit(,1)
> +table=0,priority=100,cookie=0xabcedf,in_port=ovs-p2,ip,nw_dst=10.0.0.1,actions=ct(table=3)
> +table=1,priority=200,ip,actions=group:1
> +table=2,ip,actions=ovs-p2
> +table=3,ip,actions=ovs-p1
> +])
> +AT_CHECK([ovs-ofctl add-groups br0 groups.txt])
> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> +
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([ovs-appctl revalidator/wait])
> +
> +dnl In this test we should not have the first recirculation(s) in the kernel
> +dnl datapath, and the final flow in tc.  They should all be in the kernel
> +dnl datapath, as the dp_hash() action is currently not supported by TC.
> +dnl The command below ensures they are all handled in the kernel datapath.
> +AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
> +          strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
> +recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:9, bytes:882, used:0.001s, actions:hash(l4(0)),recirc(<recirc>)
> +recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:882, used:0.001s, actions:ct(commit),recirc(<recirc>)
> +recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:882, used:0.001s, actions:ovs-p2
> +])
> +
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
> \ No newline at end of file

Acked-by: Roi Dayan <roid@nvidia.com>

nice.
Thanks,
Roi
Ilya Maximets Dec. 12, 2024, 10:59 p.m. UTC | #2
On 12/3/24 17:24, Eelco Chaudron wrote:
> When flows are added through TC, only the match and actions are
> verified to determine if they can be handled by TC. If they can,
> the TC flow is installed.
> 
> However, when the flow is a continuation of a previously recirculated flow,
> it can happen that the flow performing the recirculation is installed
> in the kernel. This may occur, for example, if it includes an action that
> cannot be handled by TC.
> 
> If the kernel module has the first flow but not the second one (missing
> because it is programmed in TC), the flow is sent to userspace via an
> upcall.
> 
> This patch tracks which recirculation goto actions are handled by TC.
> A matching TC rule is installed only if the corresponding recirculation
> ID is confirmed to be handled by TC.
> 
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  lib/netdev-offload-tc.c          | 51 ++++++++++++++++++++++++++++++--
>  tests/system-offloads-traffic.at | 45 ++++++++++++++++++++++++++++
>  2 files changed, 94 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 74e60d0f7..eb7c0886e 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -19,6 +19,7 @@
>  #include <errno.h>
>  #include <linux/if_ether.h>
>  
> +#include "ccmap.h"
>  #include "dpif.h"
>  #include "hash.h"
>  #include "id-pool.h"
> @@ -78,6 +79,10 @@ struct policer_node {
>      uint32_t police_idx;
>  };
>  
> +/* ccmap and protective mutex for counting recirculation id (chain) usage. */
> +static struct ovs_mutex used_chains_mutex = OVS_MUTEX_INITIALIZER;
> +static struct ccmap used_chains OVS_GUARDED;
> +
>  /* Protects below meter police ids pool. */
>  static struct ovs_mutex meter_police_ids_mutex = OVS_MUTEX_INITIALIZER;
>  static struct id_pool *meter_police_ids OVS_GUARDED_BY(meter_police_ids_mutex);
> @@ -204,6 +209,10 @@ static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
>   * @adjust_stats: When flow gets updated with new actions, we need to adjust
>   *                the reported stats to include previous values as the hardware
>   *                rule is removed and re-added. This stats copy is used for it.
> + * @chain_goto: If a TC jump action exists for the flow, the target chain it
> + *              jumps to is stored here.  Only a single goto action is stored,
> + *              as TC supports only one goto action per flow (there is no
> + *              return mechanism).
>   */
>  struct ufid_tc_data {
>      struct hmap_node ufid_to_tc_node;
> @@ -212,6 +221,7 @@ struct ufid_tc_data {
>      struct tcf_id id;
>      struct netdev *netdev;
>      struct dpif_flow_stats adjust_stats;
> +    uint32_t chain_goto;
>  };
>  
>  static void
> @@ -233,6 +243,13 @@ del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
>      hmap_remove(&ufid_to_tc, &data->ufid_to_tc_node);
>      hmap_remove(&tc_to_ufid, &data->tc_to_ufid_node);
>      netdev_close(data->netdev);
> +
> +    if (data->chain_goto) {
> +        ovs_mutex_lock(&used_chains_mutex);
> +        ccmap_dec(&used_chains, data->chain_goto);
> +        ovs_mutex_unlock(&used_chains_mutex);
> +    }
> +
>      free(data);
>  }
>  
> @@ -288,7 +305,8 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid,
>  /* Add ufid entry to ufid_to_tc hashmap. */
>  static void
>  add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
> -                    struct tcf_id *id, struct dpif_flow_stats *stats)
> +                    struct tcf_id *id, struct dpif_flow_stats *stats,
> +                    uint32_t chain_goto)
>  {
>      struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
>      size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
> @@ -300,6 +318,7 @@ add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
>      new_data->ufid = *ufid;
>      new_data->id = *id;
>      new_data->netdev = netdev_ref(netdev);
> +    new_data->chain_goto = chain_goto;
>      if (stats) {
>          new_data->adjust_stats = *stats;
>      }
> @@ -2261,6 +2280,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>      struct flow_tnl *tnl_mask = &mask->tunnel;
>      struct dpif_flow_stats adjust_stats;
>      bool recirc_act = false;
> +    uint32_t chain_goto = 0;
>      uint32_t block_id = 0;
>      struct tcf_id id;
>      uint32_t chain;
> @@ -2280,6 +2300,17 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>      chain = key->recirc_id;
>      mask->recirc_id = 0;
>  
> +    if (chain) {
> +        /* If we match on a recirculation ID, we must ensure the previous
> +         * flow is also in the TC datapath; otherwise, the entry is useless,
> +         * as the related packets will be handled by upcalls. */
> +        if (!ccmap_find(&used_chains, chain)) {
> +            VLOG_DBG_RL(&rl, "match for chain %u failed due to non-existing "
> +                        "goto chain action", chain);
> +            return EOPNOTSUPP;
> +        }
> +    }
> +
>      if (flow_tnl_dst_is_set(&key->tunnel) ||
>          flow_tnl_src_is_set(&key->tunnel)) {
>          VLOG_DBG_RL(&rl,
> @@ -2594,6 +2625,20 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>          return EOPNOTSUPP;
>      }
>  
> +    if (recirc_act) {
> +        struct tc_action *action = flower.actions;
> +
> +        for (int i = 0; i < flower.action_count; i++, action++) {
> +            if (action->type == TC_ACT_GOTO && action->chain) {
> +                chain_goto = action->chain;
> +                ovs_mutex_lock(&used_chains_mutex);
> +                ccmap_inc(&used_chains, chain_goto);
> +                ovs_mutex_unlock(&used_chains_mutex);
> +                break;
> +            }
> +        }
> +    }

Shouldn't we also undo this if the tc_replace_flower ultimately fails
or if we get some other error along the way?

> +
>      memset(&adjust_stats, 0, sizeof adjust_stats);
>      if (get_ufid_tc_mapping(ufid, &id) == 0) {
>          VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",

What happens if we have flow1 --> flow2 --> flow3, all offloaded.
Now, we modify actions of flow2 slightly, but in a way that is no longer
offloadable.  flow2 will be removed from TC and installed to the kernel.
Next when packet comes it matches flow1 in TC, then goes to the kernel
and matches flow2, then it gets upcalled.  We will now try to install
the flow3 again.  Normally, it would be the same datapath and it will
be a simple EEXIST.  However, now we'll fail in TC and install flow3
also into the kernel.  Now we have the same datapath flow in both the
kernel and TC and only one ukey.  It's probably fine for the datapath,
but I'm not sure what will happen in control plane since we'll be
dumping the same flow from both with different statistics.

Do you have any thoughts on this?

> @@ -2619,7 +2664,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
>              memset(stats, 0, sizeof *stats);
>              netdev_tc_adjust_stats(stats, &adjust_stats);
>          }
> -        add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats);
> +        add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats, chain_goto);
>      }
>  
>      return err;
> @@ -3096,6 +3141,8 @@ netdev_tc_init_flow_api(struct netdev *netdev)
>      tc_add_del_qdisc(ifindex, false, 0, hook);
>  
>      if (ovsthread_once_start(&once)) {
> +        ccmap_init(&used_chains);
> +
>          probe_tc_block_support(ifindex);
>          /* Need to re-fetch block id as it depends on feature availability. */
>          block_id = get_block_id_from_netdev(netdev);
> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
> index 78c6f5d7e..f890ced68 100644
> --- a/tests/system-offloads-traffic.at
> +++ b/tests/system-offloads-traffic.at
> @@ -1015,5 +1015,50 @@ AT_CHECK(
>    [grep -q -F "set(tunnel(dst=172.31.1.1,ttl=64,tp_dst=4789,flags(csum)))" \
>     stdout])
>  
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> +
> +AT_SETUP([offloads - split kernel vs offload datapath rules])
> +OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +
> +ADD_VETH(p1, at_ns0, br0, "10.0.0.1/24")
> +ADD_VETH(p2, at_ns1, br0, "10.0.0.2/24")
> +
> +AT_CHECK([ovs-vsctl -- set interface ovs-p1 ofport_request=1 \
> +                    -- set interface ovs-p2 ofport_request=2])
> +
> +AT_DATA([groups.txt], [dnl
> +group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=2)
> +])
> +AT_DATA([flows.txt], [dnl
> +table=0,arp,actions=NORMAL
> +table=0,priority=100,cookie=0x12345678,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=resubmit(,1)
> +table=0,priority=100,cookie=0xabcedf,in_port=ovs-p2,ip,nw_dst=10.0.0.1,actions=ct(table=3)
> +table=1,priority=200,ip,actions=group:1
> +table=2,ip,actions=ovs-p2
> +table=3,ip,actions=ovs-p1
> +])
> +AT_CHECK([ovs-ofctl add-groups br0 groups.txt])
> +AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
> +
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
> +])
> +
> +AT_CHECK([ovs-appctl revalidator/wait])
> +
> +dnl In this test we should not have the first recirculation(s) in the kernel
> +dnl datapath, and the final flow in tc.  They should all be in the kernel
> +dnl datapath, as the dp_hash() action is currently not supported by TC.

Nit: 'tc' vs 'TC' in two sentenses nearby.

> +dnl The command below ensures they are all handled in the kernel datapath.
> +AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
> +          strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
> +recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:9, bytes:882, used:0.001s, actions:hash(l4(0)),recirc(<recirc>)
> +recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:882, used:0.001s, actions:ct(commit),recirc(<recirc>)
> +recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:882, used:0.001s, actions:ovs-p2
> +])
> +
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
> \ No newline at end of file
diff mbox series

Patch

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 74e60d0f7..eb7c0886e 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -19,6 +19,7 @@ 
 #include <errno.h>
 #include <linux/if_ether.h>
 
+#include "ccmap.h"
 #include "dpif.h"
 #include "hash.h"
 #include "id-pool.h"
@@ -78,6 +79,10 @@  struct policer_node {
     uint32_t police_idx;
 };
 
+/* ccmap and protective mutex for counting recirculation id (chain) usage. */
+static struct ovs_mutex used_chains_mutex = OVS_MUTEX_INITIALIZER;
+static struct ccmap used_chains OVS_GUARDED;
+
 /* Protects below meter police ids pool. */
 static struct ovs_mutex meter_police_ids_mutex = OVS_MUTEX_INITIALIZER;
 static struct id_pool *meter_police_ids OVS_GUARDED_BY(meter_police_ids_mutex);
@@ -204,6 +209,10 @@  static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
  * @adjust_stats: When flow gets updated with new actions, we need to adjust
  *                the reported stats to include previous values as the hardware
  *                rule is removed and re-added. This stats copy is used for it.
+ * @chain_goto: If a TC jump action exists for the flow, the target chain it
+ *              jumps to is stored here.  Only a single goto action is stored,
+ *              as TC supports only one goto action per flow (there is no
+ *              return mechanism).
  */
 struct ufid_tc_data {
     struct hmap_node ufid_to_tc_node;
@@ -212,6 +221,7 @@  struct ufid_tc_data {
     struct tcf_id id;
     struct netdev *netdev;
     struct dpif_flow_stats adjust_stats;
+    uint32_t chain_goto;
 };
 
 static void
@@ -233,6 +243,13 @@  del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
     hmap_remove(&ufid_to_tc, &data->ufid_to_tc_node);
     hmap_remove(&tc_to_ufid, &data->tc_to_ufid_node);
     netdev_close(data->netdev);
+
+    if (data->chain_goto) {
+        ovs_mutex_lock(&used_chains_mutex);
+        ccmap_dec(&used_chains, data->chain_goto);
+        ovs_mutex_unlock(&used_chains_mutex);
+    }
+
     free(data);
 }
 
@@ -288,7 +305,8 @@  del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid,
 /* Add ufid entry to ufid_to_tc hashmap. */
 static void
 add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
-                    struct tcf_id *id, struct dpif_flow_stats *stats)
+                    struct tcf_id *id, struct dpif_flow_stats *stats,
+                    uint32_t chain_goto)
 {
     struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
     size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
@@ -300,6 +318,7 @@  add_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
     new_data->ufid = *ufid;
     new_data->id = *id;
     new_data->netdev = netdev_ref(netdev);
+    new_data->chain_goto = chain_goto;
     if (stats) {
         new_data->adjust_stats = *stats;
     }
@@ -2261,6 +2280,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     struct flow_tnl *tnl_mask = &mask->tunnel;
     struct dpif_flow_stats adjust_stats;
     bool recirc_act = false;
+    uint32_t chain_goto = 0;
     uint32_t block_id = 0;
     struct tcf_id id;
     uint32_t chain;
@@ -2280,6 +2300,17 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
     chain = key->recirc_id;
     mask->recirc_id = 0;
 
+    if (chain) {
+        /* If we match on a recirculation ID, we must ensure the previous
+         * flow is also in the TC datapath; otherwise, the entry is useless,
+         * as the related packets will be handled by upcalls. */
+        if (!ccmap_find(&used_chains, chain)) {
+            VLOG_DBG_RL(&rl, "match for chain %u failed due to non-existing "
+                        "goto chain action", chain);
+            return EOPNOTSUPP;
+        }
+    }
+
     if (flow_tnl_dst_is_set(&key->tunnel) ||
         flow_tnl_src_is_set(&key->tunnel)) {
         VLOG_DBG_RL(&rl,
@@ -2594,6 +2625,20 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
         return EOPNOTSUPP;
     }
 
+    if (recirc_act) {
+        struct tc_action *action = flower.actions;
+
+        for (int i = 0; i < flower.action_count; i++, action++) {
+            if (action->type == TC_ACT_GOTO && action->chain) {
+                chain_goto = action->chain;
+                ovs_mutex_lock(&used_chains_mutex);
+                ccmap_inc(&used_chains, chain_goto);
+                ovs_mutex_unlock(&used_chains_mutex);
+                break;
+            }
+        }
+    }
+
     memset(&adjust_stats, 0, sizeof adjust_stats);
     if (get_ufid_tc_mapping(ufid, &id) == 0) {
         VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
@@ -2619,7 +2664,7 @@  netdev_tc_flow_put(struct netdev *netdev, struct match *match,
             memset(stats, 0, sizeof *stats);
             netdev_tc_adjust_stats(stats, &adjust_stats);
         }
-        add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats);
+        add_ufid_tc_mapping(netdev, ufid, &id, &adjust_stats, chain_goto);
     }
 
     return err;
@@ -3096,6 +3141,8 @@  netdev_tc_init_flow_api(struct netdev *netdev)
     tc_add_del_qdisc(ifindex, false, 0, hook);
 
     if (ovsthread_once_start(&once)) {
+        ccmap_init(&used_chains);
+
         probe_tc_block_support(ifindex);
         /* Need to re-fetch block id as it depends on feature availability. */
         block_id = get_block_id_from_netdev(netdev);
diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
index 78c6f5d7e..f890ced68 100644
--- a/tests/system-offloads-traffic.at
+++ b/tests/system-offloads-traffic.at
@@ -1015,5 +1015,50 @@  AT_CHECK(
   [grep -q -F "set(tunnel(dst=172.31.1.1,ttl=64,tp_dst=4789,flags(csum)))" \
    stdout])
 
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([offloads - split kernel vs offload datapath rules])
+OVS_TRAFFIC_VSWITCHD_START([], [], [-- set Open_vSwitch . other_config:hw-offload=true])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p1, at_ns0, br0, "10.0.0.1/24")
+ADD_VETH(p2, at_ns1, br0, "10.0.0.2/24")
+
+AT_CHECK([ovs-vsctl -- set interface ovs-p1 ofport_request=1 \
+                    -- set interface ovs-p2 ofport_request=2])
+
+AT_DATA([groups.txt], [dnl
+group_id=1,type=select,selection_method=dp_hash,bucket=bucket_id:0,weight:100,actions=ct(commit,table=2)
+])
+AT_DATA([flows.txt], [dnl
+table=0,arp,actions=NORMAL
+table=0,priority=100,cookie=0x12345678,in_port=ovs-p1,ip,nw_dst=10.0.0.2,actions=resubmit(,1)
+table=0,priority=100,cookie=0xabcedf,in_port=ovs-p2,ip,nw_dst=10.0.0.1,actions=ct(table=3)
+table=1,priority=200,ip,actions=group:1
+table=2,ip,actions=ovs-p2
+table=3,ip,actions=ovs-p1
+])
+AT_CHECK([ovs-ofctl add-groups br0 groups.txt])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -W 2 10.0.0.2 | FORMAT_PING], [0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+AT_CHECK([ovs-appctl revalidator/wait])
+
+dnl In this test we should not have the first recirculation(s) in the kernel
+dnl datapath, and the final flow in tc.  They should all be in the kernel
+dnl datapath, as the dp_hash() action is currently not supported by TC.
+dnl The command below ensures they are all handled in the kernel datapath.
+AT_CHECK([ovs-appctl dpctl/dump-flows --names type=ovs filter='in_port(ovs-p1),ipv4' | \
+          strip_recirc | strip_dp_hash | DUMP_CLEAN_SORTED], [0], [dnl
+recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(dst=10.0.0.2,frag=no), packets:9, bytes:882, used:0.001s, actions:hash(l4(0)),recirc(<recirc>)
+recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:882, used:0.001s, actions:ct(commit),recirc(<recirc>)
+recirc_id(<recirc>),in_port(ovs-p1),eth(),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:882, used:0.001s, actions:ovs-p2
+])
+
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
\ No newline at end of file