[ovs-dev,v4,3/5] dpif-netdev: Skip encap action during datapath execution

Message ID 20200629095020.8491-4-sriharsha.basavapatna@broadcom.com
State Superseded
Series netdev datapath: Partial action offload

Commit Message

Sriharsha Basavapatna June 29, 2020, 9:50 a.m. UTC
In this patch we check whether action processing (apart from the OUTPUT
action) should be skipped for a given dp_netdev_flow. Specifically, if
the action is TNL_PUSH and it has been offloaded to HW, we do not push
the tunnel header in SW; the datapath executes only the OUTPUT action,
and the packet is encapsulated in HW during transmit.

Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
---
 lib/dpif-netdev.c | 46 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 36 insertions(+), 10 deletions(-)

Comments

Eli Britstein July 5, 2020, 2:03 p.m. UTC | #1
On 6/29/2020 12:50 PM, Sriharsha Basavapatna wrote:
> [...]
>
> @@ -7455,6 +7468,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>       int type = nl_attr_type(a);
>       struct tx_port *p;
>       uint32_t packet_count, packets_dropped;
> +    struct dp_netdev_flow *dp_flow = aux->dp_flow;
>   
>       switch ((enum ovs_action_attr)type) {
>       case OVS_ACTION_ATTR_OUTPUT:
> @@ -7477,9 +7491,20 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
>           }
>           dp_packet_batch_apply_cutlen(packets_);
>           packet_count = dp_packet_batch_size(packets_);
> -        if (push_tnl_action(pmd, a, packets_)) {
> -            COVERAGE_ADD(datapath_drop_tunnel_push_error,
> -                         packet_count);
> +        /* Execute tnl_push action in SW, if it is not offloaded as a partial
> +         * action in HW. Otherwise, HW pushes the tunnel header during output
> +         * processing. There's a small window here in which the offload thread
> +         * offloads the flow, but the partial_actions_offloaded flag is still
> +         * not set. In this case, as the packet is already encapsulated, it
> +         * wouldn't match the offloaded flow and the action won't be executed
> +         * in HW.
> +         */
> +        if (!dp_flow || !dp_flow->partial_actions_offloaded) {
> +            if (push_tnl_action(pmd, a, packets_)) {
> +                COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count);
> +            }
> +        } else {

It is possible (though rare) that the encap headers will match the original
flow. In such a case, in the window before the SW knows the HW rule is
installed, the packet will be encapsulated by SW, and then again in HW.

The rule is, for example:

set vxlan ip-version ipv4|ipv6 vni <vni> udp-src <udp-src> udp-dst 
<udp-dst> ip-src <ip-src> ip-dst <ip-dst> eth-src e4:11:22:33:44:50 
eth-dst b6:7a:b2:f1:d5:e4

flow create 0 egress priority 0 group 0 transfer pattern eth src is 
e4:11:22:33:44:50 dst is b6:7a:b2:f1:d5:e4 type is 0x0800 / ipv4 ... end 
/ actions vxlan_encap / end

the original packet is:

Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()...

after SW encap:

Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()/UDP()/VXLAN()/Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()...

It will hit the egress flow, and be encapsulated again.

We need to add a validation that the encap header doesn't match the flow's
match criteria, and reject the flow if it does.

> +            COVERAGE_ADD(datapath_skip_tunnel_push, packet_count);
>           }
>           return;
>   
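
To make the suggested validation concrete, here is a minimal sketch of such a
check: a hypothetical helper, not part of the posted series, that the egress
offload path could run before marking a flow as partially offloaded. Struct
and function names follow OVS's lib/flow.h, lib/packets.h and lib/netdev.h;
only the outer Ethernet layer is covered here.

    /* Hypothetical: returns true if the outer Ethernet header that
     * 'tnl_push' would prepend could itself hit 'match', i.e. a packet
     * already encapsulated in SW could be encapsulated again by the HW
     * rule. The caller would then reject the partial-action offload. */
    static bool
    tnl_push_header_rematches_flow(const struct ovs_action_push_tnl *tnl_push,
                                   const struct match *match)
    {
        const struct eth_header *outer =
            ALIGNED_CAST(const struct eth_header *, tnl_push->header);
        const struct flow *f = &match->flow;
        const struct flow_wildcards *wc = &match->wc;

        /* An all-zero mask means the field is wildcarded and matches any
         * value; an exact-match field that differs rules out a re-match. */
        if (!eth_addr_is_zero(wc->masks.dl_src)
            && !eth_addr_equals(outer->eth_src, f->dl_src)) {
            return false;
        }
        if (!eth_addr_is_zero(wc->masks.dl_dst)
            && !eth_addr_equals(outer->eth_dst, f->dl_dst)) {
            return false;
        }
        if (wc->masks.dl_type && outer->eth_type != f->dl_type) {
            return false;
        }
        /* The outer L2 header could hit the match; a complete check would
         * continue into the outer IP and UDP headers before giving up. */
        return true;
    }

If such a helper returned true, the offload layer would fall back to full SW
action processing for the flow rather than install the partial egress rule.
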
Sriharsha Basavapatna July 6, 2020, 1:20 p.m. UTC | #2
On Sun, Jul 5, 2020 at 7:33 PM Eli Britstein <elibr@mellanox.com> wrote:
>
> [...]
>
> It is possible (though rare) that encap headers will match the original
> flow. In such case, in the time before the SW knows the HW rule is
> installed, the packet will be encapsulated by SW, and then again in HW.
>
> [...]
>
> Need to add a validation that the encap header doesn't match the flow's
> matches, and reject the flow if it does.

I already responded to this in my email dated Jun-29-2020. I'm adding
it here for reference:

"I tested this configuration with a Broadcom-NIC. There is no issue
and the reason being the same as mentioned in the above comments. That
is, even with the same inner/outer header (packet already encapsulated
in SW), it doesn't match the offloaded rule and the packet gets
transmitted properly (no double-encap)."

Thanks,
-Harsha


Eli Britstein July 7, 2020, 10:14 a.m. UTC | #3
On 7/6/2020 4:20 PM, Sriharsha Basavapatna wrote:
> On Sun, Jul 5, 2020 at 7:33 PM Eli Britstein <elibr@mellanox.com> wrote:
>> [...]
>>
>> It is possible (though rare) that encap headers will match the original
>> flow. In such case, in the time before the SW knows the HW rule is
>> installed, the packet will be encapsulated by SW, and then again in HW.
>>
>> [...]
>>
>> Need to add a validation that the encap header doesn't match the flow's
>> matches, and reject the flow if it does.
> I already responded to this in my email dated Jun-29-2020. I'm adding
> it here for reference:
>
> "I tested this configuration with a Broadcom-NIC. There is no issue
> and the reason being the same as mentioned in the above comments. That
> is, even with the same inner/outer header (packet already encapsulated
> in SW), it doesn't match the offloaded rule and the packet gets
> transmitted properly (no double-encap).".
>
> Thanks,
> -Harsha
From an rte_flow API perspective, the rule has matches and actions. If the
packet headers hit those matches, the actions are applied.
In this case, both the inner and outer headers will match the rte_flow rule's
matches.
When the rule is successfully offloaded, the HW must apply the actions
whenever the rule is matched.
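
For concreteness, the example rule quoted earlier in the thread could be
expressed through the rte_flow C API roughly as below; this is an
illustrative sketch with placeholder IP addresses, UDP ports, VNI and port
id, not code from either driver or from this series. The point of contention
is visible in the pattern: it matches only the outermost Ethernet header
(NULL masks fall back to the default full-field masks), so nothing in the
rule distinguishes an already-encapsulated packet whose new outer header
carries the same MACs.

    #include <rte_flow.h>
    #include <rte_ip.h>

    static struct rte_flow *
    create_egress_vxlan_encap_rule(uint16_t port_id, struct rte_flow_error *err)
    {
        struct rte_flow_attr attr = {
            .group = 0, .priority = 0, .egress = 1, .transfer = 1,
        };
        /* Match: outermost Ethernet header only. */
        struct rte_flow_item_eth eth_spec = {
            .src.addr_bytes = { 0xe4, 0x11, 0x22, 0x33, 0x44, 0x50 },
            .dst.addr_bytes = { 0xb6, 0x7a, 0xb2, 0xf1, 0xd5, 0xe4 },
            .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
        };
        struct rte_flow_item pattern[] = {
            { .type = RTE_FLOW_ITEM_TYPE_ETH,  .spec = &eth_spec },
            { .type = RTE_FLOW_ITEM_TYPE_IPV4 },       /* "ipv4 ..." */
            { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        /* Headers to prepend; the outer MACs equal the matched MACs. */
        struct rte_flow_item_eth enc_eth = {
            .src.addr_bytes = { 0xe4, 0x11, 0x22, 0x33, 0x44, 0x50 },
            .dst.addr_bytes = { 0xb6, 0x7a, 0xb2, 0xf1, 0xd5, 0xe4 },
            .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
        };
        struct rte_flow_item_ipv4 enc_ip = {
            .hdr = { .src_addr = RTE_BE32(RTE_IPV4(10, 0, 0, 1)),
                     .dst_addr = RTE_BE32(RTE_IPV4(10, 0, 0, 2)) },
        };
        struct rte_flow_item_udp enc_udp = {
            .hdr = { .src_port = RTE_BE16(1234), .dst_port = RTE_BE16(4789) },
        };
        struct rte_flow_item_vxlan enc_vxlan = { .vni = { 0, 0, 100 } };
        struct rte_flow_item enc_def[] = {
            { .type = RTE_FLOW_ITEM_TYPE_ETH,   .spec = &enc_eth },
            { .type = RTE_FLOW_ITEM_TYPE_IPV4,  .spec = &enc_ip },
            { .type = RTE_FLOW_ITEM_TYPE_UDP,   .spec = &enc_udp },
            { .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &enc_vxlan },
            { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_vxlan_encap encap = { .definition = enc_def };
        struct rte_flow_action actions[] = {
            { .type = RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, .conf = &encap },
            { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        return rte_flow_create(port_id, &attr, pattern, actions, err);
    }
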

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7adea8c40..b96a75d1f 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -114,6 +114,7 @@  COVERAGE_DEFINE(datapath_drop_invalid_port);
 COVERAGE_DEFINE(datapath_drop_invalid_bond);
 COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
 COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
+COVERAGE_DEFINE(datapath_skip_tunnel_push);
 
 /* Protects against changes to 'dp_netdevs'. */
 static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
@@ -545,6 +546,16 @@  struct dp_netdev_flow {
     bool dead;
     uint32_t mark;               /* Unique flow mark assigned to a flow */
 
+    /* The next two members are used to support partial offloading of
+     * actions. The boolean flag tells if this flow has its actions partially
+     * offloaded. The egress port# tells if the action should be offloaded
+     * on the egress (output) port instead of the in-port for the flow. Note
+     * that we support flows with a single egress port action.
+     * (see MAX_ACTION_ATTRS for related comments).
+     */
+    bool partial_actions_offloaded;
+    odp_port_t  egress_offload_port;
+
     /* Statistics. */
     struct dp_netdev_flow_stats stats;
 
@@ -817,7 +828,8 @@  static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                                       bool should_steal,
                                       const struct flow *flow,
                                       const struct nlattr *actions,
-                                      size_t actions_len);
+                                      size_t actions_len,
+                                      const struct dp_netdev_flow *dp_flow);
 static void dp_netdev_input(struct dp_netdev_pmd_thread *,
                             struct dp_packet_batch *, odp_port_t port_no);
 static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
@@ -3954,7 +3966,7 @@  dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
     dp_packet_batch_init_packet(&pp, execute->packet);
     pp.do_not_steal = true;
     dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
-                              execute->actions, execute->actions_len);
+                              execute->actions, execute->actions_len, NULL);
     dp_netdev_pmd_flush_output_packets(pmd, true);
 
     if (pmd->core_id == NON_PMD_CORE_ID) {
@@ -6672,7 +6684,7 @@  packet_batch_per_flow_execute(struct packet_batch_per_flow *batch,
     actions = dp_netdev_flow_get_actions(flow);
 
     dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow,
-                              actions->actions, actions->size);
+                              actions->actions, actions->size, flow);
 }
 
 static inline void
@@ -6967,7 +6979,7 @@  handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
      * we'll send the packet up twice. */
     dp_packet_batch_init_packet(&b, packet);
     dp_netdev_execute_actions(pmd, &b, true, &match.flow,
-                              actions->data, actions->size);
+                              actions->data, actions->size, NULL);
 
     add_actions = put_actions->size ? put_actions : actions;
     if (OVS_LIKELY(error != ENOSPC)) {
@@ -7202,6 +7214,7 @@  dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd,
 struct dp_netdev_execute_aux {
     struct dp_netdev_pmd_thread *pmd;
     const struct flow *flow;
+    const struct dp_netdev_flow *dp_flow;    /* for partial action offload */
 };
 
 static void
@@ -7346,7 +7359,7 @@  dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
     if (!error || error == ENOSPC) {
         dp_packet_batch_init_packet(&b, packet);
         dp_netdev_execute_actions(pmd, &b, should_steal, flow,
-                                  actions->data, actions->size);
+                                  actions->data, actions->size, NULL);
     } else if (should_steal) {
         dp_packet_delete(packet);
         COVERAGE_INC(datapath_drop_userspace_action_error);
@@ -7455,6 +7468,7 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
     int type = nl_attr_type(a);
     struct tx_port *p;
     uint32_t packet_count, packets_dropped;
+    struct dp_netdev_flow *dp_flow = aux->dp_flow;
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
@@ -7477,9 +7491,20 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
         }
         dp_packet_batch_apply_cutlen(packets_);
         packet_count = dp_packet_batch_size(packets_);
-        if (push_tnl_action(pmd, a, packets_)) {
-            COVERAGE_ADD(datapath_drop_tunnel_push_error,
-                         packet_count);
+        /* Execute tnl_push action in SW, if it is not offloaded as a partial
+         * action in HW. Otherwise, HW pushes the tunnel header during output
+         * processing. There's a small window here in which the offload thread
+         * offloads the flow, but the partial_actions_offloaded flag is still
+         * not set. In this case, as the packet is already encapsulated, it
+         * wouldn't match the offloaded flow and the action won't be executed
+         * in HW.
+         */
+        if (!dp_flow || !dp_flow->partial_actions_offloaded) {
+            if (push_tnl_action(pmd, a, packets_)) {
+                COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count);
+            }
+        } else {
+            COVERAGE_ADD(datapath_skip_tunnel_push, packet_count);
         }
         return;
 
@@ -7771,9 +7796,10 @@  static void
 dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
                           struct dp_packet_batch *packets,
                           bool should_steal, const struct flow *flow,
-                          const struct nlattr *actions, size_t actions_len)
+                          const struct nlattr *actions, size_t actions_len,
+                          const struct dp_netdev_flow *dp_flow)
 {
-    struct dp_netdev_execute_aux aux = { pmd, flow };
+    struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow };
 
     odp_execute_actions(&aux, packets, should_steal, actions,
                         actions_len, dp_execute_cb);
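
Once the series is applied, the effect of the skip path is visible through the
standard coverage infrastructure: the new datapath_skip_tunnel_push counter is
bumped once per packet whose tunnel header push is left to HW, next to the
existing datapath_drop_tunnel_push_error drop counter, and both can be read
with "ovs-appctl coverage/show" once they have triggered.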