Message ID | 20200629095020.8491-4-sriharsha.basavapatna@broadcom.com |
---|---|
State | Superseded |
Headers | show |
Series | netdev datapath: Partial action offload | expand |
On 6/29/2020 12:50 PM, Sriharsha Basavapatna wrote: > In this patch we check if action processing (apart from OUTPUT action), > should be skipped for a given dp_netdev_flow. Specifically, we check if > the action is TNL_PUSH and if it has been offloaded to HW, then we do not > push the tunnel header in SW. The datapath only executes the OUTPUT action. > The packet will be encapsulated in HW during transmit. > > Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> > --- > lib/dpif-netdev.c | 46 ++++++++++++++++++++++++++++++++++++---------- > 1 file changed, 36 insertions(+), 10 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 7adea8c40..b96a75d1f 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -114,6 +114,7 @@ COVERAGE_DEFINE(datapath_drop_invalid_port); > COVERAGE_DEFINE(datapath_drop_invalid_bond); > COVERAGE_DEFINE(datapath_drop_invalid_tnl_port); > COVERAGE_DEFINE(datapath_drop_rx_invalid_packet); > +COVERAGE_DEFINE(datapath_skip_tunnel_push); > > /* Protects against changes to 'dp_netdevs'. */ > static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; > @@ -545,6 +546,16 @@ struct dp_netdev_flow { > bool dead; > uint32_t mark; /* Unique flow mark assigned to a flow */ > > + /* The next two members are used to support partial offloading of > + * actions. The boolean flag tells if this flow has its actions partially > + * offloaded. The egress port# tells if the action should be offloaded > + * on the egress (output) port instead of the in-port for the flow. Note > + * that we support flows with a single egress port action. > + * (see MAX_ACTION_ATTRS for related comments). > + */ > + bool partial_actions_offloaded; > + odp_port_t egress_offload_port; > + > /* Statistics. */ > struct dp_netdev_flow_stats stats; > > @@ -817,7 +828,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, > bool should_steal, > const struct flow *flow, > const struct nlattr *actions, > - size_t actions_len); > + size_t actions_len, > + const struct dp_netdev_flow *dp_flow); > static void dp_netdev_input(struct dp_netdev_pmd_thread *, > struct dp_packet_batch *, odp_port_t port_no); > static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *, > @@ -3954,7 +3966,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) > dp_packet_batch_init_packet(&pp, execute->packet); > pp.do_not_steal = true; > dp_netdev_execute_actions(pmd, &pp, false, execute->flow, > - execute->actions, execute->actions_len); > + execute->actions, execute->actions_len, NULL); > dp_netdev_pmd_flush_output_packets(pmd, true); > > if (pmd->core_id == NON_PMD_CORE_ID) { > @@ -6672,7 +6684,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, > actions = dp_netdev_flow_get_actions(flow); > > dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow, > - actions->actions, actions->size); > + actions->actions, actions->size, flow); > } > > static inline void > @@ -6967,7 +6979,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, > * we'll send the packet up twice. */ > dp_packet_batch_init_packet(&b, packet); > dp_netdev_execute_actions(pmd, &b, true, &match.flow, > - actions->data, actions->size); > + actions->data, actions->size, NULL); > > add_actions = put_actions->size ? put_actions : actions; > if (OVS_LIKELY(error != ENOSPC)) { > @@ -7202,6 +7214,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, > struct dp_netdev_execute_aux { > struct dp_netdev_pmd_thread *pmd; > const struct flow *flow; > + const struct dp_netdev_flow *dp_flow; /* for partial action offload */ > }; > > static void > @@ -7346,7 +7359,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd, > if (!error || error == ENOSPC) { > dp_packet_batch_init_packet(&b, packet); > dp_netdev_execute_actions(pmd, &b, should_steal, flow, > - actions->data, actions->size); > + actions->data, actions->size, NULL); > } else if (should_steal) { > dp_packet_delete(packet); > COVERAGE_INC(datapath_drop_userspace_action_error); > @@ -7455,6 +7468,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, > int type = nl_attr_type(a); > struct tx_port *p; > uint32_t packet_count, packets_dropped; > + struct dp_netdev_flow *dp_flow = aux->dp_flow; > > switch ((enum ovs_action_attr)type) { > case OVS_ACTION_ATTR_OUTPUT: > @@ -7477,9 +7491,20 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, > } > dp_packet_batch_apply_cutlen(packets_); > packet_count = dp_packet_batch_size(packets_); > - if (push_tnl_action(pmd, a, packets_)) { > - COVERAGE_ADD(datapath_drop_tunnel_push_error, > - packet_count); > + /* Execute tnl_push action in SW, if it is not offloaded as a partial > + * action in HW. Otherwise, HW pushes the tunnel header during output > + * processing. There's a small window here in which the offload thread > + * offloads the flow, but the partial_actions_offloaded flag is still > + * not set. In this case, as the packet is already encapsulated, it > + * wouldn't match the offloaded flow and the action won't be executed > + * in HW. > + */ > + if (!dp_flow || !dp_flow->partial_actions_offloaded) { > + if (push_tnl_action(pmd, a, packets_)) { > + COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count); > + } > + } else { It is possible (though rare) that encap headers will match the original flow. In such case, in the time before the SW knows the HW rule is installed, the packet will be encapsulated by SW, and then again in HW. The rule is for example: set vxlan ip-version ipv4|ipv6 vni <vni> udp-src <udp-src> udp-dst <udp-dst> ip-src <ip-src> ip-dst <ip-dst> eth-src e4:11:22:33:44:50 eth-dst b6:7a:b2:f1:d5:e4 flow create 0 egress priority 0 group 0 transfer pattern eth src is e4:11:22:33:44:50 dst is b6:7a:b2:f1:d5:e4 type is 0x0800 / ipv4 ... end / actions vxlan_encap / end the original packet is: Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()... after SW encap: Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()/UDP()/VXLAN()/Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()... It will hit the egress flow, and be encapsulated again. Need to add a validation that the encap header doesn't match the flow's matches, and reject the flow if it does. > + COVERAGE_ADD(datapath_skip_tunnel_push, packet_count); > } > return; > > @@ -7771,9 +7796,10 @@ static void > dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, > struct dp_packet_batch *packets, > bool should_steal, const struct flow *flow, > - const struct nlattr *actions, size_t actions_len) > + const struct nlattr *actions, size_t actions_len, > + const struct dp_netdev_flow *dp_flow) > { > - struct dp_netdev_execute_aux aux = { pmd, flow }; > + struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow }; > > odp_execute_actions(&aux, packets, should_steal, actions, > actions_len, dp_execute_cb);
On Sun, Jul 5, 2020 at 7:33 PM Eli Britstein <elibr@mellanox.com> wrote: > > > On 6/29/2020 12:50 PM, Sriharsha Basavapatna wrote: > > In this patch we check if action processing (apart from OUTPUT action), > > should be skipped for a given dp_netdev_flow. Specifically, we check if > > the action is TNL_PUSH and if it has been offloaded to HW, then we do not > > push the tunnel header in SW. The datapath only executes the OUTPUT action. > > The packet will be encapsulated in HW during transmit. > > > > Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> > > --- > > lib/dpif-netdev.c | 46 ++++++++++++++++++++++++++++++++++++---------- > > 1 file changed, 36 insertions(+), 10 deletions(-) > > > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > > index 7adea8c40..b96a75d1f 100644 > > --- a/lib/dpif-netdev.c > > +++ b/lib/dpif-netdev.c > > @@ -114,6 +114,7 @@ COVERAGE_DEFINE(datapath_drop_invalid_port); > > COVERAGE_DEFINE(datapath_drop_invalid_bond); > > COVERAGE_DEFINE(datapath_drop_invalid_tnl_port); > > COVERAGE_DEFINE(datapath_drop_rx_invalid_packet); > > +COVERAGE_DEFINE(datapath_skip_tunnel_push); > > > > /* Protects against changes to 'dp_netdevs'. */ > > static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; > > @@ -545,6 +546,16 @@ struct dp_netdev_flow { > > bool dead; > > uint32_t mark; /* Unique flow mark assigned to a flow */ > > > > + /* The next two members are used to support partial offloading of > > + * actions. The boolean flag tells if this flow has its actions partially > > + * offloaded. The egress port# tells if the action should be offloaded > > + * on the egress (output) port instead of the in-port for the flow. Note > > + * that we support flows with a single egress port action. > > + * (see MAX_ACTION_ATTRS for related comments). > > + */ > > + bool partial_actions_offloaded; > > + odp_port_t egress_offload_port; > > + > > /* Statistics. */ > > struct dp_netdev_flow_stats stats; > > > > @@ -817,7 +828,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, > > bool should_steal, > > const struct flow *flow, > > const struct nlattr *actions, > > - size_t actions_len); > > + size_t actions_len, > > + const struct dp_netdev_flow *dp_flow); > > static void dp_netdev_input(struct dp_netdev_pmd_thread *, > > struct dp_packet_batch *, odp_port_t port_no); > > static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *, > > @@ -3954,7 +3966,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) > > dp_packet_batch_init_packet(&pp, execute->packet); > > pp.do_not_steal = true; > > dp_netdev_execute_actions(pmd, &pp, false, execute->flow, > > - execute->actions, execute->actions_len); > > + execute->actions, execute->actions_len, NULL); > > dp_netdev_pmd_flush_output_packets(pmd, true); > > > > if (pmd->core_id == NON_PMD_CORE_ID) { > > @@ -6672,7 +6684,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, > > actions = dp_netdev_flow_get_actions(flow); > > > > dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow, > > - actions->actions, actions->size); > > + actions->actions, actions->size, flow); > > } > > > > static inline void > > @@ -6967,7 +6979,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, > > * we'll send the packet up twice. */ > > dp_packet_batch_init_packet(&b, packet); > > dp_netdev_execute_actions(pmd, &b, true, &match.flow, > > - actions->data, actions->size); > > + actions->data, actions->size, NULL); > > > > add_actions = put_actions->size ? put_actions : actions; > > if (OVS_LIKELY(error != ENOSPC)) { > > @@ -7202,6 +7214,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, > > struct dp_netdev_execute_aux { > > struct dp_netdev_pmd_thread *pmd; > > const struct flow *flow; > > + const struct dp_netdev_flow *dp_flow; /* for partial action offload */ > > }; > > > > static void > > @@ -7346,7 +7359,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd, > > if (!error || error == ENOSPC) { > > dp_packet_batch_init_packet(&b, packet); > > dp_netdev_execute_actions(pmd, &b, should_steal, flow, > > - actions->data, actions->size); > > + actions->data, actions->size, NULL); > > } else if (should_steal) { > > dp_packet_delete(packet); > > COVERAGE_INC(datapath_drop_userspace_action_error); > > @@ -7455,6 +7468,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, > > int type = nl_attr_type(a); > > struct tx_port *p; > > uint32_t packet_count, packets_dropped; > > + struct dp_netdev_flow *dp_flow = aux->dp_flow; > > > > switch ((enum ovs_action_attr)type) { > > case OVS_ACTION_ATTR_OUTPUT: > > @@ -7477,9 +7491,20 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, > > } > > dp_packet_batch_apply_cutlen(packets_); > > packet_count = dp_packet_batch_size(packets_); > > - if (push_tnl_action(pmd, a, packets_)) { > > - COVERAGE_ADD(datapath_drop_tunnel_push_error, > > - packet_count); > > + /* Execute tnl_push action in SW, if it is not offloaded as a partial > > + * action in HW. Otherwise, HW pushes the tunnel header during output > > + * processing. There's a small window here in which the offload thread > > + * offloads the flow, but the partial_actions_offloaded flag is still > > + * not set. In this case, as the packet is already encapsulated, it > > + * wouldn't match the offloaded flow and the action won't be executed > > + * in HW. > > + */ > > + if (!dp_flow || !dp_flow->partial_actions_offloaded) { > > + if (push_tnl_action(pmd, a, packets_)) { > > + COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count); > > + } > > + } else { > > It is possible (though rare) that encap headers will match the original > flow. In such case, in the time before the SW knows the HW rule is > installed, the packet will be encapsulated by SW, and then again in HW. > > The rule is for example: > > set vxlan ip-version ipv4|ipv6 vni <vni> udp-src <udp-src> udp-dst > <udp-dst> ip-src <ip-src> ip-dst <ip-dst> eth-src e4:11:22:33:44:50 > eth-dst b6:7a:b2:f1:d5:e4 > > flow create 0 egress priority 0 group 0 transfer pattern eth src is > e4:11:22:33:44:50 dst is b6:7a:b2:f1:d5:e4 type is 0x0800 / ipv4 ... end > / actions vxlan_encap / end > > the original packet is: > > Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()... > > after SW encap: > > Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()/UDP()/VXLAN()/Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()... > > It will hit the egress flow, and be encapsulated again. > > Need to add a validation that the encap header doesn't match the flow's > matches, and reject the flow if it does. I already responded to this in my email dated Jun-29-2020. I'm adding it here for reference: "I tested this configuration with a Broadcom-NIC. There is no issue and the reason being the same as mentioned in the above comments. That is, even with the same inner/outer header (packet already encapsulated in SW), it doesn't match the offloaded rule and the packet gets transmitted properly (no double-encap).". Thanks, -Harsha > > > + COVERAGE_ADD(datapath_skip_tunnel_push, packet_count); > > } > > return; > > > > @@ -7771,9 +7796,10 @@ static void > > dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, > > struct dp_packet_batch *packets, > > bool should_steal, const struct flow *flow, > > - const struct nlattr *actions, size_t actions_len) > > + const struct nlattr *actions, size_t actions_len, > > + const struct dp_netdev_flow *dp_flow) > > { > > - struct dp_netdev_execute_aux aux = { pmd, flow }; > > + struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow }; > > > > odp_execute_actions(&aux, packets, should_steal, actions, > > actions_len, dp_execute_cb);
On 7/6/2020 4:20 PM, Sriharsha Basavapatna wrote: > On Sun, Jul 5, 2020 at 7:33 PM Eli Britstein <elibr@mellanox.com> wrote: >> >> On 6/29/2020 12:50 PM, Sriharsha Basavapatna wrote: >>> In this patch we check if action processing (apart from OUTPUT action), >>> should be skipped for a given dp_netdev_flow. Specifically, we check if >>> the action is TNL_PUSH and if it has been offloaded to HW, then we do not >>> push the tunnel header in SW. The datapath only executes the OUTPUT action. >>> The packet will be encapsulated in HW during transmit. >>> >>> Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> >>> --- >>> lib/dpif-netdev.c | 46 ++++++++++++++++++++++++++++++++++++---------- >>> 1 file changed, 36 insertions(+), 10 deletions(-) >>> >>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >>> index 7adea8c40..b96a75d1f 100644 >>> --- a/lib/dpif-netdev.c >>> +++ b/lib/dpif-netdev.c >>> @@ -114,6 +114,7 @@ COVERAGE_DEFINE(datapath_drop_invalid_port); >>> COVERAGE_DEFINE(datapath_drop_invalid_bond); >>> COVERAGE_DEFINE(datapath_drop_invalid_tnl_port); >>> COVERAGE_DEFINE(datapath_drop_rx_invalid_packet); >>> +COVERAGE_DEFINE(datapath_skip_tunnel_push); >>> >>> /* Protects against changes to 'dp_netdevs'. */ >>> static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; >>> @@ -545,6 +546,16 @@ struct dp_netdev_flow { >>> bool dead; >>> uint32_t mark; /* Unique flow mark assigned to a flow */ >>> >>> + /* The next two members are used to support partial offloading of >>> + * actions. The boolean flag tells if this flow has its actions partially >>> + * offloaded. The egress port# tells if the action should be offloaded >>> + * on the egress (output) port instead of the in-port for the flow. Note >>> + * that we support flows with a single egress port action. >>> + * (see MAX_ACTION_ATTRS for related comments). >>> + */ >>> + bool partial_actions_offloaded; >>> + odp_port_t egress_offload_port; >>> + >>> /* Statistics. */ >>> struct dp_netdev_flow_stats stats; >>> >>> @@ -817,7 +828,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, >>> bool should_steal, >>> const struct flow *flow, >>> const struct nlattr *actions, >>> - size_t actions_len); >>> + size_t actions_len, >>> + const struct dp_netdev_flow *dp_flow); >>> static void dp_netdev_input(struct dp_netdev_pmd_thread *, >>> struct dp_packet_batch *, odp_port_t port_no); >>> static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *, >>> @@ -3954,7 +3966,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) >>> dp_packet_batch_init_packet(&pp, execute->packet); >>> pp.do_not_steal = true; >>> dp_netdev_execute_actions(pmd, &pp, false, execute->flow, >>> - execute->actions, execute->actions_len); >>> + execute->actions, execute->actions_len, NULL); >>> dp_netdev_pmd_flush_output_packets(pmd, true); >>> >>> if (pmd->core_id == NON_PMD_CORE_ID) { >>> @@ -6672,7 +6684,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, >>> actions = dp_netdev_flow_get_actions(flow); >>> >>> dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow, >>> - actions->actions, actions->size); >>> + actions->actions, actions->size, flow); >>> } >>> >>> static inline void >>> @@ -6967,7 +6979,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, >>> * we'll send the packet up twice. */ >>> dp_packet_batch_init_packet(&b, packet); >>> dp_netdev_execute_actions(pmd, &b, true, &match.flow, >>> - actions->data, actions->size); >>> + actions->data, actions->size, NULL); >>> >>> add_actions = put_actions->size ? put_actions : actions; >>> if (OVS_LIKELY(error != ENOSPC)) { >>> @@ -7202,6 +7214,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, >>> struct dp_netdev_execute_aux { >>> struct dp_netdev_pmd_thread *pmd; >>> const struct flow *flow; >>> + const struct dp_netdev_flow *dp_flow; /* for partial action offload */ >>> }; >>> >>> static void >>> @@ -7346,7 +7359,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd, >>> if (!error || error == ENOSPC) { >>> dp_packet_batch_init_packet(&b, packet); >>> dp_netdev_execute_actions(pmd, &b, should_steal, flow, >>> - actions->data, actions->size); >>> + actions->data, actions->size, NULL); >>> } else if (should_steal) { >>> dp_packet_delete(packet); >>> COVERAGE_INC(datapath_drop_userspace_action_error); >>> @@ -7455,6 +7468,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, >>> int type = nl_attr_type(a); >>> struct tx_port *p; >>> uint32_t packet_count, packets_dropped; >>> + struct dp_netdev_flow *dp_flow = aux->dp_flow; >>> >>> switch ((enum ovs_action_attr)type) { >>> case OVS_ACTION_ATTR_OUTPUT: >>> @@ -7477,9 +7491,20 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, >>> } >>> dp_packet_batch_apply_cutlen(packets_); >>> packet_count = dp_packet_batch_size(packets_); >>> - if (push_tnl_action(pmd, a, packets_)) { >>> - COVERAGE_ADD(datapath_drop_tunnel_push_error, >>> - packet_count); >>> + /* Execute tnl_push action in SW, if it is not offloaded as a partial >>> + * action in HW. Otherwise, HW pushes the tunnel header during output >>> + * processing. There's a small window here in which the offload thread >>> + * offloads the flow, but the partial_actions_offloaded flag is still >>> + * not set. In this case, as the packet is already encapsulated, it >>> + * wouldn't match the offloaded flow and the action won't be executed >>> + * in HW. >>> + */ >>> + if (!dp_flow || !dp_flow->partial_actions_offloaded) { >>> + if (push_tnl_action(pmd, a, packets_)) { >>> + COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count); >>> + } >>> + } else { >> It is possible (though rare) that encap headers will match the original >> flow. In such case, in the time before the SW knows the HW rule is >> installed, the packet will be encapsulated by SW, and then again in HW. >> >> The rule is for example: >> >> set vxlan ip-version ipv4|ipv6 vni <vni> udp-src <udp-src> udp-dst >> <udp-dst> ip-src <ip-src> ip-dst <ip-dst> eth-src e4:11:22:33:44:50 >> eth-dst b6:7a:b2:f1:d5:e4 >> >> flow create 0 egress priority 0 group 0 transfer pattern eth src is >> e4:11:22:33:44:50 dst is b6:7a:b2:f1:d5:e4 type is 0x0800 / ipv4 ... end >> / actions vxlan_encap / end >> >> the original packet is: >> >> Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()... >> >> after SW encap: >> >> Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()/UDP()/VXLAN()/Ether(src=e4:11:22:33:44:50,dst=b6:7a:b2:f1:d5:e4)/IP()... >> >> It will hit the egress flow, and be encapsulated again. >> >> Need to add a validation that the encap header doesn't match the flow's >> matches, and reject the flow if it does. > I already responded to this in my email dated Jun-29-2020. I'm adding > it here for reference: > > "I tested this configuration with a Broadcom-NIC. There is no issue > and the reason being the same as mentioned in the above comments. That > is, even with the same inner/outer header (packet already encapsulated > in SW), it doesn't match the offloaded rule and the packet gets > transmitted properly (no double-encap).". > > Thanks, > -Harsha From rte_flow API perspective, the rule has matches and actions. If the packet headers hit those matches, the actions are applied. In this case, both inner and outer headers will match the rte_flow rule matches. When the rule is successfully offloaded, the HW must apply the actions when the rule is matched. > > >>> + COVERAGE_ADD(datapath_skip_tunnel_push, packet_count); >>> } >>> return; >>> >>> @@ -7771,9 +7796,10 @@ static void >>> dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, >>> struct dp_packet_batch *packets, >>> bool should_steal, const struct flow *flow, >>> - const struct nlattr *actions, size_t actions_len) >>> + const struct nlattr *actions, size_t actions_len, >>> + const struct dp_netdev_flow *dp_flow) >>> { >>> - struct dp_netdev_execute_aux aux = { pmd, flow }; >>> + struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow }; >>> >>> odp_execute_actions(&aux, packets, should_steal, actions, >>> actions_len, dp_execute_cb);
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 7adea8c40..b96a75d1f 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -114,6 +114,7 @@ COVERAGE_DEFINE(datapath_drop_invalid_port); COVERAGE_DEFINE(datapath_drop_invalid_bond); COVERAGE_DEFINE(datapath_drop_invalid_tnl_port); COVERAGE_DEFINE(datapath_drop_rx_invalid_packet); +COVERAGE_DEFINE(datapath_skip_tunnel_push); /* Protects against changes to 'dp_netdevs'. */ static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER; @@ -545,6 +546,16 @@ struct dp_netdev_flow { bool dead; uint32_t mark; /* Unique flow mark assigned to a flow */ + /* The next two members are used to support partial offloading of + * actions. The boolean flag tells if this flow has its actions partially + * offloaded. The egress port# tells if the action should be offloaded + * on the egress (output) port instead of the in-port for the flow. Note + * that we support flows with a single egress port action. + * (see MAX_ACTION_ATTRS for related comments). + */ + bool partial_actions_offloaded; + odp_port_t egress_offload_port; + /* Statistics. */ struct dp_netdev_flow_stats stats; @@ -817,7 +828,8 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, bool should_steal, const struct flow *flow, const struct nlattr *actions, - size_t actions_len); + size_t actions_len, + const struct dp_netdev_flow *dp_flow); static void dp_netdev_input(struct dp_netdev_pmd_thread *, struct dp_packet_batch *, odp_port_t port_no); static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *, @@ -3954,7 +3966,7 @@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute) dp_packet_batch_init_packet(&pp, execute->packet); pp.do_not_steal = true; dp_netdev_execute_actions(pmd, &pp, false, execute->flow, - execute->actions, execute->actions_len); + execute->actions, execute->actions_len, NULL); dp_netdev_pmd_flush_output_packets(pmd, true); if (pmd->core_id == NON_PMD_CORE_ID) { @@ -6672,7 +6684,7 @@ packet_batch_per_flow_execute(struct packet_batch_per_flow *batch, actions = dp_netdev_flow_get_actions(flow); dp_netdev_execute_actions(pmd, &batch->array, true, &flow->flow, - actions->actions, actions->size); + actions->actions, actions->size, flow); } static inline void @@ -6967,7 +6979,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, * we'll send the packet up twice. */ dp_packet_batch_init_packet(&b, packet); dp_netdev_execute_actions(pmd, &b, true, &match.flow, - actions->data, actions->size); + actions->data, actions->size, NULL); add_actions = put_actions->size ? put_actions : actions; if (OVS_LIKELY(error != ENOSPC)) { @@ -7202,6 +7214,7 @@ dp_netdev_recirculate(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_execute_aux { struct dp_netdev_pmd_thread *pmd; const struct flow *flow; + const struct dp_netdev_flow *dp_flow; /* for partial action offload */ }; static void @@ -7346,7 +7359,7 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd, if (!error || error == ENOSPC) { dp_packet_batch_init_packet(&b, packet); dp_netdev_execute_actions(pmd, &b, should_steal, flow, - actions->data, actions->size); + actions->data, actions->size, NULL); } else if (should_steal) { dp_packet_delete(packet); COVERAGE_INC(datapath_drop_userspace_action_error); @@ -7455,6 +7468,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, int type = nl_attr_type(a); struct tx_port *p; uint32_t packet_count, packets_dropped; + struct dp_netdev_flow *dp_flow = aux->dp_flow; switch ((enum ovs_action_attr)type) { case OVS_ACTION_ATTR_OUTPUT: @@ -7477,9 +7491,20 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, } dp_packet_batch_apply_cutlen(packets_); packet_count = dp_packet_batch_size(packets_); - if (push_tnl_action(pmd, a, packets_)) { - COVERAGE_ADD(datapath_drop_tunnel_push_error, - packet_count); + /* Execute tnl_push action in SW, if it is not offloaded as a partial + * action in HW. Otherwise, HW pushes the tunnel header during output + * processing. There's a small window here in which the offload thread + * offloads the flow, but the partial_actions_offloaded flag is still + * not set. In this case, as the packet is already encapsulated, it + * wouldn't match the offloaded flow and the action won't be executed + * in HW. + */ + if (!dp_flow || !dp_flow->partial_actions_offloaded) { + if (push_tnl_action(pmd, a, packets_)) { + COVERAGE_ADD(datapath_drop_tunnel_push_error, packet_count); + } + } else { + COVERAGE_ADD(datapath_skip_tunnel_push, packet_count); } return; @@ -7771,9 +7796,10 @@ static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd, struct dp_packet_batch *packets, bool should_steal, const struct flow *flow, - const struct nlattr *actions, size_t actions_len) + const struct nlattr *actions, size_t actions_len, + const struct dp_netdev_flow *dp_flow) { - struct dp_netdev_execute_aux aux = { pmd, flow }; + struct dp_netdev_execute_aux aux = { pmd, flow, dp_flow }; odp_execute_actions(&aux, packets, should_steal, actions, actions_len, dp_execute_cb);
In this patch we check if action processing (apart from OUTPUT action), should be skipped for a given dp_netdev_flow. Specifically, we check if the action is TNL_PUSH and if it has been offloaded to HW, then we do not push the tunnel header in SW. The datapath only executes the OUTPUT action. The packet will be encapsulated in HW during transmit. Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com> --- lib/dpif-netdev.c | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-)