diff mbox series

[ovs-dev,v2,2/3] actions: Add new OVN logical fields 'ip.src' and ip.dst'.

Message ID 20201014091525.3391981-1-numans@ovn.org
State Superseded
Headers show
Series Optimize logical flow generation for reject ACLs | expand

Commit Message

Numan Siddique Oct. 14, 2020, 9:15 a.m. UTC
From: Numan Siddique <numans@ovn.org>

Thee new fields are version independent and these can be used in any
OVN action. Right now the usage of these fields are restricted to
exchanging the IP source and destination fields.

Eg. reject {... ip.src <-> ip.dst ... };

"ip.src <-> ip.dst" translates to controller action with "pause" flag set.
When pinctrl thread receives the packet in, it checks the IP version of the
packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
resumes the packet to continue with the pipeline.

Upcoming patch will make use of these fields.

Signed-off-by: Numan Siddique <numans@ovn.org>
---
 controller/pinctrl.c         |  49 +++++++++++++++
 include/ovn/actions.h        |   4 ++
 include/ovn/logical-fields.h |  18 ++++++
 lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
 lib/logical-fields.c         |  10 +++
 ovn-sb.xml                   |  37 +++++++++++
 tests/ovn.at                 |  27 ++++++++
 utilities/ovn-trace.c        |  28 +++++++++
 8 files changed, 291 insertions(+)

Comments

Dumitru Ceara Oct. 14, 2020, 10:19 a.m. UTC | #1
On 10/14/20 11:15 AM, numans@ovn.org wrote:
> From: Numan Siddique <numans@ovn.org>
> 
> Thee new fields are version independent and these can be used in any

Hi Numan,

Nit: s/Thee/These

> OVN action. Right now the usage of these fields are restricted to
> exchanging the IP source and destination fields.
> 
> Eg. reject {... ip.src <-> ip.dst ... };
> 
> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
> When pinctrl thread receives the packet in, it checks the IP version of the
> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
> resumes the packet to continue with the pipeline.
> 
> Upcoming patch will make use of these fields.
> 
> Signed-off-by: Numan Siddique <numans@ovn.org>
> ---
>  controller/pinctrl.c         |  49 +++++++++++++++
>  include/ovn/actions.h        |   4 ++
>  include/ovn/logical-fields.h |  18 ++++++
>  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
>  lib/logical-fields.c         |  10 +++
>  ovn-sb.xml                   |  37 +++++++++++
>  tests/ovn.at                 |  27 ++++++++
>  utilities/ovn-trace.c        |  28 +++++++++
>  8 files changed, 291 insertions(+)
> 
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index 631d058458..bc16a82404 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
>                                               struct ofputil_packet_in *pin,
>                                               struct ofpbuf *userdata,
>                                               struct ofpbuf *continuation);
> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> +                                           const struct flow *in_flow,
> +                                           struct dp_packet *pkt_in,
> +                                           struct ofputil_packet_in *pin,
> +                                           struct ofpbuf *continuation);
>  static void
>  pinctrl_handle_event(struct ofpbuf *userdata)
>      OVS_REQUIRES(pinctrl_mutex);
> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
>          ovs_mutex_unlock(&pinctrl_mutex);
>          break;
>  
> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
> +                                       &continuation);
> +        break;
> +
>      default:
>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
>                       ntohl(ah->opcode));
> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
>          svc_mon->next_send_time = time_msec() + svc_mon->interval;
>      }
>  }
> +
> +static void
> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> +                               const struct flow *in_flow,
> +                               struct dp_packet *pkt_in,
> +                               struct ofputil_packet_in *pin,
> +                               struct ofpbuf *continuation)
> +{
> +    enum ofp_version version = rconn_get_version(swconn);
> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> +    struct dp_packet *pkt_out;
> +
> +    pkt_out = dp_packet_clone(pkt_in);
> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> +
> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
> +        /* IPv4 packet. Swap nw_src with nw_dst. */
> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
> +                        in_flow->nw_tos, in_flow->nw_ttl);
> +    } else {
> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
> +         * We could also use packet_set_ipv6() here, but that would require
> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
> +         * unnecessary. */
> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
> +        out_nh->ip6_src = out_nh->ip6_dst;
> +        out_nh->ip6_dst = tmp;
> +    }
> +
> +    pin->packet = dp_packet_data(pkt_out);
> +    pin->packet_len = dp_packet_size(pkt_out);
> +
> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
> +    dp_packet_delete(pkt_out);
> +}
> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> index b4e5acabb9..bf1fe848b7 100644
> --- a/include/ovn/actions.h
> +++ b/include/ovn/actions.h
> @@ -97,6 +97,7 @@ struct ovn_extend_table;
>      OVNACT(DHCP6_REPLY,       ovnact_null)            \
>      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
>      OVNACT(REJECT,            ovnact_nest)            \
> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
>  
>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>  enum OVS_PACKED_ENUM ovnact_type {
> @@ -612,6 +613,9 @@ enum action_opcode {
>       * The actions, in OpenFlow 1.3 format, follow the action_header.
>       */
>      ACTION_OPCODE_REJECT,
> +
> +    /* ip.src <-> ip.dst */
> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
>  };
>  
>  /* Header. */
> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> index ac6f2f909b..bb6daa50ca 100644
> --- a/include/ovn/logical-fields.h
> +++ b/include/ovn/logical-fields.h
> @@ -116,6 +116,24 @@ enum ovn_field_id {
>       */
>      OVN_ICMP6_FRAG_MTU,
>  
> +    /*
> +     * Name: "ip.src"
> +     * Type: be128
> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
> +     *              or sets the field MFF_IPV6_SRC if
> +     *              eth.type == 0x86dd (IPV6).
> +     */
> +    OVN_IP_SRC,
> +
> +    /*
> +     * Name: "ip.dst"
> +     * Type: be128
> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
> +     *              or sets the field MFF_IPV6_DST if
> +     *              eth.type == 0x86dd (IPV6).
> +     */
> +    OVN_IP_DST,
> +
>      OVN_FIELD_N_IDS
>  };
>  
> diff --git a/lib/actions.c b/lib/actions.c
> index 1e1bdeff24..e9c77d2a0a 100644
> --- a/lib/actions.c
> +++ b/lib/actions.c
> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
>  {
>  }
>  
> +static bool
> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
> +{
> +    switch (field->symbol->ovn_field->id) {
> +    case OVN_ICMP4_FRAG_MTU:
> +    case OVN_ICMP6_FRAG_MTU:
> +        return true;
> +
> +    case OVN_IP_SRC:
> +    case OVN_IP_DST:
> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
> +                    field->symbol->name);
> +        return false;
> +
> +    case OVN_FIELD_N_IDS:
> +    default:
> +        OVS_NOT_REACHED();
> +    }
> +}
> +
>  static void
>  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
>  {
>      size_t ofs = ctx->ovnacts->size;
>      struct ovnact_load *load;
>      if (lhs->symbol->ovn_field) {
> +        if (!check_ovnfield_load(ctx, lhs)) {
> +            return;
> +        }
>          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
>      } else {
>          load = ovnact_put_LOAD(ctx->ovnacts);
> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
>      format_assignment(move, "<->", s);
>  }
>  
> +static void
> +parse_ovnfield_exchange(struct action_context *ctx,
> +                        const struct expr_field *lhs,
> +                        const struct expr_field *rhs)
> +{
> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
> +        lexer_error(ctx->lexer,
> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
> +                    lhs->symbol->name, rhs->symbol->name);
> +        return;
> +    }
> +
> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
> +        lexer_error(ctx->lexer,
> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> +                    lhs->symbol->name, rhs->symbol->name);
> +        return;
> +    }
> +
> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
> +        lexer_error(ctx->lexer,
> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> +                    lhs->symbol->name, rhs->symbol->name);
> +        return;
> +    }
> +
> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
> +        lexer_error(ctx->lexer,
> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> +                    lhs->symbol->name, rhs->symbol->name);
> +        return;
> +    }
> +
> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
> +    move->lhs = *lhs;
> +    move->rhs = *rhs;
> +}
> +
>  static void
>  parse_assignment_action(struct action_context *ctx, bool exchange,
>                          const struct expr_field *lhs)
> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
>          return;
>      }
>  
> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
> +        return;
> +    }
> +
>      const struct expr_symbol *ls = lhs->symbol;
>      const struct expr_symbol *rs = rhs.symbol;
>      if ((ls->width != 0) != (rs->width != 0)) {
> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
>                        ntohs(load->imm.value.be16_int));
>          break;
>  
> +    case OVN_IP_SRC:
> +    case OVN_IP_DST:
>      case OVN_FIELD_N_IDS:
>      default:
>          OVS_NOT_REACHED();
> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
>          encode_finish_controller_op(oc_offset, ofpacts);
>          break;
>      }
> +
> +    case OVN_IP_SRC:
> +    case OVN_IP_DST:
>      case OVN_FIELD_N_IDS:
>      default:
>          OVS_NOT_REACHED();
> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
>      free(fwd_group->child_ports);
>  }
>  
> +static void
> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
> +{
> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
> +    switch (lhs->id) {
> +    case OVN_IP_SRC:
> +    case OVN_IP_DST:
> +        break;
> +
> +    case OVN_ICMP4_FRAG_MTU:
> +    case OVN_ICMP6_FRAG_MTU:
> +    case OVN_FIELD_N_IDS:
> +    default:
> +        OVS_NOT_REACHED();
> +    }

Nit: I would use the shorter:
ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);

> +
> +    switch (rhs->id) {
> +    case OVN_IP_SRC:
> +    case OVN_IP_DST:
> +        break;
> +
> +    case OVN_ICMP4_FRAG_MTU:
> +    case OVN_ICMP6_FRAG_MTU:
> +    case OVN_FIELD_N_IDS:
> +    default:
> +        OVS_NOT_REACHED();
> +    }

Nit: I would use the shorter:
ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);

> +
> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
> +}
> +
> +static void
> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
> +                         struct ofpbuf *ofpacts OVS_UNUSED)
> +{
> +    /* Right now we only support exchanging the IPs in
> +     * OVN fields (ip.src <-> ip.dst). */
> +    size_t oc_offset =
> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
> +                                   true, NX_CTLR_NO_METER, ofpacts);
> +    encode_finish_controller_op(oc_offset, ofpacts);

I think this means that all packets matching a flow with action "reject { ...
ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
reject action, one for the ovnfield-exchange action.

Is that a concern wrt. performance?

> +}
> +
>  /* Parses an assignment or exchange or put_dhcp_opts action. */
>  static void
>  parse_set_action(struct action_context *ctx)
> diff --git a/lib/logical-fields.c b/lib/logical-fields.c
> index bf61df7719..d6f1981f52 100644
> --- a/lib/logical-fields.c
> +++ b/lib/logical-fields.c
> @@ -33,6 +33,14 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
>          OVN_ICMP6_FRAG_MTU,
>          "icmp6.frag_mtu",
>          4, 32,
> +    }, {
> +        OVN_IP_SRC,
> +        "ip.src",
> +        16, 128,
> +    }, {
> +        OVN_IP_DST,
> +        "ip.dst",
> +        16, 128,

Just partially related to your change, I'm a bit confused by the meaning of
n_bytes and n_bits in "struct ovn_field".  It seems to me that we never use
those values.  Is that the case or am I missing something?

Thanks,
Dumitru

>      },
>  };
>  
> @@ -271,6 +279,8 @@ ovn_init_symtab(struct shash *symtab)
>  
>      expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU);
>      expr_symtab_add_ovn_field(symtab, "icmp6.frag_mtu", OVN_ICMP6_FRAG_MTU);
> +    expr_symtab_add_ovn_field(symtab, "ip.src", OVN_IP_SRC);
> +    expr_symtab_add_ovn_field(symtab, "ip.dst", OVN_IP_DST);
>  }
>  
>  const char *
> diff --git a/ovn-sb.xml b/ovn-sb.xml
> index 2fc84f54ee..73ee543bfe 100644
> --- a/ovn-sb.xml
> +++ b/ovn-sb.xml
> @@ -1258,6 +1258,43 @@
>              except that the two values are exchanged instead of copied.  Both
>              <var>field1</var> and <var>field2</var> must modifiable.
>            </p>
> +
> +          <p>
> +            <code>OVN</code> supports exchange of below OVN logical fields.
> +          </p>
> +
> +          <ul>
> +            <li>
> +              <code>ip.src</code>
> +              <code>ip.dst</code>
> +              <p>
> +                These fields can be used to refer to IP source and destination
> +                fields which are IP version independent. These fields can be
> +                used only for exchange of values.
> +              </p>
> +            </li>
> +
> +            <li>
> +              <p>
> +                Below are few examples:
> +              </p>
> +
> +              <p>
> +                match = "ip &amp;&amp; tcp"
> +                action = "ip.src &lt;-&gt; ip.dst; next;"
> +              </p>
> +
> +              <p>
> +                match = "ip4 || ip6"
> +                action = reject { ip.src &lt;-&gt; ip.dst; } next;"
> +              </p>
> +
> +              <p>
> +                match = "ip &amp;&amp; (tcp || udp)"
> +                action = "ip.src &lt;-&gt; ip.dst; next;"
> +              </p>
> +            </li>
> +          </ul>
>          </dd>
>  
>          <dt><code>ip.ttl--;</code></dt>
> diff --git a/tests/ovn.at b/tests/ovn.at
> index 71837a6721..c40d69c5e9 100644
> --- a/tests/ovn.at
> +++ b/tests/ovn.at
> @@ -1605,6 +1605,33 @@ reject { };
>      formats as reject { drop; };
>      encodes as controller(userdata=00.00.00.16.00.00.00.00)
>  
> +ip.src <-> ip.dst;
> +    encodes as controller(userdata=00.00.00.17.00.00.00.00,pause)
> +
> +ip.src <-> ip.dst
> +    Syntax error at end of input expecting `;'.
> +
> +ip.src = 10.0.0.10;
> +    Can't load a value to ovn field (ip.src).
> +
> +ip.dst = aef0::4;
> +    Can't load a value to ovn field (ip.dst).
> +
> +ip.src <-> ip4.dst;
> +    Can't exchange ovn field with non ovn field (ip.src <-> ip4.dst).
> +
> +ip6.src <-> ip.dst;
> +    Can't exchange ovn field with non ovn field (ip6.src <-> ip.dst).
> +
> +ip.src <-> icmp4.frag_mtu;
> +    Can't exchange ovn field (ip.src) with ovn field (icmp4.frag_mtu).
> +
> +icmp6.frag_mtu <-> ip.dst;
> +    Can't exchange ovn field (icmp6.frag_mtu) with ovn field (ip.dst).
> +
> +ip.src <-> ip.src;
> +    Can't exchange ovn field (ip.src) with ovn field (ip.src).
> +
>  # trigger_event
>  trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "tcp", load_balancer = "12345678-abcd-9876-fedc-11119f8e7d6c");
>      encodes as controller(userdata=00.00.00.0f.00.00.00.00.00.00.00.00.00.01.00.0b.31.30.2e.30.2e.30.2e.31.3a.38.30.00.02.00.03.74.63.70.00.03.00.24.31.32.33.34.35.36.37.38.2d.61.62.63.64.2d.39.38.37.36.2d.66.65.64.63.2d.31.31.31.31.39.66.38.65.37.64.36.63)
> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> index 38aee6081b..ed7d8bf278 100644
> --- a/utilities/ovn-trace.c
> +++ b/utilities/ovn-trace.c
> @@ -2147,12 +2147,35 @@ execute_ovnfield_load(const struct ovnact_load *load,
>                               ntohs(load->imm.value.be16_int));
>          break;
>      }
> +
> +    case OVN_IP_SRC:
> +    case OVN_IP_DST:
>      case OVN_FIELD_N_IDS:
>      default:
>          OVS_NOT_REACHED();
>      }
>  }
>  
> +static void
> +execute_ovnfield_exchange(const struct ovnact_move *move OVS_UNUSED,
> +                          struct flow *uflow,
> +                          struct ovs_list *super)
> +{
> +    /* Right now we support exchanging of ip.src with ip.dst. */
> +    ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
> +                         "ip.src <-> ip.dst");
> +
> +    if (get_dl_type(uflow) == htons(ETH_TYPE_IP)) {
> +        ovs_be32 tmp = uflow->nw_dst;
> +        uflow->nw_dst = uflow->nw_src;
> +        uflow->nw_src = tmp;
> +    } else {
> +        struct in6_addr tmp = uflow->ipv6_dst;
> +        uflow->ipv6_dst = uflow->ipv6_src;
> +        uflow->ipv6_src = tmp;
> +    }
> +}
> +
>  static void
>  trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
>                const struct ovntrace_datapath *dp, struct flow *uflow,
> @@ -2369,6 +2392,11 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
>                             pipeline, super);
>              break;
>  
> +        case OVNACT_OVNFIELD_EXCHANGE:
> +            execute_ovnfield_exchange(ovnact_get_OVNFIELD_EXCHANGE(a),
> +                                      uflow, super);
> +            break;
> +
>          case OVNACT_TRIGGER_EVENT:
>              break;
>  
>
Numan Siddique Oct. 14, 2020, 10:50 a.m. UTC | #2
On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 10/14/20 11:15 AM, numans@ovn.org wrote:
> > From: Numan Siddique <numans@ovn.org>
> >
> > Thee new fields are version independent and these can be used in any
>
> Hi Numan,
>
> Nit: s/Thee/These
>
> > OVN action. Right now the usage of these fields are restricted to
> > exchanging the IP source and destination fields.
> >
> > Eg. reject {... ip.src <-> ip.dst ... };
> >
> > "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
> > When pinctrl thread receives the packet in, it checks the IP version of the
> > packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
> > resumes the packet to continue with the pipeline.
> >
> > Upcoming patch will make use of these fields.
> >
> > Signed-off-by: Numan Siddique <numans@ovn.org>
> > ---
> >  controller/pinctrl.c         |  49 +++++++++++++++
> >  include/ovn/actions.h        |   4 ++
> >  include/ovn/logical-fields.h |  18 ++++++
> >  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
> >  lib/logical-fields.c         |  10 +++
> >  ovn-sb.xml                   |  37 +++++++++++
> >  tests/ovn.at                 |  27 ++++++++
> >  utilities/ovn-trace.c        |  28 +++++++++
> >  8 files changed, 291 insertions(+)
> >
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index 631d058458..bc16a82404 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
> >                                               struct ofputil_packet_in *pin,
> >                                               struct ofpbuf *userdata,
> >                                               struct ofpbuf *continuation);
> > +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> > +                                           const struct flow *in_flow,
> > +                                           struct dp_packet *pkt_in,
> > +                                           struct ofputil_packet_in *pin,
> > +                                           struct ofpbuf *continuation);
> >  static void
> >  pinctrl_handle_event(struct ofpbuf *userdata)
> >      OVS_REQUIRES(pinctrl_mutex);
> > @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
> >          ovs_mutex_unlock(&pinctrl_mutex);
> >          break;
> >
> > +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
> > +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
> > +                                       &continuation);
> > +        break;
> > +
> >      default:
> >          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
> >                       ntohl(ah->opcode));
> > @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
> >          svc_mon->next_send_time = time_msec() + svc_mon->interval;
> >      }
> >  }
> > +
> > +static void
> > +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> > +                               const struct flow *in_flow,
> > +                               struct dp_packet *pkt_in,
> > +                               struct ofputil_packet_in *pin,
> > +                               struct ofpbuf *continuation)
> > +{
> > +    enum ofp_version version = rconn_get_version(swconn);
> > +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> > +    struct dp_packet *pkt_out;
> > +
> > +    pkt_out = dp_packet_clone(pkt_in);
> > +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
> > +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
> > +    pkt_out->l3_ofs = pkt_in->l3_ofs;
> > +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> > +
> > +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
> > +        /* IPv4 packet. Swap nw_src with nw_dst. */
> > +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
> > +                        in_flow->nw_tos, in_flow->nw_ttl);
> > +    } else {
> > +        /* IPv6 packet. Swap ip6_src with ip6_dst.
> > +         * We could also use packet_set_ipv6() here, but that would require
> > +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
> > +         * unnecessary. */
> > +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
> > +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
> > +        out_nh->ip6_src = out_nh->ip6_dst;
> > +        out_nh->ip6_dst = tmp;
> > +    }
> > +
> > +    pin->packet = dp_packet_data(pkt_out);
> > +    pin->packet_len = dp_packet_size(pkt_out);
> > +
> > +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
> > +    dp_packet_delete(pkt_out);
> > +}
> > diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> > index b4e5acabb9..bf1fe848b7 100644
> > --- a/include/ovn/actions.h
> > +++ b/include/ovn/actions.h
> > @@ -97,6 +97,7 @@ struct ovn_extend_table;
> >      OVNACT(DHCP6_REPLY,       ovnact_null)            \
> >      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
> >      OVNACT(REJECT,            ovnact_nest)            \
> > +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
> >
> >  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
> >  enum OVS_PACKED_ENUM ovnact_type {
> > @@ -612,6 +613,9 @@ enum action_opcode {
> >       * The actions, in OpenFlow 1.3 format, follow the action_header.
> >       */
> >      ACTION_OPCODE_REJECT,
> > +
> > +    /* ip.src <-> ip.dst */
> > +    ACTION_OPCODE_SWAP_SRC_DST_IP,
> >  };
> >
> >  /* Header. */
> > diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> > index ac6f2f909b..bb6daa50ca 100644
> > --- a/include/ovn/logical-fields.h
> > +++ b/include/ovn/logical-fields.h
> > @@ -116,6 +116,24 @@ enum ovn_field_id {
> >       */
> >      OVN_ICMP6_FRAG_MTU,
> >
> > +    /*
> > +     * Name: "ip.src"
> > +     * Type: be128
> > +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
> > +     *              or sets the field MFF_IPV6_SRC if
> > +     *              eth.type == 0x86dd (IPV6).
> > +     */
> > +    OVN_IP_SRC,
> > +
> > +    /*
> > +     * Name: "ip.dst"
> > +     * Type: be128
> > +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
> > +     *              or sets the field MFF_IPV6_DST if
> > +     *              eth.type == 0x86dd (IPV6).
> > +     */
> > +    OVN_IP_DST,
> > +
> >      OVN_FIELD_N_IDS
> >  };
> >
> > diff --git a/lib/actions.c b/lib/actions.c
> > index 1e1bdeff24..e9c77d2a0a 100644
> > --- a/lib/actions.c
> > +++ b/lib/actions.c
> > @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
> >  {
> >  }
> >
> > +static bool
> > +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
> > +{
> > +    switch (field->symbol->ovn_field->id) {
> > +    case OVN_ICMP4_FRAG_MTU:
> > +    case OVN_ICMP6_FRAG_MTU:
> > +        return true;
> > +
> > +    case OVN_IP_SRC:
> > +    case OVN_IP_DST:
> > +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
> > +                    field->symbol->name);
> > +        return false;
> > +
> > +    case OVN_FIELD_N_IDS:
> > +    default:
> > +        OVS_NOT_REACHED();
> > +    }
> > +}
> > +
> >  static void
> >  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
> >  {
> >      size_t ofs = ctx->ovnacts->size;
> >      struct ovnact_load *load;
> >      if (lhs->symbol->ovn_field) {
> > +        if (!check_ovnfield_load(ctx, lhs)) {
> > +            return;
> > +        }
> >          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
> >      } else {
> >          load = ovnact_put_LOAD(ctx->ovnacts);
> > @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
> >      format_assignment(move, "<->", s);
> >  }
> >
> > +static void
> > +parse_ovnfield_exchange(struct action_context *ctx,
> > +                        const struct expr_field *lhs,
> > +                        const struct expr_field *rhs)
> > +{
> > +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
> > +        lexer_error(ctx->lexer,
> > +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
> > +                    lhs->symbol->name, rhs->symbol->name);
> > +        return;
> > +    }
> > +
> > +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
> > +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
> > +        lexer_error(ctx->lexer,
> > +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> > +                    lhs->symbol->name, rhs->symbol->name);
> > +        return;
> > +    }
> > +
> > +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
> > +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
> > +        lexer_error(ctx->lexer,
> > +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> > +                    lhs->symbol->name, rhs->symbol->name);
> > +        return;
> > +    }
> > +
> > +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
> > +        lexer_error(ctx->lexer,
> > +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> > +                    lhs->symbol->name, rhs->symbol->name);
> > +        return;
> > +    }
> > +
> > +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
> > +    move->lhs = *lhs;
> > +    move->rhs = *rhs;
> > +}
> > +
> >  static void
> >  parse_assignment_action(struct action_context *ctx, bool exchange,
> >                          const struct expr_field *lhs)
> > @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
> >          return;
> >      }
> >
> > +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
> > +        parse_ovnfield_exchange(ctx, lhs, &rhs);
> > +        return;
> > +    }
> > +
> >      const struct expr_symbol *ls = lhs->symbol;
> >      const struct expr_symbol *rs = rhs.symbol;
> >      if ((ls->width != 0) != (rs->width != 0)) {
> > @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
> >                        ntohs(load->imm.value.be16_int));
> >          break;
> >
> > +    case OVN_IP_SRC:
> > +    case OVN_IP_DST:
> >      case OVN_FIELD_N_IDS:
> >      default:
> >          OVS_NOT_REACHED();
> > @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
> >          encode_finish_controller_op(oc_offset, ofpacts);
> >          break;
> >      }
> > +
> > +    case OVN_IP_SRC:
> > +    case OVN_IP_DST:
> >      case OVN_FIELD_N_IDS:
> >      default:
> >          OVS_NOT_REACHED();
> > @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
> >      free(fwd_group->child_ports);
> >  }
> >
> > +static void
> > +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
> > +{
> > +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
> > +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
> > +    switch (lhs->id) {
> > +    case OVN_IP_SRC:
> > +    case OVN_IP_DST:
> > +        break;
> > +
> > +    case OVN_ICMP4_FRAG_MTU:
> > +    case OVN_ICMP6_FRAG_MTU:
> > +    case OVN_FIELD_N_IDS:
> > +    default:
> > +        OVS_NOT_REACHED();
> > +    }
>
> Nit: I would use the shorter:
> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);

You mean to drop the switch right ? If so, Ack.


>
> > +
> > +    switch (rhs->id) {
> > +    case OVN_IP_SRC:
> > +    case OVN_IP_DST:
> > +        break;
> > +
> > +    case OVN_ICMP4_FRAG_MTU:
> > +    case OVN_ICMP6_FRAG_MTU:
> > +    case OVN_FIELD_N_IDS:
> > +    default:
> > +        OVS_NOT_REACHED();
> > +    }
>
> Nit: I would use the shorter:
> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
>
> > +
> > +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
> > +}
> > +
> > +static void
> > +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
> > +                         const struct ovnact_encode_params *ep OVS_UNUSED,
> > +                         struct ofpbuf *ofpacts OVS_UNUSED)
> > +{
> > +    /* Right now we only support exchanging the IPs in
> > +     * OVN fields (ip.src <-> ip.dst). */
> > +    size_t oc_offset =
> > +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
> > +                                   true, NX_CTLR_NO_METER, ofpacts);
> > +    encode_finish_controller_op(oc_offset, ofpacts);
>
> I think this means that all packets matching a flow with action "reject { ...
> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
> reject action, one for the ovnfield-exchange action.
>
> Is that a concern wrt. performance?

Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
icmp4.frag_mtu = 1500; ...};

If performance is a concern, we could just drop ... ip.src <-> ip.dst
in reject and let reject action
swap the ip src with ip destination. I thought about that and my take
is that since reject { } results in
TCP RST or ICMP unreachable packet, it should be fine. I see these
packets as more of control packets.

I think it should be ok. Let me know what you think.


>
> > +}
> > +
> >  /* Parses an assignment or exchange or put_dhcp_opts action. */
> >  static void
> >  parse_set_action(struct action_context *ctx)
> > diff --git a/lib/logical-fields.c b/lib/logical-fields.c
> > index bf61df7719..d6f1981f52 100644
> > --- a/lib/logical-fields.c
> > +++ b/lib/logical-fields.c
> > @@ -33,6 +33,14 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
> >          OVN_ICMP6_FRAG_MTU,
> >          "icmp6.frag_mtu",
> >          4, 32,
> > +    }, {
> > +        OVN_IP_SRC,
> > +        "ip.src",
> > +        16, 128,
> > +    }, {
> > +        OVN_IP_DST,
> > +        "ip.dst",
> > +        16, 128,
>
> Just partially related to your change, I'm a bit confused by the meaning of
> n_bytes and n_bits in "struct ovn_field".  It seems to me that we never use
> those values.  Is that the case or am I missing something?

These OVN fields are defined more like the way - mf_fields are defined.
Like icmp4.frag_mtu is 2 bytes and 16 bits field.

So ip.src/ip.dst can be used for both ipv4 and ipv6, I have set the
size of ipv6.

Since we don't support ip.src = 10.0.0.4 or ip.src = aef00::4 for
example, these fields
are not used. Later on if there is a need to support assigning a value
to these fields, these fields
could be used. Although I don't see a need to support it since we
already have ip4.src/ip


Thanks for the review.

Numan

>


> Thanks,
> Dumitru
>
> >      },
> >  };
> >
> > @@ -271,6 +279,8 @@ ovn_init_symtab(struct shash *symtab)
> >
> >      expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU);
> >      expr_symtab_add_ovn_field(symtab, "icmp6.frag_mtu", OVN_ICMP6_FRAG_MTU);
> > +    expr_symtab_add_ovn_field(symtab, "ip.src", OVN_IP_SRC);
> > +    expr_symtab_add_ovn_field(symtab, "ip.dst", OVN_IP_DST);
> >  }
> >
> >  const char *
> > diff --git a/ovn-sb.xml b/ovn-sb.xml
> > index 2fc84f54ee..73ee543bfe 100644
> > --- a/ovn-sb.xml
> > +++ b/ovn-sb.xml
> > @@ -1258,6 +1258,43 @@
> >              except that the two values are exchanged instead of copied.  Both
> >              <var>field1</var> and <var>field2</var> must modifiable.
> >            </p>
> > +
> > +          <p>
> > +            <code>OVN</code> supports exchange of below OVN logical fields.
> > +          </p>
> > +
> > +          <ul>
> > +            <li>
> > +              <code>ip.src</code>
> > +              <code>ip.dst</code>
> > +              <p>
> > +                These fields can be used to refer to IP source and destination
> > +                fields which are IP version independent. These fields can be
> > +                used only for exchange of values.
> > +              </p>
> > +            </li>
> > +
> > +            <li>
> > +              <p>
> > +                Below are few examples:
> > +              </p>
> > +
> > +              <p>
> > +                match = "ip &amp;&amp; tcp"
> > +                action = "ip.src &lt;-&gt; ip.dst; next;"
> > +              </p>
> > +
> > +              <p>
> > +                match = "ip4 || ip6"
> > +                action = reject { ip.src &lt;-&gt; ip.dst; } next;"
> > +              </p>
> > +
> > +              <p>
> > +                match = "ip &amp;&amp; (tcp || udp)"
> > +                action = "ip.src &lt;-&gt; ip.dst; next;"
> > +              </p>
> > +            </li>
> > +          </ul>
> >          </dd>
> >
> >          <dt><code>ip.ttl--;</code></dt>
> > diff --git a/tests/ovn.at b/tests/ovn.at
> > index 71837a6721..c40d69c5e9 100644
> > --- a/tests/ovn.at
> > +++ b/tests/ovn.at
> > @@ -1605,6 +1605,33 @@ reject { };
> >      formats as reject { drop; };
> >      encodes as controller(userdata=00.00.00.16.00.00.00.00)
> >
> > +ip.src <-> ip.dst;
> > +    encodes as controller(userdata=00.00.00.17.00.00.00.00,pause)
> > +
> > +ip.src <-> ip.dst
> > +    Syntax error at end of input expecting `;'.
> > +
> > +ip.src = 10.0.0.10;
> > +    Can't load a value to ovn field (ip.src).
> > +
> > +ip.dst = aef0::4;
> > +    Can't load a value to ovn field (ip.dst).
> > +
> > +ip.src <-> ip4.dst;
> > +    Can't exchange ovn field with non ovn field (ip.src <-> ip4.dst).
> > +
> > +ip6.src <-> ip.dst;
> > +    Can't exchange ovn field with non ovn field (ip6.src <-> ip.dst).
> > +
> > +ip.src <-> icmp4.frag_mtu;
> > +    Can't exchange ovn field (ip.src) with ovn field (icmp4.frag_mtu).
> > +
> > +icmp6.frag_mtu <-> ip.dst;
> > +    Can't exchange ovn field (icmp6.frag_mtu) with ovn field (ip.dst).
> > +
> > +ip.src <-> ip.src;
> > +    Can't exchange ovn field (ip.src) with ovn field (ip.src).
> > +
> >  # trigger_event
> >  trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "tcp", load_balancer = "12345678-abcd-9876-fedc-11119f8e7d6c");
> >      encodes as controller(userdata=00.00.00.0f.00.00.00.00.00.00.00.00.00.01.00.0b.31.30.2e.30.2e.30.2e.31.3a.38.30.00.02.00.03.74.63.70.00.03.00.24.31.32.33.34.35.36.37.38.2d.61.62.63.64.2d.39.38.37.36.2d.66.65.64.63.2d.31.31.31.31.39.66.38.65.37.64.36.63)
> > diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> > index 38aee6081b..ed7d8bf278 100644
> > --- a/utilities/ovn-trace.c
> > +++ b/utilities/ovn-trace.c
> > @@ -2147,12 +2147,35 @@ execute_ovnfield_load(const struct ovnact_load *load,
> >                               ntohs(load->imm.value.be16_int));
> >          break;
> >      }
> > +
> > +    case OVN_IP_SRC:
> > +    case OVN_IP_DST:
> >      case OVN_FIELD_N_IDS:
> >      default:
> >          OVS_NOT_REACHED();
> >      }
> >  }
> >
> > +static void
> > +execute_ovnfield_exchange(const struct ovnact_move *move OVS_UNUSED,
> > +                          struct flow *uflow,
> > +                          struct ovs_list *super)
> > +{
> > +    /* Right now we support exchanging of ip.src with ip.dst. */
> > +    ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
> > +                         "ip.src <-> ip.dst");
> > +
> > +    if (get_dl_type(uflow) == htons(ETH_TYPE_IP)) {
> > +        ovs_be32 tmp = uflow->nw_dst;
> > +        uflow->nw_dst = uflow->nw_src;
> > +        uflow->nw_src = tmp;
> > +    } else {
> > +        struct in6_addr tmp = uflow->ipv6_dst;
> > +        uflow->ipv6_dst = uflow->ipv6_src;
> > +        uflow->ipv6_src = tmp;
> > +    }
> > +}
> > +
> >  static void
> >  trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
> >                const struct ovntrace_datapath *dp, struct flow *uflow,
> > @@ -2369,6 +2392,11 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
> >                             pipeline, super);
> >              break;
> >
> > +        case OVNACT_OVNFIELD_EXCHANGE:
> > +            execute_ovnfield_exchange(ovnact_get_OVNFIELD_EXCHANGE(a),
> > +                                      uflow, super);
> > +            break;
> > +
> >          case OVNACT_TRIGGER_EVENT:
> >              break;
> >
> >
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Dumitru Ceara Oct. 14, 2020, 11:10 a.m. UTC | #3
On 10/14/20 12:50 PM, Numan Siddique wrote:
> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> On 10/14/20 11:15 AM, numans@ovn.org wrote:
>>> From: Numan Siddique <numans@ovn.org>
>>>
>>> Thee new fields are version independent and these can be used in any
>>
>> Hi Numan,
>>
>> Nit: s/Thee/These
>>
>>> OVN action. Right now the usage of these fields are restricted to
>>> exchanging the IP source and destination fields.
>>>
>>> Eg. reject {... ip.src <-> ip.dst ... };
>>>
>>> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
>>> When pinctrl thread receives the packet in, it checks the IP version of the
>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
>>> resumes the packet to continue with the pipeline.
>>>
>>> Upcoming patch will make use of these fields.
>>>
>>> Signed-off-by: Numan Siddique <numans@ovn.org>
>>> ---
>>>  controller/pinctrl.c         |  49 +++++++++++++++
>>>  include/ovn/actions.h        |   4 ++
>>>  include/ovn/logical-fields.h |  18 ++++++
>>>  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
>>>  lib/logical-fields.c         |  10 +++
>>>  ovn-sb.xml                   |  37 +++++++++++
>>>  tests/ovn.at                 |  27 ++++++++
>>>  utilities/ovn-trace.c        |  28 +++++++++
>>>  8 files changed, 291 insertions(+)
>>>
>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>>> index 631d058458..bc16a82404 100644
>>> --- a/controller/pinctrl.c
>>> +++ b/controller/pinctrl.c
>>> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
>>>                                               struct ofputil_packet_in *pin,
>>>                                               struct ofpbuf *userdata,
>>>                                               struct ofpbuf *continuation);
>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
>>> +                                           const struct flow *in_flow,
>>> +                                           struct dp_packet *pkt_in,
>>> +                                           struct ofputil_packet_in *pin,
>>> +                                           struct ofpbuf *continuation);
>>>  static void
>>>  pinctrl_handle_event(struct ofpbuf *userdata)
>>>      OVS_REQUIRES(pinctrl_mutex);
>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
>>>          ovs_mutex_unlock(&pinctrl_mutex);
>>>          break;
>>>
>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
>>> +                                       &continuation);
>>> +        break;
>>> +
>>>      default:
>>>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
>>>                       ntohl(ah->opcode));
>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
>>>          svc_mon->next_send_time = time_msec() + svc_mon->interval;
>>>      }
>>>  }
>>> +
>>> +static void
>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
>>> +                               const struct flow *in_flow,
>>> +                               struct dp_packet *pkt_in,
>>> +                               struct ofputil_packet_in *pin,
>>> +                               struct ofpbuf *continuation)
>>> +{
>>> +    enum ofp_version version = rconn_get_version(swconn);
>>> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
>>> +    struct dp_packet *pkt_out;
>>> +
>>> +    pkt_out = dp_packet_clone(pkt_in);
>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
>>> +
>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
>>> +    } else {
>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
>>> +         * We could also use packet_set_ipv6() here, but that would require
>>> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
>>> +         * unnecessary. */
>>> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
>>> +        out_nh->ip6_src = out_nh->ip6_dst;
>>> +        out_nh->ip6_dst = tmp;
>>> +    }
>>> +
>>> +    pin->packet = dp_packet_data(pkt_out);
>>> +    pin->packet_len = dp_packet_size(pkt_out);
>>> +
>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
>>> +    dp_packet_delete(pkt_out);
>>> +}
>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
>>> index b4e5acabb9..bf1fe848b7 100644
>>> --- a/include/ovn/actions.h
>>> +++ b/include/ovn/actions.h
>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
>>>      OVNACT(DHCP6_REPLY,       ovnact_null)            \
>>>      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
>>>      OVNACT(REJECT,            ovnact_nest)            \
>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
>>>
>>>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>>>  enum OVS_PACKED_ENUM ovnact_type {
>>> @@ -612,6 +613,9 @@ enum action_opcode {
>>>       * The actions, in OpenFlow 1.3 format, follow the action_header.
>>>       */
>>>      ACTION_OPCODE_REJECT,
>>> +
>>> +    /* ip.src <-> ip.dst */
>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
>>>  };
>>>
>>>  /* Header. */
>>> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
>>> index ac6f2f909b..bb6daa50ca 100644
>>> --- a/include/ovn/logical-fields.h
>>> +++ b/include/ovn/logical-fields.h
>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
>>>       */
>>>      OVN_ICMP6_FRAG_MTU,
>>>
>>> +    /*
>>> +     * Name: "ip.src"
>>> +     * Type: be128
>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
>>> +     *              or sets the field MFF_IPV6_SRC if
>>> +     *              eth.type == 0x86dd (IPV6).
>>> +     */
>>> +    OVN_IP_SRC,
>>> +
>>> +    /*
>>> +     * Name: "ip.dst"
>>> +     * Type: be128
>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
>>> +     *              or sets the field MFF_IPV6_DST if
>>> +     *              eth.type == 0x86dd (IPV6).
>>> +     */
>>> +    OVN_IP_DST,
>>> +
>>>      OVN_FIELD_N_IDS
>>>  };
>>>
>>> diff --git a/lib/actions.c b/lib/actions.c
>>> index 1e1bdeff24..e9c77d2a0a 100644
>>> --- a/lib/actions.c
>>> +++ b/lib/actions.c
>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
>>>  {
>>>  }
>>>
>>> +static bool
>>> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
>>> +{
>>> +    switch (field->symbol->ovn_field->id) {
>>> +    case OVN_ICMP4_FRAG_MTU:
>>> +    case OVN_ICMP6_FRAG_MTU:
>>> +        return true;
>>> +
>>> +    case OVN_IP_SRC:
>>> +    case OVN_IP_DST:
>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
>>> +                    field->symbol->name);
>>> +        return false;
>>> +
>>> +    case OVN_FIELD_N_IDS:
>>> +    default:
>>> +        OVS_NOT_REACHED();
>>> +    }
>>> +}
>>> +
>>>  static void
>>>  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
>>>  {
>>>      size_t ofs = ctx->ovnacts->size;
>>>      struct ovnact_load *load;
>>>      if (lhs->symbol->ovn_field) {
>>> +        if (!check_ovnfield_load(ctx, lhs)) {
>>> +            return;
>>> +        }
>>>          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
>>>      } else {
>>>          load = ovnact_put_LOAD(ctx->ovnacts);
>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
>>>      format_assignment(move, "<->", s);
>>>  }
>>>
>>> +static void
>>> +parse_ovnfield_exchange(struct action_context *ctx,
>>> +                        const struct expr_field *lhs,
>>> +                        const struct expr_field *rhs)
>>> +{
>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
>>> +        lexer_error(ctx->lexer,
>>> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
>>> +                    lhs->symbol->name, rhs->symbol->name);
>>> +        return;
>>> +    }
>>> +
>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
>>> +        lexer_error(ctx->lexer,
>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>> +                    lhs->symbol->name, rhs->symbol->name);
>>> +        return;
>>> +    }
>>> +
>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
>>> +        lexer_error(ctx->lexer,
>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>> +                    lhs->symbol->name, rhs->symbol->name);
>>> +        return;
>>> +    }
>>> +
>>> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
>>> +        lexer_error(ctx->lexer,
>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>> +                    lhs->symbol->name, rhs->symbol->name);
>>> +        return;
>>> +    }
>>> +
>>> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
>>> +    move->lhs = *lhs;
>>> +    move->rhs = *rhs;
>>> +}
>>> +
>>>  static void
>>>  parse_assignment_action(struct action_context *ctx, bool exchange,
>>>                          const struct expr_field *lhs)
>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
>>>          return;
>>>      }
>>>
>>> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
>>> +        return;
>>> +    }
>>> +
>>>      const struct expr_symbol *ls = lhs->symbol;
>>>      const struct expr_symbol *rs = rhs.symbol;
>>>      if ((ls->width != 0) != (rs->width != 0)) {
>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
>>>                        ntohs(load->imm.value.be16_int));
>>>          break;
>>>
>>> +    case OVN_IP_SRC:
>>> +    case OVN_IP_DST:
>>>      case OVN_FIELD_N_IDS:
>>>      default:
>>>          OVS_NOT_REACHED();
>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
>>>          encode_finish_controller_op(oc_offset, ofpacts);
>>>          break;
>>>      }
>>> +
>>> +    case OVN_IP_SRC:
>>> +    case OVN_IP_DST:
>>>      case OVN_FIELD_N_IDS:
>>>      default:
>>>          OVS_NOT_REACHED();
>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
>>>      free(fwd_group->child_ports);
>>>  }
>>>
>>> +static void
>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
>>> +{
>>> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
>>> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
>>> +    switch (lhs->id) {
>>> +    case OVN_IP_SRC:
>>> +    case OVN_IP_DST:
>>> +        break;
>>> +
>>> +    case OVN_ICMP4_FRAG_MTU:
>>> +    case OVN_ICMP6_FRAG_MTU:
>>> +    case OVN_FIELD_N_IDS:
>>> +    default:
>>> +        OVS_NOT_REACHED();
>>> +    }
>>
>> Nit: I would use the shorter:
>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
> 
> You mean to drop the switch right ? If so, Ack.
> 

Right, I meant ovs_assert instead of switch.

> 
>>
>>> +
>>> +    switch (rhs->id) {
>>> +    case OVN_IP_SRC:
>>> +    case OVN_IP_DST:
>>> +        break;
>>> +
>>> +    case OVN_ICMP4_FRAG_MTU:
>>> +    case OVN_ICMP6_FRAG_MTU:
>>> +    case OVN_FIELD_N_IDS:
>>> +    default:
>>> +        OVS_NOT_REACHED();
>>> +    }
>>
>> Nit: I would use the shorter:
>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
>>
>>> +
>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
>>> +}
>>> +
>>> +static void
>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
>>> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
>>> +{
>>> +    /* Right now we only support exchanging the IPs in
>>> +     * OVN fields (ip.src <-> ip.dst). */
>>> +    size_t oc_offset =
>>> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
>>> +                                   true, NX_CTLR_NO_METER, ofpacts);
>>> +    encode_finish_controller_op(oc_offset, ofpacts);
>>
>> I think this means that all packets matching a flow with action "reject { ...
>> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
>> reject action, one for the ovnfield-exchange action.
>>
>> Is that a concern wrt. performance?
> 
> Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
> icmp4.frag_mtu = 1500; ...};
> 
> If performance is a concern, we could just drop ... ip.src <-> ip.dst
> in reject and let reject action
> swap the ip src with ip destination. I thought about that and my take
> is that since reject { } results in
> TCP RST or ICMP unreachable packet, it should be fine. I see these
> packets as more of control packets.
> 
> I think it should be ok. Let me know what you think.
> 

I think I like the approach of "reject" action implicitly swapping IPs best.
In the end it's not like we could implement reject withough swapping IP dst
with IP src so why not do it as part of the reject action.

But on the other hand, I think there might be other simpler ways to generate a
lot of packet-ins towards ovn-controller that I don't have a strong preference
about either solution.  I just wanted to make sure we discuss the performance
implications.

> 
>>
>>> +}
>>> +
>>>  /* Parses an assignment or exchange or put_dhcp_opts action. */
>>>  static void
>>>  parse_set_action(struct action_context *ctx)
>>> diff --git a/lib/logical-fields.c b/lib/logical-fields.c
>>> index bf61df7719..d6f1981f52 100644
>>> --- a/lib/logical-fields.c
>>> +++ b/lib/logical-fields.c
>>> @@ -33,6 +33,14 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
>>>          OVN_ICMP6_FRAG_MTU,
>>>          "icmp6.frag_mtu",
>>>          4, 32,
>>> +    }, {
>>> +        OVN_IP_SRC,
>>> +        "ip.src",
>>> +        16, 128,
>>> +    }, {
>>> +        OVN_IP_DST,
>>> +        "ip.dst",
>>> +        16, 128,
>>
>> Just partially related to your change, I'm a bit confused by the meaning of
>> n_bytes and n_bits in "struct ovn_field".  It seems to me that we never use
>> those values.  Is that the case or am I missing something?
> 
> These OVN fields are defined more like the way - mf_fields are defined.
> Like icmp4.frag_mtu is 2 bytes and 16 bits field.
> 
> So ip.src/ip.dst can be used for both ipv4 and ipv6, I have set the
> size of ipv6.
> 
> Since we don't support ip.src = 10.0.0.4 or ip.src = aef00::4 for
> example, these fields
> are not used. Later on if there is a need to support assigning a value
> to these fields, these fields
> could be used. Although I don't see a need to support it since we
> already have ip4.src/ip
> 

Ok, my question was more about the fact that I don't see the ovn_field.n_bytes
or ovn_fields.n_bits being used anywhere in general (not referring to
ip.src/ip.dst only).

Thanks,
Dumitru

> 
> Thanks for the review.
> 
> Numan
> 
>>
> 
> 
>> Thanks,
>> Dumitru
>>
>>>      },
>>>  };
>>>
>>> @@ -271,6 +279,8 @@ ovn_init_symtab(struct shash *symtab)
>>>
>>>      expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU);
>>>      expr_symtab_add_ovn_field(symtab, "icmp6.frag_mtu", OVN_ICMP6_FRAG_MTU);
>>> +    expr_symtab_add_ovn_field(symtab, "ip.src", OVN_IP_SRC);
>>> +    expr_symtab_add_ovn_field(symtab, "ip.dst", OVN_IP_DST);
>>>  }
>>>
>>>  const char *
>>> diff --git a/ovn-sb.xml b/ovn-sb.xml
>>> index 2fc84f54ee..73ee543bfe 100644
>>> --- a/ovn-sb.xml
>>> +++ b/ovn-sb.xml
>>> @@ -1258,6 +1258,43 @@
>>>              except that the two values are exchanged instead of copied.  Both
>>>              <var>field1</var> and <var>field2</var> must modifiable.
>>>            </p>
>>> +
>>> +          <p>
>>> +            <code>OVN</code> supports exchange of below OVN logical fields.
>>> +          </p>
>>> +
>>> +          <ul>
>>> +            <li>
>>> +              <code>ip.src</code>
>>> +              <code>ip.dst</code>
>>> +              <p>
>>> +                These fields can be used to refer to IP source and destination
>>> +                fields which are IP version independent. These fields can be
>>> +                used only for exchange of values.
>>> +              </p>
>>> +            </li>
>>> +
>>> +            <li>
>>> +              <p>
>>> +                Below are few examples:
>>> +              </p>
>>> +
>>> +              <p>
>>> +                match = "ip &amp;&amp; tcp"
>>> +                action = "ip.src &lt;-&gt; ip.dst; next;"
>>> +              </p>
>>> +
>>> +              <p>
>>> +                match = "ip4 || ip6"
>>> +                action = reject { ip.src &lt;-&gt; ip.dst; } next;"
>>> +              </p>
>>> +
>>> +              <p>
>>> +                match = "ip &amp;&amp; (tcp || udp)"
>>> +                action = "ip.src &lt;-&gt; ip.dst; next;"
>>> +              </p>
>>> +            </li>
>>> +          </ul>
>>>          </dd>
>>>
>>>          <dt><code>ip.ttl--;</code></dt>
>>> diff --git a/tests/ovn.at b/tests/ovn.at
>>> index 71837a6721..c40d69c5e9 100644
>>> --- a/tests/ovn.at
>>> +++ b/tests/ovn.at
>>> @@ -1605,6 +1605,33 @@ reject { };
>>>      formats as reject { drop; };
>>>      encodes as controller(userdata=00.00.00.16.00.00.00.00)
>>>
>>> +ip.src <-> ip.dst;
>>> +    encodes as controller(userdata=00.00.00.17.00.00.00.00,pause)
>>> +
>>> +ip.src <-> ip.dst
>>> +    Syntax error at end of input expecting `;'.
>>> +
>>> +ip.src = 10.0.0.10;
>>> +    Can't load a value to ovn field (ip.src).
>>> +
>>> +ip.dst = aef0::4;
>>> +    Can't load a value to ovn field (ip.dst).
>>> +
>>> +ip.src <-> ip4.dst;
>>> +    Can't exchange ovn field with non ovn field (ip.src <-> ip4.dst).
>>> +
>>> +ip6.src <-> ip.dst;
>>> +    Can't exchange ovn field with non ovn field (ip6.src <-> ip.dst).
>>> +
>>> +ip.src <-> icmp4.frag_mtu;
>>> +    Can't exchange ovn field (ip.src) with ovn field (icmp4.frag_mtu).
>>> +
>>> +icmp6.frag_mtu <-> ip.dst;
>>> +    Can't exchange ovn field (icmp6.frag_mtu) with ovn field (ip.dst).
>>> +
>>> +ip.src <-> ip.src;
>>> +    Can't exchange ovn field (ip.src) with ovn field (ip.src).
>>> +
>>>  # trigger_event
>>>  trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "tcp", load_balancer = "12345678-abcd-9876-fedc-11119f8e7d6c");
>>>      encodes as controller(userdata=00.00.00.0f.00.00.00.00.00.00.00.00.00.01.00.0b.31.30.2e.30.2e.30.2e.31.3a.38.30.00.02.00.03.74.63.70.00.03.00.24.31.32.33.34.35.36.37.38.2d.61.62.63.64.2d.39.38.37.36.2d.66.65.64.63.2d.31.31.31.31.39.66.38.65.37.64.36.63)
>>> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
>>> index 38aee6081b..ed7d8bf278 100644
>>> --- a/utilities/ovn-trace.c
>>> +++ b/utilities/ovn-trace.c
>>> @@ -2147,12 +2147,35 @@ execute_ovnfield_load(const struct ovnact_load *load,
>>>                               ntohs(load->imm.value.be16_int));
>>>          break;
>>>      }
>>> +
>>> +    case OVN_IP_SRC:
>>> +    case OVN_IP_DST:
>>>      case OVN_FIELD_N_IDS:
>>>      default:
>>>          OVS_NOT_REACHED();
>>>      }
>>>  }
>>>
>>> +static void
>>> +execute_ovnfield_exchange(const struct ovnact_move *move OVS_UNUSED,
>>> +                          struct flow *uflow,
>>> +                          struct ovs_list *super)
>>> +{
>>> +    /* Right now we support exchanging of ip.src with ip.dst. */
>>> +    ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
>>> +                         "ip.src <-> ip.dst");
>>> +
>>> +    if (get_dl_type(uflow) == htons(ETH_TYPE_IP)) {
>>> +        ovs_be32 tmp = uflow->nw_dst;
>>> +        uflow->nw_dst = uflow->nw_src;
>>> +        uflow->nw_src = tmp;
>>> +    } else {
>>> +        struct in6_addr tmp = uflow->ipv6_dst;
>>> +        uflow->ipv6_dst = uflow->ipv6_src;
>>> +        uflow->ipv6_src = tmp;
>>> +    }
>>> +}
>>> +
>>>  static void
>>>  trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
>>>                const struct ovntrace_datapath *dp, struct flow *uflow,
>>> @@ -2369,6 +2392,11 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
>>>                             pipeline, super);
>>>              break;
>>>
>>> +        case OVNACT_OVNFIELD_EXCHANGE:
>>> +            execute_ovnfield_exchange(ovnact_get_OVNFIELD_EXCHANGE(a),
>>> +                                      uflow, super);
>>> +            break;
>>> +
>>>          case OVNACT_TRIGGER_EVENT:
>>>              break;
>>>
>>>
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
>
Numan Siddique Oct. 14, 2020, 11:54 a.m. UTC | #4
On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 10/14/20 12:50 PM, Numan Siddique wrote:
> > On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
> >>
> >> On 10/14/20 11:15 AM, numans@ovn.org wrote:
> >>> From: Numan Siddique <numans@ovn.org>
> >>>
> >>> Thee new fields are version independent and these can be used in any
> >>
> >> Hi Numan,
> >>
> >> Nit: s/Thee/These
> >>
> >>> OVN action. Right now the usage of these fields are restricted to
> >>> exchanging the IP source and destination fields.
> >>>
> >>> Eg. reject {... ip.src <-> ip.dst ... };
> >>>
> >>> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
> >>> When pinctrl thread receives the packet in, it checks the IP version of the
> >>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
> >>> resumes the packet to continue with the pipeline.
> >>>
> >>> Upcoming patch will make use of these fields.
> >>>
> >>> Signed-off-by: Numan Siddique <numans@ovn.org>
> >>> ---
> >>>  controller/pinctrl.c         |  49 +++++++++++++++
> >>>  include/ovn/actions.h        |   4 ++
> >>>  include/ovn/logical-fields.h |  18 ++++++
> >>>  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
> >>>  lib/logical-fields.c         |  10 +++
> >>>  ovn-sb.xml                   |  37 +++++++++++
> >>>  tests/ovn.at                 |  27 ++++++++
> >>>  utilities/ovn-trace.c        |  28 +++++++++
> >>>  8 files changed, 291 insertions(+)
> >>>
> >>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> >>> index 631d058458..bc16a82404 100644
> >>> --- a/controller/pinctrl.c
> >>> +++ b/controller/pinctrl.c
> >>> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
> >>>                                               struct ofputil_packet_in *pin,
> >>>                                               struct ofpbuf *userdata,
> >>>                                               struct ofpbuf *continuation);
> >>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>> +                                           const struct flow *in_flow,
> >>> +                                           struct dp_packet *pkt_in,
> >>> +                                           struct ofputil_packet_in *pin,
> >>> +                                           struct ofpbuf *continuation);
> >>>  static void
> >>>  pinctrl_handle_event(struct ofpbuf *userdata)
> >>>      OVS_REQUIRES(pinctrl_mutex);
> >>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
> >>>          ovs_mutex_unlock(&pinctrl_mutex);
> >>>          break;
> >>>
> >>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
> >>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
> >>> +                                       &continuation);
> >>> +        break;
> >>> +
> >>>      default:
> >>>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
> >>>                       ntohl(ah->opcode));
> >>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
> >>>          svc_mon->next_send_time = time_msec() + svc_mon->interval;
> >>>      }
> >>>  }
> >>> +
> >>> +static void
> >>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>> +                               const struct flow *in_flow,
> >>> +                               struct dp_packet *pkt_in,
> >>> +                               struct ofputil_packet_in *pin,
> >>> +                               struct ofpbuf *continuation)
> >>> +{
> >>> +    enum ofp_version version = rconn_get_version(swconn);
> >>> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> >>> +    struct dp_packet *pkt_out;
> >>> +
> >>> +    pkt_out = dp_packet_clone(pkt_in);
> >>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
> >>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
> >>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
> >>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> >>> +
> >>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
> >>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
> >>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
> >>> +                        in_flow->nw_tos, in_flow->nw_ttl);
> >>> +    } else {
> >>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
> >>> +         * We could also use packet_set_ipv6() here, but that would require
> >>> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
> >>> +         * unnecessary. */
> >>> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
> >>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
> >>> +        out_nh->ip6_src = out_nh->ip6_dst;
> >>> +        out_nh->ip6_dst = tmp;
> >>> +    }
> >>> +
> >>> +    pin->packet = dp_packet_data(pkt_out);
> >>> +    pin->packet_len = dp_packet_size(pkt_out);
> >>> +
> >>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
> >>> +    dp_packet_delete(pkt_out);
> >>> +}
> >>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> >>> index b4e5acabb9..bf1fe848b7 100644
> >>> --- a/include/ovn/actions.h
> >>> +++ b/include/ovn/actions.h
> >>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
> >>>      OVNACT(DHCP6_REPLY,       ovnact_null)            \
> >>>      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
> >>>      OVNACT(REJECT,            ovnact_nest)            \
> >>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
> >>>
> >>>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
> >>>  enum OVS_PACKED_ENUM ovnact_type {
> >>> @@ -612,6 +613,9 @@ enum action_opcode {
> >>>       * The actions, in OpenFlow 1.3 format, follow the action_header.
> >>>       */
> >>>      ACTION_OPCODE_REJECT,
> >>> +
> >>> +    /* ip.src <-> ip.dst */
> >>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>  };
> >>>
> >>>  /* Header. */
> >>> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> >>> index ac6f2f909b..bb6daa50ca 100644
> >>> --- a/include/ovn/logical-fields.h
> >>> +++ b/include/ovn/logical-fields.h
> >>> @@ -116,6 +116,24 @@ enum ovn_field_id {
> >>>       */
> >>>      OVN_ICMP6_FRAG_MTU,
> >>>
> >>> +    /*
> >>> +     * Name: "ip.src"
> >>> +     * Type: be128
> >>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
> >>> +     *              or sets the field MFF_IPV6_SRC if
> >>> +     *              eth.type == 0x86dd (IPV6).
> >>> +     */
> >>> +    OVN_IP_SRC,
> >>> +
> >>> +    /*
> >>> +     * Name: "ip.dst"
> >>> +     * Type: be128
> >>> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
> >>> +     *              or sets the field MFF_IPV6_DST if
> >>> +     *              eth.type == 0x86dd (IPV6).
> >>> +     */
> >>> +    OVN_IP_DST,
> >>> +
> >>>      OVN_FIELD_N_IDS
> >>>  };
> >>>
> >>> diff --git a/lib/actions.c b/lib/actions.c
> >>> index 1e1bdeff24..e9c77d2a0a 100644
> >>> --- a/lib/actions.c
> >>> +++ b/lib/actions.c
> >>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
> >>>  {
> >>>  }
> >>>
> >>> +static bool
> >>> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
> >>> +{
> >>> +    switch (field->symbol->ovn_field->id) {
> >>> +    case OVN_ICMP4_FRAG_MTU:
> >>> +    case OVN_ICMP6_FRAG_MTU:
> >>> +        return true;
> >>> +
> >>> +    case OVN_IP_SRC:
> >>> +    case OVN_IP_DST:
> >>> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
> >>> +                    field->symbol->name);
> >>> +        return false;
> >>> +
> >>> +    case OVN_FIELD_N_IDS:
> >>> +    default:
> >>> +        OVS_NOT_REACHED();
> >>> +    }
> >>> +}
> >>> +
> >>>  static void
> >>>  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
> >>>  {
> >>>      size_t ofs = ctx->ovnacts->size;
> >>>      struct ovnact_load *load;
> >>>      if (lhs->symbol->ovn_field) {
> >>> +        if (!check_ovnfield_load(ctx, lhs)) {
> >>> +            return;
> >>> +        }
> >>>          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
> >>>      } else {
> >>>          load = ovnact_put_LOAD(ctx->ovnacts);
> >>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
> >>>      format_assignment(move, "<->", s);
> >>>  }
> >>>
> >>> +static void
> >>> +parse_ovnfield_exchange(struct action_context *ctx,
> >>> +                        const struct expr_field *lhs,
> >>> +                        const struct expr_field *rhs)
> >>> +{
> >>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
> >>> +        lexer_error(ctx->lexer,
> >>> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
> >>> +                    lhs->symbol->name, rhs->symbol->name);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>> +        lexer_error(ctx->lexer,
> >>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>> +                    lhs->symbol->name, rhs->symbol->name);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>> +        lexer_error(ctx->lexer,
> >>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>> +                    lhs->symbol->name, rhs->symbol->name);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
> >>> +        lexer_error(ctx->lexer,
> >>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>> +                    lhs->symbol->name, rhs->symbol->name);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
> >>> +    move->lhs = *lhs;
> >>> +    move->rhs = *rhs;
> >>> +}
> >>> +
> >>>  static void
> >>>  parse_assignment_action(struct action_context *ctx, bool exchange,
> >>>                          const struct expr_field *lhs)
> >>> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
> >>>          return;
> >>>      }
> >>>
> >>> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
> >>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
> >>> +        return;
> >>> +    }
> >>> +
> >>>      const struct expr_symbol *ls = lhs->symbol;
> >>>      const struct expr_symbol *rs = rhs.symbol;
> >>>      if ((ls->width != 0) != (rs->width != 0)) {
> >>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
> >>>                        ntohs(load->imm.value.be16_int));
> >>>          break;
> >>>
> >>> +    case OVN_IP_SRC:
> >>> +    case OVN_IP_DST:
> >>>      case OVN_FIELD_N_IDS:
> >>>      default:
> >>>          OVS_NOT_REACHED();
> >>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
> >>>          encode_finish_controller_op(oc_offset, ofpacts);
> >>>          break;
> >>>      }
> >>> +
> >>> +    case OVN_IP_SRC:
> >>> +    case OVN_IP_DST:
> >>>      case OVN_FIELD_N_IDS:
> >>>      default:
> >>>          OVS_NOT_REACHED();
> >>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
> >>>      free(fwd_group->child_ports);
> >>>  }
> >>>
> >>> +static void
> >>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
> >>> +{
> >>> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
> >>> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
> >>> +    switch (lhs->id) {
> >>> +    case OVN_IP_SRC:
> >>> +    case OVN_IP_DST:
> >>> +        break;
> >>> +
> >>> +    case OVN_ICMP4_FRAG_MTU:
> >>> +    case OVN_ICMP6_FRAG_MTU:
> >>> +    case OVN_FIELD_N_IDS:
> >>> +    default:
> >>> +        OVS_NOT_REACHED();
> >>> +    }
> >>
> >> Nit: I would use the shorter:
> >> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
> >
> > You mean to drop the switch right ? If so, Ack.
> >
>
> Right, I meant ovs_assert instead of switch.
>
> >
> >>
> >>> +
> >>> +    switch (rhs->id) {
> >>> +    case OVN_IP_SRC:
> >>> +    case OVN_IP_DST:
> >>> +        break;
> >>> +
> >>> +    case OVN_ICMP4_FRAG_MTU:
> >>> +    case OVN_ICMP6_FRAG_MTU:
> >>> +    case OVN_FIELD_N_IDS:
> >>> +    default:
> >>> +        OVS_NOT_REACHED();
> >>> +    }
> >>
> >> Nit: I would use the shorter:
> >> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
> >>
> >>> +
> >>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
> >>> +}
> >>> +
> >>> +static void
> >>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
> >>> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
> >>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
> >>> +{
> >>> +    /* Right now we only support exchanging the IPs in
> >>> +     * OVN fields (ip.src <-> ip.dst). */
> >>> +    size_t oc_offset =
> >>> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>> +                                   true, NX_CTLR_NO_METER, ofpacts);
> >>> +    encode_finish_controller_op(oc_offset, ofpacts);
> >>
> >> I think this means that all packets matching a flow with action "reject { ...
> >> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
> >> reject action, one for the ovnfield-exchange action.
> >>
> >> Is that a concern wrt. performance?
> >
> > Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
> > icmp4.frag_mtu = 1500; ...};
> >
> > If performance is a concern, we could just drop ... ip.src <-> ip.dst
> > in reject and let reject action
> > swap the ip src with ip destination. I thought about that and my take
> > is that since reject { } results in
> > TCP RST or ICMP unreachable packet, it should be fine. I see these
> > packets as more of control packets.
> >
> > I think it should be ok. Let me know what you think.
> >
>
> I think I like the approach of "reject" action implicitly swapping IPs best.
> In the end it's not like we could implement reject withough swapping IP dst
> with IP src so why not do it as part of the reject action.
>

Agree. reject action is expected to be used to send the generated tcp
rst/icmp unreachable
packet back to the sender.

I'm ok to change the reject action to just swap the IPs internally if
everyone is fine.

@Mark Michelson  @Han Zhou  Do you have any comments here ?

Option 1 - reject { ...}    -> The reject action handling in pinctrl.c
will swap the ip source and destination

Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
approach in this patch series. This results in
2 packet-ins.

I personally prefer (2) since in OVN we normally tell what inner
actions to apply for many OVN actions.
But I have no strong preference and I'm fine changing to (1).

If we chose option (1), we could add a comment inside the action like
- reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
eth.dst; output ; }


> But on the other hand, I think there might be other simpler ways to generate a
> lot of packet-ins towards ovn-controller that I don't have a strong preference
> about either solution.  I just wanted to make sure we discuss the performance
> implications.

In my opinion there should not be any performance implications.


>
> >
> >>
> >>> +}
> >>> +
> >>>  /* Parses an assignment or exchange or put_dhcp_opts action. */
> >>>  static void
> >>>  parse_set_action(struct action_context *ctx)
> >>> diff --git a/lib/logical-fields.c b/lib/logical-fields.c
> >>> index bf61df7719..d6f1981f52 100644
> >>> --- a/lib/logical-fields.c
> >>> +++ b/lib/logical-fields.c
> >>> @@ -33,6 +33,14 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
> >>>          OVN_ICMP6_FRAG_MTU,
> >>>          "icmp6.frag_mtu",
> >>>          4, 32,
> >>> +    }, {
> >>> +        OVN_IP_SRC,
> >>> +        "ip.src",
> >>> +        16, 128,
> >>> +    }, {
> >>> +        OVN_IP_DST,
> >>> +        "ip.dst",
> >>> +        16, 128,
> >>
> >> Just partially related to your change, I'm a bit confused by the meaning of
> >> n_bytes and n_bits in "struct ovn_field".  It seems to me that we never use
> >> those values.  Is that the case or am I missing something?
> >
> > These OVN fields are defined more like the way - mf_fields are defined.
> > Like icmp4.frag_mtu is 2 bytes and 16 bits field.
> >
> > So ip.src/ip.dst can be used for both ipv4 and ipv6, I have set the
> > size of ipv6.
> >
> > Since we don't support ip.src = 10.0.0.4 or ip.src = aef00::4 for
> > example, these fields
> > are not used. Later on if there is a need to support assigning a value
> > to these fields, these fields
> > could be used. Although I don't see a need to support it since we
> > already have ip4.src/ip

> >
>
> Ok, my question was more about the fact that I don't see the ovn_field.n_bytes
> or ovn_fields.n_bits being used anywhere in general (not referring to
> ip.src/ip.dst only).
>

Actually they are being used. See
https://github.com/ovn-org/ovn/blob/master/lib/actions.c#L394
and https://github.com/ovn-org/ovn/blob/master/lib/expr.c#L569

If you add the below in the ovn -- action parsing test case in ovn.at
---
icmp4.frag_mtu = 65536;
---

You'll see the below error:
17-bit constant is not compatible with 16-bit field icmp4.frag_mtu.

Thanks
Numan


> Thanks,
> Dumitru
>
> >
> > Thanks for the review.
> >
> > Numan
> >
> >>
> >
> >
> >> Thanks,
> >> Dumitru
> >>
> >>>      },
> >>>  };
> >>>
> >>> @@ -271,6 +279,8 @@ ovn_init_symtab(struct shash *symtab)
> >>>
> >>>      expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU);
> >>>      expr_symtab_add_ovn_field(symtab, "icmp6.frag_mtu", OVN_ICMP6_FRAG_MTU);
> >>> +    expr_symtab_add_ovn_field(symtab, "ip.src", OVN_IP_SRC);
> >>> +    expr_symtab_add_ovn_field(symtab, "ip.dst", OVN_IP_DST);
> >>>  }
> >>>
> >>>  const char *
> >>> diff --git a/ovn-sb.xml b/ovn-sb.xml
> >>> index 2fc84f54ee..73ee543bfe 100644
> >>> --- a/ovn-sb.xml
> >>> +++ b/ovn-sb.xml
> >>> @@ -1258,6 +1258,43 @@
> >>>              except that the two values are exchanged instead of copied.  Both
> >>>              <var>field1</var> and <var>field2</var> must modifiable.
> >>>            </p>
> >>> +
> >>> +          <p>
> >>> +            <code>OVN</code> supports exchange of below OVN logical fields.
> >>> +          </p>
> >>> +
> >>> +          <ul>
> >>> +            <li>
> >>> +              <code>ip.src</code>
> >>> +              <code>ip.dst</code>
> >>> +              <p>
> >>> +                These fields can be used to refer to IP source and destination
> >>> +                fields which are IP version independent. These fields can be
> >>> +                used only for exchange of values.
> >>> +              </p>
> >>> +            </li>
> >>> +
> >>> +            <li>
> >>> +              <p>
> >>> +                Below are few examples:
> >>> +              </p>
> >>> +
> >>> +              <p>
> >>> +                match = "ip &amp;&amp; tcp"
> >>> +                action = "ip.src &lt;-&gt; ip.dst; next;"
> >>> +              </p>
> >>> +
> >>> +              <p>
> >>> +                match = "ip4 || ip6"
> >>> +                action = reject { ip.src &lt;-&gt; ip.dst; } next;"
> >>> +              </p>
> >>> +
> >>> +              <p>
> >>> +                match = "ip &amp;&amp; (tcp || udp)"
> >>> +                action = "ip.src &lt;-&gt; ip.dst; next;"
> >>> +              </p>
> >>> +            </li>
> >>> +          </ul>
> >>>          </dd>
> >>>
> >>>          <dt><code>ip.ttl--;</code></dt>
> >>> diff --git a/tests/ovn.at b/tests/ovn.at
> >>> index 71837a6721..c40d69c5e9 100644
> >>> --- a/tests/ovn.at
> >>> +++ b/tests/ovn.at
> >>> @@ -1605,6 +1605,33 @@ reject { };
> >>>      formats as reject { drop; };
> >>>      encodes as controller(userdata=00.00.00.16.00.00.00.00)
> >>>
> >>> +ip.src <-> ip.dst;
> >>> +    encodes as controller(userdata=00.00.00.17.00.00.00.00,pause)
> >>> +
> >>> +ip.src <-> ip.dst
> >>> +    Syntax error at end of input expecting `;'.
> >>> +
> >>> +ip.src = 10.0.0.10;
> >>> +    Can't load a value to ovn field (ip.src).
> >>> +
> >>> +ip.dst = aef0::4;
> >>> +    Can't load a value to ovn field (ip.dst).
> >>> +
> >>> +ip.src <-> ip4.dst;
> >>> +    Can't exchange ovn field with non ovn field (ip.src <-> ip4.dst).
> >>> +
> >>> +ip6.src <-> ip.dst;
> >>> +    Can't exchange ovn field with non ovn field (ip6.src <-> ip.dst).
> >>> +
> >>> +ip.src <-> icmp4.frag_mtu;
> >>> +    Can't exchange ovn field (ip.src) with ovn field (icmp4.frag_mtu).
> >>> +
> >>> +icmp6.frag_mtu <-> ip.dst;
> >>> +    Can't exchange ovn field (icmp6.frag_mtu) with ovn field (ip.dst).
> >>> +
> >>> +ip.src <-> ip.src;
> >>> +    Can't exchange ovn field (ip.src) with ovn field (ip.src).
> >>> +
> >>>  # trigger_event
> >>>  trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "tcp", load_balancer = "12345678-abcd-9876-fedc-11119f8e7d6c");
> >>>      encodes as controller(userdata=00.00.00.0f.00.00.00.00.00.00.00.00.00.01.00.0b.31.30.2e.30.2e.30.2e.31.3a.38.30.00.02.00.03.74.63.70.00.03.00.24.31.32.33.34.35.36.37.38.2d.61.62.63.64.2d.39.38.37.36.2d.66.65.64.63.2d.31.31.31.31.39.66.38.65.37.64.36.63)
> >>> diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
> >>> index 38aee6081b..ed7d8bf278 100644
> >>> --- a/utilities/ovn-trace.c
> >>> +++ b/utilities/ovn-trace.c
> >>> @@ -2147,12 +2147,35 @@ execute_ovnfield_load(const struct ovnact_load *load,
> >>>                               ntohs(load->imm.value.be16_int));
> >>>          break;
> >>>      }
> >>> +
> >>> +    case OVN_IP_SRC:
> >>> +    case OVN_IP_DST:
> >>>      case OVN_FIELD_N_IDS:
> >>>      default:
> >>>          OVS_NOT_REACHED();
> >>>      }
> >>>  }
> >>>
> >>> +static void
> >>> +execute_ovnfield_exchange(const struct ovnact_move *move OVS_UNUSED,
> >>> +                          struct flow *uflow,
> >>> +                          struct ovs_list *super)
> >>> +{
> >>> +    /* Right now we support exchanging of ip.src with ip.dst. */
> >>> +    ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
> >>> +                         "ip.src <-> ip.dst");
> >>> +
> >>> +    if (get_dl_type(uflow) == htons(ETH_TYPE_IP)) {
> >>> +        ovs_be32 tmp = uflow->nw_dst;
> >>> +        uflow->nw_dst = uflow->nw_src;
> >>> +        uflow->nw_src = tmp;
> >>> +    } else {
> >>> +        struct in6_addr tmp = uflow->ipv6_dst;
> >>> +        uflow->ipv6_dst = uflow->ipv6_src;
> >>> +        uflow->ipv6_src = tmp;
> >>> +    }
> >>> +}
> >>> +
> >>>  static void
> >>>  trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
> >>>                const struct ovntrace_datapath *dp, struct flow *uflow,
> >>> @@ -2369,6 +2392,11 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
> >>>                             pipeline, super);
> >>>              break;
> >>>
> >>> +        case OVNACT_OVNFIELD_EXCHANGE:
> >>> +            execute_ovnfield_exchange(ovnact_get_OVNFIELD_EXCHANGE(a),
> >>> +                                      uflow, super);
> >>> +            break;
> >>> +
> >>>          case OVNACT_TRIGGER_EVENT:
> >>>              break;
> >>>
> >>>
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> >
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Dumitru Ceara Oct. 14, 2020, 12:03 p.m. UTC | #5
On 10/14/20 1:54 PM, Numan Siddique wrote:
> On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> On 10/14/20 12:50 PM, Numan Siddique wrote:
>>> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>>>
>>>> On 10/14/20 11:15 AM, numans@ovn.org wrote:
>>>>> From: Numan Siddique <numans@ovn.org>
>>>>>
>>>>> Thee new fields are version independent and these can be used in any
>>>>
>>>> Hi Numan,
>>>>
>>>> Nit: s/Thee/These
>>>>
>>>>> OVN action. Right now the usage of these fields are restricted to
>>>>> exchanging the IP source and destination fields.
>>>>>
>>>>> Eg. reject {... ip.src <-> ip.dst ... };
>>>>>
>>>>> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
>>>>> When pinctrl thread receives the packet in, it checks the IP version of the
>>>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
>>>>> resumes the packet to continue with the pipeline.
>>>>>
>>>>> Upcoming patch will make use of these fields.
>>>>>
>>>>> Signed-off-by: Numan Siddique <numans@ovn.org>
>>>>> ---
>>>>>  controller/pinctrl.c         |  49 +++++++++++++++
>>>>>  include/ovn/actions.h        |   4 ++
>>>>>  include/ovn/logical-fields.h |  18 ++++++
>>>>>  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
>>>>>  lib/logical-fields.c         |  10 +++
>>>>>  ovn-sb.xml                   |  37 +++++++++++
>>>>>  tests/ovn.at                 |  27 ++++++++
>>>>>  utilities/ovn-trace.c        |  28 +++++++++
>>>>>  8 files changed, 291 insertions(+)
>>>>>
>>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>>>>> index 631d058458..bc16a82404 100644
>>>>> --- a/controller/pinctrl.c
>>>>> +++ b/controller/pinctrl.c
>>>>> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
>>>>>                                               struct ofputil_packet_in *pin,
>>>>>                                               struct ofpbuf *userdata,
>>>>>                                               struct ofpbuf *continuation);
>>>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
>>>>> +                                           const struct flow *in_flow,
>>>>> +                                           struct dp_packet *pkt_in,
>>>>> +                                           struct ofputil_packet_in *pin,
>>>>> +                                           struct ofpbuf *continuation);
>>>>>  static void
>>>>>  pinctrl_handle_event(struct ofpbuf *userdata)
>>>>>      OVS_REQUIRES(pinctrl_mutex);
>>>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
>>>>>          ovs_mutex_unlock(&pinctrl_mutex);
>>>>>          break;
>>>>>
>>>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
>>>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
>>>>> +                                       &continuation);
>>>>> +        break;
>>>>> +
>>>>>      default:
>>>>>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
>>>>>                       ntohl(ah->opcode));
>>>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
>>>>>          svc_mon->next_send_time = time_msec() + svc_mon->interval;
>>>>>      }
>>>>>  }
>>>>> +
>>>>> +static void
>>>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
>>>>> +                               const struct flow *in_flow,
>>>>> +                               struct dp_packet *pkt_in,
>>>>> +                               struct ofputil_packet_in *pin,
>>>>> +                               struct ofpbuf *continuation)
>>>>> +{
>>>>> +    enum ofp_version version = rconn_get_version(swconn);
>>>>> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
>>>>> +    struct dp_packet *pkt_out;
>>>>> +
>>>>> +    pkt_out = dp_packet_clone(pkt_in);
>>>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
>>>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
>>>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
>>>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
>>>>> +
>>>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
>>>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
>>>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
>>>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
>>>>> +    } else {
>>>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
>>>>> +         * We could also use packet_set_ipv6() here, but that would require
>>>>> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
>>>>> +         * unnecessary. */
>>>>> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
>>>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
>>>>> +        out_nh->ip6_src = out_nh->ip6_dst;
>>>>> +        out_nh->ip6_dst = tmp;
>>>>> +    }
>>>>> +
>>>>> +    pin->packet = dp_packet_data(pkt_out);
>>>>> +    pin->packet_len = dp_packet_size(pkt_out);
>>>>> +
>>>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
>>>>> +    dp_packet_delete(pkt_out);
>>>>> +}
>>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
>>>>> index b4e5acabb9..bf1fe848b7 100644
>>>>> --- a/include/ovn/actions.h
>>>>> +++ b/include/ovn/actions.h
>>>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
>>>>>      OVNACT(DHCP6_REPLY,       ovnact_null)            \
>>>>>      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
>>>>>      OVNACT(REJECT,            ovnact_nest)            \
>>>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
>>>>>
>>>>>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>>>>>  enum OVS_PACKED_ENUM ovnact_type {
>>>>> @@ -612,6 +613,9 @@ enum action_opcode {
>>>>>       * The actions, in OpenFlow 1.3 format, follow the action_header.
>>>>>       */
>>>>>      ACTION_OPCODE_REJECT,
>>>>> +
>>>>> +    /* ip.src <-> ip.dst */
>>>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
>>>>>  };
>>>>>
>>>>>  /* Header. */
>>>>> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
>>>>> index ac6f2f909b..bb6daa50ca 100644
>>>>> --- a/include/ovn/logical-fields.h
>>>>> +++ b/include/ovn/logical-fields.h
>>>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
>>>>>       */
>>>>>      OVN_ICMP6_FRAG_MTU,
>>>>>
>>>>> +    /*
>>>>> +     * Name: "ip.src"
>>>>> +     * Type: be128
>>>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
>>>>> +     *              or sets the field MFF_IPV6_SRC if
>>>>> +     *              eth.type == 0x86dd (IPV6).
>>>>> +     */
>>>>> +    OVN_IP_SRC,
>>>>> +
>>>>> +    /*
>>>>> +     * Name: "ip.dst"
>>>>> +     * Type: be128
>>>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
>>>>> +     *              or sets the field MFF_IPV6_DST if
>>>>> +     *              eth.type == 0x86dd (IPV6).
>>>>> +     */
>>>>> +    OVN_IP_DST,
>>>>> +
>>>>>      OVN_FIELD_N_IDS
>>>>>  };
>>>>>
>>>>> diff --git a/lib/actions.c b/lib/actions.c
>>>>> index 1e1bdeff24..e9c77d2a0a 100644
>>>>> --- a/lib/actions.c
>>>>> +++ b/lib/actions.c
>>>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
>>>>>  {
>>>>>  }
>>>>>
>>>>> +static bool
>>>>> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
>>>>> +{
>>>>> +    switch (field->symbol->ovn_field->id) {
>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>> +        return true;
>>>>> +
>>>>> +    case OVN_IP_SRC:
>>>>> +    case OVN_IP_DST:
>>>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
>>>>> +                    field->symbol->name);
>>>>> +        return false;
>>>>> +
>>>>> +    case OVN_FIELD_N_IDS:
>>>>> +    default:
>>>>> +        OVS_NOT_REACHED();
>>>>> +    }
>>>>> +}
>>>>> +
>>>>>  static void
>>>>>  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
>>>>>  {
>>>>>      size_t ofs = ctx->ovnacts->size;
>>>>>      struct ovnact_load *load;
>>>>>      if (lhs->symbol->ovn_field) {
>>>>> +        if (!check_ovnfield_load(ctx, lhs)) {
>>>>> +            return;
>>>>> +        }
>>>>>          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
>>>>>      } else {
>>>>>          load = ovnact_put_LOAD(ctx->ovnacts);
>>>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
>>>>>      format_assignment(move, "<->", s);
>>>>>  }
>>>>>
>>>>> +static void
>>>>> +parse_ovnfield_exchange(struct action_context *ctx,
>>>>> +                        const struct expr_field *lhs,
>>>>> +                        const struct expr_field *rhs)
>>>>> +{
>>>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
>>>>> +        lexer_error(ctx->lexer,
>>>>> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
>>>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
>>>>> +        lexer_error(ctx->lexer,
>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
>>>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
>>>>> +        lexer_error(ctx->lexer,
>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
>>>>> +        lexer_error(ctx->lexer,
>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
>>>>> +    move->lhs = *lhs;
>>>>> +    move->rhs = *rhs;
>>>>> +}
>>>>> +
>>>>>  static void
>>>>>  parse_assignment_action(struct action_context *ctx, bool exchange,
>>>>>                          const struct expr_field *lhs)
>>>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
>>>>>          return;
>>>>>      }
>>>>>
>>>>> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
>>>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
>>>>> +        return;
>>>>> +    }
>>>>> +
>>>>>      const struct expr_symbol *ls = lhs->symbol;
>>>>>      const struct expr_symbol *rs = rhs.symbol;
>>>>>      if ((ls->width != 0) != (rs->width != 0)) {
>>>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
>>>>>                        ntohs(load->imm.value.be16_int));
>>>>>          break;
>>>>>
>>>>> +    case OVN_IP_SRC:
>>>>> +    case OVN_IP_DST:
>>>>>      case OVN_FIELD_N_IDS:
>>>>>      default:
>>>>>          OVS_NOT_REACHED();
>>>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
>>>>>          encode_finish_controller_op(oc_offset, ofpacts);
>>>>>          break;
>>>>>      }
>>>>> +
>>>>> +    case OVN_IP_SRC:
>>>>> +    case OVN_IP_DST:
>>>>>      case OVN_FIELD_N_IDS:
>>>>>      default:
>>>>>          OVS_NOT_REACHED();
>>>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
>>>>>      free(fwd_group->child_ports);
>>>>>  }
>>>>>
>>>>> +static void
>>>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
>>>>> +{
>>>>> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
>>>>> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
>>>>> +    switch (lhs->id) {
>>>>> +    case OVN_IP_SRC:
>>>>> +    case OVN_IP_DST:
>>>>> +        break;
>>>>> +
>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>> +    case OVN_FIELD_N_IDS:
>>>>> +    default:
>>>>> +        OVS_NOT_REACHED();
>>>>> +    }
>>>>
>>>> Nit: I would use the shorter:
>>>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
>>>
>>> You mean to drop the switch right ? If so, Ack.
>>>
>>
>> Right, I meant ovs_assert instead of switch.
>>
>>>
>>>>
>>>>> +
>>>>> +    switch (rhs->id) {
>>>>> +    case OVN_IP_SRC:
>>>>> +    case OVN_IP_DST:
>>>>> +        break;
>>>>> +
>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>> +    case OVN_FIELD_N_IDS:
>>>>> +    default:
>>>>> +        OVS_NOT_REACHED();
>>>>> +    }
>>>>
>>>> Nit: I would use the shorter:
>>>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
>>>>
>>>>> +
>>>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
>>>>> +}
>>>>> +
>>>>> +static void
>>>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
>>>>> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
>>>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
>>>>> +{
>>>>> +    /* Right now we only support exchanging the IPs in
>>>>> +     * OVN fields (ip.src <-> ip.dst). */
>>>>> +    size_t oc_offset =
>>>>> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
>>>>> +                                   true, NX_CTLR_NO_METER, ofpacts);
>>>>> +    encode_finish_controller_op(oc_offset, ofpacts);
>>>>
>>>> I think this means that all packets matching a flow with action "reject { ...
>>>> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
>>>> reject action, one for the ovnfield-exchange action.
>>>>
>>>> Is that a concern wrt. performance?
>>>
>>> Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
>>> icmp4.frag_mtu = 1500; ...};
>>>
>>> If performance is a concern, we could just drop ... ip.src <-> ip.dst
>>> in reject and let reject action
>>> swap the ip src with ip destination. I thought about that and my take
>>> is that since reject { } results in
>>> TCP RST or ICMP unreachable packet, it should be fine. I see these
>>> packets as more of control packets.
>>>
>>> I think it should be ok. Let me know what you think.
>>>
>>
>> I think I like the approach of "reject" action implicitly swapping IPs best.
>> In the end it's not like we could implement reject withough swapping IP dst
>> with IP src so why not do it as part of the reject action.
>>
> 
> Agree. reject action is expected to be used to send the generated tcp
> rst/icmp unreachable
> packet back to the sender.
> 
> I'm ok to change the reject action to just swap the IPs internally if
> everyone is fine.
> 
> @Mark Michelson  @Han Zhou  Do you have any comments here ?
> 
> Option 1 - reject { ...}    -> The reject action handling in pinctrl.c
> will swap the ip source and destination
> 
> Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
> approach in this patch series. This results in
> 2 packet-ins.
> 
> I personally prefer (2) since in OVN we normally tell what inner
> actions to apply for many OVN actions.
> But I have no strong preference and I'm fine changing to (1).
> 
> If we chose option (1), we could add a comment inside the action like
> - reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
> eth.dst; output ; }
> 

Then, why not swap ETH addresses implicitly too?

> 
>> But on the other hand, I think there might be other simpler ways to generate a
>> lot of packet-ins towards ovn-controller that I don't have a strong preference
>> about either solution.  I just wanted to make sure we discuss the performance
>> implications.
> 
> In my opinion there should not be any performance implications.
> 
> 
>>
>>>
>>>>
>>>>> +}
>>>>> +
>>>>>  /* Parses an assignment or exchange or put_dhcp_opts action. */
>>>>>  static void
>>>>>  parse_set_action(struct action_context *ctx)
>>>>> diff --git a/lib/logical-fields.c b/lib/logical-fields.c
>>>>> index bf61df7719..d6f1981f52 100644
>>>>> --- a/lib/logical-fields.c
>>>>> +++ b/lib/logical-fields.c
>>>>> @@ -33,6 +33,14 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
>>>>>          OVN_ICMP6_FRAG_MTU,
>>>>>          "icmp6.frag_mtu",
>>>>>          4, 32,
>>>>> +    }, {
>>>>> +        OVN_IP_SRC,
>>>>> +        "ip.src",
>>>>> +        16, 128,
>>>>> +    }, {
>>>>> +        OVN_IP_DST,
>>>>> +        "ip.dst",
>>>>> +        16, 128,
>>>>
>>>> Just partially related to your change, I'm a bit confused by the meaning of
>>>> n_bytes and n_bits in "struct ovn_field".  It seems to me that we never use
>>>> those values.  Is that the case or am I missing something?
>>>
>>> These OVN fields are defined more like the way - mf_fields are defined.
>>> Like icmp4.frag_mtu is 2 bytes and 16 bits field.
>>>
>>> So ip.src/ip.dst can be used for both ipv4 and ipv6, I have set the
>>> size of ipv6.
>>>
>>> Since we don't support ip.src = 10.0.0.4 or ip.src = aef00::4 for
>>> example, these fields
>>> are not used. Later on if there is a need to support assigning a value
>>> to these fields, these fields
>>> could be used. Although I don't see a need to support it since we
>>> already have ip4.src/ip
> 
>>>
>>
>> Ok, my question was more about the fact that I don't see the ovn_field.n_bytes
>> or ovn_fields.n_bits being used anywhere in general (not referring to
>> ip.src/ip.dst only).
>>
> 
> Actually they are being used. See
> https://github.com/ovn-org/ovn/blob/master/lib/actions.c#L394
> and https://github.com/ovn-org/ovn/blob/master/lib/expr.c#L569
> 
> If you add the below in the ovn -- action parsing test case in ovn.at
> ---
> icmp4.frag_mtu = 65536;
> ---
> 
> You'll see the below error:
> 17-bit constant is not compatible with 16-bit field icmp4.frag_mtu.
> 

Ah, you're right, sorry about the noise, I see it now.

Thanks,
Dumitru
Numan Siddique Oct. 14, 2020, 12:13 p.m. UTC | #6
On Wed, Oct 14, 2020 at 5:34 PM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 10/14/20 1:54 PM, Numan Siddique wrote:
> > On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara@redhat.com> wrote:
> >>
> >> On 10/14/20 12:50 PM, Numan Siddique wrote:
> >>> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
> >>>>
> >>>> On 10/14/20 11:15 AM, numans@ovn.org wrote:
> >>>>> From: Numan Siddique <numans@ovn.org>
> >>>>>
> >>>>> Thee new fields are version independent and these can be used in any
> >>>>
> >>>> Hi Numan,
> >>>>
> >>>> Nit: s/Thee/These
> >>>>
> >>>>> OVN action. Right now the usage of these fields are restricted to
> >>>>> exchanging the IP source and destination fields.
> >>>>>
> >>>>> Eg. reject {... ip.src <-> ip.dst ... };
> >>>>>
> >>>>> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
> >>>>> When pinctrl thread receives the packet in, it checks the IP version of the
> >>>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
> >>>>> resumes the packet to continue with the pipeline.
> >>>>>
> >>>>> Upcoming patch will make use of these fields.
> >>>>>
> >>>>> Signed-off-by: Numan Siddique <numans@ovn.org>
> >>>>> ---
> >>>>>  controller/pinctrl.c         |  49 +++++++++++++++
> >>>>>  include/ovn/actions.h        |   4 ++
> >>>>>  include/ovn/logical-fields.h |  18 ++++++
> >>>>>  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
> >>>>>  lib/logical-fields.c         |  10 +++
> >>>>>  ovn-sb.xml                   |  37 +++++++++++
> >>>>>  tests/ovn.at                 |  27 ++++++++
> >>>>>  utilities/ovn-trace.c        |  28 +++++++++
> >>>>>  8 files changed, 291 insertions(+)
> >>>>>
> >>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> >>>>> index 631d058458..bc16a82404 100644
> >>>>> --- a/controller/pinctrl.c
> >>>>> +++ b/controller/pinctrl.c
> >>>>> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
> >>>>>                                               struct ofputil_packet_in *pin,
> >>>>>                                               struct ofpbuf *userdata,
> >>>>>                                               struct ofpbuf *continuation);
> >>>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>>>> +                                           const struct flow *in_flow,
> >>>>> +                                           struct dp_packet *pkt_in,
> >>>>> +                                           struct ofputil_packet_in *pin,
> >>>>> +                                           struct ofpbuf *continuation);
> >>>>>  static void
> >>>>>  pinctrl_handle_event(struct ofpbuf *userdata)
> >>>>>      OVS_REQUIRES(pinctrl_mutex);
> >>>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
> >>>>>          ovs_mutex_unlock(&pinctrl_mutex);
> >>>>>          break;
> >>>>>
> >>>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
> >>>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
> >>>>> +                                       &continuation);
> >>>>> +        break;
> >>>>> +
> >>>>>      default:
> >>>>>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
> >>>>>                       ntohl(ah->opcode));
> >>>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
> >>>>>          svc_mon->next_send_time = time_msec() + svc_mon->interval;
> >>>>>      }
> >>>>>  }
> >>>>> +
> >>>>> +static void
> >>>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>>>> +                               const struct flow *in_flow,
> >>>>> +                               struct dp_packet *pkt_in,
> >>>>> +                               struct ofputil_packet_in *pin,
> >>>>> +                               struct ofpbuf *continuation)
> >>>>> +{
> >>>>> +    enum ofp_version version = rconn_get_version(swconn);
> >>>>> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> >>>>> +    struct dp_packet *pkt_out;
> >>>>> +
> >>>>> +    pkt_out = dp_packet_clone(pkt_in);
> >>>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
> >>>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
> >>>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
> >>>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> >>>>> +
> >>>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
> >>>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
> >>>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
> >>>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
> >>>>> +    } else {
> >>>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
> >>>>> +         * We could also use packet_set_ipv6() here, but that would require
> >>>>> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
> >>>>> +         * unnecessary. */
> >>>>> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
> >>>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
> >>>>> +        out_nh->ip6_src = out_nh->ip6_dst;
> >>>>> +        out_nh->ip6_dst = tmp;
> >>>>> +    }
> >>>>> +
> >>>>> +    pin->packet = dp_packet_data(pkt_out);
> >>>>> +    pin->packet_len = dp_packet_size(pkt_out);
> >>>>> +
> >>>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
> >>>>> +    dp_packet_delete(pkt_out);
> >>>>> +}
> >>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> >>>>> index b4e5acabb9..bf1fe848b7 100644
> >>>>> --- a/include/ovn/actions.h
> >>>>> +++ b/include/ovn/actions.h
> >>>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
> >>>>>      OVNACT(DHCP6_REPLY,       ovnact_null)            \
> >>>>>      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
> >>>>>      OVNACT(REJECT,            ovnact_nest)            \
> >>>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
> >>>>>
> >>>>>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
> >>>>>  enum OVS_PACKED_ENUM ovnact_type {
> >>>>> @@ -612,6 +613,9 @@ enum action_opcode {
> >>>>>       * The actions, in OpenFlow 1.3 format, follow the action_header.
> >>>>>       */
> >>>>>      ACTION_OPCODE_REJECT,
> >>>>> +
> >>>>> +    /* ip.src <-> ip.dst */
> >>>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>>>  };
> >>>>>
> >>>>>  /* Header. */
> >>>>> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> >>>>> index ac6f2f909b..bb6daa50ca 100644
> >>>>> --- a/include/ovn/logical-fields.h
> >>>>> +++ b/include/ovn/logical-fields.h
> >>>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
> >>>>>       */
> >>>>>      OVN_ICMP6_FRAG_MTU,
> >>>>>
> >>>>> +    /*
> >>>>> +     * Name: "ip.src"
> >>>>> +     * Type: be128
> >>>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
> >>>>> +     *              or sets the field MFF_IPV6_SRC if
> >>>>> +     *              eth.type == 0x86dd (IPV6).
> >>>>> +     */
> >>>>> +    OVN_IP_SRC,
> >>>>> +
> >>>>> +    /*
> >>>>> +     * Name: "ip.dst"
> >>>>> +     * Type: be128
> >>>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
> >>>>> +     *              or sets the field MFF_IPV6_DST if
> >>>>> +     *              eth.type == 0x86dd (IPV6).
> >>>>> +     */
> >>>>> +    OVN_IP_DST,
> >>>>> +
> >>>>>      OVN_FIELD_N_IDS
> >>>>>  };
> >>>>>
> >>>>> diff --git a/lib/actions.c b/lib/actions.c
> >>>>> index 1e1bdeff24..e9c77d2a0a 100644
> >>>>> --- a/lib/actions.c
> >>>>> +++ b/lib/actions.c
> >>>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
> >>>>>  {
> >>>>>  }
> >>>>>
> >>>>> +static bool
> >>>>> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
> >>>>> +{
> >>>>> +    switch (field->symbol->ovn_field->id) {
> >>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>> +        return true;
> >>>>> +
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
> >>>>> +                    field->symbol->name);
> >>>>> +        return false;
> >>>>> +
> >>>>> +    case OVN_FIELD_N_IDS:
> >>>>> +    default:
> >>>>> +        OVS_NOT_REACHED();
> >>>>> +    }
> >>>>> +}
> >>>>> +
> >>>>>  static void
> >>>>>  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
> >>>>>  {
> >>>>>      size_t ofs = ctx->ovnacts->size;
> >>>>>      struct ovnact_load *load;
> >>>>>      if (lhs->symbol->ovn_field) {
> >>>>> +        if (!check_ovnfield_load(ctx, lhs)) {
> >>>>> +            return;
> >>>>> +        }
> >>>>>          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
> >>>>>      } else {
> >>>>>          load = ovnact_put_LOAD(ctx->ovnacts);
> >>>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
> >>>>>      format_assignment(move, "<->", s);
> >>>>>  }
> >>>>>
> >>>>> +static void
> >>>>> +parse_ovnfield_exchange(struct action_context *ctx,
> >>>>> +                        const struct expr_field *lhs,
> >>>>> +                        const struct expr_field *rhs)
> >>>>> +{
> >>>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
> >>>>> +        lexer_error(ctx->lexer,
> >>>>> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
> >>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>>>> +        lexer_error(ctx->lexer,
> >>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>>>> +        lexer_error(ctx->lexer,
> >>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
> >>>>> +        lexer_error(ctx->lexer,
> >>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
> >>>>> +    move->lhs = *lhs;
> >>>>> +    move->rhs = *rhs;
> >>>>> +}
> >>>>> +
> >>>>>  static void
> >>>>>  parse_assignment_action(struct action_context *ctx, bool exchange,
> >>>>>                          const struct expr_field *lhs)
> >>>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
> >>>>>          return;
> >>>>>      }
> >>>>>
> >>>>> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
> >>>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
> >>>>> +        return;
> >>>>> +    }
> >>>>> +
> >>>>>      const struct expr_symbol *ls = lhs->symbol;
> >>>>>      const struct expr_symbol *rs = rhs.symbol;
> >>>>>      if ((ls->width != 0) != (rs->width != 0)) {
> >>>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
> >>>>>                        ntohs(load->imm.value.be16_int));
> >>>>>          break;
> >>>>>
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>>      case OVN_FIELD_N_IDS:
> >>>>>      default:
> >>>>>          OVS_NOT_REACHED();
> >>>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
> >>>>>          encode_finish_controller_op(oc_offset, ofpacts);
> >>>>>          break;
> >>>>>      }
> >>>>> +
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>>      case OVN_FIELD_N_IDS:
> >>>>>      default:
> >>>>>          OVS_NOT_REACHED();
> >>>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
> >>>>>      free(fwd_group->child_ports);
> >>>>>  }
> >>>>>
> >>>>> +static void
> >>>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
> >>>>> +{
> >>>>> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
> >>>>> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
> >>>>> +    switch (lhs->id) {
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>> +        break;
> >>>>> +
> >>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>> +    case OVN_FIELD_N_IDS:
> >>>>> +    default:
> >>>>> +        OVS_NOT_REACHED();
> >>>>> +    }
> >>>>
> >>>> Nit: I would use the shorter:
> >>>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
> >>>
> >>> You mean to drop the switch right ? If so, Ack.
> >>>
> >>
> >> Right, I meant ovs_assert instead of switch.
> >>
> >>>
> >>>>
> >>>>> +
> >>>>> +    switch (rhs->id) {
> >>>>> +    case OVN_IP_SRC:
> >>>>> +    case OVN_IP_DST:
> >>>>> +        break;
> >>>>> +
> >>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>> +    case OVN_FIELD_N_IDS:
> >>>>> +    default:
> >>>>> +        OVS_NOT_REACHED();
> >>>>> +    }
> >>>>
> >>>> Nit: I would use the shorter:
> >>>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
> >>>>
> >>>>> +
> >>>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
> >>>>> +}
> >>>>> +
> >>>>> +static void
> >>>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
> >>>>> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
> >>>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
> >>>>> +{
> >>>>> +    /* Right now we only support exchanging the IPs in
> >>>>> +     * OVN fields (ip.src <-> ip.dst). */
> >>>>> +    size_t oc_offset =
> >>>>> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>>> +                                   true, NX_CTLR_NO_METER, ofpacts);
> >>>>> +    encode_finish_controller_op(oc_offset, ofpacts);
> >>>>
> >>>> I think this means that all packets matching a flow with action "reject { ...
> >>>> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
> >>>> reject action, one for the ovnfield-exchange action.
> >>>>
> >>>> Is that a concern wrt. performance?
> >>>
> >>> Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
> >>> icmp4.frag_mtu = 1500; ...};
> >>>
> >>> If performance is a concern, we could just drop ... ip.src <-> ip.dst
> >>> in reject and let reject action
> >>> swap the ip src with ip destination. I thought about that and my take
> >>> is that since reject { } results in
> >>> TCP RST or ICMP unreachable packet, it should be fine. I see these
> >>> packets as more of control packets.
> >>>
> >>> I think it should be ok. Let me know what you think.
> >>>
> >>
> >> I think I like the approach of "reject" action implicitly swapping IPs best.
> >> In the end it's not like we could implement reject withough swapping IP dst
> >> with IP src so why not do it as part of the reject action.
> >>
> >
> > Agree. reject action is expected to be used to send the generated tcp
> > rst/icmp unreachable
> > packet back to the sender.
> >
> > I'm ok to change the reject action to just swap the IPs internally if
> > everyone is fine.
> >
> > @Mark Michelson  @Han Zhou  Do you have any comments here ?
> >
> > Option 1 - reject { ...}    -> The reject action handling in pinctrl.c
> > will swap the ip source and destination
> >
> > Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
> > approach in this patch series. This results in
> > 2 packet-ins.
> >
> > I personally prefer (2) since in OVN we normally tell what inner
> > actions to apply for many OVN actions.
> > But I have no strong preference and I'm fine changing to (1).
> >
> > If we chose option (1), we could add a comment inside the action like
> > - reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
> > eth.dst; output ; }
> >
>
> Then, why not swap ETH addresses implicitly too?

As I mentioned above, I'd prefer if ovn-northd says what actions to
apply for the reject
packet. Just like for icmp4 or arp actions, ovn-northd says what
actions to apply for the
transformed packet from the source packet.

Thanks
Numan

>
> >
> >> But on the other hand, I think there might be other simpler ways to generate a
> >> lot of packet-ins towards ovn-controller that I don't have a strong preference
> >> about either solution.  I just wanted to make sure we discuss the performance
> >> implications.
> >
> > In my opinion there should not be any performance implications.
> >
> >
> >>
> >>>
> >>>>
> >>>>> +}
> >>>>> +
> >>>>>  /* Parses an assignment or exchange or put_dhcp_opts action. */
> >>>>>  static void
> >>>>>  parse_set_action(struct action_context *ctx)
> >>>>> diff --git a/lib/logical-fields.c b/lib/logical-fields.c
> >>>>> index bf61df7719..d6f1981f52 100644
> >>>>> --- a/lib/logical-fields.c
> >>>>> +++ b/lib/logical-fields.c
> >>>>> @@ -33,6 +33,14 @@ const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
> >>>>>          OVN_ICMP6_FRAG_MTU,
> >>>>>          "icmp6.frag_mtu",
> >>>>>          4, 32,
> >>>>> +    }, {
> >>>>> +        OVN_IP_SRC,
> >>>>> +        "ip.src",
> >>>>> +        16, 128,
> >>>>> +    }, {
> >>>>> +        OVN_IP_DST,
> >>>>> +        "ip.dst",
> >>>>> +        16, 128,
> >>>>
> >>>> Just partially related to your change, I'm a bit confused by the meaning of
> >>>> n_bytes and n_bits in "struct ovn_field".  It seems to me that we never use
> >>>> those values.  Is that the case or am I missing something?
> >>>
> >>> These OVN fields are defined more like the way - mf_fields are defined.
> >>> Like icmp4.frag_mtu is 2 bytes and 16 bits field.
> >>>
> >>> So ip.src/ip.dst can be used for both ipv4 and ipv6, I have set the
> >>> size of ipv6.
> >>>
> >>> Since we don't support ip.src = 10.0.0.4 or ip.src = aef00::4 for
> >>> example, these fields
> >>> are not used. Later on if there is a need to support assigning a value
> >>> to these fields, these fields
> >>> could be used. Although I don't see a need to support it since we
> >>> already have ip4.src/ip
> >
> >>>
> >>
> >> Ok, my question was more about the fact that I don't see the ovn_field.n_bytes
> >> or ovn_fields.n_bits being used anywhere in general (not referring to
> >> ip.src/ip.dst only).
> >>
> >
> > Actually they are being used. See
> > https://github.com/ovn-org/ovn/blob/master/lib/actions.c#L394
> > and https://github.com/ovn-org/ovn/blob/master/lib/expr.c#L569
> >
> > If you add the below in the ovn -- action parsing test case in ovn.at
> > ---
> > icmp4.frag_mtu = 65536;
> > ---
> >
> > You'll see the below error:
> > 17-bit constant is not compatible with 16-bit field icmp4.frag_mtu.
> >
>
> Ah, you're right, sorry about the noise, I see it now.
>
> Thanks,
> Dumitru
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Dumitru Ceara Oct. 14, 2020, 12:21 p.m. UTC | #7
On 10/14/20 2:13 PM, Numan Siddique wrote:
> On Wed, Oct 14, 2020 at 5:34 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> On 10/14/20 1:54 PM, Numan Siddique wrote:
>>> On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>>>
>>>> On 10/14/20 12:50 PM, Numan Siddique wrote:
>>>>> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>>>>>
>>>>>> On 10/14/20 11:15 AM, numans@ovn.org wrote:
>>>>>>> From: Numan Siddique <numans@ovn.org>
>>>>>>>
>>>>>>> Thee new fields are version independent and these can be used in any
>>>>>>
>>>>>> Hi Numan,
>>>>>>
>>>>>> Nit: s/Thee/These
>>>>>>
>>>>>>> OVN action. Right now the usage of these fields are restricted to
>>>>>>> exchanging the IP source and destination fields.
>>>>>>>
>>>>>>> Eg. reject {... ip.src <-> ip.dst ... };
>>>>>>>
>>>>>>> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
>>>>>>> When pinctrl thread receives the packet in, it checks the IP version of the
>>>>>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
>>>>>>> resumes the packet to continue with the pipeline.
>>>>>>>
>>>>>>> Upcoming patch will make use of these fields.
>>>>>>>
>>>>>>> Signed-off-by: Numan Siddique <numans@ovn.org>
>>>>>>> ---
>>>>>>>  controller/pinctrl.c         |  49 +++++++++++++++
>>>>>>>  include/ovn/actions.h        |   4 ++
>>>>>>>  include/ovn/logical-fields.h |  18 ++++++
>>>>>>>  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
>>>>>>>  lib/logical-fields.c         |  10 +++
>>>>>>>  ovn-sb.xml                   |  37 +++++++++++
>>>>>>>  tests/ovn.at                 |  27 ++++++++
>>>>>>>  utilities/ovn-trace.c        |  28 +++++++++
>>>>>>>  8 files changed, 291 insertions(+)
>>>>>>>
>>>>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>>>>>>> index 631d058458..bc16a82404 100644
>>>>>>> --- a/controller/pinctrl.c
>>>>>>> +++ b/controller/pinctrl.c
>>>>>>> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
>>>>>>>                                               struct ofputil_packet_in *pin,
>>>>>>>                                               struct ofpbuf *userdata,
>>>>>>>                                               struct ofpbuf *continuation);
>>>>>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
>>>>>>> +                                           const struct flow *in_flow,
>>>>>>> +                                           struct dp_packet *pkt_in,
>>>>>>> +                                           struct ofputil_packet_in *pin,
>>>>>>> +                                           struct ofpbuf *continuation);
>>>>>>>  static void
>>>>>>>  pinctrl_handle_event(struct ofpbuf *userdata)
>>>>>>>      OVS_REQUIRES(pinctrl_mutex);
>>>>>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
>>>>>>>          ovs_mutex_unlock(&pinctrl_mutex);
>>>>>>>          break;
>>>>>>>
>>>>>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
>>>>>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
>>>>>>> +                                       &continuation);
>>>>>>> +        break;
>>>>>>> +
>>>>>>>      default:
>>>>>>>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
>>>>>>>                       ntohl(ah->opcode));
>>>>>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
>>>>>>>          svc_mon->next_send_time = time_msec() + svc_mon->interval;
>>>>>>>      }
>>>>>>>  }
>>>>>>> +
>>>>>>> +static void
>>>>>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
>>>>>>> +                               const struct flow *in_flow,
>>>>>>> +                               struct dp_packet *pkt_in,
>>>>>>> +                               struct ofputil_packet_in *pin,
>>>>>>> +                               struct ofpbuf *continuation)
>>>>>>> +{
>>>>>>> +    enum ofp_version version = rconn_get_version(swconn);
>>>>>>> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
>>>>>>> +    struct dp_packet *pkt_out;
>>>>>>> +
>>>>>>> +    pkt_out = dp_packet_clone(pkt_in);
>>>>>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
>>>>>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
>>>>>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
>>>>>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
>>>>>>> +
>>>>>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
>>>>>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
>>>>>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
>>>>>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
>>>>>>> +    } else {
>>>>>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
>>>>>>> +         * We could also use packet_set_ipv6() here, but that would require
>>>>>>> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
>>>>>>> +         * unnecessary. */
>>>>>>> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
>>>>>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
>>>>>>> +        out_nh->ip6_src = out_nh->ip6_dst;
>>>>>>> +        out_nh->ip6_dst = tmp;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    pin->packet = dp_packet_data(pkt_out);
>>>>>>> +    pin->packet_len = dp_packet_size(pkt_out);
>>>>>>> +
>>>>>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
>>>>>>> +    dp_packet_delete(pkt_out);
>>>>>>> +}
>>>>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
>>>>>>> index b4e5acabb9..bf1fe848b7 100644
>>>>>>> --- a/include/ovn/actions.h
>>>>>>> +++ b/include/ovn/actions.h
>>>>>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
>>>>>>>      OVNACT(DHCP6_REPLY,       ovnact_null)            \
>>>>>>>      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
>>>>>>>      OVNACT(REJECT,            ovnact_nest)            \
>>>>>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
>>>>>>>
>>>>>>>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>>>>>>>  enum OVS_PACKED_ENUM ovnact_type {
>>>>>>> @@ -612,6 +613,9 @@ enum action_opcode {
>>>>>>>       * The actions, in OpenFlow 1.3 format, follow the action_header.
>>>>>>>       */
>>>>>>>      ACTION_OPCODE_REJECT,
>>>>>>> +
>>>>>>> +    /* ip.src <-> ip.dst */
>>>>>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
>>>>>>>  };
>>>>>>>
>>>>>>>  /* Header. */
>>>>>>> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
>>>>>>> index ac6f2f909b..bb6daa50ca 100644
>>>>>>> --- a/include/ovn/logical-fields.h
>>>>>>> +++ b/include/ovn/logical-fields.h
>>>>>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
>>>>>>>       */
>>>>>>>      OVN_ICMP6_FRAG_MTU,
>>>>>>>
>>>>>>> +    /*
>>>>>>> +     * Name: "ip.src"
>>>>>>> +     * Type: be128
>>>>>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
>>>>>>> +     *              or sets the field MFF_IPV6_SRC if
>>>>>>> +     *              eth.type == 0x86dd (IPV6).
>>>>>>> +     */
>>>>>>> +    OVN_IP_SRC,
>>>>>>> +
>>>>>>> +    /*
>>>>>>> +     * Name: "ip.dst"
>>>>>>> +     * Type: be128
>>>>>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
>>>>>>> +     *              or sets the field MFF_IPV6_DST if
>>>>>>> +     *              eth.type == 0x86dd (IPV6).
>>>>>>> +     */
>>>>>>> +    OVN_IP_DST,
>>>>>>> +
>>>>>>>      OVN_FIELD_N_IDS
>>>>>>>  };
>>>>>>>
>>>>>>> diff --git a/lib/actions.c b/lib/actions.c
>>>>>>> index 1e1bdeff24..e9c77d2a0a 100644
>>>>>>> --- a/lib/actions.c
>>>>>>> +++ b/lib/actions.c
>>>>>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
>>>>>>>  {
>>>>>>>  }
>>>>>>>
>>>>>>> +static bool
>>>>>>> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
>>>>>>> +{
>>>>>>> +    switch (field->symbol->ovn_field->id) {
>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>>>> +        return true;
>>>>>>> +
>>>>>>> +    case OVN_IP_SRC:
>>>>>>> +    case OVN_IP_DST:
>>>>>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
>>>>>>> +                    field->symbol->name);
>>>>>>> +        return false;
>>>>>>> +
>>>>>>> +    case OVN_FIELD_N_IDS:
>>>>>>> +    default:
>>>>>>> +        OVS_NOT_REACHED();
>>>>>>> +    }
>>>>>>> +}
>>>>>>> +
>>>>>>>  static void
>>>>>>>  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
>>>>>>>  {
>>>>>>>      size_t ofs = ctx->ovnacts->size;
>>>>>>>      struct ovnact_load *load;
>>>>>>>      if (lhs->symbol->ovn_field) {
>>>>>>> +        if (!check_ovnfield_load(ctx, lhs)) {
>>>>>>> +            return;
>>>>>>> +        }
>>>>>>>          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
>>>>>>>      } else {
>>>>>>>          load = ovnact_put_LOAD(ctx->ovnacts);
>>>>>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
>>>>>>>      format_assignment(move, "<->", s);
>>>>>>>  }
>>>>>>>
>>>>>>> +static void
>>>>>>> +parse_ovnfield_exchange(struct action_context *ctx,
>>>>>>> +                        const struct expr_field *lhs,
>>>>>>> +                        const struct expr_field *rhs)
>>>>>>> +{
>>>>>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
>>>>>>> +        lexer_error(ctx->lexer,
>>>>>>> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
>>>>>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
>>>>>>> +        lexer_error(ctx->lexer,
>>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
>>>>>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
>>>>>>> +        lexer_error(ctx->lexer,
>>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
>>>>>>> +        lexer_error(ctx->lexer,
>>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
>>>>>>> +    move->lhs = *lhs;
>>>>>>> +    move->rhs = *rhs;
>>>>>>> +}
>>>>>>> +
>>>>>>>  static void
>>>>>>>  parse_assignment_action(struct action_context *ctx, bool exchange,
>>>>>>>                          const struct expr_field *lhs)
>>>>>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
>>>>>>>          return;
>>>>>>>      }
>>>>>>>
>>>>>>> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
>>>>>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
>>>>>>> +        return;
>>>>>>> +    }
>>>>>>> +
>>>>>>>      const struct expr_symbol *ls = lhs->symbol;
>>>>>>>      const struct expr_symbol *rs = rhs.symbol;
>>>>>>>      if ((ls->width != 0) != (rs->width != 0)) {
>>>>>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
>>>>>>>                        ntohs(load->imm.value.be16_int));
>>>>>>>          break;
>>>>>>>
>>>>>>> +    case OVN_IP_SRC:
>>>>>>> +    case OVN_IP_DST:
>>>>>>>      case OVN_FIELD_N_IDS:
>>>>>>>      default:
>>>>>>>          OVS_NOT_REACHED();
>>>>>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
>>>>>>>          encode_finish_controller_op(oc_offset, ofpacts);
>>>>>>>          break;
>>>>>>>      }
>>>>>>> +
>>>>>>> +    case OVN_IP_SRC:
>>>>>>> +    case OVN_IP_DST:
>>>>>>>      case OVN_FIELD_N_IDS:
>>>>>>>      default:
>>>>>>>          OVS_NOT_REACHED();
>>>>>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
>>>>>>>      free(fwd_group->child_ports);
>>>>>>>  }
>>>>>>>
>>>>>>> +static void
>>>>>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
>>>>>>> +{
>>>>>>> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
>>>>>>> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
>>>>>>> +    switch (lhs->id) {
>>>>>>> +    case OVN_IP_SRC:
>>>>>>> +    case OVN_IP_DST:
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>>>> +    case OVN_FIELD_N_IDS:
>>>>>>> +    default:
>>>>>>> +        OVS_NOT_REACHED();
>>>>>>> +    }
>>>>>>
>>>>>> Nit: I would use the shorter:
>>>>>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
>>>>>
>>>>> You mean to drop the switch right ? If so, Ack.
>>>>>
>>>>
>>>> Right, I meant ovs_assert instead of switch.
>>>>
>>>>>
>>>>>>
>>>>>>> +
>>>>>>> +    switch (rhs->id) {
>>>>>>> +    case OVN_IP_SRC:
>>>>>>> +    case OVN_IP_DST:
>>>>>>> +        break;
>>>>>>> +
>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>>>> +    case OVN_FIELD_N_IDS:
>>>>>>> +    default:
>>>>>>> +        OVS_NOT_REACHED();
>>>>>>> +    }
>>>>>>
>>>>>> Nit: I would use the shorter:
>>>>>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
>>>>>>
>>>>>>> +
>>>>>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void
>>>>>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
>>>>>>> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
>>>>>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
>>>>>>> +{
>>>>>>> +    /* Right now we only support exchanging the IPs in
>>>>>>> +     * OVN fields (ip.src <-> ip.dst). */
>>>>>>> +    size_t oc_offset =
>>>>>>> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
>>>>>>> +                                   true, NX_CTLR_NO_METER, ofpacts);
>>>>>>> +    encode_finish_controller_op(oc_offset, ofpacts);
>>>>>>
>>>>>> I think this means that all packets matching a flow with action "reject { ...
>>>>>> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
>>>>>> reject action, one for the ovnfield-exchange action.
>>>>>>
>>>>>> Is that a concern wrt. performance?
>>>>>
>>>>> Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
>>>>> icmp4.frag_mtu = 1500; ...};
>>>>>
>>>>> If performance is a concern, we could just drop ... ip.src <-> ip.dst
>>>>> in reject and let reject action
>>>>> swap the ip src with ip destination. I thought about that and my take
>>>>> is that since reject { } results in
>>>>> TCP RST or ICMP unreachable packet, it should be fine. I see these
>>>>> packets as more of control packets.
>>>>>
>>>>> I think it should be ok. Let me know what you think.
>>>>>
>>>>
>>>> I think I like the approach of "reject" action implicitly swapping IPs best.
>>>> In the end it's not like we could implement reject withough swapping IP dst
>>>> with IP src so why not do it as part of the reject action.
>>>>
>>>
>>> Agree. reject action is expected to be used to send the generated tcp
>>> rst/icmp unreachable
>>> packet back to the sender.
>>>
>>> I'm ok to change the reject action to just swap the IPs internally if
>>> everyone is fine.
>>>
>>> @Mark Michelson  @Han Zhou  Do you have any comments here ?
>>>
>>> Option 1 - reject { ...}    -> The reject action handling in pinctrl.c
>>> will swap the ip source and destination
>>>
>>> Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
>>> approach in this patch series. This results in
>>> 2 packet-ins.
>>>
>>> I personally prefer (2) since in OVN we normally tell what inner
>>> actions to apply for many OVN actions.
>>> But I have no strong preference and I'm fine changing to (1).
>>>
>>> If we chose option (1), we could add a comment inside the action like
>>> - reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
>>> eth.dst; output ; }
>>>
>>
>> Then, why not swap ETH addresses implicitly too?
> 
> As I mentioned above, I'd prefer if ovn-northd says what actions to
> apply for the reject
> packet. Just like for icmp4 or arp actions, ovn-northd says what
> actions to apply for the
> transformed packet from the source packet.
> 

I was thinking that if we go this way when we build the packet in
ovn-controller it might look weird to a reader of the pinctrl code to see
something like:

/* We only swap IPs because ETH addresses are swapped in OVS. */
pinctrl_compose_ipv4(&packet, ip_flow->dl_src, ip_flow->dl_dst,
                     ip_flow->nw_dst, ip_flow->nw_src, ...)

Regards,
Dumitru
Numan Siddique Oct. 14, 2020, 12:38 p.m. UTC | #8
On Wed, Oct 14, 2020 at 5:51 PM Dumitru Ceara <dceara@redhat.com> wrote:
>
> On 10/14/20 2:13 PM, Numan Siddique wrote:
> > On Wed, Oct 14, 2020 at 5:34 PM Dumitru Ceara <dceara@redhat.com> wrote:
> >>
> >> On 10/14/20 1:54 PM, Numan Siddique wrote:
> >>> On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara@redhat.com> wrote:
> >>>>
> >>>> On 10/14/20 12:50 PM, Numan Siddique wrote:
> >>>>> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
> >>>>>>
> >>>>>> On 10/14/20 11:15 AM, numans@ovn.org wrote:
> >>>>>>> From: Numan Siddique <numans@ovn.org>
> >>>>>>>
> >>>>>>> Thee new fields are version independent and these can be used in any
> >>>>>>
> >>>>>> Hi Numan,
> >>>>>>
> >>>>>> Nit: s/Thee/These
> >>>>>>
> >>>>>>> OVN action. Right now the usage of these fields are restricted to
> >>>>>>> exchanging the IP source and destination fields.
> >>>>>>>
> >>>>>>> Eg. reject {... ip.src <-> ip.dst ... };
> >>>>>>>
> >>>>>>> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
> >>>>>>> When pinctrl thread receives the packet in, it checks the IP version of the
> >>>>>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
> >>>>>>> resumes the packet to continue with the pipeline.
> >>>>>>>
> >>>>>>> Upcoming patch will make use of these fields.
> >>>>>>>
> >>>>>>> Signed-off-by: Numan Siddique <numans@ovn.org>
> >>>>>>> ---
> >>>>>>>  controller/pinctrl.c         |  49 +++++++++++++++
> >>>>>>>  include/ovn/actions.h        |   4 ++
> >>>>>>>  include/ovn/logical-fields.h |  18 ++++++
> >>>>>>>  lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
> >>>>>>>  lib/logical-fields.c         |  10 +++
> >>>>>>>  ovn-sb.xml                   |  37 +++++++++++
> >>>>>>>  tests/ovn.at                 |  27 ++++++++
> >>>>>>>  utilities/ovn-trace.c        |  28 +++++++++
> >>>>>>>  8 files changed, 291 insertions(+)
> >>>>>>>
> >>>>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> >>>>>>> index 631d058458..bc16a82404 100644
> >>>>>>> --- a/controller/pinctrl.c
> >>>>>>> +++ b/controller/pinctrl.c
> >>>>>>> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
> >>>>>>>                                               struct ofputil_packet_in *pin,
> >>>>>>>                                               struct ofpbuf *userdata,
> >>>>>>>                                               struct ofpbuf *continuation);
> >>>>>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>>>>>> +                                           const struct flow *in_flow,
> >>>>>>> +                                           struct dp_packet *pkt_in,
> >>>>>>> +                                           struct ofputil_packet_in *pin,
> >>>>>>> +                                           struct ofpbuf *continuation);
> >>>>>>>  static void
> >>>>>>>  pinctrl_handle_event(struct ofpbuf *userdata)
> >>>>>>>      OVS_REQUIRES(pinctrl_mutex);
> >>>>>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
> >>>>>>>          ovs_mutex_unlock(&pinctrl_mutex);
> >>>>>>>          break;
> >>>>>>>
> >>>>>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
> >>>>>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
> >>>>>>> +                                       &continuation);
> >>>>>>> +        break;
> >>>>>>> +
> >>>>>>>      default:
> >>>>>>>          VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
> >>>>>>>                       ntohl(ah->opcode));
> >>>>>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
> >>>>>>>          svc_mon->next_send_time = time_msec() + svc_mon->interval;
> >>>>>>>      }
> >>>>>>>  }
> >>>>>>> +
> >>>>>>> +static void
> >>>>>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>>>>>> +                               const struct flow *in_flow,
> >>>>>>> +                               struct dp_packet *pkt_in,
> >>>>>>> +                               struct ofputil_packet_in *pin,
> >>>>>>> +                               struct ofpbuf *continuation)
> >>>>>>> +{
> >>>>>>> +    enum ofp_version version = rconn_get_version(swconn);
> >>>>>>> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
> >>>>>>> +    struct dp_packet *pkt_out;
> >>>>>>> +
> >>>>>>> +    pkt_out = dp_packet_clone(pkt_in);
> >>>>>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
> >>>>>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
> >>>>>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
> >>>>>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> >>>>>>> +
> >>>>>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
> >>>>>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
> >>>>>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
> >>>>>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
> >>>>>>> +    } else {
> >>>>>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
> >>>>>>> +         * We could also use packet_set_ipv6() here, but that would require
> >>>>>>> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
> >>>>>>> +         * unnecessary. */
> >>>>>>> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
> >>>>>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
> >>>>>>> +        out_nh->ip6_src = out_nh->ip6_dst;
> >>>>>>> +        out_nh->ip6_dst = tmp;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    pin->packet = dp_packet_data(pkt_out);
> >>>>>>> +    pin->packet_len = dp_packet_size(pkt_out);
> >>>>>>> +
> >>>>>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
> >>>>>>> +    dp_packet_delete(pkt_out);
> >>>>>>> +}
> >>>>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> >>>>>>> index b4e5acabb9..bf1fe848b7 100644
> >>>>>>> --- a/include/ovn/actions.h
> >>>>>>> +++ b/include/ovn/actions.h
> >>>>>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
> >>>>>>>      OVNACT(DHCP6_REPLY,       ovnact_null)            \
> >>>>>>>      OVNACT(ICMP6_ERROR,       ovnact_nest)            \
> >>>>>>>      OVNACT(REJECT,            ovnact_nest)            \
> >>>>>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
> >>>>>>>
> >>>>>>>  /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
> >>>>>>>  enum OVS_PACKED_ENUM ovnact_type {
> >>>>>>> @@ -612,6 +613,9 @@ enum action_opcode {
> >>>>>>>       * The actions, in OpenFlow 1.3 format, follow the action_header.
> >>>>>>>       */
> >>>>>>>      ACTION_OPCODE_REJECT,
> >>>>>>> +
> >>>>>>> +    /* ip.src <-> ip.dst */
> >>>>>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>>>>>  };
> >>>>>>>
> >>>>>>>  /* Header. */
> >>>>>>> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
> >>>>>>> index ac6f2f909b..bb6daa50ca 100644
> >>>>>>> --- a/include/ovn/logical-fields.h
> >>>>>>> +++ b/include/ovn/logical-fields.h
> >>>>>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
> >>>>>>>       */
> >>>>>>>      OVN_ICMP6_FRAG_MTU,
> >>>>>>>
> >>>>>>> +    /*
> >>>>>>> +     * Name: "ip.src"
> >>>>>>> +     * Type: be128
> >>>>>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
> >>>>>>> +     *              or sets the field MFF_IPV6_SRC if
> >>>>>>> +     *              eth.type == 0x86dd (IPV6).
> >>>>>>> +     */
> >>>>>>> +    OVN_IP_SRC,
> >>>>>>> +
> >>>>>>> +    /*
> >>>>>>> +     * Name: "ip.dst"
> >>>>>>> +     * Type: be128
> >>>>>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
> >>>>>>> +     *              or sets the field MFF_IPV6_DST if
> >>>>>>> +     *              eth.type == 0x86dd (IPV6).
> >>>>>>> +     */
> >>>>>>> +    OVN_IP_DST,
> >>>>>>> +
> >>>>>>>      OVN_FIELD_N_IDS
> >>>>>>>  };
> >>>>>>>
> >>>>>>> diff --git a/lib/actions.c b/lib/actions.c
> >>>>>>> index 1e1bdeff24..e9c77d2a0a 100644
> >>>>>>> --- a/lib/actions.c
> >>>>>>> +++ b/lib/actions.c
> >>>>>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
> >>>>>>>  {
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +static bool
> >>>>>>> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
> >>>>>>> +{
> >>>>>>> +    switch (field->symbol->ovn_field->id) {
> >>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>>>> +        return true;
> >>>>>>> +
> >>>>>>> +    case OVN_IP_SRC:
> >>>>>>> +    case OVN_IP_DST:
> >>>>>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
> >>>>>>> +                    field->symbol->name);
> >>>>>>> +        return false;
> >>>>>>> +
> >>>>>>> +    case OVN_FIELD_N_IDS:
> >>>>>>> +    default:
> >>>>>>> +        OVS_NOT_REACHED();
> >>>>>>> +    }
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>  static void
> >>>>>>>  parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
> >>>>>>>  {
> >>>>>>>      size_t ofs = ctx->ovnacts->size;
> >>>>>>>      struct ovnact_load *load;
> >>>>>>>      if (lhs->symbol->ovn_field) {
> >>>>>>> +        if (!check_ovnfield_load(ctx, lhs)) {
> >>>>>>> +            return;
> >>>>>>> +        }
> >>>>>>>          load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
> >>>>>>>      } else {
> >>>>>>>          load = ovnact_put_LOAD(ctx->ovnacts);
> >>>>>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
> >>>>>>>      format_assignment(move, "<->", s);
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +static void
> >>>>>>> +parse_ovnfield_exchange(struct action_context *ctx,
> >>>>>>> +                        const struct expr_field *lhs,
> >>>>>>> +                        const struct expr_field *rhs)
> >>>>>>> +{
> >>>>>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
> >>>>>>> +        lexer_error(ctx->lexer,
> >>>>>>> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
> >>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>>>> +        return;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>>>>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>>>>>> +        lexer_error(ctx->lexer,
> >>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>>>> +        return;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>>>>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>>>>>> +        lexer_error(ctx->lexer,
> >>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>>>> +        return;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
> >>>>>>> +        lexer_error(ctx->lexer,
> >>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
> >>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>>>> +        return;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
> >>>>>>> +    move->lhs = *lhs;
> >>>>>>> +    move->rhs = *rhs;
> >>>>>>> +}
> >>>>>>> +
> >>>>>>>  static void
> >>>>>>>  parse_assignment_action(struct action_context *ctx, bool exchange,
> >>>>>>>                          const struct expr_field *lhs)
> >>>>>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
> >>>>>>>          return;
> >>>>>>>      }
> >>>>>>>
> >>>>>>> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
> >>>>>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
> >>>>>>> +        return;
> >>>>>>> +    }
> >>>>>>> +
> >>>>>>>      const struct expr_symbol *ls = lhs->symbol;
> >>>>>>>      const struct expr_symbol *rs = rhs.symbol;
> >>>>>>>      if ((ls->width != 0) != (rs->width != 0)) {
> >>>>>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
> >>>>>>>                        ntohs(load->imm.value.be16_int));
> >>>>>>>          break;
> >>>>>>>
> >>>>>>> +    case OVN_IP_SRC:
> >>>>>>> +    case OVN_IP_DST:
> >>>>>>>      case OVN_FIELD_N_IDS:
> >>>>>>>      default:
> >>>>>>>          OVS_NOT_REACHED();
> >>>>>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
> >>>>>>>          encode_finish_controller_op(oc_offset, ofpacts);
> >>>>>>>          break;
> >>>>>>>      }
> >>>>>>> +
> >>>>>>> +    case OVN_IP_SRC:
> >>>>>>> +    case OVN_IP_DST:
> >>>>>>>      case OVN_FIELD_N_IDS:
> >>>>>>>      default:
> >>>>>>>          OVS_NOT_REACHED();
> >>>>>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
> >>>>>>>      free(fwd_group->child_ports);
> >>>>>>>  }
> >>>>>>>
> >>>>>>> +static void
> >>>>>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
> >>>>>>> +{
> >>>>>>> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
> >>>>>>> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
> >>>>>>> +    switch (lhs->id) {
> >>>>>>> +    case OVN_IP_SRC:
> >>>>>>> +    case OVN_IP_DST:
> >>>>>>> +        break;
> >>>>>>> +
> >>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>>>> +    case OVN_FIELD_N_IDS:
> >>>>>>> +    default:
> >>>>>>> +        OVS_NOT_REACHED();
> >>>>>>> +    }
> >>>>>>
> >>>>>> Nit: I would use the shorter:
> >>>>>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
> >>>>>
> >>>>> You mean to drop the switch right ? If so, Ack.
> >>>>>
> >>>>
> >>>> Right, I meant ovs_assert instead of switch.
> >>>>
> >>>>>
> >>>>>>
> >>>>>>> +
> >>>>>>> +    switch (rhs->id) {
> >>>>>>> +    case OVN_IP_SRC:
> >>>>>>> +    case OVN_IP_DST:
> >>>>>>> +        break;
> >>>>>>> +
> >>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>>>> +    case OVN_FIELD_N_IDS:
> >>>>>>> +    default:
> >>>>>>> +        OVS_NOT_REACHED();
> >>>>>>> +    }
> >>>>>>
> >>>>>> Nit: I would use the shorter:
> >>>>>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
> >>>>>>
> >>>>>>> +
> >>>>>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
> >>>>>>> +}
> >>>>>>> +
> >>>>>>> +static void
> >>>>>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
> >>>>>>> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
> >>>>>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
> >>>>>>> +{
> >>>>>>> +    /* Right now we only support exchanging the IPs in
> >>>>>>> +     * OVN fields (ip.src <-> ip.dst). */
> >>>>>>> +    size_t oc_offset =
> >>>>>>> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>>>>> +                                   true, NX_CTLR_NO_METER, ofpacts);
> >>>>>>> +    encode_finish_controller_op(oc_offset, ofpacts);
> >>>>>>
> >>>>>> I think this means that all packets matching a flow with action "reject { ...
> >>>>>> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
> >>>>>> reject action, one for the ovnfield-exchange action.
> >>>>>>
> >>>>>> Is that a concern wrt. performance?
> >>>>>
> >>>>> Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
> >>>>> icmp4.frag_mtu = 1500; ...};
> >>>>>
> >>>>> If performance is a concern, we could just drop ... ip.src <-> ip.dst
> >>>>> in reject and let reject action
> >>>>> swap the ip src with ip destination. I thought about that and my take
> >>>>> is that since reject { } results in
> >>>>> TCP RST or ICMP unreachable packet, it should be fine. I see these
> >>>>> packets as more of control packets.
> >>>>>
> >>>>> I think it should be ok. Let me know what you think.
> >>>>>
> >>>>
> >>>> I think I like the approach of "reject" action implicitly swapping IPs best.
> >>>> In the end it's not like we could implement reject withough swapping IP dst
> >>>> with IP src so why not do it as part of the reject action.
> >>>>
> >>>
> >>> Agree. reject action is expected to be used to send the generated tcp
> >>> rst/icmp unreachable
> >>> packet back to the sender.
> >>>
> >>> I'm ok to change the reject action to just swap the IPs internally if
> >>> everyone is fine.
> >>>
> >>> @Mark Michelson  @Han Zhou  Do you have any comments here ?
> >>>
> >>> Option 1 - reject { ...}    -> The reject action handling in pinctrl.c
> >>> will swap the ip source and destination
> >>>
> >>> Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
> >>> approach in this patch series. This results in
> >>> 2 packet-ins.
> >>>
> >>> I personally prefer (2) since in OVN we normally tell what inner
> >>> actions to apply for many OVN actions.
> >>> But I have no strong preference and I'm fine changing to (1).
> >>>
> >>> If we chose option (1), we could add a comment inside the action like
> >>> - reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
> >>> eth.dst; output ; }
> >>>
> >>
> >> Then, why not swap ETH addresses implicitly too?
> >
> > As I mentioned above, I'd prefer if ovn-northd says what actions to
> > apply for the reject
> > packet. Just like for icmp4 or arp actions, ovn-northd says what
> > actions to apply for the
> > transformed packet from the source packet.
> >
>
> I was thinking that if we go this way when we build the packet in
> ovn-controller it might look weird to a reader of the pinctrl code to see
> something like:
>
> /* We only swap IPs because ETH addresses are swapped in OVS. */
> pinctrl_compose_ipv4(&packet, ip_flow->dl_src, ip_flow->dl_dst,
>                      ip_flow->nw_dst, ip_flow->nw_src, ...)

Right. I agree. It makes sense to swap both eth and ip if we go this
way. Sorry for the confusion.

Thanks
Numan

>
> Regards,
> Dumitru
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Mark Michelson Oct. 15, 2020, 8:37 p.m. UTC | #9
On 10/14/20 8:38 AM, Numan Siddique wrote:
> On Wed, Oct 14, 2020 at 5:51 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>
>> On 10/14/20 2:13 PM, Numan Siddique wrote:
>>> On Wed, Oct 14, 2020 at 5:34 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>>>
>>>> On 10/14/20 1:54 PM, Numan Siddique wrote:
>>>>> On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>>>>>
>>>>>> On 10/14/20 12:50 PM, Numan Siddique wrote:
>>>>>>> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com> wrote:
>>>>>>>>
>>>>>>>> On 10/14/20 11:15 AM, numans@ovn.org wrote:
>>>>>>>>> From: Numan Siddique <numans@ovn.org>
>>>>>>>>>
>>>>>>>>> Thee new fields are version independent and these can be used in any
>>>>>>>>
>>>>>>>> Hi Numan,
>>>>>>>>
>>>>>>>> Nit: s/Thee/These
>>>>>>>>
>>>>>>>>> OVN action. Right now the usage of these fields are restricted to
>>>>>>>>> exchanging the IP source and destination fields.
>>>>>>>>>
>>>>>>>>> Eg. reject {... ip.src <-> ip.dst ... };
>>>>>>>>>
>>>>>>>>> "ip.src <-> ip.dst" translates to controller action with "pause" flag set.
>>>>>>>>> When pinctrl thread receives the packet in, it checks the IP version of the
>>>>>>>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <-> ip6.dst" and
>>>>>>>>> resumes the packet to continue with the pipeline.
>>>>>>>>>
>>>>>>>>> Upcoming patch will make use of these fields.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Numan Siddique <numans@ovn.org>
>>>>>>>>> ---
>>>>>>>>>   controller/pinctrl.c         |  49 +++++++++++++++
>>>>>>>>>   include/ovn/actions.h        |   4 ++
>>>>>>>>>   include/ovn/logical-fields.h |  18 ++++++
>>>>>>>>>   lib/actions.c                | 118 +++++++++++++++++++++++++++++++++++
>>>>>>>>>   lib/logical-fields.c         |  10 +++
>>>>>>>>>   ovn-sb.xml                   |  37 +++++++++++
>>>>>>>>>   tests/ovn.at                 |  27 ++++++++
>>>>>>>>>   utilities/ovn-trace.c        |  28 +++++++++
>>>>>>>>>   8 files changed, 291 insertions(+)
>>>>>>>>>
>>>>>>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
>>>>>>>>> index 631d058458..bc16a82404 100644
>>>>>>>>> --- a/controller/pinctrl.c
>>>>>>>>> +++ b/controller/pinctrl.c
>>>>>>>>> @@ -233,6 +233,11 @@ static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
>>>>>>>>>                                                struct ofputil_packet_in *pin,
>>>>>>>>>                                                struct ofpbuf *userdata,
>>>>>>>>>                                                struct ofpbuf *continuation);
>>>>>>>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
>>>>>>>>> +                                           const struct flow *in_flow,
>>>>>>>>> +                                           struct dp_packet *pkt_in,
>>>>>>>>> +                                           struct ofputil_packet_in *pin,
>>>>>>>>> +                                           struct ofpbuf *continuation);
>>>>>>>>>   static void
>>>>>>>>>   pinctrl_handle_event(struct ofpbuf *userdata)
>>>>>>>>>       OVS_REQUIRES(pinctrl_mutex);
>>>>>>>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
>>>>>>>>>           ovs_mutex_unlock(&pinctrl_mutex);
>>>>>>>>>           break;
>>>>>>>>>
>>>>>>>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
>>>>>>>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
>>>>>>>>> +                                       &continuation);
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>>       default:
>>>>>>>>>           VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
>>>>>>>>>                        ntohl(ah->opcode));
>>>>>>>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
>>>>>>>>>           svc_mon->next_send_time = time_msec() + svc_mon->interval;
>>>>>>>>>       }
>>>>>>>>>   }
>>>>>>>>> +
>>>>>>>>> +static void
>>>>>>>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
>>>>>>>>> +                               const struct flow *in_flow,
>>>>>>>>> +                               struct dp_packet *pkt_in,
>>>>>>>>> +                               struct ofputil_packet_in *pin,
>>>>>>>>> +                               struct ofpbuf *continuation)
>>>>>>>>> +{
>>>>>>>>> +    enum ofp_version version = rconn_get_version(swconn);
>>>>>>>>> +    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
>>>>>>>>> +    struct dp_packet *pkt_out;
>>>>>>>>> +
>>>>>>>>> +    pkt_out = dp_packet_clone(pkt_in);
>>>>>>>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
>>>>>>>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
>>>>>>>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
>>>>>>>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
>>>>>>>>> +
>>>>>>>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
>>>>>>>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
>>>>>>>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
>>>>>>>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
>>>>>>>>> +    } else {
>>>>>>>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
>>>>>>>>> +         * We could also use packet_set_ipv6() here, but that would require
>>>>>>>>> +         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
>>>>>>>>> +         * unnecessary. */
>>>>>>>>> +        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
>>>>>>>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
>>>>>>>>> +        out_nh->ip6_src = out_nh->ip6_dst;
>>>>>>>>> +        out_nh->ip6_dst = tmp;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    pin->packet = dp_packet_data(pkt_out);
>>>>>>>>> +    pin->packet_len = dp_packet_size(pkt_out);
>>>>>>>>> +
>>>>>>>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
>>>>>>>>> +    dp_packet_delete(pkt_out);
>>>>>>>>> +}
>>>>>>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
>>>>>>>>> index b4e5acabb9..bf1fe848b7 100644
>>>>>>>>> --- a/include/ovn/actions.h
>>>>>>>>> +++ b/include/ovn/actions.h
>>>>>>>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
>>>>>>>>>       OVNACT(DHCP6_REPLY,       ovnact_null)            \
>>>>>>>>>       OVNACT(ICMP6_ERROR,       ovnact_nest)            \
>>>>>>>>>       OVNACT(REJECT,            ovnact_nest)            \
>>>>>>>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
>>>>>>>>>
>>>>>>>>>   /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
>>>>>>>>>   enum OVS_PACKED_ENUM ovnact_type {
>>>>>>>>> @@ -612,6 +613,9 @@ enum action_opcode {
>>>>>>>>>        * The actions, in OpenFlow 1.3 format, follow the action_header.
>>>>>>>>>        */
>>>>>>>>>       ACTION_OPCODE_REJECT,
>>>>>>>>> +
>>>>>>>>> +    /* ip.src <-> ip.dst */
>>>>>>>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
>>>>>>>>>   };
>>>>>>>>>
>>>>>>>>>   /* Header. */
>>>>>>>>> diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
>>>>>>>>> index ac6f2f909b..bb6daa50ca 100644
>>>>>>>>> --- a/include/ovn/logical-fields.h
>>>>>>>>> +++ b/include/ovn/logical-fields.h
>>>>>>>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
>>>>>>>>>        */
>>>>>>>>>       OVN_ICMP6_FRAG_MTU,
>>>>>>>>>
>>>>>>>>> +    /*
>>>>>>>>> +     * Name: "ip.src"
>>>>>>>>> +     * Type: be128
>>>>>>>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
>>>>>>>>> +     *              or sets the field MFF_IPV6_SRC if
>>>>>>>>> +     *              eth.type == 0x86dd (IPV6).
>>>>>>>>> +     */
>>>>>>>>> +    OVN_IP_SRC,
>>>>>>>>> +
>>>>>>>>> +    /*
>>>>>>>>> +     * Name: "ip.dst"
>>>>>>>>> +     * Type: be128
>>>>>>>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
>>>>>>>>> +     *              or sets the field MFF_IPV6_DST if
>>>>>>>>> +     *              eth.type == 0x86dd (IPV6).
>>>>>>>>> +     */
>>>>>>>>> +    OVN_IP_DST,
>>>>>>>>> +
>>>>>>>>>       OVN_FIELD_N_IDS
>>>>>>>>>   };
>>>>>>>>>
>>>>>>>>> diff --git a/lib/actions.c b/lib/actions.c
>>>>>>>>> index 1e1bdeff24..e9c77d2a0a 100644
>>>>>>>>> --- a/lib/actions.c
>>>>>>>>> +++ b/lib/actions.c
>>>>>>>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
>>>>>>>>>   {
>>>>>>>>>   }
>>>>>>>>>
>>>>>>>>> +static bool
>>>>>>>>> +check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
>>>>>>>>> +{
>>>>>>>>> +    switch (field->symbol->ovn_field->id) {
>>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>>>>>> +        return true;
>>>>>>>>> +
>>>>>>>>> +    case OVN_IP_SRC:
>>>>>>>>> +    case OVN_IP_DST:
>>>>>>>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
>>>>>>>>> +                    field->symbol->name);
>>>>>>>>> +        return false;
>>>>>>>>> +
>>>>>>>>> +    case OVN_FIELD_N_IDS:
>>>>>>>>> +    default:
>>>>>>>>> +        OVS_NOT_REACHED();
>>>>>>>>> +    }
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>   static void
>>>>>>>>>   parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
>>>>>>>>>   {
>>>>>>>>>       size_t ofs = ctx->ovnacts->size;
>>>>>>>>>       struct ovnact_load *load;
>>>>>>>>>       if (lhs->symbol->ovn_field) {
>>>>>>>>> +        if (!check_ovnfield_load(ctx, lhs)) {
>>>>>>>>> +            return;
>>>>>>>>> +        }
>>>>>>>>>           load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
>>>>>>>>>       } else {
>>>>>>>>>           load = ovnact_put_LOAD(ctx->ovnacts);
>>>>>>>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
>>>>>>>>>       format_assignment(move, "<->", s);
>>>>>>>>>   }
>>>>>>>>>
>>>>>>>>> +static void
>>>>>>>>> +parse_ovnfield_exchange(struct action_context *ctx,
>>>>>>>>> +                        const struct expr_field *lhs,
>>>>>>>>> +                        const struct expr_field *rhs)
>>>>>>>>> +{
>>>>>>>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
>>>>>>>>> +        lexer_error(ctx->lexer,
>>>>>>>>> +                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
>>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>>>>>> +        return;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
>>>>>>>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
>>>>>>>>> +        lexer_error(ctx->lexer,
>>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>>>>>> +        return;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
>>>>>>>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
>>>>>>>>> +        lexer_error(ctx->lexer,
>>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>>>>>> +        return;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
>>>>>>>>> +        lexer_error(ctx->lexer,
>>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn field (%s).",
>>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
>>>>>>>>> +        return;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>> +    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
>>>>>>>>> +    move->lhs = *lhs;
>>>>>>>>> +    move->rhs = *rhs;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>   static void
>>>>>>>>>   parse_assignment_action(struct action_context *ctx, bool exchange,
>>>>>>>>>                           const struct expr_field *lhs)
>>>>>>>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct action_context *ctx, bool exchange,
>>>>>>>>>           return;
>>>>>>>>>       }
>>>>>>>>>
>>>>>>>>> +    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
>>>>>>>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
>>>>>>>>> +        return;
>>>>>>>>> +    }
>>>>>>>>> +
>>>>>>>>>       const struct expr_symbol *ls = lhs->symbol;
>>>>>>>>>       const struct expr_symbol *rs = rhs.symbol;
>>>>>>>>>       if ((ls->width != 0) != (rs->width != 0)) {
>>>>>>>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
>>>>>>>>>                         ntohs(load->imm.value.be16_int));
>>>>>>>>>           break;
>>>>>>>>>
>>>>>>>>> +    case OVN_IP_SRC:
>>>>>>>>> +    case OVN_IP_DST:
>>>>>>>>>       case OVN_FIELD_N_IDS:
>>>>>>>>>       default:
>>>>>>>>>           OVS_NOT_REACHED();
>>>>>>>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct ovnact_load *load,
>>>>>>>>>           encode_finish_controller_op(oc_offset, ofpacts);
>>>>>>>>>           break;
>>>>>>>>>       }
>>>>>>>>> +
>>>>>>>>> +    case OVN_IP_SRC:
>>>>>>>>> +    case OVN_IP_DST:
>>>>>>>>>       case OVN_FIELD_N_IDS:
>>>>>>>>>       default:
>>>>>>>>>           OVS_NOT_REACHED();
>>>>>>>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
>>>>>>>>>       free(fwd_group->child_ports);
>>>>>>>>>   }
>>>>>>>>>
>>>>>>>>> +static void
>>>>>>>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
>>>>>>>>> +{
>>>>>>>>> +    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
>>>>>>>>> +    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
>>>>>>>>> +    switch (lhs->id) {
>>>>>>>>> +    case OVN_IP_SRC:
>>>>>>>>> +    case OVN_IP_DST:
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>>>>>> +    case OVN_FIELD_N_IDS:
>>>>>>>>> +    default:
>>>>>>>>> +        OVS_NOT_REACHED();
>>>>>>>>> +    }
>>>>>>>>
>>>>>>>> Nit: I would use the shorter:
>>>>>>>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
>>>>>>>
>>>>>>> You mean to drop the switch right ? If so, Ack.
>>>>>>>
>>>>>>
>>>>>> Right, I meant ovs_assert instead of switch.
>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +    switch (rhs->id) {
>>>>>>>>> +    case OVN_IP_SRC:
>>>>>>>>> +    case OVN_IP_DST:
>>>>>>>>> +        break;
>>>>>>>>> +
>>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
>>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
>>>>>>>>> +    case OVN_FIELD_N_IDS:
>>>>>>>>> +    default:
>>>>>>>>> +        OVS_NOT_REACHED();
>>>>>>>>> +    }
>>>>>>>>
>>>>>>>> Nit: I would use the shorter:
>>>>>>>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
>>>>>>>>
>>>>>>>>> +
>>>>>>>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static void
>>>>>>>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
>>>>>>>>> +                         const struct ovnact_encode_params *ep OVS_UNUSED,
>>>>>>>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
>>>>>>>>> +{
>>>>>>>>> +    /* Right now we only support exchanging the IPs in
>>>>>>>>> +     * OVN fields (ip.src <-> ip.dst). */
>>>>>>>>> +    size_t oc_offset =
>>>>>>>>> +        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
>>>>>>>>> +                                   true, NX_CTLR_NO_METER, ofpacts);
>>>>>>>>> +    encode_finish_controller_op(oc_offset, ofpacts);
>>>>>>>>
>>>>>>>> I think this means that all packets matching a flow with action "reject { ...
>>>>>>>> ip.src <-> ip.dst ... }" will generate two packet-ins, right? One for the
>>>>>>>> reject action, one for the ovnfield-exchange action.
>>>>>>>>
>>>>>>>> Is that a concern wrt. performance?
>>>>>>>
>>>>>>> Yes. It would have 2 packet-ins. Just like how we handle - icmp4 {...
>>>>>>> icmp4.frag_mtu = 1500; ...};
>>>>>>>
>>>>>>> If performance is a concern, we could just drop ... ip.src <-> ip.dst
>>>>>>> in reject and let reject action
>>>>>>> swap the ip src with ip destination. I thought about that and my take
>>>>>>> is that since reject { } results in
>>>>>>> TCP RST or ICMP unreachable packet, it should be fine. I see these
>>>>>>> packets as more of control packets.
>>>>>>>
>>>>>>> I think it should be ok. Let me know what you think.
>>>>>>>
>>>>>>
>>>>>> I think I like the approach of "reject" action implicitly swapping IPs best.
>>>>>> In the end it's not like we could implement reject withough swapping IP dst
>>>>>> with IP src so why not do it as part of the reject action.
>>>>>>
>>>>>
>>>>> Agree. reject action is expected to be used to send the generated tcp
>>>>> rst/icmp unreachable
>>>>> packet back to the sender.
>>>>>
>>>>> I'm ok to change the reject action to just swap the IPs internally if
>>>>> everyone is fine.
>>>>>
>>>>> @Mark Michelson  @Han Zhou  Do you have any comments here ?
>>>>>
>>>>> Option 1 - reject { ...}    -> The reject action handling in pinctrl.c
>>>>> will swap the ip source and destination
>>>>>
>>>>> Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
>>>>> approach in this patch series. This results in
>>>>> 2 packet-ins.
>>>>>
>>>>> I personally prefer (2) since in OVN we normally tell what inner
>>>>> actions to apply for many OVN actions.
>>>>> But I have no strong preference and I'm fine changing to (1).
>>>>>
>>>>> If we chose option (1), we could add a comment inside the action like
>>>>> - reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
>>>>> eth.dst; output ; }
>>>>>
>>>>
>>>> Then, why not swap ETH addresses implicitly too?
>>>
>>> As I mentioned above, I'd prefer if ovn-northd says what actions to
>>> apply for the reject
>>> packet. Just like for icmp4 or arp actions, ovn-northd says what
>>> actions to apply for the
>>> transformed packet from the source packet.
>>>
>>
>> I was thinking that if we go this way when we build the packet in
>> ovn-controller it might look weird to a reader of the pinctrl code to see
>> something like:
>>
>> /* We only swap IPs because ETH addresses are swapped in OVS. */
>> pinctrl_compose_ipv4(&packet, ip_flow->dl_src, ip_flow->dl_dst,
>>                       ip_flow->nw_dst, ip_flow->nw_src, ...)
> 
> Right. I agree. It makes sense to swap both eth and ip if we go this
> way. Sorry for the confusion.
> 
> Thanks
> Numan

I've been reading through this, thinking about it, and have changed my 
mind on the matter probably 3 or 4 times while trying to write a response.

In short, I think that the savings in flow creation introduced by this 
patch series are brilliant, but I'm hesitant about the second packet-in. 
I have a feeling this could be more costly than the frag_mtu examples 
that Numan pointed out earlier in the thread. I think that the savings 
in flow creation should make us consider forging new ground with regards 
to what happens implicitly during a controller action. I think we have 
the facilities to get the point across to users what is going on even if 
there are no explicit exchange actions in the generated logical flow or 
OF flow.

I thought through this to try to figure out if there's a different way 
forward that both allows us to reduce the number of flows and still be 
explicit with the actions being performed. Unfortunately, the only ways 
I could think of required either parsing ACL matches in ovn-northd or 
generating just as many flows as before.

In conclusion, I'm fine with the reject action implicitly swapping 
values, but it needs to be well documented.

> 
>>
>> Regards,
>> Dumitru
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Han Zhou Oct. 16, 2020, 6:23 p.m. UTC | #10
On Thu, Oct 15, 2020 at 1:38 PM Mark Michelson <mmichels@redhat.com> wrote:
>
> On 10/14/20 8:38 AM, Numan Siddique wrote:
> > On Wed, Oct 14, 2020 at 5:51 PM Dumitru Ceara <dceara@redhat.com> wrote:
> >>
> >> On 10/14/20 2:13 PM, Numan Siddique wrote:
> >>> On Wed, Oct 14, 2020 at 5:34 PM Dumitru Ceara <dceara@redhat.com>
wrote:
> >>>>
> >>>> On 10/14/20 1:54 PM, Numan Siddique wrote:
> >>>>> On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara@redhat.com>
wrote:
> >>>>>>
> >>>>>> On 10/14/20 12:50 PM, Numan Siddique wrote:
> >>>>>>> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com>
wrote:
> >>>>>>>>
> >>>>>>>> On 10/14/20 11:15 AM, numans@ovn.org wrote:
> >>>>>>>>> From: Numan Siddique <numans@ovn.org>
> >>>>>>>>>
> >>>>>>>>> Thee new fields are version independent and these can be used
in any
> >>>>>>>>
> >>>>>>>> Hi Numan,
> >>>>>>>>
> >>>>>>>> Nit: s/Thee/These
> >>>>>>>>
> >>>>>>>>> OVN action. Right now the usage of these fields are restricted
to
> >>>>>>>>> exchanging the IP source and destination fields.
> >>>>>>>>>
> >>>>>>>>> Eg. reject {... ip.src <-> ip.dst ... };
> >>>>>>>>>
> >>>>>>>>> "ip.src <-> ip.dst" translates to controller action with
"pause" flag set.
> >>>>>>>>> When pinctrl thread receives the packet in, it checks the IP
version of the
> >>>>>>>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <->
ip6.dst" and
> >>>>>>>>> resumes the packet to continue with the pipeline.
> >>>>>>>>>
> >>>>>>>>> Upcoming patch will make use of these fields.
> >>>>>>>>>
> >>>>>>>>> Signed-off-by: Numan Siddique <numans@ovn.org>
> >>>>>>>>> ---
> >>>>>>>>>   controller/pinctrl.c         |  49 +++++++++++++++
> >>>>>>>>>   include/ovn/actions.h        |   4 ++
> >>>>>>>>>   include/ovn/logical-fields.h |  18 ++++++
> >>>>>>>>>   lib/actions.c                | 118
+++++++++++++++++++++++++++++++++++
> >>>>>>>>>   lib/logical-fields.c         |  10 +++
> >>>>>>>>>   ovn-sb.xml                   |  37 +++++++++++
> >>>>>>>>>   tests/ovn.at                 |  27 ++++++++
> >>>>>>>>>   utilities/ovn-trace.c        |  28 +++++++++
> >>>>>>>>>   8 files changed, 291 insertions(+)
> >>>>>>>>>
> >>>>>>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> >>>>>>>>> index 631d058458..bc16a82404 100644
> >>>>>>>>> --- a/controller/pinctrl.c
> >>>>>>>>> +++ b/controller/pinctrl.c
> >>>>>>>>> @@ -233,6 +233,11 @@ static void
pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
> >>>>>>>>>                                                struct
ofputil_packet_in *pin,
> >>>>>>>>>                                                struct ofpbuf
*userdata,
> >>>>>>>>>                                                struct ofpbuf
*continuation);
> >>>>>>>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn
*swconn,
> >>>>>>>>> +                                           const struct flow
*in_flow,
> >>>>>>>>> +                                           struct dp_packet
*pkt_in,
> >>>>>>>>> +                                           struct
ofputil_packet_in *pin,
> >>>>>>>>> +                                           struct ofpbuf
*continuation);
> >>>>>>>>>   static void
> >>>>>>>>>   pinctrl_handle_event(struct ofpbuf *userdata)
> >>>>>>>>>       OVS_REQUIRES(pinctrl_mutex);
> >>>>>>>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn,
const struct ofp_header *msg)
> >>>>>>>>>           ovs_mutex_unlock(&pinctrl_mutex);
> >>>>>>>>>           break;
> >>>>>>>>>
> >>>>>>>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
> >>>>>>>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers,
&packet, &pin,
> >>>>>>>>> +                                       &continuation);
> >>>>>>>>> +        break;
> >>>>>>>>> +
> >>>>>>>>>       default:
> >>>>>>>>>           VLOG_WARN_RL(&rl, "unrecognized packet-in opcode
%"PRIu32,
> >>>>>>>>>                        ntohl(ah->opcode));
> >>>>>>>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn
*swconn, const struct flow *ip_flow,
> >>>>>>>>>           svc_mon->next_send_time = time_msec() +
svc_mon->interval;
> >>>>>>>>>       }
> >>>>>>>>>   }
> >>>>>>>>> +
> >>>>>>>>> +static void
> >>>>>>>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> >>>>>>>>> +                               const struct flow *in_flow,
> >>>>>>>>> +                               struct dp_packet *pkt_in,
> >>>>>>>>> +                               struct ofputil_packet_in *pin,
> >>>>>>>>> +                               struct ofpbuf *continuation)
> >>>>>>>>> +{
> >>>>>>>>> +    enum ofp_version version = rconn_get_version(swconn);
> >>>>>>>>> +    enum ofputil_protocol proto =
ofputil_protocol_from_ofp_version(version);
> >>>>>>>>> +    struct dp_packet *pkt_out;
> >>>>>>>>> +
> >>>>>>>>> +    pkt_out = dp_packet_clone(pkt_in);
> >>>>>>>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
> >>>>>>>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
> >>>>>>>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
> >>>>>>>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> >>>>>>>>> +
> >>>>>>>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
> >>>>>>>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
> >>>>>>>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst,
in_flow->nw_src,
> >>>>>>>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
> >>>>>>>>> +    } else {
> >>>>>>>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
> >>>>>>>>> +         * We could also use packet_set_ipv6() here, but that
would require
> >>>>>>>>> +         * to extract the 'tc' and 'label' from
in_nh->ip6_flow which seems
> >>>>>>>>> +         * unnecessary. */
> >>>>>>>>> +        struct ovs_16aligned_ip6_hdr *out_nh =
dp_packet_l3(pkt_out);
> >>>>>>>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
> >>>>>>>>> +        out_nh->ip6_src = out_nh->ip6_dst;
> >>>>>>>>> +        out_nh->ip6_dst = tmp;
> >>>>>>>>> +    }
> >>>>>>>>> +
> >>>>>>>>> +    pin->packet = dp_packet_data(pkt_out);
> >>>>>>>>> +    pin->packet_len = dp_packet_size(pkt_out);
> >>>>>>>>> +
> >>>>>>>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation,
proto));
> >>>>>>>>> +    dp_packet_delete(pkt_out);
> >>>>>>>>> +}
> >>>>>>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> >>>>>>>>> index b4e5acabb9..bf1fe848b7 100644
> >>>>>>>>> --- a/include/ovn/actions.h
> >>>>>>>>> +++ b/include/ovn/actions.h
> >>>>>>>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
> >>>>>>>>>       OVNACT(DHCP6_REPLY,       ovnact_null)            \
> >>>>>>>>>       OVNACT(ICMP6_ERROR,       ovnact_nest)            \
> >>>>>>>>>       OVNACT(REJECT,            ovnact_nest)            \
> >>>>>>>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
> >>>>>>>>>
> >>>>>>>>>   /* enum ovnact_type, with a member OVNACT_<ENUM> for each
action. */
> >>>>>>>>>   enum OVS_PACKED_ENUM ovnact_type {
> >>>>>>>>> @@ -612,6 +613,9 @@ enum action_opcode {
> >>>>>>>>>        * The actions, in OpenFlow 1.3 format, follow the
action_header.
> >>>>>>>>>        */
> >>>>>>>>>       ACTION_OPCODE_REJECT,
> >>>>>>>>> +
> >>>>>>>>> +    /* ip.src <-> ip.dst */
> >>>>>>>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>>>>>>>   };
> >>>>>>>>>
> >>>>>>>>>   /* Header. */
> >>>>>>>>> diff --git a/include/ovn/logical-fields.h
b/include/ovn/logical-fields.h
> >>>>>>>>> index ac6f2f909b..bb6daa50ca 100644
> >>>>>>>>> --- a/include/ovn/logical-fields.h
> >>>>>>>>> +++ b/include/ovn/logical-fields.h
> >>>>>>>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
> >>>>>>>>>        */
> >>>>>>>>>       OVN_ICMP6_FRAG_MTU,
> >>>>>>>>>
> >>>>>>>>> +    /*
> >>>>>>>>> +     * Name: "ip.src"
> >>>>>>>>> +     * Type: be128
> >>>>>>>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type ==
0x800 (IPv4)
> >>>>>>>>> +     *              or sets the field MFF_IPV6_SRC if
> >>>>>>>>> +     *              eth.type == 0x86dd (IPV6).
> >>>>>>>>> +     */
> >>>>>>>>> +    OVN_IP_SRC,
> >>>>>>>>> +
> >>>>>>>>> +    /*
> >>>>>>>>> +     * Name: "ip.dst"
> >>>>>>>>> +     * Type: be128
> >>>>>>>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type ==
0x800 (IPv4)
> >>>>>>>>> +     *              or sets the field MFF_IPV6_DST if
> >>>>>>>>> +     *              eth.type == 0x86dd (IPV6).
> >>>>>>>>> +     */
> >>>>>>>>> +    OVN_IP_DST,
> >>>>>>>>> +
> >>>>>>>>>       OVN_FIELD_N_IDS
> >>>>>>>>>   };
> >>>>>>>>>
> >>>>>>>>> diff --git a/lib/actions.c b/lib/actions.c
> >>>>>>>>> index 1e1bdeff24..e9c77d2a0a 100644
> >>>>>>>>> --- a/lib/actions.c
> >>>>>>>>> +++ b/lib/actions.c
> >>>>>>>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a
OVS_UNUSED)
> >>>>>>>>>   {
> >>>>>>>>>   }
> >>>>>>>>>
> >>>>>>>>> +static bool
> >>>>>>>>> +check_ovnfield_load(struct action_context *ctx, const struct
expr_field *field)
> >>>>>>>>> +{
> >>>>>>>>> +    switch (field->symbol->ovn_field->id) {
> >>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>>>>>> +        return true;
> >>>>>>>>> +
> >>>>>>>>> +    case OVN_IP_SRC:
> >>>>>>>>> +    case OVN_IP_DST:
> >>>>>>>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn
field (%s).",
> >>>>>>>>> +                    field->symbol->name);
> >>>>>>>>> +        return false;
> >>>>>>>>> +
> >>>>>>>>> +    case OVN_FIELD_N_IDS:
> >>>>>>>>> +    default:
> >>>>>>>>> +        OVS_NOT_REACHED();
> >>>>>>>>> +    }
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>>   static void
> >>>>>>>>>   parse_LOAD(struct action_context *ctx, const struct
expr_field *lhs)
> >>>>>>>>>   {
> >>>>>>>>>       size_t ofs = ctx->ovnacts->size;
> >>>>>>>>>       struct ovnact_load *load;
> >>>>>>>>>       if (lhs->symbol->ovn_field) {
> >>>>>>>>> +        if (!check_ovnfield_load(ctx, lhs)) {
> >>>>>>>>> +            return;
> >>>>>>>>> +        }
> >>>>>>>>>           load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
> >>>>>>>>>       } else {
> >>>>>>>>>           load = ovnact_put_LOAD(ctx->ovnacts);
> >>>>>>>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move
*move, struct ds *s)
> >>>>>>>>>       format_assignment(move, "<->", s);
> >>>>>>>>>   }
> >>>>>>>>>
> >>>>>>>>> +static void
> >>>>>>>>> +parse_ovnfield_exchange(struct action_context *ctx,
> >>>>>>>>> +                        const struct expr_field *lhs,
> >>>>>>>>> +                        const struct expr_field *rhs)
> >>>>>>>>> +{
> >>>>>>>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
> >>>>>>>>> +        lexer_error(ctx->lexer,
> >>>>>>>>> +                    "Can't exchange ovn field with non ovn
field (%s <-> %s).",
> >>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>>>>>> +        return;
> >>>>>>>>> +    }
> >>>>>>>>> +
> >>>>>>>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>>>>>>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>>>>>>>> +        lexer_error(ctx->lexer,
> >>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn
field (%s).",
> >>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>>>>>> +        return;
> >>>>>>>>> +    }
> >>>>>>>>> +
> >>>>>>>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
> >>>>>>>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
> >>>>>>>>> +        lexer_error(ctx->lexer,
> >>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn
field (%s).",
> >>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>>>>>> +        return;
> >>>>>>>>> +    }
> >>>>>>>>> +
> >>>>>>>>> +    if (lhs->symbol->ovn_field->id ==
rhs->symbol->ovn_field->id) {
> >>>>>>>>> +        lexer_error(ctx->lexer,
> >>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn
field (%s).",
> >>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> >>>>>>>>> +        return;
> >>>>>>>>> +    }
> >>>>>>>>> +
> >>>>>>>>> +    struct ovnact_move *move =
ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
> >>>>>>>>> +    move->lhs = *lhs;
> >>>>>>>>> +    move->rhs = *rhs;
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>>   static void
> >>>>>>>>>   parse_assignment_action(struct action_context *ctx, bool
exchange,
> >>>>>>>>>                           const struct expr_field *lhs)
> >>>>>>>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct
action_context *ctx, bool exchange,
> >>>>>>>>>           return;
> >>>>>>>>>       }
> >>>>>>>>>
> >>>>>>>>> +    if (exchange && (lhs->symbol->ovn_field ||
rhs.symbol->ovn_field)) {
> >>>>>>>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
> >>>>>>>>> +        return;
> >>>>>>>>> +    }
> >>>>>>>>> +
> >>>>>>>>>       const struct expr_symbol *ls = lhs->symbol;
> >>>>>>>>>       const struct expr_symbol *rs = rhs.symbol;
> >>>>>>>>>       if ((ls->width != 0) != (rs->width != 0)) {
> >>>>>>>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct
ovnact_load *load , struct ds *s)
> >>>>>>>>>                         ntohs(load->imm.value.be16_int));
> >>>>>>>>>           break;
> >>>>>>>>>
> >>>>>>>>> +    case OVN_IP_SRC:
> >>>>>>>>> +    case OVN_IP_DST:
> >>>>>>>>>       case OVN_FIELD_N_IDS:
> >>>>>>>>>       default:
> >>>>>>>>>           OVS_NOT_REACHED();
> >>>>>>>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct
ovnact_load *load,
> >>>>>>>>>           encode_finish_controller_op(oc_offset, ofpacts);
> >>>>>>>>>           break;
> >>>>>>>>>       }
> >>>>>>>>> +
> >>>>>>>>> +    case OVN_IP_SRC:
> >>>>>>>>> +    case OVN_IP_DST:
> >>>>>>>>>       case OVN_FIELD_N_IDS:
> >>>>>>>>>       default:
> >>>>>>>>>           OVS_NOT_REACHED();
> >>>>>>>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct
ovnact_fwd_group *fwd_group)
> >>>>>>>>>       free(fwd_group->child_ports);
> >>>>>>>>>   }
> >>>>>>>>>
> >>>>>>>>> +static void
> >>>>>>>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move ,
struct ds *s)
> >>>>>>>>> +{
> >>>>>>>>> +    const struct ovn_field *lhs =
ovn_field_from_name(move->lhs.symbol->name);
> >>>>>>>>> +    const struct ovn_field *rhs =
ovn_field_from_name(move->rhs.symbol->name);
> >>>>>>>>> +    switch (lhs->id) {
> >>>>>>>>> +    case OVN_IP_SRC:
> >>>>>>>>> +    case OVN_IP_DST:
> >>>>>>>>> +        break;
> >>>>>>>>> +
> >>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>>>>>> +    case OVN_FIELD_N_IDS:
> >>>>>>>>> +    default:
> >>>>>>>>> +        OVS_NOT_REACHED();
> >>>>>>>>> +    }
> >>>>>>>>
> >>>>>>>> Nit: I would use the shorter:
> >>>>>>>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
> >>>>>>>
> >>>>>>> You mean to drop the switch right ? If so, Ack.
> >>>>>>>
> >>>>>>
> >>>>>> Right, I meant ovs_assert instead of switch.
> >>>>>>
> >>>>>>>
> >>>>>>>>
> >>>>>>>>> +
> >>>>>>>>> +    switch (rhs->id) {
> >>>>>>>>> +    case OVN_IP_SRC:
> >>>>>>>>> +    case OVN_IP_DST:
> >>>>>>>>> +        break;
> >>>>>>>>> +
> >>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> >>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> >>>>>>>>> +    case OVN_FIELD_N_IDS:
> >>>>>>>>> +    default:
> >>>>>>>>> +        OVS_NOT_REACHED();
> >>>>>>>>> +    }
> >>>>>>>>
> >>>>>>>> Nit: I would use the shorter:
> >>>>>>>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
> >>>>>>>>
> >>>>>>>>> +
> >>>>>>>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
> >>>>>>>>> +}
> >>>>>>>>> +
> >>>>>>>>> +static void
> >>>>>>>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move
OVS_UNUSED,
> >>>>>>>>> +                         const struct ovnact_encode_params *ep
OVS_UNUSED,
> >>>>>>>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
> >>>>>>>>> +{
> >>>>>>>>> +    /* Right now we only support exchanging the IPs in
> >>>>>>>>> +     * OVN fields (ip.src <-> ip.dst). */
> >>>>>>>>> +    size_t oc_offset =
> >>>>>>>>> +
 encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
> >>>>>>>>> +                                   true, NX_CTLR_NO_METER,
ofpacts);
> >>>>>>>>> +    encode_finish_controller_op(oc_offset, ofpacts);
> >>>>>>>>
> >>>>>>>> I think this means that all packets matching a flow with action
"reject { ...
> >>>>>>>> ip.src <-> ip.dst ... }" will generate two packet-ins, right?
One for the
> >>>>>>>> reject action, one for the ovnfield-exchange action.
> >>>>>>>>
> >>>>>>>> Is that a concern wrt. performance?
> >>>>>>>
> >>>>>>> Yes. It would have 2 packet-ins. Just like how we handle - icmp4
{...
> >>>>>>> icmp4.frag_mtu = 1500; ...};
> >>>>>>>
> >>>>>>> If performance is a concern, we could just drop ... ip.src <->
ip.dst
> >>>>>>> in reject and let reject action
> >>>>>>> swap the ip src with ip destination. I thought about that and my
take
> >>>>>>> is that since reject { } results in
> >>>>>>> TCP RST or ICMP unreachable packet, it should be fine. I see these
> >>>>>>> packets as more of control packets.
> >>>>>>>
> >>>>>>> I think it should be ok. Let me know what you think.
> >>>>>>>
> >>>>>>
> >>>>>> I think I like the approach of "reject" action implicitly swapping
IPs best.
> >>>>>> In the end it's not like we could implement reject withough
swapping IP dst
> >>>>>> with IP src so why not do it as part of the reject action.
> >>>>>>
> >>>>>
> >>>>> Agree. reject action is expected to be used to send the generated
tcp
> >>>>> rst/icmp unreachable
> >>>>> packet back to the sender.
> >>>>>
> >>>>> I'm ok to change the reject action to just swap the IPs internally
if
> >>>>> everyone is fine.
> >>>>>
> >>>>> @Mark Michelson  @Han Zhou  Do you have any comments here ?
> >>>>>
> >>>>> Option 1 - reject { ...}    -> The reject action handling in
pinctrl.c
> >>>>> will swap the ip source and destination
> >>>>>
> >>>>> Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
> >>>>> approach in this patch series. This results in
> >>>>> 2 packet-ins.
> >>>>>
> >>>>> I personally prefer (2) since in OVN we normally tell what inner
> >>>>> actions to apply for many OVN actions.
> >>>>> But I have no strong preference and I'm fine changing to (1).
> >>>>>
> >>>>> If we chose option (1), we could add a comment inside the action
like
> >>>>> - reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
> >>>>> eth.dst; output ; }
> >>>>>
> >>>>
> >>>> Then, why not swap ETH addresses implicitly too?
> >>>
> >>> As I mentioned above, I'd prefer if ovn-northd says what actions to
> >>> apply for the reject
> >>> packet. Just like for icmp4 or arp actions, ovn-northd says what
> >>> actions to apply for the
> >>> transformed packet from the source packet.
> >>>
> >>
> >> I was thinking that if we go this way when we build the packet in
> >> ovn-controller it might look weird to a reader of the pinctrl code to
see
> >> something like:
> >>
> >> /* We only swap IPs because ETH addresses are swapped in OVS. */
> >> pinctrl_compose_ipv4(&packet, ip_flow->dl_src, ip_flow->dl_dst,
> >>                       ip_flow->nw_dst, ip_flow->nw_src, ...)
> >
> > Right. I agree. It makes sense to swap both eth and ip if we go this
> > way. Sorry for the confusion.
> >
> > Thanks
> > Numan
>
> I've been reading through this, thinking about it, and have changed my
> mind on the matter probably 3 or 4 times while trying to write a response.
>
> In short, I think that the savings in flow creation introduced by this
> patch series are brilliant, but I'm hesitant about the second packet-in.
> I have a feeling this could be more costly than the frag_mtu examples
> that Numan pointed out earlier in the thread. I think that the savings
> in flow creation should make us consider forging new ground with regards
> to what happens implicitly during a controller action. I think we have
> the facilities to get the point across to users what is going on even if
> there are no explicit exchange actions in the generated logical flow or
> OF flow.
>
> I thought through this to try to figure out if there's a different way
> forward that both allows us to reduce the number of flows and still be
> explicit with the actions being performed. Unfortunately, the only ways
> I could think of required either parsing ACL matches in ovn-northd or
> generating just as many flows as before.
>
> In conclusion, I'm fine with the reject action implicitly swapping
> values, but it needs to be well documented.
>

I prefer implicitly swapping IP fields, too, because as mentioned by Mark
the reject action always requires the src and dst being swapped, and the
whole purpose of this change is to provide a generic way to handle both
IPv4/IPv6 and TCP/UDP. It should be sufficient to document what this action
does - if it is documented clearly it is not *implicit*, and we don't want
to name the action reject_with_ip_swapped just because it is too ugly.

I also thought about supporting action ip.src <-> ip.dst may be more
generic and can potentially be combined with other actions for more use
cases in the future. However, I think in practice that doesn't seem to be
really useful:
1. In normal situations it doesn't justify a controller action just for
reducing the number of flows. Data plane efficiency is more important in
most cases - unless the cost of control plane scale is extremely high,
which I think is not the case for this ipv4/v6 consideration.
2. If it is combined (nested) within a controller action then it is better
to perform the swapping within the controller action itself to avoid an
extra packet-in. The only exception may be, when a controller action
doesn't always require swapping the IPs, but in such case the action may
just provide an argument telling if IP swapping is needed.

For swapping ETH src/dst, I am ok with either way, as long as it is
documented clearly in the action definition itself.

Thanks,
Han

> >
> >>
> >> Regards,
> >> Dumitru
> >>
> >> _______________________________________________
> >> dev mailing list
> >> dev@openvswitch.org
> >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Numan Siddique Oct. 19, 2020, 8:41 a.m. UTC | #11
On Fri, Oct 16, 2020 at 11:53 PM Han Zhou <zhouhan@gmail.com> wrote:
>
> On Thu, Oct 15, 2020 at 1:38 PM Mark Michelson <mmichels@redhat.com> wrote:
> >
> > On 10/14/20 8:38 AM, Numan Siddique wrote:
> > > On Wed, Oct 14, 2020 at 5:51 PM Dumitru Ceara <dceara@redhat.com> wrote:
> > >>
> > >> On 10/14/20 2:13 PM, Numan Siddique wrote:
> > >>> On Wed, Oct 14, 2020 at 5:34 PM Dumitru Ceara <dceara@redhat.com>
> wrote:
> > >>>>
> > >>>> On 10/14/20 1:54 PM, Numan Siddique wrote:
> > >>>>> On Wed, Oct 14, 2020 at 4:40 PM Dumitru Ceara <dceara@redhat.com>
> wrote:
> > >>>>>>
> > >>>>>> On 10/14/20 12:50 PM, Numan Siddique wrote:
> > >>>>>>> On Wed, Oct 14, 2020 at 3:50 PM Dumitru Ceara <dceara@redhat.com>
> wrote:
> > >>>>>>>>
> > >>>>>>>> On 10/14/20 11:15 AM, numans@ovn.org wrote:
> > >>>>>>>>> From: Numan Siddique <numans@ovn.org>
> > >>>>>>>>>
> > >>>>>>>>> Thee new fields are version independent and these can be used
> in any
> > >>>>>>>>
> > >>>>>>>> Hi Numan,
> > >>>>>>>>
> > >>>>>>>> Nit: s/Thee/These
> > >>>>>>>>
> > >>>>>>>>> OVN action. Right now the usage of these fields are restricted
> to
> > >>>>>>>>> exchanging the IP source and destination fields.
> > >>>>>>>>>
> > >>>>>>>>> Eg. reject {... ip.src <-> ip.dst ... };
> > >>>>>>>>>
> > >>>>>>>>> "ip.src <-> ip.dst" translates to controller action with
> "pause" flag set.
> > >>>>>>>>> When pinctrl thread receives the packet in, it checks the IP
> version of the
> > >>>>>>>>> packet and either does - "ip4.src <-> ip4.dst" or "ip6.src <->
> ip6.dst" and
> > >>>>>>>>> resumes the packet to continue with the pipeline.
> > >>>>>>>>>
> > >>>>>>>>> Upcoming patch will make use of these fields.
> > >>>>>>>>>
> > >>>>>>>>> Signed-off-by: Numan Siddique <numans@ovn.org>
> > >>>>>>>>> ---
> > >>>>>>>>>   controller/pinctrl.c         |  49 +++++++++++++++
> > >>>>>>>>>   include/ovn/actions.h        |   4 ++
> > >>>>>>>>>   include/ovn/logical-fields.h |  18 ++++++
> > >>>>>>>>>   lib/actions.c                | 118
> +++++++++++++++++++++++++++++++++++
> > >>>>>>>>>   lib/logical-fields.c         |  10 +++
> > >>>>>>>>>   ovn-sb.xml                   |  37 +++++++++++
> > >>>>>>>>>   tests/ovn.at                 |  27 ++++++++
> > >>>>>>>>>   utilities/ovn-trace.c        |  28 +++++++++
> > >>>>>>>>>   8 files changed, 291 insertions(+)
> > >>>>>>>>>
> > >>>>>>>>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > >>>>>>>>> index 631d058458..bc16a82404 100644
> > >>>>>>>>> --- a/controller/pinctrl.c
> > >>>>>>>>> +++ b/controller/pinctrl.c
> > >>>>>>>>> @@ -233,6 +233,11 @@ static void
> pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
> > >>>>>>>>>                                                struct
> ofputil_packet_in *pin,
> > >>>>>>>>>                                                struct ofpbuf
> *userdata,
> > >>>>>>>>>                                                struct ofpbuf
> *continuation);
> > >>>>>>>>> +static void pinctrl_handle_swap_src_dst_ip(struct rconn
> *swconn,
> > >>>>>>>>> +                                           const struct flow
> *in_flow,
> > >>>>>>>>> +                                           struct dp_packet
> *pkt_in,
> > >>>>>>>>> +                                           struct
> ofputil_packet_in *pin,
> > >>>>>>>>> +                                           struct ofpbuf
> *continuation);
> > >>>>>>>>>   static void
> > >>>>>>>>>   pinctrl_handle_event(struct ofpbuf *userdata)
> > >>>>>>>>>       OVS_REQUIRES(pinctrl_mutex);
> > >>>>>>>>> @@ -2835,6 +2840,11 @@ process_packet_in(struct rconn *swconn,
> const struct ofp_header *msg)
> > >>>>>>>>>           ovs_mutex_unlock(&pinctrl_mutex);
> > >>>>>>>>>           break;
> > >>>>>>>>>
> > >>>>>>>>> +    case ACTION_OPCODE_SWAP_SRC_DST_IP:
> > >>>>>>>>> +        pinctrl_handle_swap_src_dst_ip(swconn, &headers,
> &packet, &pin,
> > >>>>>>>>> +                                       &continuation);
> > >>>>>>>>> +        break;
> > >>>>>>>>> +
> > >>>>>>>>>       default:
> > >>>>>>>>>           VLOG_WARN_RL(&rl, "unrecognized packet-in opcode
> %"PRIu32,
> > >>>>>>>>>                        ntohl(ah->opcode));
> > >>>>>>>>> @@ -6508,3 +6518,42 @@ pinctrl_handle_svc_check(struct rconn
> *swconn, const struct flow *ip_flow,
> > >>>>>>>>>           svc_mon->next_send_time = time_msec() +
> svc_mon->interval;
> > >>>>>>>>>       }
> > >>>>>>>>>   }
> > >>>>>>>>> +
> > >>>>>>>>> +static void
> > >>>>>>>>> +pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
> > >>>>>>>>> +                               const struct flow *in_flow,
> > >>>>>>>>> +                               struct dp_packet *pkt_in,
> > >>>>>>>>> +                               struct ofputil_packet_in *pin,
> > >>>>>>>>> +                               struct ofpbuf *continuation)
> > >>>>>>>>> +{
> > >>>>>>>>> +    enum ofp_version version = rconn_get_version(swconn);
> > >>>>>>>>> +    enum ofputil_protocol proto =
> ofputil_protocol_from_ofp_version(version);
> > >>>>>>>>> +    struct dp_packet *pkt_out;
> > >>>>>>>>> +
> > >>>>>>>>> +    pkt_out = dp_packet_clone(pkt_in);
> > >>>>>>>>> +    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
> > >>>>>>>>> +    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
> > >>>>>>>>> +    pkt_out->l3_ofs = pkt_in->l3_ofs;
> > >>>>>>>>> +    pkt_out->l4_ofs = pkt_in->l4_ofs;
> > >>>>>>>>> +
> > >>>>>>>>> +    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
> > >>>>>>>>> +        /* IPv4 packet. Swap nw_src with nw_dst. */
> > >>>>>>>>> +        packet_set_ipv4(pkt_out, in_flow->nw_dst,
> in_flow->nw_src,
> > >>>>>>>>> +                        in_flow->nw_tos, in_flow->nw_ttl);
> > >>>>>>>>> +    } else {
> > >>>>>>>>> +        /* IPv6 packet. Swap ip6_src with ip6_dst.
> > >>>>>>>>> +         * We could also use packet_set_ipv6() here, but that
> would require
> > >>>>>>>>> +         * to extract the 'tc' and 'label' from
> in_nh->ip6_flow which seems
> > >>>>>>>>> +         * unnecessary. */
> > >>>>>>>>> +        struct ovs_16aligned_ip6_hdr *out_nh =
> dp_packet_l3(pkt_out);
> > >>>>>>>>> +        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
> > >>>>>>>>> +        out_nh->ip6_src = out_nh->ip6_dst;
> > >>>>>>>>> +        out_nh->ip6_dst = tmp;
> > >>>>>>>>> +    }
> > >>>>>>>>> +
> > >>>>>>>>> +    pin->packet = dp_packet_data(pkt_out);
> > >>>>>>>>> +    pin->packet_len = dp_packet_size(pkt_out);
> > >>>>>>>>> +
> > >>>>>>>>> +    queue_msg(swconn, ofputil_encode_resume(pin, continuation,
> proto));
> > >>>>>>>>> +    dp_packet_delete(pkt_out);
> > >>>>>>>>> +}
> > >>>>>>>>> diff --git a/include/ovn/actions.h b/include/ovn/actions.h
> > >>>>>>>>> index b4e5acabb9..bf1fe848b7 100644
> > >>>>>>>>> --- a/include/ovn/actions.h
> > >>>>>>>>> +++ b/include/ovn/actions.h
> > >>>>>>>>> @@ -97,6 +97,7 @@ struct ovn_extend_table;
> > >>>>>>>>>       OVNACT(DHCP6_REPLY,       ovnact_null)            \
> > >>>>>>>>>       OVNACT(ICMP6_ERROR,       ovnact_nest)            \
> > >>>>>>>>>       OVNACT(REJECT,            ovnact_nest)            \
> > >>>>>>>>> +    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
> > >>>>>>>>>
> > >>>>>>>>>   /* enum ovnact_type, with a member OVNACT_<ENUM> for each
> action. */
> > >>>>>>>>>   enum OVS_PACKED_ENUM ovnact_type {
> > >>>>>>>>> @@ -612,6 +613,9 @@ enum action_opcode {
> > >>>>>>>>>        * The actions, in OpenFlow 1.3 format, follow the
> action_header.
> > >>>>>>>>>        */
> > >>>>>>>>>       ACTION_OPCODE_REJECT,
> > >>>>>>>>> +
> > >>>>>>>>> +    /* ip.src <-> ip.dst */
> > >>>>>>>>> +    ACTION_OPCODE_SWAP_SRC_DST_IP,
> > >>>>>>>>>   };
> > >>>>>>>>>
> > >>>>>>>>>   /* Header. */
> > >>>>>>>>> diff --git a/include/ovn/logical-fields.h
> b/include/ovn/logical-fields.h
> > >>>>>>>>> index ac6f2f909b..bb6daa50ca 100644
> > >>>>>>>>> --- a/include/ovn/logical-fields.h
> > >>>>>>>>> +++ b/include/ovn/logical-fields.h
> > >>>>>>>>> @@ -116,6 +116,24 @@ enum ovn_field_id {
> > >>>>>>>>>        */
> > >>>>>>>>>       OVN_ICMP6_FRAG_MTU,
> > >>>>>>>>>
> > >>>>>>>>> +    /*
> > >>>>>>>>> +     * Name: "ip.src"
> > >>>>>>>>> +     * Type: be128
> > >>>>>>>>> +     * Description: Sets the field MFF_IPV4_SRC if eth.type ==
> 0x800 (IPv4)
> > >>>>>>>>> +     *              or sets the field MFF_IPV6_SRC if
> > >>>>>>>>> +     *              eth.type == 0x86dd (IPV6).
> > >>>>>>>>> +     */
> > >>>>>>>>> +    OVN_IP_SRC,
> > >>>>>>>>> +
> > >>>>>>>>> +    /*
> > >>>>>>>>> +     * Name: "ip.dst"
> > >>>>>>>>> +     * Type: be128
> > >>>>>>>>> +     * Description: Sets the field MFF_IPV4_DST if eth.type ==
> 0x800 (IPv4)
> > >>>>>>>>> +     *              or sets the field MFF_IPV6_DST if
> > >>>>>>>>> +     *              eth.type == 0x86dd (IPV6).
> > >>>>>>>>> +     */
> > >>>>>>>>> +    OVN_IP_DST,
> > >>>>>>>>> +
> > >>>>>>>>>       OVN_FIELD_N_IDS
> > >>>>>>>>>   };
> > >>>>>>>>>
> > >>>>>>>>> diff --git a/lib/actions.c b/lib/actions.c
> > >>>>>>>>> index 1e1bdeff24..e9c77d2a0a 100644
> > >>>>>>>>> --- a/lib/actions.c
> > >>>>>>>>> +++ b/lib/actions.c
> > >>>>>>>>> @@ -371,12 +371,35 @@ ovnact_next_free(struct ovnact_next *a
> OVS_UNUSED)
> > >>>>>>>>>   {
> > >>>>>>>>>   }
> > >>>>>>>>>
> > >>>>>>>>> +static bool
> > >>>>>>>>> +check_ovnfield_load(struct action_context *ctx, const struct
> expr_field *field)
> > >>>>>>>>> +{
> > >>>>>>>>> +    switch (field->symbol->ovn_field->id) {
> > >>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> > >>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> > >>>>>>>>> +        return true;
> > >>>>>>>>> +
> > >>>>>>>>> +    case OVN_IP_SRC:
> > >>>>>>>>> +    case OVN_IP_DST:
> > >>>>>>>>> +        lexer_error(ctx->lexer, "Can't load a value to ovn
> field (%s).",
> > >>>>>>>>> +                    field->symbol->name);
> > >>>>>>>>> +        return false;
> > >>>>>>>>> +
> > >>>>>>>>> +    case OVN_FIELD_N_IDS:
> > >>>>>>>>> +    default:
> > >>>>>>>>> +        OVS_NOT_REACHED();
> > >>>>>>>>> +    }
> > >>>>>>>>> +}
> > >>>>>>>>> +
> > >>>>>>>>>   static void
> > >>>>>>>>>   parse_LOAD(struct action_context *ctx, const struct
> expr_field *lhs)
> > >>>>>>>>>   {
> > >>>>>>>>>       size_t ofs = ctx->ovnacts->size;
> > >>>>>>>>>       struct ovnact_load *load;
> > >>>>>>>>>       if (lhs->symbol->ovn_field) {
> > >>>>>>>>> +        if (!check_ovnfield_load(ctx, lhs)) {
> > >>>>>>>>> +            return;
> > >>>>>>>>> +        }
> > >>>>>>>>>           load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
> > >>>>>>>>>       } else {
> > >>>>>>>>>           load = ovnact_put_LOAD(ctx->ovnacts);
> > >>>>>>>>> @@ -474,6 +497,46 @@ format_EXCHANGE(const struct ovnact_move
> *move, struct ds *s)
> > >>>>>>>>>       format_assignment(move, "<->", s);
> > >>>>>>>>>   }
> > >>>>>>>>>
> > >>>>>>>>> +static void
> > >>>>>>>>> +parse_ovnfield_exchange(struct action_context *ctx,
> > >>>>>>>>> +                        const struct expr_field *lhs,
> > >>>>>>>>> +                        const struct expr_field *rhs)
> > >>>>>>>>> +{
> > >>>>>>>>> +    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
> > >>>>>>>>> +        lexer_error(ctx->lexer,
> > >>>>>>>>> +                    "Can't exchange ovn field with non ovn
> field (%s <-> %s).",
> > >>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> > >>>>>>>>> +        return;
> > >>>>>>>>> +    }
> > >>>>>>>>> +
> > >>>>>>>>> +    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
> > >>>>>>>>> +            lhs->symbol->ovn_field->id != OVN_IP_DST) {
> > >>>>>>>>> +        lexer_error(ctx->lexer,
> > >>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn
> field (%s).",
> > >>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> > >>>>>>>>> +        return;
> > >>>>>>>>> +    }
> > >>>>>>>>> +
> > >>>>>>>>> +    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
> > >>>>>>>>> +            rhs->symbol->ovn_field->id != OVN_IP_DST) {
> > >>>>>>>>> +        lexer_error(ctx->lexer,
> > >>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn
> field (%s).",
> > >>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> > >>>>>>>>> +        return;
> > >>>>>>>>> +    }
> > >>>>>>>>> +
> > >>>>>>>>> +    if (lhs->symbol->ovn_field->id ==
> rhs->symbol->ovn_field->id) {
> > >>>>>>>>> +        lexer_error(ctx->lexer,
> > >>>>>>>>> +                    "Can't exchange ovn field (%s) with ovn
> field (%s).",
> > >>>>>>>>> +                    lhs->symbol->name, rhs->symbol->name);
> > >>>>>>>>> +        return;
> > >>>>>>>>> +    }
> > >>>>>>>>> +
> > >>>>>>>>> +    struct ovnact_move *move =
> ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
> > >>>>>>>>> +    move->lhs = *lhs;
> > >>>>>>>>> +    move->rhs = *rhs;
> > >>>>>>>>> +}
> > >>>>>>>>> +
> > >>>>>>>>>   static void
> > >>>>>>>>>   parse_assignment_action(struct action_context *ctx, bool
> exchange,
> > >>>>>>>>>                           const struct expr_field *lhs)
> > >>>>>>>>> @@ -483,6 +546,11 @@ parse_assignment_action(struct
> action_context *ctx, bool exchange,
> > >>>>>>>>>           return;
> > >>>>>>>>>       }
> > >>>>>>>>>
> > >>>>>>>>> +    if (exchange && (lhs->symbol->ovn_field ||
> rhs.symbol->ovn_field)) {
> > >>>>>>>>> +        parse_ovnfield_exchange(ctx, lhs, &rhs);
> > >>>>>>>>> +        return;
> > >>>>>>>>> +    }
> > >>>>>>>>> +
> > >>>>>>>>>       const struct expr_symbol *ls = lhs->symbol;
> > >>>>>>>>>       const struct expr_symbol *rs = rhs.symbol;
> > >>>>>>>>>       if ((ls->width != 0) != (rs->width != 0)) {
> > >>>>>>>>> @@ -3128,6 +3196,8 @@ format_OVNFIELD_LOAD(const struct
> ovnact_load *load , struct ds *s)
> > >>>>>>>>>                         ntohs(load->imm.value.be16_int));
> > >>>>>>>>>           break;
> > >>>>>>>>>
> > >>>>>>>>> +    case OVN_IP_SRC:
> > >>>>>>>>> +    case OVN_IP_DST:
> > >>>>>>>>>       case OVN_FIELD_N_IDS:
> > >>>>>>>>>       default:
> > >>>>>>>>>           OVS_NOT_REACHED();
> > >>>>>>>>> @@ -3157,6 +3227,9 @@ encode_OVNFIELD_LOAD(const struct
> ovnact_load *load,
> > >>>>>>>>>           encode_finish_controller_op(oc_offset, ofpacts);
> > >>>>>>>>>           break;
> > >>>>>>>>>       }
> > >>>>>>>>> +
> > >>>>>>>>> +    case OVN_IP_SRC:
> > >>>>>>>>> +    case OVN_IP_DST:
> > >>>>>>>>>       case OVN_FIELD_N_IDS:
> > >>>>>>>>>       default:
> > >>>>>>>>>           OVS_NOT_REACHED();
> > >>>>>>>>> @@ -3451,6 +3524,51 @@ ovnact_fwd_group_free(struct
> ovnact_fwd_group *fwd_group)
> > >>>>>>>>>       free(fwd_group->child_ports);
> > >>>>>>>>>   }
> > >>>>>>>>>
> > >>>>>>>>> +static void
> > >>>>>>>>> +format_OVNFIELD_EXCHANGE(const struct ovnact_move *move ,
> struct ds *s)
> > >>>>>>>>> +{
> > >>>>>>>>> +    const struct ovn_field *lhs =
> ovn_field_from_name(move->lhs.symbol->name);
> > >>>>>>>>> +    const struct ovn_field *rhs =
> ovn_field_from_name(move->rhs.symbol->name);
> > >>>>>>>>> +    switch (lhs->id) {
> > >>>>>>>>> +    case OVN_IP_SRC:
> > >>>>>>>>> +    case OVN_IP_DST:
> > >>>>>>>>> +        break;
> > >>>>>>>>> +
> > >>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> > >>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> > >>>>>>>>> +    case OVN_FIELD_N_IDS:
> > >>>>>>>>> +    default:
> > >>>>>>>>> +        OVS_NOT_REACHED();
> > >>>>>>>>> +    }
> > >>>>>>>>
> > >>>>>>>> Nit: I would use the shorter:
> > >>>>>>>> ovs_asssert(lhs->id == OVN_IP_SRC || lhs->id == OVN_IP_DST);
> > >>>>>>>
> > >>>>>>> You mean to drop the switch right ? If so, Ack.
> > >>>>>>>
> > >>>>>>
> > >>>>>> Right, I meant ovs_assert instead of switch.
> > >>>>>>
> > >>>>>>>
> > >>>>>>>>
> > >>>>>>>>> +
> > >>>>>>>>> +    switch (rhs->id) {
> > >>>>>>>>> +    case OVN_IP_SRC:
> > >>>>>>>>> +    case OVN_IP_DST:
> > >>>>>>>>> +        break;
> > >>>>>>>>> +
> > >>>>>>>>> +    case OVN_ICMP4_FRAG_MTU:
> > >>>>>>>>> +    case OVN_ICMP6_FRAG_MTU:
> > >>>>>>>>> +    case OVN_FIELD_N_IDS:
> > >>>>>>>>> +    default:
> > >>>>>>>>> +        OVS_NOT_REACHED();
> > >>>>>>>>> +    }
> > >>>>>>>>
> > >>>>>>>> Nit: I would use the shorter:
> > >>>>>>>> ovs_asssert(rhs->id == OVN_IP_SRC || rhs->id == OVN_IP_DST);
> > >>>>>>>>
> > >>>>>>>>> +
> > >>>>>>>>> +    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
> > >>>>>>>>> +}
> > >>>>>>>>> +
> > >>>>>>>>> +static void
> > >>>>>>>>> +encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move
> OVS_UNUSED,
> > >>>>>>>>> +                         const struct ovnact_encode_params *ep
> OVS_UNUSED,
> > >>>>>>>>> +                         struct ofpbuf *ofpacts OVS_UNUSED)
> > >>>>>>>>> +{
> > >>>>>>>>> +    /* Right now we only support exchanging the IPs in
> > >>>>>>>>> +     * OVN fields (ip.src <-> ip.dst). */
> > >>>>>>>>> +    size_t oc_offset =
> > >>>>>>>>> +
>  encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
> > >>>>>>>>> +                                   true, NX_CTLR_NO_METER,
> ofpacts);
> > >>>>>>>>> +    encode_finish_controller_op(oc_offset, ofpacts);
> > >>>>>>>>
> > >>>>>>>> I think this means that all packets matching a flow with action
> "reject { ...
> > >>>>>>>> ip.src <-> ip.dst ... }" will generate two packet-ins, right?
> One for the
> > >>>>>>>> reject action, one for the ovnfield-exchange action.
> > >>>>>>>>
> > >>>>>>>> Is that a concern wrt. performance?
> > >>>>>>>
> > >>>>>>> Yes. It would have 2 packet-ins. Just like how we handle - icmp4
> {...
> > >>>>>>> icmp4.frag_mtu = 1500; ...};
> > >>>>>>>
> > >>>>>>> If performance is a concern, we could just drop ... ip.src <->
> ip.dst
> > >>>>>>> in reject and let reject action
> > >>>>>>> swap the ip src with ip destination. I thought about that and my
> take
> > >>>>>>> is that since reject { } results in
> > >>>>>>> TCP RST or ICMP unreachable packet, it should be fine. I see these
> > >>>>>>> packets as more of control packets.
> > >>>>>>>
> > >>>>>>> I think it should be ok. Let me know what you think.
> > >>>>>>>
> > >>>>>>
> > >>>>>> I think I like the approach of "reject" action implicitly swapping
> IPs best.
> > >>>>>> In the end it's not like we could implement reject withough
> swapping IP dst
> > >>>>>> with IP src so why not do it as part of the reject action.
> > >>>>>>
> > >>>>>
> > >>>>> Agree. reject action is expected to be used to send the generated
> tcp
> > >>>>> rst/icmp unreachable
> > >>>>> packet back to the sender.
> > >>>>>
> > >>>>> I'm ok to change the reject action to just swap the IPs internally
> if
> > >>>>> everyone is fine.
> > >>>>>
> > >>>>> @Mark Michelson  @Han Zhou  Do you have any comments here ?
> > >>>>>
> > >>>>> Option 1 - reject { ...}    -> The reject action handling in
> pinctrl.c
> > >>>>> will swap the ip source and destination
> > >>>>>
> > >>>>> Option 2: reject {... ip.src <-> ip.dst ...} which is the proposed
> > >>>>> approach in this patch series. This results in
> > >>>>> 2 packet-ins.
> > >>>>>
> > >>>>> I personally prefer (2) since in OVN we normally tell what inner
> > >>>>> actions to apply for many OVN actions.
> > >>>>> But I have no strong preference and I'm fine changing to (1).
> > >>>>>
> > >>>>> If we chose option (1), we could add a comment inside the action
> like
> > >>>>> - reject { /* ip.src <-> ip.dst is done implicitly*/,  eth.src <->
> > >>>>> eth.dst; output ; }
> > >>>>>
> > >>>>
> > >>>> Then, why not swap ETH addresses implicitly too?
> > >>>
> > >>> As I mentioned above, I'd prefer if ovn-northd says what actions to
> > >>> apply for the reject
> > >>> packet. Just like for icmp4 or arp actions, ovn-northd says what
> > >>> actions to apply for the
> > >>> transformed packet from the source packet.
> > >>>
> > >>
> > >> I was thinking that if we go this way when we build the packet in
> > >> ovn-controller it might look weird to a reader of the pinctrl code to
> see
> > >> something like:
> > >>
> > >> /* We only swap IPs because ETH addresses are swapped in OVS. */
> > >> pinctrl_compose_ipv4(&packet, ip_flow->dl_src, ip_flow->dl_dst,
> > >>                       ip_flow->nw_dst, ip_flow->nw_src, ...)
> > >
> > > Right. I agree. It makes sense to swap both eth and ip if we go this
> > > way. Sorry for the confusion.
> > >
> > > Thanks
> > > Numan
> >
> > I've been reading through this, thinking about it, and have changed my
> > mind on the matter probably 3 or 4 times while trying to write a response.
> >
> > In short, I think that the savings in flow creation introduced by this
> > patch series are brilliant, but I'm hesitant about the second packet-in.
> > I have a feeling this could be more costly than the frag_mtu examples
> > that Numan pointed out earlier in the thread. I think that the savings
> > in flow creation should make us consider forging new ground with regards
> > to what happens implicitly during a controller action. I think we have
> > the facilities to get the point across to users what is going on even if
> > there are no explicit exchange actions in the generated logical flow or
> > OF flow.
> >
> > I thought through this to try to figure out if there's a different way
> > forward that both allows us to reduce the number of flows and still be
> > explicit with the actions being performed. Unfortunately, the only ways
> > I could think of required either parsing ACL matches in ovn-northd or
> > generating just as many flows as before.
> >
> > In conclusion, I'm fine with the reject action implicitly swapping
> > values, but it needs to be well documented.
> >
>
> I prefer implicitly swapping IP fields, too, because as mentioned by Mark
> the reject action always requires the src and dst being swapped, and the
> whole purpose of this change is to provide a generic way to handle both
> IPv4/IPv6 and TCP/UDP. It should be sufficient to document what this action
> does - if it is documented clearly it is not *implicit*, and we don't want
> to name the action reject_with_ip_swapped just because it is too ugly.
>
> I also thought about supporting action ip.src <-> ip.dst may be more
> generic and can potentially be combined with other actions for more use
> cases in the future. However, I think in practice that doesn't seem to be
> really useful:
> 1. In normal situations it doesn't justify a controller action just for
> reducing the number of flows. Data plane efficiency is more important in
> most cases - unless the cost of control plane scale is extremely high,
> which I think is not the case for this ipv4/v6 consideration.
> 2. If it is combined (nested) within a controller action then it is better
> to perform the swapping within the controller action itself to avoid an
> extra packet-in. The only exception may be, when a controller action
> doesn't always require swapping the IPs, but in such case the action may
> just provide an argument telling if IP swapping is needed.
>
> For swapping ETH src/dst, I am ok with either way, as long as it is
> documented clearly in the action definition itself.


Thanks Mark and Han for your comments. I submitted v3 dropping the
patch 2 from the series
and swapping the Eth and IP fields implicitly.

Request to take a look -
https://patchwork.ozlabs.org/project/ovn/list/?series=208619

@Dumitru Ceara  - Even though you had acked the first patch, I have
not added your ack.
Can you please take a look.


Thanks
Numan

>
> Thanks,
> Han
>
> > >
> > >>
> > >> Regards,
> > >> Dumitru
> > >>
> > >> _______________________________________________
> > >> dev mailing list
> > >> dev@openvswitch.org
> > >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >>
> > > _______________________________________________
> > > dev mailing list
> > > dev@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
> >
> > _______________________________________________
> > dev mailing list
> > dev@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 631d058458..bc16a82404 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -233,6 +233,11 @@  static void pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
                                              struct ofputil_packet_in *pin,
                                              struct ofpbuf *userdata,
                                              struct ofpbuf *continuation);
+static void pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
+                                           const struct flow *in_flow,
+                                           struct dp_packet *pkt_in,
+                                           struct ofputil_packet_in *pin,
+                                           struct ofpbuf *continuation);
 static void
 pinctrl_handle_event(struct ofpbuf *userdata)
     OVS_REQUIRES(pinctrl_mutex);
@@ -2835,6 +2840,11 @@  process_packet_in(struct rconn *swconn, const struct ofp_header *msg)
         ovs_mutex_unlock(&pinctrl_mutex);
         break;
 
+    case ACTION_OPCODE_SWAP_SRC_DST_IP:
+        pinctrl_handle_swap_src_dst_ip(swconn, &headers, &packet, &pin,
+                                       &continuation);
+        break;
+
     default:
         VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32,
                      ntohl(ah->opcode));
@@ -6508,3 +6518,42 @@  pinctrl_handle_svc_check(struct rconn *swconn, const struct flow *ip_flow,
         svc_mon->next_send_time = time_msec() + svc_mon->interval;
     }
 }
+
+static void
+pinctrl_handle_swap_src_dst_ip(struct rconn *swconn,
+                               const struct flow *in_flow,
+                               struct dp_packet *pkt_in,
+                               struct ofputil_packet_in *pin,
+                               struct ofpbuf *continuation)
+{
+    enum ofp_version version = rconn_get_version(swconn);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+    struct dp_packet *pkt_out;
+
+    pkt_out = dp_packet_clone(pkt_in);
+    pkt_out->l2_5_ofs = pkt_in->l2_5_ofs;
+    pkt_out->l2_pad_size = pkt_in->l2_pad_size;
+    pkt_out->l3_ofs = pkt_in->l3_ofs;
+    pkt_out->l4_ofs = pkt_in->l4_ofs;
+
+    if (get_dl_type(in_flow) == htons(ETH_TYPE_IP)) {
+        /* IPv4 packet. Swap nw_src with nw_dst. */
+        packet_set_ipv4(pkt_out, in_flow->nw_dst, in_flow->nw_src,
+                        in_flow->nw_tos, in_flow->nw_ttl);
+    } else {
+        /* IPv6 packet. Swap ip6_src with ip6_dst.
+         * We could also use packet_set_ipv6() here, but that would require
+         * to extract the 'tc' and 'label' from in_nh->ip6_flow which seems
+         * unnecessary. */
+        struct ovs_16aligned_ip6_hdr *out_nh = dp_packet_l3(pkt_out);
+        union ovs_16aligned_in6_addr tmp = out_nh->ip6_src;
+        out_nh->ip6_src = out_nh->ip6_dst;
+        out_nh->ip6_dst = tmp;
+    }
+
+    pin->packet = dp_packet_data(pkt_out);
+    pin->packet_len = dp_packet_size(pkt_out);
+
+    queue_msg(swconn, ofputil_encode_resume(pin, continuation, proto));
+    dp_packet_delete(pkt_out);
+}
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index b4e5acabb9..bf1fe848b7 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -97,6 +97,7 @@  struct ovn_extend_table;
     OVNACT(DHCP6_REPLY,       ovnact_null)            \
     OVNACT(ICMP6_ERROR,       ovnact_nest)            \
     OVNACT(REJECT,            ovnact_nest)            \
+    OVNACT(OVNFIELD_EXCHANGE, ovnact_move)            \
 
 /* enum ovnact_type, with a member OVNACT_<ENUM> for each action. */
 enum OVS_PACKED_ENUM ovnact_type {
@@ -612,6 +613,9 @@  enum action_opcode {
      * The actions, in OpenFlow 1.3 format, follow the action_header.
      */
     ACTION_OPCODE_REJECT,
+
+    /* ip.src <-> ip.dst */
+    ACTION_OPCODE_SWAP_SRC_DST_IP,
 };
 
 /* Header. */
diff --git a/include/ovn/logical-fields.h b/include/ovn/logical-fields.h
index ac6f2f909b..bb6daa50ca 100644
--- a/include/ovn/logical-fields.h
+++ b/include/ovn/logical-fields.h
@@ -116,6 +116,24 @@  enum ovn_field_id {
      */
     OVN_ICMP6_FRAG_MTU,
 
+    /*
+     * Name: "ip.src"
+     * Type: be128
+     * Description: Sets the field MFF_IPV4_SRC if eth.type == 0x800 (IPv4)
+     *              or sets the field MFF_IPV6_SRC if
+     *              eth.type == 0x86dd (IPV6).
+     */
+    OVN_IP_SRC,
+
+    /*
+     * Name: "ip.dst"
+     * Type: be128
+     * Description: Sets the field MFF_IPV4_DST if eth.type == 0x800 (IPv4)
+     *              or sets the field MFF_IPV6_DST if
+     *              eth.type == 0x86dd (IPV6).
+     */
+    OVN_IP_DST,
+
     OVN_FIELD_N_IDS
 };
 
diff --git a/lib/actions.c b/lib/actions.c
index 1e1bdeff24..e9c77d2a0a 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -371,12 +371,35 @@  ovnact_next_free(struct ovnact_next *a OVS_UNUSED)
 {
 }
 
+static bool
+check_ovnfield_load(struct action_context *ctx, const struct expr_field *field)
+{
+    switch (field->symbol->ovn_field->id) {
+    case OVN_ICMP4_FRAG_MTU:
+    case OVN_ICMP6_FRAG_MTU:
+        return true;
+
+    case OVN_IP_SRC:
+    case OVN_IP_DST:
+        lexer_error(ctx->lexer, "Can't load a value to ovn field (%s).",
+                    field->symbol->name);
+        return false;
+
+    case OVN_FIELD_N_IDS:
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+
 static void
 parse_LOAD(struct action_context *ctx, const struct expr_field *lhs)
 {
     size_t ofs = ctx->ovnacts->size;
     struct ovnact_load *load;
     if (lhs->symbol->ovn_field) {
+        if (!check_ovnfield_load(ctx, lhs)) {
+            return;
+        }
         load = ovnact_put_OVNFIELD_LOAD(ctx->ovnacts);
     } else {
         load = ovnact_put_LOAD(ctx->ovnacts);
@@ -474,6 +497,46 @@  format_EXCHANGE(const struct ovnact_move *move, struct ds *s)
     format_assignment(move, "<->", s);
 }
 
+static void
+parse_ovnfield_exchange(struct action_context *ctx,
+                        const struct expr_field *lhs,
+                        const struct expr_field *rhs)
+{
+    if (!lhs->symbol->ovn_field || !rhs->symbol->ovn_field) {
+        lexer_error(ctx->lexer,
+                    "Can't exchange ovn field with non ovn field (%s <-> %s).",
+                    lhs->symbol->name, rhs->symbol->name);
+        return;
+    }
+
+    if (lhs->symbol->ovn_field->id != OVN_IP_SRC &&
+            lhs->symbol->ovn_field->id != OVN_IP_DST) {
+        lexer_error(ctx->lexer,
+                    "Can't exchange ovn field (%s) with ovn field (%s).",
+                    lhs->symbol->name, rhs->symbol->name);
+        return;
+    }
+
+    if (rhs->symbol->ovn_field->id != OVN_IP_SRC &&
+            rhs->symbol->ovn_field->id != OVN_IP_DST) {
+        lexer_error(ctx->lexer,
+                    "Can't exchange ovn field (%s) with ovn field (%s).",
+                    lhs->symbol->name, rhs->symbol->name);
+        return;
+    }
+
+    if (lhs->symbol->ovn_field->id == rhs->symbol->ovn_field->id) {
+        lexer_error(ctx->lexer,
+                    "Can't exchange ovn field (%s) with ovn field (%s).",
+                    lhs->symbol->name, rhs->symbol->name);
+        return;
+    }
+
+    struct ovnact_move *move = ovnact_put_OVNFIELD_EXCHANGE(ctx->ovnacts);
+    move->lhs = *lhs;
+    move->rhs = *rhs;
+}
+
 static void
 parse_assignment_action(struct action_context *ctx, bool exchange,
                         const struct expr_field *lhs)
@@ -483,6 +546,11 @@  parse_assignment_action(struct action_context *ctx, bool exchange,
         return;
     }
 
+    if (exchange && (lhs->symbol->ovn_field || rhs.symbol->ovn_field)) {
+        parse_ovnfield_exchange(ctx, lhs, &rhs);
+        return;
+    }
+
     const struct expr_symbol *ls = lhs->symbol;
     const struct expr_symbol *rs = rhs.symbol;
     if ((ls->width != 0) != (rs->width != 0)) {
@@ -3128,6 +3196,8 @@  format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
                       ntohs(load->imm.value.be16_int));
         break;
 
+    case OVN_IP_SRC:
+    case OVN_IP_DST:
     case OVN_FIELD_N_IDS:
     default:
         OVS_NOT_REACHED();
@@ -3157,6 +3227,9 @@  encode_OVNFIELD_LOAD(const struct ovnact_load *load,
         encode_finish_controller_op(oc_offset, ofpacts);
         break;
     }
+
+    case OVN_IP_SRC:
+    case OVN_IP_DST:
     case OVN_FIELD_N_IDS:
     default:
         OVS_NOT_REACHED();
@@ -3451,6 +3524,51 @@  ovnact_fwd_group_free(struct ovnact_fwd_group *fwd_group)
     free(fwd_group->child_ports);
 }
 
+static void
+format_OVNFIELD_EXCHANGE(const struct ovnact_move *move , struct ds *s)
+{
+    const struct ovn_field *lhs = ovn_field_from_name(move->lhs.symbol->name);
+    const struct ovn_field *rhs = ovn_field_from_name(move->rhs.symbol->name);
+    switch (lhs->id) {
+    case OVN_IP_SRC:
+    case OVN_IP_DST:
+        break;
+
+    case OVN_ICMP4_FRAG_MTU:
+    case OVN_ICMP6_FRAG_MTU:
+    case OVN_FIELD_N_IDS:
+    default:
+        OVS_NOT_REACHED();
+    }
+
+    switch (rhs->id) {
+    case OVN_IP_SRC:
+    case OVN_IP_DST:
+        break;
+
+    case OVN_ICMP4_FRAG_MTU:
+    case OVN_ICMP6_FRAG_MTU:
+    case OVN_FIELD_N_IDS:
+    default:
+        OVS_NOT_REACHED();
+    }
+
+    ds_put_format(s, "%s <-> %s;", lhs->name, rhs->name);
+}
+
+static void
+encode_OVNFIELD_EXCHANGE(const struct ovnact_move *move OVS_UNUSED,
+                         const struct ovnact_encode_params *ep OVS_UNUSED,
+                         struct ofpbuf *ofpacts OVS_UNUSED)
+{
+    /* Right now we only support exchanging the IPs in
+     * OVN fields (ip.src <-> ip.dst). */
+    size_t oc_offset =
+        encode_start_controller_op(ACTION_OPCODE_SWAP_SRC_DST_IP,
+                                   true, NX_CTLR_NO_METER, ofpacts);
+    encode_finish_controller_op(oc_offset, ofpacts);
+}
+
 /* Parses an assignment or exchange or put_dhcp_opts action. */
 static void
 parse_set_action(struct action_context *ctx)
diff --git a/lib/logical-fields.c b/lib/logical-fields.c
index bf61df7719..d6f1981f52 100644
--- a/lib/logical-fields.c
+++ b/lib/logical-fields.c
@@ -33,6 +33,14 @@  const struct ovn_field ovn_fields[OVN_FIELD_N_IDS] = {
         OVN_ICMP6_FRAG_MTU,
         "icmp6.frag_mtu",
         4, 32,
+    }, {
+        OVN_IP_SRC,
+        "ip.src",
+        16, 128,
+    }, {
+        OVN_IP_DST,
+        "ip.dst",
+        16, 128,
     },
 };
 
@@ -271,6 +279,8 @@  ovn_init_symtab(struct shash *symtab)
 
     expr_symtab_add_ovn_field(symtab, "icmp4.frag_mtu", OVN_ICMP4_FRAG_MTU);
     expr_symtab_add_ovn_field(symtab, "icmp6.frag_mtu", OVN_ICMP6_FRAG_MTU);
+    expr_symtab_add_ovn_field(symtab, "ip.src", OVN_IP_SRC);
+    expr_symtab_add_ovn_field(symtab, "ip.dst", OVN_IP_DST);
 }
 
 const char *
diff --git a/ovn-sb.xml b/ovn-sb.xml
index 2fc84f54ee..73ee543bfe 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -1258,6 +1258,43 @@ 
             except that the two values are exchanged instead of copied.  Both
             <var>field1</var> and <var>field2</var> must modifiable.
           </p>
+
+          <p>
+            <code>OVN</code> supports exchange of below OVN logical fields.
+          </p>
+
+          <ul>
+            <li>
+              <code>ip.src</code>
+              <code>ip.dst</code>
+              <p>
+                These fields can be used to refer to IP source and destination
+                fields which are IP version independent. These fields can be
+                used only for exchange of values.
+              </p>
+            </li>
+
+            <li>
+              <p>
+                Below are few examples:
+              </p>
+
+              <p>
+                match = "ip &amp;&amp; tcp"
+                action = "ip.src &lt;-&gt; ip.dst; next;"
+              </p>
+
+              <p>
+                match = "ip4 || ip6"
+                action = reject { ip.src &lt;-&gt; ip.dst; } next;"
+              </p>
+
+              <p>
+                match = "ip &amp;&amp; (tcp || udp)"
+                action = "ip.src &lt;-&gt; ip.dst; next;"
+              </p>
+            </li>
+          </ul>
         </dd>
 
         <dt><code>ip.ttl--;</code></dt>
diff --git a/tests/ovn.at b/tests/ovn.at
index 71837a6721..c40d69c5e9 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -1605,6 +1605,33 @@  reject { };
     formats as reject { drop; };
     encodes as controller(userdata=00.00.00.16.00.00.00.00)
 
+ip.src <-> ip.dst;
+    encodes as controller(userdata=00.00.00.17.00.00.00.00,pause)
+
+ip.src <-> ip.dst
+    Syntax error at end of input expecting `;'.
+
+ip.src = 10.0.0.10;
+    Can't load a value to ovn field (ip.src).
+
+ip.dst = aef0::4;
+    Can't load a value to ovn field (ip.dst).
+
+ip.src <-> ip4.dst;
+    Can't exchange ovn field with non ovn field (ip.src <-> ip4.dst).
+
+ip6.src <-> ip.dst;
+    Can't exchange ovn field with non ovn field (ip6.src <-> ip.dst).
+
+ip.src <-> icmp4.frag_mtu;
+    Can't exchange ovn field (ip.src) with ovn field (icmp4.frag_mtu).
+
+icmp6.frag_mtu <-> ip.dst;
+    Can't exchange ovn field (icmp6.frag_mtu) with ovn field (ip.dst).
+
+ip.src <-> ip.src;
+    Can't exchange ovn field (ip.src) with ovn field (ip.src).
+
 # trigger_event
 trigger_event(event = "empty_lb_backends", vip = "10.0.0.1:80", protocol = "tcp", load_balancer = "12345678-abcd-9876-fedc-11119f8e7d6c");
     encodes as controller(userdata=00.00.00.0f.00.00.00.00.00.00.00.00.00.01.00.0b.31.30.2e.30.2e.30.2e.31.3a.38.30.00.02.00.03.74.63.70.00.03.00.24.31.32.33.34.35.36.37.38.2d.61.62.63.64.2d.39.38.37.36.2d.66.65.64.63.2d.31.31.31.31.39.66.38.65.37.64.36.63)
diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
index 38aee6081b..ed7d8bf278 100644
--- a/utilities/ovn-trace.c
+++ b/utilities/ovn-trace.c
@@ -2147,12 +2147,35 @@  execute_ovnfield_load(const struct ovnact_load *load,
                              ntohs(load->imm.value.be16_int));
         break;
     }
+
+    case OVN_IP_SRC:
+    case OVN_IP_DST:
     case OVN_FIELD_N_IDS:
     default:
         OVS_NOT_REACHED();
     }
 }
 
+static void
+execute_ovnfield_exchange(const struct ovnact_move *move OVS_UNUSED,
+                          struct flow *uflow,
+                          struct ovs_list *super)
+{
+    /* Right now we support exchanging of ip.src with ip.dst. */
+    ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
+                         "ip.src <-> ip.dst");
+
+    if (get_dl_type(uflow) == htons(ETH_TYPE_IP)) {
+        ovs_be32 tmp = uflow->nw_dst;
+        uflow->nw_dst = uflow->nw_src;
+        uflow->nw_src = tmp;
+    } else {
+        struct in6_addr tmp = uflow->ipv6_dst;
+        uflow->ipv6_dst = uflow->ipv6_src;
+        uflow->ipv6_src = tmp;
+    }
+}
+
 static void
 trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
               const struct ovntrace_datapath *dp, struct flow *uflow,
@@ -2369,6 +2392,11 @@  trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
                            pipeline, super);
             break;
 
+        case OVNACT_OVNFIELD_EXCHANGE:
+            execute_ovnfield_exchange(ovnact_get_OVNFIELD_EXCHANGE(a),
+                                      uflow, super);
+            break;
+
         case OVNACT_TRIGGER_EVENT:
             break;