diff mbox series

[ovs-dev,v3] controller: Avoid double controller action for ICMP errors.

Message ID 20240206091557.526837-1-amusil@redhat.com
State Accepted
Headers show
Series [ovs-dev,v3] controller: Avoid double controller action for ICMP errors. | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/github-robot-_ovn-kubernetes success github build: passed

Commit Message

Ales Musil Feb. 6, 2024, 9:15 a.m. UTC
The fields that are not directly supported by OvS were encoded
via additional controller action that changed the required value.
This was most notably needed for ICMP need frag messages.

Encode the field value loads as note action instead. This allows
us to find the note and act accordingly in the first controller
action without the need to send it to pinctrl again, parse and
change the packet. This way we can also encode any future fields
that might be needed as this method should be flexible enough.

This change is completely transparent to the user and shouldn't
cause any disruptions.

Signed-off-by: Ales Musil <amusil@redhat.com>
---
v3: Rebase on top of current main.
    Fix some cosmetic stuff pointed out by Dumitru.
    Add back the handler for old way to keep the ICMP
    error working during upgrade.
v2: Fix the wrong checksum for the ICMP packet.
---
 controller/physical.c |  14 +++---
 controller/pinctrl.c  | 104 +++++++++++++++++++++++++++++++++++++-----
 include/ovn/actions.h |  17 +++++++
 lib/actions.c         |  42 +++++++++++++----
 tests/ovn.at          |  10 ++--
 5 files changed, 153 insertions(+), 34 deletions(-)

Comments

Dumitru Ceara Feb. 12, 2024, noon UTC | #1
On 2/6/24 10:15, Ales Musil wrote:
> The fields that are not directly supported by OvS were encoded
> via additional controller action that changed the required value.
> This was most notably needed for ICMP need frag messages.
> 
> Encode the field value loads as note action instead. This allows
> us to find the note and act accordingly in the first controller
> action without the need to send it to pinctrl again, parse and
> change the packet. This way we can also encode any future fields
> that might be needed as this method should be flexible enough.
> 
> This change is completely transparent to the user and shouldn't
> cause any disruptions.
> 
> Signed-off-by: Ales Musil <amusil@redhat.com>
> ---
> v3: Rebase on top of current main.
>     Fix some cosmetic stuff pointed out by Dumitru.
>     Add back the handler for old way to keep the ICMP
>     error working during upgrade.
> v2: Fix the wrong checksum for the ICMP packet.
> ---

Thanks for the update, Ales!  Looks good to me:

Acked-by: Dumitru Ceara <dceara@redhat.com>

Regards,
Dumitru
Mark Michelson March 15, 2024, 3:40 p.m. UTC | #2
On 2/12/24 07:00, Dumitru Ceara wrote:
> On 2/6/24 10:15, Ales Musil wrote:
>> The fields that are not directly supported by OvS were encoded
>> via additional controller action that changed the required value.
>> This was most notably needed for ICMP need frag messages.
>>
>> Encode the field value loads as note action instead. This allows
>> us to find the note and act accordingly in the first controller
>> action without the need to send it to pinctrl again, parse and
>> change the packet. This way we can also encode any future fields
>> that might be needed as this method should be flexible enough.
>>
>> This change is completely transparent to the user and shouldn't
>> cause any disruptions.
>>
>> Signed-off-by: Ales Musil <amusil@redhat.com>
>> ---
>> v3: Rebase on top of current main.
>>      Fix some cosmetic stuff pointed out by Dumitru.
>>      Add back the handler for old way to keep the ICMP
>>      error working during upgrade.
>> v2: Fix the wrong checksum for the ICMP packet.
>> ---
> 
> Thanks for the update, Ales!  Looks good to me:
> 
> Acked-by: Dumitru Ceara <dceara@redhat.com>
> 
> Regards,
> Dumitru
> 

Thanks Ales and Dumitru,

I pushed this change to main.

> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
diff mbox series

Patch

diff --git a/controller/physical.c b/controller/physical.c
index c32642d2c..1891629ae 100644
--- a/controller/physical.c
+++ b/controller/physical.c
@@ -1273,7 +1273,7 @@  reply_imcp_error_if_pkt_too_big(struct ovn_desired_flow_table *flow_table,
     ip_ttl->ttl = 255;
 
     uint16_t frag_mtu = mtu - ETHERNET_OVERHEAD;
-    size_t frag_mtu_oc_offset;
+    size_t note_offset;
     if (is_ipv6) {
         /* icmp6.type = 2 (Packet Too Big) */
         /* icmp6.code = 0 */
@@ -1285,9 +1285,8 @@  reply_imcp_error_if_pkt_too_big(struct ovn_desired_flow_table *flow_table,
             &inner_ofpacts, mf_from_id(MFF_ICMPV6_CODE), &icmp_code, NULL);
 
         /* icmp6.frag_mtu */
-        frag_mtu_oc_offset = encode_start_controller_op(
-            ACTION_OPCODE_PUT_ICMP6_FRAG_MTU, true, NX_CTLR_NO_METER,
-            &inner_ofpacts);
+        note_offset = encode_start_ovn_field_note(OVN_ICMP6_FRAG_MTU,
+                                                  &inner_ofpacts);
         ovs_be32 frag_mtu_ovs = htonl(frag_mtu);
         ofpbuf_put(&inner_ofpacts, &frag_mtu_ovs, sizeof(frag_mtu_ovs));
     } else {
@@ -1301,13 +1300,12 @@  reply_imcp_error_if_pkt_too_big(struct ovn_desired_flow_table *flow_table,
             &inner_ofpacts, mf_from_id(MFF_ICMPV4_CODE), &icmp_code, NULL);
 
         /* icmp4.frag_mtu = */
-        frag_mtu_oc_offset = encode_start_controller_op(
-            ACTION_OPCODE_PUT_ICMP4_FRAG_MTU, true, NX_CTLR_NO_METER,
-            &inner_ofpacts);
+        note_offset = encode_start_ovn_field_note(OVN_ICMP4_FRAG_MTU,
+                                                  &inner_ofpacts);
         ovs_be16 frag_mtu_ovs = htons(frag_mtu);
         ofpbuf_put(&inner_ofpacts, &frag_mtu_ovs, sizeof(frag_mtu_ovs));
     }
-    encode_finish_controller_op(frag_mtu_oc_offset, &inner_ofpacts);
+    encode_finish_ovn_field_note(note_offset, &inner_ofpacts);
 
     /* Finally, submit the ICMP error back to the ingress pipeline */
     put_resubmit(OFTABLE_LOG_INGRESS_PIPELINE, &inner_ofpacts);
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index faa3f9226..62f2bcaa5 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -607,6 +607,23 @@  set_switch_config(struct rconn *swconn,
     queue_msg(swconn, request);
 }
 
+static void
+enqueue_packet(struct rconn *swconn, enum ofp_version version,
+               const struct dp_packet *packet, const struct ofpbuf *ofpacts)
+{
+    struct ofputil_packet_out po = (struct ofputil_packet_out) {
+        .packet = dp_packet_data(packet),
+        .packet_len = dp_packet_size(packet),
+        .buffer_id = UINT32_MAX,
+        .ofpacts = ofpacts->data,
+        .ofpacts_len = ofpacts->size,
+    };
+
+    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
+    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
+    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
+}
+
 static void
 set_actions_and_enqueue_msg(struct rconn *swconn,
                             const struct dp_packet *packet,
@@ -632,19 +649,59 @@  set_actions_and_enqueue_msg(struct rconn *swconn,
         return;
     }
 
-    struct ofputil_packet_out po = {
-        .packet = dp_packet_data(packet),
-        .packet_len = dp_packet_size(packet),
-        .buffer_id = UINT32_MAX,
-        .ofpacts = ofpacts.data,
-        .ofpacts_len = ofpacts.size,
-    };
-    match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
-    enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
-    queue_msg(swconn, ofputil_encode_packet_out(&po, proto));
+    enqueue_packet(swconn, version, packet, &ofpacts);
     ofpbuf_uninit(&ofpacts);
 }
 
+static bool
+ofpacts_decode_and_reload_metadata(enum ofp_version version,
+                                   const struct match *md,
+                                   struct ofpbuf *userdata,
+                                   struct ofpbuf *ofpacts)
+{
+    /* Copy metadata from 'md' into the packet-out via "set_field"
+     * actions, then add actions from 'userdata'.
+     */
+    reload_metadata(ofpacts, md);
+    enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
+                                                      version, NULL, NULL,
+                                                      ofpacts);
+    if (error) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        VLOG_WARN_RL(&rl, "failed to parse actions from userdata (%s)",
+                     ofperr_to_string(error));
+        return false;
+    }
+
+    return true;
+}
+
+static void *
+ofpacts_get_ovn_field(const struct ofpbuf *ofpacts,
+                      enum ovn_field_id id)
+{
+    const struct ovn_field *field = ovn_field_from_id(id);
+
+    struct ofpact *ofpact;
+    OFPACT_FOR_EACH (ofpact, ofpacts->data, ofpacts->size) {
+        if (ofpact->type != OFPACT_NOTE) {
+            continue;
+        }
+
+        struct ofpact_note *note = ofpact_get_NOTE(ofpact);
+        struct ovn_field_note_header *hdr = (void *) note->data;
+        /* The data can contain padding bytes from NXAST_NOTE encode/decode. */
+        size_t data_len = note->length - sizeof *hdr;
+
+        if (!strncmp(hdr->magic, OVN_FIELD_NOTE_MAGIC, ARRAY_SIZE(hdr->magic))
+            && field->id == ntohs(hdr->type) && data_len >= field->n_bytes) {
+            return hdr->data;
+        }
+    }
+
+    return NULL;
+}
+
 /* Forwards a packet to 'out_port_key' even if that's on a remote
  * hypervisor, i.e., the packet is re-injected in table OFTABLE_OUTPUT_INIT.
  */
@@ -1558,6 +1615,8 @@  pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
                     const struct match *md, struct ofpbuf *userdata,
                     bool set_icmp_code, bool loopback)
 {
+    enum ofp_version version = rconn_get_version(swconn);
+
     /* This action only works for IP packets, and the switch should only send
      * us IP packets this way, but check here just to be sure. */
     if (ip_flow->dl_type != htons(ETH_TYPE_IP) &&
@@ -1569,6 +1628,14 @@  pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
         return;
     }
 
+    uint64_t ofpacts_stub[4096 / 8];
+    struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
+
+    if (!ofpacts_decode_and_reload_metadata(version, md, userdata, &ofpacts)) {
+        ofpbuf_uninit(&ofpacts);
+        return;
+    }
+
     uint64_t packet_stub[128 / 8];
     struct dp_packet packet;
 
@@ -1584,6 +1651,7 @@  pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
             VLOG_WARN_RL(&rl,
                         "ICMP action on IP packet with invalid length (%u)",
                         in_ip_len);
+            ofpbuf_uninit(&ofpacts);
             return;
         }
 
@@ -1627,6 +1695,12 @@  pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
         void *data = ih + 1;
         memcpy(data, in_ip, in_ip_len);
 
+        ovs_be16 *mtu = ofpacts_get_ovn_field(&ofpacts, OVN_ICMP4_FRAG_MTU);
+        if (mtu) {
+            ih->icmp_fields.frag.mtu = *mtu;
+            ih->icmp_code = 4;
+        }
+
         ih->icmp_csum = 0;
         ih->icmp_csum = csum(ih, sizeof *ih + in_ip_len);
     } else {
@@ -1674,6 +1748,12 @@  pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
         }
         ih->icmp6_base.icmp6_cksum = 0;
 
+        ovs_be32 *mtu = ofpacts_get_ovn_field(&ofpacts, OVN_ICMP6_FRAG_MTU);
+        if (mtu) {
+            put_16aligned_be32(ih->icmp6_data.be32, *mtu);
+            ih->icmp6_base.icmp6_type = ICMP6_PACKET_TOO_BIG;
+        }
+
         void *data = ih + 1;
         memcpy(data, in_ip, in_ip_len);
         uint32_t icmpv6_csum =
@@ -1688,8 +1768,9 @@  pinctrl_handle_icmp(struct rconn *swconn, const struct flow *ip_flow,
                       ip_flow->vlans[0].tci);
     }
 
-    set_actions_and_enqueue_msg(swconn, &packet, md, userdata);
+    enqueue_packet(swconn, version, &packet, &ofpacts);
     dp_packet_uninit(&packet);
+    ofpbuf_uninit(&ofpacts);
 }
 
 /* Called with in the pinctrl_handler thread context. */
@@ -6338,6 +6419,7 @@  exit:
 }
 
 /* Called with in the pinctrl_handler thread context. */
+/* XXX: This handler can be removed in next version (25.03). */
 static void
 pinctrl_handle_put_icmp_frag_mtu(struct rconn *swconn,
                                  const struct flow *in_flow,
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 49cfe0624..f840e12f2 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -23,6 +23,7 @@ 
 #include "expr.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/hmap.h"
+#include "openvswitch/ofp-actions.h"
 #include "openvswitch/uuid.h"
 #include "util.h"
 #include "ovn/features.h"
@@ -514,6 +515,19 @@  struct ovnact_commit_lb_aff {
     uint16_t timeout;
 };
 
+#define OVN_FIELD_NOTE_MAGIC "ovn"
+
+struct ovn_field_note_header {
+    OFPACT_PADDED_MEMBERS(
+        char magic[4];
+        ovs_be16 type;   /* The type of ovn field note, based
+                          * on 'enum ovn_field_id'. */
+    );
+    uint8_t data[];
+};
+
+BUILD_ASSERT_DECL(sizeof(struct ovn_field_note_header) == 8);
+
 /* Internal use by the helpers below. */
 void ovnact_init(struct ovnact *, enum ovnact_type, size_t len);
 void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len);
@@ -901,4 +915,7 @@  void encode_finish_controller_op(size_t ofs, struct ofpbuf *ofpacts);
 void encode_controller_op(enum action_opcode opcode, uint32_t meter_id,
                           struct ofpbuf *ofpacts);
 
+size_t encode_start_ovn_field_note(enum ovn_field_id, struct ofpbuf *ofpacts);
+void encode_finish_ovn_field_note(size_t offset, struct ofpbuf *ofpacts);
+
 #endif /* ovn/actions.h */
diff --git a/lib/actions.c b/lib/actions.c
index fdc0529de..f421bbda2 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -117,6 +117,32 @@  encode_controller_op(enum action_opcode opcode, uint32_t meter_id,
     encode_finish_controller_op(ofs, ofpacts);
 }
 
+size_t
+encode_start_ovn_field_note(enum ovn_field_id id, struct ofpbuf *ofpacts)
+{
+    size_t offset = ofpacts->size;
+
+    ofpact_put_NOTE(ofpacts);
+    struct ovn_field_note_header *hdr = ofpbuf_put_zeros(ofpacts, sizeof *hdr);
+    *hdr = (struct ovn_field_note_header) {
+        .magic = OVN_FIELD_NOTE_MAGIC,
+        .type = htons(id),
+    };
+
+    return offset;
+}
+
+void
+encode_finish_ovn_field_note(size_t offset, struct ofpbuf *ofpacts)
+{
+    struct ofpact_note *note = ofpbuf_at_assert(ofpacts, offset, sizeof *note);
+
+    ofpacts->header = note;
+    note->length = ofpacts->size - (offset + sizeof *note);
+
+    ofpact_finish_NOTE(ofpacts, &note);
+}
+
 static void
 init_stack(struct ofpact_stack *stack, enum mf_field_id field)
 {
@@ -3769,31 +3795,27 @@  format_OVNFIELD_LOAD(const struct ovnact_load *load , struct ds *s)
 
 static void
 encode_OVNFIELD_LOAD(const struct ovnact_load *load,
-            const struct ovnact_encode_params *ep,
-            struct ofpbuf *ofpacts)
+                     const struct ovnact_encode_params *ep OVS_UNUSED,
+                     struct ofpbuf *ofpacts)
 {
     const struct ovn_field *f = ovn_field_from_name(load->dst.symbol->name);
+    size_t offset = encode_start_ovn_field_note(f->id, ofpacts);
+
     switch (f->id) {
     case OVN_ICMP4_FRAG_MTU: {
-        size_t oc_offset = encode_start_controller_op(
-            ACTION_OPCODE_PUT_ICMP4_FRAG_MTU, true,
-            ep->ctrl_meter_id, ofpacts);
         ofpbuf_put(ofpacts, &load->imm.value.be16_int, sizeof(ovs_be16));
-        encode_finish_controller_op(oc_offset, ofpacts);
         break;
     }
     case OVN_ICMP6_FRAG_MTU: {
-        size_t oc_offset = encode_start_controller_op(
-            ACTION_OPCODE_PUT_ICMP6_FRAG_MTU, true,
-            ep->ctrl_meter_id, ofpacts);
         ofpbuf_put(ofpacts, &load->imm.value.be32_int, sizeof(ovs_be32));
-        encode_finish_controller_op(oc_offset, ofpacts);
         break;
     }
     case OVN_FIELD_N_IDS:
     default:
         OVS_NOT_REACHED();
     }
+
+    encode_finish_ovn_field_note(offset, ofpacts);
 }
 
 static void
diff --git a/tests/ovn.at b/tests/ovn.at
index 3e83cb2c0..259c8d9de 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -1867,7 +1867,7 @@  icmp4 { };
 
 # icmp4 with icmp4.frag_mtu
 icmp4 { eth.dst = ff:ff:ff:ff:ff:ff; icmp4.frag_mtu = 1500; output; }; output;
-    encodes as controller(userdata=00.00.00.0a.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.28.00.00.23.20.00.25.00.00.00.00.00.00.00.03.00.0e.00.00.00.0d.00.00.00.00.05.dc.00.00.00.04.00.04.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
+    encodes as controller(userdata=00.00.00.0a.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.18.00.00.23.20.00.08.6f.76.6e.00.00.00.00.00.05.dc.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
     has prereqs ip4
 
 # icmp4_error
@@ -1882,11 +1882,11 @@  icmp4_error { };
 
 # icmp4_error with icmp4.frag_mtu
 icmp4_error { eth.dst = ff:ff:ff:ff:ff:ff; icmp4.frag_mtu = 1500; output; }; output;
-    encodes as controller(userdata=00.00.00.0e.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.28.00.00.23.20.00.25.00.00.00.00.00.00.00.03.00.0e.00.00.00.0d.00.00.00.00.05.dc.00.00.00.04.00.04.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
+    encodes as controller(userdata=00.00.00.0e.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.18.00.00.23.20.00.08.6f.76.6e.00.00.00.00.00.05.dc.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
     has prereqs ip4
 
 icmp4.frag_mtu = 1500;
-    encodes as controller(userdata=00.00.00.0d.00.00.00.00.05.dc,pause)
+    encodes as note:6f.76.6e.00.00.00.00.00.05.dc
 
 # icmp6
 icmp6 { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output;
@@ -1910,11 +1910,11 @@  icmp6_error { };
 
 # icmp6_error with icmp6.frag_mtu
 icmp6_error { eth.dst = ff:ff:ff:ff:ff:ff; icmp6.frag_mtu = 1500; output; }; output;
-    encodes as controller(userdata=00.00.00.14.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.28.00.00.23.20.00.25.00.00.00.00.00.00.00.03.00.10.00.00.00.15.00.00.00.00.00.00.05.dc.00.04.00.04.00.00.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
+    encodes as controller(userdata=00.00.00.14.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.18.00.00.23.20.00.08.6f.76.6e.00.00.01.00.00.00.00.05.dc.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00),resubmit(,64)
     has prereqs ip6
 
 icmp6.frag_mtu = 1500;
-    encodes as controller(userdata=00.00.00.15.00.00.00.00.00.00.05.dc,pause)
+    encodes as note:6f.76.6e.00.00.01.00.00.00.00.05.dc
 
 # tcp_reset
 tcp_reset { eth.dst = ff:ff:ff:ff:ff:ff; output; }; output;