diff mbox

[ovs-dev] ovn-northd: Skip icmp4 packets destined for router ports from conntrack

Message ID 20170227055914.31953-1-nusiddiq@redhat.com
State Deferred
Delegated to: Russell Bryant
Headers show

Commit Message

Numan Siddique Feb. 27, 2017, 5:59 a.m. UTC
From: Numan Siddique <nusiddiq@redhat.com>

Presently, the icmp4 requests to the router gateway ip are sent to the
connectiont tracker, but the icmp4 reply packets responded by
'lr_in_ip_input' stage are not sent to the connection tracker.
Also no zone ids are assigned for the router ports. Because of which
the icmp4 request packets in the connection tracker will be in the
UNREPLIED state. If the CMS has added ACLs to drop packets which
are not in ESTABLISHED state, the icmp4 replies will be dropped.

To fix this issue, this patch adds a priority-110 flow in 'ls_in_pre_acl'
stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
to the router gateway ips.

Alternate solution would be to assign zone ids for the router ports
and send the packets from the router ports to the connection tracker.

The approach used in this patch seems to be simpler.

Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
---
 ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
 ovn/northd/ovn-northd.c     | 92 +++++++++++++++++++++++++++------------------
 2 files changed, 79 insertions(+), 42 deletions(-)

Comments

Ben Pfaff March 8, 2017, 9:32 p.m. UTC | #1
Hi Darrell and Daniele, do one of you have an opinion on whether this is
the right approach?

Thanks,

Ben.

On Mon, Feb 27, 2017 at 11:29:14AM +0530, nusiddiq@redhat.com wrote:
> From: Numan Siddique <nusiddiq@redhat.com>
> 
> Presently, the icmp4 requests to the router gateway ip are sent to the
> connectiont tracker, but the icmp4 reply packets responded by
> 'lr_in_ip_input' stage are not sent to the connection tracker.
> Also no zone ids are assigned for the router ports. Because of which
> the icmp4 request packets in the connection tracker will be in the
> UNREPLIED state. If the CMS has added ACLs to drop packets which
> are not in ESTABLISHED state, the icmp4 replies will be dropped.
> 
> To fix this issue, this patch adds a priority-110 flow in 'ls_in_pre_acl'
> stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
> to the router gateway ips.
> 
> Alternate solution would be to assign zone ids for the router ports
> and send the packets from the router ports to the connection tracker.
> 
> The approach used in this patch seems to be simpler.
> 
> Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
> ---
>  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
>  ovn/northd/ovn-northd.c     | 92 +++++++++++++++++++++++++++------------------
>  2 files changed, 79 insertions(+), 42 deletions(-)
> 
> diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
> index ab8fd88..06465ff 100644
> --- a/ovn/northd/ovn-northd.8.xml
> +++ b/ovn/northd/ovn-northd.8.xml
> @@ -245,11 +245,30 @@
>      <p>
>        This table prepares flows for possible stateful ACL processing in
>        ingress table <code>ACLs</code>.  It contains a priority-0 flow that
> -      simply moves traffic to the next table.  If stateful ACLs are used in the
> -      logical datapath, a priority-100 flow is added that sets a hint
> -      (with <code>reg0[0] = 1; next;</code>) for table
> -      <code>Pre-stateful</code> to send IP packets to the connection tracker
> -      before eventually advancing to ingress table <code>ACLs</code>.
> +      simply moves traffic to the next table. It adds the following flows if
> +      stateful ACLs are used in the logical datapath.
> +
> +      <ul>
> +        <li>
> +          A priority-100 flow is added that sets a hint
> +          (with <code>reg0[0] = 1; next;</code>) for table
> +          <code>Pre-stateful</code> to send IP packets to the connection tracker
> +          before eventually advancing to ingress table <code>ACLs</code>.
> +        </li>
> +
> +        <li>
> +          A priority-110 flow which doesn't set the connection tracking hint for
> +          the packets from the router ports.
> +        </li>
> +
> +        <li>
> +          A priority-110 flow which doesn't set the connection tracking hint
> +          for the packets with the match
> +          <code>ip4 &amp;&amp; icmp4 &amp;&amp; ip4.dst = {<var>R</var>}</code>
> +          where <var>R</var> is the IPv4 address(es) of the router ports
> +          connected to the logical datapath.
> +        </li>
> +      </ul>
>      </p>
>  
>      <h3>Ingress Table 4: Pre-LB</h3>
> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
> index 03dc850..4fc86f4 100644
> --- a/ovn/northd/ovn-northd.c
> +++ b/ovn/northd/ovn-northd.c
> @@ -2339,6 +2339,43 @@ has_stateful_acl(struct ovn_datapath *od)
>  }
>  
>  static void
> +op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
> +{
> +    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
> +        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
> +        return;
> +    }
> +
> +    ds_put_cstr(ds, "{");
> +    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
> +        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
> +        if (add_bcast) {
> +            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
> +        }
> +    }
> +    ds_chomp(ds, ' ');
> +    ds_chomp(ds, ',');
> +    ds_put_cstr(ds, "}");
> +}
> +
> +static void
> +op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
> +{
> +    if (op->lrp_networks.n_ipv6_addrs == 1) {
> +        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
> +        return;
> +    }
> +
> +    ds_put_cstr(ds, "{");
> +    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> +        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
> +    }
> +    ds_chomp(ds, ' ');
> +    ds_chomp(ds, ',');
> +    ds_put_cstr(ds, "}");
> +}
> +
> +static void
>  build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
>  {
>      bool has_stateful = has_stateful_acl(od);
> @@ -2378,6 +2415,24 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
>  
>              ds_destroy(&match_in);
>              ds_destroy(&match_out);
> +
> +            /* The icmp4 request to the router ip (eg. ip4.dst = 10.0.0.1) will
> +             * be sent to the CT, but the reply from the router ip is not sent
> +             * to the CT. Also the zone id's are not assigned for the router
> +             * ports. Because of which the icmp request packet in the CT will
> +             * be in the UNREPLIED state.
> +             * The icmp4 reply packets could be dropped if the CMS has added
> +             * ACLs to drop packets which are not in ct.est state.
> +             * So, don't send the icpmp4 request packet for the router ips
> +             * to the CT.
> +             */
> +            if (op->peer && op->peer->lrp_networks.n_ipv4_addrs) {
> +                struct ds match = DS_EMPTY_INITIALIZER;
> +                ds_put_format(&match, "ip4 && icmp4 && ip4.dst == ");
> +                op_put_v4_networks(&match, op->peer, false);
> +                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
> +                                          ds_cstr(&match), "next;");
> +            }
>          }
>          /* Ingress and Egress Pre-ACL Table (Priority 110).
>           *
> @@ -3644,43 +3699,6 @@ free_prefix_s:
>      free(prefix_s);
>  }
>  
> -static void
> -op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
> -{
> -    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
> -        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
> -        return;
> -    }
> -
> -    ds_put_cstr(ds, "{");
> -    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
> -        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
> -        if (add_bcast) {
> -            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
> -        }
> -    }
> -    ds_chomp(ds, ' ');
> -    ds_chomp(ds, ',');
> -    ds_put_cstr(ds, "}");
> -}
> -
> -static void
> -op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
> -{
> -    if (op->lrp_networks.n_ipv6_addrs == 1) {
> -        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
> -        return;
> -    }
> -
> -    ds_put_cstr(ds, "{");
> -    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
> -        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
> -    }
> -    ds_chomp(ds, ' ');
> -    ds_chomp(ds, ',');
> -    ds_put_cstr(ds, "}");
> -}
> -
>  static const char *
>  get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
>  {
> -- 
> 2.9.3
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Russell Bryant March 8, 2017, 9:36 p.m. UTC | #2
I'm also looking at this one.  I was trying to review today, but have
been slowed down by getting an OpenStack test environment working for
testing this and looking closer.

On Wed, Mar 8, 2017 at 4:32 PM, Ben Pfaff <blp@ovn.org> wrote:
> Hi Darrell and Daniele, do one of you have an opinion on whether this is
> the right approach?
>
> Thanks,
>
> Ben.
>
> On Mon, Feb 27, 2017 at 11:29:14AM +0530, nusiddiq@redhat.com wrote:
>> From: Numan Siddique <nusiddiq@redhat.com>
>>
>> Presently, the icmp4 requests to the router gateway ip are sent to the
>> connectiont tracker, but the icmp4 reply packets responded by
>> 'lr_in_ip_input' stage are not sent to the connection tracker.
>> Also no zone ids are assigned for the router ports. Because of which
>> the icmp4 request packets in the connection tracker will be in the
>> UNREPLIED state. If the CMS has added ACLs to drop packets which
>> are not in ESTABLISHED state, the icmp4 replies will be dropped.
>>
>> To fix this issue, this patch adds a priority-110 flow in 'ls_in_pre_acl'
>> stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
>> to the router gateway ips.
>>
>> Alternate solution would be to assign zone ids for the router ports
>> and send the packets from the router ports to the connection tracker.
>>
>> The approach used in this patch seems to be simpler.
>>
>> Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
>> ---
>>  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
>>  ovn/northd/ovn-northd.c     | 92 +++++++++++++++++++++++++++------------------
>>  2 files changed, 79 insertions(+), 42 deletions(-)
>>
>> diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
>> index ab8fd88..06465ff 100644
>> --- a/ovn/northd/ovn-northd.8.xml
>> +++ b/ovn/northd/ovn-northd.8.xml
>> @@ -245,11 +245,30 @@
>>      <p>
>>        This table prepares flows for possible stateful ACL processing in
>>        ingress table <code>ACLs</code>.  It contains a priority-0 flow that
>> -      simply moves traffic to the next table.  If stateful ACLs are used in the
>> -      logical datapath, a priority-100 flow is added that sets a hint
>> -      (with <code>reg0[0] = 1; next;</code>) for table
>> -      <code>Pre-stateful</code> to send IP packets to the connection tracker
>> -      before eventually advancing to ingress table <code>ACLs</code>.
>> +      simply moves traffic to the next table. It adds the following flows if
>> +      stateful ACLs are used in the logical datapath.
>> +
>> +      <ul>
>> +        <li>
>> +          A priority-100 flow is added that sets a hint
>> +          (with <code>reg0[0] = 1; next;</code>) for table
>> +          <code>Pre-stateful</code> to send IP packets to the connection tracker
>> +          before eventually advancing to ingress table <code>ACLs</code>.
>> +        </li>
>> +
>> +        <li>
>> +          A priority-110 flow which doesn't set the connection tracking hint for
>> +          the packets from the router ports.
>> +        </li>
>> +
>> +        <li>
>> +          A priority-110 flow which doesn't set the connection tracking hint
>> +          for the packets with the match
>> +          <code>ip4 &amp;&amp; icmp4 &amp;&amp; ip4.dst = {<var>R</var>}</code>
>> +          where <var>R</var> is the IPv4 address(es) of the router ports
>> +          connected to the logical datapath.
>> +        </li>
>> +      </ul>
>>      </p>
>>
>>      <h3>Ingress Table 4: Pre-LB</h3>
>> diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
>> index 03dc850..4fc86f4 100644
>> --- a/ovn/northd/ovn-northd.c
>> +++ b/ovn/northd/ovn-northd.c
>> @@ -2339,6 +2339,43 @@ has_stateful_acl(struct ovn_datapath *od)
>>  }
>>
>>  static void
>> +op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
>> +{
>> +    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
>> +        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
>> +        return;
>> +    }
>> +
>> +    ds_put_cstr(ds, "{");
>> +    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
>> +        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
>> +        if (add_bcast) {
>> +            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
>> +        }
>> +    }
>> +    ds_chomp(ds, ' ');
>> +    ds_chomp(ds, ',');
>> +    ds_put_cstr(ds, "}");
>> +}
>> +
>> +static void
>> +op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
>> +{
>> +    if (op->lrp_networks.n_ipv6_addrs == 1) {
>> +        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
>> +        return;
>> +    }
>> +
>> +    ds_put_cstr(ds, "{");
>> +    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>> +        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
>> +    }
>> +    ds_chomp(ds, ' ');
>> +    ds_chomp(ds, ',');
>> +    ds_put_cstr(ds, "}");
>> +}
>> +
>> +static void
>>  build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
>>  {
>>      bool has_stateful = has_stateful_acl(od);
>> @@ -2378,6 +2415,24 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
>>
>>              ds_destroy(&match_in);
>>              ds_destroy(&match_out);
>> +
>> +            /* The icmp4 request to the router ip (eg. ip4.dst = 10.0.0.1) will
>> +             * be sent to the CT, but the reply from the router ip is not sent
>> +             * to the CT. Also the zone id's are not assigned for the router
>> +             * ports. Because of which the icmp request packet in the CT will
>> +             * be in the UNREPLIED state.
>> +             * The icmp4 reply packets could be dropped if the CMS has added
>> +             * ACLs to drop packets which are not in ct.est state.
>> +             * So, don't send the icpmp4 request packet for the router ips
>> +             * to the CT.
>> +             */
>> +            if (op->peer && op->peer->lrp_networks.n_ipv4_addrs) {
>> +                struct ds match = DS_EMPTY_INITIALIZER;
>> +                ds_put_format(&match, "ip4 && icmp4 && ip4.dst == ");
>> +                op_put_v4_networks(&match, op->peer, false);
>> +                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
>> +                                          ds_cstr(&match), "next;");
>> +            }
>>          }
>>          /* Ingress and Egress Pre-ACL Table (Priority 110).
>>           *
>> @@ -3644,43 +3699,6 @@ free_prefix_s:
>>      free(prefix_s);
>>  }
>>
>> -static void
>> -op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
>> -{
>> -    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
>> -        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
>> -        return;
>> -    }
>> -
>> -    ds_put_cstr(ds, "{");
>> -    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
>> -        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
>> -        if (add_bcast) {
>> -            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
>> -        }
>> -    }
>> -    ds_chomp(ds, ' ');
>> -    ds_chomp(ds, ',');
>> -    ds_put_cstr(ds, "}");
>> -}
>> -
>> -static void
>> -op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
>> -{
>> -    if (op->lrp_networks.n_ipv6_addrs == 1) {
>> -        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
>> -        return;
>> -    }
>> -
>> -    ds_put_cstr(ds, "{");
>> -    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>> -        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
>> -    }
>> -    ds_chomp(ds, ' ');
>> -    ds_chomp(ds, ',');
>> -    ds_put_cstr(ds, "}");
>> -}
>> -
>>  static const char *
>>  get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
>>  {
>> --
>> 2.9.3
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Darrell Ball March 9, 2017, 4:30 a.m. UTC | #3
Daniele and I discussed

1) Seems ok in that there is security at the VM LP so weakening the 
Check at the router port for ICMP seems ok.
2) The same applies to V6 ?

Thanks 


On 3/8/17, 1:32 PM, "ovs-dev-bounces@openvswitch.org on behalf of Ben Pfaff" <ovs-dev-bounces@openvswitch.org on behalf of blp@ovn.org> wrote:

    Hi Darrell and Daniele, do one of you have an opinion on whether this is
    the right approach?
    
    Thanks,
    
    Ben.
    
    On Mon, Feb 27, 2017 at 11:29:14AM +0530, nusiddiq@redhat.com wrote:
    > From: Numan Siddique <nusiddiq@redhat.com>
    > 
    > Presently, the icmp4 requests to the router gateway ip are sent to the
    > connectiont tracker, but the icmp4 reply packets responded by
    > 'lr_in_ip_input' stage are not sent to the connection tracker.
    > Also no zone ids are assigned for the router ports. Because of which
    > the icmp4 request packets in the connection tracker will be in the
    > UNREPLIED state. If the CMS has added ACLs to drop packets which
    > are not in ESTABLISHED state, the icmp4 replies will be dropped.
    > 
    > To fix this issue, this patch adds a priority-110 flow in 'ls_in_pre_acl'
    > stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
    > to the router gateway ips.
    > 
    > Alternate solution would be to assign zone ids for the router ports
    > and send the packets from the router ports to the connection tracker.
    > 
    > The approach used in this patch seems to be simpler.
    > 
    > Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
    > ---
    >  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
    >  ovn/northd/ovn-northd.c     | 92 +++++++++++++++++++++++++++------------------
    >  2 files changed, 79 insertions(+), 42 deletions(-)
    > 
    > diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
    > index ab8fd88..06465ff 100644
    > --- a/ovn/northd/ovn-northd.8.xml
    > +++ b/ovn/northd/ovn-northd.8.xml
    > @@ -245,11 +245,30 @@
    >      <p>
    >        This table prepares flows for possible stateful ACL processing in
    >        ingress table <code>ACLs</code>.  It contains a priority-0 flow that
    > -      simply moves traffic to the next table.  If stateful ACLs are used in the
    > -      logical datapath, a priority-100 flow is added that sets a hint
    > -      (with <code>reg0[0] = 1; next;</code>) for table
    > -      <code>Pre-stateful</code> to send IP packets to the connection tracker
    > -      before eventually advancing to ingress table <code>ACLs</code>.
    > +      simply moves traffic to the next table. It adds the following flows if
    > +      stateful ACLs are used in the logical datapath.
    > +
    > +      <ul>
    > +        <li>
    > +          A priority-100 flow is added that sets a hint
    > +          (with <code>reg0[0] = 1; next;</code>) for table
    > +          <code>Pre-stateful</code> to send IP packets to the connection tracker
    > +          before eventually advancing to ingress table <code>ACLs</code>.
    > +        </li>
    > +
    > +        <li>
    > +          A priority-110 flow which doesn't set the connection tracking hint for
    > +          the packets from the router ports.

This above comment is not needed as there is only the below flow added.


    > +        </li>
    > +
    > +        <li>
    > +          A priority-110 flow which doesn't set the connection tracking hint
    > +          for the packets with the match
    > +          <code>ip4 &amp;&amp; icmp4 &amp;&amp; ip4.dst = {<var>R</var>}</code>
    > +          where <var>R</var> is the IPv4 address(es) of the router ports
    > +          connected to the logical datapath.
    > +        </li>
    > +      </ul>
    >      </p>
    >  
    >      <h3>Ingress Table 4: Pre-LB</h3>
    > diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
    > index 03dc850..4fc86f4 100644
    > --- a/ovn/northd/ovn-northd.c
    > +++ b/ovn/northd/ovn-northd.c
    > @@ -2339,6 +2339,43 @@ has_stateful_acl(struct ovn_datapath *od)
    >  }
    >  
    >  static void
    > +op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
    > +{
    > +    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
    > +        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
    > +        return;
    > +    }
    > +
    > +    ds_put_cstr(ds, "{");
    > +    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
    > +        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
    > +        if (add_bcast) {
    > +            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
    > +        }
    > +    }
    > +    ds_chomp(ds, ' ');
    > +    ds_chomp(ds, ',');
    > +    ds_put_cstr(ds, "}");
    > +}
    > +
    > +static void
    > +op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
    > +{
    > +    if (op->lrp_networks.n_ipv6_addrs == 1) {
    > +        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
    > +        return;
    > +    }
    > +
    > +    ds_put_cstr(ds, "{");
    > +    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
    > +        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
    > +    }
    > +    ds_chomp(ds, ' ');
    > +    ds_chomp(ds, ',');
    > +    ds_put_cstr(ds, "}");
    > +}
    > +
    > +static void
    >  build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
    >  {
    >      bool has_stateful = has_stateful_acl(od);
    > @@ -2378,6 +2415,24 @@ build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
    >  
    >              ds_destroy(&match_in);
    >              ds_destroy(&match_out);
    > +
    > +            /* The icmp4 request to the router ip (eg. ip4.dst = 10.0.0.1) will
    > +             * be sent to the CT, but the reply from the router ip is not sent
    > +             * to the CT. Also the zone id's are not assigned for the router
    > +             * ports. Because of which the icmp request packet in the CT will
    > +             * be in the UNREPLIED state.
    > +             * The icmp4 reply packets could be dropped if the CMS has added
    > +             * ACLs to drop packets which are not in ct.est state.
    > +             * So, don't send the icpmp4 request packet for the router ips
    > +             * to the CT.
    > +             */
    > +            if (op->peer && op->peer->lrp_networks.n_ipv4_addrs) {
    > +                struct ds match = DS_EMPTY_INITIALIZER;
    > +                ds_put_format(&match, "ip4 && icmp4 && ip4.dst == ");
    > +                op_put_v4_networks(&match, op->peer, false);
    > +                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
    > +                                          ds_cstr(&match), "next;");
    > +            }
    >          }
    >          /* Ingress and Egress Pre-ACL Table (Priority 110).
    >           *
    > @@ -3644,43 +3699,6 @@ free_prefix_s:
    >      free(prefix_s);
    >  }
    >  
    > -static void
    > -op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
    > -{
    > -    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
    > -        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
    > -        return;
    > -    }
    > -
    > -    ds_put_cstr(ds, "{");
    > -    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
    > -        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
    > -        if (add_bcast) {
    > -            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
    > -        }
    > -    }
    > -    ds_chomp(ds, ' ');
    > -    ds_chomp(ds, ',');
    > -    ds_put_cstr(ds, "}");
    > -}
    > -
    > -static void
    > -op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
    > -{
    > -    if (op->lrp_networks.n_ipv6_addrs == 1) {
    > -        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
    > -        return;
    > -    }
    > -
    > -    ds_put_cstr(ds, "{");
    > -    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
    > -        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
    > -    }
    > -    ds_chomp(ds, ' ');
    > -    ds_chomp(ds, ',');
    > -    ds_put_cstr(ds, "}");
    > -}
    > -
    >  static const char *
    >  get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
    >  {
    > -- 
    > 2.9.3
    > 
    > _______________________________________________
    > dev mailing list
    > dev@openvswitch.org
    > https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=1bF0sSIIafODfWGrMWfrLDZp6Gan2-gIhuE5AqyVSS8&s=Gt9_RJ5SYYMdi7iiM_NPyk3NXkB4wLYbxyx_pQjlRrA&e= 
    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=1bF0sSIIafODfWGrMWfrLDZp6Gan2-gIhuE5AqyVSS8&s=Gt9_RJ5SYYMdi7iiM_NPyk3NXkB4wLYbxyx_pQjlRrA&e=
Numan Siddique March 9, 2017, 6:41 a.m. UTC | #4
Thanks for the review and comments.


On Thu, Mar 9, 2017 at 10:00 AM, Darrell Ball <dball@vmware.com> wrote:

> Daniele and I discussed
>
> 1) Seems ok in that there is security at the VM LP so weakening the
> Check at the router port for ICMP seems ok.
> 2) The same applies to V6 ?
>

​I need to test this before confirming.

​


>
> Thanks
>
>
> On 3/8/17, 1:32 PM, "ovs-dev-bounces@openvswitch.org on behalf of Ben
> Pfaff" <ovs-dev-bounces@openvswitch.org on behalf of blp@ovn.org> wrote:
>
>     Hi Darrell and Daniele, do one of you have an opinion on whether this
> is
>     the right approach?
>
>     Thanks,
>
>     Ben.
>
>     On Mon, Feb 27, 2017 at 11:29:14AM +0530, nusiddiq@redhat.com wrote:
>     > From: Numan Siddique <nusiddiq@redhat.com>
>     >
>     > Presently, the icmp4 requests to the router gateway ip are sent to
> the
>     > connectiont tracker, but the icmp4 reply packets responded by
>     > 'lr_in_ip_input' stage are not sent to the connection tracker.
>     > Also no zone ids are assigned for the router ports. Because of which
>     > the icmp4 request packets in the connection tracker will be in the
>     > UNREPLIED state. If the CMS has added ACLs to drop packets which
>     > are not in ESTABLISHED state, the icmp4 replies will be dropped.
>     >
>     > To fix this issue, this patch adds a priority-110 flow in
> 'ls_in_pre_acl'
>     > stage which doesn't set reg0[0] = 1 for icmp4 request packets
> destined
>     > to the router gateway ips.
>     >
>     > Alternate solution would be to assign zone ids for the router ports
>     > and send the packets from the router ports to the connection tracker.
>     >
>     > The approach used in this patch seems to be simpler.
>     >
>     > Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
>     > ---
>     >  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
>     >  ovn/northd/ovn-northd.c     | 92 +++++++++++++++++++++++++++---
> ---------------
>     >  2 files changed, 79 insertions(+), 42 deletions(-)
>     >
>     > diff --git a/ovn/northd/ovn-northd.8.xml
> b/ovn/northd/ovn-northd.8.xml
>     > index ab8fd88..06465ff 100644
>     > --- a/ovn/northd/ovn-northd.8.xml
>     > +++ b/ovn/northd/ovn-northd.8.xml
>     > @@ -245,11 +245,30 @@
>     >      <p>
>     >        This table prepares flows for possible stateful ACL
> processing in
>     >        ingress table <code>ACLs</code>.  It contains a priority-0
> flow that
>     > -      simply moves traffic to the next table.  If stateful ACLs are
> used in the
>     > -      logical datapath, a priority-100 flow is added that sets a
> hint
>     > -      (with <code>reg0[0] = 1; next;</code>) for table
>     > -      <code>Pre-stateful</code> to send IP packets to the
> connection tracker
>     > -      before eventually advancing to ingress table
> <code>ACLs</code>.
>     > +      simply moves traffic to the next table. It adds the following
> flows if
>     > +      stateful ACLs are used in the logical datapath.
>     > +
>     > +      <ul>
>     > +        <li>
>     > +          A priority-100 flow is added that sets a hint
>     > +          (with <code>reg0[0] = 1; next;</code>) for table
>     > +          <code>Pre-stateful</code> to send IP packets to the
> connection tracker
>     > +          before eventually advancing to ingress table
> <code>ACLs</code>.
>     > +        </li>
>     > +
>     > +        <li>
>     > +          A priority-110 flow which doesn't set the connection
> tracking hint for
>     > +          the packets from the router ports.
>
> This above comment is not needed as there is only the below flow added.
>

"build_pre_acls" adds the above flow (though this patch doesn't add this
flow and the comment is somewhat unrelated to this patch), but the
documentation is missing. I can spin another patch if it's not fine to have
this change in this patch.

https://github.com/openvswitch/ovs/blob/master/ovn/northd/ovn-northd.c#L2460



>
>     > +        </li>
>     > +
>     > +        <li>
>     > +          A priority-110 flow which doesn't set the connection
> tracking hint
>     > +          for the packets with the match
>     > +          <code>ip4 &amp;&amp; icmp4 &amp;&amp; ip4.dst =
> {<var>R</var>}</code>
>     > +          where <var>R</var> is the IPv4 address(es) of the router
> ports
>     > +          connected to the logical datapath.
>     > +        </li>
>     > +      </ul>
>     >      </p>
>     >
>     >      <h3>Ingress Table 4: Pre-LB</h3>
>     > diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
>     > index 03dc850..4fc86f4 100644
>     > --- a/ovn/northd/ovn-northd.c
>     > +++ b/ovn/northd/ovn-northd.c
>     > @@ -2339,6 +2339,43 @@ has_stateful_acl(struct ovn_datapath *od)
>     >  }
>     >
>     >  static void
>     > +op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool
> add_bcast)
>     > +{
>     > +    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
>     > +        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0]
> .addr_s);
>     > +        return;
>     > +    }
>     > +
>     > +    ds_put_cstr(ds, "{");
>     > +    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
>     > +        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i]
> .addr_s);
>     > +        if (add_bcast) {
>     > +            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i]
> .bcast_s);
>     > +        }
>     > +    }
>     > +    ds_chomp(ds, ' ');
>     > +    ds_chomp(ds, ',');
>     > +    ds_put_cstr(ds, "}");
>     > +}
>     > +
>     > +static void
>     > +op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
>     > +{
>     > +    if (op->lrp_networks.n_ipv6_addrs == 1) {
>     > +        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0]
> .addr_s);
>     > +        return;
>     > +    }
>     > +
>     > +    ds_put_cstr(ds, "{");
>     > +    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>     > +        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i]
> .addr_s);
>     > +    }
>     > +    ds_chomp(ds, ' ');
>     > +    ds_chomp(ds, ',');
>     > +    ds_put_cstr(ds, "}");
>     > +}
>     > +
>     > +static void
>     >  build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
>     >  {
>     >      bool has_stateful = has_stateful_acl(od);
>     > @@ -2378,6 +2415,24 @@ build_pre_acls(struct ovn_datapath *od,
> struct hmap *lflows)
>     >
>     >              ds_destroy(&match_in);
>     >              ds_destroy(&match_out);
>     > +
>     > +            /* The icmp4 request to the router ip (eg. ip4.dst =
> 10.0.0.1) will
>     > +             * be sent to the CT, but the reply from the router ip
> is not sent
>     > +             * to the CT. Also the zone id's are not assigned for
> the router
>     > +             * ports. Because of which the icmp request packet in
> the CT will
>     > +             * be in the UNREPLIED state.
>     > +             * The icmp4 reply packets could be dropped if the CMS
> has added
>     > +             * ACLs to drop packets which are not in ct.est state.
>     > +             * So, don't send the icpmp4 request packet for the
> router ips
>     > +             * to the CT.
>     > +             */
>     > +            if (op->peer && op->peer->lrp_networks.n_ipv4_addrs) {
>     > +                struct ds match = DS_EMPTY_INITIALIZER;
>     > +                ds_put_format(&match, "ip4 && icmp4 && ip4.dst ==
> ");
>     > +                op_put_v4_networks(&match, op->peer, false);
>     > +                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
>     > +                                          ds_cstr(&match), "next;");
>     > +            }
>     >          }
>     >          /* Ingress and Egress Pre-ACL Table (Priority 110).
>     >           *
>     > @@ -3644,43 +3699,6 @@ free_prefix_s:
>     >      free(prefix_s);
>     >  }
>     >
>     > -static void
>     > -op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool
> add_bcast)
>     > -{
>     > -    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
>     > -        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0]
> .addr_s);
>     > -        return;
>     > -    }
>     > -
>     > -    ds_put_cstr(ds, "{");
>     > -    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
>     > -        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i]
> .addr_s);
>     > -        if (add_bcast) {
>     > -            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i]
> .bcast_s);
>     > -        }
>     > -    }
>     > -    ds_chomp(ds, ' ');
>     > -    ds_chomp(ds, ',');
>     > -    ds_put_cstr(ds, "}");
>     > -}
>     > -
>     > -static void
>     > -op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
>     > -{
>     > -    if (op->lrp_networks.n_ipv6_addrs == 1) {
>     > -        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0]
> .addr_s);
>     > -        return;
>     > -    }
>     > -
>     > -    ds_put_cstr(ds, "{");
>     > -    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
>     > -        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i]
> .addr_s);
>     > -    }
>     > -    ds_chomp(ds, ' ');
>     > -    ds_chomp(ds, ',');
>     > -    ds_put_cstr(ds, "}");
>     > -}
>     > -
>     >  static const char *
>     >  get_force_snat_ip(struct ovn_datapath *od, const char *key_type,
> ovs_be32 *ip)
>     >  {
>     > --
>     > 2.9.3
>     >
>     > _______________________________________________
>     > dev mailing list
>     > dev@openvswitch.org
>     > https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.
> openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=
> uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=
> 1bF0sSIIafODfWGrMWfrLDZp6Gan2-gIhuE5AqyVSS8&s=Gt9_RJ5SYYMdi7iiM_
> NPyk3NXkB4wLYbxyx_pQjlRrA&e=
>     _______________________________________________
>     dev mailing list
>     dev@openvswitch.org
>     https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.
> openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=
> uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=
> 1bF0sSIIafODfWGrMWfrLDZp6Gan2-gIhuE5AqyVSS8&s=Gt9_RJ5SYYMdi7iiM_
> NPyk3NXkB4wLYbxyx_pQjlRrA&e=
>
>
>
Russell Bryant March 9, 2017, 8:14 p.m. UTC | #5
On Mon, Feb 27, 2017 at 12:59 AM,  <nusiddiq@redhat.com> wrote:
> From: Numan Siddique <nusiddiq@redhat.com>
>
> Presently, the icmp4 requests to the router gateway ip are sent to the
> connectiont tracker, but the icmp4 reply packets responded by
> 'lr_in_ip_input' stage are not sent to the connection tracker.
> Also no zone ids are assigned for the router ports. Because of which
> the icmp4 request packets in the connection tracker will be in the
> UNREPLIED state. If the CMS has added ACLs to drop packets which
> are not in ESTABLISHED state, the icmp4 replies will be dropped.
>
> To fix this issue, this patch adds a priority-110 flow in 'ls_in_pre_acl'
> stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
> to the router gateway ips.
>
> Alternate solution would be to assign zone ids for the router ports
> and send the packets from the router ports to the connection tracker.
>
> The approach used in this patch seems to be simpler.
>
> Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
> ---
>  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
>  ovn/northd/ovn-northd.c     | 92 +++++++++++++++++++++++++++------------------
>  2 files changed, 79 insertions(+), 42 deletions(-)


Can you clarify where the packet gets dropped?  It seems like we have
flows trying to handle this already.  We skip conntrack for the router
interface ports.  Roughly, I would expect:

1) inport == lport1, logical switch ingress pipeline.  packet sent
through conntrack.

2) outport == router interface port, logical switch egress pipeline.
packet *skips* conntrack since it's a router interface.

3) router datapath, icmp respose generated, sent back to logical switch ...

4) inport == router interface, logical switch ingress pipeline, packet
*skips* conntrack since it's a router interface

5) outport == lport1, logical switch egress pipeline, packet sent
through conntrack, which should find an existing conntrack entry
established in step 1.  packet delivered to lport1.

Where in the above process is it coming apart?  If it's broken, it
sounds like a more general problem than ICMP.  It would be any type of
traffic to the router IP where we expect a response.
Numan Siddique March 10, 2017, 4:52 a.m. UTC | #6
Thanks for the review. Please see inline.


On Fri, Mar 10, 2017 at 1:44 AM, Russell Bryant <russell@ovn.org> wrote:

> On Mon, Feb 27, 2017 at 12:59 AM,  <nusiddiq@redhat.com> wrote:
> > From: Numan Siddique <nusiddiq@redhat.com>
> >
> > Presently, the icmp4 requests to the router gateway ip are sent to the
> > connectiont tracker, but the icmp4 reply packets responded by
> > 'lr_in_ip_input' stage are not sent to the connection tracker.
> > Also no zone ids are assigned for the router ports. Because of which
> > the icmp4 request packets in the connection tracker will be in the
> > UNREPLIED state. If the CMS has added ACLs to drop packets which
> > are not in ESTABLISHED state, the icmp4 replies will be dropped.
> >
> > To fix this issue, this patch adds a priority-110 flow in 'ls_in_pre_acl'
> > stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
> > to the router gateway ips.
> >
> > Alternate solution would be to assign zone ids for the router ports
> > and send the packets from the router ports to the connection tracker.
> >
> > The approach used in this patch seems to be simpler.
> >
> > Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
> > ---
> >  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
> >  ovn/northd/ovn-northd.c     | 92 +++++++++++++++++++++++++++---
> ---------------
> >  2 files changed, 79 insertions(+), 42 deletions(-)
>
>
> Can you clarify where the packet gets dropped?  It seems like we have
> flows trying to handle this already.  We skip conntrack for the router
> interface ports.  Roughly, I would expect:
>
>
​
​The packets are getting dropped because of the flow

​
table=52, n_packets=40, n_bytes=3920,
 priority=2001,ct_state=-est+trk,ip,reg15=0x3,metadata=0x3 ac
tions=drop

This flow corresponds to logical flow -
table=4 (ls_out_acl         ), priority=2001 , match=((!ct.est || (ct.est
&& ct_label.blocked == 1)) && (outport ==
"ce575934-f308-45b8-b9cd-457646da213d" && ip)), action=(drop;)
​


​This logical flow is added by neutron OVN plugin when the port is
configured with security groups. When I clear the security groups for the
port or add a specific security group rule to allow icmp, it works fine.
​


> 1) inport == lport1, logical switch ingress pipeline.  packet sent
> through conntrack.
>
> 2) outport == router interface port, logical switch egress pipeline.
> packet *skips* conntrack since it's a router interface.
>
> 3) router datapath, icmp respose generated, sent back to logical switch ...
>
> 4) inport == router interface, logical switch ingress pipeline, packet
> *skips* conntrack since it's a router interface
>
> 5) outport == lport1, logical switch egress pipeline, packet sent
> through conntrack, which should find an existing conntrack entry
> established in step 1.  packet delivered to lport1.
>
>
The connection tracking entry ​has this

$ sudo conntrack -L | grep 10.0.0.1
conntrack v1.4.3 (conntrack-tools): 54 flow entries have been shown.
icmp     1 29 src=10.0.0.6 dst=10.0.0.1 type=8 code=0 id=7486 [UNREPLIED]
src=10.0.0.1 dst=10.0.0.6 type=0 code=0 id=7486 mark=0
secctx=system_u:object_r:unlabeled_t:s0 zone=1 use=1

You think this should be addressed by neutron OVN plugin ?



Where in the above process is it coming apart?  If it's broken, it
> sounds like a more general problem than ICMP.  It would be any type of
> traffic to the router IP where we expect a response.
>
> --
> Russell Bryant
>
Numan Siddique March 10, 2017, 2:09 p.m. UTC | #7
On Fri, Mar 10, 2017 at 10:22 AM, Numan Siddique <nusiddiq@redhat.com>
wrote:

> Thanks for the review. Please see inline.
>
>
> On Fri, Mar 10, 2017 at 1:44 AM, Russell Bryant <russell@ovn.org> wrote:
>
>> On Mon, Feb 27, 2017 at 12:59 AM,  <nusiddiq@redhat.com> wrote:
>> > From: Numan Siddique <nusiddiq@redhat.com>
>> >
>> > Presently, the icmp4 requests to the router gateway ip are sent to the
>> > connectiont tracker, but the icmp4 reply packets responded by
>> > 'lr_in_ip_input' stage are not sent to the connection tracker.
>> > Also no zone ids are assigned for the router ports. Because of which
>> > the icmp4 request packets in the connection tracker will be in the
>> > UNREPLIED state.
>
>
​​Also, this commit message is not accurate.
​

> If the CMS has added ACLs to drop packets which
>> > are not in ESTABLISHED state, the icmp4 replies will be dropped.
>> >
>> > To fix this issue, this patch adds a priority-110 flow in
>> 'ls_in_pre_acl'
>> > stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
>> > to the router gateway ips.
>
> >
>> > Alternate solution would be to assign zone ids for the router ports
>> > and send the packets from the router ports to the connection tracker.
>> >
>> > The approach used in this patch seems to be simpler.
>> >
>> > Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
>> > ---
>> >  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
>> >  ovn/northd/ovn-northd.c     | 92 +++++++++++++++++++++++++++---
>> ---------------
>> >  2 files changed, 79 insertions(+), 42 deletions(-)
>>
>>
>> Can you clarify where the packet gets dropped?  It seems like we have
>> flows trying to handle this already.  We skip conntrack for the router
>> interface ports.  Roughly, I would expect:
>>
>>
> ​
> ​The packets are getting dropped because of the flow
>
> ​
> table=52, n_packets=40, n_bytes=3920,  priority=2001,ct_state=-est+trk,ip,reg15=0x3,metadata=0x3
> ac
> tions=drop
>
> This flow corresponds to logical flow -
> table=4 (ls_out_acl         ), priority=2001 , match=((!ct.est || (ct.est
> && ct_label.blocked == 1)) && (outport == "ce575934-f308-45b8-b9cd-457646da213d"
> && ip)), action=(drop;)
> ​
>
>
> ​This logical flow is added by neutron OVN plugin when the port is
> configured with security groups. When I clear the security groups for the
> port or add a specific security group rule to allow icmp, it works fine.
> ​
>
>
>> 1) inport == lport1, logical switch ingress pipeline.  packet sent
>> through conntrack.
>>
>> 2) outport == router interface port, logical switch egress pipeline.
>> packet *skips* conntrack since it's a router interface.
>>
>> 3) router datapath, icmp respose generated, sent back to logical switch
>> ...
>>
>> 4) inport == router interface, logical switch ingress pipeline, packet
>> *skips* conntrack since it's a router interface
>>
>> 5) outport == lport1, logical switch egress pipeline, packet sent
>> through conntrack, which should find an existing conntrack entry
>> established in step 1.  packet delivered to lport1.
>>
>>
> The connection tracking entry ​has this
>
> $ sudo conntrack -L | grep 10.0.0.1
> conntrack v1.4.3 (conntrack-tools): 54 flow entries have been shown.
> icmp     1 29 src=10.0.0.6 dst=10.0.0.1 type=8 code=0 id=7486 [UNREPLIED]
> src=10.0.0.1 dst=10.0.0.6 type=0 code=0 id=7486 mark=0
> secctx=system_u:object_r:unlabeled_t:s0 zone=1 use=1
>
> You think this should be addressed by neutron OVN plugin ?
>
>
>
> Where in the above process is it coming apart?  If it's broken, it
>> sounds like a more general problem than ICMP.  It would be any type of
>> traffic to the router IP where we expect a response.
>>
>> --
>> Russell Bryant
>>
>
>
Russell Bryant March 10, 2017, 7:35 p.m. UTC | #8
On Thu, Mar 9, 2017 at 11:52 PM, Numan Siddique <nusiddiq@redhat.com> wrote:
> Thanks for the review. Please see inline.
>
>
> On Fri, Mar 10, 2017 at 1:44 AM, Russell Bryant <russell@ovn.org> wrote:
>>
>> On Mon, Feb 27, 2017 at 12:59 AM,  <nusiddiq@redhat.com> wrote:
>> > From: Numan Siddique <nusiddiq@redhat.com>
>> >
>> > Presently, the icmp4 requests to the router gateway ip are sent to the
>> > connectiont tracker, but the icmp4 reply packets responded by
>> > 'lr_in_ip_input' stage are not sent to the connection tracker.
>> > Also no zone ids are assigned for the router ports. Because of which
>> > the icmp4 request packets in the connection tracker will be in the
>> > UNREPLIED state. If the CMS has added ACLs to drop packets which
>> > are not in ESTABLISHED state, the icmp4 replies will be dropped.
>> >
>> > To fix this issue, this patch adds a priority-110 flow in
>> > 'ls_in_pre_acl'
>> > stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
>> > to the router gateway ips.
>> >
>> > Alternate solution would be to assign zone ids for the router ports
>> > and send the packets from the router ports to the connection tracker.
>> >
>> > The approach used in this patch seems to be simpler.
>> >
>> > Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
>> > ---
>> >  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
>> >  ovn/northd/ovn-northd.c     | 92
>> > +++++++++++++++++++++++++++------------------
>> >  2 files changed, 79 insertions(+), 42 deletions(-)
>>
>>
>> Can you clarify where the packet gets dropped?  It seems like we have
>> flows trying to handle this already.  We skip conntrack for the router
>> interface ports.  Roughly, I would expect:
>>
>
> The packets are getting dropped because of the flow
>
> table=52, n_packets=40, n_bytes=3920,
> priority=2001,ct_state=-est+trk,ip,reg15=0x3,metadata=0x3 ac
> tions=drop
>
> This flow corresponds to logical flow -
> table=4 (ls_out_acl         ), priority=2001 , match=((!ct.est || (ct.est &&
> ct_label.blocked == 1)) && (outport ==
> "ce575934-f308-45b8-b9cd-457646da213d" && ip)), action=(drop;)
>
>
> This logical flow is added by neutron OVN plugin when the port is configured
> with security groups. When I clear the security groups for the port or add a
> specific security group rule to allow icmp, it works fine.

The above flow could be hit at two different points (#2 and #5 below).
In my local testing, it looks like it's happening in #5 so at least we
aren't hitting conntrack related flows in a part of the pipeline where
we don't expect it.

>
>>
>> 1) inport == lport1, logical switch ingress pipeline.  packet sent
>> through conntrack.
>>
>> 2) outport == router interface port, logical switch egress pipeline.
>> packet *skips* conntrack since it's a router interface.
>>
>> 3) router datapath, icmp respose generated, sent back to logical switch
>> ...
>>
>> 4) inport == router interface, logical switch ingress pipeline, packet
>> *skips* conntrack since it's a router interface
>>
>> 5) outport == lport1, logical switch egress pipeline, packet sent
>> through conntrack, which should find an existing conntrack entry
>> established in step 1.  packet delivered to lport1.
>>
>
> The connection tracking entry has this
>
> $ sudo conntrack -L | grep 10.0.0.1
> conntrack v1.4.3 (conntrack-tools): 54 flow entries have been shown.
> icmp     1 29 src=10.0.0.6 dst=10.0.0.1 type=8 code=0 id=7486 [UNREPLIED]
> src=10.0.0.1 dst=10.0.0.6 type=0 code=0 id=7486 mark=0
> secctx=system_u:object_r:unlabeled_t:s0 zone=1 use=1
>
> You think this should be addressed by neutron OVN plugin ?

I don't think it's a Neutron issue.

I see the conntrack entry remaining in the UNREPLIED state, even in
the working case where there's not an ACL dropping the reply.

icmp     1 29 src=10.0.0.10 dst=10.0.0.1 type=8 code=0 id=25857
[UNREPLIED] src=10.0.0.1 dst=10.0.0.10 type=0 code=0 id=25857 mark=0
zone=8 use=1

If I ping a different address (something past the logical router), I
see a proper conntrack entry that's not in the UNREPLIED state.

I wonder if there's something about how we are generating the ICMP
response from the logical router that's making conntrack not properly
associate it with the request?
Russell Bryant March 10, 2017, 9:48 p.m. UTC | #9
On Fri, Mar 10, 2017 at 2:35 PM, Russell Bryant <russell@ovn.org> wrote:
> On Thu, Mar 9, 2017 at 11:52 PM, Numan Siddique <nusiddiq@redhat.com> wrote:
>> Thanks for the review. Please see inline.
>>
>>
>> On Fri, Mar 10, 2017 at 1:44 AM, Russell Bryant <russell@ovn.org> wrote:
>>>
>>> On Mon, Feb 27, 2017 at 12:59 AM,  <nusiddiq@redhat.com> wrote:
>>> > From: Numan Siddique <nusiddiq@redhat.com>
>>> >
>>> > Presently, the icmp4 requests to the router gateway ip are sent to the
>>> > connectiont tracker, but the icmp4 reply packets responded by
>>> > 'lr_in_ip_input' stage are not sent to the connection tracker.
>>> > Also no zone ids are assigned for the router ports. Because of which
>>> > the icmp4 request packets in the connection tracker will be in the
>>> > UNREPLIED state. If the CMS has added ACLs to drop packets which
>>> > are not in ESTABLISHED state, the icmp4 replies will be dropped.
>>> >
>>> > To fix this issue, this patch adds a priority-110 flow in
>>> > 'ls_in_pre_acl'
>>> > stage which doesn't set reg0[0] = 1 for icmp4 request packets destined
>>> > to the router gateway ips.
>>> >
>>> > Alternate solution would be to assign zone ids for the router ports
>>> > and send the packets from the router ports to the connection tracker.
>>> >
>>> > The approach used in this patch seems to be simpler.
>>> >
>>> > Signed-off-by: Numan Siddique <nusiddiq@redhat.com>
>>> > ---
>>> >  ovn/northd/ovn-northd.8.xml | 29 +++++++++++---
>>> >  ovn/northd/ovn-northd.c     | 92
>>> > +++++++++++++++++++++++++++------------------
>>> >  2 files changed, 79 insertions(+), 42 deletions(-)
>>>
>>>
>>> Can you clarify where the packet gets dropped?  It seems like we have
>>> flows trying to handle this already.  We skip conntrack for the router
>>> interface ports.  Roughly, I would expect:
>>>
>>
>> The packets are getting dropped because of the flow
>>
>> table=52, n_packets=40, n_bytes=3920,
>> priority=2001,ct_state=-est+trk,ip,reg15=0x3,metadata=0x3 ac
>> tions=drop
>>
>> This flow corresponds to logical flow -
>> table=4 (ls_out_acl         ), priority=2001 , match=((!ct.est || (ct.est &&
>> ct_label.blocked == 1)) && (outport ==
>> "ce575934-f308-45b8-b9cd-457646da213d" && ip)), action=(drop;)
>>
>>
>> This logical flow is added by neutron OVN plugin when the port is configured
>> with security groups. When I clear the security groups for the port or add a
>> specific security group rule to allow icmp, it works fine.
>
> The above flow could be hit at two different points (#2 and #5 below).
> In my local testing, it looks like it's happening in #5 so at least we
> aren't hitting conntrack related flows in a part of the pipeline where
> we don't expect it.
>
>>
>>>
>>> 1) inport == lport1, logical switch ingress pipeline.  packet sent
>>> through conntrack.
>>>
>>> 2) outport == router interface port, logical switch egress pipeline.
>>> packet *skips* conntrack since it's a router interface.
>>>
>>> 3) router datapath, icmp respose generated, sent back to logical switch
>>> ...
>>>
>>> 4) inport == router interface, logical switch ingress pipeline, packet
>>> *skips* conntrack since it's a router interface
>>>
>>> 5) outport == lport1, logical switch egress pipeline, packet sent
>>> through conntrack, which should find an existing conntrack entry
>>> established in step 1.  packet delivered to lport1.
>>>
>>
>> The connection tracking entry has this
>>
>> $ sudo conntrack -L | grep 10.0.0.1
>> conntrack v1.4.3 (conntrack-tools): 54 flow entries have been shown.
>> icmp     1 29 src=10.0.0.6 dst=10.0.0.1 type=8 code=0 id=7486 [UNREPLIED]
>> src=10.0.0.1 dst=10.0.0.6 type=0 code=0 id=7486 mark=0
>> secctx=system_u:object_r:unlabeled_t:s0 zone=1 use=1
>>
>> You think this should be addressed by neutron OVN plugin ?
>
> I don't think it's a Neutron issue.
>
> I see the conntrack entry remaining in the UNREPLIED state, even in
> the working case where there's not an ACL dropping the reply.
>
> icmp     1 29 src=10.0.0.10 dst=10.0.0.1 type=8 code=0 id=25857
> [UNREPLIED] src=10.0.0.1 dst=10.0.0.10 type=0 code=0 id=25857 mark=0
> zone=8 use=1
>
> If I ping a different address (something past the logical router), I
> see a proper conntrack entry that's not in the UNREPLIED state.
>
> I wonder if there's something about how we are generating the ICMP
> response from the logical router that's making conntrack not properly
> associate it with the request?

I checked into this and there's no meaningful difference in how we
form the ICMP reply.

I'm guessing this has to do with the request and reply both going
through conntrack as a part of processing the same packet in the OVS
data path.  That's not behaving how we would expect.  I'll keep
looking next week to try to identify the root cause here, but I would
appreciate any insight others may have about the behavior we should
expect in this scenario.
Russell Bryant March 13, 2017, 7:27 p.m. UTC | #10
On Fri, Mar 10, 2017 at 4:48 PM, Russell Bryant <russell@ovn.org> wrote:
> On Fri, Mar 10, 2017 at 2:35 PM, Russell Bryant <russell@ovn.org> wrote:
>> On Thu, Mar 9, 2017 at 11:52 PM, Numan Siddique <nusiddiq@redhat.com> wrote:
>> I don't think it's a Neutron issue.
>>
>> I see the conntrack entry remaining in the UNREPLIED state, even in
>> the working case where there's not an ACL dropping the reply.
>>
>> icmp     1 29 src=10.0.0.10 dst=10.0.0.1 type=8 code=0 id=25857
>> [UNREPLIED] src=10.0.0.1 dst=10.0.0.10 type=0 code=0 id=25857 mark=0
>> zone=8 use=1
>>
>> If I ping a different address (something past the logical router), I
>> see a proper conntrack entry that's not in the UNREPLIED state.
>>
>> I wonder if there's something about how we are generating the ICMP
>> response from the logical router that's making conntrack not properly
>> associate it with the request?
>
> I checked into this and there's no meaningful difference in how we
> form the ICMP reply.
>
> I'm guessing this has to do with the request and reply both going
> through conntrack as a part of processing the same packet in the OVS
> data path.  That's not behaving how we would expect.  I'll keep
> looking next week to try to identify the root cause here, but I would
> appreciate any insight others may have about the behavior we should
> expect in this scenario.

I'm able to reproduce this outside of OpenStack.

https://gist.github.com/russellb/4ab0a9641f12f8ac66fdd6822ee7789e

This creates an OVN setup with two logical switches connected by a
logical router.

    switch d47412f9-e64a-4734-be26-80ee71ded805 (sw1)
        port sw1-port1
            addresses: ["50:54:00:00:00:03 11.0.0.2"]
    switch a1730d73-ccab-4f00-b748-3cafb683e9b8 (sw0)
        port sw0-port2
            addresses: ["50:54:00:00:00:02 192.168.0.3"]
        port sw0-port1
            addresses: ["50:54:00:00:00:01 192.168.0.2"]
    router 193f68cd-4a93-4a04-ad3b-3ddf7b5c66f3 (lr0)

The ping commands at the end demonstrate the problem.  My expectation
is that the very last ping command should be successful, but is not
due to the issue we're seeing here.
diff mbox

Patch

diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index ab8fd88..06465ff 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -245,11 +245,30 @@ 
     <p>
       This table prepares flows for possible stateful ACL processing in
       ingress table <code>ACLs</code>.  It contains a priority-0 flow that
-      simply moves traffic to the next table.  If stateful ACLs are used in the
-      logical datapath, a priority-100 flow is added that sets a hint
-      (with <code>reg0[0] = 1; next;</code>) for table
-      <code>Pre-stateful</code> to send IP packets to the connection tracker
-      before eventually advancing to ingress table <code>ACLs</code>.
+      simply moves traffic to the next table. It adds the following flows if
+      stateful ACLs are used in the logical datapath.
+
+      <ul>
+        <li>
+          A priority-100 flow is added that sets a hint
+          (with <code>reg0[0] = 1; next;</code>) for table
+          <code>Pre-stateful</code> to send IP packets to the connection tracker
+          before eventually advancing to ingress table <code>ACLs</code>.
+        </li>
+
+        <li>
+          A priority-110 flow which doesn't set the connection tracking hint for
+          the packets from the router ports.
+        </li>
+
+        <li>
+          A priority-110 flow which doesn't set the connection tracking hint
+          for the packets with the match
+          <code>ip4 &amp;&amp; icmp4 &amp;&amp; ip4.dst = {<var>R</var>}</code>
+          where <var>R</var> is the IPv4 address(es) of the router ports
+          connected to the logical datapath.
+        </li>
+      </ul>
     </p>
 
     <h3>Ingress Table 4: Pre-LB</h3>
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index 03dc850..4fc86f4 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -2339,6 +2339,43 @@  has_stateful_acl(struct ovn_datapath *od)
 }
 
 static void
+op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
+{
+    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
+        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
+        return;
+    }
+
+    ds_put_cstr(ds, "{");
+    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
+        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
+        if (add_bcast) {
+            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
+        }
+    }
+    ds_chomp(ds, ' ');
+    ds_chomp(ds, ',');
+    ds_put_cstr(ds, "}");
+}
+
+static void
+op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
+{
+    if (op->lrp_networks.n_ipv6_addrs == 1) {
+        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
+        return;
+    }
+
+    ds_put_cstr(ds, "{");
+    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
+        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
+    }
+    ds_chomp(ds, ' ');
+    ds_chomp(ds, ',');
+    ds_put_cstr(ds, "}");
+}
+
+static void
 build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
 {
     bool has_stateful = has_stateful_acl(od);
@@ -2378,6 +2415,24 @@  build_pre_acls(struct ovn_datapath *od, struct hmap *lflows)
 
             ds_destroy(&match_in);
             ds_destroy(&match_out);
+
+            /* The icmp4 request to the router ip (eg. ip4.dst = 10.0.0.1) will
+             * be sent to the CT, but the reply from the router ip is not sent
+             * to the CT. Also the zone id's are not assigned for the router
+             * ports. Because of which the icmp request packet in the CT will
+             * be in the UNREPLIED state.
+             * The icmp4 reply packets could be dropped if the CMS has added
+             * ACLs to drop packets which are not in ct.est state.
+             * So, don't send the icpmp4 request packet for the router ips
+             * to the CT.
+             */
+            if (op->peer && op->peer->lrp_networks.n_ipv4_addrs) {
+                struct ds match = DS_EMPTY_INITIALIZER;
+                ds_put_format(&match, "ip4 && icmp4 && ip4.dst == ");
+                op_put_v4_networks(&match, op->peer, false);
+                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
+                                          ds_cstr(&match), "next;");
+            }
         }
         /* Ingress and Egress Pre-ACL Table (Priority 110).
          *
@@ -3644,43 +3699,6 @@  free_prefix_s:
     free(prefix_s);
 }
 
-static void
-op_put_v4_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
-{
-    if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
-        ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
-        return;
-    }
-
-    ds_put_cstr(ds, "{");
-    for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
-        ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
-        if (add_bcast) {
-            ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
-        }
-    }
-    ds_chomp(ds, ' ');
-    ds_chomp(ds, ',');
-    ds_put_cstr(ds, "}");
-}
-
-static void
-op_put_v6_networks(struct ds *ds, const struct ovn_port *op)
-{
-    if (op->lrp_networks.n_ipv6_addrs == 1) {
-        ds_put_format(ds, "%s", op->lrp_networks.ipv6_addrs[0].addr_s);
-        return;
-    }
-
-    ds_put_cstr(ds, "{");
-    for (int i = 0; i < op->lrp_networks.n_ipv6_addrs; i++) {
-        ds_put_format(ds, "%s, ", op->lrp_networks.ipv6_addrs[i].addr_s);
-    }
-    ds_chomp(ds, ' ');
-    ds_chomp(ds, ',');
-    ds_put_cstr(ds, "}");
-}
-
 static const char *
 get_force_snat_ip(struct ovn_datapath *od, const char *key_type, ovs_be32 *ip)
 {