From patchwork Thu Jun 9 07:37:49 2016
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Gurucharan Shetty
- Reply to ARP requests. These flows reply to ARP requests for the
- router's own IP address. For each router port P that owns
- IP address A and Ethernet address E, a
- priority-90 flow matches
+ These flows reply to ARP requests for the router's own IP address.
+ For each router port P that owns IP address A
+ and Ethernet address E, a priority-90 flow matches
+
+ These flows reply to ARP requests for the virtual IP addresses
+ configured in the router for DNAT. For a configured DNAT IP address
+ A, for each router port P with Ethernet
+ address E, a priority-90 flow matches
+
+ This is for already established connections' reverse traffic,
+ i.e., SNAT has already been done in egress pipeline and now the
+ packet has entered the ingress pipeline as part of a reply. It is
+ unSNATted here.
+
+ For each configuration in the OVN Northbound database that asks
+ to change the source IP address of a packet from A to
+ B, a priority-100 flow matches
+ A priority-0 logical flow with match
+ Packets enter the pipeline with destination IP address that needs to
+ be DNATted from a virtual IP address to a real IP address. Packets
+ in the reverse direction need to be unDNATed.
+
+ For each configuration in the OVN Northbound database that asks
+ to change the destination IP address of a packet from A to
+ B, a priority-100 flow matches
+ For all IP packets of a Gateway router, a priority-50 flow with an
+ action
+ A priority-0 logical flow with match
A packet that arrives at this table is an IP packet that should be routed
@@ -672,7 +756,7 @@ icmp4 {
Any packet that reaches this table is an IP packet whose next-hop IP
@@ -798,7 +882,7 @@ icmp4 {
-
In the common case where the Ethernet destination has been resolved, this
@@ -823,7 +907,7 @@ arp {
- (Ingress table 2 initialized
+ Packets that are configured to be SNATed get their source IP address
+ changed based on the configuration in the OVN Northbound database.
+
+ For each configuration in the OVN Northbound database that asks
+ to change the source IP address of a packet from an IP address of
+ A or to change the source IP address of a packet that
+ belongs to network A to B, a flow matches
+
+ A priority-0 logical flow with match
Packets that reach this table are ready for delivery. It contains
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index cac0148..4683780 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -105,12 +105,15 @@ enum ovn_stage {
/* Logical router ingress stages. */ \
PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
- PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 2, "lr_in_ip_routing") \
- PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 3, "lr_in_arp_resolve") \
- PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 4, "lr_in_arp_request") \
+ PIPELINE_STAGE(ROUTER, IN, UNSNAT, 2, "lr_in_unsnat") \
+ PIPELINE_STAGE(ROUTER, IN, DNAT, 3, "lr_in_dnat") \
+ PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 4, "lr_in_ip_routing") \
+ PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 5, "lr_in_arp_resolve") \
+ PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 6, "lr_in_arp_request") \
\
/* Logical router egress stages. */ \
- PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 0, "lr_out_delivery")
+ PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \
+ PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
S_##DP_TYPE##_##PIPELINE##_##STAGE \
@@ -1998,6 +2001,51 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
free(match);
free(actions);
+ /* ARP handling for external IP addresses.
+ *
+ * DNAT IP addresses are external IP addresses that need ARP
+ * handling. */
+ for (int i = 0; i < op->od->nbr->n_nat; i++) {
+ const struct nbrec_nat *nat;
+
+ nat = op->od->nbr->nat[i];
+
+ if(!strcmp(nat->type, "snat")) {
+ continue;
+ }
+
+ ovs_be32 ip;
+ if (!ip_parse(nat->external_ip, &ip) || !ip) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad ip address %s in dnat configuration "
+ "for router %s", nat->external_ip, op->key);
+ continue;
+ }
+
+ match = xasprintf(
+ "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
+ op->json_key, IP_ARGS(ip));
+ actions = xasprintf(
+ "eth.dst = eth.src; "
+ "eth.src = "ETH_ADDR_FMT"; "
+ "arp.op = 2; /* ARP reply */ "
+ "arp.tha = arp.sha; "
+ "arp.sha = "ETH_ADDR_FMT"; "
+ "arp.tpa = arp.spa; "
+ "arp.spa = "IP_FMT"; "
+ "outport = %s; "
+ "inport = \"\"; /* Allow sending out inport. */ "
+ "output;",
+ ETH_ADDR_ARGS(op->mac),
+ ETH_ADDR_ARGS(op->mac),
+ IP_ARGS(ip),
+ op->json_key);
+ ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
+ match, actions);
+ free(match);
+ free(actions);
+ }
+
/* Drop IP traffic to this router. */
match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
@@ -2005,6 +2053,135 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
free(match);
}
+ /* NAT in Gateway routers. */
+ HMAP_FOR_EACH (od, key_node, datapaths) {
+ if (!od->nbr) {
+ continue;
+ }
+
+ /* Packets are allowed by default. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
+
+ /* NAT rules are only valid on Gateway routers. */
+ if (!smap_get(&od->nbr->options, "chassis")) {
+ continue;
+ }
+
+ for (int i = 0; i < od->nbr->n_nat; i++) {
+ const struct nbrec_nat *nat;
+
+ nat = od->nbr->nat[i];
+
+ ovs_be32 ip, mask;
+
+ char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
+ if (error || mask != OVS_BE32_MAX) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad external ip %s for nat",
+ nat->external_ip);
+ free(error);
+ continue;
+ }
+
+ /* Check the validity of nat->logical_ip. 'logical_ip' can
+ * be a subnet when the type is "snat". */
+ error = ip_parse_masked(nat->logical_ip, &ip, &mask);
+ if (!strcmp(nat->type, "snat")) {
+ if (error) {
+ static struct vlog_rate_limit rl =
+ VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
+ "in router "UUID_FMT"",
+ nat->logical_ip, UUID_ARGS(&od->key));
+ free(error);
+ continue;
+ }
+ } else {
+ if (error || mask != OVS_BE32_MAX) {
+ static struct vlog_rate_limit rl =
+ VLOG_RATE_LIMIT_INIT(5, 1);
+ VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
+ ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
+ free(error);
+ continue;
+ }
+ }
+
+
+ char *match, *actions;
+
+ /* Ingress UNSNAT table: It is for already established connections'
+ * reverse traffic. i.e., SNAT has already been done in egress
+ * pipeline and now the packet has entered the ingress pipeline as
+ * part of a reply. We undo the SNAT here.
+ *
+ * Undoing SNAT has to happen before DNAT processing. This is
+ * because when the packet was DNATed in ingress pipeline, it did
+ * not know about the possibility of eventual additional SNAT in
+ * egress pipeline. */
+ if (!strcmp(nat->type, "snat")
+ || !strcmp(nat->type, "dnat_and_snat")) {
+ match = xasprintf("ip && ip4.dst == %s", nat->external_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
+ match, "ct_snat; next;");
+ free(match);
+ }
+
+ /* Ingress DNAT table: Packets enter the pipeline with destination
+ * IP address that needs to be DNATted from an external IP address
+ * to a logical IP address. */
+ if (!strcmp(nat->type, "dnat")
+ || !strcmp(nat->type, "dnat_and_snat")) {
+ /* Packet when it goes from the initiator to destination.
+ * We need to zero the inport because the router can
+ * send the packet back through the same interface. */
+ match = xasprintf("ip && ip4.dst == %s", nat->external_ip);
+ actions = xasprintf("inport = \"\"; ct_dnat(%s);",
+ nat->logical_ip);
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
+ match, actions);
+ free(match);
+ free(actions);
+ }
+
+ /* Egress SNAT table: Packets enter the egress pipeline with
+ * source ip address that needs to be SNATted to an external ip
+ * address. */
+ if (!strcmp(nat->type, "snat")
+ || !strcmp(nat->type, "dnat_and_snat")) {
+ match = xasprintf("ip && ip4.src == %s", nat->logical_ip);
+ actions = xasprintf("ct_snat(%s);", nat->external_ip);
+
+ /* The priority here is calculated such that the
+ * nat->logical_ip with the longest mask gets a higher
+ * priority. */
+ ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
+ count_1bits(ntohl(mask)) + 1, match, actions);
+ free(match);
+ free(actions);
+ }
+ }
+
+ /* Re-circulate every packet through the DNAT zone.
+ * This helps with two things.
+ *
+ * 1. Any packet that needs to be unDNATed in the reverse
+ * direction gets unDNATed. Ideally this could be done in
+ * the egress pipeline. But since the gateway router
+ * does not have any feature that depends on the source
+ * ip address being external IP address for IP routing,
+ * we can do it here, saving a future re-circulation.
+ *
+ * 2. Any packet that was sent through SNAT zone in the
+ * previous table automatically gets re-circulated to get
+ * back the new destination IP address that is needed for
+ * routing in the openflow pipeline. */
+ ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
+ "ip", "inport = \"\"; ct_dnat;");
+ }
+
/* Logical router ingress table 2: IP Routing.
*
* A packet that arrives at this table is an IP packet that should be
@@ -2205,7 +2382,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
}
- /* Logical router egress table 0: Delivery (priority 100).
+ /* Logical router egress table 1: Delivery (priority 100).
*
* Priority 100 rules deliver packets to enabled logical ports. */
HMAP_FOR_EACH (op, key_node, ports) {
diff --git a/ovn/ovn-nb.ovsschema b/ovn/ovn-nb.ovsschema
index fa21b30..ac6ca14 100644
--- a/ovn/ovn-nb.ovsschema
+++ b/ovn/ovn-nb.ovsschema
@@ -1,7 +1,7 @@
{
"name": "OVN_Northbound",
- "version": "2.1.2",
- "cksum": "429668869 5325",
+ "version": "2.1.3",
+ "cksum": "3631923697 6121",
"tables": {
"Logical_Switch": {
"columns": {
@@ -78,6 +78,11 @@
"max": "unlimited"}},
"default_gw": {"type": {"key": "string", "min": 0, "max": 1}},
"enabled": {"type": {"key": "boolean", "min": 0, "max": 1}},
+ "nat": {"type": {"key": {"type": "uuid",
+ "refTable": "NAT",
+ "refType": "strong"},
+ "min": 0,
+ "max": "unlimited"}},
"options": {
"type": {"key": "string",
"value": "string",
@@ -104,6 +109,16 @@
"ip_prefix": {"type": "string"},
"nexthop": {"type": "string"},
"output_port": {"type": {"key": "string", "min": 0, "max": 1}}},
+ "isRoot": false},
+ "NAT": {
+ "columns": {
+ "external_ip": {"type": "string"},
+ "logical_ip": {"type": "string"},
+ "type": {"type": {"key": {"type": "string",
+ "enum": ["set", ["dnat",
+ "snat",
+ "dnat_and_snat"
+ ]]}}}},
"isRoot": false}
}
}
diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml
index 41092f1..93ad305 100644
--- a/ovn/ovn-nb.xml
+++ b/ovn/ovn-nb.xml
@@ -631,18 +631,31 @@
router has all ingress and egress traffic dropped.
+
Additional options for the logical router.
+ If set, indicates that the logical router in question is a Gateway
+ router (which is centralized) and resides in the set chassis. The
+ same value is also used by
+ The Gateway router can only be connected to a distributed router
+ via a switch if SNAT and DNAT are to be configured in the Gateway
+ router.
+ inport == P &&
- arp.op == 1 && arp.tpa == A
(ARP request)
+ Reply to ARP requests.
+ inport == P && arp.op == 1 &&
+ arp.tpa == A
(ARP request) with the following
+ actions:
+
+eth.dst = eth.src;
+eth.src = E;
+arp.op = 2; /* ARP reply. */
+arp.tha = arp.sha;
+arp.sha = E;
+arp.tpa = arp.spa;
+arp.spa = A;
+outport = P;
+inport = ""; /* Allow sending out inport. */
+output;
+
+ inport == P && arp.op == 1 &&
+ arp.tpa == A
(ARP request)
with the following actions:
Ingress Table 2: IP Routing
+ Ingress Table 2: UNSNAT
+
+
+
+
+ ip &&
+ ip4.dst == B
with an action
+ ct_snat; next;
.
+ 1
has actions
+ next;
.
+ Ingress Table 3: DNAT
+
+
+
+
+ ip &&
+ ip4.dst == A
with an action inport = "";
+ ct_dnat(B);
.
+ inport = ""; ct_dnat;
.
+ 1
has actions
+ next;
.
+ Ingress Table 4: IP Routing
ip4.dst
, the packet's final destination, unchanged) and
advances to the next table for ARP resolution. It also sets
reg1
to the IP address owned by the selected router port
- (which is used later in table 4 as the IP source address for an ARP
+ (which is used later in table 6 as the IP source address for an ARP
request, if needed).
Ingress Table 3: ARP Resolution
+ Ingress Table 5: ARP Resolution
Ingress Table 4: ARP Request
+ Ingress Table 6: ARP Request
reg1
with the IP address
+ (Ingress table 4 initialized reg1
with the IP address
owned by outport
.)
Egress Table 0: Delivery
+ Egress Table 0: SNAT
+
+
+
+
+ ip && ip4.src == A
with an action
+ ct_snat(B);
. The priority of the flow
+ is calculated based on the mask of A, with matches
+ having larger masks getting higher priorities.
+ 1
has actions
+ next;
.
+ Egress Table 1: Delivery
ovn-controller
- to uniquely identify the chassis in the OVN deployment and
- comes from external_ids:system-id
in the
- Open_vSwitch
table of Open_vSwitch database.
+ ovn-controller
to
+ uniquely identify the chassis in the OVN deployment and
+ comes from external_ids:system-id
in the
+ Open_vSwitch
table of Open_vSwitch database.
+
ct_dnat;
ct_dnat(IP);
+ ct_dnat
sends the packet through the DNAT zone in
+ connection tracking table to unDNAT any packet that was DNATed in
+ the opposite direction. The packet is then automatically sent to
+ the next tables as if followed by next;
action.
+ The next tables will see the changes in the packet caused by
+ the connection tracker.
+
+ ct_dnat(IP)
sends the packet through the
+ DNAT zone to change the destination IP address of the packet to
+ the one provided inside the parenthesis and commits the connection.
+ The packet is then automatically sent to the next tables as if
+ followed by next;
action. The next tables will see
+ the changes in the packet caused by the connection tracker.
+
ct_snat;
ct_snat(IP);
+ ct_snat
sends the packet through the SNAT zone to
+ unSNAT any packet that was SNATed in the opposite direction. If
+ the packet needs to be sent to the next tables, then it should be
+ followed by a next;
action. The next tables will not
+ see the changes in the packet caused by the connection tracker.
+
+ ct_snat(IP)
sends the packet through the
+ SNAT zone to change the source IP address of the packet to
+ the one provided inside the parenthesis and commits the connection.
+ The packet is then automatically sent to the next tables as if
+ followed by next;
action. The next tables will see the
+ changes in the packet caused by the connection tracker.
+
arp { action;
... };
diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c index 321040e..b821307 100644 --- a/ovn/utilities/ovn-nbctl.c +++ b/ovn/utilities/ovn-nbctl.c @@ -1449,6 +1449,11 @@ static const struct ctl_table_class tables[] = { NULL}, {NULL, NULL, NULL}}}, + {&nbrec_table_nat, + {{&nbrec_table_nat, NULL, + NULL}, + {NULL, NULL, NULL}}}, + {NULL, {{NULL, NULL, NULL}, {NULL, NULL, NULL}}} }; diff --git a/tests/ovn.at b/tests/ovn.at index 633cf35..19d5c73 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -507,6 +507,23 @@ ip.ttl => Syntax error at end of input expecting `--'. ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip ct_commit; => actions=ct(commit,zone=NXM_NX_REG5[0..15]), prereqs=ip +# dnat +ct_dnat; => actions=ct(table=27,zone=NXM_NX_REG3[0..15],nat), prereqs=ip +ct_dnat(192.168.1.2); => actions=ct(commit,table=27,zone=NXM_NX_REG3[0..15],nat(dst=192.168.1.2)), prereqs=ip +ct_dnat(192.168.1.2, 192.168.1.3); => Syntax error at `,' expecting `)'. +ct_dnat(foo); => Syntax error at `foo' invalid ip. +ct_dnat(foo, bar); => Syntax error at `foo' invalid ip. +ct_dnat(); => Syntax error at `)' invalid ip. + +# snat +ct_snat; => actions=ct(zone=NXM_NX_REG4[0..15],nat), prereqs=ip +ct_snat(192.168.1.2); => actions=ct(commit,table=27,zone=NXM_NX_REG4[0..15],nat(src=192.168.1.2)), prereqs=ip +ct_snat(192.168.1.2, 192.168.1.3); => Syntax error at `,' expecting `)'. +ct_snat(foo); => Syntax error at `foo' invalid ip. +ct_snat(foo, bar); => Syntax error at `foo' invalid ip. +ct_snat(); => Syntax error at `)' invalid ip. + + # arp arp { eth.dst = ff:ff:ff:ff:ff:ff; output; }; => actions=controller(userdata=00.00.00.00.00.00.00.00.00.19.00.10.80.00.06.06.ff.ff.ff.ff.ff.ff.00.00.ff.ff.00.10.00.00.23.20.00.0e.ff.f8.40.00.00.00), prereqs=ip4