Message ID | 1283345350.2556.265.camel@edumazet-laptop |
---|---|
State | RFC, archived |
Delegated to: | David Miller |
Headers | show |
Quick answer! It seems to do exactly what I want. Is there a corresponding 'fwmark inherit' parameter added to the ip command somewhere? Will this patch get into the kernel? If you need testing I will do that. /Anders On Wed, 2010-09-01 at 14:49 +0200, Eric Dumazet wrote: > Le mercredi 01 septembre 2010 à 14:21 +0200, Anders Franzen a écrit : > > If I have multiple routing choices to reach a tunnel end point, > > I would like to mark different flows with iptables, after the > > postrouting of the primary route look up, this would put an fwmark on > > the skb. The route would in my case resolve to an ip6_tunnel. > > > > In the tunnel a secondary route lookup is done to select next hop for > > the tunnel end point. > > > > I would like to apply policy routing to the secondary lookup. > > > > This will not work, for two reasons: > > > > 1. None of the tunnels (ipip, gre, ip6_tunnel) I looked at regards the > > fwmark at the skb, when performing the route lookup. > > > > 2. ip6_tunnel is keeping a local dst cache, so it will never reroute as > > long as the current cache is valid. > > > > > > I wonder if there is a reason for not giving the fwmark at the tunnel > > route for tunnels in general? > > > > And the local dst cache for ip6_tunnel, is it needed, is not the routing > > subsystem keeping some form of internal cache (ip route ls cache). > > ip6_tunnel (or others) could be extended with a > IP6_TNL_F_USE_ORIG_FWMARK, and in this case not use/cache the route. 
> > untested patch to get the idea : > > diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h > index acb9ad6..bf22b03 100644 > --- a/include/linux/ip6_tunnel.h > +++ b/include/linux/ip6_tunnel.h > @@ -16,6 +16,8 @@ > #define IP6_TNL_F_MIP6_DEV 0x8 > /* copy DSCP from the outer packet */ > #define IP6_TNL_F_RCV_DSCP_COPY 0x10 > +/* copy fwmark from inner packet */ > +#define IP6_TNL_F_USE_ORIG_FWMARK 0x20 > > struct ip6_tnl_parm { > char name[IFNAMSIZ]; /* name of tunnel device */ > diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c > index 0fd027f..e31a91f 100644 > --- a/net/ipv6/ip6_tunnel.c > +++ b/net/ipv6/ip6_tunnel.c > @@ -858,7 +858,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, > int err = -1; > int pkt_len; > > - if ((dst = ip6_tnl_dst_check(t)) != NULL) > + if (!fl->mark && (dst = ip6_tnl_dst_check(t)) != NULL) > dst_hold(dst); > else { > dst = ip6_route_output(net, NULL, fl); > @@ -910,7 +910,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, > skb = new_skb; > } > skb_dst_drop(skb); > - skb_dst_set(skb, dst_clone(dst)); > + skb_dst_set(skb, fl->mark ? 
dst : dst_clone(dst)); > > skb->transport_header = skb->network_header; > > @@ -940,7 +940,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, > stats->tx_errors++; > stats->tx_aborted_errors++; > } > - ip6_tnl_dst_store(t, dst); > + if (!fl->mark) > + ip6_tnl_dst_store(t, dst); > + > return 0; > tx_err_link_failure: > stats->tx_carrier_errors++; > @@ -976,6 +978,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) > if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) > fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) > & IPV6_TCLASS_MASK; > + if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)) > + fl.mark = skb->mark; > > err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); > if (err != 0) { > @@ -1026,7 +1030,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) > fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); > if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) > fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); > - > + if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)) > + fl.mark = skb->mark; > err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); > if (err != 0) { > if (err == -EMSGSIZE) > > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h index acb9ad6..bf22b03 100644 --- a/include/linux/ip6_tunnel.h +++ b/include/linux/ip6_tunnel.h @@ -16,6 +16,8 @@ #define IP6_TNL_F_MIP6_DEV 0x8 /* copy DSCP from the outer packet */ #define IP6_TNL_F_RCV_DSCP_COPY 0x10 +/* copy fwmark from inner packet */ +#define IP6_TNL_F_USE_ORIG_FWMARK 0x20 struct ip6_tnl_parm { char name[IFNAMSIZ]; /* name of tunnel device */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0fd027f..e31a91f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -858,7 +858,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, int err = -1; int pkt_len; - if ((dst = ip6_tnl_dst_check(t)) != NULL) + if (!fl->mark && (dst = ip6_tnl_dst_check(t)) != NULL) dst_hold(dst); else { dst = ip6_route_output(net, NULL, fl); @@ -910,7 +910,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, skb = new_skb; } skb_dst_drop(skb); - skb_dst_set(skb, dst_clone(dst)); + skb_dst_set(skb, fl->mark ? 
dst : dst_clone(dst)); skb->transport_header = skb->network_header; @@ -940,7 +940,9 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, stats->tx_errors++; stats->tx_aborted_errors++; } - ip6_tnl_dst_store(t, dst); + if (!fl->mark) + ip6_tnl_dst_store(t, dst); + return 0; tx_err_link_failure: stats->tx_carrier_errors++; @@ -976,6 +978,8 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)) fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) & IPV6_TCLASS_MASK; + if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)) + fl.mark = skb->mark; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); if (err != 0) { @@ -1026,7 +1030,8 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)) fl.fl6_flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); - + if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)) + fl.mark = skb->mark; err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); if (err != 0) { if (err == -EMSGSIZE)