diff mbox

[1/9] ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header)

Message ID 1476715350-18983-2-git-send-email-david.lebrun@uclouvain.be
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

David Lebrun Oct. 17, 2016, 2:42 p.m. UTC
Implement minimal support for processing of SR-enabled packets
as described in
https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02.

This patch implements the following operations:
- Intermediate segment endpoint: incrementation of active segment and rerouting.
- Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH
  and routing of inner packet.
- Cleanup flag support for SR-inlined packets: removal of SRH if we are the
  penultimate segment endpoint.

A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled
packets. Default is deny.

This patch does not provide support for HMAC-signed packets.

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
---
 include/linux/ipv6.h      |   3 +
 include/linux/seg6.h      |   6 ++
 include/uapi/linux/ipv6.h |   2 +
 include/uapi/linux/seg6.h |  46 +++++++++++++++
 net/ipv6/Kconfig          |  13 +++++
 net/ipv6/addrconf.c       |  18 ++++++
 net/ipv6/exthdrs.c        | 140 ++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 228 insertions(+)
 create mode 100644 include/linux/seg6.h
 create mode 100644 include/uapi/linux/seg6.h

Comments

David Miller Oct. 17, 2016, 2:57 p.m. UTC | #1
From: David Lebrun <david.lebrun@uclouvain.be>
Date: Mon, 17 Oct 2016 16:42:22 +0200

> +/*
> + * SRH
> + */
> +struct ipv6_sr_hdr {
> +	__u8	nexthdr;
> +	__u8	hdrlen;
> +	__u8	type;
> +	__u8	segments_left;
> +	__u8	first_segment;
> +	__be16	flags;
> +	__u8	reserved;
> +
> +	struct in6_addr segments[0];
> +} __attribute__((packed));

Please don't use packed, it results in extremely inefficient code on
several architectures.

You can simply declare the flags as two 8-bit pieces and all will work
out fine.
David Lebrun Oct. 17, 2016, 3:04 p.m. UTC | #2
On 10/17/2016 04:57 PM, David Miller wrote:
> Please don't use packed, it results in extremely inefficient code on
> several architectures.
> 
> You can simply declare the flags as two 8-bit pieces and all will work
> out fine.

Noted, will do
Tom Herbert Oct. 17, 2016, 5:01 p.m. UTC | #3
On Mon, Oct 17, 2016 at 7:42 AM, David Lebrun <david.lebrun@uclouvain.be> wrote:
> Implement minimal support for processing of SR-enabled packets
> as described in
> https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02.
>
> This patch implements the following operations:
> - Intermediate segment endpoint: incrementation of active segment and rerouting.
> - Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH
>   and routing of inner packet.
> - Cleanup flag support for SR-inlined packets: removal of SRH if we are the
>   penultimate segment endpoint.
>
> A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled
> packets. Default is deny.
>
> This patch does not provide support for HMAC-signed packets.
>
> Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
> ---
>  include/linux/ipv6.h      |   3 +
>  include/linux/seg6.h      |   6 ++
>  include/uapi/linux/ipv6.h |   2 +
>  include/uapi/linux/seg6.h |  46 +++++++++++++++
>  net/ipv6/Kconfig          |  13 +++++
>  net/ipv6/addrconf.c       |  18 ++++++
>  net/ipv6/exthdrs.c        | 140 ++++++++++++++++++++++++++++++++++++++++++++++
>  7 files changed, 228 insertions(+)
>  create mode 100644 include/linux/seg6.h
>  create mode 100644 include/uapi/linux/seg6.h
>
> diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
> index 7e9a789..75395ad 100644
> --- a/include/linux/ipv6.h
> +++ b/include/linux/ipv6.h
> @@ -64,6 +64,9 @@ struct ipv6_devconf {
>         } stable_secret;
>         __s32           use_oif_addrs_only;
>         __s32           keep_addr_on_down;
> +#ifdef CONFIG_IPV6_SEG6
> +       __s32           seg6_enabled;
> +#endif
>
>         struct ctl_table_header *sysctl_header;
>  };
> diff --git a/include/linux/seg6.h b/include/linux/seg6.h
> new file mode 100644
> index 0000000..7a66d2b
> --- /dev/null
> +++ b/include/linux/seg6.h
> @@ -0,0 +1,6 @@
> +#ifndef _LINUX_SEG6_H
> +#define _LINUX_SEG6_H
> +
> +#include <uapi/linux/seg6.h>
> +
> +#endif
> diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
> index 8c27723..7ff1d65 100644
> --- a/include/uapi/linux/ipv6.h
> +++ b/include/uapi/linux/ipv6.h
> @@ -39,6 +39,7 @@ struct in6_ifreq {
>  #define IPV6_SRCRT_STRICT      0x01    /* Deprecated; will be removed */
>  #define IPV6_SRCRT_TYPE_0      0       /* Deprecated; will be removed */
>  #define IPV6_SRCRT_TYPE_2      2       /* IPv6 type 2 Routing Header   */
> +#define IPV6_SRCRT_TYPE_4      4       /* Segment Routing with IPv6 */
>
>  /*
>   *     routing header
> @@ -178,6 +179,7 @@ enum {
>         DEVCONF_DROP_UNSOLICITED_NA,
>         DEVCONF_KEEP_ADDR_ON_DOWN,
>         DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
> +       DEVCONF_SEG6_ENABLED,
>         DEVCONF_MAX
>  };
>
> diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h
> new file mode 100644
> index 0000000..9f9e157
> --- /dev/null
> +++ b/include/uapi/linux/seg6.h
> @@ -0,0 +1,46 @@
> +/*
> + *  SR-IPv6 implementation
> + *
> + *  Author:
> + *  David Lebrun <david.lebrun@uclouvain.be>
> + *
> + *
> + *  This program is free software; you can redistribute it and/or
> + *      modify it under the terms of the GNU General Public License
> + *      as published by the Free Software Foundation; either version
> + *      2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _UAPI_LINUX_SEG6_H
> +#define _UAPI_LINUX_SEG6_H
> +
> +/*
> + * SRH
> + */
> +struct ipv6_sr_hdr {
> +       __u8    nexthdr;
> +       __u8    hdrlen;
> +       __u8    type;
> +       __u8    segments_left;
> +       __u8    first_segment;
> +       __be16  flags;

Bad alignment for 16 bit field could be unpleasant on some
architectures. Might be better to split this into to u8's, defined
flags are only in first eight bits anyway.

> +       __u8    reserved;
> +
> +       struct in6_addr segments[0];
> +} __attribute__((packed));
> +
> +#define SR6_FLAG_CLEANUP       (1 << 15)
> +#define SR6_FLAG_PROTECTED     (1 << 14)
> +#define SR6_FLAG_OAM           (1 << 13)
> +#define SR6_FLAG_ALERT         (1 << 12)
> +#define SR6_FLAG_HMAC          (1 << 11)
> +
> +#define SR6_TLV_INGRESS                1
> +#define SR6_TLV_EGRESS         2
> +#define SR6_TLV_OPAQUE         3
> +#define SR6_TLV_PADDING                4
> +#define SR6_TLV_HMAC           5
> +
> +#define sr_get_flags(srh) (be16_to_cpu((srh)->flags))
> +
> +#endif
> diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
> index 2343e4f..691c318 100644
> --- a/net/ipv6/Kconfig
> +++ b/net/ipv6/Kconfig
> @@ -289,4 +289,17 @@ config IPV6_PIMSM_V2
>           Support for IPv6 PIM multicast routing protocol PIM-SMv2.
>           If unsure, say N.
>
> +config IPV6_SEG6
> +       bool "IPv6: Segment Routing support"
> +       depends on IPV6
> +       select CRYPTO_HMAC
> +       select CRYPTO_SHA1
> +       select CRYPTO_SHA256
> +       ---help---
> +         Experimental support for IPv6 Segment Routing dataplane as defined

I don't think calling this experimental is relevant.

> +         in IETF draft-ietf-6man-segment-routing-header-02. This option
> +         enables the processing of SR-enabled packets allowing the kernel
> +         to act as a segment endpoint (intermediate or egress). It also
> +         enables an API for the kernel to act as an ingress SR router.
> +
>  endif # IPV6
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index d8983e1..42c0ffb 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -239,6 +239,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
>         .use_oif_addrs_only     = 0,
>         .ignore_routes_with_linkdown = 0,
>         .keep_addr_on_down      = 0,
> +#ifdef CONFIG_IPV6_SEG6
> +       .seg6_enabled           = 0,
> +#endif
>  };
>
>  static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
> @@ -285,6 +288,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
>         .use_oif_addrs_only     = 0,
>         .ignore_routes_with_linkdown = 0,
>         .keep_addr_on_down      = 0,
> +#ifdef CONFIG_IPV6_SEG6
> +       .seg6_enabled           = 0,
> +#endif
>  };
>
>  /* Check if a valid qdisc is available */
> @@ -4965,6 +4971,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
>         array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
>         array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
>         array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
> +#ifdef CONFIG_IPV6_SEG6
> +       array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
> +#endif
>  }
>
>  static inline size_t inet6_ifla6_size(void)
> @@ -6056,6 +6065,15 @@ static const struct ctl_table addrconf_sysctl[] = {
>                 .proc_handler   = proc_dointvec,
>
>         },
> +#ifdef CONFIG_IPV6_SEG6
> +       {
> +               .procname       = "seg6_enabled",
> +               .data           = &ipv6_devconf.seg6_enabled,
> +               .maxlen         = sizeof(int),
> +               .mode           = 0644,
> +               .proc_handler   = proc_dointvec,
> +       },
> +#endif
>         {
>                 /* sentinel */
>         }
> diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
> index 139ceb6..b31f811 100644
> --- a/net/ipv6/exthdrs.c
> +++ b/net/ipv6/exthdrs.c
> @@ -47,6 +47,9 @@
>  #if IS_ENABLED(CONFIG_IPV6_MIP6)
>  #include <net/xfrm.h>
>  #endif
> +#ifdef CONFIG_IPV6_SEG6
> +#include <linux/seg6.h>
> +#endif
>
>  #include <linux/uaccess.h>
>
> @@ -286,6 +289,137 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
>         return -1;
>  }
>
> +#ifdef CONFIG_IPV6_SEG6
> +static int ipv6_srh_rcv(struct sk_buff *skb)
> +{
> +       struct in6_addr *addr = NULL, *last_addr = NULL, *active_addr = NULL;
> +       struct inet6_skb_parm *opt = IP6CB(skb);
> +       struct net *net = dev_net(skb->dev);
> +       struct ipv6_sr_hdr *hdr;
> +       struct inet6_dev *idev;
> +       int cleanup = 0;
> +       int accept_seg6;
> +
> +       hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
> +
> +       idev = __in6_dev_get(skb->dev);
> +
> +       accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
> +       if (accept_seg6 > idev->cnf.seg6_enabled)
> +               accept_seg6 = idev->cnf.seg6_enabled;
> +
> +       if (!accept_seg6) {
> +               kfree_skb(skb);
> +               return -1;
> +       }
> +
> +looped_back:
> +       last_addr = hdr->segments;
> +
> +       if (hdr->segments_left > 0) {
> +               if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 &&
> +                   sr_get_flags(hdr) & SR6_FLAG_CLEANUP)
> +                       cleanup = 1;
> +       } else {
> +               if (hdr->nexthdr == NEXTHDR_IPV6) {
> +                       int offset = (hdr->hdrlen + 1) << 3;
> +
> +                       if (!pskb_pull(skb, offset)) {
> +                               kfree_skb(skb);
> +                               return -1;
> +                       }
> +                       skb_postpull_rcsum(skb, skb_transport_header(skb),
> +                                          offset);
> +
> +                       skb_reset_network_header(skb);
> +                       skb_reset_transport_header(skb);
> +                       skb->encapsulation = 0;
> +
> +                       __skb_tunnel_rx(skb, skb->dev, net);
> +
> +                       netif_rx(skb);
> +                       return -1;
> +               }
> +
> +               opt->srcrt = skb_network_header_len(skb);
> +               opt->lastopt = opt->srcrt;
> +               skb->transport_header += (hdr->hdrlen + 1) << 3;
> +               opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
> +
> +               return 1;
> +       }
> +
> +       if (skb_cloned(skb)) {
> +               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
> +                       __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
> +                                       IPSTATS_MIB_OUTDISCARDS);
> +                       kfree_skb(skb);
> +                       return -1;
> +               }
> +       }
> +
> +       if (skb->ip_summed == CHECKSUM_COMPLETE)
> +               skb->ip_summed = CHECKSUM_NONE;
> +
Because the packet is being changed? Would it make sense to update the
checksum complete value based on the changes being made. Consider the
case that the next hop is local to the host (someone may try to
implement network virtualization this way).

> +       hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
> +
> +       active_addr = hdr->segments + hdr->segments_left;
> +       hdr->segments_left--;
> +       addr = hdr->segments + hdr->segments_left;
> +
> +       ipv6_hdr(skb)->daddr = *addr;
> +
> +       skb_push(skb, sizeof(struct ipv6hdr));
> +
> +       if (cleanup) {
> +               int srhlen = (hdr->hdrlen + 1) << 3;
> +               int nh = hdr->nexthdr;
> +
> +               memmove(skb_network_header(skb) + srhlen,
> +                       skb_network_header(skb),
> +                       (unsigned char *)hdr - skb_network_header(skb));
> +               skb_pull(skb, srhlen);
> +               skb->network_header += srhlen;
> +               ipv6_hdr(skb)->nexthdr = nh;
> +               ipv6_hdr(skb)->payload_len = htons(skb->len -
> +                                                  sizeof(struct ipv6hdr));
> +       }
> +
> +       skb_dst_drop(skb);
> +
> +       ip6_route_input(skb);
> +
> +       if (skb_dst(skb)->error) {
> +               dst_input(skb);
> +               return -1;
> +       }
> +
> +       if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
> +               if (ipv6_hdr(skb)->hop_limit <= 1) {
> +                       __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
> +                                       IPSTATS_MIB_INHDRERRORS);
> +                       icmpv6_send(skb, ICMPV6_TIME_EXCEED,
> +                                   ICMPV6_EXC_HOPLIMIT, 0);
> +                       kfree_skb(skb);
> +                       return -1;
> +               }
> +               ipv6_hdr(skb)->hop_limit--;
> +
> +               /* be sure that srh is still present before reinjecting */
> +               if (!cleanup) {
> +                       skb_pull(skb, sizeof(struct ipv6hdr));
> +                       goto looped_back;
> +               }
> +               skb_set_transport_header(skb, sizeof(struct ipv6hdr));
> +               IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
> +       }
> +
> +       dst_input(skb);
> +
> +       return -1;
> +}
> +#endif
> +
>  /********************************
>    Routing header.
>   ********************************/
> @@ -326,6 +460,12 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
>                 return -1;
>         }
>
> +#ifdef CONFIG_IPV6_SEG6
> +       /* segment routing */
> +       if (hdr->type == IPV6_SRCRT_TYPE_4)
> +               return ipv6_srh_rcv(skb);
> +#endif

This doesn't belong in one of the switch statements in ipv6_rthdr_rcv?

> +
>  looped_back:
>         if (hdr->segments_left == 0) {
>                 switch (hdr->type) {
> --
> 2.7.3
>
Tom Herbert Oct. 17, 2016, 5:31 p.m. UTC | #4
On Mon, Oct 17, 2016 at 7:42 AM, David Lebrun <david.lebrun@uclouvain.be> wrote:
> Implement minimal support for processing of SR-enabled packets
> as described in
> https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02.
>
> This patch implements the following operations:
> - Intermediate segment endpoint: incrementation of active segment and rerouting.
> - Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH
>   and routing of inner packet.
> - Cleanup flag support for SR-inlined packets: removal of SRH if we are the
>   penultimate segment endpoint.
>
> A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled
> packets. Default is deny.
>
> This patch does not provide support for HMAC-signed packets.
>
> Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
> ---
>  include/linux/ipv6.h      |   3 +
>  include/linux/seg6.h      |   6 ++
>  include/uapi/linux/ipv6.h |   2 +
>  include/uapi/linux/seg6.h |  46 +++++++++++++++
>  net/ipv6/Kconfig          |  13 +++++
>  net/ipv6/addrconf.c       |  18 ++++++
>  net/ipv6/exthdrs.c        | 140 ++++++++++++++++++++++++++++++++++++++++++++++
>  7 files changed, 228 insertions(+)
>  create mode 100644 include/linux/seg6.h
>  create mode 100644 include/uapi/linux/seg6.h
>
> diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
> index 7e9a789..75395ad 100644
> --- a/include/linux/ipv6.h
> +++ b/include/linux/ipv6.h
> @@ -64,6 +64,9 @@ struct ipv6_devconf {
>         } stable_secret;
>         __s32           use_oif_addrs_only;
>         __s32           keep_addr_on_down;
> +#ifdef CONFIG_IPV6_SEG6
> +       __s32           seg6_enabled;
> +#endif
>
>         struct ctl_table_header *sysctl_header;
>  };
> diff --git a/include/linux/seg6.h b/include/linux/seg6.h
> new file mode 100644
> index 0000000..7a66d2b
> --- /dev/null
> +++ b/include/linux/seg6.h
> @@ -0,0 +1,6 @@
> +#ifndef _LINUX_SEG6_H
> +#define _LINUX_SEG6_H
> +
> +#include <uapi/linux/seg6.h>
> +
> +#endif
> diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
> index 8c27723..7ff1d65 100644
> --- a/include/uapi/linux/ipv6.h
> +++ b/include/uapi/linux/ipv6.h
> @@ -39,6 +39,7 @@ struct in6_ifreq {
>  #define IPV6_SRCRT_STRICT      0x01    /* Deprecated; will be removed */
>  #define IPV6_SRCRT_TYPE_0      0       /* Deprecated; will be removed */
>  #define IPV6_SRCRT_TYPE_2      2       /* IPv6 type 2 Routing Header   */
> +#define IPV6_SRCRT_TYPE_4      4       /* Segment Routing with IPv6 */
>
>  /*
>   *     routing header
> @@ -178,6 +179,7 @@ enum {
>         DEVCONF_DROP_UNSOLICITED_NA,
>         DEVCONF_KEEP_ADDR_ON_DOWN,
>         DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
> +       DEVCONF_SEG6_ENABLED,
>         DEVCONF_MAX
>  };
>
> diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h
> new file mode 100644
> index 0000000..9f9e157
> --- /dev/null
> +++ b/include/uapi/linux/seg6.h
> @@ -0,0 +1,46 @@
> +/*
> + *  SR-IPv6 implementation
> + *
> + *  Author:
> + *  David Lebrun <david.lebrun@uclouvain.be>
> + *
> + *
> + *  This program is free software; you can redistribute it and/or
> + *      modify it under the terms of the GNU General Public License
> + *      as published by the Free Software Foundation; either version
> + *      2 of the License, or (at your option) any later version.
> + */
> +
> +#ifndef _UAPI_LINUX_SEG6_H
> +#define _UAPI_LINUX_SEG6_H
> +
> +/*
> + * SRH
> + */
> +struct ipv6_sr_hdr {
> +       __u8    nexthdr;
> +       __u8    hdrlen;
> +       __u8    type;
> +       __u8    segments_left;
> +       __u8    first_segment;
> +       __be16  flags;
> +       __u8    reserved;
> +
> +       struct in6_addr segments[0];
> +} __attribute__((packed));
> +
> +#define SR6_FLAG_CLEANUP       (1 << 15)
> +#define SR6_FLAG_PROTECTED     (1 << 14)
> +#define SR6_FLAG_OAM           (1 << 13)
> +#define SR6_FLAG_ALERT         (1 << 12)
> +#define SR6_FLAG_HMAC          (1 << 11)
> +
> +#define SR6_TLV_INGRESS                1
> +#define SR6_TLV_EGRESS         2
> +#define SR6_TLV_OPAQUE         3
> +#define SR6_TLV_PADDING                4
> +#define SR6_TLV_HMAC           5
> +
> +#define sr_get_flags(srh) (be16_to_cpu((srh)->flags))
> +
> +#endif
> diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
> index 2343e4f..691c318 100644
> --- a/net/ipv6/Kconfig
> +++ b/net/ipv6/Kconfig
> @@ -289,4 +289,17 @@ config IPV6_PIMSM_V2
>           Support for IPv6 PIM multicast routing protocol PIM-SMv2.
>           If unsure, say N.
>
> +config IPV6_SEG6
> +       bool "IPv6: Segment Routing support"
> +       depends on IPV6
> +       select CRYPTO_HMAC
> +       select CRYPTO_SHA1
> +       select CRYPTO_SHA256
> +       ---help---
> +         Experimental support for IPv6 Segment Routing dataplane as defined
> +         in IETF draft-ietf-6man-segment-routing-header-02. This option
> +         enables the processing of SR-enabled packets allowing the kernel
> +         to act as a segment endpoint (intermediate or egress). It also
> +         enables an API for the kernel to act as an ingress SR router.
> +

I suggest that you eliminate IPV6_SEG6 as config, always include SR
with IPv6. But then maybe SR security should be a CONFIG variable
since this would pull in a lot of crypto. I imagine in a closed
environment (e.g. within a datacenter) security might not normally be
enabled.

>  endif # IPV6
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index d8983e1..42c0ffb 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -239,6 +239,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
>         .use_oif_addrs_only     = 0,
>         .ignore_routes_with_linkdown = 0,
>         .keep_addr_on_down      = 0,
> +#ifdef CONFIG_IPV6_SEG6
> +       .seg6_enabled           = 0,
> +#endif
>  };
>
>  static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
> @@ -285,6 +288,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
>         .use_oif_addrs_only     = 0,
>         .ignore_routes_with_linkdown = 0,
>         .keep_addr_on_down      = 0,
> +#ifdef CONFIG_IPV6_SEG6
> +       .seg6_enabled           = 0,
> +#endif
>  };
>
>  /* Check if a valid qdisc is available */
> @@ -4965,6 +4971,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
>         array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
>         array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
>         array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
> +#ifdef CONFIG_IPV6_SEG6
> +       array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
> +#endif
>  }
>
>  static inline size_t inet6_ifla6_size(void)
> @@ -6056,6 +6065,15 @@ static const struct ctl_table addrconf_sysctl[] = {
>                 .proc_handler   = proc_dointvec,
>
>         },
> +#ifdef CONFIG_IPV6_SEG6
> +       {
> +               .procname       = "seg6_enabled",
> +               .data           = &ipv6_devconf.seg6_enabled,
> +               .maxlen         = sizeof(int),
> +               .mode           = 0644,
> +               .proc_handler   = proc_dointvec,
> +       },
> +#endif
>         {
>                 /* sentinel */
>         }
> diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
> index 139ceb6..b31f811 100644
> --- a/net/ipv6/exthdrs.c
> +++ b/net/ipv6/exthdrs.c
> @@ -47,6 +47,9 @@
>  #if IS_ENABLED(CONFIG_IPV6_MIP6)
>  #include <net/xfrm.h>
>  #endif
> +#ifdef CONFIG_IPV6_SEG6
> +#include <linux/seg6.h>
> +#endif
>
>  #include <linux/uaccess.h>
>
> @@ -286,6 +289,137 @@ static int ipv6_destopt_rcv(struct sk_buff *skb)
>         return -1;
>  }
>
> +#ifdef CONFIG_IPV6_SEG6
> +static int ipv6_srh_rcv(struct sk_buff *skb)
> +{
> +       struct in6_addr *addr = NULL, *last_addr = NULL, *active_addr = NULL;
> +       struct inet6_skb_parm *opt = IP6CB(skb);
> +       struct net *net = dev_net(skb->dev);
> +       struct ipv6_sr_hdr *hdr;
> +       struct inet6_dev *idev;
> +       int cleanup = 0;
> +       int accept_seg6;
> +
> +       hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
> +
> +       idev = __in6_dev_get(skb->dev);
> +
> +       accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
> +       if (accept_seg6 > idev->cnf.seg6_enabled)
> +               accept_seg6 = idev->cnf.seg6_enabled;
> +
> +       if (!accept_seg6) {
> +               kfree_skb(skb);
> +               return -1;
> +       }
> +
> +looped_back:
> +       last_addr = hdr->segments;
> +
> +       if (hdr->segments_left > 0) {
> +               if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 &&
> +                   sr_get_flags(hdr) & SR6_FLAG_CLEANUP)
> +                       cleanup = 1;
> +       } else {
> +               if (hdr->nexthdr == NEXTHDR_IPV6) {
> +                       int offset = (hdr->hdrlen + 1) << 3;
> +
> +                       if (!pskb_pull(skb, offset)) {
> +                               kfree_skb(skb);
> +                               return -1;
> +                       }
> +                       skb_postpull_rcsum(skb, skb_transport_header(skb),
> +                                          offset);
> +
> +                       skb_reset_network_header(skb);
> +                       skb_reset_transport_header(skb);
> +                       skb->encapsulation = 0;
> +
> +                       __skb_tunnel_rx(skb, skb->dev, net);
> +
> +                       netif_rx(skb);
> +                       return -1;
> +               }
> +
> +               opt->srcrt = skb_network_header_len(skb);
> +               opt->lastopt = opt->srcrt;
> +               skb->transport_header += (hdr->hdrlen + 1) << 3;
> +               opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
> +
> +               return 1;
> +       }
> +
> +       if (skb_cloned(skb)) {
> +               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
> +                       __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
> +                                       IPSTATS_MIB_OUTDISCARDS);
> +                       kfree_skb(skb);
> +                       return -1;
> +               }
> +       }
> +
> +       if (skb->ip_summed == CHECKSUM_COMPLETE)
> +               skb->ip_summed = CHECKSUM_NONE;
> +
> +       hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
> +
> +       active_addr = hdr->segments + hdr->segments_left;
> +       hdr->segments_left--;
> +       addr = hdr->segments + hdr->segments_left;
> +
> +       ipv6_hdr(skb)->daddr = *addr;
> +
> +       skb_push(skb, sizeof(struct ipv6hdr));
> +
> +       if (cleanup) {
> +               int srhlen = (hdr->hdrlen + 1) << 3;
> +               int nh = hdr->nexthdr;
> +
> +               memmove(skb_network_header(skb) + srhlen,
> +                       skb_network_header(skb),
> +                       (unsigned char *)hdr - skb_network_header(skb));
> +               skb_pull(skb, srhlen);
> +               skb->network_header += srhlen;
> +               ipv6_hdr(skb)->nexthdr = nh;
> +               ipv6_hdr(skb)->payload_len = htons(skb->len -
> +                                                  sizeof(struct ipv6hdr));
> +       }
> +
> +       skb_dst_drop(skb);
> +
> +       ip6_route_input(skb);
> +
> +       if (skb_dst(skb)->error) {
> +               dst_input(skb);
> +               return -1;
> +       }
> +
> +       if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
> +               if (ipv6_hdr(skb)->hop_limit <= 1) {
> +                       __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
> +                                       IPSTATS_MIB_INHDRERRORS);
> +                       icmpv6_send(skb, ICMPV6_TIME_EXCEED,
> +                                   ICMPV6_EXC_HOPLIMIT, 0);
> +                       kfree_skb(skb);
> +                       return -1;
> +               }
> +               ipv6_hdr(skb)->hop_limit--;
> +
> +               /* be sure that srh is still present before reinjecting */
> +               if (!cleanup) {
> +                       skb_pull(skb, sizeof(struct ipv6hdr));
> +                       goto looped_back;
> +               }
> +               skb_set_transport_header(skb, sizeof(struct ipv6hdr));
> +               IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
> +       }
> +
> +       dst_input(skb);
> +
> +       return -1;
> +}
> +#endif
> +
>  /********************************
>    Routing header.
>   ********************************/
> @@ -326,6 +460,12 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb)
>                 return -1;
>         }
>
> +#ifdef CONFIG_IPV6_SEG6
> +       /* segment routing */
> +       if (hdr->type == IPV6_SRCRT_TYPE_4)
> +               return ipv6_srh_rcv(skb);
> +#endif
> +
>  looped_back:
>         if (hdr->segments_left == 0) {
>                 switch (hdr->type) {
> --
> 2.7.3
>
David Lebrun Oct. 18, 2016, 11:43 a.m. UTC | #5
On 10/17/2016 07:01 PM, Tom Herbert wrote:
>> +struct ipv6_sr_hdr {
>> +       __u8    nexthdr;
>> +       __u8    hdrlen;
>> +       __u8    type;
>> +       __u8    segments_left;
>> +       __u8    first_segment;
>> +       __be16  flags;
> 
> Bad alignment for 16 bit field could be unpleasant on some
> architectures. Might be better to split this into to u8's, defined
> flags are only in first eight bits anyway.
> 

Will do

>> +config IPV6_SEG6
>> +       bool "IPv6: Segment Routing support"
>> +       depends on IPV6
>> +       select CRYPTO_HMAC
>> +       select CRYPTO_SHA1
>> +       select CRYPTO_SHA256
>> +       ---help---
>> +         Experimental support for IPv6 Segment Routing dataplane as defined
> 
> I don't think calling this experimental is relevant.

OK

>> +       if (skb->ip_summed == CHECKSUM_COMPLETE)
>> +               skb->ip_summed = CHECKSUM_NONE;
>> +
> Because the packet is being changed? Would it make sense to update the
> checksum complete value based on the changes being made. Consider the
> case that the next hop is local to the host (someone may try to
> implement network virtualization this way).
> 

Seems to make sense, I will try your suggestion

>>
>> +#ifdef CONFIG_IPV6_SEG6
>> +       /* segment routing */
>> +       if (hdr->type == IPV6_SRCRT_TYPE_4)
>> +               return ipv6_srh_rcv(skb);
>> +#endif
> 
> This doesn't belong in one of the switch statements in ipv6_rthdr_rcv?
> 

From what I see, ipv6_rthdr_rcv was initially implemented to support
RH0, and then specific code was added at multiple points to handle MIP6.
The first switch already handles a specific case (i.e. segments_left ==
0), so the call to ipv6_srh_rcv() must happen before that. I choose not
to inline ipv6_srh_rcv into ipv6_rthdr_rcv as it would make the code
quite messy.
David Lebrun Oct. 20, 2016, 1:04 p.m. UTC | #6
On 10/17/2016 07:01 PM, Tom Herbert wrote:
>> > +
>> > +       if (skb->ip_summed == CHECKSUM_COMPLETE)
>> > +               skb->ip_summed = CHECKSUM_NONE;
>> > +
> Because the packet is being changed? Would it make sense to update the
> checksum complete value based on the changes being made. Consider the
> case that the next hop is local to the host (someone may try to
> implement network virtualization this way).
> 

Rethinking about that: even if the next hop is local, I am not sure to
see the benefits of updating the checksum instead of setting
CHECKSUM_NONE. For example, if the next and final hop is local and the
packet carries a TCP payload, tcp_checksum_complete() would force the
recomputation of the checksum anyway (unless ip_summed ==
CHECKSUM_UNNECESSARY).

So I fail to see a path where updating the checksum would be beneficial.

Am I missing something ?

David
Tom Herbert Oct. 20, 2016, 3:47 p.m. UTC | #7
On Thu, Oct 20, 2016 at 6:04 AM, David Lebrun <david.lebrun@uclouvain.be> wrote:
> On 10/17/2016 07:01 PM, Tom Herbert wrote:
>>> > +
>>> > +       if (skb->ip_summed == CHECKSUM_COMPLETE)
>>> > +               skb->ip_summed = CHECKSUM_NONE;
>>> > +
>> Because the packet is being changed? Would it make sense to update the
>> checksum complete value based on the changes being made. Consider the
>> case that the next hop is local to the host (someone may try to
>> implement network virtualization this way).
>>
>
> Rethinking about that: even if the next hop is local, I am not sure to
> see the benefits of updating the checksum instead of setting
> CHECKSUM_NONE. For example, if the next and final hop is local and the
> packet carries a TCP payload, tcp_checksum_complete() would force the
> recomputation of the checksum anyway (unless ip_summed ==
> CHECKSUM_UNNECESSARY).
>
Or unless skb->csum_valid is set (tcp_checksum_complete calls
skb_csum_unnecessary where the check is done). If the checksum
complete value is correct then skb->csum_valid would be set from
skb_checksum_init which is called early in tcp_v4_rcv and tcp_v6_rcv.
This way if the penultimate and final hops are local and
CHECKSUM_COMPLETE is set computing the packet checksum is avoided for
a TCP packet.

Tom

> So I fail to see a path where updating the checksum would be beneficial.
>
> Am I missing something ?
>
> David
>
diff mbox

Patch

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 7e9a789..75395ad 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -64,6 +64,9 @@  struct ipv6_devconf {
 	} stable_secret;
 	__s32		use_oif_addrs_only;
 	__s32		keep_addr_on_down;
+#ifdef CONFIG_IPV6_SEG6
+	__s32		seg6_enabled;
+#endif
 
 	struct ctl_table_header *sysctl_header;
 };
diff --git a/include/linux/seg6.h b/include/linux/seg6.h
new file mode 100644
index 0000000..7a66d2b
--- /dev/null
+++ b/include/linux/seg6.h
@@ -0,0 +1,6 @@ 
+#ifndef _LINUX_SEG6_H
+#define _LINUX_SEG6_H
+
+#include <uapi/linux/seg6.h>
+
+#endif
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 8c27723..7ff1d65 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -39,6 +39,7 @@  struct in6_ifreq {
 #define IPV6_SRCRT_STRICT	0x01	/* Deprecated; will be removed */
 #define IPV6_SRCRT_TYPE_0	0	/* Deprecated; will be removed */
 #define IPV6_SRCRT_TYPE_2	2	/* IPv6 type 2 Routing Header	*/
+#define IPV6_SRCRT_TYPE_4	4	/* Segment Routing with IPv6 */
 
 /*
  *	routing header
@@ -178,6 +179,7 @@  enum {
 	DEVCONF_DROP_UNSOLICITED_NA,
 	DEVCONF_KEEP_ADDR_ON_DOWN,
 	DEVCONF_RTR_SOLICIT_MAX_INTERVAL,
+	DEVCONF_SEG6_ENABLED,
 	DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/seg6.h b/include/uapi/linux/seg6.h
new file mode 100644
index 0000000..9f9e157
--- /dev/null
+++ b/include/uapi/linux/seg6.h
@@ -0,0 +1,46 @@ 
+/*
+ *  SR-IPv6 implementation
+ *
+ *  Author:
+ *  David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ *  This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_H
+#define _UAPI_LINUX_SEG6_H
+
+/*
+ * SRH
+ */
+struct ipv6_sr_hdr {
+	__u8	nexthdr;
+	__u8	hdrlen;
+	__u8	type;
+	__u8	segments_left;
+	__u8	first_segment;
+	__be16	flags;
+	__u8	reserved;
+
+	struct in6_addr segments[0];
+} __attribute__((packed));
+
+#define SR6_FLAG_CLEANUP	(1 << 15)
+#define SR6_FLAG_PROTECTED	(1 << 14)
+#define SR6_FLAG_OAM		(1 << 13)
+#define SR6_FLAG_ALERT		(1 << 12)
+#define SR6_FLAG_HMAC		(1 << 11)
+
+#define SR6_TLV_INGRESS		1
+#define SR6_TLV_EGRESS		2
+#define SR6_TLV_OPAQUE		3
+#define SR6_TLV_PADDING		4
+#define SR6_TLV_HMAC		5
+
+#define sr_get_flags(srh) (be16_to_cpu((srh)->flags))
+
+#endif
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 2343e4f..691c318 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -289,4 +289,17 @@  config IPV6_PIMSM_V2
 	  Support for IPv6 PIM multicast routing protocol PIM-SMv2.
 	  If unsure, say N.
 
+config IPV6_SEG6
+	bool "IPv6: Segment Routing support"
+	depends on IPV6
+	select CRYPTO_HMAC
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	---help---
+	  Experimental support for IPv6 Segment Routing dataplane as defined
+	  in IETF draft-ietf-6man-segment-routing-header-02. This option
+	  enables the processing of SR-enabled packets allowing the kernel
+	  to act as a segment endpoint (intermediate or egress). It also
+	  enables an API for the kernel to act as an ingress SR router.
+
 endif # IPV6
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index d8983e1..42c0ffb 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -239,6 +239,9 @@  static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.use_oif_addrs_only	= 0,
 	.ignore_routes_with_linkdown = 0,
 	.keep_addr_on_down	= 0,
+#ifdef CONFIG_IPV6_SEG6
+	.seg6_enabled		= 0,
+#endif
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -285,6 +288,9 @@  static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.use_oif_addrs_only	= 0,
 	.ignore_routes_with_linkdown = 0,
 	.keep_addr_on_down	= 0,
+#ifdef CONFIG_IPV6_SEG6
+	.seg6_enabled		= 0,
+#endif
 };
 
 /* Check if a valid qdisc is available */
@@ -4965,6 +4971,9 @@  static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast;
 	array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na;
 	array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down;
+#ifdef CONFIG_IPV6_SEG6
+	array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled;
+#endif
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -6056,6 +6065,15 @@  static const struct ctl_table addrconf_sysctl[] = {
 		.proc_handler	= proc_dointvec,
 
 	},
+#ifdef CONFIG_IPV6_SEG6
+	{
+		.procname	= "seg6_enabled",
+		.data		= &ipv6_devconf.seg6_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
 	{
 		/* sentinel */
 	}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 139ceb6..b31f811 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -47,6 +47,9 @@ 
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
 #include <net/xfrm.h>
 #endif
+#ifdef CONFIG_IPV6_SEG6
+#include <linux/seg6.h>
+#endif
 
 #include <linux/uaccess.h>
 
@@ -286,6 +289,137 @@  static int ipv6_destopt_rcv(struct sk_buff *skb)
 	return -1;
 }
 
+#ifdef CONFIG_IPV6_SEG6
+static int ipv6_srh_rcv(struct sk_buff *skb)
+{
+	struct in6_addr *addr = NULL, *last_addr = NULL, *active_addr = NULL;
+	struct inet6_skb_parm *opt = IP6CB(skb);
+	struct net *net = dev_net(skb->dev);
+	struct ipv6_sr_hdr *hdr;
+	struct inet6_dev *idev;
+	int cleanup = 0;
+	int accept_seg6;
+
+	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+	idev = __in6_dev_get(skb->dev);
+
+	accept_seg6 = net->ipv6.devconf_all->seg6_enabled;
+	if (accept_seg6 > idev->cnf.seg6_enabled)
+		accept_seg6 = idev->cnf.seg6_enabled;
+
+	if (!accept_seg6) {
+		kfree_skb(skb);
+		return -1;
+	}
+
+looped_back:
+	last_addr = hdr->segments;
+
+	if (hdr->segments_left > 0) {
+		if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 &&
+		    sr_get_flags(hdr) & SR6_FLAG_CLEANUP)
+			cleanup = 1;
+	} else {
+		if (hdr->nexthdr == NEXTHDR_IPV6) {
+			int offset = (hdr->hdrlen + 1) << 3;
+
+			if (!pskb_pull(skb, offset)) {
+				kfree_skb(skb);
+				return -1;
+			}
+			skb_postpull_rcsum(skb, skb_transport_header(skb),
+					   offset);
+
+			skb_reset_network_header(skb);
+			skb_reset_transport_header(skb);
+			skb->encapsulation = 0;
+
+			__skb_tunnel_rx(skb, skb->dev, net);
+
+			netif_rx(skb);
+			return -1;
+		}
+
+		opt->srcrt = skb_network_header_len(skb);
+		opt->lastopt = opt->srcrt;
+		skb->transport_header += (hdr->hdrlen + 1) << 3;
+		opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb);
+
+		return 1;
+	}
+
+	if (skb_cloned(skb)) {
+		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) {
+			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+					IPSTATS_MIB_OUTDISCARDS);
+			kfree_skb(skb);
+			return -1;
+		}
+	}
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+
+	active_addr = hdr->segments + hdr->segments_left;
+	hdr->segments_left--;
+	addr = hdr->segments + hdr->segments_left;
+
+	ipv6_hdr(skb)->daddr = *addr;
+
+	skb_push(skb, sizeof(struct ipv6hdr));
+
+	if (cleanup) {
+		int srhlen = (hdr->hdrlen + 1) << 3;
+		int nh = hdr->nexthdr;
+
+		memmove(skb_network_header(skb) + srhlen,
+			skb_network_header(skb),
+			(unsigned char *)hdr - skb_network_header(skb));
+		skb_pull(skb, srhlen);
+		skb->network_header += srhlen;
+		ipv6_hdr(skb)->nexthdr = nh;
+		ipv6_hdr(skb)->payload_len = htons(skb->len -
+						   sizeof(struct ipv6hdr));
+	}
+
+	skb_dst_drop(skb);
+
+	ip6_route_input(skb);
+
+	if (skb_dst(skb)->error) {
+		dst_input(skb);
+		return -1;
+	}
+
+	if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) {
+		if (ipv6_hdr(skb)->hop_limit <= 1) {
+			__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+					IPSTATS_MIB_INHDRERRORS);
+			icmpv6_send(skb, ICMPV6_TIME_EXCEED,
+				    ICMPV6_EXC_HOPLIMIT, 0);
+			kfree_skb(skb);
+			return -1;
+		}
+		ipv6_hdr(skb)->hop_limit--;
+
+		/* be sure that srh is still present before reinjecting */
+		if (!cleanup) {
+			skb_pull(skb, sizeof(struct ipv6hdr));
+			goto looped_back;
+		}
+		skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+		IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+	}
+
+	dst_input(skb);
+
+	return -1;
+}
+#endif
+
 /********************************
   Routing header.
  ********************************/
@@ -326,6 +460,12 @@  static int ipv6_rthdr_rcv(struct sk_buff *skb)
 		return -1;
 	}
 
+#ifdef CONFIG_IPV6_SEG6
+	/* segment routing */
+	if (hdr->type == IPV6_SRCRT_TYPE_4)
+		return ipv6_srh_rcv(skb);
+#endif
+
 looped_back:
 	if (hdr->segments_left == 0) {
 		switch (hdr->type) {