diff mbox series

[bpf-next] samples/bpf: add xdp_redirect_map with xdp_prog support

Message ID 20201110124639.1941654-1-liuhangbin@gmail.com
State Superseded
Headers show
Series [bpf-next] samples/bpf: add xdp_redirect_map with xdp_prog support | expand

Commit Message

Hangbin Liu Nov. 10, 2020, 12:46 p.m. UTC
This patch add running xdp program on egress interface support for
xdp_redirect_map sample. The new prog will change the IP ttl based
on egress ifindex.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 samples/bpf/xdp_redirect_map_kern.c | 74 ++++++++++++++++++++++++++++-
 samples/bpf/xdp_redirect_map_user.c | 21 ++++----
 2 files changed, 86 insertions(+), 9 deletions(-)

Comments

Jesper Dangaard Brouer Nov. 10, 2020, 2:25 p.m. UTC | #1
On Tue, 10 Nov 2020 20:46:39 +0800
Hangbin Liu <liuhangbin@gmail.com> wrote:

> This patch add running xdp program on egress interface support for
> xdp_redirect_map sample. The new prog will change the IP ttl based
> on egress ifindex.
> 
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
>  samples/bpf/xdp_redirect_map_kern.c | 74 ++++++++++++++++++++++++++++-
>  samples/bpf/xdp_redirect_map_user.c | 21 ++++----

Hmmm... I don't think is it a good idea to modify xdp_redirect_map this way.

The xdp_redirect_map is used for comparative benchmarking and
mentioned+used in scientific articles.  As far as I can see, this
change will default slowdown xdp_redirect_map performance, right?
Maciej Fijalkowski Nov. 10, 2020, 3:24 p.m. UTC | #2
On Tue, Nov 10, 2020 at 03:25:10PM +0100, Jesper Dangaard Brouer wrote:
> On Tue, 10 Nov 2020 20:46:39 +0800
> Hangbin Liu <liuhangbin@gmail.com> wrote:
> 
> > This patch add running xdp program on egress interface support for
> > xdp_redirect_map sample. The new prog will change the IP ttl based
> > on egress ifindex.
> > 
> > Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> > ---
> >  samples/bpf/xdp_redirect_map_kern.c | 74 ++++++++++++++++++++++++++++-
> >  samples/bpf/xdp_redirect_map_user.c | 21 ++++----
> 
> Hmmm... I don't think is it a good idea to modify xdp_redirect_map this way.
> 
> The xdp_redirect_map is used for comparative benchmarking and
> mentioned+used in scientific articles.  As far as I can see, this
> change will default slowdown xdp_redirect_map performance, right?

+1

User should be able to trigger attachment of this xdp egress prog by
himself. I don't like having it as a mandatory thing on this sample.

> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
>
Hangbin Liu Nov. 11, 2020, 1:12 a.m. UTC | #3
On Tue, Nov 10, 2020 at 03:25:10PM +0100, Jesper Dangaard Brouer wrote:
> On Tue, 10 Nov 2020 20:46:39 +0800
> Hangbin Liu <liuhangbin@gmail.com> wrote:
> 
> > This patch add running xdp program on egress interface support for
> > xdp_redirect_map sample. The new prog will change the IP ttl based
> > on egress ifindex.
> > 
> > Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> > ---
> >  samples/bpf/xdp_redirect_map_kern.c | 74 ++++++++++++++++++++++++++++-
> >  samples/bpf/xdp_redirect_map_user.c | 21 ++++----
> 
> Hmmm... I don't think is it a good idea to modify xdp_redirect_map this way.
> 
> The xdp_redirect_map is used for comparative benchmarking and

Hi Jesper,

Would you please help explain or give some example about how to do
comparative benchmarking with xdp_redirect_map, just count the TX pkts?

Thanks
Hangbin
> mentioned+used in scientific articles.  As far as I can see, this
> change will default slowdown xdp_redirect_map performance, right?
> 
> -- 
> Best regards,
>   Jesper Dangaard Brouer
>   MSc.CS, Principal Kernel Engineer at Red Hat
>   LinkedIn: http://www.linkedin.com/in/brouer
>
diff mbox series

Patch

diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
index 6489352ab7a4..e5ebff2271f4 100644
--- a/samples/bpf/xdp_redirect_map_kern.c
+++ b/samples/bpf/xdp_redirect_map_kern.c
@@ -22,7 +22,7 @@ 
 struct {
 	__uint(type, BPF_MAP_TYPE_DEVMAP);
 	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__uint(value_size, sizeof(struct bpf_devmap_val));
 	__uint(max_entries, 100);
 } tx_port SEC(".maps");
 
@@ -52,6 +52,48 @@  static void swap_src_dst_mac(void *data)
 	p[5] = dst[2];
 }
 
+static __always_inline __u16 csum_fold_helper(__u32 csum)
+{
+	__u32 sum;
+	sum = (csum & 0xffff) + (csum >> 16);
+	sum += (sum >> 16);
+	return ~sum;
+}
+
+static __always_inline __u16 ipv4_csum(__u16 seed, struct iphdr *iphdr_new,
+				       struct iphdr *iphdr_old)
+{
+	__u32 csum, size = sizeof(struct iphdr);
+	csum = bpf_csum_diff((__be32 *)iphdr_old, size,
+			     (__be32 *)iphdr_new, size, seed);
+	return csum_fold_helper(csum);
+}
+
+static void parse_ipv4(void *data, u64 nh_off, void *data_end, u8 ttl)
+{
+	struct iphdr *iph = data + nh_off;
+	struct iphdr iph_old;
+	__u16 csum_old;
+
+	if (iph + 1 > data_end)
+		return;
+
+	iph_old = *iph;
+	csum_old = iph->check;
+	iph->ttl = ttl;
+	iph->check = ipv4_csum(~csum_old, iph, &iph_old);
+}
+
+static void parse_ipv6(void *data, u64 nh_off, void *data_end, u8 hop_limit)
+{
+	struct ipv6hdr *ip6h = data + nh_off;
+
+	if (ip6h + 1 > data_end)
+		return;
+
+	ip6h->hop_limit = hop_limit;
+}
+
 SEC("xdp_redirect_map")
 int xdp_redirect_map_prog(struct xdp_md *ctx)
 {
@@ -82,6 +124,36 @@  int xdp_redirect_map_prog(struct xdp_md *ctx)
 	return bpf_redirect_map(&tx_port, vport, 0);
 }
 
+/* This map prog will set new IP ttl based on egress ifindex */
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+	char fmt[] = "devmap redirect: egress dev %u with new ttl %u\n";
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct ethhdr *eth = data;
+	u16 h_proto;
+	u64 nh_off;
+	u8 ttl;
+
+	nh_off = sizeof(struct ethhdr);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+
+	/* set new ttl based on egress ifindex */
+	ttl = ctx->egress_ifindex % 64;
+
+	h_proto = eth->h_proto;
+	if (h_proto == htons(ETH_P_IP))
+		parse_ipv4(data, nh_off, data_end, ttl);
+	else if (h_proto == htons(ETH_P_IPV6))
+		parse_ipv6(data, nh_off, data_end, ttl);
+
+	bpf_trace_printk(fmt, sizeof(fmt), ctx->egress_ifindex, ttl);
+
+	return XDP_PASS;
+}
+
 /* Redirect require an XDP bpf_prog loaded on the TX device */
 SEC("xdp_redirect_dummy")
 int xdp_redirect_dummy_prog(struct xdp_md *ctx)
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index 35e16dee613e..9a95eab629bd 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -98,12 +98,13 @@  int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	struct bpf_prog_load_attr prog_load_attr = {
-		.prog_type	= BPF_PROG_TYPE_XDP,
+		.prog_type	= BPF_PROG_TYPE_UNSPEC,
 	};
-	struct bpf_program *prog, *dummy_prog;
+	struct bpf_program *prog, *dummy_prog, *devmap_prog;
+	int prog_fd, dummy_prog_fd, devmap_prog_fd;
+	struct bpf_devmap_val devmap_val;
 	struct bpf_prog_info info = {};
 	__u32 info_len = sizeof(info);
-	int prog_fd, dummy_prog_fd;
 	const char *optstr = "FSN";
 	struct bpf_object *obj;
 	int ret, opt, key = 0;
@@ -157,16 +158,18 @@  int main(int argc, char **argv)
 		return 1;
 
 	prog = bpf_program__next(NULL, obj);
-	dummy_prog = bpf_program__next(prog, obj);
-	if (!prog || !dummy_prog) {
+	devmap_prog = bpf_object__find_program_by_title(obj, "xdp_devmap/map_prog");
+	dummy_prog = bpf_object__find_program_by_title(obj, "xdp_redirect_dummy");
+	if (!prog || !devmap_prog || !dummy_prog) {
 		printf("finding a prog in obj file failed\n");
 		return 1;
 	}
 	/* bpf_prog_load_xattr gives us the pointer to first prog's fd,
-	 * so we're missing only the fd for dummy prog
+	 * so we're missing the fd for devmap and dummy prog
 	 */
+	devmap_prog_fd = bpf_program__fd(devmap_prog);
 	dummy_prog_fd = bpf_program__fd(dummy_prog);
-	if (prog_fd < 0 || dummy_prog_fd < 0) {
+	if (prog_fd < 0 || devmap_prog_fd < 0 || dummy_prog_fd < 0) {
 		printf("bpf_prog_load_xattr: %s\n", strerror(errno));
 		return 1;
 	}
@@ -209,7 +212,9 @@  int main(int argc, char **argv)
 	signal(SIGTERM, int_exit);
 
 	/* populate virtual to physical port map */
-	ret = bpf_map_update_elem(tx_port_map_fd, &key, &ifindex_out, 0);
+	devmap_val.bpf_prog.fd = devmap_prog_fd;
+	devmap_val.ifindex = ifindex_out;
+	ret = bpf_map_update_elem(tx_port_map_fd, &key, &devmap_val, 0);
 	if (ret) {
 		perror("bpf_update_elem");
 		goto out;