diff mbox series

[net-next] net: avoid indirect calls in L4 checksum calculation

Message ID 20190528235844.19360-1-mcroce@redhat.com
State Changes Requested
Delegated to: David Miller
Headers show
Series [net-next] net: avoid indirect calls in L4 checksum calculation | expand

Commit Message

Matteo Croce May 28, 2019, 11:58 p.m. UTC
Commit 283c16a2dfd3 ("indirect call wrappers: helpers to speed-up
indirect calls of builtin") introduces some macros to avoid doing
indirect calls.

Use these helpers to remove two indirect calls in the L4 checksum
calculation for devices which don't have hardware support for it.

As a test I generate packets with pktgen out to a dummy interface
with HW checksumming disabled, to have the checksum calculated in
every sent packet.
The packet rate measured with an i7-6700K CPU and a single pktgen
thread rose from 6143 to 6608 Kpps, an increase of 7.5%.

Suggested-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: Matteo Croce <mcroce@redhat.com>
---
 net/core/skbuff.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

Comments

Matteo Croce May 29, 2019, 1:56 p.m. UTC | #1
On Wed, May 29, 2019 at 1:58 AM Matteo Croce <mcroce@redhat.com> wrote:
>
> Commit 283c16a2dfd3 ("indirect call wrappers: helpers to speed-up
> indirect calls of builtin") introduces some macros to avoid doing
> indirect calls.
>
> Use these helpers to remove two indirect calls in the L4 checksum
> calculation for devices which don't have hardware support for it.
>
> As a test I generate packets with pktgen out to a dummy interface
> with HW checksumming disabled, to have the checksum calculated in
> every sent packet.
> The packet rate measured with an i7-6700K CPU and a single pktgen
> thread rose from 6143 to 6608 Kpps, an increase of 7.5%.
>
> Suggested-by: Davide Caratti <dcaratti@redhat.com>
> Signed-off-by: Matteo Croce <mcroce@redhat.com>

I found a build error with CONFIG_LIBCRC32C=m:

ld: net/core/skbuff.o: in function `sctp_csum_update':
skbuff.c:(.text+0x2640): undefined reference to `crc32c'
ld: net/core/skbuff.o: in function `__skb_checksum':
(.text+0x2aba): undefined reference to `crc32c'
ld: (.text+0x2cf9): undefined reference to `crc32c'

I have two possible solutions for this:
- use INDIRECT_CALL_1 and leave the SCTP callbacks to be invoked through the function pointer
- use IS_BUILTIN(CONFIG_LIBCRC32C) around the sctp_csum_combine usage

I'm leaning toward the first one, which would also avoid including
net/sctp/checksum.h.
diff mbox series

Patch

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e89be6282693..a24a7ef55ce9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -69,6 +69,7 @@ 
 #include <net/sock.h>
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
+#include <net/sctp/checksum.h>
 #include <net/xfrm.h>
 
 #include <linux/uaccess.h>
@@ -76,9 +77,22 @@ 
 #include <linux/highmem.h>
 #include <linux/capability.h>
 #include <linux/user_namespace.h>
+#include <linux/indirect_call_wrapper.h>
 
 #include "datagram.h"
 
+#if IS_ENABLED(CONFIG_IP_SCTP)
+#define CSUM_UPDATE(f, ...) \
+	INDIRECT_CALL_2(f, csum_partial_ext, sctp_csum_update, __VA_ARGS__)
+#define CSUM_COMBINE(f, ...) \
+	INDIRECT_CALL_2(f, csum_block_add_ext, sctp_csum_combine, __VA_ARGS__)
+#else
+#define CSUM_UPDATE(f, ...) \
+	INDIRECT_CALL_1(f, csum_partial_ext, __VA_ARGS__)
+#define CSUM_COMBINE(f, ...) \
+	INDIRECT_CALL_1(f, csum_block_add_ext, __VA_ARGS__)
+#endif
+
 struct kmem_cache *skbuff_head_cache __ro_after_init;
 static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
 #ifdef CONFIG_SKB_EXTENSIONS
@@ -2507,7 +2521,7 @@  __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
-		csum = ops->update(skb->data + offset, copy, csum);
+		csum = CSUM_UPDATE(ops->update, skb->data + offset, copy, csum);
 		if ((len -= copy) == 0)
 			return csum;
 		offset += copy;
@@ -2534,9 +2548,9 @@  __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 					      frag->page_offset + offset - start,
 					      copy, p, p_off, p_len, copied) {
 				vaddr = kmap_atomic(p);
-				csum2 = ops->update(vaddr + p_off, p_len, 0);
+				csum2 = CSUM_UPDATE(ops->update, vaddr + p_off, p_len, 0);
 				kunmap_atomic(vaddr);
-				csum = ops->combine(csum, csum2, pos, p_len);
+				csum = CSUM_COMBINE(ops->combine, csum, csum2, pos, p_len);
 				pos += p_len;
 			}
 
@@ -2559,7 +2573,7 @@  __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
 				copy = len;
 			csum2 = __skb_checksum(frag_iter, offset - start,
 					       copy, 0, ops);
-			csum = ops->combine(csum, csum2, pos, copy);
+			csum = CSUM_COMBINE(ops->combine, csum, csum2, pos, copy);
 			if ((len -= copy) == 0)
 				return csum;
 			offset += copy;