diff mbox series

[nf-next,v2,5/8] nft_set_pipapo: Provide unrolled lookup loops for common field sizes

Message ID 8954376b602e231687c7513e461782dc8c781e09.1574428269.git.sbrivio@redhat.com
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series nftables: Set implementation for arbitrary concatenation of ranges | expand

Commit Message

Stefano Brivio Nov. 22, 2019, 1:40 p.m. UTC
For non-vectorised lookup implementations, this increases matching
rates by 20 to 30% for most set types.

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
---
v2: No changes

 net/netfilter/nft_set_pipapo.c | 86 +++++++++++++++++++++++++++++-----
 1 file changed, 73 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 3cad9aedc168..0596dbd11319 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -526,6 +526,51 @@  static int pipapo_refill(unsigned long *map, int len, int rules,
 	return ret;
 }
 
+#define NFT_PIPAPO_AND_BUCKET(map, bucket, bsize, idx)			       \
+	do { /* AND bsize elements of bucket into map; idx is loop scratch */  \
+		for (idx = 0; idx < (bsize); idx++)			       \
+			(map)[idx] &= *((bucket) + idx);		       \
+	} while (0)
+
+#define NFT_PIPAPO_MATCH_2(map, lt, bsize, pkt, offset, idx)		       \
+	do { /* AND in buckets for both nibbles of *pkt, then advance pkt */   \
+		NFT_PIPAPO_AND_BUCKET(map,				       \
+			(lt) +						       \
+			((offset) +  0 +   (*(pkt) >> 4)) * (bsize),	       \
+			bsize, idx);					       \
+		NFT_PIPAPO_AND_BUCKET(map,				       \
+			(lt) +						       \
+			((offset) + 16 + (*(pkt) & 0x0f)) * (bsize),	       \
+			bsize, idx);					       \
+		(pkt)++;						       \
+	} while (0)
+
+#define NFT_PIPAPO_MATCH_4(map, lt, bsize, pkt, offset, idx)		       \
+	do { /* 2 bytes of pkt = 4 groups; offset counts buckets (16/group) */ \
+		NFT_PIPAPO_MATCH_2(map, lt, bsize, pkt, offset, idx);	       \
+		NFT_PIPAPO_MATCH_2(map, lt, bsize, pkt, offset + 2 * 16, idx); \
+	} while (0)
+
+#define NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, offset, idx)		       \
+	do { /* 4 bytes of pkt = 8 groups, as two runs of 4 groups each */     \
+		NFT_PIPAPO_MATCH_4(map, lt, bsize, pkt, offset, idx);	       \
+		NFT_PIPAPO_MATCH_4(map, lt, bsize, pkt, offset + 4 * 16, idx); \
+	} while (0)
+
+#define NFT_PIPAPO_MATCH_12(map, lt, bsize, pkt, idx)			       \
+	do { /* 6 bytes of pkt = 12 groups, always from bucket offset 0 */     \
+		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, 0, idx);	       \
+		NFT_PIPAPO_MATCH_4(map, lt, bsize, pkt, 8 * 16, idx);	       \
+	} while (0)
+
+#define NFT_PIPAPO_MATCH_32(map, lt, bsize, pkt, idx)			       \
+	do { /* 16 bytes of pkt = 32 groups, always from bucket offset 0 */    \
+		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt,  0, idx);	       \
+		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt,  8 * 16, idx);	       \
+		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, 16 * 16, idx);	       \
+		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, 24 * 16, idx);	       \
+	} while (0)
+
 /**
  * nft_pipapo_lookup() - Lookup function
  * @net:	Network namespace
@@ -566,24 +611,39 @@  static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
 	nft_pipapo_for_each_field(f, i, m) {
 		bool last = i == m->field_count - 1;
 		unsigned long *lt = f->lt;
-		int b, group;
+		int b, group, j;
 
 		/* For each 4-bit group: select lookup table bucket depending on
-		 * packet bytes value, then AND bucket value
+		 * packet bytes value, then AND bucket value. Unroll loops for
+		 * the most common cases (protocol, port, IPv4 address, MAC
+		 * address, IPv6 address).
 		 */
-		for (group = 0; group < f->groups; group++) {
-			u8 v;
+		if (f->groups == 2) {
+			NFT_PIPAPO_MATCH_2(res_map, lt, f->bsize, rp, 0, j);
+		} else if (f->groups == 4) {
+			NFT_PIPAPO_MATCH_4(res_map, lt, f->bsize, rp, 0, j);
+		} else if (f->groups == 8) {
+			NFT_PIPAPO_MATCH_8(res_map, lt, f->bsize, rp, 0, j);
+		} else if (f->groups == 12) {
+			NFT_PIPAPO_MATCH_12(res_map, lt, f->bsize, rp, j);
+		} else if (f->groups == 32) {
+			NFT_PIPAPO_MATCH_32(res_map, lt, f->bsize, rp, j);
+		} else {
+			for (group = 0; group < f->groups; group++) {
+				u8 v;
+
+				if (group % 2) {
+					v = *rp & 0x0f;
+					rp++;
+				} else {
+					v = *rp >> 4;
+				}
+				__bitmap_and(res_map, res_map,
+					     lt + v * f->bsize,
+					     f->bsize * BITS_PER_LONG);
 
-			if (group % 2) {
-				v = *rp & 0x0f;
-				rp++;
-			} else {
-				v = *rp >> 4;
+				lt += f->bsize * NFT_PIPAPO_BUCKETS;
 			}
-			__bitmap_and(res_map, res_map, lt + v * f->bsize,
-				     f->bsize * BITS_PER_LONG);
-
-			lt += f->bsize * NFT_PIPAPO_BUCKETS;
 		}
 
 		/* Now populate the bitmap for the next field, unless this is