diff mbox

[nft,1/4] monitor: Fix printing of range elements in named sets

Message ID 20170712123658.25363-2-phil@nwl.cc
State Changes Requested
Delegated to: Pablo Neira
Headers show

Commit Message

Phil Sutter July 12, 2017, 12:36 p.m. UTC
When adding or removing ranges from named sets, the kernel sends
separate netlink events for the lower and upper boundary of the range,
so 'nft monitor' incorrectly treated them as separate elements.

An intuitive approach to fix this is to cache the first element reported
for sets with 'interval' flag set and use it to reconstruct the range
when the second one is reported. Though this does not work for unclosed
intervals: If a range's upper boundary aligns with the maximum value
allowed for the given set datatype, an unclosed interval is created
which consists of only the lower boundary. The kernel then reports this
range as a single element (which does not have EXPR_F_INTERVAL_END flag
set).

This patch solves both cases: netlink_events_setelem_cb() caches the
first reported element of a range. If the upper boundary is reported
afterwards, the same function takes care of the reassembling and cache
removal. If not, 'nft monitor' will eventually receive a NEWGEN message
indicating the end of the transaction. To convert the cached lower
boundary into an unclosed range element, a new callback
netlink_events_setelem_newgen_cb() is introduced which after printing
the element also clears the cache.

Signed-off-by: Phil Sutter <phil@nwl.cc>
---
 src/netlink.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 68 insertions(+), 4 deletions(-)

Comments

Arturo Borrero Gonzalez July 12, 2017, 4:30 p.m. UTC | #1
Well, we can probably merge all the patches in this one.

On 12 July 2017 at 14:36, Phil Sutter <phil@nwl.cc> wrote:
> When adding or removing ranges from named sets, the kernel sends
> separate netlink events for the lower and upper boundary of the range,
> so 'nft monitor' incorrectly treated them as separate elements.
>
> An intuitive approach to fix this is to cache the first element reported
> for sets with 'interval' flag set and use it to reconstruct the range
> when the second one is reported. Though this does not work for unclosed
> intervals: If a range's upper boundary aligns with the maximum value
> allowed for the given set datatype, an unclosed interval is created
> which consists of only the lower boundary. The kernel then reports this
> range as a single element (which does not have EXPR_F_INTERVAL_END flag
> set).
>
> This patch solves both cases: netlink_events_setelem_cb() caches the
> first reported element of a range. If the upper boundary is reported
> afterwards, the same function takes care of the reassembling and cache
> removal. If not, 'nft monitor' will eventually receive a NEWGEN message
> indicating the end of the transaction. To convert the cached lower
> boundary into an unclosed range element, a new callback
> netlink_events_setelem_newgen_cb() is introduced which after printing
> the element also clears the cache.
>
> Signed-off-by: Phil Sutter <phil@nwl.cc>
> ---
>  src/netlink.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 68 insertions(+), 4 deletions(-)
>

Somme comments below.

> diff --git a/src/netlink.c b/src/netlink.c
> index 2e30622de4bb1..65c6f05a57649 100644
> --- a/src/netlink.c
> +++ b/src/netlink.c
> @@ -2196,6 +2196,14 @@ out:
>         return MNL_CB_OK;
>  }
>
> +static struct {
> +       int type;
> +       uint32_t family;
> +       char *table;
> +       char *setname;
> +       struct set *set;
> +} setelem_cache;
> +

Most of this info is available in 'struct set'.
Why not using it? We have the set cached anyway, the event for the set
creation was reported before, and we surely cache it, in
netlink_events_cache_addset().
All following set elements belong to this already cached set. That's
why in my patch I simply added a new field to 'struct set'.

In fact, I used the 'dummyset' trick to avoid touching the cached set.
But reading again the code, it seems that in this very case we indeed
would like to modify the cached set.. to add our new elements.

>  }
>
> +static int netlink_events_setelem_newgen_cb(const struct nlmsghdr *nlh,
> +                                           int type,
> +                                           struct netlink_mon_handler *monh)
> +{
> +       if (!setelem_cache.set ||
> +           monh->format != NFTNL_OUTPUT_DEFAULT)
> +               return MNL_CB_OK;
> +
> +       interval_map_decompose(setelem_cache.set->init);
> +
> +       switch (setelem_cache.type) {
> +       case NFT_MSG_NEWSETELEM:
> +               printf("add ");
> +               break;
> +       case NFT_MSG_DELSETELEM:
> +               printf("delete ");
> +               break;
> +       default:
> +               goto out;
> +       }
> +       printf("element %s %s %s ", family2str(setelem_cache.family),
> +              setelem_cache.table, setelem_cache.setname);
> +       expr_print(setelem_cache.set->init, monh->ctx->octx);
> +       printf("\n");
> +
^^^

In this function you are somehow re-implementing what
netlink_events_setelem_cb() does, which
is the logic for printing.

Could this logic be merged into that function? My goal is to only
print from one code path.

> +
>  static int netlink_events_obj_cb(const struct nlmsghdr *nlh, int type,
>                                  struct netlink_mon_handler *monh)
>  {
> @@ -2914,6 +2976,8 @@ static int netlink_events_cb(const struct nlmsghdr *nlh, void *data)
>         case NFT_MSG_DELOBJ:
>                 ret = netlink_events_obj_cb(nlh, type, monh);
>                 break;
> +       case NFT_MSG_NEWGEN:
> +               ret = netlink_events_setelem_newgen_cb(nlh, type, monh);
>         }
>         fflush(stdout);
>

I'm almost sure that we should be doing the cache stuff in the routine
we start at netlink_events_cache_update(). My patch is lacking this
too.

My proposal, keep each thing in it's stage:

netlink_events_cb() <-- main event cb function
netlink_events_cache_update() <--- we do here all the stuff regarding
caching sets/elements
netlink_events_setelem_cb() <--- printing (or ignoring event, if required)
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Phil Sutter July 12, 2017, 7:05 p.m. UTC | #2
On Wed, Jul 12, 2017 at 06:30:47PM +0200, Arturo Borrero Gonzalez wrote:
> Well, we can probably merge all the patches in this one.
> 
> On 12 July 2017 at 14:36, Phil Sutter <phil@nwl.cc> wrote:
[...]
> > diff --git a/src/netlink.c b/src/netlink.c
> > index 2e30622de4bb1..65c6f05a57649 100644
> > --- a/src/netlink.c
> > +++ b/src/netlink.c
> > @@ -2196,6 +2196,14 @@ out:
> >         return MNL_CB_OK;
> >  }
> >
> > +static struct {
> > +       int type;
> > +       uint32_t family;
> > +       char *table;
> > +       char *setname;
> > +       struct set *set;
> > +} setelem_cache;
> > +
> 
> Most of this info is available in 'struct set'.
> Why not using it? We have the set cached anyway, the event for the set
> creation was reported before, and we surely cache it, in
> netlink_events_cache_addset().

Oh, I overlooked it's handle field. That's indeed all I need!

> All following set elements belong to this already cached set. That's
> why in my patch I simply added a new field to 'struct set'.
> 
> In fact, I used the 'dummyset' trick to avoid touching the cached set.
> But reading again the code, it seems that in this very case we indeed
> would like to modify the cached set.. to add our new elements.

Hmm. For no obvious reason I ignored that cache_update function
altogether, and I think that was a mistake: At the time
netlink_events_setelem_cb() is called, the element is already present in
the cache. So we can just copy the last element in there (or the last
two for interval sets) to dummy set and run interval_map_decompose().

The only remaining bit then should be half-open ranges. To get that
sorted, I think we can get by with a pointer to the set we have
unfinished business with and deal with it upon NEWGEN message.

> > +static int netlink_events_setelem_newgen_cb(const struct nlmsghdr *nlh,
> > +                                           int type,
> > +                                           struct netlink_mon_handler *monh)
> > +{
> > +       if (!setelem_cache.set ||
> > +           monh->format != NFTNL_OUTPUT_DEFAULT)
> > +               return MNL_CB_OK;
> > +
> > +       interval_map_decompose(setelem_cache.set->init);
> > +
> > +       switch (setelem_cache.type) {
> > +       case NFT_MSG_NEWSETELEM:
> > +               printf("add ");
> > +               break;
> > +       case NFT_MSG_DELSETELEM:
> > +               printf("delete ");
> > +               break;
> > +       default:
> > +               goto out;
> > +       }
> > +       printf("element %s %s %s ", family2str(setelem_cache.family),
> > +              setelem_cache.table, setelem_cache.setname);
> > +       expr_print(setelem_cache.set->init, monh->ctx->octx);
> > +       printf("\n");
> > +
> ^^^
> 
> In this function you are somehow re-implementing what
> netlink_events_setelem_cb() does, which
> is the logic for printing.

Ah yes, that was copy'n'paste, sorry for not cleaning it up in
beforehand.

> Could this logic be merged into that function? My goal is to only
> print from one code path.

Yes, that makes sense. I found it too cumbersome to squeeze the
additional logic into netlink_events_setelem_cb(), hence why I went with
a separate function. I'll give it another try. In doubt I'll move the
printing logic into a separate function to be called from both places.

I'll prepare a v2 tomorrow, also merging the previous two patches as
suggested.

Thanks, Phil
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Phil Sutter July 13, 2017, 6:22 p.m. UTC | #3
On Wed, Jul 12, 2017 at 09:05:45PM +0200, Phil Sutter wrote:
> On Wed, Jul 12, 2017 at 06:30:47PM +0200, Arturo Borrero Gonzalez wrote:
[...]
> > Could this logic be merged into that function? My goal is to only
> > print from one code path.
> 
> Yes, that makes sense. I found it too cumbersome to squeeze the
> additional logic into netlink_events_setelem_cb(), hence why I went with
> a separate function. I'll give it another try. In doubt I'll move the
> printing logic into a separate function to be called from both places.
> 
> I'll prepare a v2 tomorrow, also merging the previous two patches as
> suggested.

Just a quick status update: It's a mess. ;)

There are so many different cases, I actually started drawing flow
diagrams (can't remember when I did that last time). In addition to what
we discussed already, I realized that via 'nft -f', I can make multiple
changes to even different sets within a single transaction - this
requires dealing with cached half-open ranges everywhere, not just in
NEWGEN callback. Another trap is 'nft flush set': The elements are
reported in reverse order. Anyway, I have something that seems to work
but needs quite some cleanup before I dare to publish it. :)

I should probably look into ways to write tests for this to get all the
cases covered.

Cheers, Phil
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Arturo Borrero Gonzalez July 14, 2017, 9:03 a.m. UTC | #4
On 13 July 2017 at 20:22, Phil Sutter <phil@nwl.cc> wrote:
> There are so many different cases, I actually started drawing flow
> diagrams (can't remember when I did that last time). In addition to what
> we discussed already, I realized that via 'nft -f', I can make multiple
> changes to even different sets within a single transaction - this
> requires dealing with cached half-open ranges everywhere, not just in
> NEWGEN callback. Another trap is 'nft flush set': The elements are
> reported in reverse order. Anyway, I have something that seems to work
> but needs quite some cleanup before I dare to publish it. :)
>

In an ideal world, we would detect these 'flush x' calls and print
just that, instead
of every single object being deleted.
I.e. flush ruleset --> monitor prints 'flush ruleset' instead of every
single object in the
ruleset being deleted.

But at this point, I think that going this way would increase
complexity in this code a lot.
Better fix by now the broken things we have.

> I should probably look into ways to write tests for this to get all the
> cases covered.
>

Indeed, but if your --echo functionality is going to use the very same
code path, then we should wait for it,
because testing the monitor thing is not that easy (async output).
The --echo output is sync with the command you call, so you can simply
check the output.

Once the code is in place, this is a good task for one of our
students/interns in GSoC-like programs.
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pablo Neira Ayuso July 17, 2017, 4:12 p.m. UTC | #5
On Thu, Jul 13, 2017 at 08:22:02PM +0200, Phil Sutter wrote:
> Just a quick status update: It's a mess. ;)

OK, let's address problems one by one.

> There are so many different cases, I actually started drawing flow
> diagrams (can't remember when I did that last time). In addition to what
> we discussed already, I realized that via 'nft -f', I can make multiple
> changes to even different sets within a single transaction - this
> requires dealing with cached half-open ranges everywhere, not just in
> NEWGEN callback.

half-open ranges always start by a NFT_SET_ELEM_INTERVAL_END flag set
on, eg.

# nft --debug=netlink add element x y { 5-65535 }
element 00000000  : 1 [end]     element 00000500  : 0 [end]

> Another trap is 'nft flush set': The elements are reported in
> reverse order.

Could you have a look at the function to order elements using the
mergesort function? It's currently only called for non-intervals by
now, so it would be good to converge to use it in all cases.

Anything else?
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Phil Sutter July 17, 2017, 5:02 p.m. UTC | #6
On Mon, Jul 17, 2017 at 06:12:34PM +0200, Pablo Neira Ayuso wrote:
> On Thu, Jul 13, 2017 at 08:22:02PM +0200, Phil Sutter wrote:
> > Just a quick status update: It's a mess. ;)
> 
> OK, let's address problems one by one.
> 
> > There are so many different cases, I actually started drawing flow
> > diagrams (can't remember when I did that last time). In addition to what
> > we discussed already, I realized that via 'nft -f', I can make multiple
> > changes to even different sets within a single transaction - this
> > requires dealing with cached half-open ranges everywhere, not just in
> > NEWGEN callback.
> 
> half-open ranges always start by a NFT_SET_ELEM_INTERVAL_END flag set
> on, eg.

No, they don't. See the end of segtree_linearize() src/segtree.c in
nftables code: EI_F_INTERVAL_END is set for intervals which don't match,
so if the matching interval extends to the end, no element with that
flag set will be inserted.

> # nft --debug=netlink add element x y { 5-65535 }
> element 00000000  : 1 [end]     element 00000500  : 0 [end]

Here, the first element is the "null" element indicating a non-matching
segment from 0 to 4, the second one marks a matching segment from 5 till
the end. That '[end]' marker is printed unconditionally for all
elements.

> > Another trap is 'nft flush set': The elements are reported in
> > reverse order.
> 
> Could you have a look at the function to order elements using the
> mergesort function? It's currently only called for non-intervals by
> now, so it would be good to converge to use it in all cases.

You mean the call to list_expr_sort() in netlink_get_setelems()? It is
not called because interval_map_decompose() (which is called later in
the same function does it's own sorting.

Cheers, Phil
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/src/netlink.c b/src/netlink.c
index 2e30622de4bb1..65c6f05a57649 100644
--- a/src/netlink.c
+++ b/src/netlink.c
@@ -2196,6 +2196,14 @@  out:
 	return MNL_CB_OK;
 }
 
+static struct {
+	int type;
+	uint32_t family;
+	char *table;
+	char *setname;
+	struct set *set;
+} setelem_cache;
+
 static int netlink_events_setelem_cb(const struct nlmsghdr *nlh, int type,
 				     struct netlink_mon_handler *monh)
 {
@@ -2227,10 +2235,15 @@  static int netlink_events_setelem_cb(const struct nlmsghdr *nlh, int type,
 		 * modify the original cached set. This path is only
 		 * used by named sets, so use a dummy set.
 		 */
-		dummyset = set_alloc(monh->loc);
-		dummyset->keytype = set->keytype;
-		dummyset->datatype = set->datatype;
-		dummyset->init = set_expr_alloc(monh->loc, set);
+		if (setelem_cache.set) {
+			dummyset = setelem_cache.set;
+		} else {
+			dummyset = set_alloc(monh->loc);
+			dummyset->keytype = set->keytype;
+			dummyset->datatype = set->datatype;
+			dummyset->flags = set->flags;
+			dummyset->init = set_expr_alloc(monh->loc, set);
+		}
 
 		nlsei = nftnl_set_elems_iter_create(nls);
 		if (nlsei == NULL)
@@ -2247,6 +2260,22 @@  static int netlink_events_setelem_cb(const struct nlmsghdr *nlh, int type,
 		}
 		nftnl_set_elems_iter_destroy(nlsei);
 
+		if (set->flags & NFT_SET_INTERVAL) {
+			if (setelem_cache.set) {
+				interval_map_decompose(dummyset->init);
+				setelem_cache.set = NULL;
+				free(setelem_cache.table);
+				free(setelem_cache.setname);
+			} else {
+				setelem_cache.type = type;
+				setelem_cache.family = family;
+				setelem_cache.table = xstrdup(table);
+				setelem_cache.setname = xstrdup(setname);
+				setelem_cache.set = dummyset;
+				goto out;
+			}
+		}
+
 		switch (type) {
 		case NFT_MSG_NEWSETELEM:
 			printf("add ");
@@ -2276,6 +2305,39 @@  out:
 	return MNL_CB_OK;
 }
 
+static int netlink_events_setelem_newgen_cb(const struct nlmsghdr *nlh,
+					    int type,
+					    struct netlink_mon_handler *monh)
+{
+	if (!setelem_cache.set ||
+	    monh->format != NFTNL_OUTPUT_DEFAULT)
+		return MNL_CB_OK;
+
+	interval_map_decompose(setelem_cache.set->init);
+
+	switch (setelem_cache.type) {
+	case NFT_MSG_NEWSETELEM:
+		printf("add ");
+		break;
+	case NFT_MSG_DELSETELEM:
+		printf("delete ");
+		break;
+	default:
+		goto out;
+	}
+	printf("element %s %s %s ", family2str(setelem_cache.family),
+	       setelem_cache.table, setelem_cache.setname);
+	expr_print(setelem_cache.set->init, monh->ctx->octx);
+	printf("\n");
+
+out:
+	set_free(setelem_cache.set);
+	free(setelem_cache.table);
+	free(setelem_cache.setname);
+	setelem_cache.set = NULL;
+	return MNL_CB_OK;
+}
+
 static int netlink_events_obj_cb(const struct nlmsghdr *nlh, int type,
 				 struct netlink_mon_handler *monh)
 {
@@ -2914,6 +2976,8 @@  static int netlink_events_cb(const struct nlmsghdr *nlh, void *data)
 	case NFT_MSG_DELOBJ:
 		ret = netlink_events_obj_cb(nlh, type, monh);
 		break;
+	case NFT_MSG_NEWGEN:
+		ret = netlink_events_setelem_newgen_cb(nlh, type, monh);
 	}
 	fflush(stdout);