diff mbox series

[nft,2/2] Handle retriable errors from mnl functions

Message ID 20211208134914.16365-3-crosser@average.org
State Changes Requested
Delegated to: Pablo Neira
Headers show
Series [nft,1/2] Use abort() in case of netlink_abi_error | expand

Commit Message

Eugene Crosser Dec. 8, 2021, 1:49 p.m. UTC
rc == -1 and errno == EINTR mean:

mnl_socket_recvfrom() - blindly rerun the function
mnl_cb_run()          - restart dump request from scratch

This commit introduces handling of both these conditions

Signed-off-by: Eugene Crosser <crosser@average.org>
---
 src/iface.c | 73 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 44 insertions(+), 29 deletions(-)

Comments

Pablo Neira Ayuso Dec. 8, 2021, 6:06 p.m. UTC | #1
On Wed, Dec 08, 2021 at 02:49:14PM +0100, Eugene Crosser wrote:
> rc == -1 and errno == EINTR mean:
> 
> mnl_socket_recvfrom() - blindly rerun the function
> mnl_cb_run()          - restart dump request from scratch
> 
> This commit introduces handling of both these conditions
> 
> Signed-off-by: Eugene Crosser <crosser@average.org>
> ---
>  src/iface.c | 73 ++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 44 insertions(+), 29 deletions(-)
> 
> diff --git a/src/iface.c b/src/iface.c
> index d0e1834c..029f6476 100644
> --- a/src/iface.c
> +++ b/src/iface.c
> @@ -66,39 +66,54 @@ void iface_cache_update(void)
>  	struct nlmsghdr *nlh;
>  	struct rtgenmsg *rt;
>  	uint32_t seq, portid;
> +	bool need_restart;
> +	int retry_count = 5;

Did you ever hit this retry count? What is you daemon going to do
after these retries?

Probably this can be made configurable for libraries in case you
prefer your daemon to give up after many retries, but, by default,
I'd prefer to to keep trying until you get a consistent cache from the
kernel via netlink dump.
Pablo Neira Ayuso Dec. 8, 2021, 6:11 p.m. UTC | #2
On Wed, Dec 08, 2021 at 02:49:14PM +0100, Eugene Crosser wrote:
> diff --git a/src/iface.c b/src/iface.c
> index d0e1834c..029f6476 100644
> --- a/src/iface.c
> +++ b/src/iface.c
> @@ -66,39 +66,54 @@ void iface_cache_update(void)
>  	struct nlmsghdr *nlh;
>  	struct rtgenmsg *rt;
>  	uint32_t seq, portid;
> +	bool need_restart;
> +	int retry_count = 5;
>  	int ret;
>  
> -	nlh = mnl_nlmsg_put_header(buf);
> -	nlh->nlmsg_type	= RTM_GETLINK;
> -	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
> -	nlh->nlmsg_seq = seq = time(NULL);
> -	rt = mnl_nlmsg_put_extra_header(nlh, sizeof(struct rtgenmsg));
> -	rt->rtgen_family = AF_PACKET;
> -
> -	nl = mnl_socket_open(NETLINK_ROUTE);
> -	if (nl == NULL)
> -		netlink_init_error();
> -
> -	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
> -		netlink_init_error();
> -
> -	portid = mnl_socket_get_portid(nl);
> -
> -	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
> -		netlink_init_error();
> -
> -	ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
> -	while (ret > 0) {
> -		ret = mnl_cb_run(buf, ret, seq, portid, data_cb, NULL);
> -		if (ret <= MNL_CB_STOP)
> -			break;
> -		ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
> -	}
> -	if (ret == -1)
> +	do {
> +		nlh = mnl_nlmsg_put_header(buf);
> +		nlh->nlmsg_type	= RTM_GETLINK;
> +		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
> +		nlh->nlmsg_seq = seq = time(NULL);
> +		rt = mnl_nlmsg_put_extra_header(nlh, sizeof(struct rtgenmsg));
> +		rt->rtgen_family = AF_PACKET;
> +
> +		nl = mnl_socket_open(NETLINK_ROUTE);
> +		if (nl == NULL)
> +			netlink_init_error();
> +
> +		if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
> +			netlink_init_error();
> +
> +		portid = mnl_socket_get_portid(nl);
> +
> +		if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
> +			netlink_init_error();
> +
> +		need_restart = false;
> +		while (true) {
> +			ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
> +			if (ret == -1) {
> +				if (errno == EINTR)
> +					continue;
> +				else
> +					netlink_init_error();
> +			}
> +			ret = mnl_cb_run(buf, ret, seq, portid, data_cb, NULL);
> +			if (ret == MNL_CB_STOP)
> +				break;
> +			if (ret == -1) {
> +				if (errno == EINTR)
> +					need_restart = true;
> +				else
> +					netlink_init_error();
> +			}
> +		}
> +		mnl_socket_close(nl);

BTW, could you just rename iface_cache_update() to
__iface_cache_update() then add the loop to retry on EINTR? That would
skip this extra large indent in this patch.

Thanks.

> +	} while (need_restart && retry_count--);
> +	if (need_restart)
>  		netlink_init_error();
>  
> -	mnl_socket_close(nl);
> -
>  	iface_cache_init = true;
>  }
>  
> -- 
> 2.32.0
>
Eugene Crosser Dec. 16, 2021, 8:33 p.m. UTC | #3
Hello,

On 08/12/2021 19:06, Pablo Neira Ayuso wrote:

>> ---
>>  src/iface.c | 73 ++++++++++++++++++++++++++++++++---------------------
>>  1 file changed, 44 insertions(+), 29 deletions(-)
>>
>> diff --git a/src/iface.c b/src/iface.c
>> index d0e1834c..029f6476 100644
>> --- a/src/iface.c
>> +++ b/src/iface.c
>> @@ -66,39 +66,54 @@ void iface_cache_update(void)
>>  	struct nlmsghdr *nlh;
>>  	struct rtgenmsg *rt;
>>  	uint32_t seq, portid;
>> +	bool need_restart;
>> +	int retry_count = 5;
> 
> Did you ever hit this retry count? What is you daemon going to do
> after these retries?
> 
> Probably this can be made configurable for libraries in case you
> prefer your daemon to give up after many retries, but, by default,
> I'd prefer to to keep trying until you get a consistent cache from the
> kernel via netlink dump.
[...]
> BTW, could you just rename iface_cache_update() to
> __iface_cache_update() then add the loop to retry on EINTR? That would
> skip this extra large indent in this patch.


I have sent the new patches a week ago:

  [PATCH nft v2 0/2] Improve handling of errors from mnl* functions"
  [PATCH nft v2 1/2] Use abort() in case of netlink_abi_error
  [PATCH nft v2 2/2] Handle retriable errors from mnl functions

Do they look better now?

Thanks,

Eugene
diff mbox series

Patch

diff --git a/src/iface.c b/src/iface.c
index d0e1834c..029f6476 100644
--- a/src/iface.c
+++ b/src/iface.c
@@ -66,39 +66,54 @@  void iface_cache_update(void)
 	struct nlmsghdr *nlh;
 	struct rtgenmsg *rt;
 	uint32_t seq, portid;
+	bool need_restart;
+	int retry_count = 5;
 	int ret;
 
-	nlh = mnl_nlmsg_put_header(buf);
-	nlh->nlmsg_type	= RTM_GETLINK;
-	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
-	nlh->nlmsg_seq = seq = time(NULL);
-	rt = mnl_nlmsg_put_extra_header(nlh, sizeof(struct rtgenmsg));
-	rt->rtgen_family = AF_PACKET;
-
-	nl = mnl_socket_open(NETLINK_ROUTE);
-	if (nl == NULL)
-		netlink_init_error();
-
-	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
-		netlink_init_error();
-
-	portid = mnl_socket_get_portid(nl);
-
-	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
-		netlink_init_error();
-
-	ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
-	while (ret > 0) {
-		ret = mnl_cb_run(buf, ret, seq, portid, data_cb, NULL);
-		if (ret <= MNL_CB_STOP)
-			break;
-		ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
-	}
-	if (ret == -1)
+	do {
+		nlh = mnl_nlmsg_put_header(buf);
+		nlh->nlmsg_type	= RTM_GETLINK;
+		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+		nlh->nlmsg_seq = seq = time(NULL);
+		rt = mnl_nlmsg_put_extra_header(nlh, sizeof(struct rtgenmsg));
+		rt->rtgen_family = AF_PACKET;
+
+		nl = mnl_socket_open(NETLINK_ROUTE);
+		if (nl == NULL)
+			netlink_init_error();
+
+		if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0)
+			netlink_init_error();
+
+		portid = mnl_socket_get_portid(nl);
+
+		if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
+			netlink_init_error();
+
+		need_restart = false;
+		while (true) {
+			ret = mnl_socket_recvfrom(nl, buf, sizeof(buf));
+			if (ret == -1) {
+				if (errno == EINTR)
+					continue;
+				else
+					netlink_init_error();
+			}
+			ret = mnl_cb_run(buf, ret, seq, portid, data_cb, NULL);
+			if (ret == MNL_CB_STOP)
+				break;
+			if (ret == -1) {
+				if (errno == EINTR)
+					need_restart = true;
+				else
+					netlink_init_error();
+			}
+		}
+		mnl_socket_close(nl);
+	} while (need_restart && retry_count--);
+	if (need_restart)
 		netlink_init_error();
 
-	mnl_socket_close(nl);
-
 	iface_cache_init = true;
 }