diff mbox series

[5/9] Avoid re-opening and re-reading config file

Message ID 20210126131412.3567-6-michael.adler@siemens.com
State Changes Requested
Headers show
Series Overhaul swupdate.cfg handling | expand

Commit Message

Michael Adler Jan. 26, 2021, 1:14 p.m. UTC
During a regular startup, the config file is at least open(2)ed and
read(2) four times.

* New API for reading swupdate.cfg (use in-memory copy of the file)
* Move settings_callback to swupdate_settings.h; it is only used there

Notice: mmap(3p) was not used to avoid security issues when sharing with
fork(2)ed processes which drop privileges.

Signed-off-by: Michael Adler <michael.adler@siemens.com>
Signed-off-by: Christian Storm <christian.storm@siemens.com>
---
 core/artifacts_versions.c   |  2 +-
 core/swupdate.c             | 16 ++++++---
 corelib/swupdate_settings.c | 72 +++++++++++++++++++++++--------------
 include/parselib.h          |  2 --
 include/swupdate_settings.h | 37 ++++++++++++++++++-
 5 files changed, 94 insertions(+), 35 deletions(-)

Comments

Stefano Babic Jan. 26, 2021, 2:46 p.m. UTC | #1
Hi Michael,

On 26.01.21 14:14, Michael Adler wrote:
> During a regular startup, the config file is at least open(2)ed and
> read(2) four times.
> 
> * New API for reading swupdate.cfg (use in-memory copy of the file)
> * Move settings_callback to swupdate_settings.h; it is only used there
> 
> Notice: mmap(3p) was not used to avoid security issues when sharing with
> fork(2)ed processes which drop privileges.
> 
> Signed-off-by: Michael Adler <michael.adler@siemens.com>
> Signed-off-by: Christian Storm <christian.storm@siemens.com>
> ---
>  core/artifacts_versions.c   |  2 +-
>  core/swupdate.c             | 16 ++++++---
>  corelib/swupdate_settings.c | 72 +++++++++++++++++++++++--------------
>  include/parselib.h          |  2 --
>  include/swupdate_settings.h | 37 ++++++++++++++++++-
>  5 files changed, 94 insertions(+), 35 deletions(-)
> 
> diff --git a/core/artifacts_versions.c b/core/artifacts_versions.c
> index 7d1d035..cba45b7 100644
> --- a/core/artifacts_versions.c
> +++ b/core/artifacts_versions.c
> @@ -149,7 +149,7 @@ void get_sw_versions(char *cfgname, struct swupdate_cfg *sw)
>  }
>  #else
>  
> -void get_sw_versions(char __attribute__ ((__unused__)) *cfgname,
> +void get_sw_versions(swupdate_cfg_handle  __attribute__ ((__unused__))*handle,
>  			struct swupdate_cfg *sw)
>  {
>  	read_sw_version_file(sw);
> diff --git a/core/swupdate.c b/core/swupdate.c
> index 6548d06..37eda4c 100644
> --- a/core/swupdate.c
> +++ b/core/swupdate.c
> @@ -547,11 +547,16 @@ int main(int argc, char **argv)
>  
>  	/* Load configuration file */
>  	if (cfgfname != NULL) {
> +		swupdate_cfg_handle handle;
> +		int ret = swupdate_cfg_init(&handle, cfgfname);

But if the goal is to cache and to read (that means, to parse) the file
by libconfig just once, why is the handle just on the stack ? Each
subsystem is calling it another time.

I am also expecting that the signature of the function changes. Moving
this to a general place, it means it is accessible by each subsystem
without knowing the filename - once read, it has no meaning more.

So I am expecting that each subsytem gets the handle for the config,
instead of the filename.


> +
>  		/*
>  		 * 'globals' section is mandatory if configuration file is specified.
>  		 */
> -		int ret = read_module_settings(cfgfname, "globals",
> -					       read_globals_settings, &swcfg);
> +		if (ret == 0) {
> +			ret = swupdate_cfg_read_module_settings(&handle, "globals",
> +					read_globals_settings, &swcfg);
> +		}
>  		if (ret != 0) {
>  			/*
>  			 * Exit on -ENODATA or -EINVAL errors.
> @@ -560,6 +565,7 @@ int main(int argc, char **argv)
>  			    "Error parsing configuration file: %s, exiting.\n",
>  			    ret == -ENODATA ? "'globals' section missing"
>  					    : "cannot read");
> +			swupdate_cfg_destroy(&handle);



>  			exit(EXIT_FAILURE);
>  		}
>  
> @@ -569,11 +575,13 @@ int main(int argc, char **argv)
>  		 * The following sections are optional, hence -ENODATA error code is
>  		 * ignored if the section is not found. -EINVAL will not happen here.
>  		 */
> -		(void)read_module_settings(cfgfname, "logcolors",
> +		(void)swupdate_cfg_read_module_settings(&handle, "logcolors",
>  					   read_console_settings, &swcfg);
>  
> -		(void)read_module_settings(cfgfname, "processes",
> +		(void)swupdate_cfg_read_module_settings(&handle, "processes",
>  					   read_processes_settings, &swcfg);
> +
> +		swupdate_cfg_destroy(&handle);
>  	}
>  
>  	/*
> diff --git a/corelib/swupdate_settings.c b/corelib/swupdate_settings.c
> index de7f331..f06ecf7 100644
> --- a/corelib/swupdate_settings.c
> +++ b/corelib/swupdate_settings.c
> @@ -10,6 +10,7 @@
>   * starting swupdate with a long list of parameters.
>   */
>  
> +#include <libconfig.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
> @@ -53,6 +54,9 @@ static int read_settings_file(config_t *cfg, const char *filename)
>  {
>  	int ret;
>  
> + 	if (!filename)
> +		return -EINVAL;
> +
>  	TRACE("Reading config file %s", filename);
>  	ret = config_read_file(cfg, filename);
>  	if (ret != CONFIG_TRUE) {
> @@ -69,35 +73,15 @@ static int read_settings_file(config_t *cfg, const char *filename)
>  
>  int read_module_settings(const char *filename, const char *module, settings_callback fcn, void *data)
>  {
> -	config_t cfg;
> -	config_setting_t *elem;
> -
> -	if (!fcn || !filename)
> -		return -EINVAL;
> -
> -	memset(&cfg, 0, sizeof(cfg));
> -	config_init(&cfg);
> -
> -	if (read_settings_file(&cfg, filename) != CONFIG_TRUE) {
> -		config_destroy(&cfg);
> +	swupdate_cfg_handle handle;
> +	int ret = swupdate_cfg_init(&handle, filename);

See my comment above.

> +	if (ret == 0) {
> +		ret = swupdate_cfg_read_module_settings(&handle, module, fcn, data);
> +	} else {
>  		ERROR("Error reading configuration file, skipping....");
> -		return -EINVAL;
> -	}
> -
> -	elem = find_settings_node(&cfg, module);
> -
> -	if (!elem) {
> -		TRACE("No config settings found for module %s", module);
> -		config_destroy(&cfg);
> -		return -ENODATA;
>  	}
> -
> -	TRACE("Reading config settings for module %s", module);
> -	fcn(elem, data);
> -
> -	config_destroy(&cfg);
> -
> -	return 0;
> +	swupdate_cfg_destroy(&handle);
> +	return ret;
>  }
>  
>  static int get_run_as(void *elem, void *data)
> @@ -162,3 +146,37 @@ int settings_into_dict(void *settings, void *data)
>  
>  	return 0;
>  }
> +
> +int swupdate_cfg_init(swupdate_cfg_handle *handle, const char *filename)
> +{
> +	config_init(&handle->cfg);

In my understandig, config_init() in libconfig can allocate structures,
and according the documentation, a config_init should be followed by a
config_destroy (this is how it is implemented now in SWUpdate).

If we move to a cache solution, we should be sure that config_init() is
called once, or this could generate a memory leak. In your
implementation, swupdate_cfg_init() is called multiple times.

> +	if (read_settings_file(&handle->cfg, filename) != CONFIG_TRUE) {
> +		return -EINVAL;
> +	}
> +	return 0;
> +}
> +
> +void swupdate_cfg_destroy(swupdate_cfg_handle *handle)
> +{
> +	config_destroy(&handle->cfg);
> +}
> +
> +int swupdate_cfg_read_module_settings(swupdate_cfg_handle *handle, const char *module, settings_callback fcn, void *data)
> +{
> +	config_setting_t *elem;
> +
> +	if (!fcn)
> +		return -EINVAL;
> +
> +	TRACE("Reading config file settings for module %s", module);
> +	elem = find_settings_node(&handle->cfg, module);
> +
> +	if (!elem) {
> +		TRACE("No config file settings found for module %s", module);
> +		return -ENODATA;
> +	}
> +
> +	fcn(elem, data);
> +
> +	return 0;
> +}

This is a duplicate of read_module_settings(), just without destroy. I
do not think we need another wrapper, just the current functions should
be changed to get the handle instead of the filename.


> diff --git a/include/parselib.h b/include/parselib.h
> index 5cbc255..84a51e2 100644
> --- a/include/parselib.h
> +++ b/include/parselib.h
> @@ -85,8 +85,6 @@ void *get_node_json(json_object *root, const char **nodes);
>  #define get_node_json(root, nodes)	(NULL)
>  #endif
>  
> -typedef int (*settings_callback)(void *elem, void *data);
> -
>  const char *get_field_string(parsertype p, void *e, const char *path);
>  void get_field_string_with_size(parsertype p, void *e, const char *path,
>  				char *d, size_t n);
> diff --git a/include/swupdate_settings.h b/include/swupdate_settings.h
> index c3b083d..c8e4f1d 100644
> --- a/include/swupdate_settings.h
> +++ b/include/swupdate_settings.h
> @@ -8,12 +8,29 @@
>  #ifndef _SWUPDATE_SETTINGS_H
>  #define _SWUPDATE_SETTINGS_H
>  
> +#include <unistd.h>
> +
> +typedef int (*settings_callback)(void *elem, void *data);
> +
>  #ifdef CONFIG_LIBCONFIG
> +
> +#include <libconfig.h>
> +
> +typedef struct {
> +	config_t cfg;
> +} swupdate_cfg_handle;

Which is the added value of this structure ?

> +
> +int swupdate_cfg_init(swupdate_cfg_handle *handle, const char *filename);
> +void swupdate_cfg_destroy(swupdate_cfg_handle *handle);
> +int swupdate_cfg_read_module_settings(swupdate_cfg_handle *handle, const char *module, settings_callback fcn, void *data);
> +
>  int read_module_settings(const char *filename, const char *module, settings_callback fcn, void *data);
>  int read_settings_user_id(const char *filename, const char *module, uid_t *userid, gid_t *groupid);
>  int settings_into_dict(void *settings, void *data);
>  #else
> -#include <unistd.h>
> +
> +typedef struct {} swupdate_cfg_handle;
> +
>  static inline int read_module_settings(const char __attribute__ ((__unused__))*filename,
>  		const char __attribute__ ((__unused__)) *module,
>  		settings_callback __attribute__ ((__unused__)) fcn,
> @@ -40,6 +57,24 @@ static inline int settings_into_dict(void __attribute__ ((__unused__)) *settings
>  {
>  	return -1;
>  }
> +
> +static inline int swupdate_cfg_init(swupdate_cfg_handle __attribute__ ((__unused__))*handle,
> +		const char __attribute__ ((__unused__))*filename)
> +{
> +	return -1;
> +}
> +
> +static inline void swupdate_cfg_destroy(swupdate_cfg_handle __attribute__ ((__unused__))*handle) {
> +	return;
> +}
> +
> +static inline int swupdate_cfg_read_module_settings(swupdate_cfg_handle __attribute__ ((__unused__))*handle,
> +		const char __attribute__ ((__unused__))*module,
> +		settings_callback __attribute__ ((__unused__))fcn,
> +		void __attribute__ ((__unused__))*data) {
> +	return -1;
> +};
> +
>  #endif
>  
>  #endif
> 

Best regards,
Stefano Babic
Michael Adler Jan. 27, 2021, 3:18 p.m. UTC | #2
Hi Stefano,

> But if the goal is to cache and to read (that means, to parse) the file by libconfig just once, why is the handle just
> on the stack ? Each subsystem is calling it another time.

yes, you are right. This is addressed in patches 6-8. and I just realized that patch 6 was not sent yesterday. It is
available now though. Apologies for the delay.

Patches 5-8 could be squashed into one (large) patch. Each commit addresses a separate issue, but clearly patches 6-8
build upon patch 5, so I'm perfectly fine with squashing it for V2 - whatever you prefer.

> So I am expecting that each subsytem gets the handle for the config, instead of the filename.

I agree. Also, I believe that this is achieved within patches 5-8.
There is one important thing I would like to stress though: currently, fork(2)ed processes must read the file again. I
have made that decision for security reasons, because the forked process might run with less privileges, i.e. it might
not be able to read swupdate.cfg due to insufficient permissions - although in my use cases, swupdate.cfg is always
world-readable and does not contain any secrets. If sharing the configuration data with subprocesses is fine, we could
use mmap(3p) and share the config data among process boundaries. What do you think?

> In my understandig, config_init() in libconfig can allocate structures, and according the documentation, a config_init
> should be followed by a config_destroy (this is how it is implemented now in SWUpdate).

> If we move to a cache solution, we should be sure that config_init() is called once, or this could generate a memory
> leak. In your implementation, swupdate_cfg_init() is called multiple times.

Yes, I followed the same principle: whenever I call swupdate_cfg_init(), that same function must also free the resources
by calling swupdate_cfg_destroy(). So every config_init() is followed by config_destroy(), since my functions are
wrappers.

> This is a duplicate of read_module_settings(), just without destroy. I
> do not think we need another wrapper, just the current functions should
> be changed to get the handle instead of the filename.

Agreed, I'll change it.

> > +typedef struct {
> > +	config_t cfg;
> > +} swupdate_cfg_handle;
> 
> Which is the added value of this structure ?

My intention was to *not* expose the specific library type. That way, it would be quite easy to support additional file
formats in the future, e.g. JSON. Of course, I could have just `typedef`ed config_t without the struct, but memory
usage is the same, isn't it? Having a struct makes it easy to extend and e.g. store the filename (=origin) of the
configuration data.

I hope I've addressed all your points. Thanks for taking the time and providing me feedback, it's much appreciated.

Kind regards,
  Michael
diff mbox series

Patch

diff --git a/core/artifacts_versions.c b/core/artifacts_versions.c
index 7d1d035..cba45b7 100644
--- a/core/artifacts_versions.c
+++ b/core/artifacts_versions.c
@@ -149,7 +149,7 @@  void get_sw_versions(char *cfgname, struct swupdate_cfg *sw)
 }
 #else
 
-void get_sw_versions(char __attribute__ ((__unused__)) *cfgname,
+void get_sw_versions(swupdate_cfg_handle  __attribute__ ((__unused__))*handle,
 			struct swupdate_cfg *sw)
 {
 	read_sw_version_file(sw);
diff --git a/core/swupdate.c b/core/swupdate.c
index 6548d06..37eda4c 100644
--- a/core/swupdate.c
+++ b/core/swupdate.c
@@ -547,11 +547,16 @@  int main(int argc, char **argv)
 
 	/* Load configuration file */
 	if (cfgfname != NULL) {
+		swupdate_cfg_handle handle;
+		int ret = swupdate_cfg_init(&handle, cfgfname);
+
 		/*
 		 * 'globals' section is mandatory if configuration file is specified.
 		 */
-		int ret = read_module_settings(cfgfname, "globals",
-					       read_globals_settings, &swcfg);
+		if (ret == 0) {
+			ret = swupdate_cfg_read_module_settings(&handle, "globals",
+					read_globals_settings, &swcfg);
+		}
 		if (ret != 0) {
 			/*
 			 * Exit on -ENODATA or -EINVAL errors.
@@ -560,6 +565,7 @@  int main(int argc, char **argv)
 			    "Error parsing configuration file: %s, exiting.\n",
 			    ret == -ENODATA ? "'globals' section missing"
 					    : "cannot read");
+			swupdate_cfg_destroy(&handle);
 			exit(EXIT_FAILURE);
 		}
 
@@ -569,11 +575,13 @@  int main(int argc, char **argv)
 		 * The following sections are optional, hence -ENODATA error code is
 		 * ignored if the section is not found. -EINVAL will not happen here.
 		 */
-		(void)read_module_settings(cfgfname, "logcolors",
+		(void)swupdate_cfg_read_module_settings(&handle, "logcolors",
 					   read_console_settings, &swcfg);
 
-		(void)read_module_settings(cfgfname, "processes",
+		(void)swupdate_cfg_read_module_settings(&handle, "processes",
 					   read_processes_settings, &swcfg);
+
+		swupdate_cfg_destroy(&handle);
 	}
 
 	/*
diff --git a/corelib/swupdate_settings.c b/corelib/swupdate_settings.c
index de7f331..f06ecf7 100644
--- a/corelib/swupdate_settings.c
+++ b/corelib/swupdate_settings.c
@@ -10,6 +10,7 @@ 
  * starting swupdate with a long list of parameters.
  */
 
+#include <libconfig.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -53,6 +54,9 @@  static int read_settings_file(config_t *cfg, const char *filename)
 {
 	int ret;
 
+ 	if (!filename)
+		return -EINVAL;
+
 	TRACE("Reading config file %s", filename);
 	ret = config_read_file(cfg, filename);
 	if (ret != CONFIG_TRUE) {
@@ -69,35 +73,15 @@  static int read_settings_file(config_t *cfg, const char *filename)
 
 int read_module_settings(const char *filename, const char *module, settings_callback fcn, void *data)
 {
-	config_t cfg;
-	config_setting_t *elem;
-
-	if (!fcn || !filename)
-		return -EINVAL;
-
-	memset(&cfg, 0, sizeof(cfg));
-	config_init(&cfg);
-
-	if (read_settings_file(&cfg, filename) != CONFIG_TRUE) {
-		config_destroy(&cfg);
+	swupdate_cfg_handle handle;
+	int ret = swupdate_cfg_init(&handle, filename);
+	if (ret == 0) {
+		ret = swupdate_cfg_read_module_settings(&handle, module, fcn, data);
+	} else {
 		ERROR("Error reading configuration file, skipping....");
-		return -EINVAL;
-	}
-
-	elem = find_settings_node(&cfg, module);
-
-	if (!elem) {
-		TRACE("No config settings found for module %s", module);
-		config_destroy(&cfg);
-		return -ENODATA;
 	}
-
-	TRACE("Reading config settings for module %s", module);
-	fcn(elem, data);
-
-	config_destroy(&cfg);
-
-	return 0;
+	swupdate_cfg_destroy(&handle);
+	return ret;
 }
 
 static int get_run_as(void *elem, void *data)
@@ -162,3 +146,37 @@  int settings_into_dict(void *settings, void *data)
 
 	return 0;
 }
+
+int swupdate_cfg_init(swupdate_cfg_handle *handle, const char *filename)
+{
+	config_init(&handle->cfg);
+	if (read_settings_file(&handle->cfg, filename) != CONFIG_TRUE) {
+		return -EINVAL;
+	}
+	return 0;
+}
+
+void swupdate_cfg_destroy(swupdate_cfg_handle *handle)
+{
+	config_destroy(&handle->cfg);
+}
+
+int swupdate_cfg_read_module_settings(swupdate_cfg_handle *handle, const char *module, settings_callback fcn, void *data)
+{
+	config_setting_t *elem;
+
+	if (!fcn)
+		return -EINVAL;
+
+	TRACE("Reading config file settings for module %s", module);
+	elem = find_settings_node(&handle->cfg, module);
+
+	if (!elem) {
+		TRACE("No config file settings found for module %s", module);
+		return -ENODATA;
+	}
+
+	fcn(elem, data);
+
+	return 0;
+}
diff --git a/include/parselib.h b/include/parselib.h
index 5cbc255..84a51e2 100644
--- a/include/parselib.h
+++ b/include/parselib.h
@@ -85,8 +85,6 @@  void *get_node_json(json_object *root, const char **nodes);
 #define get_node_json(root, nodes)	(NULL)
 #endif
 
-typedef int (*settings_callback)(void *elem, void *data);
-
 const char *get_field_string(parsertype p, void *e, const char *path);
 void get_field_string_with_size(parsertype p, void *e, const char *path,
 				char *d, size_t n);
diff --git a/include/swupdate_settings.h b/include/swupdate_settings.h
index c3b083d..c8e4f1d 100644
--- a/include/swupdate_settings.h
+++ b/include/swupdate_settings.h
@@ -8,12 +8,29 @@ 
 #ifndef _SWUPDATE_SETTINGS_H
 #define _SWUPDATE_SETTINGS_H
 
+#include <unistd.h>
+
+typedef int (*settings_callback)(void *elem, void *data);
+
 #ifdef CONFIG_LIBCONFIG
+
+#include <libconfig.h>
+
+typedef struct {
+	config_t cfg;
+} swupdate_cfg_handle;
+
+int swupdate_cfg_init(swupdate_cfg_handle *handle, const char *filename);
+void swupdate_cfg_destroy(swupdate_cfg_handle *handle);
+int swupdate_cfg_read_module_settings(swupdate_cfg_handle *handle, const char *module, settings_callback fcn, void *data);
+
 int read_module_settings(const char *filename, const char *module, settings_callback fcn, void *data);
 int read_settings_user_id(const char *filename, const char *module, uid_t *userid, gid_t *groupid);
 int settings_into_dict(void *settings, void *data);
 #else
-#include <unistd.h>
+
+typedef struct {} swupdate_cfg_handle;
+
 static inline int read_module_settings(const char __attribute__ ((__unused__))*filename,
 		const char __attribute__ ((__unused__)) *module,
 		settings_callback __attribute__ ((__unused__)) fcn,
@@ -40,6 +57,24 @@  static inline int settings_into_dict(void __attribute__ ((__unused__)) *settings
 {
 	return -1;
 }
+
+static inline int swupdate_cfg_init(swupdate_cfg_handle __attribute__ ((__unused__))*handle,
+		const char __attribute__ ((__unused__))*filename)
+{
+	return -1;
+}
+
+static inline void swupdate_cfg_destroy(swupdate_cfg_handle __attribute__ ((__unused__))*handle) {
+	return;
+}
+
+static inline int swupdate_cfg_read_module_settings(swupdate_cfg_handle __attribute__ ((__unused__))*handle,
+		const char __attribute__ ((__unused__))*module,
+		settings_callback __attribute__ ((__unused__))fcn,
+		void __attribute__ ((__unused__))*data) {
+	return -1;
+};
+
 #endif
 
 #endif