Message ID | 1449116634-14671-2-git-send-email-aconole@redhat.com |
---|---|
State | RFC |
Headers | show |
Dear Aaron, The general idea looks good to me. Just some comments inline On 3 December 2015 at 05:23, Aaron Conole <aconole@redhat.com> wrote: > Existing DPDK integration is provided by use of command line options which > must be split out and passed to librte in a special manner. However, this > forces any configuration to be passed by way of a special DPDK flag, and > interferes with ovs+dpdk packaging solutions. > > This commit delays dpdk initialization until after the OVS database > connection > is established, and then initializes librte. It pulls all of the config > data > from the OVS database, and assembles a new argv/argc pair to be passed > along. > > Signed-off-by: Aaron Conole <aconole@redhat.com> > --- > lib/netdev-dpdk.c | 126 > ++++++++++++++++++++++++++++++++---------------- > lib/netdev-dpdk.h | 12 ++--- > utilities/ovs-ctl.in | 12 ++--- > vswitchd/bridge.c | 3 ++ > vswitchd/ovs-vswitchd.c | 27 ----------- > 5 files changed, 99 insertions(+), 81 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index e3a0771..f3e0840 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -29,6 +29,7 @@ > #include <stdio.h> > #include <sys/types.h> > #include <sys/stat.h> > +#include <getopt.h> > > #include "dirs.h" > #include "dp-packet.h" > @@ -49,6 +50,8 @@ > #include "timeval.h" > #include "unixctl.h" > #include "openvswitch/vlog.h" > +#include "smap.h" > +#include "vswitch-idl.h" > > #include "rte_config.h" > #include "rte_mbuf.h" > @@ -2107,10 +2110,14 @@ unlock_dpdk: > > static int > process_vhost_flags(char *flag, char *default_val, int size, > - char **argv, char **new_val) > + const struct ovsrec_open_vswitch *ovs_cfg, > + char **new_val) > { > + const char *val; > int changed = 0; > > + val = smap_get(&ovs_cfg->other_config, flag); > + > /* Depending on which version of vhost is in use, process the > vhost-specific > * flag if it is provided on the vswitchd command line, otherwise > resort to > * a default value. > @@ -2120,9 +2127,9 @@ process_vhost_flags(char *flag, char *default_val, > int size, > * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of > the > * vhost-cuse character device. > */ > - if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) { > + if (val && (strlen(val) <= size)) { > changed = 1; > - *new_val = strdup(argv[2]); > + *new_val = strdup(val); > VLOG_INFO("User-provided %s in use: %s", flag, *new_val); > } else { > VLOG_INFO("No %s provided - defaulting to %s", flag, default_val); > @@ -2132,34 +2139,77 @@ process_vhost_flags(char *flag, char *default_val, > int size, > return changed; > } > > -int > -dpdk_init(int argc, char **argv) > +static char ** > +grow_argv(char ***argv, size_t cur_siz, size_t grow_by) > { > - int result; > - int base = 0; > - char *pragram_name = argv[0]; > + char **new_argv = realloc(*argv, sizeof(char *) * (cur_siz + > grow_by)); > + return new_argv; > +} > > - if (argc < 2 || strcmp(argv[1], "--dpdk")) > - return 0; > +static int > +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv) > +{ > + struct dpdk_options_map { > + const char *ovs_configuration; > + const char *dpdk_option; > + bool default_enabled; > + const char *default_value; > + } opts[] = { > + {"dpdk_core_mask", "-c", true, "0x1"}, > + {"dpdk_mem_channels", "-n", true, "4"}, > + {"dpdk_alloc_mem", "-m", false, NULL}, > + {"dpdk_socket_mem", "--socket-mem", false, NULL}, > + {"dpdk_hugepage_dir", "--huge-dir", false, NULL}, > + }; > + int i, ret = 1; > + > + for(i = 0; i < (sizeof(opts) / sizeof(opts[0])); ++i){ > + const char *lookup = smap_get(&ovs_cfg->other_config, > + opts[i].ovs_configuration); > + if(!lookup && opts[i].default_enabled) > + lookup = opts[i].default_value; > + > + if(lookup) { > + char **newargv = grow_argv(argv, ret, 2); > + > + if (!newargv) > + return ret; > + > + *argv = newargv; > + (*argv)[ret++] = strdup(opts[i].dpdk_option); > + (*argv)[ret++] = strdup(lookup); > + } > + } > > - /* Remove the --dpdk argument from arg list.*/ > - argc--; > - argv++; > + return ret; > +} > > argv shall be NULL terminated, i.e. argv[argc] shall be NULL. - /* Reject --user option */ > - int i; > - for (i = 0; i < argc; i++) { > - if (!strcmp(argv[i], "--user")) { > - VLOG_ERR("Can not mix --dpdk and --user options, aborting."); > - } > +void > +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) > +{ > + static int initialized = 0; > + > + char **argv; > + int result; > + int argc; > + > + if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", > false)){ > + return; > } > + > + initialized = 1; > > + VLOG_INFO("DPDK Enabled, initializing"); > + > + /* TODO should check for user permissions. DPDK requires root user. */ > + > #ifdef VHOST_CUSE > - if (process_vhost_flags("-cuse_dev_name", strdup("vhost-net"), > - PATH_MAX, argv, &cuse_dev_name)) { > + if (process_vhost_flags("cuse_dev_name", strdup("vhost-net"), > + PATH_MAX, ovs_cfg, &cuse_dev_name)) { > + > #else > - if (process_vhost_flags("-vhost_sock_dir", strdup(ovs_rundir()), > - NAME_MAX, argv, &vhost_sock_dir)) { > + if (process_vhost_flags("vhost_sock_dir", strdup(ovs_rundir()), > + NAME_MAX, ovs_cfg, &vhost_sock_dir)) { > struct stat s; > int err; > > @@ -2167,40 +2217,34 @@ dpdk_init(int argc, char **argv) > if (err) { > VLOG_ERR("vHostUser socket DIR '%s' does not exist.", > vhost_sock_dir); > - return err; > + return; > } > #endif > - /* Remove the vhost flag configuration parameters from the > argument > - * list, so that the correct elements are passed to the DPDK > - * initialization function > - */ > - argc -= 2; > - argv += 2; /* Increment by two to bypass the vhost flag > arguments */ > - base = 2; > } > > - /* Keep the program name argument as this is needed for call to > - * rte_eal_init() > - */ > - argv[0] = pragram_name; > + argv = (char **)malloc(sizeof(char *)); > + *argv = strdup("ovs"); /* TODO use prctl to get process name */ > + > + argc = get_dpdk_args(ovs_cfg, &argv); > + > + optind = 1; > > /* Make sure things are initialized ... */ > result = rte_eal_init(argc, argv); > if (result < 0) { > ovs_abort(result, "Cannot init EAL"); > } > + > + for(result = 0; result < argc-1; ++result) > + free(argv[result]); > + > + free(argv); > > I am not sure if freeing argv is valid in this case, C99 standard states that it shall remain valid until the program terminates. > rte_memzone_dump(stdout); > rte_eal_init_ret = 0; > > - if (argc > result) { > - argv[result] = argv[0]; > - } > - > /* We are called from the main thread here */ > RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; > - > - return result + 1 + base; > } > > static const struct netdev_class dpdk_class = > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h > index 646d3e2..e01adb3 100644 > --- a/lib/netdev-dpdk.h > +++ b/lib/netdev-dpdk.h > @@ -4,6 +4,7 @@ > #include <config.h> > > struct dp_packet; > +struct ovsrec_open_vswitch; > > #ifdef DPDK_NETDEV > > @@ -22,7 +23,7 @@ struct dp_packet; > > #define NON_PMD_CORE_ID LCORE_ID_ANY > > -int dpdk_init(int argc, char **argv); > +void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg); > void netdev_dpdk_register(void); > void free_dpdk_buf(struct dp_packet *); > int pmd_thread_setaffinity_cpu(unsigned cpu); > @@ -33,13 +34,10 @@ int pmd_thread_setaffinity_cpu(unsigned cpu); > > #include "util.h" > > -static inline int > -dpdk_init(int argc, char **argv) > +static inline void > +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED) > { > - if (argc >= 2 && !strcmp(argv[1], "--dpdk")) { > - ovs_fatal(0, "DPDK support not built into this copy of Open > vSwitch."); > - } > - return 0; > + > } > Why to remove this error handling? > > static inline void > diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in > index 0082bed..6ae8bbd 100755 > --- a/utilities/ovs-ctl.in > +++ b/utilities/ovs-ctl.in > @@ -73,13 +73,13 @@ insert_mod_if_required () { > } > > ovs_vsctl () { > - ovs-vsctl --no-wait "$@" > + @bindir@/ovs-vsctl --no-wait "$@" > } > > set_system_ids () { > set ovs_vsctl set Open_vSwitch . > > - OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'` > + OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'` > set "$@" ovs-version="$OVS_VERSION" > > case $SYSTEM_ID in > @@ -131,7 +131,7 @@ check_force_cores () { > } > > del_transient_ports () { > - for port in `ovs-vsctl --bare -- --columns=name find port > other_config:transient=true`; do > + for port in `@bindir@/ovs-vsctl --bare -- --columns=name find port > other_config:transient=true`; do > ovs_vsctl -- del-port "$port" > done > } > @@ -193,7 +193,7 @@ add_managers () { > # won't briefly see empty datapath-id or ofport columns for records > that > # exist at startup.) > action "Enabling remote OVSDB managers" \ > - ovs-appctl -t ovsdb-server ovsdb-server/add-remote \ > + @bindir@/ovs-appctl -t ovsdb-server ovsdb-server/add-remote \ > db:Open_vSwitch,Open_vSwitch,manager_options > } > > @@ -215,7 +215,7 @@ start_forwarding () { > fi > > # Start ovs-vswitchd. > - set ovs-vswitchd unix:"$DB_SOCK" > + set @bindir@/ovs-vswitchd unix:"$DB_SOCK" > set "$@" -vconsole:emer -vsyslog:err -vfile:info > if test X"$MLOCKALL" != Xno; then > set "$@" --mlockall > @@ -276,7 +276,7 @@ save_ofports_if_required () { > # > # (Versions 1.10 and later save OpenFlow port numbers without > assistance, > # so we don't have to do anything for them. > - case `ovs-appctl version | sed 1q` in > + case `@bindir@/ovs-appctl version | sed 1q` in > "ovs-vswitchd (Open vSwitch) 1."[0-9].*) > action "Saving ofport values" ovs_save save-ofports \ > "${script_ofports}" > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index b966d92..d33f42c 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -68,6 +68,7 @@ > #include "sflow_api.h" > #include "vlan-bitmap.h" > #include "packets.h" > +#include "lib/netdev-dpdk.h" > > VLOG_DEFINE_THIS_MODULE(bridge); > > @@ -2921,6 +2922,8 @@ bridge_run(void) > } > cfg = ovsrec_open_vswitch_first(idl); > > + dpdk_init(cfg); > + > /* Initialize the ofproto library. This only needs to run once, but > * it must be done after the configuration is set. If the > * initialization has already occurred, bridge_init_ofproto() > diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c > index e78ecda..ae86861 100644 > --- a/vswitchd/ovs-vswitchd.c > +++ b/vswitchd/ovs-vswitchd.c > @@ -48,7 +48,6 @@ > #include "openvswitch/vconn.h" > #include "openvswitch/vlog.h" > #include "lib/vswitch-idl.h" > -#include "lib/netdev-dpdk.h" > > VLOG_DEFINE_THIS_MODULE(vswitchd); > > @@ -71,13 +70,6 @@ main(int argc, char *argv[]) > int retval; > > set_program_name(argv[0]); > - retval = dpdk_init(argc,argv); > - if (retval < 0) { > - return retval; > - } > - > - argc -= retval; > - argv += retval; > > ovs_cmdl_proctitle_init(argc, argv); > service_start(&argc, &argv); > @@ -152,7 +144,6 @@ parse_options(int argc, char *argv[], char > **unixctl_pathp) > OPT_ENABLE_DUMMY, > OPT_DISABLE_SYSTEM, > DAEMON_OPTION_ENUMS, > - OPT_DPDK, > }; > static const struct option long_options[] = { > {"help", no_argument, NULL, 'h'}, > @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char > **unixctl_pathp) > {"bootstrap-ca-cert", required_argument, NULL, > OPT_BOOTSTRAP_CA_CERT}, > {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, > {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM}, > - {"dpdk", required_argument, NULL, OPT_DPDK}, > {NULL, 0, NULL, 0}, > }; > char *short_options = > ovs_cmdl_long_options_to_short_options(long_options); > @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char > **unixctl_pathp) > case '?': > exit(EXIT_FAILURE); > > - case OPT_DPDK: > - ovs_fatal(0, "--dpdk must be given at beginning of command > line."); > - break; > - > default: > abort(); > } > @@ -255,19 +241,6 @@ usage(void) > stream_usage("DATABASE", true, false, true); > daemon_usage(); > vlog_usage(); > - printf("\nDPDK options:\n" > - " --dpdk [VHOST] [DPDK] Initialize DPDK datapath.\n" > - " where DPDK are options for initializing DPDK lib and VHOST > is\n" > -#ifdef VHOST_CUSE > - " option to override default character device name used for\n" > - " for use with userspace vHost\n" > - " -cuse_dev_name NAME\n" > -#else > - " option to override default directory where vhost-user\n" > - " sockets are created.\n" > - " -vhost_sock_dir DIR\n" > -#endif > - ); > printf("\nOther options:\n" > " --unixctl=SOCKET override default control socket > name\n" > " -h, --help display this help message\n" > -- > 2.6.1.133.gf5b6079 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev >
On 12/03/2015 06:23 AM, Aaron Conole wrote: > Existing DPDK integration is provided by use of command line options which > must be split out and passed to librte in a special manner. However, this > forces any configuration to be passed by way of a special DPDK flag, and > interferes with ovs+dpdk packaging solutions. > > This commit delays dpdk initialization until after the OVS database connection > is established, and then initializes librte. It pulls all of the config data > from the OVS database, and assembles a new argv/argc pair to be passed along. There needs to be a way to turn off DPDK even when support for it is built in since it requires extra setup and failing to initialize will tear down the whole ovs-vswitchd process. So in fact it probably should default to off. It'd be nice to have something smarter than a big "DPDK on/off" switch in the db, such as lazy initialization only when needed (ie dpdk ports are added) but there are quite a few problems with that I suspect. - Panu -
> -----Original Message----- > From: dev [mailto:dev-bounces@openvswitch.org] On Behalf Of Aaron Conole > Sent: Thursday, December 3, 2015 4:24 AM > To: dev@openvswitch.org > Cc: Flavio Leitner > Subject: [ovs-dev] [RFC 1/2] dpdk: Convert initialization from cmdline to db > > Existing DPDK integration is provided by use of command line options which > must be split out and passed to librte in a special manner. However, this > forces any configuration to be passed by way of a special DPDK flag, and > interferes with ovs+dpdk packaging solutions. > > This commit delays dpdk initialization until after the OVS database > connection > is established, and then initializes librte. It pulls all of the config data > from the OVS database, and assembles a new argv/argc pair to be passed along. Hi Aaron, thanks for posting. Panu makes good points, but this is a big step in the right direction. I've a few small comments on the code below. > > Signed-off-by: Aaron Conole <aconole@redhat.com> > --- > lib/netdev-dpdk.c | 126 ++++++++++++++++++++++++++++++++-------------- > -- > lib/netdev-dpdk.h | 12 ++--- > utilities/ovs-ctl.in | 12 ++--- > vswitchd/bridge.c | 3 ++ > vswitchd/ovs-vswitchd.c | 27 ----------- > 5 files changed, 99 insertions(+), 81 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index e3a0771..f3e0840 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -29,6 +29,7 @@ > #include <stdio.h> > #include <sys/types.h> > #include <sys/stat.h> > +#include <getopt.h> > > #include "dirs.h" > #include "dp-packet.h" > @@ -49,6 +50,8 @@ > #include "timeval.h" > #include "unixctl.h" > #include "openvswitch/vlog.h" > +#include "smap.h" > +#include "vswitch-idl.h" > > #include "rte_config.h" > #include "rte_mbuf.h" > @@ -2107,10 +2110,14 @@ unlock_dpdk: > > static int > process_vhost_flags(char *flag, char *default_val, int size, > - char **argv, char **new_val) > + const struct ovsrec_open_vswitch *ovs_cfg, > + char **new_val) > { > + const char *val; > int changed = 0; > > + val = smap_get(&ovs_cfg->other_config, flag); > + > /* Depending on which version of vhost is in use, process the vhost- > specific > * flag if it is provided on the vswitchd command line, otherwise resort > to > * a default value. > @@ -2120,9 +2127,9 @@ process_vhost_flags(char *flag, char *default_val, int > size, > * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of > the > * vhost-cuse character device. > */ > - if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) { > + if (val && (strlen(val) <= size)) { > changed = 1; > - *new_val = strdup(argv[2]); > + *new_val = strdup(val); > VLOG_INFO("User-provided %s in use: %s", flag, *new_val); > } else { > VLOG_INFO("No %s provided - defaulting to %s", flag, default_val); > @@ -2132,34 +2139,77 @@ process_vhost_flags(char *flag, char *default_val, > int size, > return changed; > } > > -int > -dpdk_init(int argc, char **argv) > +static char ** > +grow_argv(char ***argv, size_t cur_siz, size_t grow_by) > { > - int result; > - int base = 0; > - char *pragram_name = argv[0]; > + char **new_argv = realloc(*argv, sizeof(char *) * (cur_siz + grow_by)); > + return new_argv; > +} > > - if (argc < 2 || strcmp(argv[1], "--dpdk")) > - return 0; > +static int > +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv) > +{ > + struct dpdk_options_map { > + const char *ovs_configuration; > + const char *dpdk_option; > + bool default_enabled; > + const char *default_value; > + } opts[] = { > + {"dpdk_core_mask", "-c", true, "0x1"}, > + {"dpdk_mem_channels", "-n", true, "4"}, > + {"dpdk_alloc_mem", "-m", false, NULL}, > + {"dpdk_socket_mem", "--socket-mem", false, NULL}, What do you think about adding a default for socket-mem? > + {"dpdk_hugepage_dir", "--huge-dir", false, NULL}, > + }; > + int i, ret = 1; > + > + for(i = 0; i < (sizeof(opts) / sizeof(opts[0])); ++i){ > + const char *lookup = smap_get(&ovs_cfg->other_config, > + opts[i].ovs_configuration); > + if(!lookup && opts[i].default_enabled) > + lookup = opts[i].default_value; > + > + if(lookup) { > + char **newargv = grow_argv(argv, ret, 2); > + > + if (!newargv) > + return ret; We have not completed creating our eal args. In the unlikely event of this happening, I'm not sure if we should continue to call rte_eal_init() as we may end up with a silent non-intended config. > + > + *argv = newargv; > + (*argv)[ret++] = strdup(opts[i].dpdk_option); > + (*argv)[ret++] = strdup(lookup); > + } > + } > > - /* Remove the --dpdk argument from arg list.*/ > - argc--; > - argv++; > + return ret; > +} > > - /* Reject --user option */ > - int i; > - for (i = 0; i < argc; i++) { > - if (!strcmp(argv[i], "--user")) { > - VLOG_ERR("Can not mix --dpdk and --user options, aborting."); > - } > +void > +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) > +{ > + static int initialized = 0; > + > + char **argv; > + int result; > + int argc; > + > + if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", > false)){ > + return; > } > + > + initialized = 1; > > + VLOG_INFO("DPDK Enabled, initializing"); > + > + /* TODO should check for user permissions. DPDK requires root user. */ > + > #ifdef VHOST_CUSE > - if (process_vhost_flags("-cuse_dev_name", strdup("vhost-net"), > - PATH_MAX, argv, &cuse_dev_name)) { > + if (process_vhost_flags("cuse_dev_name", strdup("vhost-net"), > + PATH_MAX, ovs_cfg, &cuse_dev_name)) { > + > #else > - if (process_vhost_flags("-vhost_sock_dir", strdup(ovs_rundir()), > - NAME_MAX, argv, &vhost_sock_dir)) { > + if (process_vhost_flags("vhost_sock_dir", strdup(ovs_rundir()), > + NAME_MAX, ovs_cfg, &vhost_sock_dir)) { > struct stat s; > int err; > > @@ -2167,40 +2217,34 @@ dpdk_init(int argc, char **argv) > if (err) { > VLOG_ERR("vHostUser socket DIR '%s' does not exist.", > vhost_sock_dir); > - return err; > + return; We're not returning an error here anymore. I'm wondering should we abort or just deal with it later down the line if vhostuser is used > } > #endif > - /* Remove the vhost flag configuration parameters from the argument > - * list, so that the correct elements are passed to the DPDK > - * initialization function > - */ > - argc -= 2; > - argv += 2; /* Increment by two to bypass the vhost flag arguments > */ > - base = 2; > } > > - /* Keep the program name argument as this is needed for call to > - * rte_eal_init() > - */ > - argv[0] = pragram_name; > + argv = (char **)malloc(sizeof(char *)); > + *argv = strdup("ovs"); /* TODO use prctl to get process name */ > + > + argc = get_dpdk_args(ovs_cfg, &argv); > + > + optind = 1; > > /* Make sure things are initialized ... */ > result = rte_eal_init(argc, argv); > if (result < 0) { > ovs_abort(result, "Cannot init EAL"); > } > + > + for(result = 0; result < argc-1; ++result) > + free(argv[result]); > + > + free(argv); > > rte_memzone_dump(stdout); > rte_eal_init_ret = 0; > > - if (argc > result) { > - argv[result] = argv[0]; > - } > - > /* We are called from the main thread here */ > RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; > - > - return result + 1 + base; > } > > static const struct netdev_class dpdk_class = > diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h > index 646d3e2..e01adb3 100644 > --- a/lib/netdev-dpdk.h > +++ b/lib/netdev-dpdk.h > @@ -4,6 +4,7 @@ > #include <config.h> > > struct dp_packet; > +struct ovsrec_open_vswitch; > > #ifdef DPDK_NETDEV > > @@ -22,7 +23,7 @@ struct dp_packet; > > #define NON_PMD_CORE_ID LCORE_ID_ANY > > -int dpdk_init(int argc, char **argv); > +void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg); > void netdev_dpdk_register(void); > void free_dpdk_buf(struct dp_packet *); > int pmd_thread_setaffinity_cpu(unsigned cpu); > @@ -33,13 +34,10 @@ int pmd_thread_setaffinity_cpu(unsigned cpu); > > #include "util.h" > > -static inline int > -dpdk_init(int argc, char **argv) > +static inline void > +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED) > { > - if (argc >= 2 && !strcmp(argv[1], "--dpdk")) { > - ovs_fatal(0, "DPDK support not built into this copy of Open > vSwitch."); > - } > - return 0; > + > } > > static inline void > diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in > index 0082bed..6ae8bbd 100755 > --- a/utilities/ovs-ctl.in > +++ b/utilities/ovs-ctl.in > @@ -73,13 +73,13 @@ insert_mod_if_required () { > } > > ovs_vsctl () { > - ovs-vsctl --no-wait "$@" > + @bindir@/ovs-vsctl --no-wait "$@" > } > > set_system_ids () { > set ovs_vsctl set Open_vSwitch . > > - OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'` > + OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'` > set "$@" ovs-version="$OVS_VERSION" > > case $SYSTEM_ID in > @@ -131,7 +131,7 @@ check_force_cores () { > } > > del_transient_ports () { > - for port in `ovs-vsctl --bare -- --columns=name find port > other_config:transient=true`; do > + for port in `@bindir@/ovs-vsctl --bare -- --columns=name find port > other_config:transient=true`; do > ovs_vsctl -- del-port "$port" > done > } > @@ -193,7 +193,7 @@ add_managers () { > # won't briefly see empty datapath-id or ofport columns for records that > # exist at startup.) > action "Enabling remote OVSDB managers" \ > - ovs-appctl -t ovsdb-server ovsdb-server/add-remote \ > + @bindir@/ovs-appctl -t ovsdb-server ovsdb-server/add-remote \ > db:Open_vSwitch,Open_vSwitch,manager_options > } > > @@ -215,7 +215,7 @@ start_forwarding () { > fi > > # Start ovs-vswitchd. > - set ovs-vswitchd unix:"$DB_SOCK" > + set @bindir@/ovs-vswitchd unix:"$DB_SOCK" > set "$@" -vconsole:emer -vsyslog:err -vfile:info > if test X"$MLOCKALL" != Xno; then > set "$@" --mlockall > @@ -276,7 +276,7 @@ save_ofports_if_required () { > # > # (Versions 1.10 and later save OpenFlow port numbers without > assistance, > # so we don't have to do anything for them. > - case `ovs-appctl version | sed 1q` in > + case `@bindir@/ovs-appctl version | sed 1q` in > "ovs-vswitchd (Open vSwitch) 1."[0-9].*) > action "Saving ofport values" ovs_save save-ofports \ > "${script_ofports}" > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index b966d92..d33f42c 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -68,6 +68,7 @@ > #include "sflow_api.h" > #include "vlan-bitmap.h" > #include "packets.h" > +#include "lib/netdev-dpdk.h" > > VLOG_DEFINE_THIS_MODULE(bridge); > > @@ -2921,6 +2922,8 @@ bridge_run(void) > } > cfg = ovsrec_open_vswitch_first(idl); > > + dpdk_init(cfg); > + > /* Initialize the ofproto library. This only needs to run once, but > * it must be done after the configuration is set. If the > * initialization has already occurred, bridge_init_ofproto() > diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c > index e78ecda..ae86861 100644 > --- a/vswitchd/ovs-vswitchd.c > +++ b/vswitchd/ovs-vswitchd.c > @@ -48,7 +48,6 @@ > #include "openvswitch/vconn.h" > #include "openvswitch/vlog.h" > #include "lib/vswitch-idl.h" > -#include "lib/netdev-dpdk.h" > > VLOG_DEFINE_THIS_MODULE(vswitchd); > > @@ -71,13 +70,6 @@ main(int argc, char *argv[]) > int retval; > > set_program_name(argv[0]); > - retval = dpdk_init(argc,argv); > - if (retval < 0) { > - return retval; > - } > - > - argc -= retval; > - argv += retval; > > ovs_cmdl_proctitle_init(argc, argv); > service_start(&argc, &argv); > @@ -152,7 +144,6 @@ parse_options(int argc, char *argv[], char > **unixctl_pathp) > OPT_ENABLE_DUMMY, > OPT_DISABLE_SYSTEM, > DAEMON_OPTION_ENUMS, > - OPT_DPDK, > }; > static const struct option long_options[] = { > {"help", no_argument, NULL, 'h'}, > @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char > **unixctl_pathp) > {"bootstrap-ca-cert", required_argument, NULL, > OPT_BOOTSTRAP_CA_CERT}, > {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, > {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM}, > - {"dpdk", required_argument, NULL, OPT_DPDK}, > {NULL, 0, NULL, 0}, > }; > char *short_options = > ovs_cmdl_long_options_to_short_options(long_options); > @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char > **unixctl_pathp) > case '?': > exit(EXIT_FAILURE); > > - case OPT_DPDK: > - ovs_fatal(0, "--dpdk must be given at beginning of command > line."); It might be helpful to leave the case statement here and call usage() or give a deprecation message about the dpdk cmdline params for a release or two to ensure a smooth transition to the new scheme - otherwise it'll probably end up on the mailing list. > - break; > - > default: > abort(); > } > @@ -255,19 +241,6 @@ usage(void) > stream_usage("DATABASE", true, false, true); > daemon_usage(); > vlog_usage(); > - printf("\nDPDK options:\n" > - " --dpdk [VHOST] [DPDK] Initialize DPDK datapath.\n" > - " where DPDK are options for initializing DPDK lib and VHOST > is\n" > -#ifdef VHOST_CUSE > - " option to override default character device name used for\n" > - " for use with userspace vHost\n" > - " -cuse_dev_name NAME\n" > -#else > - " option to override default directory where vhost-user\n" > - " sockets are created.\n" > - " -vhost_sock_dir DIR\n" > -#endif > - ); > printf("\nOther options:\n" > " --unixctl=SOCKET override default control socket > name\n" > " -h, --help display this help message\n" > -- > 2.6.1.133.gf5b6079 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev
Mauricio Vásquez <mauricio.vasquezbernal@studenti.polito.it> writes: > Dear Aaron, > > The general idea looks good to me. > > Just some comments inline > > On 3 December 2015 at 05:23, Aaron Conole <aconole@redhat.com> wrote: > << snipped >> >> + if(lookup) { >> + char **newargv = grow_argv(argv, ret, 2); >> + >> + if (!newargv) >> + return ret; >> + >> + *argv = newargv; >> + (*argv)[ret++] = strdup(opts[i].dpdk_option); >> + (*argv)[ret++] = strdup(lookup); >> + } >> + } >> >> - /* Remove the --dpdk argument from arg list.*/ >> - argc--; >> - argv++; >> + return ret; >> +} >> >> > argv shall be NULL terminated, i.e. argv[argc] shall be NULL. Bah, not sure how I forgot the argv/argc conventions. I'll fix when I submit the formal patch, thanks for catching this. > > - /* Reject --user option */ >> - int i; >> - for (i = 0; i < argc; i++) { >> - if (!strcmp(argv[i], "--user")) { >> - VLOG_ERR("Can not mix --dpdk and --user options, aborting."); >> - } >> +void >> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) >> +{ >> + static int initialized = 0; >> + >> + char **argv; >> + int result; >> + int argc; >> + << snipped >> >> >> /* Make sure things are initialized ... */ >> result = rte_eal_init(argc, argv); >> if (result < 0) { >> ovs_abort(result, "Cannot init EAL"); >> } >> + >> + for(result = 0; result < argc-1; ++result) >> + free(argv[result]); >> + >> + free(argv); >> > I am not sure if freeing argv is valid in this case, C99 standard states > that it shall remain valid until the program terminates. I hope dpdk doesn't rely on that behavior, but you're correct. I'll switch to using an atexit() function that delays releasing all the memory. Thanks so much for the review! -Aaron
Panu Matilainen <pmatilai@redhat.com> writes: > On 12/03/2015 06:23 AM, Aaron Conole wrote: >> Existing DPDK integration is provided by use of command line options which >> must be split out and passed to librte in a special manner. However, this >> forces any configuration to be passed by way of a special DPDK flag, and >> interferes with ovs+dpdk packaging solutions. >> >> This commit delays dpdk initialization until after the OVS database connection >> is established, and then initializes librte. It pulls all of the config data >> from the OVS database, and assembles a new argv/argc pair to be passed along. > > There needs to be a way to turn off DPDK even when support for it is > built in since it requires extra setup and failing to initialize will > tear down the whole ovs-vswitchd process. So in fact it probably > should default to off. There is such a flag, and it is defaulted to off: >> +void >> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) >> +{ >> + static int initialized = 0; >> + >> + char **argv; >> + int result; >> + int argc; >> + Just before we do any initialization. >> + if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", false)){ >> + return; >> } > > It'd be nice to have something smarter than a big "DPDK on/off" switch > in the db, such as lazy initialization only when needed (ie dpdk ports > are added) but there are quite a few problems with that I suspect. I can look into how to add this, because I think you're right. I will make that an enhancement to this patch. > > - Panu - Thanks for the review, Panu!
"Traynor, Kevin" <kevin.traynor@intel.com> writes: >> -----Original Message----- <<snipped>> >> +static int >> +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv) >> +{ >> + struct dpdk_options_map { >> + const char *ovs_configuration; >> + const char *dpdk_option; >> + bool default_enabled; >> + const char *default_value; >> + } opts[] = { >> + {"dpdk_core_mask", "-c", true, "0x1"}, >> + {"dpdk_mem_channels", "-n", true, "4"}, >> + {"dpdk_alloc_mem", "-m", false, NULL}, >> + {"dpdk_socket_mem", "--socket-mem", false, NULL}, > > What do you think about adding a default for socket-mem? My only concern is I don't know what it should look like. This is the per-socket memory allocation and I'm not sure what kind of default makes sense, although with my specified defaults in the rest of the configuration perhaps "1024,0" is a 'proper' value. What do you think? >> + {"dpdk_hugepage_dir", "--huge-dir", false, NULL}, >> + }; >> + int i, ret = 1; >> + >> + for(i = 0; i < (sizeof(opts) / sizeof(opts[0])); ++i){ >> + const char *lookup = smap_get(&ovs_cfg->other_config, >> + opts[i].ovs_configuration); >> + if(!lookup && opts[i].default_enabled) >> + lookup = opts[i].default_value; >> + >> + if(lookup) { >> + char **newargv = grow_argv(argv, ret, 2); >> + >> + if (!newargv) >> + return ret; > > We have not completed creating our eal args. In the unlikely event of > this happening, I'm not sure if we should continue to call rte_eal_init() > as we may end up with a silent non-intended config. I agree, I haven't done much negative testing here; I hope this should not happen, but hope in one hand, etc. Perhaps I'll release the argc/argv memory I've allocated here and return NULL. Let the init() function deal with it (by aborting most likely). >> + >> + *argv = newargv; >> + (*argv)[ret++] = strdup(opts[i].dpdk_option); >> + (*argv)[ret++] = strdup(lookup); >> + } <<snipped>> >> #else >> - if (process_vhost_flags("-vhost_sock_dir", strdup(ovs_rundir()), >> - NAME_MAX, argv, &vhost_sock_dir)) { >> + if (process_vhost_flags("vhost_sock_dir", strdup(ovs_rundir()), >> + NAME_MAX, ovs_cfg, &vhost_sock_dir)) { >> struct stat s; >> int err; >> >> @@ -2167,40 +2217,34 @@ dpdk_init(int argc, char **argv) >> if (err) { >> VLOG_ERR("vHostUser socket DIR '%s' does not exist.", >> vhost_sock_dir); >> - return err; >> + return; > > We're not returning an error here anymore. I'm wondering should we abort > or just deal with it later down the line if vhostuser is used I was thinking of dealing with it if/when vhostuser socket is used. Actually, what I planned on doing was proposing a directory creation flag so that the user sees it to 'just work', but that's beyond the scope of this patch. Since we did return an error previously, and that error was used to terminate the program, perhaps I'll do an ovs_abort() for now. <<snipped code>> >> static const struct option long_options[] = { >> {"help", no_argument, NULL, 'h'}, >> @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char >> **unixctl_pathp) >> {"bootstrap-ca-cert", required_argument, NULL, >> OPT_BOOTSTRAP_CA_CERT}, >> {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, >> {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM}, >> - {"dpdk", required_argument, NULL, OPT_DPDK}, >> {NULL, 0, NULL, 0}, >> }; >> char *short_options = >> ovs_cmdl_long_options_to_short_options(long_options); >> @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char >> **unixctl_pathp) >> case '?': >> exit(EXIT_FAILURE); >> >> - case OPT_DPDK: >> - ovs_fatal(0, "--dpdk must be given at beginning of command >> line."); > > It might be helpful to leave the case statement here and call usage() or give > a deprecation message about the dpdk cmdline params for a release or two to > ensure a smooth transition to the new scheme - otherwise it'll probably end > up on the mailing list. I waffled on whether or not to change it to be "--dpdk is deprecated" or just remove it, but there's probably a user who will complain that we removed the DPDK feature. Okay, I'll update the NEWS file, and mention that the configuration of DPDK via command line is deprecated in favor of the db approach. Thanks for the review, Kevin!
On 03/12/15 04:23, Aaron Conole wrote: > Existing DPDK integration is provided by use of command line options which > must be split out and passed to librte in a special manner. However, this > forces any configuration to be passed by way of a special DPDK flag, and > interferes with ovs+dpdk packaging solutions. > > This commit delays dpdk initialization until after the OVS database connection > is established, and then initializes librte. It pulls all of the config data > from the OVS database, and assembles a new argv/argc pair to be passed along. > > Signed-off-by: Aaron Conole <aconole@redhat.com> > --- > lib/netdev-dpdk.c | 126 ++++++++++++++++++++++++++++++++---------------- > lib/netdev-dpdk.h | 12 ++--- > utilities/ovs-ctl.in | 12 ++--- > vswitchd/bridge.c | 3 ++ > vswitchd/ovs-vswitchd.c | 27 ----------- > 5 files changed, 99 insertions(+), 81 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index e3a0771..f3e0840 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c [snip] > +void > +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) > +{ > + static int initialized = 0; > + > + char **argv; > + int result; > + int argc; > + > + if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", false)){ > + return; Your second patch mentions that any change for any of the config needs an ovs-vswitchd restart, which is probably the right thing to do. But this function is called from bridge_run(), so a false -> true change takes immediate effect, which may not be what the user expects. I think it's better to wrap the bool check into an ovsthread_once_start() to make sure that it is only checked once. > } > + > + initialized = 1; > > + VLOG_INFO("DPDK Enabled, initializing"); > + > + /* TODO should check for user permissions. DPDK requires root user. */ > + [snip] > --- a/utilities/ovs-ctl.in > +++ b/utilities/ovs-ctl.in > @@ -73,13 +73,13 @@ insert_mod_if_required () { > } > > ovs_vsctl () { > - ovs-vsctl --no-wait "$@" > + @bindir@/ovs-vsctl --no-wait "$@" > } > > set_system_ids () { > set ovs_vsctl set Open_vSwitch . > > - OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'` > + OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'` Why do you need to prepend all these command lines with @bindir@ [snip] > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index b966d92..d33f42c 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -68,6 +68,7 @@ > #include "sflow_api.h" > #include "vlan-bitmap.h" > #include "packets.h" > +#include "lib/netdev-dpdk.h" > > VLOG_DEFINE_THIS_MODULE(bridge); > > @@ -2921,6 +2922,8 @@ bridge_run(void) > } > cfg = ovsrec_open_vswitch_first(idl); > > + dpdk_init(cfg); You should check 'cfg != NULL', just as bridge_init_ofproto() does. > + > /* Initialize the ofproto library. This only needs to run once, but > * it must be done after the configuration is set. If the > * initialization has already occurred, bridge_init_ofproto()
Zoltan Kiss <zoltan.kiss@linaro.org> writes: > On 03/12/15 04:23, Aaron Conole wrote: >> Existing DPDK integration is provided by use of command line options which >> must be split out and passed to librte in a special manner. However, this >> forces any configuration to be passed by way of a special DPDK flag, and >> interferes with ovs+dpdk packaging solutions. >> >> This commit delays dpdk initialization until after the OVS database connection >> is established, and then initializes librte. It pulls all of the config data >> from the OVS database, and assembles a new argv/argc pair to be passed along. >> >> Signed-off-by: Aaron Conole <aconole@redhat.com> >> --- >> lib/netdev-dpdk.c | 126 >> ++++++++++++++++++++++++++++++++---------------- >> lib/netdev-dpdk.h | 12 ++--- >> utilities/ovs-ctl.in | 12 ++--- >> vswitchd/bridge.c | 3 ++ >> vswitchd/ovs-vswitchd.c | 27 ----------- >> 5 files changed, 99 insertions(+), 81 deletions(-) >> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >> index e3a0771..f3e0840 100644 >> --- a/lib/netdev-dpdk.c >> +++ b/lib/netdev-dpdk.c > > [snip] > >> +void >> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) >> +{ >> + static int initialized = 0; >> + >> + char **argv; >> + int result; >> + int argc; >> + >> + if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", false)){ >> + return; > > Your second patch mentions that any change for any of the config needs > an ovs-vswitchd restart, which is probably the right thing to do. > But this function is called from bridge_run(), so a false -> true > change takes immediate effect, which may not be what the user > expects. I think it's better to wrap the bool check into an > ovsthread_once_start() to make sure that it is only checked once. Good catch. I'll do that. > >> } >> + >> + initialized = 1; >> >> + VLOG_INFO("DPDK Enabled, initializing"); >> + >> + /* TODO should check for user permissions. DPDK requires root user. */ >> + > > [snip] > >> --- a/utilities/ovs-ctl.in >> +++ b/utilities/ovs-ctl.in >> @@ -73,13 +73,13 @@ insert_mod_if_required () { >> } >> >> ovs_vsctl () { >> - ovs-vsctl --no-wait "$@" >> + @bindir@/ovs-vsctl --no-wait "$@" >> } >> >> set_system_ids () { >> set ovs_vsctl set Open_vSwitch . >> >> - OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'` >> + OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'` > > Why do you need to prepend all these command lines with @bindir@ I didn't intend to include this with this patch series, but the answer is in how systemd integration is done. The way the systemd code works, it calls ovs_ctl to start everything (good). ovs_ctl relies on the path, though. There's a block at the beginning where a new path is built up, but there is an edge case in there. I'll drop this change and submit it separately. > [snip] >> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c >> index b966d92..d33f42c 100644 >> --- a/vswitchd/bridge.c >> +++ b/vswitchd/bridge.c >> @@ -68,6 +68,7 @@ >> #include "sflow_api.h" >> #include "vlan-bitmap.h" >> #include "packets.h" >> +#include "lib/netdev-dpdk.h" >> >> VLOG_DEFINE_THIS_MODULE(bridge); >> >> @@ -2921,6 +2922,8 @@ bridge_run(void) >> } >> cfg = ovsrec_open_vswitch_first(idl); >> >> + dpdk_init(cfg); > > You should check 'cfg != NULL', just as bridge_init_ofproto() does. Yes, I will. Thanks for the review, Zoltan!
> -----Original Message----- > From: Aaron Conole [mailto:aconole@redhat.com] > Sent: Friday, December 4, 2015 3:37 PM > To: Traynor, Kevin > Cc: dev@openvswitch.org; Flavio Leitner > Subject: Re: [ovs-dev] [RFC 1/2] dpdk: Convert initialization from cmdline to > db > > "Traynor, Kevin" <kevin.traynor@intel.com> writes: > >> -----Original Message----- > <<snipped>> > >> +static int > >> +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv) > >> +{ > >> + struct dpdk_options_map { > >> + const char *ovs_configuration; > >> + const char *dpdk_option; > >> + bool default_enabled; > >> + const char *default_value; > >> + } opts[] = { > >> + {"dpdk_core_mask", "-c", true, "0x1"}, > >> + {"dpdk_mem_channels", "-n", true, "4"}, > >> + {"dpdk_alloc_mem", "-m", false, NULL}, > >> + {"dpdk_socket_mem", "--socket-mem", false, NULL}, > > > > What do you think about adding a default for socket-mem? > > My only concern is I don't know what it should look like. This is the > per-socket memory allocation and I'm not sure what kind of default makes > sense, although with my specified defaults in the rest of the > configuration perhaps "1024,0" is a 'proper' value. > > What do you think? I think "1024,0" is reasonable default for users who don't want to know too much low level config. > <<snipped code>> > >> static const struct option long_options[] = { > >> {"help", no_argument, NULL, 'h'}, > >> @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char > >> **unixctl_pathp) > >> {"bootstrap-ca-cert", required_argument, NULL, > >> OPT_BOOTSTRAP_CA_CERT}, > >> {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, > >> {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM}, > >> - {"dpdk", required_argument, NULL, OPT_DPDK}, > >> {NULL, 0, NULL, 0}, > >> }; > >> char *short_options = > >> ovs_cmdl_long_options_to_short_options(long_options); > >> @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char > >> **unixctl_pathp) > >> case '?': > >> exit(EXIT_FAILURE); > >> > >> - case OPT_DPDK: > >> - ovs_fatal(0, "--dpdk must be given at beginning of command > >> line."); > > > > It might be helpful to leave the case statement here and call usage() or > give > > a deprecation message about the dpdk cmdline params for a release or two to > > ensure a smooth transition to the new scheme - otherwise it'll probably end > > up on the mailing list. > > I waffled on whether or not to change it to be "--dpdk is deprecated" or > just remove it, but there's probably a user who will complain that we > removed the DPDK feature. Okay, I'll update the NEWS file, and mention > that the configuration of DPDK via command line is deprecated in favor > of the db approach. Yes, please don't put "--dpdk is deprecated" anywhere in the code base!! It's a cleaner cut to code as you did, but worries me that a user will just pull from the repo, run the their setup script and suddenly it won't run with no error message to say why. > > Thanks for the review, Kevin!
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index e3a0771..f3e0840 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -29,6 +29,7 @@ #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> +#include <getopt.h> #include "dirs.h" #include "dp-packet.h" @@ -49,6 +50,8 @@ #include "timeval.h" #include "unixctl.h" #include "openvswitch/vlog.h" +#include "smap.h" +#include "vswitch-idl.h" #include "rte_config.h" #include "rte_mbuf.h" @@ -2107,10 +2110,14 @@ unlock_dpdk: static int process_vhost_flags(char *flag, char *default_val, int size, - char **argv, char **new_val) + const struct ovsrec_open_vswitch *ovs_cfg, + char **new_val) { + const char *val; int changed = 0; + val = smap_get(&ovs_cfg->other_config, flag); + /* Depending on which version of vhost is in use, process the vhost-specific * flag if it is provided on the vswitchd command line, otherwise resort to * a default value. @@ -2120,9 +2127,9 @@ process_vhost_flags(char *flag, char *default_val, int size, * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of the * vhost-cuse character device. */ - if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) { + if (val && (strlen(val) <= size)) { changed = 1; - *new_val = strdup(argv[2]); + *new_val = strdup(val); VLOG_INFO("User-provided %s in use: %s", flag, *new_val); } else { VLOG_INFO("No %s provided - defaulting to %s", flag, default_val); @@ -2132,34 +2139,77 @@ process_vhost_flags(char *flag, char *default_val, int size, return changed; } -int -dpdk_init(int argc, char **argv) +static char ** +grow_argv(char ***argv, size_t cur_siz, size_t grow_by) { - int result; - int base = 0; - char *pragram_name = argv[0]; + char **new_argv = realloc(*argv, sizeof(char *) * (cur_siz + grow_by)); + return new_argv; +} - if (argc < 2 || strcmp(argv[1], "--dpdk")) - return 0; +static int +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv) +{ + struct dpdk_options_map { + const char *ovs_configuration; + const char *dpdk_option; + bool default_enabled; + const char *default_value; + } opts[] = { + {"dpdk_core_mask", "-c", true, "0x1"}, + {"dpdk_mem_channels", "-n", true, "4"}, + {"dpdk_alloc_mem", "-m", false, NULL}, + {"dpdk_socket_mem", "--socket-mem", false, NULL}, + {"dpdk_hugepage_dir", "--huge-dir", false, NULL}, + }; + int i, ret = 1; + + for(i = 0; i < (sizeof(opts) / sizeof(opts[0])); ++i){ + const char *lookup = smap_get(&ovs_cfg->other_config, + opts[i].ovs_configuration); + if(!lookup && opts[i].default_enabled) + lookup = opts[i].default_value; + + if(lookup) { + char **newargv = grow_argv(argv, ret, 2); + + if (!newargv) + return ret; + + *argv = newargv; + (*argv)[ret++] = strdup(opts[i].dpdk_option); + (*argv)[ret++] = strdup(lookup); + } + } - /* Remove the --dpdk argument from arg list.*/ - argc--; - argv++; + return ret; +} - /* Reject --user option */ - int i; - for (i = 0; i < argc; i++) { - if (!strcmp(argv[i], "--user")) { - VLOG_ERR("Can not mix --dpdk and --user options, aborting."); - } +void +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg) +{ + static int initialized = 0; + + char **argv; + int result; + int argc; + + if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", false)){ + return; } + + initialized = 1; + VLOG_INFO("DPDK Enabled, initializing"); + + /* TODO should check for user permissions. DPDK requires root user. */ + #ifdef VHOST_CUSE - if (process_vhost_flags("-cuse_dev_name", strdup("vhost-net"), - PATH_MAX, argv, &cuse_dev_name)) { + if (process_vhost_flags("cuse_dev_name", strdup("vhost-net"), + PATH_MAX, ovs_cfg, &cuse_dev_name)) { + #else - if (process_vhost_flags("-vhost_sock_dir", strdup(ovs_rundir()), - NAME_MAX, argv, &vhost_sock_dir)) { + if (process_vhost_flags("vhost_sock_dir", strdup(ovs_rundir()), + NAME_MAX, ovs_cfg, &vhost_sock_dir)) { struct stat s; int err; @@ -2167,40 +2217,34 @@ dpdk_init(int argc, char **argv) if (err) { VLOG_ERR("vHostUser socket DIR '%s' does not exist.", vhost_sock_dir); - return err; + return; } #endif - /* Remove the vhost flag configuration parameters from the argument - * list, so that the correct elements are passed to the DPDK - * initialization function - */ - argc -= 2; - argv += 2; /* Increment by two to bypass the vhost flag arguments */ - base = 2; } - /* Keep the program name argument as this is needed for call to - * rte_eal_init() - */ - argv[0] = pragram_name; + argv = (char **)malloc(sizeof(char *)); + *argv = strdup("ovs"); /* TODO use prctl to get process name */ + + argc = get_dpdk_args(ovs_cfg, &argv); + + optind = 1; /* Make sure things are initialized ... */ result = rte_eal_init(argc, argv); if (result < 0) { ovs_abort(result, "Cannot init EAL"); } + + for(result = 0; result < argc-1; ++result) + free(argv[result]); + + free(argv); rte_memzone_dump(stdout); rte_eal_init_ret = 0; - if (argc > result) { - argv[result] = argv[0]; - } - /* We are called from the main thread here */ RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; - - return result + 1 + base; } static const struct netdev_class dpdk_class = diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h index 646d3e2..e01adb3 100644 --- a/lib/netdev-dpdk.h +++ b/lib/netdev-dpdk.h @@ -4,6 +4,7 @@ #include <config.h> struct dp_packet; +struct ovsrec_open_vswitch; #ifdef DPDK_NETDEV @@ -22,7 +23,7 @@ struct dp_packet; #define NON_PMD_CORE_ID LCORE_ID_ANY -int dpdk_init(int argc, char **argv); +void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg); void netdev_dpdk_register(void); void free_dpdk_buf(struct dp_packet *); int pmd_thread_setaffinity_cpu(unsigned cpu); @@ -33,13 +34,10 @@ int pmd_thread_setaffinity_cpu(unsigned cpu); #include "util.h" -static inline int -dpdk_init(int argc, char **argv) +static inline void +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED) { - if (argc >= 2 && !strcmp(argv[1], "--dpdk")) { - ovs_fatal(0, "DPDK support not built into this copy of Open vSwitch."); - } - return 0; + } static inline void diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in index 0082bed..6ae8bbd 100755 --- a/utilities/ovs-ctl.in +++ b/utilities/ovs-ctl.in @@ -73,13 +73,13 @@ insert_mod_if_required () { } ovs_vsctl () { - ovs-vsctl --no-wait "$@" + @bindir@/ovs-vsctl --no-wait "$@" } set_system_ids () { set ovs_vsctl set Open_vSwitch . - OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'` + OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'` set "$@" ovs-version="$OVS_VERSION" case $SYSTEM_ID in @@ -131,7 +131,7 @@ check_force_cores () { } del_transient_ports () { - for port in `ovs-vsctl --bare -- --columns=name find port other_config:transient=true`; do + for port in `@bindir@/ovs-vsctl --bare -- --columns=name find port other_config:transient=true`; do ovs_vsctl -- del-port "$port" done } @@ -193,7 +193,7 @@ add_managers () { # won't briefly see empty datapath-id or ofport columns for records that # exist at startup.) action "Enabling remote OVSDB managers" \ - ovs-appctl -t ovsdb-server ovsdb-server/add-remote \ + @bindir@/ovs-appctl -t ovsdb-server ovsdb-server/add-remote \ db:Open_vSwitch,Open_vSwitch,manager_options } @@ -215,7 +215,7 @@ start_forwarding () { fi # Start ovs-vswitchd. - set ovs-vswitchd unix:"$DB_SOCK" + set @bindir@/ovs-vswitchd unix:"$DB_SOCK" set "$@" -vconsole:emer -vsyslog:err -vfile:info if test X"$MLOCKALL" != Xno; then set "$@" --mlockall @@ -276,7 +276,7 @@ save_ofports_if_required () { # # (Versions 1.10 and later save OpenFlow port numbers without assistance, # so we don't have to do anything for them. - case `ovs-appctl version | sed 1q` in + case `@bindir@/ovs-appctl version | sed 1q` in "ovs-vswitchd (Open vSwitch) 1."[0-9].*) action "Saving ofport values" ovs_save save-ofports \ "${script_ofports}" diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index b966d92..d33f42c 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -68,6 +68,7 @@ #include "sflow_api.h" #include "vlan-bitmap.h" #include "packets.h" +#include "lib/netdev-dpdk.h" VLOG_DEFINE_THIS_MODULE(bridge); @@ -2921,6 +2922,8 @@ bridge_run(void) } cfg = ovsrec_open_vswitch_first(idl); + dpdk_init(cfg); + /* Initialize the ofproto library. This only needs to run once, but * it must be done after the configuration is set. If the * initialization has already occurred, bridge_init_ofproto() diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c index e78ecda..ae86861 100644 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@ -48,7 +48,6 @@ #include "openvswitch/vconn.h" #include "openvswitch/vlog.h" #include "lib/vswitch-idl.h" -#include "lib/netdev-dpdk.h" VLOG_DEFINE_THIS_MODULE(vswitchd); @@ -71,13 +70,6 @@ main(int argc, char *argv[]) int retval; set_program_name(argv[0]); - retval = dpdk_init(argc,argv); - if (retval < 0) { - return retval; - } - - argc -= retval; - argv += retval; ovs_cmdl_proctitle_init(argc, argv); service_start(&argc, &argv); @@ -152,7 +144,6 @@ parse_options(int argc, char *argv[], char **unixctl_pathp) OPT_ENABLE_DUMMY, OPT_DISABLE_SYSTEM, DAEMON_OPTION_ENUMS, - OPT_DPDK, }; static const struct option long_options[] = { {"help", no_argument, NULL, 'h'}, @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char **unixctl_pathp) {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT}, {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY}, {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM}, - {"dpdk", required_argument, NULL, OPT_DPDK}, {NULL, 0, NULL, 0}, }; char *short_options = ovs_cmdl_long_options_to_short_options(long_options); @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char **unixctl_pathp) case '?': exit(EXIT_FAILURE); - case OPT_DPDK: - ovs_fatal(0, "--dpdk must be given at beginning of command line."); - break; - default: abort(); } @@ -255,19 +241,6 @@ usage(void) stream_usage("DATABASE", true, false, true); daemon_usage(); vlog_usage(); - printf("\nDPDK options:\n" - " --dpdk [VHOST] [DPDK] Initialize DPDK datapath.\n" - " where DPDK are options for initializing DPDK lib and VHOST is\n" -#ifdef VHOST_CUSE - " option to override default character device name used for\n" - " for use with userspace vHost\n" - " -cuse_dev_name NAME\n" -#else - " option to override default directory where vhost-user\n" - " sockets are created.\n" - " -vhost_sock_dir DIR\n" -#endif - ); printf("\nOther options:\n" " --unixctl=SOCKET override default control socket name\n" " -h, --help display this help message\n"
Existing DPDK integration is provided by use of command line options which must be split out and passed to librte in a special manner. However, this forces any configuration to be passed by way of a special DPDK flag, and interferes with ovs+dpdk packaging solutions. This commit delays dpdk initialization until after the OVS database connection is established, and then initializes librte. It pulls all of the config data from the OVS database, and assembles a new argv/argc pair to be passed along. Signed-off-by: Aaron Conole <aconole@redhat.com> --- lib/netdev-dpdk.c | 126 ++++++++++++++++++++++++++++++++---------------- lib/netdev-dpdk.h | 12 ++--- utilities/ovs-ctl.in | 12 ++--- vswitchd/bridge.c | 3 ++ vswitchd/ovs-vswitchd.c | 27 ----------- 5 files changed, 99 insertions(+), 81 deletions(-)