diff mbox

[ovs-dev,RFC,1/2] dpdk: Convert initialization from cmdline to db

Message ID 1449116634-14671-2-git-send-email-aconole@redhat.com
State RFC
Headers show

Commit Message

Aaron Conole Dec. 3, 2015, 4:23 a.m. UTC
Existing DPDK integration is provided by use of command line options which
must be split out and passed to librte in a special manner. However, this
forces any configuration to be passed by way of a special DPDK flag, and
interferes with ovs+dpdk packaging solutions.

This commit delays dpdk initialization until after the OVS database connection
is established, and then initializes librte. It pulls all of the config data
from the OVS database, and assembles a new argv/argc pair to be passed along.

Signed-off-by: Aaron Conole <aconole@redhat.com>
---
 lib/netdev-dpdk.c       | 126 ++++++++++++++++++++++++++++++++----------------
 lib/netdev-dpdk.h       |  12 ++---
 utilities/ovs-ctl.in    |  12 ++---
 vswitchd/bridge.c       |   3 ++
 vswitchd/ovs-vswitchd.c |  27 -----------
 5 files changed, 99 insertions(+), 81 deletions(-)

Comments

Mauricio Vásquez Dec. 3, 2015, 4:05 p.m. UTC | #1
Dear Aaron,

The general idea looks good to me.

Just  some comments inline

On 3 December 2015 at 05:23, Aaron Conole <aconole@redhat.com> wrote:

> Existing DPDK integration is provided by use of command line options which
> must be split out and passed to librte in a special manner. However, this
> forces any configuration to be passed by way of a special DPDK flag, and
> interferes with ovs+dpdk packaging solutions.
>
> This commit delays dpdk initialization until after the OVS database
> connection
> is established, and then initializes librte. It pulls all of the config
> data
> from the OVS database, and assembles a new argv/argc pair to be passed
> along.
>
> Signed-off-by: Aaron Conole <aconole@redhat.com>
> ---
>  lib/netdev-dpdk.c       | 126
> ++++++++++++++++++++++++++++++++----------------
>  lib/netdev-dpdk.h       |  12 ++---
>  utilities/ovs-ctl.in    |  12 ++---
>  vswitchd/bridge.c       |   3 ++
>  vswitchd/ovs-vswitchd.c |  27 -----------
>  5 files changed, 99 insertions(+), 81 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index e3a0771..f3e0840 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -29,6 +29,7 @@
>  #include <stdio.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
> +#include <getopt.h>
>
>  #include "dirs.h"
>  #include "dp-packet.h"
> @@ -49,6 +50,8 @@
>  #include "timeval.h"
>  #include "unixctl.h"
>  #include "openvswitch/vlog.h"
> +#include "smap.h"
> +#include "vswitch-idl.h"
>
>  #include "rte_config.h"
>  #include "rte_mbuf.h"
> @@ -2107,10 +2110,14 @@ unlock_dpdk:
>
>  static int
>  process_vhost_flags(char *flag, char *default_val, int size,
> -                    char **argv, char **new_val)
> +                    const struct ovsrec_open_vswitch *ovs_cfg,
> +                    char **new_val)
>  {
> +    const char *val;
>      int changed = 0;
>
> +    val = smap_get(&ovs_cfg->other_config, flag);
> +
>      /* Depending on which version of vhost is in use, process the
> vhost-specific
>       * flag if it is provided on the vswitchd command line, otherwise
> resort to
>       * a default value.
> @@ -2120,9 +2127,9 @@ process_vhost_flags(char *flag, char *default_val,
> int size,
>       * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of
> the
>       * vhost-cuse character device.
>       */
> -    if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) {
> +    if (val && (strlen(val) <= size)) {
>          changed = 1;
> -        *new_val = strdup(argv[2]);
> +        *new_val = strdup(val);
>          VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
>      } else {
>          VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
> @@ -2132,34 +2139,77 @@ process_vhost_flags(char *flag, char *default_val,
> int size,
>      return changed;
>  }
>
> -int
> -dpdk_init(int argc, char **argv)
> +static char **
> +grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
>  {
> -    int result;
> -    int base = 0;
> -    char *pragram_name = argv[0];
> +    char **new_argv = realloc(*argv, sizeof(char *) * (cur_siz +
> grow_by));
> +    return new_argv;
> +}
>
> -    if (argc < 2 || strcmp(argv[1], "--dpdk"))
> -        return 0;
> +static int
> +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv)
> +{
> +    struct dpdk_options_map {
> +        const char *ovs_configuration;
> +        const char *dpdk_option;
> +        bool default_enabled;
> +        const char *default_value;
> +    } opts[] = {
> +        {"dpdk_core_mask", "-c", true, "0x1"},
> +        {"dpdk_mem_channels", "-n", true, "4"},
> +        {"dpdk_alloc_mem", "-m", false, NULL},
> +        {"dpdk_socket_mem", "--socket-mem", false, NULL},
> +        {"dpdk_hugepage_dir", "--huge-dir", false, NULL},
> +    };
> +    int i, ret = 1;
> +
> +    for(i = 0; i < (sizeof(opts) / sizeof(opts[0])); ++i){
> +        const char *lookup = smap_get(&ovs_cfg->other_config,
> +                                      opts[i].ovs_configuration);
> +        if(!lookup && opts[i].default_enabled)
> +            lookup = opts[i].default_value;
> +
> +        if(lookup) {
> +            char **newargv = grow_argv(argv, ret, 2);
> +
> +            if (!newargv)
> +                return ret;
> +
> +            *argv = newargv;
> +            (*argv)[ret++] = strdup(opts[i].dpdk_option);
> +            (*argv)[ret++] = strdup(lookup);
> +        }
> +    }
>
> -    /* Remove the --dpdk argument from arg list.*/
> -    argc--;
> -    argv++;
> +    return ret;
> +}
>
>
argv shall be NULL terminated, i.e. argv[argc] shall be NULL.

-    /* Reject --user option */
> -    int i;
> -    for (i = 0; i < argc; i++) {
> -        if (!strcmp(argv[i], "--user")) {
> -            VLOG_ERR("Can not mix --dpdk and --user options, aborting.");
> -        }
> +void
> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
> +{
> +    static int initialized = 0;
> +
> +    char **argv;
> +    int result;
> +    int argc;
> +
> +    if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk",
> false)){
> +        return;
>      }
> +
> +    initialized = 1;
>
> +    VLOG_INFO("DPDK Enabled, initializing");
> +
> +    /* TODO should check for user permissions. DPDK requires root user. */
> +
>  #ifdef VHOST_CUSE
> -    if (process_vhost_flags("-cuse_dev_name", strdup("vhost-net"),
> -                            PATH_MAX, argv, &cuse_dev_name)) {
> +    if (process_vhost_flags("cuse_dev_name", strdup("vhost-net"),
> +                            PATH_MAX, ovs_cfg, &cuse_dev_name)) {
> +
>  #else
> -    if (process_vhost_flags("-vhost_sock_dir", strdup(ovs_rundir()),
> -                            NAME_MAX, argv, &vhost_sock_dir)) {
> +    if (process_vhost_flags("vhost_sock_dir", strdup(ovs_rundir()),
> +                            NAME_MAX, ovs_cfg, &vhost_sock_dir)) {
>          struct stat s;
>          int err;
>
> @@ -2167,40 +2217,34 @@ dpdk_init(int argc, char **argv)
>          if (err) {
>              VLOG_ERR("vHostUser socket DIR '%s' does not exist.",
>                       vhost_sock_dir);
> -            return err;
> +            return;
>          }
>  #endif
> -        /* Remove the vhost flag configuration parameters from the
> argument
> -         * list, so that the correct elements are passed to the DPDK
> -         * initialization function
> -         */
> -        argc -= 2;
> -        argv += 2;    /* Increment by two to bypass the vhost flag
> arguments */
> -        base = 2;
>      }
>
> -    /* Keep the program name argument as this is needed for call to
> -     * rte_eal_init()
> -     */
> -    argv[0] = pragram_name;
> +    argv = (char **)malloc(sizeof(char *));
> +    *argv = strdup("ovs"); /* TODO use prctl to get process name */
> +
> +    argc = get_dpdk_args(ovs_cfg, &argv);
> +
> +    optind = 1;
>
>      /* Make sure things are initialized ... */
>      result = rte_eal_init(argc, argv);
>      if (result < 0) {
>          ovs_abort(result, "Cannot init EAL");
>      }
> +
> +    for(result = 0; result < argc-1; ++result)
> +        free(argv[result]);
> +
> +    free(argv);
>
> I am not sure if freeing argv is valid in this case, C99 standard states
that it shall remain valid until the program terminates.


>      rte_memzone_dump(stdout);
>      rte_eal_init_ret = 0;
>
> -    if (argc > result) {
> -        argv[result] = argv[0];
> -    }
> -
>      /* We are called from the main thread here */
>      RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
> -
> -    return result + 1 + base;
>  }
>
>  static const struct netdev_class dpdk_class =
> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
> index 646d3e2..e01adb3 100644
> --- a/lib/netdev-dpdk.h
> +++ b/lib/netdev-dpdk.h
> @@ -4,6 +4,7 @@
>  #include <config.h>
>
>  struct dp_packet;
> +struct ovsrec_open_vswitch;
>
>  #ifdef DPDK_NETDEV
>
> @@ -22,7 +23,7 @@ struct dp_packet;
>
>  #define NON_PMD_CORE_ID LCORE_ID_ANY
>
> -int dpdk_init(int argc, char **argv);
> +void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg);
>  void netdev_dpdk_register(void);
>  void free_dpdk_buf(struct dp_packet *);
>  int pmd_thread_setaffinity_cpu(unsigned cpu);
> @@ -33,13 +34,10 @@ int pmd_thread_setaffinity_cpu(unsigned cpu);
>
>  #include "util.h"
>
> -static inline int
> -dpdk_init(int argc, char **argv)
> +static inline void
> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED)
>  {
> -    if (argc >= 2 && !strcmp(argv[1], "--dpdk")) {
> -        ovs_fatal(0, "DPDK support not built into this copy of Open
> vSwitch.");
> -    }
> -    return 0;
> +
>  }
>

Why to remove this error handling?


>
>  static inline void
> diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in
> index 0082bed..6ae8bbd 100755
> --- a/utilities/ovs-ctl.in
> +++ b/utilities/ovs-ctl.in
> @@ -73,13 +73,13 @@ insert_mod_if_required () {
>  }
>
>  ovs_vsctl () {
> -    ovs-vsctl --no-wait "$@"
> +    @bindir@/ovs-vsctl --no-wait "$@"
>  }
>
>  set_system_ids () {
>      set ovs_vsctl set Open_vSwitch .
>
> -    OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'`
> +    OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'`
>      set "$@" ovs-version="$OVS_VERSION"
>
>      case $SYSTEM_ID in
> @@ -131,7 +131,7 @@ check_force_cores () {
>  }
>
>  del_transient_ports () {
> -    for port in `ovs-vsctl --bare -- --columns=name find port
> other_config:transient=true`; do
> +    for port in `@bindir@/ovs-vsctl --bare -- --columns=name find port
> other_config:transient=true`; do
>          ovs_vsctl -- del-port "$port"
>      done
>  }
> @@ -193,7 +193,7 @@ add_managers () {
>      # won't briefly see empty datapath-id or ofport columns for records
> that
>      # exist at startup.)
>      action "Enabling remote OVSDB managers" \
> -       ovs-appctl -t ovsdb-server ovsdb-server/add-remote \
> +       @bindir@/ovs-appctl -t ovsdb-server ovsdb-server/add-remote \
>             db:Open_vSwitch,Open_vSwitch,manager_options
>  }
>
> @@ -215,7 +215,7 @@ start_forwarding () {
>          fi
>
>             # Start ovs-vswitchd.
> -           set ovs-vswitchd unix:"$DB_SOCK"
> +           set @bindir@/ovs-vswitchd unix:"$DB_SOCK"
>             set "$@" -vconsole:emer -vsyslog:err -vfile:info
>             if test X"$MLOCKALL" != Xno; then
>                 set "$@" --mlockall
> @@ -276,7 +276,7 @@ save_ofports_if_required () {
>      #
>      # (Versions 1.10 and later save OpenFlow port numbers without
> assistance,
>      # so we don't have to do anything for them.
> -    case `ovs-appctl version | sed 1q` in
> +    case `@bindir@/ovs-appctl version | sed 1q` in
>          "ovs-vswitchd (Open vSwitch) 1."[0-9].*)
>              action "Saving ofport values" ovs_save save-ofports \
>                  "${script_ofports}"
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index b966d92..d33f42c 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -68,6 +68,7 @@
>  #include "sflow_api.h"
>  #include "vlan-bitmap.h"
>  #include "packets.h"
> +#include "lib/netdev-dpdk.h"
>
>  VLOG_DEFINE_THIS_MODULE(bridge);
>
> @@ -2921,6 +2922,8 @@ bridge_run(void)
>      }
>      cfg = ovsrec_open_vswitch_first(idl);
>
> +    dpdk_init(cfg);
> +
>      /* Initialize the ofproto library.  This only needs to run once, but
>       * it must be done after the configuration is set.  If the
>       * initialization has already occurred, bridge_init_ofproto()
> diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
> index e78ecda..ae86861 100644
> --- a/vswitchd/ovs-vswitchd.c
> +++ b/vswitchd/ovs-vswitchd.c
> @@ -48,7 +48,6 @@
>  #include "openvswitch/vconn.h"
>  #include "openvswitch/vlog.h"
>  #include "lib/vswitch-idl.h"
> -#include "lib/netdev-dpdk.h"
>
>  VLOG_DEFINE_THIS_MODULE(vswitchd);
>
> @@ -71,13 +70,6 @@ main(int argc, char *argv[])
>      int retval;
>
>      set_program_name(argv[0]);
> -    retval = dpdk_init(argc,argv);
> -    if (retval < 0) {
> -        return retval;
> -    }
> -
> -    argc -= retval;
> -    argv += retval;
>
>      ovs_cmdl_proctitle_init(argc, argv);
>      service_start(&argc, &argv);
> @@ -152,7 +144,6 @@ parse_options(int argc, char *argv[], char
> **unixctl_pathp)
>          OPT_ENABLE_DUMMY,
>          OPT_DISABLE_SYSTEM,
>          DAEMON_OPTION_ENUMS,
> -        OPT_DPDK,
>      };
>      static const struct option long_options[] = {
>          {"help",        no_argument, NULL, 'h'},
> @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char
> **unixctl_pathp)
>          {"bootstrap-ca-cert", required_argument, NULL,
> OPT_BOOTSTRAP_CA_CERT},
>          {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY},
>          {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM},
> -        {"dpdk", required_argument, NULL, OPT_DPDK},
>          {NULL, 0, NULL, 0},
>      };
>      char *short_options =
> ovs_cmdl_long_options_to_short_options(long_options);
> @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char
> **unixctl_pathp)
>          case '?':
>              exit(EXIT_FAILURE);
>
> -        case OPT_DPDK:
> -            ovs_fatal(0, "--dpdk must be given at beginning of command
> line.");
> -            break;
> -
>          default:
>              abort();
>          }
> @@ -255,19 +241,6 @@ usage(void)
>      stream_usage("DATABASE", true, false, true);
>      daemon_usage();
>      vlog_usage();
> -    printf("\nDPDK options:\n"
> -           "  --dpdk [VHOST] [DPDK]     Initialize DPDK datapath.\n"
> -           "  where DPDK are options for initializing DPDK lib and VHOST
> is\n"
> -#ifdef VHOST_CUSE
> -           "  option to override default character device name used for\n"
> -           "  for use with userspace vHost\n"
> -           "    -cuse_dev_name NAME\n"
> -#else
> -           "  option to override default directory where vhost-user\n"
> -           "  sockets are created.\n"
> -           "    -vhost_sock_dir DIR\n"
> -#endif
> -           );
>      printf("\nOther options:\n"
>             "  --unixctl=SOCKET          override default control socket
> name\n"
>             "  -h, --help                display this help message\n"
> --
> 2.6.1.133.gf5b6079
>
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
>
Panu Matilainen Dec. 4, 2015, 12:53 p.m. UTC | #2
On 12/03/2015 06:23 AM, Aaron Conole wrote:
> Existing DPDK integration is provided by use of command line options which
> must be split out and passed to librte in a special manner. However, this
> forces any configuration to be passed by way of a special DPDK flag, and
> interferes with ovs+dpdk packaging solutions.
>
> This commit delays dpdk initialization until after the OVS database connection
> is established, and then initializes librte. It pulls all of the config data
> from the OVS database, and assembles a new argv/argc pair to be passed along.

There needs to be a way to turn off DPDK even when support for it is 
built in since it requires extra setup and failing to initialize will 
tear down the whole ovs-vswitchd process. So in fact it probably should 
default to off.

It'd be nice to have something smarter than a big "DPDK on/off" switch 
in the db, such as lazy initialization only when needed (ie dpdk ports 
are added) but there are quite a few problems with that I suspect.

	- Panu -
Kevin Traynor Dec. 4, 2015, 1:58 p.m. UTC | #3
> -----Original Message-----

> From: dev [mailto:dev-bounces@openvswitch.org] On Behalf Of Aaron Conole

> Sent: Thursday, December 3, 2015 4:24 AM

> To: dev@openvswitch.org

> Cc: Flavio Leitner

> Subject: [ovs-dev] [RFC 1/2] dpdk: Convert initialization from cmdline to db

> 

> Existing DPDK integration is provided by use of command line options which

> must be split out and passed to librte in a special manner. However, this

> forces any configuration to be passed by way of a special DPDK flag, and

> interferes with ovs+dpdk packaging solutions.

> 

> This commit delays dpdk initialization until after the OVS database

> connection

> is established, and then initializes librte. It pulls all of the config data

> from the OVS database, and assembles a new argv/argc pair to be passed along.


Hi Aaron, thanks for posting. Panu makes good points, but this is a big step in
the right direction. I've a few small comments on the code below. 

> 

> Signed-off-by: Aaron Conole <aconole@redhat.com>

> ---

>  lib/netdev-dpdk.c       | 126 ++++++++++++++++++++++++++++++++--------------

> --

>  lib/netdev-dpdk.h       |  12 ++---

>  utilities/ovs-ctl.in    |  12 ++---

>  vswitchd/bridge.c       |   3 ++

>  vswitchd/ovs-vswitchd.c |  27 -----------

>  5 files changed, 99 insertions(+), 81 deletions(-)

> 

> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

> index e3a0771..f3e0840 100644

> --- a/lib/netdev-dpdk.c

> +++ b/lib/netdev-dpdk.c

> @@ -29,6 +29,7 @@

>  #include <stdio.h>

>  #include <sys/types.h>

>  #include <sys/stat.h>

> +#include <getopt.h>

> 

>  #include "dirs.h"

>  #include "dp-packet.h"

> @@ -49,6 +50,8 @@

>  #include "timeval.h"

>  #include "unixctl.h"

>  #include "openvswitch/vlog.h"

> +#include "smap.h"

> +#include "vswitch-idl.h"

> 

>  #include "rte_config.h"

>  #include "rte_mbuf.h"

> @@ -2107,10 +2110,14 @@ unlock_dpdk:

> 

>  static int

>  process_vhost_flags(char *flag, char *default_val, int size,

> -                    char **argv, char **new_val)

> +                    const struct ovsrec_open_vswitch *ovs_cfg,

> +                    char **new_val)

>  {

> +    const char *val;

>      int changed = 0;

> 

> +    val = smap_get(&ovs_cfg->other_config, flag);

> +

>      /* Depending on which version of vhost is in use, process the vhost-

> specific

>       * flag if it is provided on the vswitchd command line, otherwise resort

> to

>       * a default value.

> @@ -2120,9 +2127,9 @@ process_vhost_flags(char *flag, char *default_val, int

> size,

>       * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of

> the

>       * vhost-cuse character device.

>       */

> -    if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) {

> +    if (val && (strlen(val) <= size)) {

>          changed = 1;

> -        *new_val = strdup(argv[2]);

> +        *new_val = strdup(val);

>          VLOG_INFO("User-provided %s in use: %s", flag, *new_val);

>      } else {

>          VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);

> @@ -2132,34 +2139,77 @@ process_vhost_flags(char *flag, char *default_val,

> int size,

>      return changed;

>  }

> 

> -int

> -dpdk_init(int argc, char **argv)

> +static char **

> +grow_argv(char ***argv, size_t cur_siz, size_t grow_by)

>  {

> -    int result;

> -    int base = 0;

> -    char *pragram_name = argv[0];

> +    char **new_argv = realloc(*argv, sizeof(char *) * (cur_siz + grow_by));

> +    return new_argv;

> +}

> 

> -    if (argc < 2 || strcmp(argv[1], "--dpdk"))

> -        return 0;

> +static int

> +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv)

> +{

> +    struct dpdk_options_map {

> +        const char *ovs_configuration;

> +        const char *dpdk_option;

> +        bool default_enabled;

> +        const char *default_value;

> +    } opts[] = {

> +        {"dpdk_core_mask", "-c", true, "0x1"},

> +        {"dpdk_mem_channels", "-n", true, "4"},

> +        {"dpdk_alloc_mem", "-m", false, NULL},

> +        {"dpdk_socket_mem", "--socket-mem", false, NULL},


What do you think about adding a default for socket-mem? 

> +        {"dpdk_hugepage_dir", "--huge-dir", false, NULL},

> +    };

> +    int i, ret = 1;

> +

> +    for(i = 0; i < (sizeof(opts) / sizeof(opts[0])); ++i){

> +        const char *lookup = smap_get(&ovs_cfg->other_config,

> +                                      opts[i].ovs_configuration);

> +        if(!lookup && opts[i].default_enabled)

> +            lookup = opts[i].default_value;

> +

> +        if(lookup) {

> +            char **newargv = grow_argv(argv, ret, 2);

> +

> +            if (!newargv)

> +                return ret;


We have not completed creating our eal args. In the unlikely event of
this happening, I'm not sure if we should continue to call rte_eal_init()
as we may end up with a silent non-intended config.

> +

> +            *argv = newargv;

> +            (*argv)[ret++] = strdup(opts[i].dpdk_option);

> +            (*argv)[ret++] = strdup(lookup);

> +        }

> +    }

> 

> -    /* Remove the --dpdk argument from arg list.*/

> -    argc--;

> -    argv++;

> +    return ret;

> +}

> 

> -    /* Reject --user option */

> -    int i;

> -    for (i = 0; i < argc; i++) {

> -        if (!strcmp(argv[i], "--user")) {

> -            VLOG_ERR("Can not mix --dpdk and --user options, aborting.");

> -        }

> +void

> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)

> +{

> +    static int initialized = 0;

> +

> +    char **argv;

> +    int result;

> +    int argc;

> +

> +    if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk",

> false)){

> +        return;

>      }

> +

> +    initialized = 1;

> 

> +    VLOG_INFO("DPDK Enabled, initializing");

> +

> +    /* TODO should check for user permissions. DPDK requires root user. */

> +

>  #ifdef VHOST_CUSE

> -    if (process_vhost_flags("-cuse_dev_name", strdup("vhost-net"),

> -                            PATH_MAX, argv, &cuse_dev_name)) {

> +    if (process_vhost_flags("cuse_dev_name", strdup("vhost-net"),

> +                            PATH_MAX, ovs_cfg, &cuse_dev_name)) {

> +

>  #else

> -    if (process_vhost_flags("-vhost_sock_dir", strdup(ovs_rundir()),

> -                            NAME_MAX, argv, &vhost_sock_dir)) {

> +    if (process_vhost_flags("vhost_sock_dir", strdup(ovs_rundir()),

> +                            NAME_MAX, ovs_cfg, &vhost_sock_dir)) {

>          struct stat s;

>          int err;

> 

> @@ -2167,40 +2217,34 @@ dpdk_init(int argc, char **argv)

>          if (err) {

>              VLOG_ERR("vHostUser socket DIR '%s' does not exist.",

>                       vhost_sock_dir);

> -            return err;

> +            return;


We're not returning an error here anymore. I'm wondering should we abort
or just deal with it later down the line if vhostuser is used

>          }

>  #endif

> -        /* Remove the vhost flag configuration parameters from the argument

> -         * list, so that the correct elements are passed to the DPDK

> -         * initialization function

> -         */

> -        argc -= 2;

> -        argv += 2;    /* Increment by two to bypass the vhost flag arguments

> */

> -        base = 2;

>      }

> 

> -    /* Keep the program name argument as this is needed for call to

> -     * rte_eal_init()

> -     */

> -    argv[0] = pragram_name;

> +    argv = (char **)malloc(sizeof(char *));

> +    *argv = strdup("ovs"); /* TODO use prctl to get process name */

> +

> +    argc = get_dpdk_args(ovs_cfg, &argv);

> +

> +    optind = 1;

> 

>      /* Make sure things are initialized ... */

>      result = rte_eal_init(argc, argv);

>      if (result < 0) {

>          ovs_abort(result, "Cannot init EAL");

>      }

> +

> +    for(result = 0; result < argc-1; ++result)

> +        free(argv[result]);

> +

> +    free(argv);

> 

>      rte_memzone_dump(stdout);

>      rte_eal_init_ret = 0;

> 

> -    if (argc > result) {

> -        argv[result] = argv[0];

> -    }

> -

>      /* We are called from the main thread here */

>      RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;

> -

> -    return result + 1 + base;

>  }

> 

>  static const struct netdev_class dpdk_class =

> diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h

> index 646d3e2..e01adb3 100644

> --- a/lib/netdev-dpdk.h

> +++ b/lib/netdev-dpdk.h

> @@ -4,6 +4,7 @@

>  #include <config.h>

> 

>  struct dp_packet;

> +struct ovsrec_open_vswitch;

> 

>  #ifdef DPDK_NETDEV

> 

> @@ -22,7 +23,7 @@ struct dp_packet;

> 

>  #define NON_PMD_CORE_ID LCORE_ID_ANY

> 

> -int dpdk_init(int argc, char **argv);

> +void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg);

>  void netdev_dpdk_register(void);

>  void free_dpdk_buf(struct dp_packet *);

>  int pmd_thread_setaffinity_cpu(unsigned cpu);

> @@ -33,13 +34,10 @@ int pmd_thread_setaffinity_cpu(unsigned cpu);

> 

>  #include "util.h"

> 

> -static inline int

> -dpdk_init(int argc, char **argv)

> +static inline void

> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED)

>  {

> -    if (argc >= 2 && !strcmp(argv[1], "--dpdk")) {

> -        ovs_fatal(0, "DPDK support not built into this copy of Open

> vSwitch.");

> -    }

> -    return 0;

> +

>  }

> 

>  static inline void

> diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in

> index 0082bed..6ae8bbd 100755

> --- a/utilities/ovs-ctl.in

> +++ b/utilities/ovs-ctl.in

> @@ -73,13 +73,13 @@ insert_mod_if_required () {

>  }

> 

>  ovs_vsctl () {

> -    ovs-vsctl --no-wait "$@"

> +    @bindir@/ovs-vsctl --no-wait "$@"

>  }

> 

>  set_system_ids () {

>      set ovs_vsctl set Open_vSwitch .

> 

> -    OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'`

> +    OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'`

>      set "$@" ovs-version="$OVS_VERSION"

> 

>      case $SYSTEM_ID in

> @@ -131,7 +131,7 @@ check_force_cores () {

>  }

> 

>  del_transient_ports () {

> -    for port in `ovs-vsctl --bare -- --columns=name find port

> other_config:transient=true`; do

> +    for port in `@bindir@/ovs-vsctl --bare -- --columns=name find port

> other_config:transient=true`; do

>          ovs_vsctl -- del-port "$port"

>      done

>  }

> @@ -193,7 +193,7 @@ add_managers () {

>      # won't briefly see empty datapath-id or ofport columns for records that

>      # exist at startup.)

>      action "Enabling remote OVSDB managers" \

> -	ovs-appctl -t ovsdb-server ovsdb-server/add-remote \

> +	@bindir@/ovs-appctl -t ovsdb-server ovsdb-server/add-remote \

>  	    db:Open_vSwitch,Open_vSwitch,manager_options

>  }

> 

> @@ -215,7 +215,7 @@ start_forwarding () {

>          fi

> 

>  	    # Start ovs-vswitchd.

> -	    set ovs-vswitchd unix:"$DB_SOCK"

> +	    set @bindir@/ovs-vswitchd unix:"$DB_SOCK"

>  	    set "$@" -vconsole:emer -vsyslog:err -vfile:info

>  	    if test X"$MLOCKALL" != Xno; then

>  	        set "$@" --mlockall

> @@ -276,7 +276,7 @@ save_ofports_if_required () {

>      #

>      # (Versions 1.10 and later save OpenFlow port numbers without

> assistance,

>      # so we don't have to do anything for them.

> -    case `ovs-appctl version | sed 1q` in

> +    case `@bindir@/ovs-appctl version | sed 1q` in

>          "ovs-vswitchd (Open vSwitch) 1."[0-9].*)

>              action "Saving ofport values" ovs_save save-ofports \

>                  "${script_ofports}"

> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c

> index b966d92..d33f42c 100644

> --- a/vswitchd/bridge.c

> +++ b/vswitchd/bridge.c

> @@ -68,6 +68,7 @@

>  #include "sflow_api.h"

>  #include "vlan-bitmap.h"

>  #include "packets.h"

> +#include "lib/netdev-dpdk.h"

> 

>  VLOG_DEFINE_THIS_MODULE(bridge);

> 

> @@ -2921,6 +2922,8 @@ bridge_run(void)

>      }

>      cfg = ovsrec_open_vswitch_first(idl);

> 

> +    dpdk_init(cfg);

> +

>      /* Initialize the ofproto library.  This only needs to run once, but

>       * it must be done after the configuration is set.  If the

>       * initialization has already occurred, bridge_init_ofproto()

> diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c

> index e78ecda..ae86861 100644

> --- a/vswitchd/ovs-vswitchd.c

> +++ b/vswitchd/ovs-vswitchd.c

> @@ -48,7 +48,6 @@

>  #include "openvswitch/vconn.h"

>  #include "openvswitch/vlog.h"

>  #include "lib/vswitch-idl.h"

> -#include "lib/netdev-dpdk.h"

> 

>  VLOG_DEFINE_THIS_MODULE(vswitchd);

> 

> @@ -71,13 +70,6 @@ main(int argc, char *argv[])

>      int retval;

> 

>      set_program_name(argv[0]);

> -    retval = dpdk_init(argc,argv);

> -    if (retval < 0) {

> -        return retval;

> -    }

> -

> -    argc -= retval;

> -    argv += retval;

> 

>      ovs_cmdl_proctitle_init(argc, argv);

>      service_start(&argc, &argv);

> @@ -152,7 +144,6 @@ parse_options(int argc, char *argv[], char

> **unixctl_pathp)

>          OPT_ENABLE_DUMMY,

>          OPT_DISABLE_SYSTEM,

>          DAEMON_OPTION_ENUMS,

> -        OPT_DPDK,

>      };

>      static const struct option long_options[] = {

>          {"help",        no_argument, NULL, 'h'},

> @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char

> **unixctl_pathp)

>          {"bootstrap-ca-cert", required_argument, NULL,

> OPT_BOOTSTRAP_CA_CERT},

>          {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY},

>          {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM},

> -        {"dpdk", required_argument, NULL, OPT_DPDK},

>          {NULL, 0, NULL, 0},

>      };

>      char *short_options =

> ovs_cmdl_long_options_to_short_options(long_options);

> @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char

> **unixctl_pathp)

>          case '?':

>              exit(EXIT_FAILURE);

> 

> -        case OPT_DPDK:

> -            ovs_fatal(0, "--dpdk must be given at beginning of command

> line.");


It might be helpful to leave the case statement here and call usage() or give
a deprecation message about the dpdk cmdline params for a release or two to
ensure a smooth transition to the new scheme - otherwise it'll probably end
up on the mailing list.

> -            break;

> -

>          default:

>              abort();

>          }

> @@ -255,19 +241,6 @@ usage(void)

>      stream_usage("DATABASE", true, false, true);

>      daemon_usage();

>      vlog_usage();

> -    printf("\nDPDK options:\n"

> -           "  --dpdk [VHOST] [DPDK]     Initialize DPDK datapath.\n"

> -           "  where DPDK are options for initializing DPDK lib and VHOST

> is\n"

> -#ifdef VHOST_CUSE

> -           "  option to override default character device name used for\n"

> -           "  for use with userspace vHost\n"

> -           "    -cuse_dev_name NAME\n"

> -#else

> -           "  option to override default directory where vhost-user\n"

> -           "  sockets are created.\n"

> -           "    -vhost_sock_dir DIR\n"

> -#endif

> -           );

>      printf("\nOther options:\n"

>             "  --unixctl=SOCKET          override default control socket

> name\n"

>             "  -h, --help                display this help message\n"

> --

> 2.6.1.133.gf5b6079

> 

> _______________________________________________

> dev mailing list

> dev@openvswitch.org

> http://openvswitch.org/mailman/listinfo/dev
Aaron Conole Dec. 4, 2015, 3:19 p.m. UTC | #4
Mauricio Vásquez <mauricio.vasquezbernal@studenti.polito.it> writes:
> Dear Aaron,
>
> The general idea looks good to me.
>
> Just  some comments inline
>
> On 3 December 2015 at 05:23, Aaron Conole <aconole@redhat.com> wrote:
>
<< snipped >>
>> +        if(lookup) {
>> +            char **newargv = grow_argv(argv, ret, 2);
>> +
>> +            if (!newargv)
>> +                return ret;
>> +
>> +            *argv = newargv;
>> +            (*argv)[ret++] = strdup(opts[i].dpdk_option);
>> +            (*argv)[ret++] = strdup(lookup);
>> +        }
>> +    }
>>
>> -    /* Remove the --dpdk argument from arg list.*/
>> -    argc--;
>> -    argv++;
>> +    return ret;
>> +}
>>
>>
> argv shall be NULL terminated, i.e. argv[argc] shall be NULL.

Bah, not sure how I forgot the argv/argc conventions.
I'll fix when I submit the formal patch, thanks for catching this.

>
> -    /* Reject --user option */
>> -    int i;
>> -    for (i = 0; i < argc; i++) {
>> -        if (!strcmp(argv[i], "--user")) {
>> -            VLOG_ERR("Can not mix --dpdk and --user options, aborting.");
>> -        }
>> +void
>> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
>> +{
>> +    static int initialized = 0;
>> +
>> +    char **argv;
>> +    int result;
>> +    int argc;
>> +
<< snipped >>
>>
>>      /* Make sure things are initialized ... */
>>      result = rte_eal_init(argc, argv);
>>      if (result < 0) {
>>          ovs_abort(result, "Cannot init EAL");
>>      }
>> +
>> +    for(result = 0; result < argc-1; ++result)
>> +        free(argv[result]);
>> +
>> +    free(argv);
>>
> I am not sure if freeing argv is valid in this case, C99 standard states
> that it shall remain valid until the program terminates.

I hope dpdk doesn't rely on that behavior, but you're correct. I'll
switch to using an atexit() function that delays releasing all the memory.

Thanks so much for the review!

-Aaron
Aaron Conole Dec. 4, 2015, 3:24 p.m. UTC | #5
Panu Matilainen <pmatilai@redhat.com> writes:
> On 12/03/2015 06:23 AM, Aaron Conole wrote:
>> Existing DPDK integration is provided by use of command line options which
>> must be split out and passed to librte in a special manner. However, this
>> forces any configuration to be passed by way of a special DPDK flag, and
>> interferes with ovs+dpdk packaging solutions.
>>
>> This commit delays dpdk initialization until after the OVS database connection
>> is established, and then initializes librte. It pulls all of the config data
>> from the OVS database, and assembles a new argv/argc pair to be passed along.
>
> There needs to be a way to turn off DPDK even when support for it is
> built in since it requires extra setup and failing to initialize will
> tear down the whole ovs-vswitchd process. So in fact it probably
> should default to off.

There is such a flag, and it is defaulted to off:

>> +void
>> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
>> +{
>> +    static int initialized = 0;
>> +
>> +    char **argv;
>> +    int result;
>> +    int argc;
>> +

Just before we do any initialization.

>> +    if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", false)){
>> +        return;
>>      }


>
> It'd be nice to have something smarter than a big "DPDK on/off" switch
> in the db, such as lazy initialization only when needed (ie dpdk ports
> are added) but there are quite a few problems with that I suspect.

I can look into how to add this, because I think you're right. I will
make that an enhancement to this patch.

>
> 	- Panu -

Thanks for the review, Panu!
Aaron Conole Dec. 4, 2015, 3:36 p.m. UTC | #6
"Traynor, Kevin" <kevin.traynor@intel.com> writes:
>> -----Original Message-----
<<snipped>>
>> +static int
>> +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv)
>> +{
>> +    struct dpdk_options_map {
>> +        const char *ovs_configuration;
>> +        const char *dpdk_option;
>> +        bool default_enabled;
>> +        const char *default_value;
>> +    } opts[] = {
>> +        {"dpdk_core_mask", "-c", true, "0x1"},
>> +        {"dpdk_mem_channels", "-n", true, "4"},
>> +        {"dpdk_alloc_mem", "-m", false, NULL},
>> +        {"dpdk_socket_mem", "--socket-mem", false, NULL},
>
> What do you think about adding a default for socket-mem?

My only concern is I don't know what it should look like. This is the
per-socket memory allocation and I'm not sure what kind of default makes
sense, although with my specified defaults in the rest of the
configuration perhaps "1024,0" is a 'proper' value.

What do you think?

>> +        {"dpdk_hugepage_dir", "--huge-dir", false, NULL},
>> +    };
>> +    int i, ret = 1;
>> +
>> +    for(i = 0; i < (sizeof(opts) / sizeof(opts[0])); ++i){
>> +        const char *lookup = smap_get(&ovs_cfg->other_config,
>> +                                      opts[i].ovs_configuration);
>> +        if(!lookup && opts[i].default_enabled)
>> +            lookup = opts[i].default_value;
>> +
>> +        if(lookup) {
>> +            char **newargv = grow_argv(argv, ret, 2);
>> +
>> +            if (!newargv)
>> +                return ret;
>
> We have not completed creating our eal args. In the unlikely event of
> this happening, I'm not sure if we should continue to call rte_eal_init()
> as we may end up with a silent non-intended config.

I agree, I haven't done much negative testing here; I hope this should
not happen, but hope in one hand, etc. Perhaps I'll release the
argc/argv memory I've allocated here and return NULL. Let the init()
function deal with it (by aborting most likely).

>> +
>> +            *argv = newargv;
>> +            (*argv)[ret++] = strdup(opts[i].dpdk_option);
>> +            (*argv)[ret++] = strdup(lookup);
>> +        }

<<snipped>>
>>  #else
>> -    if (process_vhost_flags("-vhost_sock_dir", strdup(ovs_rundir()),
>> -                            NAME_MAX, argv, &vhost_sock_dir)) {
>> +    if (process_vhost_flags("vhost_sock_dir", strdup(ovs_rundir()),
>> +                            NAME_MAX, ovs_cfg, &vhost_sock_dir)) {
>>          struct stat s;
>>          int err;
>> 
>> @@ -2167,40 +2217,34 @@ dpdk_init(int argc, char **argv)
>>          if (err) {
>>              VLOG_ERR("vHostUser socket DIR '%s' does not exist.",
>>                       vhost_sock_dir);
>> -            return err;
>> +            return;
>
> We're not returning an error here anymore. I'm wondering should we abort
> or just deal with it later down the line if vhostuser is used

I was thinking of dealing with it if/when vhostuser socket is
used. Actually, what I planned on doing was proposing a directory
creation flag so that the user sees it to 'just work', but that's beyond
the scope of this patch.

Since we did return an error previously, and that error was used to
terminate the program, perhaps I'll do an ovs_abort() for now.

<<snipped code>>
>>      static const struct option long_options[] = {
>>          {"help",        no_argument, NULL, 'h'},
>> @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char
>> **unixctl_pathp)
>>          {"bootstrap-ca-cert", required_argument, NULL,
>> OPT_BOOTSTRAP_CA_CERT},
>>          {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY},
>>          {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM},
>> -        {"dpdk", required_argument, NULL, OPT_DPDK},
>>          {NULL, 0, NULL, 0},
>>      };
>>      char *short_options =
>> ovs_cmdl_long_options_to_short_options(long_options);
>> @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char
>> **unixctl_pathp)
>>          case '?':
>>              exit(EXIT_FAILURE);
>> 
>> -        case OPT_DPDK:
>> -            ovs_fatal(0, "--dpdk must be given at beginning of command
>> line.");
>
> It might be helpful to leave the case statement here and call usage() or give
> a deprecation message about the dpdk cmdline params for a release or two to
> ensure a smooth transition to the new scheme - otherwise it'll probably end
> up on the mailing list.

I waffled on whether or not to change it to be "--dpdk is deprecated" or
just remove it, but there's probably a user who will complain that we
removed the DPDK feature. Okay, I'll update the NEWS file, and mention
that the configuration of DPDK via command line is deprecated in favor
of the db approach.

Thanks for the review, Kevin!
Zoltan Kiss Dec. 4, 2015, 3:52 p.m. UTC | #7
On 03/12/15 04:23, Aaron Conole wrote:
> Existing DPDK integration is provided by use of command line options which
> must be split out and passed to librte in a special manner. However, this
> forces any configuration to be passed by way of a special DPDK flag, and
> interferes with ovs+dpdk packaging solutions.
>
> This commit delays dpdk initialization until after the OVS database connection
> is established, and then initializes librte. It pulls all of the config data
> from the OVS database, and assembles a new argv/argc pair to be passed along.
>
> Signed-off-by: Aaron Conole <aconole@redhat.com>
> ---
>   lib/netdev-dpdk.c       | 126 ++++++++++++++++++++++++++++++++----------------
>   lib/netdev-dpdk.h       |  12 ++---
>   utilities/ovs-ctl.in    |  12 ++---
>   vswitchd/bridge.c       |   3 ++
>   vswitchd/ovs-vswitchd.c |  27 -----------
>   5 files changed, 99 insertions(+), 81 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index e3a0771..f3e0840 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c

[snip]

> +void
> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
> +{
> +    static int initialized = 0;
> +
> +    char **argv;
> +    int result;
> +    int argc;
> +
> +    if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", false)){
> +        return;

Your second patch mentions that any change for any of the config needs 
an ovs-vswitchd restart, which is probably the right thing to do.
But this function is called from bridge_run(), so a false -> true change 
takes immediate effect, which may not be what the user expects. I think 
it's better to wrap the bool check into an ovsthread_once_start() to 
make sure that it is only checked once.

>       }
> +
> +    initialized = 1;
>
> +    VLOG_INFO("DPDK Enabled, initializing");
> +
> +    /* TODO should check for user permissions. DPDK requires root user. */
> +

[snip]

> --- a/utilities/ovs-ctl.in
> +++ b/utilities/ovs-ctl.in
> @@ -73,13 +73,13 @@ insert_mod_if_required () {
>   }
>
>   ovs_vsctl () {
> -    ovs-vsctl --no-wait "$@"
> +    @bindir@/ovs-vsctl --no-wait "$@"
>   }
>
>   set_system_ids () {
>       set ovs_vsctl set Open_vSwitch .
>
> -    OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'`
> +    OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'`

Why do you need to prepend all these command lines with @bindir@

[snip]
> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
> index b966d92..d33f42c 100644
> --- a/vswitchd/bridge.c
> +++ b/vswitchd/bridge.c
> @@ -68,6 +68,7 @@
>   #include "sflow_api.h"
>   #include "vlan-bitmap.h"
>   #include "packets.h"
> +#include "lib/netdev-dpdk.h"
>
>   VLOG_DEFINE_THIS_MODULE(bridge);
>
> @@ -2921,6 +2922,8 @@ bridge_run(void)
>       }
>       cfg = ovsrec_open_vswitch_first(idl);
>
> +    dpdk_init(cfg);

You should check 'cfg != NULL', just as bridge_init_ofproto() does.

> +
>       /* Initialize the ofproto library.  This only needs to run once, but
>        * it must be done after the configuration is set.  If the
>        * initialization has already occurred, bridge_init_ofproto()
Aaron Conole Dec. 4, 2015, 4:04 p.m. UTC | #8
Zoltan Kiss <zoltan.kiss@linaro.org> writes:
> On 03/12/15 04:23, Aaron Conole wrote:
>> Existing DPDK integration is provided by use of command line options which
>> must be split out and passed to librte in a special manner. However, this
>> forces any configuration to be passed by way of a special DPDK flag, and
>> interferes with ovs+dpdk packaging solutions.
>>
>> This commit delays dpdk initialization until after the OVS database connection
>> is established, and then initializes librte. It pulls all of the config data
>> from the OVS database, and assembles a new argv/argc pair to be passed along.
>>
>> Signed-off-by: Aaron Conole <aconole@redhat.com>
>> ---
>>   lib/netdev-dpdk.c | 126
>> ++++++++++++++++++++++++++++++++----------------
>>   lib/netdev-dpdk.h       |  12 ++---
>>   utilities/ovs-ctl.in    |  12 ++---
>>   vswitchd/bridge.c       |   3 ++
>>   vswitchd/ovs-vswitchd.c |  27 -----------
>>   5 files changed, 99 insertions(+), 81 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index e3a0771..f3e0840 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>
> [snip]
>
>> +void
>> +dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
>> +{
>> +    static int initialized = 0;
>> +
>> +    char **argv;
>> +    int result;
>> +    int argc;
>> +
>> +    if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", false)){
>> +        return;
>
> Your second patch mentions that any change for any of the config needs
> an ovs-vswitchd restart, which is probably the right thing to do.
> But this function is called from bridge_run(), so a false -> true
> change takes immediate effect, which may not be what the user
> expects. I think it's better to wrap the bool check into an
> ovsthread_once_start() to make sure that it is only checked once.

Good catch. I'll do that.

>
>>       }
>> +
>> +    initialized = 1;
>>
>> +    VLOG_INFO("DPDK Enabled, initializing");
>> +
>> +    /* TODO should check for user permissions. DPDK requires root user. */
>> +
>
> [snip]
>
>> --- a/utilities/ovs-ctl.in
>> +++ b/utilities/ovs-ctl.in
>> @@ -73,13 +73,13 @@ insert_mod_if_required () {
>>   }
>>
>>   ovs_vsctl () {
>> -    ovs-vsctl --no-wait "$@"
>> +    @bindir@/ovs-vsctl --no-wait "$@"
>>   }
>>
>>   set_system_ids () {
>>       set ovs_vsctl set Open_vSwitch .
>>
>> -    OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'`
>> +    OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'`
>
> Why do you need to prepend all these command lines with @bindir@

I didn't intend to include this with this patch series, but the answer
is in how systemd integration is done. The way the systemd code works,
it calls ovs_ctl to start everything (good). ovs_ctl relies on the path,
though. There's a block at the beginning where a new path is built up,
but there is an edge case in there. I'll drop this change and submit it
separately.

> [snip]
>> diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
>> index b966d92..d33f42c 100644
>> --- a/vswitchd/bridge.c
>> +++ b/vswitchd/bridge.c
>> @@ -68,6 +68,7 @@
>>   #include "sflow_api.h"
>>   #include "vlan-bitmap.h"
>>   #include "packets.h"
>> +#include "lib/netdev-dpdk.h"
>>
>>   VLOG_DEFINE_THIS_MODULE(bridge);
>>
>> @@ -2921,6 +2922,8 @@ bridge_run(void)
>>       }
>>       cfg = ovsrec_open_vswitch_first(idl);
>>
>> +    dpdk_init(cfg);
>
> You should check 'cfg != NULL', just as bridge_init_ofproto() does.

Yes, I will.

Thanks for the review, Zoltan!
Kevin Traynor Dec. 4, 2015, 4:16 p.m. UTC | #9
> -----Original Message-----
> From: Aaron Conole [mailto:aconole@redhat.com]
> Sent: Friday, December 4, 2015 3:37 PM
> To: Traynor, Kevin
> Cc: dev@openvswitch.org; Flavio Leitner
> Subject: Re: [ovs-dev] [RFC 1/2] dpdk: Convert initialization from cmdline to
> db
> 
> "Traynor, Kevin" <kevin.traynor@intel.com> writes:
> >> -----Original Message-----
> <<snipped>>
> >> +static int
> >> +get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv)
> >> +{
> >> +    struct dpdk_options_map {
> >> +        const char *ovs_configuration;
> >> +        const char *dpdk_option;
> >> +        bool default_enabled;
> >> +        const char *default_value;
> >> +    } opts[] = {
> >> +        {"dpdk_core_mask", "-c", true, "0x1"},
> >> +        {"dpdk_mem_channels", "-n", true, "4"},
> >> +        {"dpdk_alloc_mem", "-m", false, NULL},
> >> +        {"dpdk_socket_mem", "--socket-mem", false, NULL},
> >
> > What do you think about adding a default for socket-mem?
> 
> My only concern is I don't know what it should look like. This is the
> per-socket memory allocation and I'm not sure what kind of default makes
> sense, although with my specified defaults in the rest of the
> configuration perhaps "1024,0" is a 'proper' value.
> 
> What do you think?

I think "1024,0" is reasonable default for users who don't want to know too much
low level config. 

> <<snipped code>>
> >>      static const struct option long_options[] = {
> >>          {"help",        no_argument, NULL, 'h'},
> >> @@ -166,7 +157,6 @@ parse_options(int argc, char *argv[], char
> >> **unixctl_pathp)
> >>          {"bootstrap-ca-cert", required_argument, NULL,
> >> OPT_BOOTSTRAP_CA_CERT},
> >>          {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY},
> >>          {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM},
> >> -        {"dpdk", required_argument, NULL, OPT_DPDK},
> >>          {NULL, 0, NULL, 0},
> >>      };
> >>      char *short_options =
> >> ovs_cmdl_long_options_to_short_options(long_options);
> >> @@ -218,10 +208,6 @@ parse_options(int argc, char *argv[], char
> >> **unixctl_pathp)
> >>          case '?':
> >>              exit(EXIT_FAILURE);
> >>
> >> -        case OPT_DPDK:
> >> -            ovs_fatal(0, "--dpdk must be given at beginning of command
> >> line.");
> >
> > It might be helpful to leave the case statement here and call usage() or
> give
> > a deprecation message about the dpdk cmdline params for a release or two to
> > ensure a smooth transition to the new scheme - otherwise it'll probably end
> > up on the mailing list.
> 
> I waffled on whether or not to change it to be "--dpdk is deprecated" or
> just remove it, but there's probably a user who will complain that we
> removed the DPDK feature. Okay, I'll update the NEWS file, and mention
> that the configuration of DPDK via command line is deprecated in favor
> of the db approach.

Yes, please don't put "--dpdk is deprecated" anywhere in the code base!!

It's a cleaner cut to code as you did, but worries me that a user will just
pull from the repo, run the their setup script and suddenly it won't run with
no error message to say why. 

> 
> Thanks for the review, Kevin!
diff mbox

Patch

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index e3a0771..f3e0840 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -29,6 +29,7 @@ 
 #include <stdio.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <getopt.h>
 
 #include "dirs.h"
 #include "dp-packet.h"
@@ -49,6 +50,8 @@ 
 #include "timeval.h"
 #include "unixctl.h"
 #include "openvswitch/vlog.h"
+#include "smap.h"
+#include "vswitch-idl.h"
 
 #include "rte_config.h"
 #include "rte_mbuf.h"
@@ -2107,10 +2110,14 @@  unlock_dpdk:
 
 static int
 process_vhost_flags(char *flag, char *default_val, int size,
-                    char **argv, char **new_val)
+                    const struct ovsrec_open_vswitch *ovs_cfg,
+                    char **new_val)
 {
+    const char *val;
     int changed = 0;
 
+    val = smap_get(&ovs_cfg->other_config, flag);
+    
     /* Depending on which version of vhost is in use, process the vhost-specific
      * flag if it is provided on the vswitchd command line, otherwise resort to
      * a default value.
@@ -2120,9 +2127,9 @@  process_vhost_flags(char *flag, char *default_val, int size,
      * For vhost-cuse: Process "-cuse_dev_name" to set the custom name of the
      * vhost-cuse character device.
      */
-    if (!strcmp(argv[1], flag) && (strlen(argv[2]) <= size)) {
+    if (val && (strlen(val) <= size)) {
         changed = 1;
-        *new_val = strdup(argv[2]);
+        *new_val = strdup(val);
         VLOG_INFO("User-provided %s in use: %s", flag, *new_val);
     } else {
         VLOG_INFO("No %s provided - defaulting to %s", flag, default_val);
@@ -2132,34 +2139,77 @@  process_vhost_flags(char *flag, char *default_val, int size,
     return changed;
 }
 
-int
-dpdk_init(int argc, char **argv)
+static char **
+grow_argv(char ***argv, size_t cur_siz, size_t grow_by)
 {
-    int result;
-    int base = 0;
-    char *pragram_name = argv[0];
+    char **new_argv = realloc(*argv, sizeof(char *) * (cur_siz + grow_by));
+    return new_argv;
+}
 
-    if (argc < 2 || strcmp(argv[1], "--dpdk"))
-        return 0;
+static int
+get_dpdk_args(const struct ovsrec_open_vswitch *ovs_cfg, char ***argv)
+{
+    struct dpdk_options_map {
+        const char *ovs_configuration;
+        const char *dpdk_option;
+        bool default_enabled;
+        const char *default_value;
+    } opts[] = {
+        {"dpdk_core_mask", "-c", true, "0x1"},
+        {"dpdk_mem_channels", "-n", true, "4"},
+        {"dpdk_alloc_mem", "-m", false, NULL},
+        {"dpdk_socket_mem", "--socket-mem", false, NULL},
+        {"dpdk_hugepage_dir", "--huge-dir", false, NULL},
+    };
+    int i, ret = 1;
+
+    for(i = 0; i < (sizeof(opts) / sizeof(opts[0])); ++i){
+        const char *lookup = smap_get(&ovs_cfg->other_config,
+                                      opts[i].ovs_configuration);
+        if(!lookup && opts[i].default_enabled)
+            lookup = opts[i].default_value;
+        
+        if(lookup) {
+            char **newargv = grow_argv(argv, ret, 2);
+            
+            if (!newargv)
+                return ret;
+            
+            *argv = newargv;
+            (*argv)[ret++] = strdup(opts[i].dpdk_option);
+            (*argv)[ret++] = strdup(lookup);
+        }
+    }
 
-    /* Remove the --dpdk argument from arg list.*/
-    argc--;
-    argv++;
+    return ret;
+}
 
-    /* Reject --user option */
-    int i;
-    for (i = 0; i < argc; i++) {
-        if (!strcmp(argv[i], "--user")) {
-            VLOG_ERR("Can not mix --dpdk and --user options, aborting.");
-        }
+void
+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg)
+{
+    static int initialized = 0;
+
+    char **argv;
+    int result;
+    int argc;
+    
+    if(initialized || !smap_get_bool(&ovs_cfg->other_config, "dpdk", false)){
+        return;
     }
+    
+    initialized = 1;
 
+    VLOG_INFO("DPDK Enabled, initializing");
+    
+    /* TODO should check for user permissions. DPDK requires root user. */
+    
 #ifdef VHOST_CUSE
-    if (process_vhost_flags("-cuse_dev_name", strdup("vhost-net"),
-                            PATH_MAX, argv, &cuse_dev_name)) {
+    if (process_vhost_flags("cuse_dev_name", strdup("vhost-net"),
+                            PATH_MAX, ovs_cfg, &cuse_dev_name)) {
+
 #else
-    if (process_vhost_flags("-vhost_sock_dir", strdup(ovs_rundir()),
-                            NAME_MAX, argv, &vhost_sock_dir)) {
+    if (process_vhost_flags("vhost_sock_dir", strdup(ovs_rundir()),
+                            NAME_MAX, ovs_cfg, &vhost_sock_dir)) {
         struct stat s;
         int err;
 
@@ -2167,40 +2217,34 @@  dpdk_init(int argc, char **argv)
         if (err) {
             VLOG_ERR("vHostUser socket DIR '%s' does not exist.",
                      vhost_sock_dir);
-            return err;
+            return;
         }
 #endif
-        /* Remove the vhost flag configuration parameters from the argument
-         * list, so that the correct elements are passed to the DPDK
-         * initialization function
-         */
-        argc -= 2;
-        argv += 2;    /* Increment by two to bypass the vhost flag arguments */
-        base = 2;
     }
 
-    /* Keep the program name argument as this is needed for call to
-     * rte_eal_init()
-     */
-    argv[0] = pragram_name;
+    argv = (char **)malloc(sizeof(char *));
+    *argv = strdup("ovs"); /* TODO use prctl to get process name */
+    
+    argc = get_dpdk_args(ovs_cfg, &argv);
+    
+    optind = 1;
 
     /* Make sure things are initialized ... */
     result = rte_eal_init(argc, argv);
     if (result < 0) {
         ovs_abort(result, "Cannot init EAL");
     }
+    
+    for(result = 0; result < argc-1; ++result)
+        free(argv[result]);
+
+    free(argv);
 
     rte_memzone_dump(stdout);
     rte_eal_init_ret = 0;
 
-    if (argc > result) {
-        argv[result] = argv[0];
-    }
-
     /* We are called from the main thread here */
     RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
-
-    return result + 1 + base;
 }
 
 static const struct netdev_class dpdk_class =
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 646d3e2..e01adb3 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -4,6 +4,7 @@ 
 #include <config.h>
 
 struct dp_packet;
+struct ovsrec_open_vswitch;
 
 #ifdef DPDK_NETDEV
 
@@ -22,7 +23,7 @@  struct dp_packet;
 
 #define NON_PMD_CORE_ID LCORE_ID_ANY
 
-int dpdk_init(int argc, char **argv);
+void dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg);
 void netdev_dpdk_register(void);
 void free_dpdk_buf(struct dp_packet *);
 int pmd_thread_setaffinity_cpu(unsigned cpu);
@@ -33,13 +34,10 @@  int pmd_thread_setaffinity_cpu(unsigned cpu);
 
 #include "util.h"
 
-static inline int
-dpdk_init(int argc, char **argv)
+static inline void
+dpdk_init(const struct ovsrec_open_vswitch *ovs_cfg OVS_UNUSED)
 {
-    if (argc >= 2 && !strcmp(argv[1], "--dpdk")) {
-        ovs_fatal(0, "DPDK support not built into this copy of Open vSwitch.");
-    }
-    return 0;
+
 }
 
 static inline void
diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in
index 0082bed..6ae8bbd 100755
--- a/utilities/ovs-ctl.in
+++ b/utilities/ovs-ctl.in
@@ -73,13 +73,13 @@  insert_mod_if_required () {
 }
 
 ovs_vsctl () {
-    ovs-vsctl --no-wait "$@"
+    @bindir@/ovs-vsctl --no-wait "$@"
 }
 
 set_system_ids () {
     set ovs_vsctl set Open_vSwitch .
 
-    OVS_VERSION=`ovs-vswitchd --version | sed 's/.*) //;1q'`
+    OVS_VERSION=`@bindir@/ovs-vswitchd --version | sed 's/.*) //;1q'`
     set "$@" ovs-version="$OVS_VERSION"
 
     case $SYSTEM_ID in
@@ -131,7 +131,7 @@  check_force_cores () {
 }
 
 del_transient_ports () {
-    for port in `ovs-vsctl --bare -- --columns=name find port other_config:transient=true`; do
+    for port in `@bindir@/ovs-vsctl --bare -- --columns=name find port other_config:transient=true`; do
         ovs_vsctl -- del-port "$port"
     done
 }
@@ -193,7 +193,7 @@  add_managers () {
     # won't briefly see empty datapath-id or ofport columns for records that
     # exist at startup.)
     action "Enabling remote OVSDB managers" \
-	ovs-appctl -t ovsdb-server ovsdb-server/add-remote \
+	@bindir@/ovs-appctl -t ovsdb-server ovsdb-server/add-remote \
 	    db:Open_vSwitch,Open_vSwitch,manager_options
 }
 
@@ -215,7 +215,7 @@  start_forwarding () {
         fi
 
 	    # Start ovs-vswitchd.
-	    set ovs-vswitchd unix:"$DB_SOCK"
+	    set @bindir@/ovs-vswitchd unix:"$DB_SOCK"
 	    set "$@" -vconsole:emer -vsyslog:err -vfile:info
 	    if test X"$MLOCKALL" != Xno; then
 	        set "$@" --mlockall
@@ -276,7 +276,7 @@  save_ofports_if_required () {
     #
     # (Versions 1.10 and later save OpenFlow port numbers without assistance,
     # so we don't have to do anything for them.
-    case `ovs-appctl version | sed 1q` in
+    case `@bindir@/ovs-appctl version | sed 1q` in
         "ovs-vswitchd (Open vSwitch) 1."[0-9].*)
             action "Saving ofport values" ovs_save save-ofports \
                 "${script_ofports}"
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index b966d92..d33f42c 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -68,6 +68,7 @@ 
 #include "sflow_api.h"
 #include "vlan-bitmap.h"
 #include "packets.h"
+#include "lib/netdev-dpdk.h"
 
 VLOG_DEFINE_THIS_MODULE(bridge);
 
@@ -2921,6 +2922,8 @@  bridge_run(void)
     }
     cfg = ovsrec_open_vswitch_first(idl);
 
+    dpdk_init(cfg);
+    
     /* Initialize the ofproto library.  This only needs to run once, but
      * it must be done after the configuration is set.  If the
      * initialization has already occurred, bridge_init_ofproto()
diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index e78ecda..ae86861 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -48,7 +48,6 @@ 
 #include "openvswitch/vconn.h"
 #include "openvswitch/vlog.h"
 #include "lib/vswitch-idl.h"
-#include "lib/netdev-dpdk.h"
 
 VLOG_DEFINE_THIS_MODULE(vswitchd);
 
@@ -71,13 +70,6 @@  main(int argc, char *argv[])
     int retval;
 
     set_program_name(argv[0]);
-    retval = dpdk_init(argc,argv);
-    if (retval < 0) {
-        return retval;
-    }
-
-    argc -= retval;
-    argv += retval;
 
     ovs_cmdl_proctitle_init(argc, argv);
     service_start(&argc, &argv);
@@ -152,7 +144,6 @@  parse_options(int argc, char *argv[], char **unixctl_pathp)
         OPT_ENABLE_DUMMY,
         OPT_DISABLE_SYSTEM,
         DAEMON_OPTION_ENUMS,
-        OPT_DPDK,
     };
     static const struct option long_options[] = {
         {"help",        no_argument, NULL, 'h'},
@@ -166,7 +157,6 @@  parse_options(int argc, char *argv[], char **unixctl_pathp)
         {"bootstrap-ca-cert", required_argument, NULL, OPT_BOOTSTRAP_CA_CERT},
         {"enable-dummy", optional_argument, NULL, OPT_ENABLE_DUMMY},
         {"disable-system", no_argument, NULL, OPT_DISABLE_SYSTEM},
-        {"dpdk", required_argument, NULL, OPT_DPDK},
         {NULL, 0, NULL, 0},
     };
     char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
@@ -218,10 +208,6 @@  parse_options(int argc, char *argv[], char **unixctl_pathp)
         case '?':
             exit(EXIT_FAILURE);
 
-        case OPT_DPDK:
-            ovs_fatal(0, "--dpdk must be given at beginning of command line.");
-            break;
-
         default:
             abort();
         }
@@ -255,19 +241,6 @@  usage(void)
     stream_usage("DATABASE", true, false, true);
     daemon_usage();
     vlog_usage();
-    printf("\nDPDK options:\n"
-           "  --dpdk [VHOST] [DPDK]     Initialize DPDK datapath.\n"
-           "  where DPDK are options for initializing DPDK lib and VHOST is\n"
-#ifdef VHOST_CUSE
-           "  option to override default character device name used for\n"
-           "  for use with userspace vHost\n"
-           "    -cuse_dev_name NAME\n"
-#else
-           "  option to override default directory where vhost-user\n"
-           "  sockets are created.\n"
-           "    -vhost_sock_dir DIR\n"
-#endif
-           );
     printf("\nOther options:\n"
            "  --unixctl=SOCKET          override default control socket name\n"
            "  -h, --help                display this help message\n"