diff mbox

[ovs-dev,RFC,v2,2/4] netdev-dpdk: Apply ingress_sched config to dpdk phy ports

Message ID 1499792313-27606-3-git-send-email-billy.o.mahony@intel.com
State Superseded
Headers show

Commit Message

Billy O'Mahony July 11, 2017, 4:58 p.m. UTC
Ingress scheduling configuration is given effect by way of Flow Director
filters. A small subset of the possible ingress scheduling possible is
implemented in this patch.

Signed-off-by: Billy O'Mahony <billy.o.mahony@intel.com>
---
 include/openvswitch/ofp-parse.h |   3 ++
 lib/dpif-netdev.c               |   1 +
 lib/netdev-dpdk.c               | 117 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+)

Comments

Darrell Ball July 11, 2017, 5:48 p.m. UTC | #1
On 7/11/17, 9:58 AM, "ovs-dev-bounces@openvswitch.org on behalf of Billy O'Mahony" <ovs-dev-bounces@openvswitch.org on behalf of billy.o.mahony@intel.com> wrote:

    Ingress scheduling configuration is given effect by way of Flow Director
    filters. A small subset of the possible ingress scheduling possible is
    implemented in this patch.
    
    Signed-off-by: Billy O'Mahony <billy.o.mahony@intel.com>

    ---
     include/openvswitch/ofp-parse.h |   3 ++
     lib/dpif-netdev.c               |   1 +
     lib/netdev-dpdk.c               | 117 ++++++++++++++++++++++++++++++++++++++++
     3 files changed, 121 insertions(+)
    
    diff --git a/include/openvswitch/ofp-parse.h b/include/openvswitch/ofp-parse.h
    index fc5784e..08d6086 100644
    --- a/include/openvswitch/ofp-parse.h
    +++ b/include/openvswitch/ofp-parse.h
    @@ -37,6 +37,9 @@ struct ofputil_table_mod;
     struct ofputil_bundle_msg;
     struct ofputil_tlv_table_mod;
     struct simap;
    +struct tun_table;
    +struct flow_wildcards;
    +struct ofputil_port_map;
     enum ofputil_protocol;
     
     char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_,
    diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
    index 2f224db..66712c7 100644
    --- a/lib/dpif-netdev.c
    +++ b/lib/dpif-netdev.c
    @@ -44,6 +44,7 @@
     #include "dp-packet.h"
     #include "dpif.h"
     #include "dpif-provider.h"
    +#include "netdev-provider.h"
     #include "dummy.h"
     #include "fat-rwlock.h"
     #include "flow.h"
    diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
    index d14c381..93556e7 100644
    --- a/lib/netdev-dpdk.c
    +++ b/lib/netdev-dpdk.c
    @@ -33,6 +33,8 @@
     #include <rte_meter.h>
     #include <rte_virtio_net.h>
     
    +#include <openvswitch/ofp-parse.h>
    +#include <openvswitch/ofp-util.h>
     #include "dirs.h"
     #include "dp-packet.h"
     #include "dpdk.h"
    @@ -168,6 +170,10 @@ static const struct rte_eth_conf port_conf = {
         .txmode = {
             .mq_mode = ETH_MQ_TX_NONE,
         },
    +    .fdir_conf = {
    +        .mode = RTE_FDIR_MODE_PERFECT,
    +    },
    +
     };
     
     enum { DPDK_RING_SIZE = 256 };
    @@ -652,6 +658,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
         int i;
         struct rte_eth_conf conf = port_conf;
     
    +    /* Ingress scheduling requires ETH_MQ_RX_NONE so limit it to when exactly
    +     * two rxqs are defined. Otherwise MQ will not work as expected. */
    +    if (dev->ingress_sched_str && n_rxq == 2) {
    +        conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
    +    }
    +    else {
    +        conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
    +    }
    +
         if (dev->mtu > ETHER_MTU) {
             conf.rxmode.jumbo_frame = 1;
             conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
    @@ -752,6 +767,106 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex)
         }
     }
     
    +static void
    +dpdk_apply_ingress_scheduling(struct netdev_dpdk *dev, int n_rxq)
    +{
    +    if (!dev->ingress_sched_str) {
    +        return;
    +    }
    +
    +    if (n_rxq != 2) {
    +        VLOG_ERR("Interface %s: Ingress scheduling config ignored; " \
    +                 "Requires n_rxq==2.", dev->up.name);
    +    }
    +
    +    int priority_q_id = n_rxq-1;
    +    char *key, *val, *str, *iter;
    +
    +    ovs_be32 ip_src, ip_dst;
    +    ip_src = ip_dst = 0;
    +
    +    uint16_t eth_type, port_src, port_dst;
    +    eth_type = port_src = port_dst = 0;
    +    uint8_t ip_proto = 0;
    +
    +    char *mallocd_str; /* str_to_x returns malloc'd str we'll need to free */
    +    /* Parse the configuration into local vars */
    +    iter = str = xstrdup(dev->ingress_sched_str);
    +    while (ofputil_parse_key_value (&iter, &key, &val)) {
    +        if (strcmp(key, "nw_src") == 0 || strcmp(key, "ip_src") == 0) {
    +            mallocd_str = str_to_ip(val, &ip_src);
    +        } else if (strcmp(key, "nw_dst") == 0 || strcmp(key, "ip_dst") == 0) {
    +            mallocd_str = str_to_ip(val, &ip_dst);
    +        } else if (strcmp(key, "dl_type") == 0 ||
    +                   strcmp(key, "eth_type") == 0) {
    +            mallocd_str = str_to_u16(val, "eth_type/dl_type", &eth_type);
    +        } else if (strcmp(key, "tcp_src") == 0 ||
    +                  strcmp(key, "tp_src") == 0 ||
    +                  strcmp(key, "udp_src") == 0) {
    +            mallocd_str = str_to_u16(val, "tcp/udp_src", &port_src);
    +        } else if (strcmp(key, "tcp_dst") == 0 ||
    +                   strcmp(key, "tp_dst") == 0 ||
    +                   strcmp(key, "udp_dst") == 0) {
    +            mallocd_str = str_to_u16(val, "tcp/udp_dst", &port_dst);
    +        } else if (strcmp(key, "ip") == 0) {
    +            eth_type = 0x0800;
    +        } else if (strcmp(key, "udp") == 0) {
    +            eth_type = 0x0800;
    +            ip_proto = 17;
    +        } else if (strcmp(key, "tcp") == 0) {
    +            eth_type = 0x0800;
    +            ip_proto = 6;
    +        } else {
    +            VLOG_WARN("Ignoring unsupported ingress scheduling field '%s'", \
    +                      key);
    +        }
    +        if (mallocd_str) {
    +            VLOG_ERR ("%s", mallocd_str);
    +            free(mallocd_str);
    +            mallocd_str = NULL;
    +        }
    +    }
    +    free (str);
    +
    +    /* Set the filters */
    +    int diag = 0;
    +    if (eth_type && ip_src && ip_dst && port_src && port_dst && ip_proto) {
    +        struct rte_eth_fdir_filter entry = { 0 };
    +        entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
    +        entry.input.flow.udp4_flow.ip.src_ip = ip_src;
    +        entry.input.flow.udp4_flow.ip.dst_ip = ip_dst;
    +        entry.input.flow.udp4_flow.src_port = htons(port_src);
    +        entry.input.flow.udp4_flow.dst_port = htons(port_dst);
    +        entry.action.rx_queue = priority_q_id;
    +        entry.action.behavior = RTE_ETH_FDIR_ACCEPT;
    +        entry.action.report_status = RTE_ETH_FDIR_REPORT_ID;
    +        diag = rte_eth_dev_filter_ctrl(dev->port_id,
    +                RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry);
    +    }
    +    else if (eth_type && !ip_src && !ip_dst && !port_src
    +             && !port_dst && !ip_proto) {
    +        struct rte_eth_ethertype_filter entry = {0};
    +        memset (&entry, 0, sizeof entry);
    +        entry.ether_type = eth_type;
    +        entry.flags = 0;
    +        entry.queue = priority_q_id;
    +        diag = rte_eth_dev_filter_ctrl(dev->port_id,
    +                RTE_ETH_FILTER_ETHERTYPE, RTE_ETH_FILTER_ADD, &entry);
    +    }
    +    else {
    +        VLOG_ERR("Unsupported ingress scheduling match-field combination.");

The above ‘if’ and ‘else if’ cases seem quite limiting;
can we have just ‘port_dst’ or just ‘ip_src’ for examples ?


    +        return;
    +    }
    +
    +    if (diag) {
    +        VLOG_ERR("Failed to add ingress scheduling filter.");

I am sure you thought about this already, but this may be hard for the user to know
what might be allowed; it would a be trial and error exercise.

Would an interface ‘capability’ help here ?


    +    }
    +    else {
    +        /* Mark the appropriate q as prioritized */
    +        dev->up.priority_rxq = priority_q_id;
    +    }
    +}
    +
     static int
     dpdk_eth_dev_init(struct netdev_dpdk *dev)
         OVS_REQUIRES(dev->mutex)
    @@ -774,6 +889,8 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
             return -diag;
         }
     
    +    dpdk_apply_ingress_scheduling(dev, n_rxq);
    +
         diag = rte_eth_dev_start(dev->port_id);
         if (diag) {
             VLOG_ERR("Interface %s start error: %s", dev->up.name,
    -- 
    2.7.4
    
    _______________________________________________
    dev mailing list
    dev@openvswitch.org
    https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=mD-1gqbGI1_5EN7JASS5cT8B1gcLrBXG-Bb1yRgrihU&s=mCo9sG_4f7mU8sW-qBwmEEs-fRC77rOa_SO5G0wtOhc&e=
Billy O'Mahony July 12, 2017, 6:38 p.m. UTC | #2
> -----Original Message-----

> From: Darrell Ball [mailto:dball@vmware.com]

> Sent: Tuesday, July 11, 2017 6:49 PM

> To: O Mahony, Billy <billy.o.mahony@intel.com>; dev@openvswitch.org

> Subject: Re: [ovs-dev] [RFC v2 2/4] netdev-dpdk: Apply ingress_sched config

> to dpdk phy ports

> 

> 

> 

> On 7/11/17, 9:58 AM, "ovs-dev-bounces@openvswitch.org on behalf of Billy

> O'Mahony" <ovs-dev-bounces@openvswitch.org on behalf of

> billy.o.mahony@intel.com> wrote:

> 

>     Ingress scheduling configuration is given effect by way of Flow Director

>     filters. A small subset of the possible ingress scheduling possible is

>     implemented in this patch.

> 

>     Signed-off-by: Billy O'Mahony <billy.o.mahony@intel.com>

>     ---

>      include/openvswitch/ofp-parse.h |   3 ++

>      lib/dpif-netdev.c               |   1 +

>      lib/netdev-dpdk.c               | 117

> ++++++++++++++++++++++++++++++++++++++++

>      3 files changed, 121 insertions(+)

> 

>     diff --git a/include/openvswitch/ofp-parse.h b/include/openvswitch/ofp-

> parse.h

>     index fc5784e..08d6086 100644

>     --- a/include/openvswitch/ofp-parse.h

>     +++ b/include/openvswitch/ofp-parse.h

>     @@ -37,6 +37,9 @@ struct ofputil_table_mod;

>      struct ofputil_bundle_msg;

>      struct ofputil_tlv_table_mod;

>      struct simap;

>     +struct tun_table;

>     +struct flow_wildcards;

>     +struct ofputil_port_map;

>      enum ofputil_protocol;

> 

>      char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char

> *str_,

>     diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c

>     index 2f224db..66712c7 100644

>     --- a/lib/dpif-netdev.c

>     +++ b/lib/dpif-netdev.c

>     @@ -44,6 +44,7 @@

>      #include "dp-packet.h"

>      #include "dpif.h"

>      #include "dpif-provider.h"

>     +#include "netdev-provider.h"

>      #include "dummy.h"

>      #include "fat-rwlock.h"

>      #include "flow.h"

>     diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c

>     index d14c381..93556e7 100644

>     --- a/lib/netdev-dpdk.c

>     +++ b/lib/netdev-dpdk.c

>     @@ -33,6 +33,8 @@

>      #include <rte_meter.h>

>      #include <rte_virtio_net.h>

> 

>     +#include <openvswitch/ofp-parse.h>

>     +#include <openvswitch/ofp-util.h>

>      #include "dirs.h"

>      #include "dp-packet.h"

>      #include "dpdk.h"

>     @@ -168,6 +170,10 @@ static const struct rte_eth_conf port_conf = {

>          .txmode = {

>              .mq_mode = ETH_MQ_TX_NONE,

>          },

>     +    .fdir_conf = {

>     +        .mode = RTE_FDIR_MODE_PERFECT,

>     +    },

>     +

>      };

> 

>      enum { DPDK_RING_SIZE = 256 };

>     @@ -652,6 +658,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk

> *dev, int n_rxq, int n_txq)

>          int i;

>          struct rte_eth_conf conf = port_conf;

> 

>     +    /* Ingress scheduling requires ETH_MQ_RX_NONE so limit it to when

> exactly

>     +     * two rxqs are defined. Otherwise MQ will not work as expected. */

>     +    if (dev->ingress_sched_str && n_rxq == 2) {

>     +        conf.rxmode.mq_mode = ETH_MQ_RX_NONE;

>     +    }

>     +    else {

>     +        conf.rxmode.mq_mode = ETH_MQ_RX_RSS;

>     +    }

>     +

>          if (dev->mtu > ETHER_MTU) {

>              conf.rxmode.jumbo_frame = 1;

>              conf.rxmode.max_rx_pkt_len = dev->max_packet_len;

>     @@ -752,6 +767,106 @@ dpdk_eth_flow_ctrl_setup(struct netdev_dpdk

> *dev) OVS_REQUIRES(dev->mutex)

>          }

>      }

> 

>     +static void

>     +dpdk_apply_ingress_scheduling(struct netdev_dpdk *dev, int n_rxq)

>     +{

>     +    if (!dev->ingress_sched_str) {

>     +        return;

>     +    }

>     +

>     +    if (n_rxq != 2) {

>     +        VLOG_ERR("Interface %s: Ingress scheduling config ignored; " \

>     +                 "Requires n_rxq==2.", dev->up.name);

>     +    }

>     +

>     +    int priority_q_id = n_rxq-1;

>     +    char *key, *val, *str, *iter;

>     +

>     +    ovs_be32 ip_src, ip_dst;

>     +    ip_src = ip_dst = 0;

>     +

>     +    uint16_t eth_type, port_src, port_dst;

>     +    eth_type = port_src = port_dst = 0;

>     +    uint8_t ip_proto = 0;

>     +

>     +    char *mallocd_str; /* str_to_x returns malloc'd str we'll need to free */

>     +    /* Parse the configuration into local vars */

>     +    iter = str = xstrdup(dev->ingress_sched_str);

>     +    while (ofputil_parse_key_value (&iter, &key, &val)) {

>     +        if (strcmp(key, "nw_src") == 0 || strcmp(key, "ip_src") == 0) {

>     +            mallocd_str = str_to_ip(val, &ip_src);

>     +        } else if (strcmp(key, "nw_dst") == 0 || strcmp(key, "ip_dst") == 0) {

>     +            mallocd_str = str_to_ip(val, &ip_dst);

>     +        } else if (strcmp(key, "dl_type") == 0 ||

>     +                   strcmp(key, "eth_type") == 0) {

>     +            mallocd_str = str_to_u16(val, "eth_type/dl_type", &eth_type);

>     +        } else if (strcmp(key, "tcp_src") == 0 ||

>     +                  strcmp(key, "tp_src") == 0 ||

>     +                  strcmp(key, "udp_src") == 0) {

>     +            mallocd_str = str_to_u16(val, "tcp/udp_src", &port_src);

>     +        } else if (strcmp(key, "tcp_dst") == 0 ||

>     +                   strcmp(key, "tp_dst") == 0 ||

>     +                   strcmp(key, "udp_dst") == 0) {

>     +            mallocd_str = str_to_u16(val, "tcp/udp_dst", &port_dst);

>     +        } else if (strcmp(key, "ip") == 0) {

>     +            eth_type = 0x0800;

>     +        } else if (strcmp(key, "udp") == 0) {

>     +            eth_type = 0x0800;

>     +            ip_proto = 17;

>     +        } else if (strcmp(key, "tcp") == 0) {

>     +            eth_type = 0x0800;

>     +            ip_proto = 6;

>     +        } else {

>     +            VLOG_WARN("Ignoring unsupported ingress scheduling field '%s'",

> \

>     +                      key);

>     +        }

>     +        if (mallocd_str) {

>     +            VLOG_ERR ("%s", mallocd_str);

>     +            free(mallocd_str);

>     +            mallocd_str = NULL;

>     +        }

>     +    }

>     +    free (str);

>     +

>     +    /* Set the filters */

>     +    int diag = 0;

>     +    if (eth_type && ip_src && ip_dst && port_src && port_dst &&

> ip_proto) {

>     +        struct rte_eth_fdir_filter entry = { 0 };

>     +        entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;

>     +        entry.input.flow.udp4_flow.ip.src_ip = ip_src;

>     +        entry.input.flow.udp4_flow.ip.dst_ip = ip_dst;

>     +        entry.input.flow.udp4_flow.src_port = htons(port_src);

>     +        entry.input.flow.udp4_flow.dst_port = htons(port_dst);

>     +        entry.action.rx_queue = priority_q_id;

>     +        entry.action.behavior = RTE_ETH_FDIR_ACCEPT;

>     +        entry.action.report_status = RTE_ETH_FDIR_REPORT_ID;

>     +        diag = rte_eth_dev_filter_ctrl(dev->port_id,

>     +                RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry);

>     +    }

>     +    else if (eth_type && !ip_src && !ip_dst && !port_src

>     +             && !port_dst && !ip_proto) {

>     +        struct rte_eth_ethertype_filter entry = {0};

>     +        memset (&entry, 0, sizeof entry);

>     +        entry.ether_type = eth_type;

>     +        entry.flags = 0;

>     +        entry.queue = priority_q_id;

>     +        diag = rte_eth_dev_filter_ctrl(dev->port_id,

>     +                RTE_ETH_FILTER_ETHERTYPE, RTE_ETH_FILTER_ADD, &entry);

>     +    }

>     +    else {

>     +        VLOG_ERR("Unsupported ingress scheduling match-field

> combination.");

> 

> The above ‘if’ and ‘else if’ cases seem quite limiting; can we have just

> ‘port_dst’ or just ‘ip_src’ for examples ?

[[BO'M]] Hopefully yes all these kinds of permutations and combinations would be supported. It depends ultimately on what the underlying functionality (in this case the NIC plus dpdk drivers) supports. 
> 

> 

>     +        return;

>     +    }

>     +

>     +    if (diag) {

>     +        VLOG_ERR("Failed to add ingress scheduling filter.");

> 

> I am sure you thought about this already, but this may be hard for the user to

> know what might be allowed; it would a be trial and error exercise.

> 

> Would an interface ‘capability’ help here ?

[[BO'M]] My initial reaction is that I'm not sure it would. 
If an attempt is made to prioritize certain traffic and it fails then nothing is lost. Everything should work the same as if no prioritization specified.

Maybe there could be a case where the 'user' want to prioritize traffic of type=x or y or z (say three different eth_types) but failing that will prioritize traffic just of type x. But the code to use to discover the capability (or lack of it in this case) would be more complicated than just trying the ideal case and then falling back to the simpler case.
 
Would the user of the capability discovery api be code (ie a higher up layer of logic) or a human on the cli. (or a program using the cli).

Though there is no checking parse of the config in netdev_dpdk_set_ingress_sched() which could be used to detect some unsupported configurations before the reconfig is applied (by which time it's too late to pass ENOTSUPP) back to the netdev client. I'll see if I can do something about that in a later version. 

> 

> 

>     +    }

>     +    else {

>     +        /* Mark the appropriate q as prioritized */

>     +        dev->up.priority_rxq = priority_q_id;

>     +    }

>     +}

>     +

>      static int

>      dpdk_eth_dev_init(struct netdev_dpdk *dev)

>          OVS_REQUIRES(dev->mutex)

>     @@ -774,6 +889,8 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)

>              return -diag;

>          }

> 

>     +    dpdk_apply_ingress_scheduling(dev, n_rxq);

>     +

>          diag = rte_eth_dev_start(dev->port_id);

>          if (diag) {

>              VLOG_ERR("Interface %s start error: %s", dev->up.name,

>     --

>     2.7.4

> 

>     _______________________________________________

>     dev mailing list

>     dev@openvswitch.org

>     https://urldefense.proofpoint.com/v2/url?u=https-

> 3A__mail.openvswitch.org_mailman_listinfo_ovs-

> 2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-

> uZnsw&m=mD-1gqbGI1_5EN7JASS5cT8B1gcLrBXG-

> Bb1yRgrihU&s=mCo9sG_4f7mU8sW-qBwmEEs-fRC77rOa_SO5G0wtOhc&e=

>
diff mbox

Patch

diff --git a/include/openvswitch/ofp-parse.h b/include/openvswitch/ofp-parse.h
index fc5784e..08d6086 100644
--- a/include/openvswitch/ofp-parse.h
+++ b/include/openvswitch/ofp-parse.h
@@ -37,6 +37,9 @@  struct ofputil_table_mod;
 struct ofputil_bundle_msg;
 struct ofputil_tlv_table_mod;
 struct simap;
+struct tun_table;
+struct flow_wildcards;
+struct ofputil_port_map;
 enum ofputil_protocol;
 
 char *parse_ofp_str(struct ofputil_flow_mod *, int command, const char *str_,
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2f224db..66712c7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -44,6 +44,7 @@ 
 #include "dp-packet.h"
 #include "dpif.h"
 #include "dpif-provider.h"
+#include "netdev-provider.h"
 #include "dummy.h"
 #include "fat-rwlock.h"
 #include "flow.h"
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index d14c381..93556e7 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -33,6 +33,8 @@ 
 #include <rte_meter.h>
 #include <rte_virtio_net.h>
 
+#include <openvswitch/ofp-parse.h>
+#include <openvswitch/ofp-util.h>
 #include "dirs.h"
 #include "dp-packet.h"
 #include "dpdk.h"
@@ -168,6 +170,10 @@  static const struct rte_eth_conf port_conf = {
     .txmode = {
         .mq_mode = ETH_MQ_TX_NONE,
     },
+    .fdir_conf = {
+        .mode = RTE_FDIR_MODE_PERFECT,
+    },
+
 };
 
 enum { DPDK_RING_SIZE = 256 };
@@ -652,6 +658,15 @@  dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int n_rxq, int n_txq)
     int i;
     struct rte_eth_conf conf = port_conf;
 
+    /* Ingress scheduling requires ETH_MQ_RX_NONE so limit it to when exactly
+     * two rxqs are defined. Otherwise MQ will not work as expected. */
+    if (dev->ingress_sched_str && n_rxq == 2) {
+        conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+    }
+    else {
+        conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+    }
+
     if (dev->mtu > ETHER_MTU) {
         conf.rxmode.jumbo_frame = 1;
         conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
@@ -752,6 +767,106 @@  dpdk_eth_flow_ctrl_setup(struct netdev_dpdk *dev) OVS_REQUIRES(dev->mutex)
     }
 }
 
+static void
+dpdk_apply_ingress_scheduling(struct netdev_dpdk *dev, int n_rxq)
+{
+    if (!dev->ingress_sched_str) {
+        return;
+    }
+
+    if (n_rxq != 2) {
+        VLOG_ERR("Interface %s: Ingress scheduling config ignored; " \
+                 "Requires n_rxq==2.", dev->up.name);
+    }
+
+    int priority_q_id = n_rxq-1;
+    char *key, *val, *str, *iter;
+
+    ovs_be32 ip_src, ip_dst;
+    ip_src = ip_dst = 0;
+
+    uint16_t eth_type, port_src, port_dst;
+    eth_type = port_src = port_dst = 0;
+    uint8_t ip_proto = 0;
+
+    char *mallocd_str; /* str_to_x returns malloc'd str we'll need to free */
+    /* Parse the configuration into local vars */
+    iter = str = xstrdup(dev->ingress_sched_str);
+    while (ofputil_parse_key_value (&iter, &key, &val)) {
+        if (strcmp(key, "nw_src") == 0 || strcmp(key, "ip_src") == 0) {
+            mallocd_str = str_to_ip(val, &ip_src);
+        } else if (strcmp(key, "nw_dst") == 0 || strcmp(key, "ip_dst") == 0) {
+            mallocd_str = str_to_ip(val, &ip_dst);
+        } else if (strcmp(key, "dl_type") == 0 ||
+                   strcmp(key, "eth_type") == 0) {
+            mallocd_str = str_to_u16(val, "eth_type/dl_type", &eth_type);
+        } else if (strcmp(key, "tcp_src") == 0 ||
+                  strcmp(key, "tp_src") == 0 ||
+                  strcmp(key, "udp_src") == 0) {
+            mallocd_str = str_to_u16(val, "tcp/udp_src", &port_src);
+        } else if (strcmp(key, "tcp_dst") == 0 ||
+                   strcmp(key, "tp_dst") == 0 ||
+                   strcmp(key, "udp_dst") == 0) {
+            mallocd_str = str_to_u16(val, "tcp/udp_dst", &port_dst);
+        } else if (strcmp(key, "ip") == 0) {
+            eth_type = 0x0800;
+        } else if (strcmp(key, "udp") == 0) {
+            eth_type = 0x0800;
+            ip_proto = 17;
+        } else if (strcmp(key, "tcp") == 0) {
+            eth_type = 0x0800;
+            ip_proto = 6;
+        } else {
+            VLOG_WARN("Ignoring unsupported ingress scheduling field '%s'", \
+                      key);
+        }
+        if (mallocd_str) {
+            VLOG_ERR ("%s", mallocd_str);
+            free(mallocd_str);
+            mallocd_str = NULL;
+        }
+    }
+    free (str);
+
+    /* Set the filters */
+    int diag = 0;
+    if (eth_type && ip_src && ip_dst && port_src && port_dst && ip_proto) {
+        struct rte_eth_fdir_filter entry = { 0 };
+        entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
+        entry.input.flow.udp4_flow.ip.src_ip = ip_src;
+        entry.input.flow.udp4_flow.ip.dst_ip = ip_dst;
+        entry.input.flow.udp4_flow.src_port = htons(port_src);
+        entry.input.flow.udp4_flow.dst_port = htons(port_dst);
+        entry.action.rx_queue = priority_q_id;
+        entry.action.behavior = RTE_ETH_FDIR_ACCEPT;
+        entry.action.report_status = RTE_ETH_FDIR_REPORT_ID;
+        diag = rte_eth_dev_filter_ctrl(dev->port_id,
+                RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_ADD, &entry);
+    }
+    else if (eth_type && !ip_src && !ip_dst && !port_src
+             && !port_dst && !ip_proto) {
+        struct rte_eth_ethertype_filter entry = {0};
+        memset (&entry, 0, sizeof entry);
+        entry.ether_type = eth_type;
+        entry.flags = 0;
+        entry.queue = priority_q_id;
+        diag = rte_eth_dev_filter_ctrl(dev->port_id,
+                RTE_ETH_FILTER_ETHERTYPE, RTE_ETH_FILTER_ADD, &entry);
+    }
+    else {
+        VLOG_ERR("Unsupported ingress scheduling match-field combination.");
+        return;
+    }
+
+    if (diag) {
+        VLOG_ERR("Failed to add ingress scheduling filter.");
+    }
+    else {
+        /* Mark the appropriate q as prioritized */
+        dev->up.priority_rxq = priority_q_id;
+    }
+}
+
 static int
 dpdk_eth_dev_init(struct netdev_dpdk *dev)
     OVS_REQUIRES(dev->mutex)
@@ -774,6 +889,8 @@  dpdk_eth_dev_init(struct netdev_dpdk *dev)
         return -diag;
     }
 
+    dpdk_apply_ingress_scheduling(dev, n_rxq);
+
     diag = rte_eth_dev_start(dev->port_id);
     if (diag) {
         VLOG_ERR("Interface %s start error: %s", dev->up.name,