diff mbox

[ovs-dev,1/1] KNI support in OVS-DPDK

Message ID SG2PR03MB14568C6609A87610589BE0C89FBB0@SG2PR03MB1456.apcprd03.prod.outlook.com
State Changes Requested
Headers show

Commit Message

ravali.burra@wipro.com March 1, 2016, 10:29 a.m. UTC
Hi Team,



Please find the configuration and patch details of KNI support in OVS-DPDK



This patch contains support for creating KNI interfaces.



It also has the required modifications in netdev-dpdk.c,

netdev.c files for creation/deletion of kni interfaces in ovs.



This patch also adds NETDEV_DPDK_CLASS w.r.t dpdkkni and also has some

changes in structure for netdev-dpdk for the support of kni interfaces.



Implemented below helper functions

*         netdev_dpdk_veth_construct

*         netdev_dpdk_veth_destruct

*         netdev_dpdk_veth_send

*          netdev_dpdk_veth_get_stats

*          netdev_dpdk_veth_rxq_recv.


Commands to handle for creation/deletion of KNI interfaces


ovs-vsctl --no-wait add-port <br0> <vEth0> -- set Interface <vEth0>
type=dpdkkni   //adds the KNI interface
ovs-vsctl --no-wait del-port <br0> <vEth0>  //deletes the KNI interface
Appropriate ovs-ofctl rules to be added to forward/receive packet to/from KNI
Signed-off-by: Ravali Burra ravali.burra@wipro.com<mailto:ravali.burra@wipro.com>
void
netdev_rxq_wait(struct netdev_rxq *rx)
{
+  if (rx->netdev->netdev_class->rxq_wait != NULL)
     rx->netdev->netdev_class->rxq_wait(rx);
}

Thanks & Regards,
Ravali
The information contained in this electronic message and any attachments to this message are intended for the exclusive use of the addressee(s) and may contain proprietary, confidential or privileged information. If you are not the intended recipient, you should not disseminate, distribute or copy this e-mail. Please notify the sender immediately and destroy all copies of this message and any attachments. WARNING: Computer viruses can be transmitted via email. The recipient should check this email and any attachments for the presence of viruses. The company accepts no liability for any damage caused by any virus transmitted by this email. www.wipro.com

Comments

Aaron Conole March 1, 2016, 4:50 p.m. UTC | #1
Hi Ravali,

Anywhere in my response that has the words "coding style" is likely a
place I stopped reviewing for correctness. In general there are quite a
few spots where it was that way. Don't assume that things where no
comments appear are okay, I just did a quick review.

Your subject line is incorrect. It should indicate which
module is being touched something like:

[PATCH] netdev-dpdk: Add KNI support

There is no NEWS, INSTALL.DPDK.md, or any other documentation change to
go with this, which should be included.

-Aaron

<ravali.burra@wipro.com> writes:

> Hi Team,
>
>
>
> Please find the configuration and patch details of KNI support in OVS-DPDK
>
>
>
> This patch contains support for creating KNI interfaces.

KNI is an out-of-tree kernel interface, and my understanding is that DPDK
upstream is trying to find ways to remove it. That should be confirmed /
kept in mind for this.

> It also has the required modifications in netdev-dpdk.c,
>
> netdev.c files for creation/deletion of kni interfaces in ovs.
>
>
>
> This patch also adds NETDEV_DPDK_CLASS w.r.t dpdkkni and also has some
>
> changes in structure for netdev-dpdk for the support of kni interfaces.
>
>
>
> Implemented below helper functions
>
> *         netdev_dpdk_veth_construct
>
> *         netdev_dpdk_veth_destruct
>
> *         netdev_dpdk_veth_send
>
> *          netdev_dpdk_veth_get_stats
>
> *          netdev_dpdk_veth_rxq_recv.
>
>
> Commands to handle for creation/deletion of KNI interfaces
>
>
> ovs-vsctl --no-wait add-port <br0> <vEth0> -- set Interface <vEth0>
> type=dpdkkni   //adds the KNI interface
> ovs-vsctl --no-wait del-port <br0> <vEth0>  //deletes the KNI interface
> Appropriate ovs-ofctl rules to be added to forward/receive packet to/from KNI
> Signed-off-by: Ravali Burra ravali.burra@wipro.com<mailto:ravali.burra@wipro.com>
This Signed-off-by line looks mangled, perhaps by your mail client?

Your diff is not a git diff, please retry formatting that way.

The following file doesn't exist in openvswitch.
> --- /root/base/nopenvswitch-2.4.0.1/DPDK/lib/librte_kni/rte_kni.c    2015-04-03 10:13:45.000000000 -0400
> +++ /root/base/openvswitch-2.4.0.1/DPDK/lib/librte_kni/rte_kni.c     2016-03-01 09:02:21.449863228 -0500
> @@ -432,7 +432,7 @@
>     dev_info.sync_va = mz->addr;
>     dev_info.sync_phys = mz->phys_addr;
> -
> +        memset(mz_name,0, sizeof(mz_name));
>     /* MBUF mempool */
>     snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
>           pktmbuf_pool->name);
> @@ -749,3 +749,8 @@
>     close(kni_fd);
>     kni_fd = -1;
> }
> +int
> +rte_kni_get_fd(void)
> +{
> +    return kni_fd;
> +}

The following file doesn't exist in openvswitch.
> --- /root/base/nopenvswitch-2.4.0.1/DPDK/lib/librte_kni/rte_kni.h    2015-04-03 10:13:45.000000000 -0400
> +++ /root/base/openvswitch-2.4.0.1/DPDK/lib/librte_kni/rte_kni.h     2016-03-01 09:04:26.128870730 -0500
> @@ -297,6 +297,15 @@
>   *   void
>   */
> extern void rte_kni_close(void);
> +/**
> + *  get fd for KNI device.
> + *
> + *  @param void
> + *
> + *  @return
> + *   int
> + */
> +extern int rte_kni_get_fd(void);
>  #ifdef __cplusplus
> }

The following file doesn't exist in openvswitch.
> --- /root/base/nopenvswitch-2.4.0.1/DPDK/lib/librte_eal/linuxapp/kni/kni_net.c     2015-04-03 10:13:45.000000000 -0400
> +++ /root/base/openvswitch-2.4.0.1/DPDK/lib/librte_eal/linuxapp/kni/kni_net.c     2016-03-01 09:43:46.935012781 -0500
> @@ -72,12 +72,13 @@
>      if (kni->lad_dev)
>           memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
> -    else
> +    else if(strlen(dev->dev_addr)==0){
>           /*
>            * Generate random mac address. eth_random_addr() is the newer
>            * version of generating mac address in linux kernel.
>            */
>           random_ether_addr(dev->dev_addr);
> +    }
>      netif_start_queue(dev);
> --- /root/base/nopenvswitch-2.4.0.1/openvswitch-2.4.0/lib/netdev-dpdk.c     2015-08-20 20:18:21.386479276 -0400
> +++ /root/base/openvswitch-2.4.0.1/openvswitch-2.4.0/lib/netdev-dpdk.c     2016-03-01 10:01:11.569075636 -0500
> @@ -53,6 +53,7 @@
> #include "rte_config.h"
> #include "rte_mbuf.h"
> #include "rte_virtio_net.h"
> +#include <rte_kni.h>
Please follow the include quoting style

>  VLOG_DEFINE_THIS_MODULE(dpdk);
> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
> @@ -62,6 +63,13 @@
> #define OVS_CACHE_LINE_SIZE CACHE_LINE_SIZE
> #define OVS_VPORT_DPDK "ovs_dpdk"
> +#define MAX_PACKET_SZ               2048
> +#define NUM_KNI_INTERFACES          16
> +#define PORT_VETH_RX_BURST_SIZE     32
> +#define PORT_VETH_TX_BURST_SIZE     32
> +#define VETH_RETRY_NUM         4
> +#define VETH_RETRY_DELAY_US    10
> +
> /*
>   * need to reserve tons of extra space in the mbufs so we can align the
>   * DMA addresses to 4KB.
> @@ -130,6 +138,7 @@
> enum dpdk_dev_type {
>      DPDK_DEV_ETH = 0,
>      DPDK_DEV_VHOST = 1,
> +    DPDK_DEV_KNI = 2
> };
>  static int rte_eal_init_ret = ENODEV;
> @@ -223,7 +232,10 @@
>      /* virtio-net structure for vhost device */
>      OVSRCU_TYPE(struct virtio_net *) virtio_dev;
> -
> +
what happened here?

> +    /* kni structure for veth device */
> +    struct rte_kni *veth;
> +
>      /* Identifier used to distinguish vhost devices from each other */
>      char vhost_id[PATH_MAX];
> @@ -235,6 +247,7 @@
>      struct netdev_rxq up;
>      int port_id;
> };
> +int kni_dpdk_set_mac(struct netdev_dpdk *);
>  static bool thread_is_pmd(void);
> @@ -535,7 +548,49 @@
>      dev->flags = NETDEV_UP | NETDEV_PROMISC;
>      return 0;
> }
> +static int
> +dpdk_veth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
> +{
> +    int err=0;
> +    struct rte_kni *veth = NULL;
> +    struct rte_kni_conf conf;
> +    static int port_id = 1;
> +
> +    if (rte_kni_get(dev->up.name)){
> +        VLOG_ERR("Kni interface %s already exists",dev->up.name);
> +        return EAGAIN;
> +    }
> +           /* invoke init before rte_kni_alloc.Currently hardcoded the num of kni interfaces as 2  */
coding style

> +     if (rte_kni_get_fd() == -1)
> +         rte_kni_init(NUM_KNI_INTERFACES);
> +
> +     kni_dpdk_set_mac(dev);
> +        /* Clear conf at first */
coding style

> +     memset(&conf, 0, sizeof(conf));
> +     memcpy(conf.name, dev->up.name, RTE_KNI_NAMESIZE);
> +     conf.group_id = (uint16_t)port_id; /* currently group id is hardcoded */
> +     conf.mbuf_size = MAX_PACKET_SZ;
> +
> +     veth = rte_kni_alloc(dev->dpdk_mp->mp, &conf, NULL);
> +     if (veth !=  NULL){
> +        port_id ++; /* increment the port id for the next kni interface */
> +        dev->veth = veth;
> +     }
> +     else
> +        err = ENOMEM;
> +
> +     return err;
> +}
> +int kni_dpdk_set_mac(struct netdev_dpdk *dev)
> +{
> +        struct ether_addr addr;
> +        if(!is_zero_ether_addr(&addr)){
> +                eth_random_addr(&addr);
> +                memcpy(dev->hwaddr, addr.addr_bytes,ETH_ADDR_LEN);
> +        }
> +        return 0;
> +}
> static struct netdev_dpdk *
> netdev_dpdk_cast(const struct netdev *netdev)
> {
> @@ -619,6 +674,12 @@
>              goto unlock;
>          }
>      }
> +    else if(type == DPDK_DEV_KNI) {
> +        err = dpdk_veth_dev_init(netdev);
> +        if (err) {
> +            goto unlock;
> +        }
> +    }
>      list_push_back(&dpdk_list, &netdev->list_node);
> @@ -693,7 +754,25 @@
>      ovs_mutex_unlock(&dpdk_mutex);
>      return err;
> }
> +static int
> +netdev_dpdk_veth_construct(struct netdev *netdev_)
> +{
> +    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
> +    int err=0;
> +
> +    if (rte_eal_init_ret) {
> +        return rte_eal_init_ret;
> +    }
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    err = netdev_dpdk_init(netdev_, -1, DPDK_DEV_KNI);
> +    ovs_mutex_unlock(&dpdk_mutex);
> +    rte_spinlock_init(&netdev->vhost_tx_lock); /* using same structure host_tx_lock for kni */
> +
> +    return err;
> +
> +}
> static int
> netdev_dpdk_construct(struct netdev *netdev)
> {
> @@ -748,7 +827,23 @@
>      dpdk_mp_put(dev->dpdk_mp);
>      ovs_mutex_unlock(&dpdk_mutex);
> }
> +static void
> +netdev_dpdk_veth_destruct(struct netdev *netdev_)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_);
> +    ovs_mutex_lock(&dev->mutex);
> +    if (rte_kni_release(dev->veth)) {
> +        VLOG_ERR("Failed to release KNI interface");
failed to release the lock here
> +        return;
> +    }
> +    ovs_mutex_unlock(&dev->mutex);
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    list_remove(&dev->list_node);
> +    dpdk_mp_put(dev->dpdk_mp);
> +    ovs_mutex_unlock(&dpdk_mutex);
> +}
> static void
> netdev_dpdk_dealloc(struct netdev *netdev_)
> {
> @@ -972,7 +1067,36 @@
>      *c = (int) nb_rx;
>      return 0;
> }
> +static int
> +netdev_dpdk_veth_rxq_recv(struct netdev_rxq *rxq_,
> +                           struct dp_packet **packets, int *c)
> +{
> +    struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_);
> +    struct netdev *netdev = rx->up.netdev;
> +    struct netdev_dpdk *veth_dev = netdev_dpdk_cast(netdev);
> +    uint16_t nb_rx = 0;
> +    struct rte_kni *veth = NULL;
> +    veth = veth_dev->veth;
> +
> +    if (veth != NULL)
> +    {
> +      /* receive and free */
> +        nb_rx =  rte_kni_rx_burst(veth, (struct rte_mbuf **)packets, PORT_VETH_RX_BURST_SIZE);
> + /* handle callbacks (i.e. ifconfig) before we return */
coding style (not just whitespace, but the link length)
> +        rte_kni_handle_request(veth);
> +        if (!nb_rx) {
> +           return EAGAIN;
> +     }
> +     rte_spinlock_lock(&veth_dev->stats_lock);
> +     veth_dev->stats.rx_packets += (uint64_t)nb_rx;
> +     rte_spinlock_unlock(&veth_dev->stats_lock);
> +
> +     *c = (int) nb_rx;
> +     return 0;
> +  }
> +  return EAGAIN;
> +}
> static int
> netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
>                       int *c)
> @@ -1191,6 +1315,62 @@
>      }
>      return 0;
> }
> +static int
> +netdev_dpdk_veth_send(struct netdev *netdev_, int qid OVS_UNUSED, struct dp_packet **pkts,
> +                 int cnt, bool may_steal)
> +{
> +        struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
> +        struct rte_kni *veth = NULL;
> +        struct rte_mbuf **cur_pkts =  (struct rte_mbuf **)pkts;
> +        unsigned int tx_pkts = 0, dropped_pkts = 0, i;
> +
> +        if (!thread_is_pmd()) {
> +            ovs_mutex_lock(&nonpmd_mempool_mutex);
> +        }
> +        veth = netdev->veth;
> +        if (veth == NULL)
> +        {
coding style
> +            rte_spinlock_lock(&netdev->stats_lock);
> +            netdev->stats.tx_dropped+= cnt;
> +            rte_spinlock_unlock(&netdev->stats_lock);
> +            goto out;
> +
> +        }
> +        rte_spinlock_lock(&netdev->vhost_tx_lock);
> +
> +       for (i=0;i<cnt;i++)
> +       {
Coding style
> +           if (cur_pkts[i]->data_off < RTE_PKTMBUF_HEADROOM)
> +               ++dropped_pkts;
> +
Coding style
> +           else if (rte_kni_tx_burst(veth,
> +                           (struct rte_mbuf **) &cur_pkts[i], 1))
> +                ++tx_pkts;
> +           else
> +                ++dropped_pkts;
> +
> +       }
> +        rte_spinlock_unlock(&netdev->vhost_tx_lock);
> +
> +         /* statistics for the no of packets sent */
> +        rte_spinlock_lock(&netdev->stats_lock);
> +        netdev->stats.tx_packets += tx_pkts;
> +        netdev->stats.tx_dropped += dropped_pkts;
> +        rte_spinlock_unlock(&netdev->stats_lock);
> +
> +        if (!thread_is_pmd()) {
> +            ovs_mutex_unlock(&nonpmd_mempool_mutex);
> +        }
> +
> +out:    if (may_steal) {
This out is in the wrong spot, and the coding style on this is not
appropriate.

> +        int i;
> +        for (i = 0; i < cnt; i++) {
> +            dp_packet_delete(pkts[i]);
> +        }
> +        }
> +        return 0;
> +
> +}
>  static inline void
> netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
> @@ -1394,7 +1574,45 @@
>      return 0;
> }
> +static int
> +netdev_dpdk_veth_get_stats(const struct netdev *netdev,
> +                            struct netdev_stats *stats)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +
> +    ovs_mutex_lock(&dev->mutex);
> +    memset(stats, 0, sizeof(*stats));
> +    /* Unsupported Stats */
> +    stats->rx_errors = UINT64_MAX;
> +    stats->tx_errors = UINT64_MAX;
> +    stats->multicast = UINT64_MAX;
> +    stats->collisions = UINT64_MAX;
> +    stats->rx_crc_errors = UINT64_MAX;
> +    stats->rx_fifo_errors = UINT64_MAX;
> +    stats->rx_frame_errors = UINT64_MAX;
> +    stats->rx_length_errors = UINT64_MAX;
> +    stats->rx_missed_errors = UINT64_MAX;
> +    stats->rx_over_errors = UINT64_MAX;
> +    stats->tx_aborted_errors = UINT64_MAX;
> +    stats->tx_carrier_errors = UINT64_MAX;
> +    stats->tx_errors = UINT64_MAX;
> +    stats->tx_fifo_errors = UINT64_MAX;
> +    stats->tx_heartbeat_errors = UINT64_MAX;
> +    stats->tx_window_errors = UINT64_MAX;
> +    stats->rx_bytes += UINT64_MAX;
> +    stats->rx_dropped += UINT64_MAX;
> +    stats->tx_bytes += UINT64_MAX;
> +
> +    rte_spinlock_lock(&dev->stats_lock);
> +    /* Supported Stats */
> +    stats->rx_packets += dev->stats.rx_packets;
> +    stats->tx_packets += dev->stats.tx_packets;
> +    stats->tx_dropped += dev->stats.tx_dropped;
> +    rte_spinlock_unlock(&dev->stats_lock);
> +    ovs_mutex_unlock(&dev->mutex);
> +    return 0;
> +}
> static int
> netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
> {
> @@ -2177,6 +2395,20 @@
>          NULL,
>          netdev_dpdk_vhost_rxq_recv);
> +static const struct netdev_class dpdk_kni_class =
> +    NETDEV_DPDK_CLASS(
> +        "dpdkkni",
> +        NULL,
> +        netdev_dpdk_veth_construct,
> +        netdev_dpdk_veth_destruct,
> +        NULL,
> +        netdev_dpdk_veth_send,
> +        NULL,
> +        netdev_dpdk_veth_get_stats,
> +        NULL,
> +        NULL,
> +        netdev_dpdk_veth_rxq_recv);
> +
> void
> netdev_dpdk_register(void)
> {
> @@ -2195,6 +2427,7 @@
> #else
>          netdev_register_provider(&dpdk_vhost_user_class);
> #endif
> +    netdev_register_provider(&dpdk_kni_class);
It would be good to (like with VHOST_CUSE and VHOST_USER) keep this as
compile-time enabled/disabled.

>          ovsthread_once_done(&once);
>      }
> }
> --- /root/base/nopenvswitch-2.4.0.1/openvswitch-2.4.0/lib/netdev.c   2015-08-20 20:18:21.430479276 -0400
> +++ /root/base/openvswitch-2.4.0.1/openvswitch-2.4.0/lib/netdev.c    2016-03-01 09:41:53.295005943 -0500
> @@ -112,7 +112,8 @@
>      return (!strcmp(netdev->netdev_class->type, "dpdk") ||
>              !strcmp(netdev->netdev_class->type, "dpdkr") ||
>              !strcmp(netdev->netdev_class->type, "dpdkvhostcuse") ||
> -            !strcmp(netdev->netdev_class->type, "dpdkvhostuser"));
> +            !strcmp(netdev->netdev_class->type, "dpdkvhostuser")||
> +            !strcmp(netdev->netdev_class->type, "dpdkkni"));
Maybe it's a good time to refactor this somehow? Have an optional function like
netdev->netdev_class->is_pmd() and do

if (netdev->netdev_class->is_pmd())
  return netdev->netdev_class->is_pmd();
return false;

> }
>  static void
> @@ -661,6 +662,7 @@
> void
> netdev_rxq_wait(struct netdev_rxq *rx)
> {
> +  if (rx->netdev->netdev_class->rxq_wait != NULL)
>      rx->netdev->netdev_class->rxq_wait(rx);
> }
>
> Thanks & Regards,
> Ravali
> The information contained in this electronic message and any
> attachments to this message are intended for the exclusive use of the
> addressee(s) and may contain proprietary, confidential or privileged
> information. If you are not the intended recipient, you should not
> disseminate, distribute or copy this e-mail. Please notify the sender
> immediately and destroy all copies of this message and any
> attachments. WARNING: Computer viruses can be transmitted via
> email. The recipient should check this email and any attachments for
> the presence of viruses. The company accepts no liability for any
> damage caused by any virus transmitted by this email. www.wipro.com
This is inappropriate for a public mailing list.

> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
diff mbox

Patch

--- /root/base/nopenvswitch-2.4.0.1/DPDK/lib/librte_kni/rte_kni.c    2015-04-03 10:13:45.000000000 -0400
+++ /root/base/openvswitch-2.4.0.1/DPDK/lib/librte_kni/rte_kni.c     2016-03-01 09:02:21.449863228 -0500
@@ -432,7 +432,7 @@ 
    dev_info.sync_va = mz->addr;
    dev_info.sync_phys = mz->phys_addr;
-
+        memset(mz_name,0, sizeof(mz_name));
    /* MBUF mempool */
    snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME,
          pktmbuf_pool->name);
@@ -749,3 +749,8 @@ 
    close(kni_fd);
    kni_fd = -1;
}
+int
+rte_kni_get_fd(void)
+{
+    return kni_fd;
+}
--- /root/base/nopenvswitch-2.4.0.1/DPDK/lib/librte_kni/rte_kni.h    2015-04-03 10:13:45.000000000 -0400
+++ /root/base/openvswitch-2.4.0.1/DPDK/lib/librte_kni/rte_kni.h     2016-03-01 09:04:26.128870730 -0500
@@ -297,6 +297,15 @@ 
  *   void
  */
extern void rte_kni_close(void);
+/**
+ *  get fd for KNI device.
+ *
+ *  @param void
+ *
+ *  @return
+ *   int
+ */
+extern int rte_kni_get_fd(void);
 #ifdef __cplusplus
}
--- /root/base/nopenvswitch-2.4.0.1/DPDK/lib/librte_eal/linuxapp/kni/kni_net.c     2015-04-03 10:13:45.000000000 -0400
+++ /root/base/openvswitch-2.4.0.1/DPDK/lib/librte_eal/linuxapp/kni/kni_net.c     2016-03-01 09:43:46.935012781 -0500
@@ -72,12 +72,13 @@ 
     if (kni->lad_dev)
          memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN);
-    else
+    else if(strlen(dev->dev_addr)==0){
          /*
           * Generate random mac address. eth_random_addr() is the newer
           * version of generating mac address in linux kernel.
           */
          random_ether_addr(dev->dev_addr);
+    }
     netif_start_queue(dev);
--- /root/base/nopenvswitch-2.4.0.1/openvswitch-2.4.0/lib/netdev-dpdk.c     2015-08-20 20:18:21.386479276 -0400
+++ /root/base/openvswitch-2.4.0.1/openvswitch-2.4.0/lib/netdev-dpdk.c     2016-03-01 10:01:11.569075636 -0500
@@ -53,6 +53,7 @@ 
#include "rte_config.h"
#include "rte_mbuf.h"
#include "rte_virtio_net.h"
+#include <rte_kni.h>
 VLOG_DEFINE_THIS_MODULE(dpdk);
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -62,6 +63,13 @@ 
#define OVS_CACHE_LINE_SIZE CACHE_LINE_SIZE
#define OVS_VPORT_DPDK "ovs_dpdk"
+#define MAX_PACKET_SZ               2048
+#define NUM_KNI_INTERFACES          16
+#define PORT_VETH_RX_BURST_SIZE     32
+#define PORT_VETH_TX_BURST_SIZE     32
+#define VETH_RETRY_NUM         4
+#define VETH_RETRY_DELAY_US    10
+
/*
  * need to reserve tons of extra space in the mbufs so we can align the
  * DMA addresses to 4KB.
@@ -130,6 +138,7 @@ 
enum dpdk_dev_type {
     DPDK_DEV_ETH = 0,
     DPDK_DEV_VHOST = 1,
+    DPDK_DEV_KNI = 2
};
 static int rte_eal_init_ret = ENODEV;
@@ -223,7 +232,10 @@ 
     /* virtio-net structure for vhost device */
     OVSRCU_TYPE(struct virtio_net *) virtio_dev;
-
+
+    /* kni structure for veth device */
+    struct rte_kni *veth;
+
     /* Identifier used to distinguish vhost devices from each other */
     char vhost_id[PATH_MAX];
@@ -235,6 +247,7 @@ 
     struct netdev_rxq up;
     int port_id;
};
+int kni_dpdk_set_mac(struct netdev_dpdk *);
 static bool thread_is_pmd(void);
@@ -535,7 +548,49 @@ 
     dev->flags = NETDEV_UP | NETDEV_PROMISC;
     return 0;
}
+static int
+dpdk_veth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
+{
+    int err=0;
+    struct rte_kni *veth = NULL;
+    struct rte_kni_conf conf;
+    static int port_id = 1;
+
+    if (rte_kni_get(dev->up.name)){
+        VLOG_ERR("Kni interface %s already exists",dev->up.name);
+        return EAGAIN;
+    }
+           /* invoke init before rte_kni_alloc.Currently hardcoded the num of kni interfaces as 2  */
+     if (rte_kni_get_fd() == -1)
+         rte_kni_init(NUM_KNI_INTERFACES);
+
+     kni_dpdk_set_mac(dev);
+        /* Clear conf at first */
+     memset(&conf, 0, sizeof(conf));
+     memcpy(conf.name, dev->up.name, RTE_KNI_NAMESIZE);
+     conf.group_id = (uint16_t)port_id; /* currently group id is hardcoded */
+     conf.mbuf_size = MAX_PACKET_SZ;
+
+     veth = rte_kni_alloc(dev->dpdk_mp->mp, &conf, NULL);
+     if (veth !=  NULL){
+        port_id ++; /* increment the port id for the next kni interface */
+        dev->veth = veth;
+     }
+     else
+        err = ENOMEM;
+
+     return err;
+}
+int kni_dpdk_set_mac(struct netdev_dpdk *dev)
+{
+        struct ether_addr addr;
+        if(!is_zero_ether_addr(&addr)){
+                eth_random_addr(&addr);
+                memcpy(dev->hwaddr, addr.addr_bytes,ETH_ADDR_LEN);
+        }
+        return 0;
+}
static struct netdev_dpdk *
netdev_dpdk_cast(const struct netdev *netdev)
{
@@ -619,6 +674,12 @@ 
             goto unlock;
         }
     }
+    else if(type == DPDK_DEV_KNI) {
+        err = dpdk_veth_dev_init(netdev);
+        if (err) {
+            goto unlock;
+        }
+    }
     list_push_back(&dpdk_list, &netdev->list_node);
@@ -693,7 +754,25 @@ 
     ovs_mutex_unlock(&dpdk_mutex);
     return err;
}
+static int
+netdev_dpdk_veth_construct(struct netdev *netdev_)
+{
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+    int err=0;
+
+    if (rte_eal_init_ret) {
+        return rte_eal_init_ret;
+    }
+
+    ovs_mutex_lock(&dpdk_mutex);
+    err = netdev_dpdk_init(netdev_, -1, DPDK_DEV_KNI);
+    ovs_mutex_unlock(&dpdk_mutex);
+    rte_spinlock_init(&netdev->vhost_tx_lock); /* using same structure host_tx_lock for kni */
+
+    return err;
+
+}
static int
netdev_dpdk_construct(struct netdev *netdev)
{
@@ -748,7 +827,23 @@ 
     dpdk_mp_put(dev->dpdk_mp);
     ovs_mutex_unlock(&dpdk_mutex);
}
+static void
+netdev_dpdk_veth_destruct(struct netdev *netdev_)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev_);
+    ovs_mutex_lock(&dev->mutex);
+    if (rte_kni_release(dev->veth)) {
+        VLOG_ERR("Failed to release KNI interface");
+        return;
+    }
+    ovs_mutex_unlock(&dev->mutex);
+
+    ovs_mutex_lock(&dpdk_mutex);
+    list_remove(&dev->list_node);
+    dpdk_mp_put(dev->dpdk_mp);
+    ovs_mutex_unlock(&dpdk_mutex);
+}
static void
netdev_dpdk_dealloc(struct netdev *netdev_)
{
@@ -972,7 +1067,36 @@ 
     *c = (int) nb_rx;
     return 0;
}
+static int
+netdev_dpdk_veth_rxq_recv(struct netdev_rxq *rxq_,
+                           struct dp_packet **packets, int *c)
+{
+    struct netdev_rxq_dpdk *rx = netdev_rxq_dpdk_cast(rxq_);
+    struct netdev *netdev = rx->up.netdev;
+    struct netdev_dpdk *veth_dev = netdev_dpdk_cast(netdev);
+    uint16_t nb_rx = 0;
+    struct rte_kni *veth = NULL;
+    veth = veth_dev->veth;
+
+    if (veth != NULL)
+    {
+      /* receive and free */
+        nb_rx =  rte_kni_rx_burst(veth, (struct rte_mbuf **)packets, PORT_VETH_RX_BURST_SIZE);
+ /* handle callbacks (i.e. ifconfig) before we return */
+        rte_kni_handle_request(veth);
+        if (!nb_rx) {
+           return EAGAIN;
+     }
+     rte_spinlock_lock(&veth_dev->stats_lock);
+     veth_dev->stats.rx_packets += (uint64_t)nb_rx;
+     rte_spinlock_unlock(&veth_dev->stats_lock);
+
+     *c = (int) nb_rx;
+     return 0;
+  }
+  return EAGAIN;
+}
static int
netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
                      int *c)
@@ -1191,6 +1315,62 @@ 
     }
     return 0;
}
+static int
+netdev_dpdk_veth_send(struct netdev *netdev_, int qid OVS_UNUSED, struct dp_packet **pkts,
+                 int cnt, bool may_steal)
+{
+        struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+        struct rte_kni *veth = NULL;
+        struct rte_mbuf **cur_pkts =  (struct rte_mbuf **)pkts;
+        unsigned int tx_pkts = 0, dropped_pkts = 0, i;
+
+        if (!thread_is_pmd()) {
+            ovs_mutex_lock(&nonpmd_mempool_mutex);
+        }
+        veth = netdev->veth;
+        if (veth == NULL)
+        {
+            rte_spinlock_lock(&netdev->stats_lock);
+            netdev->stats.tx_dropped+= cnt;
+            rte_spinlock_unlock(&netdev->stats_lock);
+            goto out;
+
+        }
+        rte_spinlock_lock(&netdev->vhost_tx_lock);
+
+       for (i=0;i<cnt;i++)
+       {
+           if (cur_pkts[i]->data_off < RTE_PKTMBUF_HEADROOM)
+               ++dropped_pkts;
+
+           else if (rte_kni_tx_burst(veth,
+                           (struct rte_mbuf **) &cur_pkts[i], 1))
+                ++tx_pkts;
+           else
+                ++dropped_pkts;
+
+       }
+        rte_spinlock_unlock(&netdev->vhost_tx_lock);
+
+         /* statistics for the no of packets sent */
+        rte_spinlock_lock(&netdev->stats_lock);
+        netdev->stats.tx_packets += tx_pkts;
+        netdev->stats.tx_dropped += dropped_pkts;
+        rte_spinlock_unlock(&netdev->stats_lock);
+
+        if (!thread_is_pmd()) {
+            ovs_mutex_unlock(&nonpmd_mempool_mutex);
+        }
+
+out:    if (may_steal) {
+        int i;
+        for (i = 0; i < cnt; i++) {
+            dp_packet_delete(pkts[i]);
+        }
+        }
+        return 0;
+
+}
 static inline void
netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
@@ -1394,7 +1574,45 @@ 
     return 0;
}
+static int
+netdev_dpdk_veth_get_stats(const struct netdev *netdev,
+                            struct netdev_stats *stats)
+{
+    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+
+    ovs_mutex_lock(&dev->mutex);
+    memset(stats, 0, sizeof(*stats));
+    /* Unsupported Stats */
+    stats->rx_errors = UINT64_MAX;
+    stats->tx_errors = UINT64_MAX;
+    stats->multicast = UINT64_MAX;
+    stats->collisions = UINT64_MAX;
+    stats->rx_crc_errors = UINT64_MAX;
+    stats->rx_fifo_errors = UINT64_MAX;
+    stats->rx_frame_errors = UINT64_MAX;
+    stats->rx_length_errors = UINT64_MAX;
+    stats->rx_missed_errors = UINT64_MAX;
+    stats->rx_over_errors = UINT64_MAX;
+    stats->tx_aborted_errors = UINT64_MAX;
+    stats->tx_carrier_errors = UINT64_MAX;
+    stats->tx_errors = UINT64_MAX;
+    stats->tx_fifo_errors = UINT64_MAX;
+    stats->tx_heartbeat_errors = UINT64_MAX;
+    stats->tx_window_errors = UINT64_MAX;
+    stats->rx_bytes += UINT64_MAX;
+    stats->rx_dropped += UINT64_MAX;
+    stats->tx_bytes += UINT64_MAX;
+
+    rte_spinlock_lock(&dev->stats_lock);
+    /* Supported Stats */
+    stats->rx_packets += dev->stats.rx_packets;
+    stats->tx_packets += dev->stats.tx_packets;
+    stats->tx_dropped += dev->stats.tx_dropped;
+    rte_spinlock_unlock(&dev->stats_lock);
+    ovs_mutex_unlock(&dev->mutex);
+    return 0;
+}
static int
netdev_dpdk_get_stats(const struct netdev *netdev, struct netdev_stats *stats)
{
@@ -2177,6 +2395,20 @@ 
         NULL,
         netdev_dpdk_vhost_rxq_recv);
+static const struct netdev_class dpdk_kni_class =
+    NETDEV_DPDK_CLASS(
+        "dpdkkni",
+        NULL,
+        netdev_dpdk_veth_construct,
+        netdev_dpdk_veth_destruct,
+        NULL,
+        netdev_dpdk_veth_send,
+        NULL,
+        netdev_dpdk_veth_get_stats,
+        NULL,
+        NULL,
+        netdev_dpdk_veth_rxq_recv);
+
void
netdev_dpdk_register(void)
{
@@ -2195,6 +2427,7 @@ 
#else
         netdev_register_provider(&dpdk_vhost_user_class);
#endif
+    netdev_register_provider(&dpdk_kni_class);
         ovsthread_once_done(&once);
     }
}
--- /root/base/nopenvswitch-2.4.0.1/openvswitch-2.4.0/lib/netdev.c   2015-08-20 20:18:21.430479276 -0400
+++ /root/base/openvswitch-2.4.0.1/openvswitch-2.4.0/lib/netdev.c    2016-03-01 09:41:53.295005943 -0500
@@ -112,7 +112,8 @@ 
     return (!strcmp(netdev->netdev_class->type, "dpdk") ||
             !strcmp(netdev->netdev_class->type, "dpdkr") ||
             !strcmp(netdev->netdev_class->type, "dpdkvhostcuse") ||
-            !strcmp(netdev->netdev_class->type, "dpdkvhostuser"));
+            !strcmp(netdev->netdev_class->type, "dpdkvhostuser")||
+            !strcmp(netdev->netdev_class->type, "dpdkkni"));
}
 static void
@@ -661,6 +662,7 @@