[ovs-dev,03/17] dpif-netdev: Don't try to output on a device without txqs.

Message ID 20161116004612.79315-4-diproiettod@vmware.com
State Superseded

Commit Message

Daniele Di Proietto Nov. 16, 2016, 12:45 a.m. UTC
Tunnel devices have 0 txqs and don't support netdev_send().  While
netdev_send() simply returns EOPNOTSUPP, the XPS logic is still executed
on output, and that might be confused by devices with no txqs.

It seems better to have different structures in the fast path for ports
that support netdev_{push,pop}_header (tunnel devices), and ports that
support netdev_send.  With this we can also remove a branch in
netdev_send().

This is also necessary for a future commit, which starts DPDK devices
without txqs.
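
For example, dynamic queue selection ultimately reduces a queue id
modulo the txq count, which is undefined for a device with zero txqs.
A minimal sketch of the failure mode (illustrative only; 'pick_tx_qid'
is a hypothetical helper, not the datapath's code):

    #include <assert.h>

    /* With n_txq == 0 the modulo below would divide by zero: one way a
     * txq-less device can confuse the XPS logic. */
    static int
    pick_tx_qid(int thread_hint, int n_txq)
    {
        assert(n_txq >= 1);         /* The contract netdev_send() now documents. */
        return thread_hint % n_txq;
    }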

Signed-off-by: Daniele Di Proietto <diproiettod@vmware.com>
---
 lib/dpif-netdev.c | 72 ++++++++++++++++++++++++++++++++++++++++---------------
 lib/netdev.c      | 15 ++++++++----
 lib/netdev.h      |  1 +
 3 files changed, 64 insertions(+), 24 deletions(-)

Comments

Ilya Maximets Nov. 21, 2016, 1:07 p.m. UTC | #1
On 16.11.2016 03:45, Daniele Di Proietto wrote:
> [...]
>
> @@ -3069,11 +3081,22 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>      struct tx_port *tx_port, *tx_port_cached;
>  
>      pmd_free_cached_ports(pmd);
> -    hmap_shrink(&pmd->port_cache);
> +    hmap_shrink(&pmd->send_port_cache);
> +    hmap_shrink(&pmd->tnl_port_cache);
>  
>      HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
> +        struct hmap *cache;
> +
> +        if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
> +            cache = &pmd->tnl_port_cache;
> +        } else if (netdev_n_txq(tx_port->port->netdev)) {
> +            cache = &pmd->send_port_cache;
> +        } else {
> +            continue;
> +        }

IMHO, this code introduces artificial limitations for netdevs.
What about something like this:

        if (has_pop _OR_ has_push) {
            insert to 'tnl_port_cache';
        }

        if (netdev_n_txq(tx_port->port->netdev)) {
            insert to 'send_port_cache';
        }
?
i.e. make all the checks independent.

Otherwise, it must be described in 'netdev-provider.h' that a netdev
can have only tunnel-related functions (both 'push' and 'pop') or a
send function.
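
Roughly, with independent checks the loop body could copy a port into
both caches when appropriate (a sketch only; note that each cache needs
its own copy, since an hmap node cannot live in two maps at once):

    HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
        if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
            hmap_insert(&pmd->tnl_port_cache, &tx_port_cached->node,
                        hash_port_no(tx_port_cached->port->port_no));
        }
        if (netdev_n_txq(tx_port->port->netdev)) {
            tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
            hmap_insert(&pmd->send_port_cache, &tx_port_cached->node,
                        hash_port_no(tx_port_cached->port->port_no));
        }
    }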

Daniele Di Proietto Nov. 21, 2016, 10 p.m. UTC | #2
On 21/11/2016 05:07, "Ilya Maximets" <i.maximets@samsung.com> wrote:

>On 16.11.2016 03:45, Daniele Di Proietto wrote:
>> [...]
>>
>> @@ -3069,11 +3081,22 @@ pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
>>      struct tx_port *tx_port, *tx_port_cached;
>>  
>>      pmd_free_cached_ports(pmd);
>> -    hmap_shrink(&pmd->port_cache);
>> +    hmap_shrink(&pmd->send_port_cache);
>> +    hmap_shrink(&pmd->tnl_port_cache);
>>  
>>      HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
>> +        struct hmap *cache;
>> +
>> +        if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
>> +            cache = &pmd->tnl_port_cache;
>> +        } else if (netdev_n_txq(tx_port->port->netdev)) {
>> +            cache = &pmd->send_port_cache;
>> +        } else {
>> +            continue;
>> +        }
>
>IMHO, this code introduces artificial limitations for netdevs.
>What about something like this:
>
>        if (has_pop _OR_ has_push) {
>            insert to 'tnl_port_cache';
>        }
>
>        if (netdev_n_txq(tx_port->port->netdev)) {
>            insert to 'send_port_cache';
>        }
>?
>i.e. make all the checks independent.
>
>Otherwise, it must be described in 'netdev-provider.h' that a netdev
>can have only tunnel-related functions (both 'push' and 'pop') or a
>send function.

I was leaning more towards documenting the limitation (currently we have
no use case for a netdev that supports both), but making the checks
independent is simpler and avoids an assumption about the netdev
provider, so I went with that.

I also realized that the checks in netdev_pop_header() and
netdev_push_header() are not necessary, so I removed them.
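
Roughly like this (a sketch; the provider-callback invocation below is
schematic, the real signature is in lib/netdev-provider.h):

    int
    netdev_push_header(const struct netdev *netdev,
                       struct dp_packet_batch *batch,
                       const struct ovs_action_push_tnl *data)
    {
        /* No '!netdev->netdev_class->push_header' guard needed anymore:
         * dpif-netdev resolves the port through 'tnl_port_cache', so only
         * devices that implement push/pop ever reach this call. */
        return netdev->netdev_class->push_header(netdev, batch, data);
    }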

Thanks

Patch

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7b67b42..81366b2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -422,7 +422,8 @@  struct rxq_poll {
     struct ovs_list node;
 };
 
-/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
+/* Contained by struct dp_netdev_pmd_thread's 'send_port_cache',
+ * 'tnl_port_cache' or 'tx_ports'. */
 struct tx_port {
     struct dp_netdev_port *port;
     int qid;
@@ -504,11 +505,19 @@  struct dp_netdev_pmd_thread {
      * read by the pmd thread. */
     struct hmap tx_ports OVS_GUARDED;
 
-    /* Map of 'tx_port' used in the fast path. This is a thread-local copy of
-     * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
-     * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
-     * Every other instance will only be accessed by its own pmd thread. */
-    struct hmap port_cache;
+    /* These are thread-local copies of 'tx_ports'.  One contains only tunnel
+     * ports (that support push_tunnel/pop_tunnel).  The other contains
+     * non-tunnel ports (that support send).
+     *
+     * These are kept separate to make sure that we don't try to execute
+     * OUTPUT on a tunnel device (which has 0 txqs) or PUSH/POP on a non-tunnel
+     * device.
+     *
+     * The instance for cpu core NON_PMD_CORE_ID can be accessed by multiple
+     * threads, and thusly needs to be protected by 'non_pmd_mutex'.  Every
+     * other instance will only be accessed by its own pmd thread. */
+    struct hmap tnl_port_cache;
+    struct hmap send_port_cache;
 
     /* Only a pmd thread can write on its own 'cycles' and 'stats'.
      * The main thread keeps 'stats_zero' and 'cycles_zero' as base
@@ -3055,7 +3064,10 @@  pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
     /* Free all used tx queue ids. */
     dpif_netdev_xps_revalidate_pmd(pmd, 0, true);
 
-    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->port_cache) {
+    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->tnl_port_cache) {
+        free(tx_port_cached);
+    }
+    HMAP_FOR_EACH_POP (tx_port_cached, node, &pmd->send_port_cache) {
         free(tx_port_cached);
     }
 }
@@ -3069,11 +3081,22 @@  pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
     struct tx_port *tx_port, *tx_port_cached;
 
     pmd_free_cached_ports(pmd);
-    hmap_shrink(&pmd->port_cache);
+    hmap_shrink(&pmd->send_port_cache);
+    hmap_shrink(&pmd->tnl_port_cache);
 
     HMAP_FOR_EACH (tx_port, node, &pmd->tx_ports) {
+        struct hmap *cache;
+
+        if (netdev_has_tunnel_push_pop(tx_port->port->netdev)) {
+            cache = &pmd->tnl_port_cache;
+        } else if (netdev_n_txq(tx_port->port->netdev)) {
+            cache = &pmd->send_port_cache;
+        } else {
+            continue;
+        }
+
         tx_port_cached = xmemdup(tx_port, sizeof *tx_port_cached);
-        hmap_insert(&pmd->port_cache, &tx_port_cached->node,
+        hmap_insert(cache, &tx_port_cached->node,
                     hash_port_no(tx_port_cached->port->port_no));
     }
 }
@@ -3309,7 +3332,8 @@  dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
     pmd->next_optimization = time_msec() + DPCLS_OPTIMIZATION_INTERVAL;
     ovs_list_init(&pmd->poll_list);
     hmap_init(&pmd->tx_ports);
-    hmap_init(&pmd->port_cache);
+    hmap_init(&pmd->tnl_port_cache);
+    hmap_init(&pmd->send_port_cache);
     /* init the 'flow_cache' since there is no
      * actual thread created for NON_PMD_CORE_ID. */
     if (core_id == NON_PMD_CORE_ID) {
@@ -3325,7 +3349,8 @@  dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
     struct dpcls *cls;
 
     dp_netdev_pmd_flow_flush(pmd);
-    hmap_destroy(&pmd->port_cache);
+    hmap_destroy(&pmd->send_port_cache);
+    hmap_destroy(&pmd->tnl_port_cache);
     hmap_destroy(&pmd->tx_ports);
     /* All flows (including their dpcls_rules) have been deleted already */
     CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
@@ -3592,7 +3617,9 @@  static void
 dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
                              struct dp_netdev_port *port)
 {
-    struct tx_port *tx = xzalloc(sizeof *tx);
+    struct tx_port *tx;
+
+    tx = xzalloc(sizeof *tx);
 
     tx->port = port;
     tx->qid = -1;
@@ -4280,7 +4307,7 @@  dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
     struct dp_netdev_port *port;
     long long interval;
 
-    HMAP_FOR_EACH (tx, node, &pmd->port_cache) {
+    HMAP_FOR_EACH (tx, node, &pmd->send_port_cache) {
         if (!tx->port->dynamic_txqs) {
             continue;
         }
@@ -4344,10 +4371,17 @@  dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
 }
 
 static struct tx_port *
-pmd_tx_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
-                         odp_port_t port_no)
+pmd_tnl_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+                          odp_port_t port_no)
+{
+    return tx_port_lookup(&pmd->tnl_port_cache, port_no);
+}
+
+static struct tx_port *
+pmd_send_port_cache_lookup(const struct dp_netdev_pmd_thread *pmd,
+                           odp_port_t port_no)
 {
-    return tx_port_lookup(&pmd->port_cache, port_no);
+    return tx_port_lookup(&pmd->send_port_cache, port_no);
 }
 
 static int
@@ -4361,7 +4395,7 @@  push_tnl_action(const struct dp_netdev_pmd_thread *pmd,
 
     data = nl_attr_get(attr);
 
-    tun_port = pmd_tx_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
+    tun_port = pmd_tnl_port_cache_lookup(pmd, u32_to_odp(data->tnl_port));
     if (!tun_port) {
         err = -EINVAL;
         goto error;
@@ -4413,7 +4447,7 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
 
     switch ((enum ovs_action_attr)type) {
     case OVS_ACTION_ATTR_OUTPUT:
-        p = pmd_tx_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
+        p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
         if (OVS_LIKELY(p)) {
             int tx_qid;
             bool dynamic_txqs;
@@ -4460,7 +4494,7 @@  dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
             struct dp_packet_batch *orig_packets_ = packets_;
             odp_port_t portno = nl_attr_get_odp_port(a);
 
-            p = pmd_tx_port_cache_lookup(pmd, portno);
+            p = pmd_tnl_port_cache_lookup(pmd, portno);
             if (p) {
                 struct dp_packet_batch tnl_pkt;
                 int i;
diff --git a/lib/netdev.c b/lib/netdev.c
index ad90ef6..9ab4e4c 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -117,6 +117,13 @@  netdev_is_pmd(const struct netdev *netdev)
     return netdev->netdev_class->is_pmd;
 }
 
+bool
+netdev_has_tunnel_push_pop(const struct netdev *netdev)
+{
+    return netdev->netdev_class->push_header
+           && netdev->netdev_class->pop_header;
+}
+
 static void
 netdev_initialize(void)
     OVS_EXCLUDED(netdev_mutex)
@@ -686,6 +693,9 @@  netdev_set_tx_multiq(struct netdev *netdev, unsigned int n_txq)
  * if a partial packet was transmitted or if a packet is too big or too small
  * to transmit on the device.
  *
+ * The caller must make sure that 'netdev' supports sending by making sure that
+ * 'netdev_n_txq(netdev)' returns >= 1.
+ *
  * If the function returns a non-zero value, some of the packets might have
  * been sent anyway.
  *
@@ -710,11 +720,6 @@  int
 netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
             bool may_steal, bool concurrent_txq)
 {
-    if (!netdev->netdev_class->send) {
-        dp_packet_delete_batch(batch, may_steal);
-        return EOPNOTSUPP;
-    }
-
     int error = netdev->netdev_class->send(netdev, qid, batch, may_steal,
                                            concurrent_txq);
     if (!error) {
diff --git a/lib/netdev.h b/lib/netdev.h
index bad28c4..03059ca 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -109,6 +109,7 @@  bool netdev_is_reserved_name(const char *name);
 int netdev_n_txq(const struct netdev *netdev);
 int netdev_n_rxq(const struct netdev *netdev);
 bool netdev_is_pmd(const struct netdev *netdev);
+bool netdev_has_tunnel_push_pop(const struct netdev *netdev);
 
 /* Open and close. */
 int netdev_open(const char *name, const char *type, struct netdev **netdevp);