@@ -633,6 +633,15 @@ struct rps_dev_flow {
};
#define RPS_NO_FILTER 0xffff
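+
+/* Per-softnet staging queue for RPS bulk enqueue: packets destined
+ * for the same remote CPU are batched here, then spliced onto that
+ * CPU's backlog in one operation.
+ */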
+struct rps_cpu_queue {
+ struct sk_buff_head skb_list;
+ int to_cpu;
+ struct rps_dev_flow *rflow;
+ struct net_device *dev;
+};
+#define RPS_CPU_QUEUES 2 /* Must be power of 2 */
+#define RPS_CPU_QUEUES_MASK (RPS_CPU_QUEUES - 1)
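+/* A target CPU maps to a slot via (cpu & RPS_CPU_QUEUES_MASK); on a
+ * slot collision the previous batch is simply flushed to its remote
+ * backlog before the slot is reused.
+ */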
+
/*
* The rps_dev_flow_table structure contains a table of flow mappings.
*/
@@ -2662,6 +2671,7 @@ struct softnet_data {
unsigned int received_rps;
#ifdef CONFIG_RPS
struct softnet_data *rps_ipi_list;
+ struct rps_cpu_queue local_rps_queue[RPS_CPU_QUEUES];
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
struct sd_flow_limit __rcu *flow_limit;
@@ -3736,6 +3736,60 @@ drop:
return NET_RX_DROP;
}
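+
+/* List variant of enqueue_to_backlog(): splice a whole skb list onto
+ * the remote CPU's input_pkt_queue under a single rps_lock section,
+ * taking the queue lock once per batch instead of once per packet.
+ */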
+static int enqueue_list_to_backlog(struct sk_buff_head *skb_list, int cpu,
+ unsigned int *qtail, struct net_device *dev)
+{
+ unsigned int qlen, qlen_drop;
+ struct softnet_data *sd;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ sd = &per_cpu(softnet_data, cpu);
+
+ local_irq_save(flags);
+
+ rps_lock(sd);
+ if (!netif_running(dev))
+ goto drop;
+ qlen = skb_queue_len(&sd->input_pkt_queue);
+ /* NOTICE: the !skb_flow_limit(skb, qlen) check had to be dropped
+ * here; it operates on a single skb, not on a list.  Note also
+ * that qlen is checked before the splice, so the backlog can
+ * overshoot the limit by up to one batch.
+ */
+ if (qlen <= netdev_max_backlog) {
+ if (qlen) {
+enqueue:
+ /* Splice the whole list, replacing the per-skb
+ * __skb_queue_tail(&sd->input_pkt_queue, skb)
+ */
+ skb_queue_splice_tail_init(skb_list,
+ &sd->input_pkt_queue);
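+ /* XXX: this advances input_queue_tail only once for the
+ * whole list; per-skb RFS last_qtail accounting may be
+ * needed instead
+ */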
+ input_queue_tail_incr_save(sd, qtail);
+ rps_unlock(sd);
+ local_irq_restore(flags);
+ return NET_RX_SUCCESS;
+ }
+
+ /* Schedule NAPI for the backlog device.
+ * We can use a non-atomic operation, since we own the queue lock.
+ */
+ if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
+ if (!rps_ipi_queued(sd))
+ ____napi_schedule(sd, &sd->backlog);
+ }
+ goto enqueue;
+ }
+
+drop:
+ qlen_drop = skb_queue_len(skb_list);
+ sd->dropped += qlen_drop;
+ rps_unlock(sd);
+
+ local_irq_restore(flags);
+
+ atomic_long_add(qlen_drop, &dev->rx_dropped);
+ while ((skb = __skb_dequeue(skb_list)) != NULL)
+ __kfree_skb_defer(skb);
+ return NET_RX_DROP;
+}
+
static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
@@ -4211,14 +4265,43 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
+ struct softnet_data *sd = this_cpu_ptr(&softnet_data);
+ struct rps_cpu_queue *lq; /* softnet cpu local queue (lq) */
+
int cpu = get_rps_cpu(skb->dev, skb, &rflow);
+ if (cpu < 0)
+ goto no_rps;
+
+ /* Packet has an RPS destination CPU.
+ * XXX: is local_irq_disable() needed around the local queue?
+ */
+ lq = &sd->local_rps_queue[cpu & RPS_CPU_QUEUES_MASK];
- if (cpu >= 0) {
- ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+ if (lq->to_cpu == cpu && lq->dev == skb->dev) {
+ /* Bonus: RPS dest matches the previous CPU, queue the
+ * packet locally for a later bulk enqueue
+ */
+ __skb_queue_tail(&lq->skb_list, skb);
+ lq->rflow = rflow;
rcu_read_unlock();
- return ret;
+ return NET_RX_SUCCESS;
}
+ if (unlikely(lq->to_cpu < 0)) {
+ /* Unused slot: nothing to flush yet, start a new batch */
+ ret = NET_RX_SUCCESS;
+ goto init_localq;
+ }
+
+ /* No match: bulk enqueue the old batch to its remote CPU backlog */
+ ret = enqueue_list_to_backlog(&lq->skb_list, lq->to_cpu,
+ &lq->rflow->last_qtail, lq->dev);
+init_localq: /* start a new local queue (lq) */
+ /* XXX: likely redundant, skb_queue_splice_tail_init() already
+ * re-initialized lq->skb_list on the flush path above
+ */
+ skb_queue_head_init(&lq->skb_list);
+ __skb_queue_tail(&lq->skb_list, skb);
+ lq->rflow = rflow;
+ lq->to_cpu = cpu;
+ lq->dev = skb->dev;
+
+ rcu_read_unlock();
+ return ret;
}
+no_rps:
#endif
ret = __netif_receive_skb(skb);
rcu_read_unlock();
@@ -4579,6 +4662,30 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
}
EXPORT_SYMBOL(napi_gro_receive);
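+
+/* Flush any packets still parked in the per-CPU staging queues to
+ * their remote backlogs.  The active call site is net_rx_action(),
+ * which runs this with IRQs disabled after the NAPI poll loop.
+ */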
+static void rps_flush_local_queues(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+ int i;
+
+ /* XXX: revisit whether IRQs must be disabled here; the active
+ * caller (net_rx_action) already runs this with IRQs off
+ */
+ if (!sd)
+ sd = this_cpu_ptr(&softnet_data);
+
+ /* Bulk flush any remaining locally queued packets for RPS */
+ for (i = 0; i < RPS_CPU_QUEUES; i++) {
+ struct rps_cpu_queue *lq = &sd->local_rps_queue[i];
+
+ if (skb_queue_empty(&lq->skb_list))
+ continue;
+
+ enqueue_list_to_backlog(&lq->skb_list, lq->to_cpu,
+ &lq->rflow->last_qtail, lq->dev);
+ /* NOTE: resetting lq->to_cpu = -1 here was tried and FAILS */
+ }
+#endif
+}
+
void napi_gro_receive_list(struct napi_struct *napi,
struct sk_buff_head *skb_list,
struct net_device *netdev)
@@ -4594,6 +4701,10 @@ void napi_gro_receive_list(struct napi_struct *napi,
skb_gro_reset_offset(skb);
napi_skb_finish(dev_gro_receive(napi, skb), skb);
}
+#ifdef CONFIG_RPS
+ /* Disabled flush point, kept for reference; the active flush
+ * happens in net_rx_action():
+ *
+ * if (static_key_false(&rps_needed))
+ * rps_flush_local_queues(NULL);
+ */
+#endif
}
EXPORT_SYMBOL(napi_gro_receive_list);
@@ -4747,6 +4858,8 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
local_irq_enable();
+ /* Disabled flush point, kept for reference:
+ * rps_flush_local_queues(NULL);
+ */
+
/* Send pending IPI's to kick RPS processing on remote cpus. */
while (remsd) {
struct softnet_data *next = remsd->rps_ipi_next;
@@ -5176,6 +5289,8 @@ static void net_rx_action(struct softirq_action *h)
__kfree_skb_flush();
local_irq_disable();
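+ /* Flush locally staged RPS batches while IRQs are off, before
+ * net_rps_action_and_irq_enable() sends the remote IPIs
+ */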
+ rps_flush_local_queues(NULL);
+
list_splice_tail_init(&sd->poll_list, &list);
list_splice_tail(&repoll, &list);
list_splice(&list, &sd->poll_list);
@@ -8085,6 +8200,9 @@ static int __init net_dev_init(void)
for_each_possible_cpu(i) {
struct softnet_data *sd = &per_cpu(softnet_data, i);
+#ifdef CONFIG_RPS
+ int j;
+#endif
skb_queue_head_init(&sd->input_pkt_queue);
skb_queue_head_init(&sd->process_queue);
@@ -8094,6 +8212,15 @@ static int __init net_dev_init(void)
sd->csd.func = rps_trigger_softirq;
sd->csd.info = sd;
sd->cpu = i;
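+ /* Mark all RPS staging slots idle; to_cpu == -1 flags a slot
+ * as unused until the first packet claims it
+ */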
+ for (j = 0; j < RPS_CPU_QUEUES; j++) {
+ struct rps_cpu_queue *lq = &sd->local_rps_queue[j];
+
+ skb_queue_head_init(&lq->skb_list);
+ lq->to_cpu = -1;
+ /* Not strictly needed: to_cpu == -1 already marks the
+ * slot unused
+ */
+ lq->rflow = NULL;
+ lq->dev = NULL;
+ }
#endif
sd->backlog.poll = process_backlog;