From patchwork Thu Jul 14 11:28:21 2016
X-Patchwork-Submitter: Amir Levy
X-Patchwork-Id: 648331
From: Amir Levy
To: andreas.noever@gmail.com, gregkh@linuxfoundation.org, bhelgaas@google.com
Cc: linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
 thunderbolt-linux@intel.com, mika.westerberg@intel.com, tomas.winkler@intel.com, Amir Levy
Subject: [PATCH v3 7/8] thunderbolt: Networking transmit and receive
Date: Thu, 14 Jul 2016 14:28:21 +0300
Message-Id: <1468495702-7467-8-git-send-email-amir.jer.levy@intel.com>
X-Mailer: git-send-email 2.7.4
In-Reply-To: <1468495702-7467-1-git-send-email-amir.jer.levy@intel.com>
References: <1468495702-7467-1-git-send-email-amir.jer.levy@intel.com>

Handle transmission to the second peer and reception from it. This
includes communication with the upper layer (the network stack) and
configuration of the Thunderbolt(TM) HW.
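
Reviewer note (illustration only, not part of the patch): each network
packet is carried over the Thunderbolt path as one or more frames, each
prefixed by the small header added below in net.c (frame_size,
frame_index, frame_id, frame_count). The sketch below mirrors the
validation rules used by tbt_net_check_frame() in this patch; the struct
and helper names are hypothetical and only restate the header layout the
patch introduces.

/*
 * Illustrative sketch: frames of one packet share frame_id and
 * frame_count, and frame_index runs 0..frame_count-1.
 */
struct example_hdr {
	__le32 frame_size;	/* payload bytes carried in this frame */
	__le16 frame_index;	/* position of this frame within the packet */
	__le16 frame_id;	/* identifies the packet the frame belongs to */
	__le32 frame_count;	/* total number of frames in the packet */
};

static bool example_frame_belongs(const struct example_hdr *hdr,
				  u16 expected_index, u16 expected_id,
				  u32 expected_count)
{
	return le32_to_cpu(hdr->frame_count) == expected_count &&
	       le16_to_cpu(hdr->frame_index) == expected_index &&
	       le16_to_cpu(hdr->frame_id) == expected_id;
}
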
Signed-off-by: Amir Levy --- drivers/thunderbolt/icm/icm_nhi.c | 15 + drivers/thunderbolt/icm/net.c | 1475 +++++++++++++++++++++++++++++++++++++ 2 files changed, 1490 insertions(+) diff --git a/drivers/thunderbolt/icm/icm_nhi.c b/drivers/thunderbolt/icm/icm_nhi.c index 060bb38..f8b0527 100644 --- a/drivers/thunderbolt/icm/icm_nhi.c +++ b/drivers/thunderbolt/icm/icm_nhi.c @@ -1045,6 +1045,7 @@ static irqreturn_t nhi_msi(int __always_unused irq, void *data) { struct tbt_nhi_ctxt *nhi_ctxt = data; u32 isr0, isr1, imr0, imr1; + int i; /* clear on read */ isr0 = ioread32(nhi_ctxt->iobase + REG_RING_NOTIFY_BASE); @@ -1067,6 +1068,20 @@ static irqreturn_t nhi_msi(int __always_unused irq, void *data) spin_unlock(&nhi_ctxt->lock); + for (i = 0; i < nhi_ctxt->num_ports; ++i) { + struct net_device *net_dev = + nhi_ctxt->net_devices[i].net_dev; + if (net_dev) { + u8 path = PATH_FROM_PORT(nhi_ctxt->num_paths, i); + + if (isr0 & REG_RING_INT_RX_PROCESSED( + path, nhi_ctxt->num_paths)) + tbt_net_rx_msi(net_dev); + if (isr0 & REG_RING_INT_TX_PROCESSED(path)) + tbt_net_tx_msi(net_dev); + } + } + if (isr0 & REG_RING_INT_RX_PROCESSED(TBT_ICM_RING_NUM, nhi_ctxt->num_paths)) schedule_work(&nhi_ctxt->icm_msgs_work); diff --git a/drivers/thunderbolt/icm/net.c b/drivers/thunderbolt/icm/net.c index e983dfb..77cc843 100644 --- a/drivers/thunderbolt/icm/net.c +++ b/drivers/thunderbolt/icm/net.c @@ -135,6 +135,17 @@ struct approve_inter_domain_connection_cmd { }; +struct tbt_frame_header { + /* size of the data with the frame */ + __le32 frame_size; + /* running index on the frames */ + __le16 frame_index; + /* ID of the frame to match frames to specific packet */ + __le16 frame_id; + /* how many frames assembles a full packet */ + __le32 frame_count; +}; + enum neg_event { RECEIVE_LOGOUT = NUM_MEDIUM_STATUSES, RECEIVE_LOGIN_RESPONSE, @@ -142,15 +153,81 @@ enum neg_event { NUM_NEG_EVENTS }; +enum frame_status { + GOOD_FRAME, + GOOD_AS_FIRST_FRAME, + GOOD_AS_FIRST_MULTICAST_FRAME, + FRAME_NOT_READY, + FRAME_ERROR, +}; + +enum packet_filter { + /* all multicast MAC addresses */ + PACKET_TYPE_ALL_MULTICAST, + /* all types of MAC addresses: multicast, unicast and broadcast */ + PACKET_TYPE_PROMISCUOUS, + /* all unicast MAC addresses */ + PACKET_TYPE_UNICAST_PROMISCUOUS, +}; + enum disconnect_path_stage { STAGE_1 = BIT(0), STAGE_2 = BIT(1) }; +struct tbt_net_stats { + u64 tx_packets; + u64 tx_bytes; + u64 tx_errors; + u64 rx_packets; + u64 rx_bytes; + u64 rx_length_errors; + u64 rx_over_errors; + u64 rx_crc_errors; + u64 rx_missed_errors; + u64 multicast; +}; + +static const char tbt_net_gstrings_stats[][ETH_GSTRING_LEN] = { + "tx_packets", + "tx_bytes", + "tx_errors", + "rx_packets", + "rx_bytes", + "rx_length_errors", + "rx_over_errors", + "rx_crc_errors", + "rx_missed_errors", + "multicast", +}; + +struct tbt_buffer { + dma_addr_t dma; + union { + struct tbt_frame_header *hdr; + struct page *page; + }; + u32 page_offset; +}; + +struct tbt_desc_ring { + /* pointer to the descriptor ring memory */ + struct tbt_buf_desc *desc; + /* physical address of the descriptor ring */ + dma_addr_t dma; + /* array of buffer structs */ + struct tbt_buffer *buffers; + /* last descriptor that was associated with a buffer */ + u16 last_allocated; + /* next descriptor to check for DD status bit */ + u16 next_to_clean; +}; + /** * struct tbt_port - the basic tbt_port structure * @tbt_nhi_ctxt: context of the nhi controller. * @net_dev: networking device object. 
+* @napi: network API * @login_retry_work: work queue for sending login requests. * @login_response_work: work queue for sending login responses. * @work_struct logout_work: work queue for sending logout requests. @@ -166,6 +243,11 @@ enum disconnect_path_stage { * @login_retry_count: counts number of login retries sent. * @local_depth: depth of the remote peer in the chain. * @transmit_path: routing parameter for the icm. +* @tx_ring: transmit ring from where the packets are sent. +* @rx_ring: receive ring where the packets are received. +* @stats: network statistics of the rx/tx packets. +* @packet_filters: defines filters for the received packets. +* @multicast_hash_table: hash table of multicast addresses. * @frame_id: counting ID of frames. * @num: port number. * @local_path: routing parameter for the icm. @@ -175,6 +257,7 @@ enum disconnect_path_stage { struct tbt_port { struct tbt_nhi_ctxt *nhi_ctxt; struct net_device *net_dev; + struct napi_struct napi; struct delayed_work login_retry_work; struct work_struct login_response_work; struct work_struct logout_work; @@ -190,6 +273,17 @@ struct tbt_port { u8 login_retry_count; u8 local_depth; u8 transmit_path; + struct tbt_desc_ring tx_ring ____cacheline_aligned_in_smp; + struct tbt_desc_ring rx_ring; + struct tbt_net_stats stats; + u32 packet_filters; + /* + * hash table of 1024 boolean entries with hashing of + * the multicast address + */ + u32 multicast_hash_table[DIV_ROUND_UP( + TBT_NET_MULTICAST_HASH_TABLE_SIZE, + BITS_PER_U32)]; u16 frame_id; u8 num; u8 local_path; @@ -236,6 +330,8 @@ static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout) (port->local_path * REG_OPTS_STEP); u32 rx_reg_val = ioread32(rx_reg) & ~REG_OPTS_E2E_EN; + napi_disable(&port->napi); + tx_reg = iobase + REG_TX_OPTIONS_BASE + (port->local_path * REG_OPTS_STEP); tx_reg_val = ioread32(tx_reg) & ~REG_OPTS_E2E_EN; @@ -277,8 +373,1340 @@ static void tbt_net_tear_down(struct net_device *net_dev, bool send_logout) port->nhi_ctxt->num_paths); spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags); } + + port->rx_ring.next_to_clean = 0; + port->rx_ring.last_allocated = TBT_NET_NUM_RX_BUFS - 1; + +} + +void tbt_net_tx_msi(struct net_device *net_dev) +{ + struct tbt_port *port = netdev_priv(net_dev); + void __iomem *iobase = port->nhi_ctxt->iobase; + u32 prod_cons, prod, cons; + + prod_cons = ioread32(TBT_RING_CONS_PROD_REG(iobase, REG_TX_RING_BASE, + port->local_path)); + prod = TBT_REG_RING_PROD_EXTRACT(prod_cons); + cons = TBT_REG_RING_CONS_EXTRACT(prod_cons); + if (prod >= TBT_NET_NUM_TX_BUFS || cons >= TBT_NET_NUM_TX_BUFS) + return; + + if (TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) >= + TX_WAKE_THRESHOLD) { + netif_wake_queue(port->net_dev); + } else { + spin_lock(&port->nhi_ctxt->lock); + /* enable TX interrupt */ + RING_INT_ENABLE_TX(iobase, port->local_path); + spin_unlock(&port->nhi_ctxt->lock); + } +} + +static irqreturn_t tbt_net_tx_msix(int __always_unused irq, void *data) +{ + struct tbt_port *port = data; + void __iomem *iobase = port->nhi_ctxt->iobase; + u32 prod_cons, prod, cons; + + prod_cons = ioread32(TBT_RING_CONS_PROD_REG(iobase, + REG_TX_RING_BASE, + port->local_path)); + prod = TBT_REG_RING_PROD_EXTRACT(prod_cons); + cons = TBT_REG_RING_CONS_EXTRACT(prod_cons); + if (prod < TBT_NET_NUM_TX_BUFS && cons < TBT_NET_NUM_TX_BUFS && + TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) >= + TX_WAKE_THRESHOLD) { + spin_lock(&port->nhi_ctxt->lock); + /* disable TX interrupt */ + RING_INT_DISABLE_TX(iobase, port->local_path); 
+ spin_unlock(&port->nhi_ctxt->lock); + + netif_wake_queue(port->net_dev); + } + + return IRQ_HANDLED; +} + +void tbt_net_rx_msi(struct net_device *net_dev) +{ + struct tbt_port *port = netdev_priv(net_dev); + + napi_schedule_irqoff(&port->napi); +} + +static irqreturn_t tbt_net_rx_msix(int __always_unused irq, void *data) +{ + struct tbt_port *port = data; + + if (likely(napi_schedule_prep(&port->napi))) { + struct tbt_nhi_ctxt *nhi_ctx = port->nhi_ctxt; + + spin_lock(&nhi_ctx->lock); + /* disable RX interrupt */ + RING_INT_DISABLE_RX(nhi_ctx->iobase, port->local_path, + nhi_ctx->num_paths); + spin_unlock(&nhi_ctx->lock); + + __napi_schedule_irqoff(&port->napi); + } + + return IRQ_HANDLED; +} + +static void tbt_net_pull_tail(struct sk_buff *skb) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; + unsigned int pull_len; + unsigned char *va; + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool + */ + va = skb_frag_address(frag); + + pull_len = eth_get_headlen(va, TBT_NET_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +static inline bool tbt_net_alloc_mapped_page(struct device *dev, + struct tbt_buffer *buf, gfp_t gfp) +{ + if (!buf->page) { + buf->page = alloc_page(gfp | __GFP_COLD); + if (unlikely(!buf->page)) + return false; + + buf->dma = dma_map_page(dev, buf->page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, buf->dma)) { + __free_page(buf->page); + buf->page = NULL; + return false; + } + buf->page_offset = 0; + } + return true; +} + +static bool tbt_net_alloc_rx_buffers(struct device *dev, + struct tbt_desc_ring *rx_ring, + u16 cleaned_count, void __iomem *reg, + gfp_t gfp) +{ + u16 i = (rx_ring->last_allocated + 1) & (TBT_NET_NUM_RX_BUFS - 1); + bool res = false; + + while (cleaned_count--) { + struct tbt_buf_desc *desc = &rx_ring->desc[i]; + struct tbt_buffer *buf = &rx_ring->buffers[i]; + + /* making sure next_to_clean won't get old buffer */ + desc->attributes = cpu_to_le32(DESC_ATTR_REQ_STS | + DESC_ATTR_INT_EN); + if (tbt_net_alloc_mapped_page(dev, buf, gfp)) { + res = true; + rx_ring->last_allocated = i; + i = (i + 1) & (TBT_NET_NUM_RX_BUFS - 1); + desc->phys = cpu_to_le64(buf->dma + buf->page_offset); + } else { + break; + } + } + + if (res) { + iowrite32((rx_ring->last_allocated << REG_RING_CONS_SHIFT) & + REG_RING_CONS_MASK, reg); + } + + return res; +} + +static inline bool tbt_net_multicast_mac_set(const u32 *multicast_hash_table, + const u8 *ether_addr) +{ + u16 hash_val = TBT_NET_ETHER_ADDR_HASH(ether_addr); + + return !!(multicast_hash_table[hash_val / BITS_PER_U32] & + BIT(hash_val % BITS_PER_U32)); +} + +static enum frame_status tbt_net_check_frame(struct tbt_port *port, + u16 frame_num, u32 *count, + u16 index, u16 *id, u32 *size) +{ + struct tbt_desc_ring *rx_ring = &port->rx_ring; + __le32 desc_attr = rx_ring->desc[frame_num].attributes; + enum frame_status res = GOOD_AS_FIRST_FRAME; + u32 len, frame_count, frame_size; + struct tbt_frame_header *hdr; + + if (!(desc_attr & cpu_to_le32(DESC_ATTR_DESC_DONE))) + return FRAME_NOT_READY; + + rmb(); /* read other fields from desc after checking DD */ + + if (unlikely(desc_attr & cpu_to_le32(DESC_ATTR_RX_CRC_ERR))) { + ++port->stats.rx_crc_errors; + goto err; + 
} else if (unlikely(desc_attr & + cpu_to_le32(DESC_ATTR_RX_BUF_OVRN_ERR))) { + ++port->stats.rx_over_errors; + goto err; + } + + len = (le32_to_cpu(desc_attr) & DESC_ATTR_LEN_MASK) + >> DESC_ATTR_LEN_SHIFT; + if (len == 0) + len = TBT_RING_MAX_FRAME_SIZE; + /* should be greater than just header i.e. contains data */ + if (unlikely(len <= sizeof(struct tbt_frame_header))) { + ++port->stats.rx_length_errors; + goto err; + } + + prefetchw(rx_ring->buffers[frame_num].page); + hdr = page_address(rx_ring->buffers[frame_num].page) + + rx_ring->buffers[frame_num].page_offset; + /* prefetch first cache line of first page */ + prefetch(hdr); + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(&port->nhi_ctxt->pdev->dev, + rx_ring->buffers[frame_num].dma, + rx_ring->buffers[frame_num].page_offset, + TBT_RING_MAX_FRAME_SIZE, + DMA_FROM_DEVICE); + + frame_count = le32_to_cpu(hdr->frame_count); + frame_size = le32_to_cpu(hdr->frame_size); + + if (unlikely((frame_size > len - sizeof(struct tbt_frame_header)) || + (frame_size == 0))) { + ++port->stats.rx_length_errors; + goto err; + } + /* + * In case we're in the middle of packet, validate the frame header + * based on first fragment of the packet + */ + if (*count) { + /* check the frame count fits the count field */ + if (frame_count != *count) { + ++port->stats.rx_length_errors; + goto check_as_first; + } + + /* + * check the frame identifiers are incremented correctly, + * and id is matching + */ + if ((le16_to_cpu(hdr->frame_index) != index) || + (le16_to_cpu(hdr->frame_id) != *id)) { + ++port->stats.rx_missed_errors; + goto check_as_first; + } + + *size += frame_size; + if (*size > TBT_NET_MTU) { + ++port->stats.rx_length_errors; + goto err; + } + res = GOOD_FRAME; + } else { /* start of packet, validate the frame header */ + const u8 *addr; + +check_as_first: + rx_ring->next_to_clean = frame_num; + + /* validate the first packet has a valid frame count */ + if (unlikely(frame_count == 0 || + frame_count > (TBT_NET_NUM_RX_BUFS / 4))) { + ++port->stats.rx_length_errors; + goto err; + } + + /* validate the first packet has a valid frame index */ + if (hdr->frame_index != 0) { + ++port->stats.rx_missed_errors; + goto err; + } + + BUILD_BUG_ON(TBT_NET_RX_HDR_SIZE > TBT_RING_MAX_FRM_DATA_SZ); + if ((frame_count > 1) && (frame_size < TBT_NET_RX_HDR_SIZE)) { + ++port->stats.rx_length_errors; + goto err; + } + + addr = (u8 *)(hdr + 1); + + /* check the packet can go through the filter */ + if (is_multicast_ether_addr(addr)) { + if (!is_broadcast_ether_addr(addr)) { + if ((port->packet_filters & + (BIT(PACKET_TYPE_PROMISCUOUS) | + BIT(PACKET_TYPE_ALL_MULTICAST))) || + tbt_net_multicast_mac_set( + port->multicast_hash_table, addr)) + res = GOOD_AS_FIRST_MULTICAST_FRAME; + else + goto err; + } + } else if (!(port->packet_filters & + (BIT(PACKET_TYPE_PROMISCUOUS) | + BIT(PACKET_TYPE_UNICAST_PROMISCUOUS))) && + !ether_addr_equal(port->net_dev->dev_addr, addr)) { + goto err; + } + + *size = frame_size; + *count = frame_count; + *id = le16_to_cpu(hdr->frame_id); + } + +#if (PREFETCH_STRIDE < 128) + prefetch((u8 *)hdr + PREFETCH_STRIDE); +#endif + + return res; + +err: + rx_ring->next_to_clean = (frame_num + 1) & (TBT_NET_NUM_RX_BUFS - 1); + return FRAME_ERROR; +} + +static inline unsigned int tbt_net_max_frm_data_size( + __maybe_unused u32 frame_size) +{ +#if (TBT_NUM_FRAMES_PER_PAGE > 1) + return ALIGN(frame_size + sizeof(struct tbt_frame_header), + L1_CACHE_BYTES) - + sizeof(struct tbt_frame_header); +#else + return 
TBT_RING_MAX_FRM_DATA_SZ; +#endif +} + +static int tbt_net_poll(struct napi_struct *napi, int budget) +{ + struct tbt_port *port = container_of(napi, struct tbt_port, napi); + void __iomem *reg = TBT_RING_CONS_PROD_REG(port->nhi_ctxt->iobase, + REG_RX_RING_BASE, + port->local_path); + struct tbt_desc_ring *rx_ring = &port->rx_ring; + u16 cleaned_count = TBT_NUM_BUFS_BETWEEN(rx_ring->last_allocated, + rx_ring->next_to_clean, + TBT_NET_NUM_RX_BUFS); + unsigned long flags; + int rx_packets = 0; + +loop: + while (likely(rx_packets < budget)) { + struct sk_buff *skb; + enum frame_status status; + bool multicast = false; + u32 frame_count = 0, size; + u16 j, frame_id; + int i; + + /* + * return some buffers to hardware, one at a time is too slow + * so allocate TBT_NET_RX_BUFFER_WRITE buffers at the same time + */ + if (cleaned_count >= TBT_NET_RX_BUFFER_WRITE) { + tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev, + rx_ring, cleaned_count, reg, + GFP_ATOMIC); + cleaned_count = 0; + } + + status = tbt_net_check_frame(port, rx_ring->next_to_clean, + &frame_count, 0, &frame_id, + &size); + if (status == FRAME_NOT_READY) + break; + + if (status == FRAME_ERROR) { + ++cleaned_count; + continue; + } + + multicast = (status == GOOD_AS_FIRST_MULTICAST_FRAME); + + /* + * i is incremented up to the frame_count frames received, + * j cyclicly goes over the location from the next frame + * to clean in the ring + */ + j = (rx_ring->next_to_clean + 1); + j &= (TBT_NET_NUM_RX_BUFS - 1); + for (i = 1; i < frame_count; ++i) { + status = tbt_net_check_frame(port, j, &frame_count, i, + &frame_id, &size); + if (status == FRAME_NOT_READY) + goto out; + + j = (j + 1) & (TBT_NET_NUM_RX_BUFS - 1); + + /* if a new frame is found, start over */ + if (status == GOOD_AS_FIRST_FRAME || + status == GOOD_AS_FIRST_MULTICAST_FRAME) { + multicast = (status == + GOOD_AS_FIRST_MULTICAST_FRAME); + cleaned_count += i; + i = 0; + continue; + } + + if (status == FRAME_ERROR) { + cleaned_count += (i + 1); + goto loop; + } + } + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(port->net_dev, + TBT_NET_RX_HDR_SIZE); + if (unlikely(!skb)) + break; + + /* + * we will be copying header into skb->data in + * tbt_net_pull_tail so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + + /* + * if overall size of packet smaller than TBT_NET_RX_HDR_SIZE + * which is a small buffer size we decided to allocate + * as the base to RX + */ + if (size <= TBT_NET_RX_HDR_SIZE) { + struct tbt_buffer *buf = + &(rx_ring->buffers[rx_ring->next_to_clean]); + u8 *va = page_address(buf->page) + buf->page_offset + + sizeof(struct tbt_frame_header); + + memcpy(__skb_put(skb, size), va, + ALIGN(size, sizeof(long))); + + /* + * Reuse buffer as-is, + * just make sure it is local + * Access to local memory is faster than non-local + * memory so let's reuse. + * If not local, let's free it and reallocate later. 
+ */ + if (likely(page_to_nid(buf->page) == numa_node_id())) + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device( + &port->nhi_ctxt->pdev->dev, + buf->dma, buf->page_offset, + TBT_RING_MAX_FRAME_SIZE, + DMA_FROM_DEVICE); + else { + /* this page cannot be reused so discard it */ + put_page(buf->page); + buf->page = NULL; + dma_unmap_page(&port->nhi_ctxt->pdev->dev, + buf->dma, PAGE_SIZE, + DMA_FROM_DEVICE); + } + rx_ring->next_to_clean = (rx_ring->next_to_clean + 1) & + (TBT_NET_NUM_RX_BUFS - 1); + } else { + for (i = 0; i < frame_count; ++i) { + struct tbt_buffer *buf = &(rx_ring->buffers[ + rx_ring->next_to_clean]); + struct tbt_frame_header *hdr = + page_address(buf->page) + + buf->page_offset; + u32 frm_size = le32_to_cpu(hdr->frame_size); + + unsigned int truesize = + tbt_net_max_frm_data_size(frm_size); + + /* add frame to skb struct */ + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + buf->page, + sizeof(struct tbt_frame_header) + + buf->page_offset, + frm_size, truesize); + +#if (TBT_NUM_FRAMES_PER_PAGE > 1) + /* move offset up to the next cache line */ + buf->page_offset += (truesize + + sizeof(struct tbt_frame_header)); + + /* + * we can reuse buffer if there is space + * available and it is local + */ + if (page_to_nid(buf->page) == numa_node_id() + && buf->page_offset <= + PAGE_SIZE - TBT_RING_MAX_FRAME_SIZE) { + /* + * bump ref count on page before + * it is given to the stack + */ + get_page(buf->page); + /* + * sync the buffer for use by the + * device + */ + dma_sync_single_range_for_device( + &port->nhi_ctxt->pdev->dev, + buf->dma, buf->page_offset, + TBT_RING_MAX_FRAME_SIZE, + DMA_FROM_DEVICE); + } else +#endif + { + buf->page = NULL; + dma_unmap_page( + &port->nhi_ctxt->pdev->dev, + buf->dma, PAGE_SIZE, + DMA_FROM_DEVICE); + } + + rx_ring->next_to_clean = + (rx_ring->next_to_clean + 1) & + (TBT_NET_NUM_RX_BUFS - 1); + } + /* + * place header from the first + * fragment in linear portion of buffer + */ + tbt_net_pull_tail(skb); + } + + /* pad short packets */ + if (unlikely(skb->len < ETH_ZLEN)) { + int pad_len = ETH_ZLEN - skb->len; + + /* The skb is freed on error */ + if (unlikely(skb_pad(skb, pad_len))) { + cleaned_count += frame_count; + continue; + } + __skb_put(skb, pad_len); + } + + skb->protocol = eth_type_trans(skb, port->net_dev); + napi_gro_receive(&port->napi, skb); + + ++rx_packets; + port->stats.rx_bytes += size; + if (multicast) + ++port->stats.multicast; + cleaned_count += frame_count; + } + +out: + port->stats.rx_packets += rx_packets; + + if (cleaned_count) + tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev, + rx_ring, cleaned_count, reg, + GFP_ATOMIC); + + /* If all work not completed, return budget and keep polling */ + if (rx_packets >= budget) + return budget; + + /* Work is done so exit the polling mode and re-enable the interrupt */ + napi_complete(napi); + + spin_lock_irqsave(&port->nhi_ctxt->lock, flags); + /* enable RX interrupt */ + RING_INT_ENABLE_RX(port->nhi_ctxt->iobase, port->local_path, + port->nhi_ctxt->num_paths); + + spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags); + + return 0; +} + +static int tbt_net_open(struct net_device *net_dev) +{ + struct tbt_port *port = netdev_priv(net_dev); + int res = 0; + int i, j; + + /* change link state to off until path establishment finishes */ + netif_carrier_off(net_dev); + + /* + * if we previously succeeded to allocate msix entries, + * now request IRQ for them: + * 2=tx data port 0, + * 3=rx data port 0, + * 4=tx data port 1, + * 5=rx data port 1, + * ... 
+ * if not, if msi is used, nhi_msi will handle icm & data paths + */ + if (port->nhi_ctxt->msix_entries) { + char name[] = "tbt-net-xx-xx"; + + scnprintf(name, sizeof(name), "tbt-net-rx-%02u", port->num); + res = devm_request_irq(&port->nhi_ctxt->pdev->dev, + port->nhi_ctxt->msix_entries[3+(port->num*2)].vector, + tbt_net_rx_msix, 0, name, port); + if (res) { + netif_err(port, ifup, net_dev, "request_irq %s failed %d\n", + name, res); + goto out; + } + name[8] = 't'; + res = devm_request_irq(&port->nhi_ctxt->pdev->dev, + port->nhi_ctxt->msix_entries[2+(port->num*2)].vector, + tbt_net_tx_msix, 0, name, port); + if (res) { + netif_err(port, ifup, net_dev, "request_irq %s failed %d\n", + name, res); + goto request_irq_failure; + } + } + /* + * Verifying that all buffer sizes are well defined. + * Starting with frame(s) will not tip over the + * page boundary + */ + BUILD_BUG_ON(TBT_NUM_FRAMES_PER_PAGE < 1); + /* + * Just to make sure we have enough place for containing + * 3 max MTU packets for TX + */ + BUILD_BUG_ON((TBT_NET_NUM_TX_BUFS * TBT_RING_MAX_FRAME_SIZE) < + (TBT_NET_MTU * 3)); + /* make sure the number of TX Buffers is power of 2 */ + BUILD_BUG_ON_NOT_POWER_OF_2(TBT_NET_NUM_TX_BUFS); + /* + * Just to make sure we have enough place for containing + * 3 max MTU packets for RX + */ + BUILD_BUG_ON((TBT_NET_NUM_RX_BUFS * TBT_RING_MAX_FRAME_SIZE) < + (TBT_NET_MTU * 3)); + /* make sure the number of RX Buffers is power of 2 */ + BUILD_BUG_ON_NOT_POWER_OF_2(TBT_NET_NUM_RX_BUFS); + + port->rx_ring.last_allocated = TBT_NET_NUM_RX_BUFS - 1; + + port->tx_ring.buffers = vzalloc(TBT_NET_NUM_TX_BUFS * + sizeof(struct tbt_buffer)); + if (!port->tx_ring.buffers) + goto ring_alloc_failure; + port->rx_ring.buffers = vzalloc(TBT_NET_NUM_RX_BUFS * + sizeof(struct tbt_buffer)); + if (!port->rx_ring.buffers) + goto ring_alloc_failure; + + /* + * Allocate TX and RX descriptors + * if the total size is less than a page, do a central allocation + * Otherwise, split TX and RX + */ + if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) { + port->tx_ring.desc = dmam_alloc_coherent( + &port->nhi_ctxt->pdev->dev, + TBT_NET_SIZE_TOTAL_DESCS, + &port->tx_ring.dma, + GFP_KERNEL | __GFP_ZERO); + if (!port->tx_ring.desc) + goto ring_alloc_failure; + /* RX starts where TX finishes */ + port->rx_ring.desc = &port->tx_ring.desc[TBT_NET_NUM_TX_BUFS]; + port->rx_ring.dma = port->tx_ring.dma + + (TBT_NET_NUM_TX_BUFS * sizeof(struct tbt_buf_desc)); + } else { + port->tx_ring.desc = dmam_alloc_coherent( + &port->nhi_ctxt->pdev->dev, + TBT_NET_NUM_TX_BUFS * + sizeof(struct tbt_buf_desc), + &port->tx_ring.dma, + GFP_KERNEL | __GFP_ZERO); + if (!port->tx_ring.desc) + goto ring_alloc_failure; + port->rx_ring.desc = dmam_alloc_coherent( + &port->nhi_ctxt->pdev->dev, + TBT_NET_NUM_RX_BUFS * + sizeof(struct tbt_buf_desc), + &port->rx_ring.dma, + GFP_KERNEL | __GFP_ZERO); + if (!port->rx_ring.desc) + goto rx_desc_alloc_failure; + } + + /* allocate TX buffers and configure the descriptors */ + for (i = 0; i < TBT_NET_NUM_TX_BUFS; i++) { + port->tx_ring.buffers[i].hdr = dma_alloc_coherent( + &port->nhi_ctxt->pdev->dev, + TBT_NUM_FRAMES_PER_PAGE * TBT_RING_MAX_FRAME_SIZE, + &port->tx_ring.buffers[i].dma, + GFP_KERNEL); + if (!port->tx_ring.buffers[i].hdr) + goto buffers_alloc_failure; + + port->tx_ring.desc[i].phys = + cpu_to_le64(port->tx_ring.buffers[i].dma); + port->tx_ring.desc[i].attributes = + cpu_to_le32(DESC_ATTR_REQ_STS | + TBT_NET_DESC_ATTR_SOF_EOF); + + /* + * In case the page is bigger than the frame size, + * make the next buffer 
descriptor points + * on the next frame memory address within the page + */ + for (i++, j = 1; (i < TBT_NET_NUM_TX_BUFS) && + (j < TBT_NUM_FRAMES_PER_PAGE); i++, j++) { + port->tx_ring.buffers[i].dma = + port->tx_ring.buffers[i - 1].dma + + TBT_RING_MAX_FRAME_SIZE; + port->tx_ring.buffers[i].hdr = + (void *)(port->tx_ring.buffers[i - 1].hdr) + + TBT_RING_MAX_FRAME_SIZE; + /* move the next offset i.e. TBT_RING_MAX_FRAME_SIZE */ + port->tx_ring.buffers[i].page_offset = + port->tx_ring.buffers[i - 1].page_offset + + TBT_RING_MAX_FRAME_SIZE; + port->tx_ring.desc[i].phys = + cpu_to_le64(port->tx_ring.buffers[i].dma); + port->tx_ring.desc[i].attributes = + cpu_to_le32(DESC_ATTR_REQ_STS | + TBT_NET_DESC_ATTR_SOF_EOF); + } + i--; + } + + port->negotiation_status = + BIT(port->nhi_ctxt->net_devices[port->num].medium_sts); + if (port->negotiation_status == BIT(MEDIUM_READY_FOR_CONNECTION)) { + port->login_retry_count = 0; + queue_delayed_work(port->nhi_ctxt->net_workqueue, + &port->login_retry_work, 0); + } + + netif_info(port, ifup, net_dev, "Thunderbolt(TM) Networking port %u - ready for ThunderboltIP negotiation\n", + port->num); + return 0; + +buffers_alloc_failure: + /* + * Rollback the Tx buffers that were already allocated + * until the failure + */ + for (i--; i >= 0; i--) { + /* free only for first buffer allocation */ + if (port->tx_ring.buffers[i].page_offset == 0) + dma_free_coherent(&port->nhi_ctxt->pdev->dev, + TBT_NUM_FRAMES_PER_PAGE * + TBT_RING_MAX_FRAME_SIZE, + port->tx_ring.buffers[i].hdr, + port->tx_ring.buffers[i].dma); + port->tx_ring.buffers[i].hdr = NULL; + } + /* + * For central allocation, free all + * otherwise free RX and then TX separately + */ + if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) { + dmam_free_coherent(&port->nhi_ctxt->pdev->dev, + TBT_NET_SIZE_TOTAL_DESCS, + port->tx_ring.desc, + port->tx_ring.dma); + port->rx_ring.desc = NULL; + } else { + dmam_free_coherent(&port->nhi_ctxt->pdev->dev, + TBT_NET_NUM_RX_BUFS * + sizeof(struct tbt_buf_desc), + port->rx_ring.desc, + port->rx_ring.dma); + port->rx_ring.desc = NULL; +rx_desc_alloc_failure: + dmam_free_coherent(&port->nhi_ctxt->pdev->dev, + TBT_NET_NUM_TX_BUFS * + sizeof(struct tbt_buf_desc), + port->tx_ring.desc, + port->tx_ring.dma); + } + port->tx_ring.desc = NULL; +ring_alloc_failure: + vfree(port->tx_ring.buffers); + port->tx_ring.buffers = NULL; + vfree(port->rx_ring.buffers); + port->rx_ring.buffers = NULL; + res = -ENOMEM; + netif_err(port, ifup, net_dev, "Thunderbolt(TM) Networking port %u - unable to allocate memory\n", + port->num); + + if (!port->nhi_ctxt->msix_entries) + goto out; + + devm_free_irq(&port->nhi_ctxt->pdev->dev, + port->nhi_ctxt->msix_entries[2 + (port->num * 2)].vector, + port); +request_irq_failure: + devm_free_irq(&port->nhi_ctxt->pdev->dev, + port->nhi_ctxt->msix_entries[3 + (port->num * 2)].vector, + port); +out: + return res; +} + +static int tbt_net_close(struct net_device *net_dev) +{ + struct tbt_port *port = netdev_priv(net_dev); + int i; + + /* + * Close connection, disable rings, flow controls + * and interrupts + */ + tbt_net_tear_down(net_dev, !(port->negotiation_status & + BIT(RECEIVE_LOGOUT))); + + cancel_work_sync(&port->login_response_work); + cancel_work_sync(&port->logout_work); + cancel_work_sync(&port->status_reply_work); + cancel_work_sync(&port->approve_inter_domain_work); + + /* Rollback the Tx buffers that were allocated */ + for (i = 0; i < TBT_NET_NUM_TX_BUFS; i++) { + if (port->tx_ring.buffers[i].page_offset == 0) + dma_free_coherent(&port->nhi_ctxt->pdev->dev, 
+ TBT_NUM_FRAMES_PER_PAGE * + TBT_RING_MAX_FRAME_SIZE, + port->tx_ring.buffers[i].hdr, + port->tx_ring.buffers[i].dma); + port->tx_ring.buffers[i].hdr = NULL; + } + /* Unmap the Rx buffers that were allocated */ + for (i = 0; i < TBT_NET_NUM_RX_BUFS; i++) + if (port->rx_ring.buffers[i].page) { + put_page(port->rx_ring.buffers[i].page); + port->rx_ring.buffers[i].page = NULL; + dma_unmap_page(&port->nhi_ctxt->pdev->dev, + port->rx_ring.buffers[i].dma, PAGE_SIZE, + DMA_FROM_DEVICE); + } + + /* + * For central allocation, free all + * otherwise free RX and then TX separately + */ + if (TBT_NET_SIZE_TOTAL_DESCS <= PAGE_SIZE) { + dmam_free_coherent(&port->nhi_ctxt->pdev->dev, + TBT_NET_SIZE_TOTAL_DESCS, + port->tx_ring.desc, + port->tx_ring.dma); + port->rx_ring.desc = NULL; + } else { + dmam_free_coherent(&port->nhi_ctxt->pdev->dev, + TBT_NET_NUM_RX_BUFS * + sizeof(struct tbt_buf_desc), + port->rx_ring.desc, + port->rx_ring.dma); + port->rx_ring.desc = NULL; + dmam_free_coherent(&port->nhi_ctxt->pdev->dev, + TBT_NET_NUM_TX_BUFS * + sizeof(struct tbt_buf_desc), + port->tx_ring.desc, + port->tx_ring.dma); + } + port->tx_ring.desc = NULL; + + vfree(port->tx_ring.buffers); + port->tx_ring.buffers = NULL; + vfree(port->rx_ring.buffers); + port->rx_ring.buffers = NULL; + + devm_free_irq(&port->nhi_ctxt->pdev->dev, + port->nhi_ctxt->msix_entries[3 + (port->num * 2)].vector, + port); + devm_free_irq(&port->nhi_ctxt->pdev->dev, + port->nhi_ctxt->msix_entries[2 + (port->num * 2)].vector, + port); + + netif_info(port, ifdown, net_dev, "Thunderbolt(TM) Networking port %u - is down\n", + port->num); + + return 0; +} + +static bool tbt_net_xmit_csum(struct sk_buff *skb, + struct tbt_desc_ring *tx_ring, u32 first, + u32 last, u32 frame_count) +{ + + struct tbt_frame_header *hdr = tx_ring->buffers[first].hdr; + __wsum wsum = (__force __wsum)htonl(skb->len - + skb_transport_offset(skb)); + int offset = skb_transport_offset(skb); + __sum16 *tucso; /* TCP UDP Checksum Segment Offset */ + __be16 protocol = skb->protocol; + u8 *dest = (u8 *)(hdr + 1); + int len; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + for (; first != last; + first = (first + 1) & (TBT_NET_NUM_TX_BUFS - 1)) { + hdr = tx_ring->buffers[first].hdr; + hdr->frame_count = cpu_to_le32(frame_count); + } + return true; + } + + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_hdr *vhdr, vh; + + vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(vh), &vh); + if (!vhdr) + return false; + + protocol = vhdr->h_vlan_encapsulated_proto; + } + + /* + * Data points on the beginning of packet. + * Check is the checksum absolute place in the + * packet. + * ipcso will update IP checksum. + * tucso will update TCP/UPD checksum. 
+ */ + if (protocol == htons(ETH_P_IP)) { + __sum16 *ipcso = (__sum16 *)(dest + + ((u8 *)&(ip_hdr(skb)->check) - skb->data)); + + *ipcso = 0; + *ipcso = ip_fast_csum(dest + skb_network_offset(skb), + ip_hdr(skb)->ihl); + if (ip_hdr(skb)->protocol == IPPROTO_TCP) + tucso = (__sum16 *)(dest + + ((u8 *)&(tcp_hdr(skb)->check) - skb->data)); + else if (ip_hdr(skb)->protocol == IPPROTO_UDP) + tucso = (__sum16 *)(dest + + ((u8 *)&(udp_hdr(skb)->check) - skb->data)); + else + return false; + + *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, + ip_hdr(skb)->protocol, 0); + } else if (skb_is_gso(skb)) { + if (skb_is_gso_v6(skb)) { + tucso = (__sum16 *)(dest + + ((u8 *)&(tcp_hdr(skb)->check) - skb->data)); + *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + } else if ((protocol == htons(ETH_P_IPV6)) && + (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) { + tucso = (__sum16 *)(dest + + ((u8 *)&(udp_hdr(skb)->check) - skb->data)); + *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_UDP, 0); + } else { + return false; + } + } else if (protocol == htons(ETH_P_IPV6)) { + tucso = (__sum16 *)(dest + skb_checksum_start_offset(skb) + + skb->csum_offset); + *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, ipv6_hdr(skb)->nexthdr, 0); + } else { + return false; + } + + /* First frame was headers, rest of the frames is data */ + for (; first != last; first = (first + 1) & (TBT_NET_NUM_TX_BUFS - 1), + offset = 0) { + hdr = tx_ring->buffers[first].hdr; + dest = (u8 *)(hdr + 1) + offset; + len = le32_to_cpu(hdr->frame_size) - offset; + wsum = csum_partial(dest, len, wsum); + hdr->frame_count = cpu_to_le32(frame_count); + } + *tucso = csum_fold(wsum); + + return true; +} + +static netdev_tx_t tbt_net_xmit_frame(struct sk_buff *skb, + struct net_device *net_dev) +{ + struct tbt_port *port = netdev_priv(net_dev); + void __iomem *iobase = port->nhi_ctxt->iobase; + void __iomem *reg = TBT_RING_CONS_PROD_REG(iobase, + REG_TX_RING_BASE, + port->local_path); + struct tbt_desc_ring *tx_ring = &port->tx_ring; + struct tbt_frame_header *hdr; + u32 prod_cons, prod, cons, first; + /* len equivalent to the fragment length */ + unsigned int len = skb_headlen(skb); + /* data_len is overall packet length */ + unsigned int data_len = skb->len; + u32 frm_idx, frag_num = 0; + const u8 *src = skb->data; + bool unmap = false; + __le32 *attr; + u8 *dest; + + if (unlikely(data_len == 0 || data_len > TBT_NET_MTU)) + goto invalid_packet; + + prod_cons = ioread32(reg); + prod = TBT_REG_RING_PROD_EXTRACT(prod_cons); + cons = TBT_REG_RING_CONS_EXTRACT(prod_cons); + if (prod >= TBT_NET_NUM_TX_BUFS || cons >= TBT_NET_NUM_TX_BUFS) + goto tx_error; + + if (data_len > (TBT_NUM_BUFS_BETWEEN(prod, cons, TBT_NET_NUM_TX_BUFS) * + TBT_RING_MAX_FRM_DATA_SZ)) { + unsigned long flags; + + netif_stop_queue(net_dev); + + spin_lock_irqsave(&port->nhi_ctxt->lock, flags); + /* + * Enable TX interrupt to be notified about available buffers + * and restart transmission upon this. 
+ */ + RING_INT_ENABLE_TX(iobase, port->local_path); + spin_unlock_irqrestore(&port->nhi_ctxt->lock, flags); + + return NETDEV_TX_BUSY; + } + + first = prod; + attr = &tx_ring->desc[prod].attributes; + hdr = tx_ring->buffers[prod].hdr; + dest = (u8 *)(hdr + 1); + /* if overall packet is bigger than the frame data size */ + for (frm_idx = 0; data_len > TBT_RING_MAX_FRM_DATA_SZ; ++frm_idx) { + u32 size_left = TBT_RING_MAX_FRM_DATA_SZ; + + *attr &= cpu_to_le32(~(DESC_ATTR_LEN_MASK | + DESC_ATTR_INT_EN | + DESC_ATTR_DESC_DONE)); + hdr->frame_size = cpu_to_le32(TBT_RING_MAX_FRM_DATA_SZ); + hdr->frame_index = cpu_to_le16(frm_idx); + hdr->frame_id = cpu_to_le16(port->frame_id); + + do { + if (len > size_left) { + /* + * Copy data onto tx buffer data with full + * frame size then break + * and go to next frame + */ + memcpy(dest, src, size_left); + len -= size_left; + dest += size_left; + src += size_left; + break; + } + + memcpy(dest, src, len); + size_left -= len; + dest += len; + + if (unmap) { + kunmap_atomic((void *)src); + unmap = false; + } + /* + * Ensure all fragments have been processed + */ + if (frag_num < skb_shinfo(skb)->nr_frags) { + const skb_frag_t *frag = + &(skb_shinfo(skb)->frags[frag_num]); + len = skb_frag_size(frag); + /* map and then unmap quickly */ + src = kmap_atomic(skb_frag_page(frag)) + + frag->page_offset; + unmap = true; + ++frag_num; + } else if (unlikely(size_left > 0)) { + goto invalid_packet; + } + } while (size_left > 0); + + data_len -= TBT_RING_MAX_FRM_DATA_SZ; + prod = (prod + 1) & (TBT_NET_NUM_TX_BUFS - 1); + attr = &tx_ring->desc[prod].attributes; + hdr = tx_ring->buffers[prod].hdr; + dest = (u8 *)(hdr + 1); + } + + *attr &= cpu_to_le32(~(DESC_ATTR_LEN_MASK | DESC_ATTR_DESC_DONE)); + /* Enable the interrupts, for resuming from stop queue later (if so) */ + *attr |= cpu_to_le32(DESC_ATTR_INT_EN | + (((sizeof(struct tbt_frame_header) + data_len) << + DESC_ATTR_LEN_SHIFT) & DESC_ATTR_LEN_MASK)); + hdr->frame_size = cpu_to_le32(data_len); + hdr->frame_index = cpu_to_le16(frm_idx); + hdr->frame_id = cpu_to_le16(port->frame_id); + + /* In case the remaining data_len is smaller than a frame */ + while (len < data_len) { + memcpy(dest, src, len); + data_len -= len; + dest += len; + + if (unmap) { + kunmap_atomic((void *)src); + unmap = false; + } + + if (frag_num < skb_shinfo(skb)->nr_frags) { + const skb_frag_t *frag = + &(skb_shinfo(skb)->frags[frag_num]); + len = skb_frag_size(frag); + src = kmap_atomic(skb_frag_page(frag)) + + frag->page_offset; + unmap = true; + ++frag_num; + } else if (unlikely(data_len > 0)) { + goto invalid_packet; + } + } + memcpy(dest, src, data_len); + if (unmap) { + kunmap_atomic((void *)src); + unmap = false; + } + + ++frm_idx; + prod = (prod + 1) & (TBT_NET_NUM_TX_BUFS - 1); + + if (!tbt_net_xmit_csum(skb, tx_ring, first, prod, frm_idx)) + goto invalid_packet; + + if (port->match_frame_id) + ++port->frame_id; + + prod_cons &= ~REG_RING_PROD_MASK; + prod_cons |= (prod << REG_RING_PROD_SHIFT) & REG_RING_PROD_MASK; + wmb(); /* make sure producer update is done after buffers are ready */ + iowrite32(prod_cons, reg); + + ++port->stats.tx_packets; + port->stats.tx_bytes += skb->len; + + dev_consume_skb_any(skb); + return NETDEV_TX_OK; + +invalid_packet: + netif_err(port, tx_err, net_dev, "port %u invalid transmit packet\n", + port->num); +tx_error: + ++port->stats.tx_errors; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +} + +static void tbt_net_set_rx_mode(struct net_device *net_dev) +{ + struct tbt_port *port = netdev_priv(net_dev); + 
struct netdev_hw_addr *ha; + + if (net_dev->flags & IFF_PROMISC) + port->packet_filters |= BIT(PACKET_TYPE_PROMISCUOUS); + else + port->packet_filters &= ~BIT(PACKET_TYPE_PROMISCUOUS); + if (net_dev->flags & IFF_ALLMULTI) + port->packet_filters |= BIT(PACKET_TYPE_ALL_MULTICAST); + else + port->packet_filters &= ~BIT(PACKET_TYPE_ALL_MULTICAST); + + /* if you have more than a single MAC address */ + if (netdev_uc_count(net_dev) > 1) + port->packet_filters |= BIT(PACKET_TYPE_UNICAST_PROMISCUOUS); + /* if have a single MAC address */ + else if (netdev_uc_count(net_dev) == 1) { + netdev_for_each_uc_addr(ha, net_dev) + /* checks whether the MAC is what we set */ + if (ether_addr_equal(ha->addr, net_dev->dev_addr)) + port->packet_filters &= + ~BIT(PACKET_TYPE_UNICAST_PROMISCUOUS); + else + port->packet_filters |= + BIT(PACKET_TYPE_UNICAST_PROMISCUOUS); + } else { + port->packet_filters &= ~BIT(PACKET_TYPE_UNICAST_PROMISCUOUS); + } + + /* Populate the multicast hash table with received MAC addresses */ + memset(port->multicast_hash_table, 0, + sizeof(port->multicast_hash_table)); + netdev_for_each_mc_addr(ha, net_dev) { + u16 hash_val = TBT_NET_ETHER_ADDR_HASH(ha->addr); + + port->multicast_hash_table[hash_val / BITS_PER_U32] |= + BIT(hash_val % BITS_PER_U32); + } + +} + +static struct rtnl_link_stats64 *tbt_net_get_stats64( + struct net_device *net_dev, + struct rtnl_link_stats64 *stats) +{ + struct tbt_port *port = netdev_priv(net_dev); + + memset(stats, 0, sizeof(*stats)); + stats->tx_packets = port->stats.tx_packets; + stats->tx_bytes = port->stats.tx_bytes; + stats->tx_errors = port->stats.tx_errors; + stats->rx_packets = port->stats.rx_packets; + stats->rx_bytes = port->stats.rx_bytes; + stats->rx_length_errors = port->stats.rx_length_errors; + stats->rx_over_errors = port->stats.rx_over_errors; + stats->rx_crc_errors = port->stats.rx_crc_errors; + stats->rx_missed_errors = port->stats.rx_missed_errors; + stats->rx_errors = stats->rx_length_errors + stats->rx_over_errors + + stats->rx_crc_errors + stats->rx_missed_errors; + stats->multicast = port->stats.multicast; + return stats; } +static int tbt_net_set_mac_address(struct net_device *net_dev, void *addr) +{ + struct sockaddr *saddr = addr; + + if (!is_valid_ether_addr(saddr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(net_dev->dev_addr, saddr->sa_data, net_dev->addr_len); + + return 0; +} + +static int tbt_net_change_mtu(struct net_device *net_dev, int new_mtu) +{ + struct tbt_port *port = netdev_priv(net_dev); + + /* MTU < 68 is an error and causes problems on some kernels */ + if (new_mtu < 68 || new_mtu > (TBT_NET_MTU - ETH_HLEN)) + return -EINVAL; + + netif_info(port, probe, net_dev, "Thunderbolt(TM) Networking port %u - changing MTU from %u to %d\n", + port->num, net_dev->mtu, new_mtu); + + net_dev->mtu = new_mtu; + + return 0; +} + +static const struct net_device_ops tbt_netdev_ops = { + /* called when the network is up'ed */ + .ndo_open = tbt_net_open, + /* called when the network is down'ed */ + .ndo_stop = tbt_net_close, + .ndo_start_xmit = tbt_net_xmit_frame, + .ndo_set_rx_mode = tbt_net_set_rx_mode, + .ndo_get_stats64 = tbt_net_get_stats64, + .ndo_set_mac_address = tbt_net_set_mac_address, + .ndo_change_mtu = tbt_net_change_mtu, + .ndo_validate_addr = eth_validate_addr, +}; + +static int tbt_net_get_settings(__maybe_unused struct net_device *net_dev, + struct ethtool_cmd *ecmd) +{ + ecmd->supported |= SUPPORTED_20000baseKR2_Full; + ecmd->advertising |= ADVERTISED_20000baseKR2_Full; + ecmd->autoneg = AUTONEG_DISABLE; + 
ecmd->transceiver = XCVR_INTERNAL; + ecmd->supported |= SUPPORTED_FIBRE; + ecmd->advertising |= ADVERTISED_FIBRE; + ecmd->port = PORT_FIBRE; + ethtool_cmd_speed_set(ecmd, SPEED_20000); + ecmd->duplex = DUPLEX_FULL; + + return 0; +} + + +static u32 tbt_net_get_msglevel(struct net_device *net_dev) +{ + struct tbt_port *port = netdev_priv(net_dev); + + return port->msg_enable; +} + +static void tbt_net_set_msglevel(struct net_device *net_dev, u32 data) +{ + struct tbt_port *port = netdev_priv(net_dev); + + port->msg_enable = data; +} + +static void tbt_net_get_strings(__maybe_unused struct net_device *net_dev, + u32 stringset, u8 *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, tbt_net_gstrings_stats, + sizeof(tbt_net_gstrings_stats)); +} + +static void tbt_net_get_ethtool_stats(struct net_device *net_dev, + __maybe_unused struct ethtool_stats *sts, + u64 *data) +{ + struct tbt_port *port = netdev_priv(net_dev); + + memcpy(data, &port->stats, sizeof(port->stats)); +} + +static int tbt_net_get_sset_count(__maybe_unused struct net_device *net_dev, + int sset) +{ + if (sset == ETH_SS_STATS) + return sizeof(tbt_net_gstrings_stats) / ETH_GSTRING_LEN; + return -EOPNOTSUPP; +} + +static void tbt_net_get_drvinfo(struct net_device *net_dev, + struct ethtool_drvinfo *drvinfo) +{ + struct tbt_port *port = netdev_priv(net_dev); + + strlcpy(drvinfo->driver, "Thunderbolt(TM) Networking", + sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); + + strlcpy(drvinfo->bus_info, pci_name(port->nhi_ctxt->pdev), + sizeof(drvinfo->bus_info)); + drvinfo->n_stats = tbt_net_get_sset_count(net_dev, ETH_SS_STATS); +} + +static const struct ethtool_ops tbt_net_ethtool_ops = { + .get_settings = tbt_net_get_settings, + .get_drvinfo = tbt_net_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_msglevel = tbt_net_get_msglevel, + .set_msglevel = tbt_net_set_msglevel, + .get_strings = tbt_net_get_strings, + .get_ethtool_stats = tbt_net_get_ethtool_stats, + .get_sset_count = tbt_net_get_sset_count, +}; + static inline int send_message(struct tbt_port *port, const char *func, enum pdf_value pdf, u32 msg_len, const u8 *msg) { @@ -515,6 +1943,10 @@ void negotiation_events(struct net_device *net_dev, /* configure TX ring */ reg = iobase + REG_TX_RING_BASE + (port->local_path * REG_RING_STEP); + iowrite32(lower_32_bits(port->tx_ring.dma), + reg + REG_RING_PHYS_LO_OFFSET); + iowrite32(upper_32_bits(port->tx_ring.dma), + reg + REG_RING_PHYS_HI_OFFSET); tx_ring_conf = (TBT_NET_NUM_TX_BUFS << REG_RING_SIZE_SHIFT) & REG_RING_SIZE_MASK; @@ -557,6 +1989,10 @@ void negotiation_events(struct net_device *net_dev, */ reg = iobase + REG_RX_RING_BASE + (port->local_path * REG_RING_STEP); + iowrite32(lower_32_bits(port->rx_ring.dma), + reg + REG_RING_PHYS_LO_OFFSET); + iowrite32(upper_32_bits(port->rx_ring.dma), + reg + REG_RING_PHYS_HI_OFFSET); rx_ring_conf = (TBT_NET_NUM_RX_BUFS << REG_RING_SIZE_SHIFT) & REG_RING_SIZE_MASK; @@ -566,6 +2002,17 @@ void negotiation_events(struct net_device *net_dev, REG_RING_BUF_SIZE_MASK; iowrite32(rx_ring_conf, reg + REG_RING_SIZE_OFFSET); + /* allocate RX buffers and configure the descriptors */ + if (!tbt_net_alloc_rx_buffers(&port->nhi_ctxt->pdev->dev, + &port->rx_ring, + TBT_NET_NUM_RX_BUFS, + reg + REG_RING_CONS_PROD_OFFSET, + GFP_KERNEL)) { + netif_err(port, link, net_dev, "Thunderbolt(TM) Networking port %u - no memory for receive buffers\n", + port->num); + tbt_net_tear_down(net_dev, true); + break; + } spin_lock_irqsave(&port->nhi_ctxt->lock, flags); 
/* enable RX interrupt */ @@ -578,6 +2025,7 @@ void negotiation_events(struct net_device *net_dev, netif_info(port, link, net_dev, "Thunderbolt(TM) Networking port %u - ready\n", port->num); + napi_enable(&port->napi); netif_carrier_on(net_dev); netif_start_queue(net_dev); break; @@ -788,15 +2236,42 @@ struct net_device *nhi_alloc_etherdev(struct tbt_nhi_ctxt *nhi_ctxt, scnprintf(net_dev->name, sizeof(net_dev->name), "tbtnet%%dp%hhu", port_num); + net_dev->netdev_ops = &tbt_netdev_ops; + + netif_napi_add(net_dev, &port->napi, tbt_net_poll, NAPI_POLL_WEIGHT); + + net_dev->hw_features = NETIF_F_SG | + NETIF_F_ALL_TSO | + NETIF_F_UFO | + NETIF_F_GRO | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM; + net_dev->features = net_dev->hw_features; + if (nhi_ctxt->pci_using_dac) + net_dev->features |= NETIF_F_HIGHDMA; + INIT_DELAYED_WORK(&port->login_retry_work, login_retry); INIT_WORK(&port->login_response_work, login_response); INIT_WORK(&port->logout_work, logout); INIT_WORK(&port->status_reply_work, status_reply); INIT_WORK(&port->approve_inter_domain_work, approve_inter_domain); + net_dev->ethtool_ops = &tbt_net_ethtool_ops; + + tbt_net_change_mtu(net_dev, TBT_NET_MTU - ETH_HLEN); + + if (register_netdev(net_dev)) + goto err_register; + + netif_carrier_off(net_dev); + netif_info(port, probe, net_dev, "Thunderbolt(TM) Networking port %u - MAC Address: %pM\n", port_num, net_dev->dev_addr); return net_dev; + +err_register: + free_netdev(net_dev); + return NULL; }
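
Reviewer note (illustration only, not part of the patch): the RX
filtering added in tbt_net_set_rx_mode() and tbt_net_multicast_mac_set()
keeps a 1024-bit hash table of subscribed multicast addresses, as the
comment on multicast_hash_table above states. The sketch below shows the
intended set/test pair; the helper names are hypothetical, and
TBT_NET_ETHER_ADDR_HASH() is assumed to map a MAC address to a value in
[0, 1023] as it does elsewhere in this driver.

/* Illustration of the multicast hash filter used in this patch. */
static void example_mcast_subscribe(u32 *hash_table, const u8 *mac)
{
	u16 hash = TBT_NET_ETHER_ADDR_HASH(mac);	/* assumed: 0..1023 */

	hash_table[hash / BITS_PER_U32] |= BIT(hash % BITS_PER_U32);
}

static bool example_mcast_match(const u32 *hash_table, const u8 *mac)
{
	u16 hash = TBT_NET_ETHER_ADDR_HASH(mac);

	return !!(hash_table[hash / BITS_PER_U32] & BIT(hash % BITS_PER_U32));
}
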