@@ -69,6 +69,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/dpctl.h \
lib/dp-packet.h \
lib/dp-packet.c \
+ lib/dp-packet-lso.c \
+ lib/dp-packet-lso.h \
lib/dpif-netdev.c \
lib/dpif-netdev.h \
lib/dpif-provider.h \
new file mode 100644
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <netinet/ip6.h>
+#include <sys/ioctl.h>
+
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#include "openvswitch/list.h"
+
+#include "byte-order.h"
+#include "csum.h"
+#include "daemon.h"
+#include "dirs.h"
+#include "dpif.h"
+#include "dp-packet.h"
+#include "entropy.h"
+#include "flow.h"
+#include "hash.h"
+#include "hmap.h"
+#include "id-pool.h"
+#include "netdev-provider.h"
+#include "netdev-vport.h"
+#include "netdev-vport-private.h"
+#include "odp-netlink.h"
+#include "dp-packet.h"
+#include "dp-packet-lso.h"
+#include "ovs-router.h"
+#include "packets.h"
+#include "poll-loop.h"
+#include "random.h"
+#include "route-table.h"
+#include "shash.h"
+#include "socket-util.h"
+#include "timeval.h"
+#include "netdev-native-tnl.h"
+#include "openvswitch/vlog.h"
+#include "unaligned.h"
+#include "unixctl.h"
+#include "util.h"
+
+VLOG_DEFINE_THIS_MODULE(dp_packet_lso);
+static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
+
+#define UDP_CSUM_OFFSET offsetof(struct udp_header, udp_csum)
+#define TCP_CSUM_OFFSET offsetof(struct tcp_header, tcp_csum)
+
+/* Splits 'orig' into a NULL-terminated singly linked list of segments,
+ * chained through the per-packet LSO context (PACKET_LSO_CTX()->next).
+ * Each segment carries a copy of the first 'header_len' bytes of 'orig'
+ * followed by at most 'orig->lso.mss' bytes of payload.
+ *
+ * If 'orig' has no MSS set, a single clone with its LSO state cleared
+ * is returned instead.  'orig' itself is never modified or freed; the
+ * caller owns both it and the returned list. */
+static struct dp_packet *
+segment_packet__(struct dp_packet *orig, int header_len)
+{
+    struct dp_packet *seg_list = NULL, *prev = NULL;
+    uint16_t mss = orig->lso.mss;
+    int offset = header_len;
+    int size = dp_packet_size(orig);
+    struct dp_packet *seg;
+
+    if (!mss) {
+        /* No segmentation requested: hand back a plain copy. */
+        seg_list = dp_packet_clone(orig);
+        memset(&seg_list->lso, 0, sizeof seg_list->lso);
+        PACKET_LSO_CTX(seg_list)->next = NULL;
+        return seg_list;
+    }
+    while (offset < size) {
+        int current_seg_size;
+        unsigned char *data;
+
+        /* The last segment may be shorter than the MSS. */
+        current_seg_size = size < (offset + mss) ? (size - offset) : mss;
+        seg = dp_packet_new(0);
+        dp_packet_put(seg, dp_packet_data(orig), header_len);
+
+        data = (unsigned char *)dp_packet_data(orig) + offset;
+        dp_packet_put(seg, data, current_seg_size);
+        offset += mss;
+        /* Header layout is identical to the original's, so the offsets
+         * carry over unchanged. */
+        seg->l2_pad_size = orig->l2_pad_size;
+        seg->l2_5_ofs = orig->l2_5_ofs;
+        seg->l3_ofs = orig->l3_ofs;
+        seg->l4_ofs = orig->l4_ofs;
+        PACKET_LSO_CTX(seg)->next = NULL;
+
+        if (prev) {
+            PACKET_LSO_CTX(prev)->next = seg;
+        }
+        prev = seg;
+        if (!seg_list) {
+            seg_list = seg;
+        }
+    }
+    return seg_list;
+}
+
+/* Recomputes the L4 checksum of 'pkt'.
+ *
+ * 'l4_offset' is the byte offset of the L4 header within 'pkt' and
+ * 'csum_offset' is the byte offset of the checksum field within that
+ * header.  The sum is folded over the entire L4 portion including the
+ * current contents of the checksum field, which is therefore assumed
+ * to be pre-seeded with the pseudo-header partial checksum (as with
+ * Linux GSO).  NOTE(review): confirm every caller seeds it that way.
+ * If the L4 length changed ('new_ip_tot_len' != 'old_ip_tot_len'),
+ * the pseudo-header length contribution is adjusted accordingly. */
+void
+fixup_packet_cheksum(struct dp_packet *pkt, int l4_offset, int csum_offset,
+                     int new_ip_tot_len, int old_ip_tot_len)
+{
+    ovs_be16 *data_ptr, *csum;
+    uint32_t l4_csum;
+
+    /* Use byte-granular pointer arithmetic: the previous 16-bit word
+     * math ((uint16_t *)base + (off >> 1)) silently dropped the low
+     * bit of an odd offset. */
+    data_ptr = (ovs_be16 *) ((char *) dp_packet_data(pkt) + l4_offset);
+    csum = (ovs_be16 *) ((char *) data_ptr + csum_offset);
+
+    l4_csum = csum_continue(0, data_ptr, dp_packet_size(pkt) - l4_offset);
+    *csum = csum_finish(l4_csum);
+    if (new_ip_tot_len != old_ip_tot_len) {
+        *csum = recalc_csum16(*csum, htons(old_ip_tot_len),
+                              htons(new_ip_tot_len));
+    }
+}
+
+/* Recomputes the L4 checksum of segment 'pkt', adjusting for the
+ * difference in L4 length between 'pkt' and the original packet 'orig'
+ * it was carved from.  'csum_offset' is the byte offset of the
+ * checksum field within the L4 header. */
+static void
+fixup_segment_cheksum(struct dp_packet *pkt, struct dp_packet *orig, int csum_offset)
+{
+    int l4_offset = orig->l4_ofs;
+    /* "ip_tot_len" here is really the pseudo-header L4 length: bytes
+     * from the start of the L4 header to the end of the packet. */
+    int old_ip_tot_len = dp_packet_size(orig) - l4_offset;
+    int new_ip_tot_len = dp_packet_size(pkt) - l4_offset;
+
+    fixup_packet_cheksum(pkt, l4_offset, csum_offset,
+                         new_ip_tot_len, old_ip_tot_len);
+}
+
+/* Segments a UDP packet.  Every segment receives a copy of the UDP
+ * header with its length field rewritten and its checksum recomputed.
+ * NOTE(review): on the IPv4 path these segments are later marked as IP
+ * fragments, yet each carries a UDP header; real IP fragments carry it
+ * only in the first fragment -- confirm the intended wire format. */
+static struct dp_packet *
+segment_udp_packet(struct dp_packet *orig)
+{
+    struct dp_packet *seg_list, *seg;
+
+    seg_list = segment_packet__(orig, orig->l4_ofs + sizeof(struct udp_header));
+    FOR_EACH_LSO_SEG(seg_list, seg) {
+        struct udp_header *udp;
+
+        udp = dp_packet_l4(seg);
+        /* udp_len covers the UDP header plus this segment's payload. */
+        udp->udp_len = htons(dp_packet_size(seg) - seg->l4_ofs);
+        fixup_segment_cheksum(seg, orig, UDP_CSUM_OFFSET);
+    }
+    return seg_list;
+}
+
+/* Segments a TCP packet into MSS-sized segments.  Each segment gets a
+ * copy of the original TCP header with an adjusted sequence number and
+ * a recomputed checksum.  FIN and PSH apply to the end of the original
+ * stream of data, so they are kept only on the final segment. */
+static struct dp_packet *
+segment_tcp_packet(struct dp_packet *orig)
+{
+    struct dp_packet *seg_list, *seg;
+    struct tcp_header *orig_tcph = dp_packet_l4(orig);
+    int tcph_len = TCP_OFFSET(orig_tcph->tcp_ctl) * 4;
+    uint32_t tcp_seq = ntohl(get_16aligned_be32(&orig_tcph->tcp_seq));
+    int mss = orig->lso.mss;
+
+    seg_list = segment_packet__(orig, orig->l4_ofs + tcph_len);
+    FOR_EACH_LSO_SEG(seg_list, seg) {
+
+        if (mss) {
+            struct tcp_header *tcph = dp_packet_l4(seg);
+
+            put_16aligned_be32(&tcph->tcp_seq, htonl(tcp_seq));
+            tcp_seq += mss;
+            /* Clearing FIN/PSH on every segment (as before) would lose
+             * the connection teardown / push semantics entirely; strip
+             * them from all but the last segment. */
+            if (PACKET_LSO_CTX(seg)->next) {
+                tcph->tcp_ctl = htons(ntohs(tcph->tcp_ctl) &
+                                      ~(TCP_FIN | TCP_PSH));
+            }
+        }
+
+        fixup_segment_cheksum(seg, orig, TCP_CSUM_OFFSET);
+    }
+    return seg_list;
+}
+
+/* Dispatches L4 segmentation based on the packet's LSO type.  Callers
+ * must only pass packets with a known LSO type set. */
+static struct dp_packet *
+segment_l4_packet(struct dp_packet *orig)
+{
+    if (orig->lso.type & (DPBUF_LSO_TCPv4 | DPBUF_LSO_TCPv6)) {
+        return segment_tcp_packet(orig);
+    } else if (orig->lso.type & (DPBUF_LSO_UDPv4 | DPBUF_LSO_UDPv6)) {
+        return segment_udp_packet(orig);
+    }
+    /* OVS_NOT_REACHED() aborts and is marked noreturn, so the old
+     * trailing "return NULL;" was dead code. */
+    OVS_NOT_REACHED();
+}
+
+/* Segments an IPv4 packet.  TCP segments become independent IP packets
+ * with incrementing IP IDs; other (UDP) segments become IP fragments
+ * chained through the fragment offset and MF flag. */
+static struct dp_packet *
+segment_ipv4_packet(struct dp_packet *orig)
+{
+    struct dp_packet *seg_list, *seg;
+    struct ip_header *orig_iph = dp_packet_l3(orig);
+    int ip_hdr_len = IP_IHL(orig_iph->ip_ihl_ver) * 4;
+    int ip_id = 0;
+    int ip_offset = 0;
+    bool inc_ip_id = false;
+
+    if (orig->lso.type & DPBUF_LSO_TCPv4) {
+        inc_ip_id = true;
+        ip_id = ntohs(orig_iph->ip_id);
+    }
+
+    /* Honor IP options: use the actual header length from the IHL
+     * field rather than assuming the minimum 20-byte header. */
+    orig->l4_ofs = orig->l3_ofs + ip_hdr_len;
+    seg_list = segment_l4_packet(orig);
+
+    FOR_EACH_LSO_SEG(seg_list, seg) {
+        struct ip_header *iph = dp_packet_l3(seg);
+        int new_len = dp_packet_size(seg) - orig->l3_ofs;
+
+        if (inc_ip_id) {
+            iph->ip_id = htons(ip_id++);
+        } else {
+            /* The fragment offset field counts 8-octet units, and like
+             * the flag bits it lives in network byte order; the old
+             * code stored a host-order byte count and OR'd a host-order
+             * flag into the big-endian field. */
+            iph->ip_frag_off = htons(ip_offset >> 3);
+            ip_offset += (new_len - ip_hdr_len);
+            if (PACKET_LSO_CTX(seg)->next) {
+                iph->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
+            }
+        }
+        iph->ip_tot_len = htons(new_len);
+        iph->ip_csum = 0;
+        /* Checksum the real header length, including any options. */
+        iph->ip_csum = csum(iph, ip_hdr_len);
+    }
+    return seg_list;
+}
+
+/* Walks 'pkt''s IPv6 extension-header chain, locates the fragment
+ * header, stores '*ipv6_frag_offset' in it and sets the MF bit on
+ * every fragment but the last.  Logs a rate-limited error if no
+ * fragment header is found or the chain runs past the packet end.
+ * NOTE(review): storing the byte offset directly in ip6f_offlg is only
+ * correct when it is a multiple of 8 (the low three bits are flags) --
+ * confirm the caller always advances it in 8-octet units. */
+static void
+update_ipv6_frag_hdr(struct dp_packet *pkt, int *ipv6_frag_offset)
+{
+    int size = dp_packet_size(pkt);
+    struct ip6_hdr *nh = dp_packet_l3(pkt);
+    uint8_t nw_proto = nh->ip6_nxt;
+    void *data = nh + 1;
+    /* 'offset' is the position of 'data' measured from the start of
+     * the frame. */
+    int offset = (unsigned char *)data - (unsigned char *)dp_packet_l2(pkt);
+
+    while (1) {
+        if (OVS_LIKELY((nw_proto != IPPROTO_HOPOPTS)
+                       && (nw_proto != IPPROTO_ROUTING)
+                       && (nw_proto != IPPROTO_DSTOPTS)
+                       && (nw_proto != IPPROTO_AH)
+                       && (nw_proto != IPPROTO_FRAGMENT))) {
+            break;
+        }
+
+        if ((nw_proto == IPPROTO_HOPOPTS)
+            || (nw_proto == IPPROTO_ROUTING)
+            || (nw_proto == IPPROTO_DSTOPTS)) {
+            const struct ip6_ext *ext_hdr = data;
+
+            nw_proto = ext_hdr->ip6e_nxt;
+            offset += (ext_hdr->ip6e_len + 1) * 8;
+            if (offset > size) {
+                goto out;
+            }
+        } else if (nw_proto == IPPROTO_AH) {
+            const struct ip6_ext *ext_hdr = data;
+
+            nw_proto = ext_hdr->ip6e_nxt;
+            offset += (ext_hdr->ip6e_len + 2) * 4;
+            if (offset > size) {
+                goto out;
+            }
+        } else if (nw_proto == IPPROTO_FRAGMENT) {
+            struct ovs_16aligned_ip6_frag *frag_hdr;
+
+            offset += sizeof (struct ovs_16aligned_ip6_frag);
+            if (offset > size) {
+                goto out;
+            }
+            frag_hdr = data;
+            frag_hdr->ip6f_offlg = htons(*ipv6_frag_offset);
+            if (PACKET_LSO_CTX(pkt)->next) {
+                frag_hdr->ip6f_offlg |= htons(IPV6_MF);
+            }
+            return;
+        }
+        /* 'offset' is absolute (from the frame start), so recompute
+         * 'data' from the base pointer.  The previous code advanced
+         * 'data' by the full cumulative offset, jumping far past the
+         * next extension header on every iteration. */
+        data = (unsigned char *)dp_packet_l2(pkt) + offset;
+    }
+out:
+    VLOG_ERR_RL(&err_rl, "could not find frag header");
+}
+
+/* Parses 'pkt''s IPv6 extension-header chain and sets pkt->l4_ofs to
+ * the offset of the L4 header (past all extension headers).  Returns 0
+ * on success, -EINVAL if the chain runs past the end of the packet. */
+static int
+ipv6_set_l4_offset(struct dp_packet *pkt)
+{
+    int size = dp_packet_size(pkt);
+    struct ip6_hdr *nh = dp_packet_l3(pkt);
+    uint8_t nw_proto = nh->ip6_nxt;
+    void *data = nh + 1;
+    /* 'offset' is the position of 'data' measured from the start of
+     * the packet data. */
+    int offset = pkt->l3_ofs + sizeof *nh;
+
+    while (1) {
+        if (OVS_LIKELY((nw_proto != IPPROTO_HOPOPTS)
+                       && (nw_proto != IPPROTO_ROUTING)
+                       && (nw_proto != IPPROTO_DSTOPTS)
+                       && (nw_proto != IPPROTO_AH)
+                       && (nw_proto != IPPROTO_FRAGMENT))) {
+            break;
+        }
+
+        if ((nw_proto == IPPROTO_HOPOPTS)
+            || (nw_proto == IPPROTO_ROUTING)
+            || (nw_proto == IPPROTO_DSTOPTS)) {
+            const struct ip6_ext *ext_hdr = data;
+
+            nw_proto = ext_hdr->ip6e_nxt;
+            offset += (ext_hdr->ip6e_len + 1) * 8;
+            if (offset > size) {
+                goto out;
+            }
+        } else if (nw_proto == IPPROTO_AH) {
+            const struct ip6_ext *ext_hdr = data;
+
+            nw_proto = ext_hdr->ip6e_nxt;
+            offset += (ext_hdr->ip6e_len + 2) * 4;
+            if (offset > size) {
+                goto out;
+            }
+        } else if (nw_proto == IPPROTO_FRAGMENT) {
+            struct ovs_16aligned_ip6_frag *frag_hdr;
+
+            offset += sizeof (struct ovs_16aligned_ip6_frag);
+            if (offset > size) {
+                goto out;
+            }
+            frag_hdr = data;
+            nw_proto = frag_hdr->ip6f_nxt;
+        }
+        /* 'offset' is absolute, so recompute 'data' from the packet
+         * base.  The previous code advanced 'data' by the cumulative
+         * offset, jumping far past the next extension header. */
+        data = (unsigned char *)dp_packet_data(pkt) + offset;
+    }
+    pkt->l4_ofs = offset;
+    return 0;
+out:
+    VLOG_ERR_RL(&err_rl, "Could not parse ipv6 ext header");
+    return -EINVAL;
+}
+
+/* Segments an IPv6 packet, fixing up the payload length of every
+ * segment and, for UDP (fragmentation) segments, the fragment-header
+ * offsets.  Returns NULL if the extension-header chain cannot be
+ * parsed. */
+static struct dp_packet *
+segment_ipv6_packet(struct dp_packet *orig)
+{
+    struct dp_packet *seg_list, *seg;
+    int ip_offset = 0;
+
+    /* A bogus l4 offset would corrupt every segment, so propagate the
+     * parse failure instead of ignoring the return value. */
+    if (ipv6_set_l4_offset(orig)) {
+        return NULL;
+    }
+    seg_list = segment_l4_packet(orig);
+
+    FOR_EACH_LSO_SEG(seg_list, seg) {
+        struct ip6_hdr *nh = dp_packet_l3(seg);
+        /* RFC 8200: ip6_plen covers everything after the 40-byte base
+         * header, *including* extension headers; basing it on l4_ofs
+         * undercounted whenever extension headers were present. */
+        int plen = dp_packet_size(seg) - seg->l3_ofs - sizeof *nh;
+
+        nh->ip6_plen = htons(plen);
+
+        if (orig->lso.type & DPBUF_LSO_UDPv6) {
+            update_ipv6_frag_hdr(seg, &ip_offset);
+
+            /* Advance by this fragment's fragmentable part (everything
+             * past the fragment header).  NOTE(review): confirm this
+             * stays a multiple of 8 as IPv6 fragmentation requires. */
+            ip_offset += (dp_packet_size(seg) - orig->l4_ofs);
+        }
+    }
+    return seg_list;
+}
+
+/* Segments the Ethernet frame in 'orig' starting at byte 'offset'.
+ * Determines the L3 protocol (handling a single VLAN tag) and
+ * dispatches to the IPv4/IPv6 segmenters.  Returns the segment list,
+ * or NULL for non-IP ethertypes or unparsable packets. */
+static struct dp_packet *
+segment_eth_packet(struct dp_packet *orig, int offset)
+{
+    int header_len = ETH_HEADER_LEN;
+    struct dp_packet *seg_list;
+    struct eth_header *eth;
+    ovs_be16 eth_type;
+
+    dp_packet_reset_packet(orig, offset);
+    eth = dp_packet_data(orig);
+    eth_type = eth->eth_type;
+    if (eth_type_vlan(eth->eth_type)) {
+        /* Single 802.1Q tag: the real ethertype follows the tag. */
+        struct vlan_eth_header *vethh = (struct vlan_eth_header *) dp_packet_data(orig);
+
+        eth_type = vethh->veth_next_type;
+        header_len += VLAN_HEADER_LEN;
+    }
+    orig->l3_ofs = header_len;
+
+    if (eth_type == htons(ETH_TYPE_IP)) {
+        seg_list = segment_ipv4_packet(orig);
+    } else if (eth_type == htons(ETH_TYPE_IPV6)) {
+        seg_list = segment_ipv6_packet(orig);
+    } else {
+        return NULL;
+    }
+    return seg_list;
+}
+
+/* Software LSO fallback: splits the oversized packet 'orig' into a
+ * NULL-terminated list of segments chained through
+ * PACKET_LSO_CTX()->next.  'orig' is not freed; the caller owns both
+ * it and the returned list.  Returns NULL if 'orig' cannot be
+ * segmented (non-IP ethertype or unparsable headers). */
+struct dp_packet *
+segment_packet(struct dp_packet *orig)
+{
+    PACKET_LSO_CTX(orig)->next = NULL;
+    return segment_eth_packet(orig, 0);
+}
new file mode 100644
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPBUF_LSO_H
+#define DPBUF_LSO_H 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "openvswitch/list.h"
+
+#include "packets.h"
+#include "util.h"
+#include "netdev-dpdk.h"
+
+
+#define DPBUF_LSO_TCPv4 (1 << 0)
+#define DPBUF_LSO_TCPv6 (1 << 1)
+#define DPBUF_LSO_UDPv4 (1 << 2)
+#define DPBUF_LSO_UDPv6 (1 << 3)
+
+struct dp_packet_lso_ctx {
+ struct dp_packet *next; /* Used to list lso segments. */
+};
+
+BUILD_ASSERT_DECL(DP_PACKET_CONTEXT_SIZE >= sizeof(struct dp_packet_lso_ctx));
+
+#define PACKET_LSO_CTX(packet) ((struct dp_packet_lso_ctx *)(packet)->data)
+
+struct dp_packet *segment_packet(struct dp_packet *orig);
+
+/* Iterates over an LSO segment list.  'lso_list' may be NULL. */
+#define FOR_EACH_LSO_SEG(lso_list, seg)                  \
+    for (seg = lso_list; seg; seg = PACKET_LSO_CTX(seg)->next)
+
+/* Like FOR_EACH_LSO_SEG, but safe against the body freeing 'seg':
+ * 'next' is fetched before the body runs.  The previous version
+ * dereferenced 'lso_list' in the for-initializer, crashing on the NULL
+ * list that segment_packet() can return; that eager fetch was also
+ * redundant, since the condition recomputes 'next' every pass. */
+#define FOR_EACH_LSO_SEG_SAFE(lso_list, seg, next)                       \
+    for (seg = lso_list;                                                 \
+         (seg ? (next = PACKET_LSO_CTX(seg)->next, 1) : 0);              \
+         seg = next)
+
+void
+fixup_packet_cheksum(struct dp_packet *pkt, int l4_offset, int csum_offset,
+ int new_ip_tot_len, int old_ip_tot_len);
+
+#endif /* dp-packet-lso.h */
@@ -30,6 +30,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so
dp_packet_reset_offsets(b);
pkt_metadata_init(&b->md, 0);
dp_packet_rss_invalidate(b);
+ memset(&b->lso, 0, sizeof b->lso);
}
static void
@@ -168,6 +169,7 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
new_buffer->l3_ofs = buffer->l3_ofs;
new_buffer->l4_ofs = buffer->l4_ofs;
new_buffer->md = buffer->md;
+ new_buffer->lso = buffer->lso;
#ifdef DPDK_NETDEV
new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
#else
@@ -64,6 +64,10 @@ struct dp_packet {
struct pkt_metadata md;
uint64_t data[DP_PACKET_CONTEXT_SIZE / 8];
};
+ struct {
+ uint16_t mss;
+ uint8_t type;
+ } lso;
};
static inline void *dp_packet_data(const struct dp_packet *);
@@ -567,12 +571,14 @@ enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
struct dp_packet_batch {
int count;
+ uint8_t lso_type;
struct dp_packet *packets[NETDEV_MAX_BURST];
};
static inline void dp_packet_batch_init(struct dp_packet_batch *b)
{
b->count = 0;
+ b->lso_type = 0;
}
static inline void
@@ -585,15 +591,30 @@ dp_packet_batch_clone(struct dp_packet_batch *dst,
dst->packets[i] = dp_packet_clone(src->packets[i]);
}
dst->count = src->count;
+ dst->lso_type = src->lso_type;
}
static inline void
packet_batch_init_packet(struct dp_packet_batch *b, struct dp_packet *p)
{
b->count = 1;
+ b->lso_type = p->lso.type;
b->packets[0] = p;
}
+/* Returns true if 'b' cannot accept any more packets. */
+static inline bool
+dp_packet_batch_is_full(struct dp_packet_batch *b)
+{
+    return b->count == NETDEV_MAX_BURST;
+}
+
+/* Appends 'p' to 'b' and folds its LSO type into the batch-wide type
+ * mask.  The caller must ensure the batch is not already full (see
+ * dp_packet_batch_is_full()); no bounds check is performed here. */
+static inline void
+dp_packet_batch_add_pkt(struct dp_packet_batch *b, struct dp_packet *p)
+{
+    b->packets[b->count++] = p;
+    b->lso_type |= p->lso.type;
+}
+
#ifdef __cplusplus
}
#endif
@@ -36,6 +36,7 @@
#include "coverage.h"
#include "csum.h"
#include "dp-packet.h"
+#include "dp-packet-lso.h"
#include "dpif.h"
#include "dpif-provider.h"
#include "dummy.h"
@@ -3347,7 +3348,7 @@ packet_batch_per_flow_update(struct packet_batch_per_flow *batch,
{
batch->byte_count += dp_packet_size(packet);
batch->tcp_flags |= miniflow_get_tcp_flags(mf);
- batch->array.packets[batch->array.count++] = packet;
+ dp_packet_batch_add_pkt(&batch->array, packet);
}
static inline void
@@ -3578,8 +3579,19 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
}
miss_cnt++;
- handle_packet(pmd, packets[i], &keys[i], &actions, &put_actions,
- &lost_cnt);
+ if (!packets[i]->lso.type) {
+ handle_packet(pmd, packets[i], &keys[i], &actions, &put_actions,
+ &lost_cnt);
+ } else {
+ struct dp_packet *seg_list, *seg, *next;
+
+ seg_list = segment_packet(packets[i]);
+ FOR_EACH_LSO_SEG_SAFE(seg_list, seg, next) {
+ handle_packet(pmd, seg, &keys[i], &actions, &put_actions,
+ &lost_cnt);
+ }
+ dp_packet_delete(packets[i]);
+ }
}
ofpbuf_uninit(&actions);
@@ -3845,8 +3857,22 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
for (i = 0; i < packets_->count; i++) {
flow_extract(packets[i], &flow);
dpif_flow_hash(dp->dpif, &flow, sizeof flow, &ufid);
- dp_execute_userspace_action(pmd, packets[i], may_steal, &flow,
- &ufid, &actions, userdata);
+
+ if (!packets[i]->lso.type) {
+ dp_execute_userspace_action(pmd, packets[i], may_steal, &flow,
+ &ufid, &actions, userdata);
+ } else {
+ struct dp_packet *seg_list, *seg, *next;
+
+ seg_list = segment_packet(packets[i]);
+ FOR_EACH_LSO_SEG_SAFE(seg_list, seg, next) {
+ dp_execute_userspace_action(pmd, seg, true, &flow,
+ &ufid, &actions, userdata);
+ }
+ if (may_steal) {
+ dp_packet_delete(packets[i]);
+ }
+ }
}
ofpbuf_uninit(&actions);
fat_rwlock_unlock(&dp->upcall_rwlock);
@@ -62,6 +62,7 @@ struct netdev {
int ref_cnt; /* Times this devices was opened. */
struct shash_node *node; /* Pointer to element in global map. */
struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". */
+ uint8_t supported_lso_types;
};
static inline void
@@ -34,6 +34,7 @@
#include "coverage.h"
#include "dpif.h"
#include "dp-packet.h"
+#include "dp-packet-lso.h"
#include "openvswitch/dynamic-string.h"
#include "fatal-signal.h"
#include "hash.h"
@@ -724,6 +725,88 @@ netdev_set_multiq(struct netdev *netdev, unsigned int n_txq,
return error;
}
+/* Queues 'pkt' on batch 'b' for transmission on 'netdev' queue 'qid',
+ * flushing the batch once it becomes full.  Returns 0 on success or
+ * the netdev send error.  NOTE(review): on a send error the batch is
+ * left populated (not reinitialized), so the caller must free
+ * b->packets itself; confirm the netdev class does not also consume
+ * them on failure when 'may_steal' is true. */
+static int
+send_packet__(struct netdev *netdev, int qid, struct dp_packet_batch *b,
+              struct dp_packet *pkt, bool may_steal)
+{
+    dp_packet_batch_add_pkt(b, pkt);
+    if (dp_packet_batch_is_full(b)) {
+        int error = netdev->netdev_class->send(netdev, qid,
+                                               b->packets, b->count,
+                                               may_steal);
+        if (!error) {
+            dp_packet_batch_init(b);
+        }
+        return error;
+    }
+    return 0;
+}
+
+/* Software LSO fallback for netdev_send(): walks batch 's', segments
+ * every packet whose LSO type the device cannot handle, and transmits
+ * everything in NETDEV_MAX_BURST-sized sub-batches.  On success all
+ * sent packets (originals, clones or segments) are consumed; on error
+ * the remaining originals are freed when 'may_steal' is true.
+ * Returns 0 on success, otherwise the first netdev send error. */
+static int
+netdev_send_lso(struct netdev *netdev, int qid, struct dp_packet_batch *s,
+                bool may_steal)
+{
+    struct dp_packet_batch b;
+    int i, error;
+
+    dp_packet_batch_init(&b);
+    for (i = 0; i < s->count; i++) {
+        struct dp_packet *seg, *seg_list, *next, *pkt = s->packets[i];
+
+        if (!pkt->lso.type) {
+            /* No segmentation needed.  Clone when we may not steal so
+             * the sub-batch always owns what it sends. */
+            if (!may_steal) {
+                pkt = dp_packet_clone(pkt);
+            }
+            error = send_packet__(netdev, qid, &b, pkt, true);
+            if (OVS_UNLIKELY(error)) {
+                seg = NULL; /* Dead store: 'seg' is never read after this. */
+                goto err;
+            }
+        } else {
+            /* NOTE(review): segment_packet() can return NULL (non-IP /
+             * unparsable packet); verify FOR_EACH_LSO_SEG_SAFE handles
+             * a NULL list without dereferencing it. */
+            seg_list = segment_packet(pkt);
+            if (may_steal) {
+                dp_packet_delete(pkt);
+            }
+            error = 0;
+            FOR_EACH_LSO_SEG_SAFE(seg_list, seg, next) {
+                /* After the first failure, just free the remaining
+                 * segments instead of attempting to send them. */
+                if (OVS_UNLIKELY(error)) {
+                    dp_packet_delete(seg);
+                    continue;
+                }
+                error = send_packet__(netdev, qid, &b, seg, true);
+                if (OVS_UNLIKELY(error)) {
+                    dp_packet_delete(seg);
+                }
+            }
+            if (OVS_UNLIKELY(error)) {
+                goto err;
+            }
+        }
+    }
+    if (b.count) {
+        /* Flush the final, possibly partial, sub-batch. */
+        error = netdev->netdev_class->send(netdev, qid,
+                                           b.packets, b.count, true);
+        if (!error) {
+            dp_packet_batch_init(&b);
+        } else {
+            goto err;
+        }
+    }
+
+    return 0;
+err:
+    /* Free the originals that were never processed... */
+    if (may_steal) {
+        for (i = i + 1; i < s->count; i++) {
+            dp_packet_delete(s->packets[i]);
+        }
+    }
+
+    /* ...and whatever was still queued in the sub-batch.
+     * NOTE(review): if netdev_class->send() consumed the batch before
+     * failing (it was called with may_steal true), this double-frees;
+     * confirm the provider contract on error. */
+    for (i = 0; i < b.count; i++) {
+        dp_packet_delete(b.packets[i]);
+    }
+    return error;
+}
+
/* Sends 'buffers' on 'netdev'. Returns 0 if successful (for every packet),
* otherwise a positive errno value. Returns EAGAIN without blocking if
* at least one the packets cannot be queued immediately. Returns EMSGSIZE
@@ -749,20 +832,30 @@ int
netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch,
bool may_steal)
{
- if (!netdev->netdev_class->send) {
- if (may_steal) {
- for (int i = 0; i < batch->count; i++) {
- dp_packet_delete(batch->packets[i]);
- }
- }
- return EOPNOTSUPP;
- }
+ int error;
- int error = netdev->netdev_class->send(netdev, qid,
+ if (!netdev->netdev_class->send) {
+ error = EOPNOTSUPP;
+ } else if (batch->lso_type & ~netdev->supported_lso_types) {
+ return netdev_send_lso(netdev, qid, batch, may_steal);
+ } else {
+ error = netdev->netdev_class->send(netdev, qid,
batch->packets, batch->count,
may_steal);
+ }
+
if (!error) {
COVERAGE_INC(netdev_sent);
+ } else {
+ goto err;
+ }
+ return 0;
+
+err:
+ if (may_steal) {
+ for (int i = 0; i < batch->count; i++) {
+ dp_packet_delete(batch->packets[i]);
+ }
}
return error;
}
@@ -778,9 +871,12 @@ netdev_pop_header(struct netdev *netdev, struct dp_packet_batch *batch)
}
for (i = 0; i < batch->count; i++) {
- buffers[i] = netdev->netdev_class->pop_header(buffers[i]);
- if (buffers[i]) {
- buffers[n_cnt++] = buffers[i];
+ struct dp_packet *inner_pkt;
+
+ inner_pkt = netdev->netdev_class->pop_header(buffers[i]);
+ if (inner_pkt) {
+ batch->lso_type |= inner_pkt->lso.type;
+ buffers[n_cnt++] = inner_pkt;
}
}
batch->count = n_cnt;
@@ -808,6 +904,10 @@ netdev_push_header(const struct netdev *netdev,
return -EINVAL;
}
+ if (batch->lso_type & ~netdev->supported_lso_types) {
+ return -EINVAL;
+ }
+
for (i = 0; i < batch->count; i++) {
netdev->netdev_class->push_header(batch->packets[i], data);
pkt_metadata_init(&batch->packets[i]->md, u32_to_odp(data->out_port));
@@ -785,6 +785,8 @@ struct ovs_16aligned_ip6_hdr {
union ovs_16aligned_in6_addr ip6_dst;
};
+#define IPV6_MF 0x0001
+
/* Like struct in6_frag, but whereas that struct requires 32-bit alignment,
* this one only requires 16-bit alignment. */
struct ovs_16aligned_ip6_frag {
STT can generate large packets. The following patch adds support for handling packets that are larger than the device MTU. To support such packets, the following members are added to the dp_packet structure: "type", the type of the packet (e.g. tcp, tcpv6, geneve, etc.), and "mss", the maximum segment size. A netdev can set flags in netdev->supported_lso_types if it supports segmentation of the corresponding packet types in hardware. A software fallback is also provided for the case where the netdev implementation does not support large packet segmentation. Signed-off-by: Pravin B Shelar <pshelar@ovn.org> --- lib/automake.mk | 2 + lib/dp-packet-lso.c | 402 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/dp-packet-lso.h | 57 +++++++ lib/dp-packet.c | 2 + lib/dp-packet.h | 21 +++ lib/dpif-netdev.c | 36 ++++- lib/netdev-provider.h | 1 + lib/netdev.c | 124 ++++++++++++++-- lib/packets.h | 2 + 9 files changed, 630 insertions(+), 17 deletions(-) create mode 100644 lib/dp-packet-lso.c create mode 100644 lib/dp-packet-lso.h