@@ -335,6 +335,11 @@ struct ubuf_info {
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+/*
+ * sock_can_zerocopy - test whether a send may take the zerocopy path
+ * @sk: socket; its sk_type is compared against SOCK_RAW and SOCK_DGRAM
+ * @rt: route; rt->dst.dev->features must have NETIF_F_SG set
+ * @csummode: checksum mode, only consulted for SOCK_DGRAM sockets
+ *
+ * Every argument is parenthesized because callers pass full expressions
+ * (e.g. "skb ? skb->ip_summed : csummode"); without the parentheses the
+ * conditional operator would swallow the surrounding && and & terms.
+ * @sk is evaluated more than once, so it must not have side effects.
+ *
+ * NOTE(review): csummode is tested with '&' against CHECKSUM_UNNECESSARY.
+ * The CHECKSUM_* values look like an enumeration rather than a bitmask, so
+ * this may match more modes than intended - confirm against skbuff.h.
+ */
+#define sock_can_zerocopy(sk, rt, csummode) \
+ (((rt)->dst.dev->features & NETIF_F_SG) && \
+ (((sk)->sk_type == SOCK_RAW) || \
+ ((sk)->sk_type == SOCK_DGRAM && ((csummode) & CHECKSUM_UNNECESSARY))))
+
struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg);
@@ -871,7 +871,7 @@ static int __ip_append_data(struct sock *sk,
{
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
-
+ struct ubuf_info *uarg = NULL;
struct ip_options *opt = cork->opt;
int hh_len;
int exthdrlen;
@@ -914,9 +914,16 @@ static int __ip_append_data(struct sock *sk,
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length &&
+ sock_can_zerocopy(sk, rt, skb ? skb->ip_summed : csummode)) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+ }
+
cork->length += length;
if (((length > mtu) || (skb && skb_is_gso(skb))) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
+ (sk->sk_protocol == IPPROTO_UDP) && !uarg &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
(sk->sk_type == SOCK_DGRAM)) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
@@ -968,6 +975,8 @@ alloc_new_skb:
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
+ else if (uarg)
+ alloclen = min_t(int, fraglen, MAX_HEADER);
else
alloclen = fraglen;
@@ -1010,11 +1019,12 @@ alloc_new_skb:
cork->tx_flags = 0;
skb_shinfo(skb)->tskey = tskey;
tskey = 0;
+ skb_zcopy_set(skb, uarg);
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen + exthdrlen);
+ data = skb_put(skb, alloclen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
@@ -1030,7 +1040,9 @@ alloc_new_skb:
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
+ /* bounded by datalen and alloclen; transhdrlen and fraggap both come off */
+ copy = min(datalen, alloclen - exthdrlen - fragheaderlen) -
+ transhdrlen - fraggap;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
@@ -1038,7 +1050,7 @@ alloc_new_skb:
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
@@ -1063,6 +1075,17 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
+ } else if (uarg) {
+ struct iov_iter *iter;
+
+ if (sk->sk_type == SOCK_RAW)
+ iter = &((struct msghdr **)from)[0]->msg_iter;
+ else
+ iter = &((struct msghdr *)from)->msg_iter;
+ err = skb_zerocopy_add_frags_iter(sk, skb, iter, copy, uarg);
+ if (err < 0)
+ goto error;
+ copy = err;
} else {
int i = skb_shinfo(skb)->nr_frags;
@@ -1103,6 +1126,7 @@ alloc_new_skb:
error_efault:
err = -EFAULT;
error:
+ sock_zerocopy_put_abort(uarg);
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
return err;