@@ -1145,6 +1145,10 @@ extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
int skb_zerocopy_iter(struct sock *sk, struct sk_buff *skb, struct msghdr *msg,
int len)
{
+ /* SOCK_RAW outside PF_PACKET: msg really points at a struct msghdr * (raw_frag_vec indirection); load it */
+ if (sk->sk_type == SOCK_RAW && sk->sk_family != PF_PACKET)
+ msg = *(struct msghdr **)msg;
+
return __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len);
}
EXPORT_SYMBOL_GPL(skb_zerocopy_iter);
@@ -919,7 +919,7 @@ static int __ip_append_data(struct sock *sk,
{
struct inet_sock *inet = inet_sk(sk);
struct sk_buff *skb;
-
+ struct ubuf_info *uarg = NULL;
struct ip_options *opt = cork->opt;
int hh_len;
int exthdrlen;
@@ -963,9 +963,21 @@ static int __ip_append_data(struct sock *sk,
!exthdrlen)
csummode = CHECKSUM_PARTIAL;
+ if (flags & MSG_ZEROCOPY && length) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+
+ if (!(rt->dst.dev->features & NETIF_F_SG) ||
+ (sk->sk_type == SOCK_DGRAM && csummode == CHECKSUM_NONE)) {
+ uarg->zerocopy = 0;
+ skb_zcopy_set(skb, uarg);
+ }
+ }
+
cork->length += length;
if ((((length + fragheaderlen) > mtu) || (skb && skb_is_gso(skb))) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
+ (sk->sk_protocol == IPPROTO_UDP) && !uarg &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
@@ -997,6 +1009,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int zcopylen = 0;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -1017,8 +1030,12 @@ static int __ip_append_data(struct sock *sk,
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
+ else if (!uarg || !uarg->zerocopy)
alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ zcopylen = fraglen - alloclen;
+ }
alloclen += exthdrlen;
@@ -1059,11 +1076,12 @@ static int __ip_append_data(struct sock *sk,
cork->tx_flags = 0;
skb_shinfo(skb)->tskey = tskey;
tskey = 0;
+ skb_zcopy_set(skb, uarg);
/*
* Find where to start putting bytes.
*/
- data = skb_put(skb, fraglen + exthdrlen);
+ data = skb_put(skb, fraglen + exthdrlen - zcopylen);
skb_set_network_header(skb, exthdrlen);
skb->transport_header = (skb->network_header +
fragheaderlen);
@@ -1079,7 +1097,7 @@ static int __ip_append_data(struct sock *sk,
pskb_trim_unique(skb_prev, maxfraglen);
}
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - zcopylen;
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
@@ -1087,7 +1105,7 @@ static int __ip_append_data(struct sock *sk,
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
csummode = CHECKSUM_NONE;
@@ -1115,7 +1133,7 @@ static int __ip_append_data(struct sock *sk,
err = -EFAULT;
goto error;
}
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
@@ -1145,6 +1163,10 @@ static int __ip_append_data(struct sock *sk,
skb->data_len += copy;
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
+ } else {
+ err = skb_zerocopy_iter(sk, skb, from, copy);
+ if (err)
+ goto error;
}
offset += copy;
length -= copy;
@@ -1155,6 +1177,7 @@ static int __ip_append_data(struct sock *sk,
error_efault:
err = -EFAULT;
error:
+ sock_zerocopy_put_abort(uarg);
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
return err;
@@ -1307,6 +1307,7 @@ static int __ip6_append_data(struct sock *sk,
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
+ struct ubuf_info *uarg = NULL;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1368,6 +1369,18 @@ static int __ip6_append_data(struct sock *sk,
tskey = sk->sk_tskey++;
}
+ if (flags & MSG_ZEROCOPY && length) {
+ uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
+ if (!uarg)
+ return -ENOBUFS;
+
+ if (!(rt->dst.dev->features & NETIF_F_SG) ||
+ (sk->sk_type == SOCK_DGRAM && csummode == CHECKSUM_NONE)) {
+ uarg->zerocopy = 0;
+ skb_zcopy_set(skb, uarg);
+ }
+ }
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1387,7 +1400,7 @@ static int __ip6_append_data(struct sock *sk,
cork->length += length;
if ((((length + fragheaderlen) > mtu) ||
(skb && skb_is_gso(skb))) &&
- (sk->sk_protocol == IPPROTO_UDP) &&
+ (sk->sk_protocol == IPPROTO_UDP) && !uarg &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
(sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
@@ -1413,6 +1426,7 @@ static int __ip6_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
+ unsigned int zcopylen = 0;
alloc_new_skb:
/* There's no room in the current skb */
if (skb)
@@ -1435,11 +1449,17 @@ static int __ip6_append_data(struct sock *sk,
if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
+ fraglen = datalen + fragheaderlen;
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else
- alloclen = datalen + fragheaderlen;
+ else if (!uarg || !uarg->zerocopy)
+ alloclen = fraglen;
+ else {
+ alloclen = min_t(int, fraglen, MAX_HEADER);
+ zcopylen = fraglen - alloclen;
+ }
alloclen += dst_exthdrlen;
@@ -1461,7 +1481,7 @@ static int __ip6_append_data(struct sock *sk,
*/
alloclen += sizeof(struct frag_hdr);
- copy = datalen - transhdrlen - fraggap;
+ copy = datalen - transhdrlen - fraggap - zcopylen;
if (copy < 0) {
err = -EINVAL;
goto error;
@@ -1497,11 +1517,12 @@ static int __ip6_append_data(struct sock *sk,
tx_flags = 0;
skb_shinfo(skb)->tskey = tskey;
tskey = 0;
+ skb_zcopy_set(skb, uarg);
/*
* Find where to start putting bytes
*/
- data = skb_put(skb, fraglen);
+ data = skb_put(skb, fraglen - zcopylen);
skb_set_network_header(skb, exthdrlen);
data += fragheaderlen;
skb->transport_header = (skb->network_header +
@@ -1524,7 +1545,7 @@ static int __ip6_append_data(struct sock *sk,
}
offset += copy;
- length -= datalen - fraggap;
+ length -= copy + transhdrlen;
transhdrlen = 0;
exthdrlen = 0;
dst_exthdrlen = 0;
@@ -1552,7 +1573,7 @@ static int __ip6_append_data(struct sock *sk,
err = -EFAULT;
goto error;
}
- } else {
+ } else if (!uarg || !uarg->zerocopy) {
int i = skb_shinfo(skb)->nr_frags;
err = -ENOMEM;
@@ -1582,6 +1603,10 @@ static int __ip6_append_data(struct sock *sk,
skb->data_len += copy;
skb->truesize += copy;
atomic_add(copy, &sk->sk_wmem_alloc);
+ } else {
+ err = skb_zerocopy_iter(sk, skb, from, copy);
+ if (err)
+ goto error;
}
offset += copy;
length -= copy;
@@ -1592,6 +1617,7 @@ static int __ip6_append_data(struct sock *sk,
error_efault:
err = -EFAULT;
error:
+ sock_zerocopy_put_abort(uarg);
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
return err;