diff mbox

[net-next,v2,3/3] net: sctp: Add partial support for MSG_MORE on SCTP

Message ID 063D6719AE5E284EB5DD2968C1650D6D1726EEB7@AcuExch.aculab.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

David Laight July 9, 2014, 8:29 a.m. UTC
If MSG_MORE is set then buffer sends as if Nagle were enabled.
The first data chunk is still sent on its own, but subsequent chunks
will be bundled and full packets sent.
Full MSG_MORE support would require a timeout (preferably configurable
per-socket) to send the last chunk(s), instead of sending them
when there is nothing outstanding.

Signed-off-by: David Laight <david.laight@aculab.com>
---
 include/net/sctp/structs.h |  6 +++++-
 net/sctp/output.c          | 12 ++++++++++--
 net/sctp/socket.c          | 18 +++++++++++++++---
 3 files changed, 30 insertions(+), 6 deletions(-)

Comments

Vladislav Yasevich July 11, 2014, 8:11 p.m. UTC | #1
On 07/09/2014 04:29 AM, David Laight wrote:
> If MSG_MORE is set then buffer sends as if Nagle were enabled.
> The first data chunk is still sent on its own, but subsequent chunks
> will be bundled and full packets sent.
> Full MSG_MORE support would require a timout (preferably configurable
> per-socket) to send the last chunk(s), instead of sending them
> when there is nothing outstanding.
> 
> Signed-off-by: David Laight <david.laight@aculab.com>
> ---
>  include/net/sctp/structs.h |  6 +++++-
>  net/sctp/output.c          | 12 ++++++++++--
>  net/sctp/socket.c          | 18 +++++++++++++++---
>  3 files changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
> index 0dfcc92..629346f 100644
> --- a/include/net/sctp/structs.h
> +++ b/include/net/sctp/structs.h
> @@ -209,7 +209,11 @@ struct sctp_sock {
>  	struct sctp_assocparams assocparams;
>  	int user_frag;
>  	__u32 autoclose;
> -	__u8 nodelay;
> +
> +#define	SCTP_F_TX_NODELAY	0
> +#define	SCTP_F_TX_NAGLE		1	/* SCTP_NODELAY not set */
> +#define	SCTP_F_TX_MSG_MORE	2	/* MSG_MORE set on last send */
> +	__u8 tx_delay;
>  	__u8 disable_fragments;
>  	__u8 v4mapped;
>  	__u8 frag_interleave;
> diff --git a/net/sctp/output.c b/net/sctp/output.c
> index 7f28a8e..df7889c 100644
> --- a/net/sctp/output.c
> +++ b/net/sctp/output.c
> @@ -679,14 +679,22 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
>  	    flight_size >= transport->cwnd)
>  		return SCTP_XMIT_RWND_FULL;
>  
> +	/* If MSG_MORE is set we probably shouldn't create a new message.
> +	 * However unless we also implement a timeout (preferable settable
> +	 * as a socket option) then data could easily be left unsent.
> +	 * Instead we ignore MSG_MORE on the first data chunk.
> +	 * This makes the implementation of MSG_MORE the same as the
> +	 * implementation of Nagle.
> +	 */
> +
>  	/* Nagle's algorithm to solve small-packet problem:
>  	 * Inhibit the sending of new chunks when new outgoing data arrives
>  	 * if any previously transmitted data on the connection remains
>  	 * unacknowledged.
>  	 */
>  
> -	if (sctp_sk(asoc->base.sk)->nodelay)
> -		/* Nagle disabled */
> +	if (sctp_sk(asoc->base.sk)->tx_delay == SCTP_F_TX_NODELAY)
> +		/* Nagle disabled and MSG_MORE unset */
>  		return SCTP_XMIT_OK;
>  
>  	if (!sctp_packet_empty(packet))
> diff --git a/net/sctp/socket.c b/net/sctp/socket.c
> index fee06b9..4a9f760 100644
> --- a/net/sctp/socket.c
> +++ b/net/sctp/socket.c
> @@ -1927,6 +1927,18 @@ static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
>  		pr_debug("%s: we associated primitively\n", __func__);
>  	}
>  
> +	/* Setting MSG_MORE currently has the same effect as enabling Nagle.
> +	 * This means that the user can't force bundling of the first two data
> +	 * chunks.  It does mean that all the data chunks will be sent
> +	 * without an extra timer.
> +	 * It is enough to save the last value since any data sent with
> +	 * MSG_MORE clear will already have been sent (subject to flow control).
> +	 */
> +	if (msg->msg_flags & MSG_MORE)
> +		sp->tx_delay |= SCTP_F_TX_MSG_MORE;
> +	else
> +		sp->tx_delay &= ~SCTP_F_TX_MSG_MORE;
> +

This is ok for 1-1 sockets, but it doesn't really work for 1-many sockets.  If one of
the associations uses MSG_MORE while another does not, we'll see some interesting
side-effects on the wire.

-vlad

>  	/* Break the message into multiple chunks of maximum size. */
>  	datamsg = sctp_datamsg_from_user(asoc, sinfo, msg, msg_len);
>  	if (IS_ERR(datamsg)) {
> @@ -2821,7 +2833,7 @@ static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval,
>  	if (get_user(val, (int __user *)optval))
>  		return -EFAULT;
>  
> -	sctp_sk(sk)->nodelay = (val == 0) ? 0 : 1;
> +	sctp_sk(sk)->tx_delay = val == 0 ? SCTP_F_TX_NAGLE : SCTP_F_TX_NODELAY;
>  	return 0;
>  }
>  
> @@ -3968,7 +3980,7 @@ static int sctp_init_sock(struct sock *sk)
>  	sp->disable_fragments = 0;
>  
>  	/* Enable Nagle algorithm by default.  */
> -	sp->nodelay           = 0;
> +	sp->tx_delay          = SCTP_F_TX_NAGLE;
>  
>  	/* Enable by default. */
>  	sp->v4mapped          = 1;
> @@ -5020,7 +5032,7 @@ static int sctp_getsockopt_nodelay(struct sock *sk, int len,
>  		return -EINVAL;
>  
>  	len = sizeof(int);
> -	val = (sctp_sk(sk)->nodelay == 1);
> +	val = sctp_sk(sk)->tx_delay & SCTP_F_TX_NAGLE ? 0 : 1;
>  	if (put_user(len, optlen))
>  		return -EFAULT;
>  	if (copy_to_user(optval, &val, len))
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight July 14, 2014, 4:27 p.m. UTC | #2
From: Vlad Yasevich
...
> > +	/* Setting MSG_MORE currently has the same effect as enabling Nagle.
> > +	 * This means that the user can't force bundling of the first two data
> > +	 * chunks.  It does mean that all the data chunks will be sent
> > +	 * without an extra timer.
> > +	 * It is enough to save the last value since any data sent with
> > +	 * MSG_MORE clear will already have been sent (subject to flow control).
> > +	 */
> > +	if (msg->msg_flags & MSG_MORE)
> > +		sp->tx_delay |= SCTP_F_TX_MSG_MORE;
> > +	else
> > +		sp->tx_delay &= ~SCTP_F_TX_MSG_MORE;
> > +
> 
> This is ok for 1-1 sockets, but it doesn't really work for 1-many sockets.  If one of
> the associations uses MSG_MORE while another does not, we'll see some interesting
> side-effects on the wire.

They shouldn't cause any grief, and are somewhat unlikely.
Unless multiple threads/processes are writing data into the same socket
and are also flipping MSG_MORE (and the socket locking allows the
send path to run concurrently - I suspect it doesn't).

AFAICT the tx_delay/Nagle flag is looked at in two code paths:
1) After the application tries to send some data.
2) When processing a received ack chunk.

For 1-many sockets I suspect the code that checks tx_delay after a send()
is executed before a send() from a different thread could change the value.
And that sends for alternate destinations won't try to clear the tx queue
for the other association.
So the send() processing is unlikely to be affected by the MSG_MORE flag
value for the other association.

The only time there will be sendable data for (2) is if the connection
were flow-controlled off, or if data were unsent due to the MSG_MORE/Nagle
being set when the last send was processed.
Most likely the queued data will be sent - either because there is nothing
outstanding, because there is more than a packet full, or because the last
send had MSG_MORE clear.

The expectation is that an application will send some data chunks with
MSG_MORE set, followed by one with it clear.

The only scenario I can see that might be unexpected is:
- a 1-many socket.
- one destination flow controlled (ie waiting an ack chunk) but
  with less than 1500 bytes queued.
- send with MSG_MORE set for a different destination.
- ack received, queued data not sent.

But if you are waiting for ack chunks on a 1-many socket you are already
in deep trouble - since there is only a single socket send buffer.

I don't think this is a problem.

	David


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vladislav Yasevich July 14, 2014, 7:15 p.m. UTC | #3
On 07/14/2014 12:27 PM, David Laight wrote:
> From: Vlad Yasevich
> ...
>>> +	/* Setting MSG_MORE currently has the same effect as enabling Nagle.
>>> +	 * This means that the user can't force bundling of the first two data
>>> +	 * chunks.  It does mean that all the data chunks will be sent
>>> +	 * without an extra timer.
>>> +	 * It is enough to save the last value since any data sent with
>>> +	 * MSG_MORE clear will already have been sent (subject to flow control).
>>> +	 */
>>> +	if (msg->msg_flags & MSG_MORE)
>>> +		sp->tx_delay |= SCTP_F_TX_MSG_MORE;
>>> +	else
>>> +		sp->tx_delay &= ~SCTP_F_TX_MSG_MORE;
>>> +
>>
>> This is ok for 1-1 sockets, but it doesn't really work for 1-many sockets.  If one of
>> the associations uses MSG_MORE while another does not, we'll see some interesting
>> side-effects on the wire.
> 
> They shouldn't cause any grief, and are somewhat unlikely.
> Unless multiple threads/processes are writing data into the same socket
> and are also flipping MSG_MORE (and the socket locking allows the
> send path to run concurrently - I suspect it doesn't).
> 
> AFAICT the tx_delay/Nagle flag is looked at in two code paths:
> 1) After the application tries to send some data.
> 2) When processing a received ack chunk.
> 
> For 1-many sockets I suspect the code that checks tx_delay after a send()
> is executed before a send() from a different thread could change the value.
> And that sends for alternate destinations won't try to clear the tx queue
> for the other association.
> So the send() processing is unlikely to be affected by the MSG_MORE flag
> value for the other association.

But the MSG_MORE is not per association.  It is per socket.  So if you have
a process with 2 threads that clears Nagle (sets SCTP_NODELAY) and then
uses MSG_MORE to force bundling when it has a lot of data in queue then
you can have the following:
  1: send(MSG_MORE)
  1: send(MSG_MORE)
  2: send()

The send from thread 2 will reset the tx_delay across the socket.  If the
association from thread 1 then receives a SACK, it will flush the queue
before it's ready.  So, you have a side-effect that you don't get the
bundling that you are really after with MSG_MORE usage.

> 
> The only time there will be sendable data for (2) is if the connection
> were flow-controlled off, or if data were unsent due the MSG_MORE/Nagle
> being set when the last send was processed.
> Most likely the queued data will be sent - either because there is nothing
> outstanding, because there is more than a packet full, or because the last
> send had MSG_MORE clear.
> 
> The expectation is that an application will send some data chunks with
> MSG_MORE set, followed by one with it clear.
> 

Within a single thread, sure.  But if you have multiple associations as above,
you could end up with a scenario where MSG_MORE is almost useless.

> The only scenario I can see that might be unexpected is:
> - a 1-many socket.
> - one destination flow controlled (ie waiting an ack chunk) but
>   with less than 1500 bytes queued.
> - send with MSG_MORE set for a different destination.
> - ack received, queued data not sent.
> 
> But if you are waiting for ack chunks on a 1-many socket you are already
> in deep trouble - since there is only a single socket send buffer.

Not always.  A lot of deployments that use 1-many socket specifically
change buffering policy.

> 
> I don't think this is a problem.

No, it is not a _problem_, but it does make MSG_MORE rather useless
in some situations.  Waiting for an ACK across low-latency links
is rare, but in high-latency scenarios where you want to utilize the
bandwidth better with bundling, you may not see the gains you expect.

Since MSG_MORE is per-association, it should be handled as such, and
a change on one association should not affect the others.

-vlad
> 
> 	David
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight July 15, 2014, 2:33 p.m. UTC | #4
From: Vlad Yasevich 
> On 07/14/2014 12:27 PM, David Laight wrote:
> > From: Vlad Yasevich
> > ...
> >>> +	/* Setting MSG_MORE currently has the same effect as enabling Nagle.
> >>> +	 * This means that the user can't force bundling of the first two data
> >>> +	 * chunks.  It does mean that all the data chunks will be sent
> >>> +	 * without an extra timer.
> >>> +	 * It is enough to save the last value since any data sent with
> >>> +	 * MSG_MORE clear will already have been sent (subject to flow control).
> >>> +	 */
> >>> +	if (msg->msg_flags & MSG_MORE)
> >>> +		sp->tx_delay |= SCTP_F_TX_MSG_MORE;
> >>> +	else
> >>> +		sp->tx_delay &= ~SCTP_F_TX_MSG_MORE;
> >>> +
> >>
> >> This is ok for 1-1 sockets, but it doesn't really work for 1-many sockets.  If one of
> >> the associations uses MSG_MORE while another does not, we'll see some interesting
> >> side-effects on the wire.
> >
...
> > I don't think this is a problem.
> 
> Not, it is not a _problem_, but it does make MSG_MORE rather useless
> in some situations.  Waiting for an ACK across low-latency links
> is rare, but in a high-latency scenarios where you want to utilize the
> bandwidth better with bundling, you may not see the gains you expect.
> 
> Since MSG_MORE is association, it should be handled as such and an
> a change on one association should not effect the others.

I think the comments already say that it is only a partial implementation.
(If you send 2 chunks on an idle connection, they get sent separately.)
Perhaps I'll add a note about possibly 'odd' effects for 1-many sockets
with multi-threaded apps.

It helps a lot for my M3UA traffic.
I can get the same effect on an old kernel by repeatedly changing SCTP_NODELAY,
but that does rather rely on the way Nagle is implemented.

	David




--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Vladislav Yasevich July 15, 2014, 3:24 p.m. UTC | #5
On 07/15/2014 10:33 AM, David Laight wrote:
> From: Vlad Yasevich 
>> On 07/14/2014 12:27 PM, David Laight wrote:
>>> From: Vlad Yasevich
>>> ...
>>>>> +	/* Setting MSG_MORE currently has the same effect as enabling Nagle.
>>>>> +	 * This means that the user can't force bundling of the first two data
>>>>> +	 * chunks.  It does mean that all the data chunks will be sent
>>>>> +	 * without an extra timer.
>>>>> +	 * It is enough to save the last value since any data sent with
>>>>> +	 * MSG_MORE clear will already have been sent (subject to flow control).
>>>>> +	 */
>>>>> +	if (msg->msg_flags & MSG_MORE)
>>>>> +		sp->tx_delay |= SCTP_F_TX_MSG_MORE;
>>>>> +	else
>>>>> +		sp->tx_delay &= ~SCTP_F_TX_MSG_MORE;
>>>>> +
>>>>
>>>> This is ok for 1-1 sockets, but it doesn't really work for 1-many sockets.  If one of
>>>> the associations uses MSG_MORE while another does not, we'll see some interesting
>>>> side-effects on the wire.
>>>
> ...
>>> I don't think this is a problem.
>>
>> Not, it is not a _problem_, but it does make MSG_MORE rather useless
>> in some situations.  Waiting for an ACK across low-latency links
>> is rare, but in a high-latency scenarios where you want to utilize the
>> bandwidth better with bundling, you may not see the gains you expect.
>>
>> Since MSG_MORE is association, it should be handled as such and an
>> a change on one association should not effect the others.
> 
> I think the comments already say that it is only a partial implementation.
> (If you send 2 chunks on an idle connection, they get sent separately.)
> Perhaps I'll add a note about possibly 'odd' effects for 1-many sockets
> with multi-threaded apps.
> 
> It helps a lot for my M3UA traffic.
> I can get the same effect on an old kernel by repeatedly changing SCTP_NODELAY,
> but that does rather rely on the way Nagle is implemented.

You can fix this by having an sp->tx_delay value and an assoc->tx_delay value
and simply checking (sp->tx_delay | assoc->tx_delay).  MSG_MORE would only set
the assoc->tx_delay while SCTP_NODELAY would affect the socket.

This way, when one association uses MSG_MORE, it will not affect other associations
on the same socket that don't use it.

-vlad

> 
> 	David
> 
> 
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Laight July 15, 2014, 4:13 p.m. UTC | #6
From: Vlad Yasevich
> On 07/15/2014 10:33 AM, David Laight wrote:
> > From: Vlad Yasevich
> >> On 07/14/2014 12:27 PM, David Laight wrote:
> >>> From: Vlad Yasevich
> >>> ...
> >>>>> +	/* Setting MSG_MORE currently has the same effect as enabling Nagle.
> >>>>> +	 * This means that the user can't force bundling of the first two data
> >>>>> +	 * chunks.  It does mean that all the data chunks will be sent
> >>>>> +	 * without an extra timer.
> >>>>> +	 * It is enough to save the last value since any data sent with
> >>>>> +	 * MSG_MORE clear will already have been sent (subject to flow control).
> >>>>> +	 */
> >>>>> +	if (msg->msg_flags & MSG_MORE)
> >>>>> +		sp->tx_delay |= SCTP_F_TX_MSG_MORE;
> >>>>> +	else
> >>>>> +		sp->tx_delay &= ~SCTP_F_TX_MSG_MORE;
> >>>>> +
> >>>>
> >>>> This is ok for 1-1 sockets, but it doesn't really work for 1-many sockets.  If one of
> >>>> the associations uses MSG_MORE while another does not, we'll see some interesting
> >>>> side-effects on the wire.
> >>>
> > ...
> >>> I don't think this is a problem.
> >>
> >> Not, it is not a _problem_, but it does make MSG_MORE rather useless
> >> in some situations.  Waiting for an ACK across low-latency links
> >> is rare, but in a high-latency scenarios where you want to utilize the
> >> bandwidth better with bundling, you may not see the gains you expect.
> >>
> >> Since MSG_MORE is association, it should be handled as such and an
> >> a change on one association should not effect the others.
> >
> > I think the comments already say that it is only a partial implementation.
> > (If you send 2 chunks on an idle connection, they get sent separately.)
> > Perhaps I'll add a note about possibly 'odd' effects for 1-many sockets
> > with multi-threaded apps.
> >
> > It helps a lot for my M3UA traffic.
> > I can get the same effect on an old kernel by repeatedly changing SCTP_NODELAY,
> > but that does rather rely on the way Nagle is implemented.
> 
> You can fix this by having an sp->tx_delay value and a assoc->tx_delay value
> and simple check (sp->tx_delay | assoc->tx_delay).  MSG_MORE would only set
> the assoc->tx_delay while SCTP_NODELAY would effect the socket.
> 
> This way, when one association uses MSG_MORE, it will not effect other associations
> on the same socket that don't use it.

In that case it is probably worth caching SCTP_NODELAY in the asoc as well.
It looks like it ought to be valid for the setsockopt code to 'list_for_each_entry'
on ep->asocs and sctp_endpoint_add_asoc() to copy down the current value.

Should I change the code so that the first chunk is also not sent?
If an application failed to do a final send (with MSG_MORE clear) then
it would never be sent, and there is no way to flush it.
I didn't do that because I'm not really interested in merging 2 chunks.
I'm trying to get 100s of chunks merged.

	David



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0dfcc92..629346f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -209,7 +209,11 @@  struct sctp_sock {
 	struct sctp_assocparams assocparams;
 	int user_frag;
 	__u32 autoclose;
-	__u8 nodelay;
+
+#define	SCTP_F_TX_NODELAY	0
+#define	SCTP_F_TX_NAGLE		1	/* SCTP_NODELAY not set */
+#define	SCTP_F_TX_MSG_MORE	2	/* MSG_MORE set on last send */
+	__u8 tx_delay;
 	__u8 disable_fragments;
 	__u8 v4mapped;
 	__u8 frag_interleave;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 7f28a8e..df7889c 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -679,14 +679,22 @@  static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
 	    flight_size >= transport->cwnd)
 		return SCTP_XMIT_RWND_FULL;
 
+	/* If MSG_MORE is set we probably shouldn't create a new message.
+	 * However unless we also implement a timeout (preferable settable
+	 * as a socket option) then data could easily be left unsent.
+	 * Instead we ignore MSG_MORE on the first data chunk.
+	 * This makes the implementation of MSG_MORE the same as the
+	 * implementation of Nagle.
+	 */
+
 	/* Nagle's algorithm to solve small-packet problem:
 	 * Inhibit the sending of new chunks when new outgoing data arrives
 	 * if any previously transmitted data on the connection remains
 	 * unacknowledged.
 	 */
 
-	if (sctp_sk(asoc->base.sk)->nodelay)
-		/* Nagle disabled */
+	if (sctp_sk(asoc->base.sk)->tx_delay == SCTP_F_TX_NODELAY)
+		/* Nagle disabled and MSG_MORE unset */
 		return SCTP_XMIT_OK;
 
 	if (!sctp_packet_empty(packet))
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fee06b9..4a9f760 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1927,6 +1927,18 @@  static int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
 		pr_debug("%s: we associated primitively\n", __func__);
 	}
 
+	/* Setting MSG_MORE currently has the same effect as enabling Nagle.
+	 * This means that the user can't force bundling of the first two data
+	 * chunks.  It does mean that all the data chunks will be sent
+	 * without an extra timer.
+	 * It is enough to save the last value since any data sent with
+	 * MSG_MORE clear will already have been sent (subject to flow control).
+	 */
+	if (msg->msg_flags & MSG_MORE)
+		sp->tx_delay |= SCTP_F_TX_MSG_MORE;
+	else
+		sp->tx_delay &= ~SCTP_F_TX_MSG_MORE;
+
 	/* Break the message into multiple chunks of maximum size. */
 	datamsg = sctp_datamsg_from_user(asoc, sinfo, msg, msg_len);
 	if (IS_ERR(datamsg)) {
@@ -2821,7 +2833,7 @@  static int sctp_setsockopt_nodelay(struct sock *sk, char __user *optval,
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
-	sctp_sk(sk)->nodelay = (val == 0) ? 0 : 1;
+	sctp_sk(sk)->tx_delay = val == 0 ? SCTP_F_TX_NAGLE : SCTP_F_TX_NODELAY;
 	return 0;
 }
 
@@ -3968,7 +3980,7 @@  static int sctp_init_sock(struct sock *sk)
 	sp->disable_fragments = 0;
 
 	/* Enable Nagle algorithm by default.  */
-	sp->nodelay           = 0;
+	sp->tx_delay          = SCTP_F_TX_NAGLE;
 
 	/* Enable by default. */
 	sp->v4mapped          = 1;
@@ -5020,7 +5032,7 @@  static int sctp_getsockopt_nodelay(struct sock *sk, int len,
 		return -EINVAL;
 
 	len = sizeof(int);
-	val = (sctp_sk(sk)->nodelay == 1);
+	val = sctp_sk(sk)->tx_delay & SCTP_F_TX_NAGLE ? 0 : 1;
 	if (put_user(len, optlen))
 		return -EFAULT;
 	if (copy_to_user(optval, &val, len))