Message ID | 20180624224043.31503-2-DH@synoia.com |
---|---|
State | Deferred |
Delegated to: | Joe Hershberger |
Headers | show |
Series | [U-Boot,v12,1/3] Consolidating UDP header functions. | expand |
Am Mo., 25. Juni 2018 um 00:42 Uhr schrieb <DH@synoia.com>: > > From: Duncan Hare <DH@Synoia.com> > > Currently file transfers are done using tftp or NFS both > over udp. This requires a request to be sent from client > (u-boot) to the boot server. > > The current standard is TCP with selective acknowledgment. > > In our testing we have reduce kernel transmission time to > around 0.4 seconds for a 4Mbyte kernel, with a 100 Mbps > downlink. > > Series-Changes 11 > - Add TCP with SACK > - Clean formatting > - Remove buffer search and print routines > > Series-Changes 12 > - Fix License statement > > Signed-off-by: Duncan Hare <DH@Synoia.com> > Signed-off-by: Duncan Hare <DuncanCHare@yahoo.com> > --- > > include/net.h | 11 +- > include/net/tcp.h | 227 ++++++++++++++++++ > net/Kconfig | 6 + > net/Makefile | 1 + > net/net.c | 33 +++ > net/tcp.c | 700 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 977 insertions(+), 1 deletion(-) > create mode 100644 include/net/tcp.h > create mode 100644 net/tcp.c > > diff --git a/include/net.h b/include/net.h > index a54160fff6..ba96267eb8 100644 > --- a/include/net.h > +++ b/include/net.h > @@ -26,6 +26,9 @@ > * > */ > > +#if defined(CONFIG_TCP) /* Protected UDP uses less bufferes than TCP */ > +#define CONFIG_SYS_RX_ETH_BUFFER 12 > +#endif > #ifdef CONFIG_SYS_RX_ETH_BUFFER > # define PKTBUFSRX CONFIG_SYS_RX_ETH_BUFFER > #else > @@ -350,6 +353,7 @@ struct vlan_ethernet_hdr { > #define PROT_PPP_SES 0x8864 /* PPPoE session messages */ > > #define IPPROTO_ICMP 1 /* Internet Control Message Protocol */ > +#define IPPROTO_TCP 6 /* Transmission Control Protocol */ > #define IPPROTO_UDP 17 /* User Datagram Protocol */ > > /* > @@ -659,7 +663,7 @@ static inline void net_send_packet(uchar *pkt, int len) > } > > /* > - * Transmit "net_tx_packet" as UDP packet, performing ARP request if needed > + * Transmit "net_tx_packet" as UDP or TCPpacket, send ARP request if needed missing space - should be * Transmit "net_tx_packet" as UDP or TCP packet, send ARP request if needed > * (ether will be populated) > * > * @param ether Raw packet buffer > @@ -667,10 +671,15 @@ static inline void net_send_packet(uchar *pkt, int len) > * @param dport Destination UDP port > * @param sport Source UDP port > * @param payload_len Length of data after the UDP header > + * @param action TCP action to be performed > + * @param tcp_seq_num TCP sequence number of this transmission > + * @param tcp_ack_num TCP stream acknolegement number > */ > int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, > int payload_len, int proto, u8 action, u32 tcp_seq_num, > u32 tcp_ack_num); > +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, > + u32 tcp_seq_num, u32 tcp_ack_num); > int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, > int sport, int payload_len); > > diff --git a/include/net/tcp.h b/include/net/tcp.h > new file mode 100644 > index 0000000000..d0e90e07dd > --- /dev/null > +++ b/include/net/tcp.h > @@ -0,0 +1,227 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * TCP Support with SACK for file transfer. > + * > + * Copyright 2017 Duncan Hare, All rights reserved. > + */ > + > +#define TCP_ACTIVITY 127 /* Number of packets received */ > + /* before console progress mark */ > + > +struct ip_tcp_hdr { > + u8 ip_hl_v; /* header length and version */ > + u8 ip_tos; /* type of service */ > + u16 ip_len; /* total length */ > + u16 ip_id; /* identification */ > + u16 ip_off; /* fragment offset field */ > + u8 ip_ttl; /* time to live */ > + u8 ip_p; /* protocol */ > + u16 ip_sum; /* checksum */ > + struct in_addr ip_src; /* Source IP address */ > + struct in_addr ip_dst; /* Destination IP address */ > + u16 tcp_src; /* TCP source port */ > + u16 tcp_dst; /* TCP destination port */ > + u32 tcp_seq; /* TCP sequence number */ > + u32 tcp_ack; /* TCP Acknowledgment number */ > + u8 tcp_hlen; /* 4 bits TCP header Length/4 */ > + /* 4 bits Reserved */ > + /* 2 more bits reserved */ > + u8 tcp_flags; /* see defines */ > + u16 tcp_win; /* TCP windows size */ > + u16 tcp_xsum; /* Checksum */ > + u16 tcp_ugr; /* Pointer to urgent data */ > +} __packed; > + > +#define IP_TCP_HDR_SIZE (sizeof(struct ip_tcp_hdr)) > +#define TCP_HDR_SIZE (IP_TCP_HDR_SIZE - IP_HDR_SIZE) > + > +#define TCP_DATA 0x00 /* Data Packet - internal use only */ > +#define TCP_FIN 0x01 /* Finish flag */ > +#define TCP_SYN 0x02 /* Synch (start) flag */ > +#define TCP_RST 0x04 /* reset flag */ > +#define TCP_PUSH 0x08 /* Push - Notify app */ > +#define TCP_ACK 0x10 /* Acknowledgment of data received */ > +#define TCP_URG 0x20 /* Urgent */ > +#define TCP_ECE 0x40 /* Congestion control */ > +#define TCP_CWR 0x80 /* Congestion Control */ > + > +/* > + * TCP header options, Seq, MSS, and SACK > + */ > + > +#define TCP_SACK 32 /* Number of packets analyzed */ > + /* on leading edge of stream */ > + > +#define TCP_O_END 0x00 /* End of option list */ > +#define TCP_1_NOP 0x01 /* Single padding NOP */ > +#define TCP_O_NOP 0x01010101 /* NOPs pad to 32 bit boundary */ > +#define TCP_O_MSS 0x02 /* MSS Size option */ > +#define TCP_O_SCL 0x03 /* Window Scale option */ > +#define TCP_P_SACK 0x04 /* SACK permitted */ > +#define TCP_V_SACK 0x05 /* SACK values */ > +#define TCP_O_TS 0x08 /* Timestamp option */ > +#define TCP_OPT_LEN_2 0x02 > +#define TCP_OPT_LEN_3 0x03 > +#define TCP_OPT_LEN_4 0x04 > +#define TCP_OPT_LEN_6 0x06 > +#define TCP_OPT_LEN_8 0x08 > +#define TCP_OPT_LEN_A 0x0a /* Timestamp Length */ > + > +/* > + * Please review the warning in net.c about these two parameters. > + * They are part of a promise of RX buffer size to the sending TCP > + */ > + > +#define TCP_MSS 1460 /* Max segment size */ > +#define TCP_SCALE 0x01 /* Scale */ > + > +struct tcp_mss { /* TCP Max Segment size */ > + u8 kind; /* Field ID */ > + u8 len; /* Field length */ > + u16 mss; /* Segment size value */ > +} __packed; > + > +struct tcp_scale { /* TCP Windows Scale */ > + u8 kind; /* Field ID */ > + u8 len; /* Filed length */ > + u8 scale; /* windows shift value used for */ > + /* networks with many hops */ > + /* Typically 4 or more hops */ extra empty line needed? > +} __packed; > + > +struct tcp_sack_p { /* SACK permitted */ > + u8 kind; /* Field Id */ > + u8 len; /* Field length */ > +} __packed; > + > + /* Terse definitions used */ > + /* long definitions make the */ > + /* indented code overflow line */ > + /* length linits */ wrong/funny indentation? > +struct sack_edges { > + u32 l; /* Left edge of stream */ > + u32 r; /* right edge of stream */ > +} __packed; > + > +#define TCP_SACK_SIZE (sizeof(struct sack_edges)) > + > +/* > + * A TCP stream has holes when packets are missing or disordered. > + * A hill is the inverese of a hole, and is data received. Should be A hill is the inverse of a .. > + * TCP receiveds hills (a sequence of data), and inferrs Holes Should be TCP received hills .. > + * from the "hills" or packets received. > + */ > + > +#define TCP_SACK_HILLS 4 > + > +struct tcp_sack_v { > + u8 kind; /* Field ID */ > + u8 len; /* Field Length */ > + struct sack_edges hill[TCP_SACK_HILLS]; /* L & R window edges */ > +} __packed; > + > +struct tcp_t_opt { /* TCP time stamps option */ > + u8 kind; /* Field id */ > + u8 len; /* Field length */ > + u32 t_snd; /* Sender timestamp */ > + u32 t_rcv; /* Receiver timestamp */ > +} __packed; > + > +#define TCP_TSOPT_SIZE (sizeof(struct tcp_t_opt)) > + > +/* > + * ip tcp structure with options > + */ > + > +struct ip_tcp_hdr_o { > + struct ip_tcp_hdr hdr; > + struct tcp_mss mss; > + struct tcp_scale scale; > + struct tcp_sack_p sack_p; > + struct tcp_t_opt t_opt; > + u8 end; > +} __packed; > + > +#define IP_TCP_O_SIZE (sizeof(struct ip_tcp_hdr_o)) > + > +struct ip_tcp_hdr_s { > + struct ip_tcp_hdr hdr; > + struct tcp_t_opt t_opt; > + struct tcp_sack_v sack_v; > + u8 end; > +} __packed; > + > +#define IP_TCP_SACK_SIZE (sizeof(struct ip_tcp_hdr_s)) > + > +/* > + * TCP pseudo header definitions > + */ > +#define PSEUDO_PAD_SIZE 8 > + > +struct pseudo_hdr { > + u8 padding[PSEUDO_PAD_SIZE]; /* pseudo hdr size = ip_tcp hdr size */ > + struct in_addr p_src; > + struct in_addr p_dst; > + u8 rsvd; > + u8 p; > + u16 len; > +} __packed; > + > +#define PSEUDO_HDR_SIZE (sizeof(struct pseudo_hdr)) - PSEUDO_PAD_SIZE > + > +/* > + * union for building TCP/IP packet. Build Pseudo header in packed buffer > + * first, calculate TCP checksum, then build IP header in packed buffer. > + */ > + > +union tcp_build_pkt { > + struct pseudo_hdr ph; > + struct ip_tcp_hdr_o ip; > + struct ip_tcp_hdr_s sack; > + uchar raw[1600]; > +} __packed; > + > +/* > + * TCP State machine states for connection > + */ > + > +enum TCP_STATE { > + TCP_CLOSED, /* Need to send SYN to connect */ > + TCP_SYN_SENT, /* Trying to connect, waiting for SYN ACK */ > + TCP_ESTABLISHED, /* both server & client have a connection */ > + TCP_CLOSE_WAIT, /* Rec FIN, passed to app for FIN, ACK rsp*/ > + TCP_CLOSING, /* Rec FIN, sent FIN, ACK waiting for ACK */ > + TCP_FIN_WAIT_1, /* Sent FIN waiting for response */ > + TCP_FIN_WAIT_2 /* Rec ACK from FIN sent, waiting for FIN */ > +}; > + > +enum TCP_STATE tcp_get_tcp_state(void); > +void tcp_set_tcp_state(enum TCP_STATE new_state); > +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, > + u8 action, u32 tcp_seq_num, u32 tcp_ack_num); > + > +/* > + * An incoming packet handler. > + * @param pkt pointer to the application packet > + * @param dport destination UDP port > + * @param sip source IP address > + * @param sport source UDP port > + * @param len packet length > + */ > +typedef void rxhand_tcp(uchar *pkt, unsigned int dport, > + struct in_addr sip, unsigned int sport, > + unsigned int len); > +void tcp_set_tcp_handler(rxhand_tcp *f); > + > +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int len); > + > +/* > + * An incoming TCP packet handler for the TCP protocol. > + * There is also a dynamic function pointer for TCP based commands to > + * receive incoming traffic after the TCP protocol code has done its work. > + */ > + > +void rxhand_action(u8 tcp_action, int payload_len, u32 tcp_seq_num, > + u32 tcp_ack_num, unsigned int pkt_len, > + union tcp_build_pkt *b); > diff --git a/net/Kconfig b/net/Kconfig > index f2363e5256..77ab683eb8 100644 > --- a/net/Kconfig > +++ b/net/Kconfig > @@ -22,4 +22,10 @@ config NETCONSOLE > Support the 'nc' input/output device for networked console. > See README.NetConsole for details. > > +config TCP > + bool "TCP stack" > + help > + TCP protocol support with SACK for wget. Selecting this will provide > + the fastest file transfer possible. > + > endif # if NET > diff --git a/net/Makefile b/net/Makefile > index 07466879f5..237023407f 100644 > --- a/net/Makefile > +++ b/net/Makefile > @@ -24,6 +24,7 @@ obj-$(CONFIG_CMD_RARP) += rarp.o > obj-$(CONFIG_CMD_SNTP) += sntp.o > obj-$(CONFIG_CMD_TFTPBOOT) += tftp.o > obj-$(CONFIG_UDP_FUNCTION_FASTBOOT) += fastboot.o > +obj-$(CONFIG_TCP) += tcp.o > > # Disable this warning as it is triggered by: > # sprintf(buf, index ? "foo%d" : "foo", index) > diff --git a/net/net.c b/net/net.c > index f831c34599..8ac1ff050f 100644 > --- a/net/net.c > +++ b/net/net.c > @@ -108,6 +108,7 @@ > #if defined(CONFIG_CMD_SNTP) > #include "sntp.h" > #endif > +#include <net/tcp.h> > > /** BOOTP EXTENTIONS **/ > > @@ -380,6 +381,9 @@ void net_init(void) > > /* Only need to setup buffer pointers once. */ > first_call = 0; > +#if defined(CONFIG_TCP) > + tcp_set_tcp_state(TCP_CLOSED); > +#endif > } > > net_init_loop(); > @@ -790,6 +794,16 @@ return net_send_ip_packet(ether, dest, dport, sport, payload_len, > IPPROTO_UDP, 0, 0, 0); > } > > +#if defined(CONFIG_TCP) > +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, > + u32 tcp_seq_num, u32 tcp_ack_num) > +{ > + return net_send_ip_packet(net_server_ethaddr, net_server_ip, dport, > + sport, payload_len, IPPROTO_TCP, action, > + tcp_seq_num, tcp_ack_num); > +} > +#endif > + > int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, > int payload_len, int proto, u8 action, u32 tcp_seq_num, > u32 tcp_ack_num) > @@ -821,6 +835,15 @@ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, > dport, sport, payload_len); > pkt_hdr_size = eth_hdr_size + IP_UDP_HDR_SIZE; > break; > +#if defined(CONFIG_TCP) > + case IPPROTO_TCP: > + pkt_hdr_size = eth_hdr_size + > + tcp_set_tcp_header(pkt + eth_hdr_size, dport, sport, > + payload_len, action, tcp_seq_num, > + tcp_ack_num); > + break; > +#endif > + > default: return -EINVAL; > } > > @@ -1229,6 +1252,16 @@ void net_process_received_packet(uchar *in_packet, int len) > if (ip->ip_p == IPPROTO_ICMP) { > receive_icmp(ip, len, src_ip, et); > return; > +#if defined(CONFIG_TCP) > + } else if (ip->ip_p == IPPROTO_TCP) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP PH (to=%pI4, from=%pI4, len=%d)\n", > + &dst_ip, &src_ip, len); > + > + rxhand_tcp_f((union tcp_build_pkt *)ip, len); > + return; > +#endif > + > } else if (ip->ip_p != IPPROTO_UDP) { /* Only UDP packets */ > return; > } > diff --git a/net/tcp.c b/net/tcp.c > new file mode 100644 > index 0000000000..12fa0a72cd > --- /dev/null > +++ b/net/tcp.c > @@ -0,0 +1,700 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright 2017 Duncan Hare, all rights reserved. > + */ > + > +/* > + * General Desription: > + * > + * TCP support for the wget command, for fast file downloading. > + * > + * HTTP/TCP Receiver: > + * > + * Prequeisites: - own ethernet address Should be * Prerequisites: - own ethernet address > + * - own IP address > + * - Server IP address > + * - Server with TCP > + * - TCP application (eg wget) > + * Next Step HTTPS? > + */ > +#include <common.h> > +#include <command.h> > +#include <console.h> > +#include <environment.h> > +#include <errno.h> > +#include <net.h> > +#include <net/tcp.h> > + > +/* > + * TCP sliding window control used by us to request re-TX > + */ > + > +static struct tcp_sack_v tcp_lost; > + > +/* TCP option timestamp */ > +static u32 loc_timestamp; > +static u32 rmt_timestamp; > + > +u32 tcp_seq_init; > +u32 tcp_ack_edge; > +u32 tcp_seq_max; > + > +int tcp_activity_count; > + > +/* > + * Search for TCP_SACK and review the comments before the code section > + * TCP_SACK is the number of packets at the front of the stream > + */ > + > +enum pkt_state {PKT, NOPKT}; > +struct sack_r { > + struct sack_edges se; > + enum pkt_state st; > +}; > + > +struct sack_r edge_a[TCP_SACK]; > +unsigned int sack_idx; > +unsigned int prev_len; > + > +/* TCP connection state */ > +static enum TCP_STATE tcp_state; > + > +/* > + * An incoming TCP packet handler for the TCP protocol. > + * There is also a dynamic function pointer for TCP based commands to > + * receive incoming traffic after the TCP protocol code has done its work. > + */ > + > +/* Current TCP RX packet handler */ > +static rxhand_tcp *tcp_packet_handler; > + > +enum TCP_STATE tcp_get_tcp_state(void) > +{ > + return tcp_state; > +} > + > +void tcp_set_tcp_state(enum TCP_STATE new_state) > +{ > + tcp_state = new_state; > +} > + > +static void dummy_handler(uchar *pkt, unsigned int dport, > + struct in_addr sip, unsigned int sport, > + unsigned int len) > +{ > +} > + > +void tcp_set_tcp_handler(rxhand_tcp *f) > +{ > + debug_cond(DEBUG_INT_STATE, "--- net_loop TCP handler set (%p)\n", f); > + if (!f) > + tcp_packet_handler = dummy_handler; > + else > + tcp_packet_handler = f; > +} > + > +u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, > + int tcp_len, int pkt_len) > +{ > + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; > + int checksum_len; > + > + /* > + * Pseudo header > + * > + * Zero the byte after the last byte so that the header checksum > + * will always work. > + */ > + > + pkt[pkt_len] = 0x00; > + > + net_copy_ip((void *)&b->ph.p_src, &src); > + net_copy_ip((void *)&b->ph.p_dst, &dest); > + b->ph.rsvd = 0x00; > + b->ph.p = IPPROTO_TCP; > + b->ph.len = htons(tcp_len); > + checksum_len = tcp_len + PSEUDO_HDR_SIZE; > + > + debug_cond(DEBUG_DEV_PKT, > + "TCP Pesudo Header (to=%pI4, from=%pI4, Len=%d)\n", > + &b->ph.p_dst, &b->ph.p_src, checksum_len); > + > + return compute_ip_checksum(pkt + PSEUDO_PAD_SIZE, checksum_len); > +} > + > +int net_set_ack_options(union tcp_build_pkt *b) > +{ > + b->sack.hdr.tcp_hlen = (TCP_HDR_SIZE >> 2) << 4; > + > + b->sack.t_opt.kind = TCP_O_TS; > + b->sack.t_opt.len = TCP_OPT_LEN_A; > + b->sack.t_opt.t_snd = htons(loc_timestamp); > + b->sack.t_opt.t_rcv = rmt_timestamp; > + b->sack.sack_v.kind = TCP_1_NOP; > + b->sack.sack_v.len = 0x00; > + > + if (tcp_lost.len > TCP_OPT_LEN_2) { > + debug_cond(DEBUG_DEV_PKT, "TCP ack opt lost.len %x\n", > + tcp_lost.len); > + b->sack.sack_v.len = tcp_lost.len; > + b->sack.sack_v.kind = TCP_V_SACK; > + b->sack.sack_v.hill[0].l = htonl(tcp_lost.hill[0].l); > + b->sack.sack_v.hill[0].r = htonl(tcp_lost.hill[0].r); > + > + /* > + * These SACK structures are initialized with NOPs to > + * provide TCP header alignment padding. There are 4 > + * SACK structures used for both header padding and > + * internally. > + */ > + > + b->sack.sack_v.hill[1].l = htonl(tcp_lost.hill[1].l); > + b->sack.sack_v.hill[1].r = htonl(tcp_lost.hill[1].r); > + b->sack.sack_v.hill[2].l = htonl(tcp_lost.hill[2].l); > + b->sack.sack_v.hill[2].r = htonl(tcp_lost.hill[2].r); > + b->sack.sack_v.hill[3].l = TCP_O_NOP; > + b->sack.sack_v.hill[3].r = TCP_O_NOP; > + } > + > + /* > + * TCP lengths are stored as a rounded up number of 32 bit words > + * Add 3 to length round up, rounded, then divided into the length > + * in 32 bit words. > + */ > + > + b->sack.hdr.tcp_hlen = (((TCP_HDR_SIZE + TCP_TSOPT_SIZE > + + tcp_lost.len + 3) >> 2) << 4); > + > + /* > + * This returns the actual rounded up length of the > + * TCP header to add to the total packet length > + */ > + > + return b->sack.hdr.tcp_hlen >> 2; > +} > + > +void net_set_syn_options(union tcp_build_pkt *b) > +{ > + tcp_lost.len = 0; > + b->ip.hdr.tcp_hlen = 0xa0; > + > + b->ip.mss.kind = TCP_O_MSS; > + b->ip.mss.len = TCP_OPT_LEN_4; > + b->ip.mss.mss = htons(TCP_MSS); > + b->ip.scale.kind = TCP_O_SCL; > + b->ip.scale.scale = TCP_SCALE; > + b->ip.scale.len = TCP_OPT_LEN_3; > + b->ip.sack_p.kind = TCP_P_SACK; > + b->ip.sack_p.len = TCP_OPT_LEN_2; > + b->ip.t_opt.kind = TCP_O_TS; > + b->ip.t_opt.len = TCP_OPT_LEN_A; > + loc_timestamp = get_ticks(); > + rmt_timestamp = 0x00000000; > + b->ip.t_opt.t_snd = 0; > + b->ip.t_opt.t_rcv = 0x00000000; > + b->ip.end = TCP_O_END; > +} > + > +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, > + u8 action, u32 tcp_seq_num, u32 tcp_ack_num) > +{ > + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; > + int pkt_hdr_len; > + int pkt_len; > + int tcp_len; > + > +/* > + * Header: 5 32 bit words. 4 bits TCP header Length, 4 bits reserved options > + */ > + b->ip.hdr.tcp_flags = action; > + pkt_hdr_len = IP_TCP_HDR_SIZE; > + b->ip.hdr.tcp_hlen = 0x50; > + > + switch (action) { > + case TCP_SYN: > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:SYN (%pI4, %pI4, sq=%d, ak=%d)\n", > + &net_server_ip, &net_ip, > + tcp_seq_num, tcp_ack_num); > + tcp_activity_count = 0; > + net_set_syn_options(b); > + tcp_seq_num = 0; > + tcp_ack_num = 0; > + pkt_hdr_len = IP_TCP_O_SIZE; > + if (tcp_state == TCP_SYN_SENT) { /* Too many SYNs */ > + action = TCP_FIN; > + tcp_state = TCP_FIN_WAIT_1; > + } else { > + tcp_state = TCP_SYN_SENT; > + } > + break; > + case TCP_ACK: > + pkt_hdr_len = IP_HDR_SIZE + > + net_set_ack_options(b); Do we need the extraces spaes before = and do we need to move net_set_ack_options(..) to next line? > + b->ip.hdr.tcp_flags = action; > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:ACK (%pI4, %pI4, s=%d, a=%d, A=%x)\n", > + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num, > + action); > + break; > + case TCP_FIN: > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:FIN (%pI4, %pI4, s=%d, a=%d)\n", > + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num); > + payload_len = 0; > + pkt_hdr_len = IP_TCP_HDR_SIZE; > + tcp_state = TCP_FIN_WAIT_1; > + > + break; > + > + /* Notify connection closing */ > + > + case (TCP_FIN | TCP_ACK): > + case ((TCP_FIN | TCP_ACK) | TCP_PUSH): > + if (tcp_state == TCP_CLOSE_WAIT) > + tcp_state = TCP_CLOSING; > + tcp_ack_edge++; > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:FIN ACK PSH(%pI4, %pI4, s=%d, a=%d, A=%x)\n", > + &net_server_ip, &net_ip, > + tcp_seq_num, tcp_ack_edge, action); > + /* FALLTHRU */ > + default: > + pkt_hdr_len = IP_HDR_SIZE + > + net_set_ack_options(b); Do we need the extraces spaes before = and do we need to move net_set_ack_options(..) to next line? > + b->ip.hdr.tcp_flags = action | TCP_PUSH | TCP_ACK; > + debug_cond(DEBUG_DEV_PKT, > + "TCP Hdr:dft (%pI4, %pI4, s=%d, a=%d, A=%x)\n", > + &net_server_ip, &net_ip, > + tcp_seq_num, tcp_ack_num, action); > + } > + > + pkt_len = pkt_hdr_len + payload_len; > + tcp_len = pkt_len - IP_HDR_SIZE; > + > + /* TCP Header */ > + b->ip.hdr.tcp_ack = htonl(tcp_ack_edge); > + b->ip.hdr.tcp_src = htons(sport); > + b->ip.hdr.tcp_dst = htons(dport); > + b->ip.hdr.tcp_seq = htonl(tcp_seq_num); > + tcp_seq_num = tcp_seq_num + payload_len; > + > + /* > + * TCP window size - TCP header variable tcp_win. > + * Change tcp_win only if you have an understanding of network > + * overrun, congestion, TCP segment sizes, TCP windows, TCP scale, > + * queuing theory and packet buffering. If there are too few buffers, > + * there will be data loss, recovery may work or the sending TCP, > + * the server, could abort the stream transmission. > + * MSS is governed by maximum Ethernet frame length. > + * The number of buffers is governed by the desire to have a queue of > + * full buffers to be processed at the destination to maximize > + * throughput. Temporary memory use for the boot phase on modern > + * SOCs is may not be considered a constraint to buffer space, if > + * it is, then the u-boot tftp or nfs kernel netboot should be > + * considered. > + */ > + > + b->ip.hdr.tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE); > + > + b->ip.hdr.tcp_xsum = 0x0000; > + b->ip.hdr.tcp_ugr = 0x0000; > + > + b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, net_server_ip, > + tcp_len, pkt_len); > + > + net_set_ip_header((uchar *)&b->ip, net_server_ip, net_ip, > + pkt_len, IPPROTO_TCP); > + > + return pkt_hdr_len; > +} > + > +/* > + * Selective Acknowledgment (Essential for fast stream transfer) > + */ > + > +void tcp_hole(u32 tcp_seq_num, u32 len, u32 tcp_seq_max) > +{ > + unsigned int idx_sack; > + unsigned int sack_end = TCP_SACK - 1; > + unsigned int sack_in; > + unsigned int hill = 0; > + enum pkt_state expect = PKT; > + > + u32 seq = tcp_seq_num - tcp_seq_init; > + u32 hol_l = tcp_ack_edge - tcp_seq_init; > + u32 hol_r = 0; > + > + /* Place new seq number in correct place in receive array */ > + > + if (prev_len == 0) > + prev_len = len; > + idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len); > + if (idx_sack < TCP_SACK) { > + edge_a[idx_sack].se.l = tcp_seq_num; > + edge_a[idx_sack].se.r = tcp_seq_num + len; > + edge_a[idx_sack].st = PKT; > + > +/* > + * The fin (last) packet is not the same length as data packets, and if it's > + * length is recorded and used for array index calculation, calculation breaks. > + */ > + if (prev_len < len) > + prev_len = len; > + } > + > + debug_cond(DEBUG_DEV_PKT, > + "TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n", > + seq, hol_l, len, sack_idx, sack_end); > + > + /* Right edge of contiguous stream, is the left edge of first hill */ > + > + hol_l = tcp_seq_num - tcp_seq_init; > + hol_r = hol_l + len; > + > + tcp_lost.len = TCP_OPT_LEN_2; > + > + debug_cond(DEBUG_DEV_PKT, > + "TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n", > + idx_sack, seq, hol_l, hol_r, sack_idx, sack_end); > + > + for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS; > + sack_in++) { > + switch (expect) { > + case NOPKT: > + switch (edge_a[sack_in].st) { > + case NOPKT: > + debug_cond(DEBUG_INT_STATE, "N"); > + break; > + case PKT: > + debug_cond(DEBUG_INT_STATE, "n"); > + tcp_lost.hill[hill].l = > + edge_a[sack_in].se.l; > + tcp_lost.hill[hill].r = > + edge_a[sack_in].se.r; > + expect = PKT; > + break; > + } > + break; > + case PKT: > + switch (edge_a[sack_in].st) { > + case NOPKT: > + debug_cond(DEBUG_INT_STATE, "p"); > + if (sack_in > sack_idx && > + hill < TCP_SACK_HILLS) { > + hill++; > + tcp_lost.len += TCP_OPT_LEN_8; > + } > + expect = NOPKT; > + break; > + case PKT: > + debug_cond(DEBUG_INT_STATE, "P"); > + > + if (tcp_ack_edge == edge_a[sack_in].se.l) { > + tcp_ack_edge = edge_a[sack_in].se.r; > + edge_a[sack_in].st = NOPKT; > + sack_idx++; > + } else { > + if (hill < TCP_SACK_HILLS) > + tcp_lost.hill[hill].r = > + edge_a[sack_in].se.r; > + if (sack_in == sack_end - 1) > + tcp_lost.hill[hill].r = > + edge_a[sack_in].se.r; > + } > + break; > + } > + break; > + } > + } > + debug_cond(DEBUG_INT_STATE, "\n"); > + if (tcp_lost.len <= TCP_OPT_LEN_2) > + sack_idx = 0; > +} > + > +void tcp_parse_options(uchar *o, int o_len) > +{ > + struct tcp_t_opt *tsopt; > + uchar *p = o; > +/* > + * NOPs are options with a zero length, and thus are special. > + * All other options have length fields. > + */ > + > + for (p = o; p < (o + o_len); p = p + p[1]) { > + if (p[1] != 0) { > + switch (p[0]) { > + case TCP_O_END: > + return; /* Finished processing options */ > + case TCP_O_MSS: > + case TCP_O_SCL: > + case TCP_P_SACK: > + case TCP_V_SACK: > + break; /* Continue to process options */ > + case TCP_O_TS: > + tsopt = (struct tcp_t_opt *)p; > + rmt_timestamp = tsopt->t_snd; > + return; > + break; > + } /* End switch, process optional NOPs */ > + > + if (p[0] == TCP_O_NOP) > + p++; > + } else { > + return; /* Finished processing options */ > + } > + } > +} > + > +u8 tcp_state_machine(u8 tcp_flags, u32 *tcp_seq_num, int payload_len) > +{ > + u8 tcp_fin = tcp_flags & TCP_FIN; > + u8 tcp_syn = tcp_flags & TCP_SYN; > + u8 tcp_rst = tcp_flags & TCP_RST; > + u8 tcp_push = tcp_flags & TCP_PUSH; > + u8 tcp_ack = tcp_flags & TCP_ACK; > + u8 action = TCP_DATA; > + int i; > + > + /* > + * tcp_flags are examined to determine TX action in a given state > + * tcp_push is interpreted to mean "inform the app" > + * urg, ece, cer and nonce flags are not supported. > + * > + * exe and crw are use to signal and confirm knowledge of congestion. > + * This TCP only sends a file request and acks. If it generates > + * congestion, the network is broken. > + */ > + > + debug_cond(DEBUG_INT_STATE, "TCP STATE ENTRY %x\n", action); > + if (tcp_rst) { > + action = TCP_DATA; > + tcp_state = TCP_CLOSED; > + net_set_state(NETLOOP_FAIL); > + debug_cond(DEBUG_INT_STATE, "TCP Reset %x\n", tcp_flags); > + return TCP_RST; > + } > + > + switch (tcp_state) { > + case TCP_CLOSED: > + debug_cond(DEBUG_INT_STATE, "TCP CLOSED %x\n", tcp_flags); > + if (tcp_fin) > + action = TCP_DATA; > + if (tcp_syn) > + action = TCP_RST; > + if (tcp_ack) > + action = TCP_DATA; > + break; > + case TCP_SYN_SENT: > + debug_cond(DEBUG_INT_STATE, "TCP_SYN_SENT %x, %d\n", > + tcp_flags, *tcp_seq_num); > + if (tcp_fin) { > + action = action | TCP_PUSH; > + tcp_state = TCP_CLOSE_WAIT; > + } > + if (tcp_syn) { > + action = action | TCP_ACK | TCP_PUSH; > + if (tcp_ack) { > + tcp_seq_init = *tcp_seq_num; > + *tcp_seq_num = *tcp_seq_num + 1; > + tcp_seq_max = *tcp_seq_num; > + tcp_ack_edge = *tcp_seq_num; > + sack_idx = 0; > + edge_a[sack_idx].se.l = *tcp_seq_num; > + edge_a[sack_idx].se.r = *tcp_seq_num; > + prev_len = 0; > + tcp_state = TCP_ESTABLISHED; > + for (i = 0; i < TCP_SACK; i++) > + edge_a[i].st = NOPKT; > + } > + } else { > + if (tcp_ack) > + action = TCP_DATA; > + } > + break; > + case TCP_ESTABLISHED: > + debug_cond(DEBUG_INT_STATE, > + "TCP_ESTABLISHED %x\n", tcp_flags); > + if (*tcp_seq_num > tcp_seq_max) > + tcp_seq_max = *tcp_seq_num; > + if (payload_len > 0) { > + tcp_hole(*tcp_seq_num, payload_len, tcp_seq_max); > + tcp_fin = TCP_DATA; /* cause standalone FIN */ > + } > + > + if ((tcp_fin) && tcp_lost.len <= TCP_OPT_LEN_2) { > + action = action | TCP_FIN | TCP_PUSH | TCP_ACK; > + tcp_state = TCP_CLOSE_WAIT; > + } else { > + if (tcp_ack) > + action = TCP_DATA; > + } > + if (tcp_push) > + action = action | TCP_PUSH; > + if (tcp_syn) > + action = TCP_ACK + TCP_RST; > + break; > + case TCP_CLOSE_WAIT: > + debug_cond(DEBUG_INT_STATE, "TCP_CLOSE_WAIT (%x)\n", tcp_flags); > + action = TCP_DATA; /* Wait for app */ > + break; > + case TCP_FIN_WAIT_2: > + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_2 (%x)\n", tcp_flags); > + if (tcp_fin) > + action = TCP_DATA; > + if (tcp_syn) > + action = TCP_DATA; > + if (tcp_ack) { > + action = TCP_PUSH | TCP_ACK; > + tcp_state = TCP_CLOSED; > + puts("\n"); > + } > + break; > + case TCP_FIN_WAIT_1: > + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_1 (%x)\n", tcp_flags); > + if (tcp_fin) { > + action = TCP_ACK | TCP_FIN; > + tcp_state = TCP_FIN_WAIT_2; > + } > + if (tcp_syn) > + action = TCP_RST; > + if (tcp_ack) { > + tcp_state = TCP_CLOSED; > + tcp_seq_num = tcp_seq_num + 1; > + } > + break; > + case TCP_CLOSING: > + debug_cond(DEBUG_INT_STATE, "TCP_CLOSING (%x)\n", tcp_flags); > + if (tcp_fin) > + action = TCP_DATA; > + if (tcp_syn) > + action = TCP_RST; > + if (tcp_ack) { > + action = TCP_PUSH; > + tcp_state = TCP_CLOSED; > + puts("\n"); > + } > + break; > + } > + return action; > +} > + > +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len) > +{ > + int tcp_len = pkt_len - IP_HDR_SIZE; > + u16 tcp_rx_xsum = b->ip.hdr.ip_sum; > + u8 tcp_action = TCP_DATA; > + u32 tcp_seq_num; > + u32 tcp_ack_num; > + struct in_addr action_and_state; > + > + int tcp_hdr_len; > + int payload_len; > + > + /* > + * Verify IP header > + */ > + debug_cond(DEBUG_DEV_PKT, > + "TCP RX in RX Sum (to=%pI4, from=%pI4, len=%d)\n", > + &b->ip.hdr.ip_src, &b->ip.hdr.ip_dst, pkt_len); > + > + debug_cond(DEBUG_DEV_PKT, > + "In__________________________________________\n"); > + > + b->ip.hdr.ip_src = net_server_ip; > + b->ip.hdr.ip_dst = net_ip; > + b->ip.hdr.ip_sum = 0x0000; > + if (tcp_rx_xsum != compute_ip_checksum(b, IP_HDR_SIZE)) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP RX IP xSum Error (%pI4, =%pI4, len=%d)\n", > + &net_ip, &net_server_ip, pkt_len); > + return; > + } > + > + /* > + * Build pseudo header and verify TCP header > + */ > + tcp_rx_xsum = b->ip.hdr.tcp_xsum; > + b->ip.hdr.tcp_xsum = 0x0000; > + if (tcp_rx_xsum != tcp_set_pseudo_header((uchar *)b, b->ip.hdr.ip_src, > + b->ip.hdr.ip_dst, tcp_len, > + pkt_len)) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP RX TCP xSum Error (%pI4, %pI4, len=%d)\n", > + &net_ip, &net_server_ip, tcp_len); > + return; > + } > + > + tcp_hdr_len = (b->ip.hdr.tcp_hlen >> 2); > + payload_len = tcp_len - tcp_hdr_len; > + > + if (tcp_hdr_len > TCP_HDR_SIZE) > + tcp_parse_options((uchar *)b + IP_TCP_HDR_SIZE, > + tcp_hdr_len - TCP_HDR_SIZE); > + /* > + * Incoming sequence and ack numbers are server's view of the numbers. > + * The app must swap the numbers when responding. > + */ > + > + tcp_seq_num = ntohl(b->ip.hdr.tcp_seq); > + tcp_ack_num = ntohl(b->ip.hdr.tcp_ack); > + > + /* Packets are not ordered. Send to app as received. */ > + > + tcp_action = tcp_state_machine(b->ip.hdr.tcp_flags, > + &tcp_seq_num, payload_len); > + > + /* > + * State-altering command to be sent. > + * The packet sequence and ack numbers are in the tcp_seq_num > + * and tcp_ack_num variables. The current packet, its position > + * in the data stream, is the in the range of those variables. > + * > + * In the "application push" invocation, the TCP header with all > + * its information is pointed to by the packet pointer. > + * > + * In the typedef > + * void rxhand_tcp(uchar *pkt, unsigned int dport, > + * struct in_addr sip, unsigned int sport, > + * unsigned int len); > + * *pkt is the pointer to the payload > + * dport is used for tcp_seg_num > + * action_and_state.s_addr is used for TCP state > + * sport is used for tcp_ack_num (which is unused by the app) > + * pkt_ length is the payload length. > + * > + * TCP_PUSH from the state machine with a payload length of 0 is a > + * connect or disconnect event > + */ > + > + tcp_activity_count++; > + if (tcp_activity_count > TCP_ACTIVITY) { > + puts("| "); > + tcp_activity_count = 0; > + } > + > + if ((tcp_action & TCP_PUSH) || payload_len > 0) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP Notify (action=%x, Seq=%d,Ack=%d,Pay%d)\n", > + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); > + > + action_and_state.s_addr = tcp_action; > + (*tcp_packet_handler) ((uchar *)b + pkt_len - payload_len, > + tcp_seq_num, action_and_state, > + tcp_ack_num, payload_len); > + > + } else if (tcp_action != TCP_DATA) { > + debug_cond(DEBUG_DEV_PKT, > + "TCP Action (action=%x,Seq=%d,Ack=%d,Pay=%d)\n", > + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); > + > + /* > + * Warning: Incoming Ack & Seq sequence numbers are transposed > + * here to outgoing Seq & Ack sequence numbers > + */ > + net_send_tcp_packet(0, ntohs(b->ip.hdr.tcp_src), > + ntohs(b->ip.hdr.tcp_dst), > + (tcp_action & (~TCP_PUSH)), > + tcp_seq_num, tcp_ack_num); > + } > +} > -- Overall this does not look too bad - hopefully it will land during this year.
diff --git a/include/net.h b/include/net.h index a54160fff6..ba96267eb8 100644 --- a/include/net.h +++ b/include/net.h @@ -26,6 +26,9 @@ * */ +#if defined(CONFIG_TCP) /* Protected UDP uses less bufferes than TCP */ +#define CONFIG_SYS_RX_ETH_BUFFER 12 +#endif #ifdef CONFIG_SYS_RX_ETH_BUFFER # define PKTBUFSRX CONFIG_SYS_RX_ETH_BUFFER #else @@ -350,6 +353,7 @@ struct vlan_ethernet_hdr { #define PROT_PPP_SES 0x8864 /* PPPoE session messages */ #define IPPROTO_ICMP 1 /* Internet Control Message Protocol */ +#define IPPROTO_TCP 6 /* Transmission Control Protocol */ #define IPPROTO_UDP 17 /* User Datagram Protocol */ /* @@ -659,7 +663,7 @@ static inline void net_send_packet(uchar *pkt, int len) } /* - * Transmit "net_tx_packet" as UDP packet, performing ARP request if needed + * Transmit "net_tx_packet" as UDP or TCPpacket, send ARP request if needed * (ether will be populated) * * @param ether Raw packet buffer @@ -667,10 +671,15 @@ static inline void net_send_packet(uchar *pkt, int len) * @param dport Destination UDP port * @param sport Source UDP port * @param payload_len Length of data after the UDP header + * @param action TCP action to be performed + * @param tcp_seq_num TCP sequence number of this transmission + * @param tcp_ack_num TCP stream acknolegement number */ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len, int proto, u8 action, u32 tcp_seq_num, u32 tcp_ack_num); +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, + u32 tcp_seq_num, u32 tcp_ack_num); int net_send_udp_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len); diff --git a/include/net/tcp.h b/include/net/tcp.h new file mode 100644 index 0000000000..d0e90e07dd --- /dev/null +++ b/include/net/tcp.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TCP Support with SACK for file transfer. + * + * Copyright 2017 Duncan Hare, All rights reserved. + */ + +#define TCP_ACTIVITY 127 /* Number of packets received */ + /* before console progress mark */ + +struct ip_tcp_hdr { + u8 ip_hl_v; /* header length and version */ + u8 ip_tos; /* type of service */ + u16 ip_len; /* total length */ + u16 ip_id; /* identification */ + u16 ip_off; /* fragment offset field */ + u8 ip_ttl; /* time to live */ + u8 ip_p; /* protocol */ + u16 ip_sum; /* checksum */ + struct in_addr ip_src; /* Source IP address */ + struct in_addr ip_dst; /* Destination IP address */ + u16 tcp_src; /* TCP source port */ + u16 tcp_dst; /* TCP destination port */ + u32 tcp_seq; /* TCP sequence number */ + u32 tcp_ack; /* TCP Acknowledgment number */ + u8 tcp_hlen; /* 4 bits TCP header Length/4 */ + /* 4 bits Reserved */ + /* 2 more bits reserved */ + u8 tcp_flags; /* see defines */ + u16 tcp_win; /* TCP windows size */ + u16 tcp_xsum; /* Checksum */ + u16 tcp_ugr; /* Pointer to urgent data */ +} __packed; + +#define IP_TCP_HDR_SIZE (sizeof(struct ip_tcp_hdr)) +#define TCP_HDR_SIZE (IP_TCP_HDR_SIZE - IP_HDR_SIZE) + +#define TCP_DATA 0x00 /* Data Packet - internal use only */ +#define TCP_FIN 0x01 /* Finish flag */ +#define TCP_SYN 0x02 /* Synch (start) flag */ +#define TCP_RST 0x04 /* reset flag */ +#define TCP_PUSH 0x08 /* Push - Notify app */ +#define TCP_ACK 0x10 /* Acknowledgment of data received */ +#define TCP_URG 0x20 /* Urgent */ +#define TCP_ECE 0x40 /* Congestion control */ +#define TCP_CWR 0x80 /* Congestion Control */ + +/* + * TCP header options, Seq, MSS, and SACK + */ + +#define TCP_SACK 32 /* Number of packets analyzed */ + /* on leading edge of stream */ + +#define TCP_O_END 0x00 /* End of option list */ +#define TCP_1_NOP 0x01 /* Single padding NOP */ +#define TCP_O_NOP 0x01010101 /* NOPs pad to 32 bit boundary */ +#define TCP_O_MSS 0x02 /* MSS Size option */ +#define TCP_O_SCL 0x03 /* Window Scale option */ +#define TCP_P_SACK 0x04 /* SACK permitted */ +#define TCP_V_SACK 0x05 /* SACK values */ +#define TCP_O_TS 0x08 /* Timestamp option */ +#define TCP_OPT_LEN_2 0x02 +#define TCP_OPT_LEN_3 0x03 +#define TCP_OPT_LEN_4 0x04 +#define TCP_OPT_LEN_6 0x06 +#define TCP_OPT_LEN_8 0x08 +#define TCP_OPT_LEN_A 0x0a /* Timestamp Length */ + +/* + * Please review the warning in net.c about these two parameters. + * They are part of a promise of RX buffer size to the sending TCP + */ + +#define TCP_MSS 1460 /* Max segment size */ +#define TCP_SCALE 0x01 /* Scale */ + +struct tcp_mss { /* TCP Max Segment size */ + u8 kind; /* Field ID */ + u8 len; /* Field length */ + u16 mss; /* Segment size value */ +} __packed; + +struct tcp_scale { /* TCP Windows Scale */ + u8 kind; /* Field ID */ + u8 len; /* Filed length */ + u8 scale; /* windows shift value used for */ + /* networks with many hops */ + /* Typically 4 or more hops */ + +} __packed; + +struct tcp_sack_p { /* SACK permitted */ + u8 kind; /* Field Id */ + u8 len; /* Field length */ +} __packed; + + /* Terse definitions used */ + /* long definitions make the */ + /* indented code overflow line */ + /* length linits */ +struct sack_edges { + u32 l; /* Left edge of stream */ + u32 r; /* right edge of stream */ +} __packed; + +#define TCP_SACK_SIZE (sizeof(struct sack_edges)) + +/* + * A TCP stream has holes when packets are missing or disordered. + * A hill is the inverese of a hole, and is data received. + * TCP receiveds hills (a sequence of data), and inferrs Holes + * from the "hills" or packets received. + */ + +#define TCP_SACK_HILLS 4 + +struct tcp_sack_v { + u8 kind; /* Field ID */ + u8 len; /* Field Length */ + struct sack_edges hill[TCP_SACK_HILLS]; /* L & R window edges */ +} __packed; + +struct tcp_t_opt { /* TCP time stamps option */ + u8 kind; /* Field id */ + u8 len; /* Field length */ + u32 t_snd; /* Sender timestamp */ + u32 t_rcv; /* Receiver timestamp */ +} __packed; + +#define TCP_TSOPT_SIZE (sizeof(struct tcp_t_opt)) + +/* + * ip tcp structure with options + */ + +struct ip_tcp_hdr_o { + struct ip_tcp_hdr hdr; + struct tcp_mss mss; + struct tcp_scale scale; + struct tcp_sack_p sack_p; + struct tcp_t_opt t_opt; + u8 end; +} __packed; + +#define IP_TCP_O_SIZE (sizeof(struct ip_tcp_hdr_o)) + +struct ip_tcp_hdr_s { + struct ip_tcp_hdr hdr; + struct tcp_t_opt t_opt; + struct tcp_sack_v sack_v; + u8 end; +} __packed; + +#define IP_TCP_SACK_SIZE (sizeof(struct ip_tcp_hdr_s)) + +/* + * TCP pseudo header definitions + */ +#define PSEUDO_PAD_SIZE 8 + +struct pseudo_hdr { + u8 padding[PSEUDO_PAD_SIZE]; /* pseudo hdr size = ip_tcp hdr size */ + struct in_addr p_src; + struct in_addr p_dst; + u8 rsvd; + u8 p; + u16 len; +} __packed; + +#define PSEUDO_HDR_SIZE (sizeof(struct pseudo_hdr)) - PSEUDO_PAD_SIZE + +/* + * union for building TCP/IP packet. Build Pseudo header in packed buffer + * first, calculate TCP checksum, then build IP header in packed buffer. + */ + +union tcp_build_pkt { + struct pseudo_hdr ph; + struct ip_tcp_hdr_o ip; + struct ip_tcp_hdr_s sack; + uchar raw[1600]; +} __packed; + +/* + * TCP State machine states for connection + */ + +enum TCP_STATE { + TCP_CLOSED, /* Need to send SYN to connect */ + TCP_SYN_SENT, /* Trying to connect, waiting for SYN ACK */ + TCP_ESTABLISHED, /* both server & client have a connection */ + TCP_CLOSE_WAIT, /* Rec FIN, passed to app for FIN, ACK rsp*/ + TCP_CLOSING, /* Rec FIN, sent FIN, ACK waiting for ACK */ + TCP_FIN_WAIT_1, /* Sent FIN waiting for response */ + TCP_FIN_WAIT_2 /* Rec ACK from FIN sent, waiting for FIN */ +}; + +enum TCP_STATE tcp_get_tcp_state(void); +void tcp_set_tcp_state(enum TCP_STATE new_state); +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, + u8 action, u32 tcp_seq_num, u32 tcp_ack_num); + +/* + * An incoming packet handler. + * @param pkt pointer to the application packet + * @param dport destination UDP port + * @param sip source IP address + * @param sport source UDP port + * @param len packet length + */ +typedef void rxhand_tcp(uchar *pkt, unsigned int dport, + struct in_addr sip, unsigned int sport, + unsigned int len); +void tcp_set_tcp_handler(rxhand_tcp *f); + +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int len); + +/* + * An incoming TCP packet handler for the TCP protocol. + * There is also a dynamic function pointer for TCP based commands to + * receive incoming traffic after the TCP protocol code has done its work. + */ + +void rxhand_action(u8 tcp_action, int payload_len, u32 tcp_seq_num, + u32 tcp_ack_num, unsigned int pkt_len, + union tcp_build_pkt *b); diff --git a/net/Kconfig b/net/Kconfig index f2363e5256..77ab683eb8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -22,4 +22,10 @@ config NETCONSOLE Support the 'nc' input/output device for networked console. See README.NetConsole for details. +config TCP + bool "TCP stack" + help + TCP protocol support with SACK for wget. Selecting this will provide + the fastest file transfer possible. + endif # if NET diff --git a/net/Makefile b/net/Makefile index 07466879f5..237023407f 100644 --- a/net/Makefile +++ b/net/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_CMD_RARP) += rarp.o obj-$(CONFIG_CMD_SNTP) += sntp.o obj-$(CONFIG_CMD_TFTPBOOT) += tftp.o obj-$(CONFIG_UDP_FUNCTION_FASTBOOT) += fastboot.o +obj-$(CONFIG_TCP) += tcp.o # Disable this warning as it is triggered by: # sprintf(buf, index ? "foo%d" : "foo", index) diff --git a/net/net.c b/net/net.c index f831c34599..8ac1ff050f 100644 --- a/net/net.c +++ b/net/net.c @@ -108,6 +108,7 @@ #if defined(CONFIG_CMD_SNTP) #include "sntp.h" #endif +#include <net/tcp.h> /** BOOTP EXTENTIONS **/ @@ -380,6 +381,9 @@ void net_init(void) /* Only need to setup buffer pointers once. */ first_call = 0; +#if defined(CONFIG_TCP) + tcp_set_tcp_state(TCP_CLOSED); +#endif } net_init_loop(); @@ -790,6 +794,16 @@ return net_send_ip_packet(ether, dest, dport, sport, payload_len, IPPROTO_UDP, 0, 0, 0); } +#if defined(CONFIG_TCP) +int net_send_tcp_packet(int payload_len, int dport, int sport, u8 action, + u32 tcp_seq_num, u32 tcp_ack_num) +{ + return net_send_ip_packet(net_server_ethaddr, net_server_ip, dport, + sport, payload_len, IPPROTO_TCP, action, + tcp_seq_num, tcp_ack_num); +} +#endif + int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, int payload_len, int proto, u8 action, u32 tcp_seq_num, u32 tcp_ack_num) @@ -821,6 +835,15 @@ int net_send_ip_packet(uchar *ether, struct in_addr dest, int dport, int sport, dport, sport, payload_len); pkt_hdr_size = eth_hdr_size + IP_UDP_HDR_SIZE; break; +#if defined(CONFIG_TCP) + case IPPROTO_TCP: + pkt_hdr_size = eth_hdr_size + + tcp_set_tcp_header(pkt + eth_hdr_size, dport, sport, + payload_len, action, tcp_seq_num, + tcp_ack_num); + break; +#endif + default: return -EINVAL; } @@ -1229,6 +1252,16 @@ void net_process_received_packet(uchar *in_packet, int len) if (ip->ip_p == IPPROTO_ICMP) { receive_icmp(ip, len, src_ip, et); return; +#if defined(CONFIG_TCP) + } else if (ip->ip_p == IPPROTO_TCP) { + debug_cond(DEBUG_DEV_PKT, + "TCP PH (to=%pI4, from=%pI4, len=%d)\n", + &dst_ip, &src_ip, len); + + rxhand_tcp_f((union tcp_build_pkt *)ip, len); + return; +#endif + } else if (ip->ip_p != IPPROTO_UDP) { /* Only UDP packets */ return; } diff --git a/net/tcp.c b/net/tcp.c new file mode 100644 index 0000000000..12fa0a72cd --- /dev/null +++ b/net/tcp.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2017 Duncan Hare, all rights reserved. + */ + +/* + * General Desription: + * + * TCP support for the wget command, for fast file downloading. + * + * HTTP/TCP Receiver: + * + * Prequeisites: - own ethernet address + * - own IP address + * - Server IP address + * - Server with TCP + * - TCP application (eg wget) + * Next Step HTTPS? + */ +#include <common.h> +#include <command.h> +#include <console.h> +#include <environment.h> +#include <errno.h> +#include <net.h> +#include <net/tcp.h> + +/* + * TCP sliding window control used by us to request re-TX + */ + +static struct tcp_sack_v tcp_lost; + +/* TCP option timestamp */ +static u32 loc_timestamp; +static u32 rmt_timestamp; + +u32 tcp_seq_init; +u32 tcp_ack_edge; +u32 tcp_seq_max; + +int tcp_activity_count; + +/* + * Search for TCP_SACK and review the comments before the code section + * TCP_SACK is the number of packets at the front of the stream + */ + +enum pkt_state {PKT, NOPKT}; +struct sack_r { + struct sack_edges se; + enum pkt_state st; +}; + +struct sack_r edge_a[TCP_SACK]; +unsigned int sack_idx; +unsigned int prev_len; + +/* TCP connection state */ +static enum TCP_STATE tcp_state; + +/* + * An incoming TCP packet handler for the TCP protocol. + * There is also a dynamic function pointer for TCP based commands to + * receive incoming traffic after the TCP protocol code has done its work. + */ + +/* Current TCP RX packet handler */ +static rxhand_tcp *tcp_packet_handler; + +enum TCP_STATE tcp_get_tcp_state(void) +{ + return tcp_state; +} + +void tcp_set_tcp_state(enum TCP_STATE new_state) +{ + tcp_state = new_state; +} + +static void dummy_handler(uchar *pkt, unsigned int dport, + struct in_addr sip, unsigned int sport, + unsigned int len) +{ +} + +void tcp_set_tcp_handler(rxhand_tcp *f) +{ + debug_cond(DEBUG_INT_STATE, "--- net_loop TCP handler set (%p)\n", f); + if (!f) + tcp_packet_handler = dummy_handler; + else + tcp_packet_handler = f; +} + +u16 tcp_set_pseudo_header(uchar *pkt, struct in_addr src, struct in_addr dest, + int tcp_len, int pkt_len) +{ + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; + int checksum_len; + + /* + * Pseudo header + * + * Zero the byte after the last byte so that the header checksum + * will always work. + */ + + pkt[pkt_len] = 0x00; + + net_copy_ip((void *)&b->ph.p_src, &src); + net_copy_ip((void *)&b->ph.p_dst, &dest); + b->ph.rsvd = 0x00; + b->ph.p = IPPROTO_TCP; + b->ph.len = htons(tcp_len); + checksum_len = tcp_len + PSEUDO_HDR_SIZE; + + debug_cond(DEBUG_DEV_PKT, + "TCP Pesudo Header (to=%pI4, from=%pI4, Len=%d)\n", + &b->ph.p_dst, &b->ph.p_src, checksum_len); + + return compute_ip_checksum(pkt + PSEUDO_PAD_SIZE, checksum_len); +} + +int net_set_ack_options(union tcp_build_pkt *b) +{ + b->sack.hdr.tcp_hlen = (TCP_HDR_SIZE >> 2) << 4; + + b->sack.t_opt.kind = TCP_O_TS; + b->sack.t_opt.len = TCP_OPT_LEN_A; + b->sack.t_opt.t_snd = htons(loc_timestamp); + b->sack.t_opt.t_rcv = rmt_timestamp; + b->sack.sack_v.kind = TCP_1_NOP; + b->sack.sack_v.len = 0x00; + + if (tcp_lost.len > TCP_OPT_LEN_2) { + debug_cond(DEBUG_DEV_PKT, "TCP ack opt lost.len %x\n", + tcp_lost.len); + b->sack.sack_v.len = tcp_lost.len; + b->sack.sack_v.kind = TCP_V_SACK; + b->sack.sack_v.hill[0].l = htonl(tcp_lost.hill[0].l); + b->sack.sack_v.hill[0].r = htonl(tcp_lost.hill[0].r); + + /* + * These SACK structures are initialized with NOPs to + * provide TCP header alignment padding. There are 4 + * SACK structures used for both header padding and + * internally. + */ + + b->sack.sack_v.hill[1].l = htonl(tcp_lost.hill[1].l); + b->sack.sack_v.hill[1].r = htonl(tcp_lost.hill[1].r); + b->sack.sack_v.hill[2].l = htonl(tcp_lost.hill[2].l); + b->sack.sack_v.hill[2].r = htonl(tcp_lost.hill[2].r); + b->sack.sack_v.hill[3].l = TCP_O_NOP; + b->sack.sack_v.hill[3].r = TCP_O_NOP; + } + + /* + * TCP lengths are stored as a rounded up number of 32 bit words + * Add 3 to length round up, rounded, then divided into the length + * in 32 bit words. + */ + + b->sack.hdr.tcp_hlen = (((TCP_HDR_SIZE + TCP_TSOPT_SIZE + + tcp_lost.len + 3) >> 2) << 4); + + /* + * This returns the actual rounded up length of the + * TCP header to add to the total packet length + */ + + return b->sack.hdr.tcp_hlen >> 2; +} + +void net_set_syn_options(union tcp_build_pkt *b) +{ + tcp_lost.len = 0; + b->ip.hdr.tcp_hlen = 0xa0; + + b->ip.mss.kind = TCP_O_MSS; + b->ip.mss.len = TCP_OPT_LEN_4; + b->ip.mss.mss = htons(TCP_MSS); + b->ip.scale.kind = TCP_O_SCL; + b->ip.scale.scale = TCP_SCALE; + b->ip.scale.len = TCP_OPT_LEN_3; + b->ip.sack_p.kind = TCP_P_SACK; + b->ip.sack_p.len = TCP_OPT_LEN_2; + b->ip.t_opt.kind = TCP_O_TS; + b->ip.t_opt.len = TCP_OPT_LEN_A; + loc_timestamp = get_ticks(); + rmt_timestamp = 0x00000000; + b->ip.t_opt.t_snd = 0; + b->ip.t_opt.t_rcv = 0x00000000; + b->ip.end = TCP_O_END; +} + +int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len, + u8 action, u32 tcp_seq_num, u32 tcp_ack_num) +{ + union tcp_build_pkt *b = (union tcp_build_pkt *)pkt; + int pkt_hdr_len; + int pkt_len; + int tcp_len; + +/* + * Header: 5 32 bit words. 4 bits TCP header Length, 4 bits reserved options + */ + b->ip.hdr.tcp_flags = action; + pkt_hdr_len = IP_TCP_HDR_SIZE; + b->ip.hdr.tcp_hlen = 0x50; + + switch (action) { + case TCP_SYN: + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:SYN (%pI4, %pI4, sq=%d, ak=%d)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_num); + tcp_activity_count = 0; + net_set_syn_options(b); + tcp_seq_num = 0; + tcp_ack_num = 0; + pkt_hdr_len = IP_TCP_O_SIZE; + if (tcp_state == TCP_SYN_SENT) { /* Too many SYNs */ + action = TCP_FIN; + tcp_state = TCP_FIN_WAIT_1; + } else { + tcp_state = TCP_SYN_SENT; + } + break; + case TCP_ACK: + pkt_hdr_len = IP_HDR_SIZE + + net_set_ack_options(b); + b->ip.hdr.tcp_flags = action; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:ACK (%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num, + action); + break; + case TCP_FIN: + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:FIN (%pI4, %pI4, s=%d, a=%d)\n", + &net_server_ip, &net_ip, tcp_seq_num, tcp_ack_num); + payload_len = 0; + pkt_hdr_len = IP_TCP_HDR_SIZE; + tcp_state = TCP_FIN_WAIT_1; + + break; + + /* Notify connection closing */ + + case (TCP_FIN | TCP_ACK): + case ((TCP_FIN | TCP_ACK) | TCP_PUSH): + if (tcp_state == TCP_CLOSE_WAIT) + tcp_state = TCP_CLOSING; + tcp_ack_edge++; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:FIN ACK PSH(%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_edge, action); + /* FALLTHRU */ + default: + pkt_hdr_len = IP_HDR_SIZE + + net_set_ack_options(b); + b->ip.hdr.tcp_flags = action | TCP_PUSH | TCP_ACK; + debug_cond(DEBUG_DEV_PKT, + "TCP Hdr:dft (%pI4, %pI4, s=%d, a=%d, A=%x)\n", + &net_server_ip, &net_ip, + tcp_seq_num, tcp_ack_num, action); + } + + pkt_len = pkt_hdr_len + payload_len; + tcp_len = pkt_len - IP_HDR_SIZE; + + /* TCP Header */ + b->ip.hdr.tcp_ack = htonl(tcp_ack_edge); + b->ip.hdr.tcp_src = htons(sport); + b->ip.hdr.tcp_dst = htons(dport); + b->ip.hdr.tcp_seq = htonl(tcp_seq_num); + tcp_seq_num = tcp_seq_num + payload_len; + + /* + * TCP window size - TCP header variable tcp_win. + * Change tcp_win only if you have an understanding of network + * overrun, congestion, TCP segment sizes, TCP windows, TCP scale, + * queuing theory and packet buffering. If there are too few buffers, + * there will be data loss, recovery may work or the sending TCP, + * the server, could abort the stream transmission. + * MSS is governed by maximum Ethernet frame length. + * The number of buffers is governed by the desire to have a queue of + * full buffers to be processed at the destination to maximize + * throughput. Temporary memory use for the boot phase on modern + * SOCs is may not be considered a constraint to buffer space, if + * it is, then the u-boot tftp or nfs kernel netboot should be + * considered. + */ + + b->ip.hdr.tcp_win = htons(PKTBUFSRX * TCP_MSS >> TCP_SCALE); + + b->ip.hdr.tcp_xsum = 0x0000; + b->ip.hdr.tcp_ugr = 0x0000; + + b->ip.hdr.tcp_xsum = tcp_set_pseudo_header(pkt, net_ip, net_server_ip, + tcp_len, pkt_len); + + net_set_ip_header((uchar *)&b->ip, net_server_ip, net_ip, + pkt_len, IPPROTO_TCP); + + return pkt_hdr_len; +} + +/* + * Selective Acknowledgment (Essential for fast stream transfer) + */ + +void tcp_hole(u32 tcp_seq_num, u32 len, u32 tcp_seq_max) +{ + unsigned int idx_sack; + unsigned int sack_end = TCP_SACK - 1; + unsigned int sack_in; + unsigned int hill = 0; + enum pkt_state expect = PKT; + + u32 seq = tcp_seq_num - tcp_seq_init; + u32 hol_l = tcp_ack_edge - tcp_seq_init; + u32 hol_r = 0; + + /* Place new seq number in correct place in receive array */ + + if (prev_len == 0) + prev_len = len; + idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len); + if (idx_sack < TCP_SACK) { + edge_a[idx_sack].se.l = tcp_seq_num; + edge_a[idx_sack].se.r = tcp_seq_num + len; + edge_a[idx_sack].st = PKT; + +/* + * The fin (last) packet is not the same length as data packets, and if it's + * length is recorded and used for array index calculation, calculation breaks. + */ + if (prev_len < len) + prev_len = len; + } + + debug_cond(DEBUG_DEV_PKT, + "TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n", + seq, hol_l, len, sack_idx, sack_end); + + /* Right edge of contiguous stream, is the left edge of first hill */ + + hol_l = tcp_seq_num - tcp_seq_init; + hol_r = hol_l + len; + + tcp_lost.len = TCP_OPT_LEN_2; + + debug_cond(DEBUG_DEV_PKT, + "TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n", + idx_sack, seq, hol_l, hol_r, sack_idx, sack_end); + + for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS; + sack_in++) { + switch (expect) { + case NOPKT: + switch (edge_a[sack_in].st) { + case NOPKT: + debug_cond(DEBUG_INT_STATE, "N"); + break; + case PKT: + debug_cond(DEBUG_INT_STATE, "n"); + tcp_lost.hill[hill].l = + edge_a[sack_in].se.l; + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + expect = PKT; + break; + } + break; + case PKT: + switch (edge_a[sack_in].st) { + case NOPKT: + debug_cond(DEBUG_INT_STATE, "p"); + if (sack_in > sack_idx && + hill < TCP_SACK_HILLS) { + hill++; + tcp_lost.len += TCP_OPT_LEN_8; + } + expect = NOPKT; + break; + case PKT: + debug_cond(DEBUG_INT_STATE, "P"); + + if (tcp_ack_edge == edge_a[sack_in].se.l) { + tcp_ack_edge = edge_a[sack_in].se.r; + edge_a[sack_in].st = NOPKT; + sack_idx++; + } else { + if (hill < TCP_SACK_HILLS) + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + if (sack_in == sack_end - 1) + tcp_lost.hill[hill].r = + edge_a[sack_in].se.r; + } + break; + } + break; + } + } + debug_cond(DEBUG_INT_STATE, "\n"); + if (tcp_lost.len <= TCP_OPT_LEN_2) + sack_idx = 0; +} + +void tcp_parse_options(uchar *o, int o_len) +{ + struct tcp_t_opt *tsopt; + uchar *p = o; +/* + * NOPs are options with a zero length, and thus are special. + * All other options have length fields. + */ + + for (p = o; p < (o + o_len); p = p + p[1]) { + if (p[1] != 0) { + switch (p[0]) { + case TCP_O_END: + return; /* Finished processing options */ + case TCP_O_MSS: + case TCP_O_SCL: + case TCP_P_SACK: + case TCP_V_SACK: + break; /* Continue to process options */ + case TCP_O_TS: + tsopt = (struct tcp_t_opt *)p; + rmt_timestamp = tsopt->t_snd; + return; + break; + } /* End switch, process optional NOPs */ + + if (p[0] == TCP_O_NOP) + p++; + } else { + return; /* Finished processing options */ + } + } +} + +u8 tcp_state_machine(u8 tcp_flags, u32 *tcp_seq_num, int payload_len) +{ + u8 tcp_fin = tcp_flags & TCP_FIN; + u8 tcp_syn = tcp_flags & TCP_SYN; + u8 tcp_rst = tcp_flags & TCP_RST; + u8 tcp_push = tcp_flags & TCP_PUSH; + u8 tcp_ack = tcp_flags & TCP_ACK; + u8 action = TCP_DATA; + int i; + + /* + * tcp_flags are examined to determine TX action in a given state + * tcp_push is interpreted to mean "inform the app" + * urg, ece, cer and nonce flags are not supported. + * + * exe and crw are use to signal and confirm knowledge of congestion. + * This TCP only sends a file request and acks. If it generates + * congestion, the network is broken. + */ + + debug_cond(DEBUG_INT_STATE, "TCP STATE ENTRY %x\n", action); + if (tcp_rst) { + action = TCP_DATA; + tcp_state = TCP_CLOSED; + net_set_state(NETLOOP_FAIL); + debug_cond(DEBUG_INT_STATE, "TCP Reset %x\n", tcp_flags); + return TCP_RST; + } + + switch (tcp_state) { + case TCP_CLOSED: + debug_cond(DEBUG_INT_STATE, "TCP CLOSED %x\n", tcp_flags); + if (tcp_fin) + action = TCP_DATA; + if (tcp_syn) + action = TCP_RST; + if (tcp_ack) + action = TCP_DATA; + break; + case TCP_SYN_SENT: + debug_cond(DEBUG_INT_STATE, "TCP_SYN_SENT %x, %d\n", + tcp_flags, *tcp_seq_num); + if (tcp_fin) { + action = action | TCP_PUSH; + tcp_state = TCP_CLOSE_WAIT; + } + if (tcp_syn) { + action = action | TCP_ACK | TCP_PUSH; + if (tcp_ack) { + tcp_seq_init = *tcp_seq_num; + *tcp_seq_num = *tcp_seq_num + 1; + tcp_seq_max = *tcp_seq_num; + tcp_ack_edge = *tcp_seq_num; + sack_idx = 0; + edge_a[sack_idx].se.l = *tcp_seq_num; + edge_a[sack_idx].se.r = *tcp_seq_num; + prev_len = 0; + tcp_state = TCP_ESTABLISHED; + for (i = 0; i < TCP_SACK; i++) + edge_a[i].st = NOPKT; + } + } else { + if (tcp_ack) + action = TCP_DATA; + } + break; + case TCP_ESTABLISHED: + debug_cond(DEBUG_INT_STATE, + "TCP_ESTABLISHED %x\n", tcp_flags); + if (*tcp_seq_num > tcp_seq_max) + tcp_seq_max = *tcp_seq_num; + if (payload_len > 0) { + tcp_hole(*tcp_seq_num, payload_len, tcp_seq_max); + tcp_fin = TCP_DATA; /* cause standalone FIN */ + } + + if ((tcp_fin) && tcp_lost.len <= TCP_OPT_LEN_2) { + action = action | TCP_FIN | TCP_PUSH | TCP_ACK; + tcp_state = TCP_CLOSE_WAIT; + } else { + if (tcp_ack) + action = TCP_DATA; + } + if (tcp_push) + action = action | TCP_PUSH; + if (tcp_syn) + action = TCP_ACK + TCP_RST; + break; + case TCP_CLOSE_WAIT: + debug_cond(DEBUG_INT_STATE, "TCP_CLOSE_WAIT (%x)\n", tcp_flags); + action = TCP_DATA; /* Wait for app */ + break; + case TCP_FIN_WAIT_2: + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_2 (%x)\n", tcp_flags); + if (tcp_fin) + action = TCP_DATA; + if (tcp_syn) + action = TCP_DATA; + if (tcp_ack) { + action = TCP_PUSH | TCP_ACK; + tcp_state = TCP_CLOSED; + puts("\n"); + } + break; + case TCP_FIN_WAIT_1: + debug_cond(DEBUG_INT_STATE, "TCP_FIN_WAIT_1 (%x)\n", tcp_flags); + if (tcp_fin) { + action = TCP_ACK | TCP_FIN; + tcp_state = TCP_FIN_WAIT_2; + } + if (tcp_syn) + action = TCP_RST; + if (tcp_ack) { + tcp_state = TCP_CLOSED; + tcp_seq_num = tcp_seq_num + 1; + } + break; + case TCP_CLOSING: + debug_cond(DEBUG_INT_STATE, "TCP_CLOSING (%x)\n", tcp_flags); + if (tcp_fin) + action = TCP_DATA; + if (tcp_syn) + action = TCP_RST; + if (tcp_ack) { + action = TCP_PUSH; + tcp_state = TCP_CLOSED; + puts("\n"); + } + break; + } + return action; +} + +void rxhand_tcp_f(union tcp_build_pkt *b, unsigned int pkt_len) +{ + int tcp_len = pkt_len - IP_HDR_SIZE; + u16 tcp_rx_xsum = b->ip.hdr.ip_sum; + u8 tcp_action = TCP_DATA; + u32 tcp_seq_num; + u32 tcp_ack_num; + struct in_addr action_and_state; + + int tcp_hdr_len; + int payload_len; + + /* + * Verify IP header + */ + debug_cond(DEBUG_DEV_PKT, + "TCP RX in RX Sum (to=%pI4, from=%pI4, len=%d)\n", + &b->ip.hdr.ip_src, &b->ip.hdr.ip_dst, pkt_len); + + debug_cond(DEBUG_DEV_PKT, + "In__________________________________________\n"); + + b->ip.hdr.ip_src = net_server_ip; + b->ip.hdr.ip_dst = net_ip; + b->ip.hdr.ip_sum = 0x0000; + if (tcp_rx_xsum != compute_ip_checksum(b, IP_HDR_SIZE)) { + debug_cond(DEBUG_DEV_PKT, + "TCP RX IP xSum Error (%pI4, =%pI4, len=%d)\n", + &net_ip, &net_server_ip, pkt_len); + return; + } + + /* + * Build pseudo header and verify TCP header + */ + tcp_rx_xsum = b->ip.hdr.tcp_xsum; + b->ip.hdr.tcp_xsum = 0x0000; + if (tcp_rx_xsum != tcp_set_pseudo_header((uchar *)b, b->ip.hdr.ip_src, + b->ip.hdr.ip_dst, tcp_len, + pkt_len)) { + debug_cond(DEBUG_DEV_PKT, + "TCP RX TCP xSum Error (%pI4, %pI4, len=%d)\n", + &net_ip, &net_server_ip, tcp_len); + return; + } + + tcp_hdr_len = (b->ip.hdr.tcp_hlen >> 2); + payload_len = tcp_len - tcp_hdr_len; + + if (tcp_hdr_len > TCP_HDR_SIZE) + tcp_parse_options((uchar *)b + IP_TCP_HDR_SIZE, + tcp_hdr_len - TCP_HDR_SIZE); + /* + * Incoming sequence and ack numbers are server's view of the numbers. + * The app must swap the numbers when responding. + */ + + tcp_seq_num = ntohl(b->ip.hdr.tcp_seq); + tcp_ack_num = ntohl(b->ip.hdr.tcp_ack); + + /* Packets are not ordered. Send to app as received. */ + + tcp_action = tcp_state_machine(b->ip.hdr.tcp_flags, + &tcp_seq_num, payload_len); + + /* + * State-altering command to be sent. + * The packet sequence and ack numbers are in the tcp_seq_num + * and tcp_ack_num variables. The current packet, its position + * in the data stream, is the in the range of those variables. + * + * In the "application push" invocation, the TCP header with all + * its information is pointed to by the packet pointer. + * + * In the typedef + * void rxhand_tcp(uchar *pkt, unsigned int dport, + * struct in_addr sip, unsigned int sport, + * unsigned int len); + * *pkt is the pointer to the payload + * dport is used for tcp_seg_num + * action_and_state.s_addr is used for TCP state + * sport is used for tcp_ack_num (which is unused by the app) + * pkt_ length is the payload length. + * + * TCP_PUSH from the state machine with a payload length of 0 is a + * connect or disconnect event + */ + + tcp_activity_count++; + if (tcp_activity_count > TCP_ACTIVITY) { + puts("| "); + tcp_activity_count = 0; + } + + if ((tcp_action & TCP_PUSH) || payload_len > 0) { + debug_cond(DEBUG_DEV_PKT, + "TCP Notify (action=%x, Seq=%d,Ack=%d,Pay%d)\n", + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); + + action_and_state.s_addr = tcp_action; + (*tcp_packet_handler) ((uchar *)b + pkt_len - payload_len, + tcp_seq_num, action_and_state, + tcp_ack_num, payload_len); + + } else if (tcp_action != TCP_DATA) { + debug_cond(DEBUG_DEV_PKT, + "TCP Action (action=%x,Seq=%d,Ack=%d,Pay=%d)\n", + tcp_action, tcp_seq_num, tcp_ack_num, payload_len); + + /* + * Warning: Incoming Ack & Seq sequence numbers are transposed + * here to outgoing Seq & Ack sequence numbers + */ + net_send_tcp_packet(0, ntohs(b->ip.hdr.tcp_src), + ntohs(b->ip.hdr.tcp_dst), + (tcp_action & (~TCP_PUSH)), + tcp_seq_num, tcp_ack_num); + } +}