tcp.c

Go to the documentation of this file.
00001 #include <string.h>
00002 #include <stdlib.h>
00003 #include <stdio.h>
00004 #include <assert.h>
00005 #include <errno.h>
00006 #include <byteswap.h>
00007 #include <gpxe/timer.h>
00008 #include <gpxe/iobuf.h>
00009 #include <gpxe/malloc.h>
00010 #include <gpxe/retry.h>
00011 #include <gpxe/refcnt.h>
00012 #include <gpxe/xfer.h>
00013 #include <gpxe/open.h>
00014 #include <gpxe/uri.h>
00015 #include <gpxe/tcpip.h>
00016 #include <gpxe/tcp.h>
00017 
00018 /** @file
00019  *
00020  * TCP protocol
00021  *
00022  */
00023 
00024 FILE_LICENCE ( GPL2_OR_LATER );
00025 
00026 /** A TCP connection */
00027 struct tcp_connection {
00028         /** Reference counter */
00029         struct refcnt refcnt;
00030         /** List of TCP connections */
00031         struct list_head list;
00032 
00033         /** Data transfer interface */
00034         struct xfer_interface xfer;
00035         /** Data transfer interface closed flag */
00036         int xfer_closed;
00037 
00038         /** Remote socket address */
00039         struct sockaddr_tcpip peer;
00040         /** Local port, in network byte order */
00041         unsigned int local_port;
00042 
00043         /** Current TCP state */
00044         unsigned int tcp_state;
00045         /** Previous TCP state
00046          *
00047          * Maintained only for debug messages
00048          */
00049         unsigned int prev_tcp_state;
00050         /** Current sequence number
00051          *
00052          * Equivalent to SND.UNA in RFC 793 terminology.
00053          */
00054         uint32_t snd_seq;
00055         /** Unacknowledged sequence count
00056          *
00057          * Equivalent to (SND.NXT-SND.UNA) in RFC 793 terminology.
00058          */
00059         uint32_t snd_sent;
00060         /** Send window
00061          *
00062          * Equivalent to SND.WND in RFC 793 terminology
00063          */
00064         uint32_t snd_win;
00065         /** Current acknowledgement number
00066          *
00067          * Equivalent to RCV.NXT in RFC 793 terminology.
00068          */
00069         uint32_t rcv_ack;
00070         /** Receive window
00071          *
00072          * Equivalent to RCV.WND in RFC 793 terminology.
00073          */
00074         uint32_t rcv_win;
00075         /** Most recent received timestamp
00076          *
00077          * Equivalent to TS.Recent in RFC 1323 terminology.
00078          */
00079         uint32_t ts_recent;
00080         /** Timestamps enabled */
00081         int timestamps;
00082 
00083         /** Transmit queue */
00084         struct list_head queue;
00085         /** Retransmission timer */
00086         struct retry_timer timer;
00087 };
00088 
00089 /**
00090  * List of registered TCP connections
00091  */
00092 static LIST_HEAD ( tcp_conns );
00093 
00094 /* Forward declarations */
00095 static struct xfer_interface_operations tcp_xfer_operations;
00096 static void tcp_expired ( struct retry_timer *timer, int over );
00097 static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
00098                         uint32_t win );
00099 
00100 /**
00101  * Name TCP state
00102  *
00103  * @v state             TCP state
00104  * @ret name            Name of TCP state
00105  */
00106 static inline __attribute__ (( always_inline )) const char *
00107 tcp_state ( int state ) {
00108         switch ( state ) {
00109         case TCP_CLOSED:                return "CLOSED";
00110         case TCP_LISTEN:                return "LISTEN";
00111         case TCP_SYN_SENT:              return "SYN_SENT";
00112         case TCP_SYN_RCVD:              return "SYN_RCVD";
00113         case TCP_ESTABLISHED:           return "ESTABLISHED";
00114         case TCP_FIN_WAIT_1:            return "FIN_WAIT_1";
00115         case TCP_FIN_WAIT_2:            return "FIN_WAIT_2";
00116         case TCP_CLOSING_OR_LAST_ACK:   return "CLOSING/LAST_ACK";
00117         case TCP_TIME_WAIT:             return "TIME_WAIT";
00118         case TCP_CLOSE_WAIT:            return "CLOSE_WAIT";
00119         default:                        return "INVALID";
00120         }
00121 }
00122 
00123 /**
00124  * Dump TCP state transition
00125  *
00126  * @v tcp               TCP connection
00127  */
00128 static inline __attribute__ (( always_inline )) void
00129 tcp_dump_state ( struct tcp_connection *tcp ) {
00130 
00131         if ( tcp->tcp_state != tcp->prev_tcp_state ) {
00132                 DBGC ( tcp, "TCP %p transitioned from %s to %s\n", tcp,
00133                        tcp_state ( tcp->prev_tcp_state ),
00134                        tcp_state ( tcp->tcp_state ) );
00135         }
00136         tcp->prev_tcp_state = tcp->tcp_state;
00137 }
00138 
00139 /**
00140  * Dump TCP flags
00141  *
00142  * @v flags             TCP flags
00143  */
00144 static inline __attribute__ (( always_inline )) void
00145 tcp_dump_flags ( struct tcp_connection *tcp, unsigned int flags ) {
00146         if ( flags & TCP_RST )
00147                 DBGC2 ( tcp, " RST" );
00148         if ( flags & TCP_SYN )
00149                 DBGC2 ( tcp, " SYN" );
00150         if ( flags & TCP_PSH )
00151                 DBGC2 ( tcp, " PSH" );
00152         if ( flags & TCP_FIN )
00153                 DBGC2 ( tcp, " FIN" );
00154         if ( flags & TCP_ACK )
00155                 DBGC2 ( tcp, " ACK" );
00156 }
00157 
00158 /***************************************************************************
00159  *
00160  * Open and close
00161  *
00162  ***************************************************************************
00163  */
00164 
00165 /**
00166  * Bind TCP connection to local port
00167  *
00168  * @v tcp               TCP connection
00169  * @v port              Local port number, in network-endian order
00170  * @ret rc              Return status code
00171  *
00172  * If the port is 0, the connection is assigned an available port
00173  * between 1024 and 65535.
00174  */
00175 static int tcp_bind ( struct tcp_connection *tcp, unsigned int port ) {
00176         struct tcp_connection *existing;
00177         static uint16_t try_port = 1023;
00178 
00179         /* If no port specified, find the first available port */
00180         if ( ! port ) {
00181                 while ( try_port ) {
00182                         try_port++;
00183                         if ( try_port < 1024 )
00184                                 continue;
00185                         if ( tcp_bind ( tcp, htons ( try_port ) ) == 0 )
00186                                 return 0;
00187                 }
00188                 DBGC ( tcp, "TCP %p could not bind: no free ports\n", tcp );
00189                 return -EADDRINUSE;
00190         }
00191 
00192         /* Attempt bind to local port */
00193         list_for_each_entry ( existing, &tcp_conns, list ) {
00194                 if ( existing->local_port == port ) {
00195                         DBGC ( tcp, "TCP %p could not bind: port %d in use\n",
00196                                tcp, ntohs ( port ) );
00197                         return -EADDRINUSE;
00198                 }
00199         }
00200         tcp->local_port = port;
00201 
00202         DBGC ( tcp, "TCP %p bound to port %d\n", tcp, ntohs ( port ) );
00203         return 0;
00204 }
00205 
00206 /**
00207  * Open a TCP connection
00208  *
00209  * @v xfer              Data transfer interface
00210  * @v peer              Peer socket address
00211  * @v local             Local socket address, or NULL
00212  * @ret rc              Return status code
00213  */
00214 static int tcp_open ( struct xfer_interface *xfer, struct sockaddr *peer,
00215                       struct sockaddr *local ) {
00216         struct sockaddr_tcpip *st_peer = ( struct sockaddr_tcpip * ) peer;
00217         struct sockaddr_tcpip *st_local = ( struct sockaddr_tcpip * ) local;
00218         struct tcp_connection *tcp;
00219         unsigned int bind_port;
00220         int rc;
00221 
00222         /* Allocate and initialise structure */
00223         tcp = zalloc ( sizeof ( *tcp ) );
00224         if ( ! tcp )
00225                 return -ENOMEM;
00226         DBGC ( tcp, "TCP %p allocated\n", tcp );
00227         xfer_init ( &tcp->xfer, &tcp_xfer_operations, &tcp->refcnt );
00228         tcp->prev_tcp_state = TCP_CLOSED;
00229         tcp->tcp_state = TCP_STATE_SENT ( TCP_SYN );
00230         tcp_dump_state ( tcp );
00231         tcp->snd_seq = random();
00232         INIT_LIST_HEAD ( &tcp->queue );
00233         tcp->timer.expired = tcp_expired;
00234         memcpy ( &tcp->peer, st_peer, sizeof ( tcp->peer ) );
00235 
00236         /* Bind to local port */
00237         bind_port = ( st_local ? st_local->st_port : 0 );
00238         if ( ( rc = tcp_bind ( tcp, bind_port ) ) != 0 )
00239                 goto err;
00240 
00241         /* Start timer to initiate SYN */
00242         start_timer_nodelay ( &tcp->timer );
00243 
00244         /* Attach parent interface, transfer reference to connection
00245          * list and return
00246          */
00247         xfer_plug_plug ( &tcp->xfer, xfer );
00248         list_add ( &tcp->list, &tcp_conns );
00249         return 0;
00250 
00251  err:
00252         ref_put ( &tcp->refcnt );
00253         return rc;
00254 }
00255 
00256 /**
00257  * Close TCP connection
00258  *
00259  * @v tcp               TCP connection
00260  * @v rc                Reason for close
00261  *
00262  * Closes the data transfer interface.  If the TCP state machine is in
00263  * a suitable state, the connection will be deleted.
00264  */
00265 static void tcp_close ( struct tcp_connection *tcp, int rc ) {
00266         struct io_buffer *iobuf;
00267         struct io_buffer *tmp;
00268 
00269         /* Close data transfer interface */
00270         xfer_nullify ( &tcp->xfer );
00271         xfer_close ( &tcp->xfer, rc );
00272         tcp->xfer_closed = 1;
00273 
00274         /* If we are in CLOSED, or have otherwise not yet received a
00275          * SYN (i.e. we are in LISTEN or SYN_SENT), just delete the
00276          * connection.
00277          */
00278         if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
00279 
00280                 /* Transition to CLOSED for the sake of debugging messages */
00281                 tcp->tcp_state = TCP_CLOSED;
00282                 tcp_dump_state ( tcp );
00283 
00284                 /* Free any unsent I/O buffers */
00285                 list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) {
00286                         list_del ( &iobuf->list );
00287                         free_iob ( iobuf );
00288                 }
00289 
00290                 /* Remove from list and drop reference */
00291                 stop_timer ( &tcp->timer );
00292                 list_del ( &tcp->list );
00293                 ref_put ( &tcp->refcnt );
00294                 DBGC ( tcp, "TCP %p connection deleted\n", tcp );
00295                 return;
00296         }
00297 
00298         /* If we have not had our SYN acknowledged (i.e. we are in
00299          * SYN_RCVD), pretend that it has been acknowledged so that we
00300          * can send a FIN without breaking things.
00301          */
00302         if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
00303                 tcp_rx_ack ( tcp, ( tcp->snd_seq + 1 ), 0 );
00304 
00305         /* If we have no data remaining to send, start sending FIN */
00306         if ( list_empty ( &tcp->queue ) ) {
00307                 tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
00308                 tcp_dump_state ( tcp );
00309         }
00310 }
00311 
00312 /***************************************************************************
00313  *
00314  * Transmit data path
00315  *
00316  ***************************************************************************
00317  */
00318 
00319 /**
00320  * Calculate transmission window
00321  *
00322  * @v tcp               TCP connection
00323  * @ret len             Maximum length that can be sent in a single packet
00324  */
00325 static size_t tcp_xmit_win ( struct tcp_connection *tcp ) {
00326         size_t len;
00327 
00328         /* Not ready if we're not in a suitable connection state */
00329         if ( ! TCP_CAN_SEND_DATA ( tcp->tcp_state ) )
00330                 return 0;
00331 
00332         /* Length is the minimum of the receiver's window and the path MTU */
00333         len = tcp->snd_win;
00334         if ( len > TCP_PATH_MTU )
00335                 len = TCP_PATH_MTU;
00336 
00337         return len;
00338 }
00339 
00340 /**
00341  * Process TCP transmit queue
00342  *
00343  * @v tcp               TCP connection
00344  * @v max_len           Maximum length to process
00345  * @v dest              I/O buffer to fill with data, or NULL
00346  * @v remove            Remove data from queue
00347  * @ret len             Length of data processed
00348  *
00349  * This processes at most @c max_len bytes from the TCP connection's
00350  * transmit queue.  Data will be copied into the @c dest I/O buffer
00351  * (if provided) and, if @c remove is true, removed from the transmit
00352  * queue.
00353  */
00354 static size_t tcp_process_queue ( struct tcp_connection *tcp, size_t max_len,
00355                                   struct io_buffer *dest, int remove ) {
00356         struct io_buffer *iobuf;
00357         struct io_buffer *tmp;
00358         size_t frag_len;
00359         size_t len = 0;
00360 
00361         list_for_each_entry_safe ( iobuf, tmp, &tcp->queue, list ) {
00362                 frag_len = iob_len ( iobuf );
00363                 if ( frag_len > max_len )
00364                         frag_len = max_len;
00365                 if ( dest ) {
00366                         memcpy ( iob_put ( dest, frag_len ), iobuf->data,
00367                                  frag_len );
00368                 }
00369                 if ( remove ) {
00370                         iob_pull ( iobuf, frag_len );
00371                         if ( ! iob_len ( iobuf ) ) {
00372                                 list_del ( &iobuf->list );
00373                                 free_iob ( iobuf );
00374                         }
00375                 }
00376                 len += frag_len;
00377                 max_len -= frag_len;
00378         }
00379         return len;
00380 }
00381 
00382 /**
00383  * Transmit any outstanding data
00384  *
00385  * @v tcp               TCP connection
00386  * @v force_send        Force sending of packet
00387  * 
00388  * Transmits any outstanding data on the connection.
00389  *
00390  * Note that even if an error is returned, the retransmission timer
00391  * will have been started if necessary, and so the stack will
00392  * eventually attempt to retransmit the failed packet.
00393  */
00394 static int tcp_xmit ( struct tcp_connection *tcp, int force_send ) {
00395         struct io_buffer *iobuf;
00396         struct tcp_header *tcphdr;
00397         struct tcp_mss_option *mssopt;
00398         struct tcp_timestamp_padded_option *tsopt;
00399         void *payload;
00400         unsigned int flags;
00401         size_t len = 0;
00402         uint32_t seq_len;
00403         uint32_t app_win;
00404         uint32_t max_rcv_win;
00405         int rc;
00406 
00407         /* If retransmission timer is already running, do nothing */
00408         if ( timer_running ( &tcp->timer ) )
00409                 return 0;
00410 
00411         /* Calculate both the actual (payload) and sequence space
00412          * lengths that we wish to transmit.
00413          */
00414         if ( TCP_CAN_SEND_DATA ( tcp->tcp_state ) ) {
00415                 len = tcp_process_queue ( tcp, tcp_xmit_win ( tcp ),
00416                                           NULL, 0 );
00417         }
00418         seq_len = len;
00419         flags = TCP_FLAGS_SENDING ( tcp->tcp_state );
00420         if ( flags & ( TCP_SYN | TCP_FIN ) ) {
00421                 /* SYN or FIN consume one byte, and we can never send both */
00422                 assert ( ! ( ( flags & TCP_SYN ) && ( flags & TCP_FIN ) ) );
00423                 seq_len++;
00424         }
00425         tcp->snd_sent = seq_len;
00426 
00427         /* If we have nothing to transmit, stop now */
00428         if ( ( seq_len == 0 ) && ! force_send )
00429                 return 0;
00430 
00431         /* If we are transmitting anything that requires
00432          * acknowledgement (i.e. consumes sequence space), start the
00433          * retransmission timer.  Do this before attempting to
00434          * allocate the I/O buffer, in case allocation itself fails.
00435          */
00436         if ( seq_len )
00437                 start_timer ( &tcp->timer );
00438 
00439         /* Allocate I/O buffer */
00440         iobuf = alloc_iob ( len + MAX_HDR_LEN );
00441         if ( ! iobuf ) {
00442                 DBGC ( tcp, "TCP %p could not allocate iobuf for %08x..%08x "
00443                        "%08x\n", tcp, tcp->snd_seq, ( tcp->snd_seq + seq_len ),
00444                        tcp->rcv_ack );
00445                 return -ENOMEM;
00446         }
00447         iob_reserve ( iobuf, MAX_HDR_LEN );
00448 
00449         /* Fill data payload from transmit queue */
00450         tcp_process_queue ( tcp, len, iobuf, 0 );
00451 
00452         /* Expand receive window if possible */
00453         max_rcv_win = ( ( freemem * 3 ) / 4 );
00454         if ( max_rcv_win > TCP_MAX_WINDOW_SIZE )
00455                 max_rcv_win = TCP_MAX_WINDOW_SIZE;
00456         app_win = xfer_window ( &tcp->xfer );
00457         if ( max_rcv_win > app_win )
00458                 max_rcv_win = app_win;
00459         max_rcv_win &= ~0x03; /* Keep everything dword-aligned */
00460         if ( tcp->rcv_win < max_rcv_win )
00461                 tcp->rcv_win = max_rcv_win;
00462 
00463         /* Fill up the TCP header */
00464         payload = iobuf->data;
00465         if ( flags & TCP_SYN ) {
00466                 mssopt = iob_push ( iobuf, sizeof ( *mssopt ) );
00467                 mssopt->kind = TCP_OPTION_MSS;
00468                 mssopt->length = sizeof ( *mssopt );
00469                 mssopt->mss = htons ( TCP_MSS );
00470         }
00471         if ( ( flags & TCP_SYN ) || tcp->timestamps ) {
00472                 tsopt = iob_push ( iobuf, sizeof ( *tsopt ) );
00473                 memset ( tsopt->nop, TCP_OPTION_NOP, sizeof ( tsopt->nop ) );
00474                 tsopt->tsopt.kind = TCP_OPTION_TS;
00475                 tsopt->tsopt.length = sizeof ( tsopt->tsopt );
00476                 tsopt->tsopt.tsval = ntohl ( currticks() );
00477                 tsopt->tsopt.tsecr = ntohl ( tcp->ts_recent );
00478         }
00479         if ( ! ( flags & TCP_SYN ) )
00480                 flags |= TCP_PSH;
00481         tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
00482         memset ( tcphdr, 0, sizeof ( *tcphdr ) );
00483         tcphdr->src = tcp->local_port;
00484         tcphdr->dest = tcp->peer.st_port;
00485         tcphdr->seq = htonl ( tcp->snd_seq );
00486         tcphdr->ack = htonl ( tcp->rcv_ack );
00487         tcphdr->hlen = ( ( payload - iobuf->data ) << 2 );
00488         tcphdr->flags = flags;
00489         tcphdr->win = htons ( tcp->rcv_win );
00490         tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
00491 
00492         /* Dump header */
00493         DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x           %08x %4zd",
00494                 tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
00495                 ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) + seq_len ),
00496                 ntohl ( tcphdr->ack ), len );
00497         tcp_dump_flags ( tcp, tcphdr->flags );
00498         DBGC2 ( tcp, "\n" );
00499 
00500         /* Transmit packet */
00501         if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, &tcp->peer, NULL,
00502                                &tcphdr->csum ) ) != 0 ) {
00503                 DBGC ( tcp, "TCP %p could not transmit %08x..%08x %08x: %s\n",
00504                        tcp, tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ),
00505                        tcp->rcv_ack, strerror ( rc ) );
00506                 return rc;
00507         }
00508 
00509         return 0;
00510 }
00511 
00512 /**
00513  * Retransmission timer expired
00514  *
00515  * @v timer     Retry timer
00516  * @v over      Failure indicator
00517  */
00518 static void tcp_expired ( struct retry_timer *timer, int over ) {
00519         struct tcp_connection *tcp =
00520                 container_of ( timer, struct tcp_connection, timer );
00521         int graceful_close = TCP_CLOSED_GRACEFULLY ( tcp->tcp_state );
00522 
00523         DBGC ( tcp, "TCP %p timer %s in %s for %08x..%08x %08x\n", tcp,
00524                ( over ? "expired" : "fired" ), tcp_state ( tcp->tcp_state ),
00525                tcp->snd_seq, ( tcp->snd_seq + tcp->snd_sent ), tcp->rcv_ack );
00526 
00527         assert ( ( tcp->tcp_state == TCP_SYN_SENT ) ||
00528                  ( tcp->tcp_state == TCP_SYN_RCVD ) ||
00529                  ( tcp->tcp_state == TCP_ESTABLISHED ) ||
00530                  ( tcp->tcp_state == TCP_FIN_WAIT_1 ) ||
00531                  ( tcp->tcp_state == TCP_TIME_WAIT ) ||
00532                  ( tcp->tcp_state == TCP_CLOSE_WAIT ) ||
00533                  ( tcp->tcp_state == TCP_CLOSING_OR_LAST_ACK ) );
00534 
00535         if ( over || graceful_close ) {
00536                 /* If we have finally timed out and given up, or if
00537                  * this is the result of a graceful close, terminate
00538                  * the connection
00539                  */
00540                 tcp->tcp_state = TCP_CLOSED;
00541                 tcp_dump_state ( tcp );
00542                 tcp_close ( tcp, -ETIMEDOUT );
00543         } else {
00544                 /* Otherwise, retransmit the packet */
00545                 tcp_xmit ( tcp, 0 );
00546         }
00547 }
00548 
00549 /**
00550  * Send RST response to incoming packet
00551  *
00552  * @v in_tcphdr         TCP header of incoming packet
00553  * @ret rc              Return status code
00554  */
00555 static int tcp_xmit_reset ( struct tcp_connection *tcp,
00556                             struct sockaddr_tcpip *st_dest,
00557                             struct tcp_header *in_tcphdr ) {
00558         struct io_buffer *iobuf;
00559         struct tcp_header *tcphdr;
00560         int rc;
00561 
00562         /* Allocate space for dataless TX buffer */
00563         iobuf = alloc_iob ( MAX_HDR_LEN );
00564         if ( ! iobuf ) {
00565                 DBGC ( tcp, "TCP %p could not allocate iobuf for RST "
00566                        "%08x..%08x %08x\n", tcp, ntohl ( in_tcphdr->ack ),
00567                        ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ) );
00568                 return -ENOMEM;
00569         }
00570         iob_reserve ( iobuf, MAX_HDR_LEN );
00571 
00572         /* Construct RST response */
00573         tcphdr = iob_push ( iobuf, sizeof ( *tcphdr ) );
00574         memset ( tcphdr, 0, sizeof ( *tcphdr ) );
00575         tcphdr->src = in_tcphdr->dest;
00576         tcphdr->dest = in_tcphdr->src;
00577         tcphdr->seq = in_tcphdr->ack;
00578         tcphdr->ack = in_tcphdr->seq;
00579         tcphdr->hlen = ( ( sizeof ( *tcphdr ) / 4 ) << 4 );
00580         tcphdr->flags = ( TCP_RST | TCP_ACK );
00581         tcphdr->win = htons ( TCP_MAX_WINDOW_SIZE );
00582         tcphdr->csum = tcpip_chksum ( iobuf->data, iob_len ( iobuf ) );
00583 
00584         /* Dump header */
00585         DBGC2 ( tcp, "TCP %p TX %d->%d %08x..%08x           %08x %4d",
00586                 tcp, ntohs ( tcphdr->src ), ntohs ( tcphdr->dest ),
00587                 ntohl ( tcphdr->seq ), ( ntohl ( tcphdr->seq ) ),
00588                 ntohl ( tcphdr->ack ), 0 );
00589         tcp_dump_flags ( tcp, tcphdr->flags );
00590         DBGC2 ( tcp, "\n" );
00591 
00592         /* Transmit packet */
00593         if ( ( rc = tcpip_tx ( iobuf, &tcp_protocol, NULL, st_dest,
00594                                NULL, &tcphdr->csum ) ) != 0 ) {
00595                 DBGC ( tcp, "TCP %p could not transmit RST %08x..%08x %08x: "
00596                        "%s\n", tcp, ntohl ( in_tcphdr->ack ),
00597                        ntohl ( in_tcphdr->ack ), ntohl ( in_tcphdr->seq ),
00598                        strerror ( rc ) );
00599                 return rc;
00600         }
00601 
00602         return 0;
00603 }
00604 
00605 /***************************************************************************
00606  *
00607  * Receive data path
00608  *
00609  ***************************************************************************
00610  */
00611 
00612 /**
00613  * Identify TCP connection by local port number
00614  *
00615  * @v local_port        Local port (in network-endian order)
00616  * @ret tcp             TCP connection, or NULL
00617  */
00618 static struct tcp_connection * tcp_demux ( unsigned int local_port ) {
00619         struct tcp_connection *tcp;
00620 
00621         list_for_each_entry ( tcp, &tcp_conns, list ) {
00622                 if ( tcp->local_port == local_port )
00623                         return tcp;
00624         }
00625         return NULL;
00626 }
00627 
00628 /**
00629  * Parse TCP received options
00630  *
00631  * @v tcp               TCP connection
00632  * @v data              Raw options data
00633  * @v len               Raw options length
00634  * @v options           Options structure to fill in
00635  */
00636 static void tcp_rx_opts ( struct tcp_connection *tcp, const void *data,
00637                           size_t len, struct tcp_options *options ) {
00638         const void *end = ( data + len );
00639         const struct tcp_option *option;
00640         unsigned int kind;
00641 
00642         memset ( options, 0, sizeof ( *options ) );
00643         while ( data < end ) {
00644                 option = data;
00645                 kind = option->kind;
00646                 if ( kind == TCP_OPTION_END )
00647                         return;
00648                 if ( kind == TCP_OPTION_NOP ) {
00649                         data++;
00650                         continue;
00651                 }
00652                 switch ( kind ) {
00653                 case TCP_OPTION_MSS:
00654                         options->mssopt = data;
00655                         break;
00656                 case TCP_OPTION_TS:
00657                         options->tsopt = data;
00658                         break;
00659                 default:
00660                         DBGC ( tcp, "TCP %p received unknown option %d\n",
00661                                tcp, kind );
00662                         break;
00663                 }
00664                 data += option->length;
00665         }
00666 }
00667 
00668 /**
00669  * Consume received sequence space
00670  *
00671  * @v tcp               TCP connection
00672  * @v seq_len           Sequence space length to consume
00673  */
00674 static void tcp_rx_seq ( struct tcp_connection *tcp, uint32_t seq_len ) {
00675         tcp->rcv_ack += seq_len;
00676         if ( tcp->rcv_win > seq_len ) {
00677                 tcp->rcv_win -= seq_len;
00678         } else {
00679                 tcp->rcv_win = 0;
00680         }
00681 }
00682 
00683 /**
00684  * Handle TCP received SYN
00685  *
00686  * @v tcp               TCP connection
00687  * @v seq               SEQ value (in host-endian order)
00688  * @v options           TCP options
00689  * @ret rc              Return status code
00690  */
00691 static int tcp_rx_syn ( struct tcp_connection *tcp, uint32_t seq,
00692                         struct tcp_options *options ) {
00693 
00694         /* Synchronise sequence numbers on first SYN */
00695         if ( ! ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) ) {
00696                 tcp->rcv_ack = seq;
00697                 if ( options->tsopt )
00698                         tcp->timestamps = 1;
00699         }
00700 
00701         /* Ignore duplicate SYN */
00702         if ( ( tcp->rcv_ack - seq ) > 0 )
00703                 return 0;
00704 
00705         /* Mark SYN as received and start sending ACKs with each packet */
00706         tcp->tcp_state |= ( TCP_STATE_SENT ( TCP_ACK ) |
00707                             TCP_STATE_RCVD ( TCP_SYN ) );
00708 
00709         /* Acknowledge SYN */
00710         tcp_rx_seq ( tcp, 1 );
00711 
00712         return 0;
00713 }
00714 
00715 /**
00716  * Handle TCP received ACK
00717  *
00718  * @v tcp               TCP connection
00719  * @v ack               ACK value (in host-endian order)
00720  * @v win               WIN value (in host-endian order)
00721  * @ret rc              Return status code
00722  */
00723 static int tcp_rx_ack ( struct tcp_connection *tcp, uint32_t ack,
00724                         uint32_t win ) {
00725         uint32_t ack_len = ( ack - tcp->snd_seq );
00726         size_t len;
00727         unsigned int acked_flags;
00728 
00729         /* Check for out-of-range or old duplicate ACKs */
00730         if ( ack_len > tcp->snd_sent ) {
00731                 DBGC ( tcp, "TCP %p received ACK for %08x..%08x, "
00732                        "sent only %08x..%08x\n", tcp, tcp->snd_seq,
00733                        ( tcp->snd_seq + ack_len ), tcp->snd_seq,
00734                        ( tcp->snd_seq + tcp->snd_sent ) );
00735 
00736                 if ( TCP_HAS_BEEN_ESTABLISHED ( tcp->tcp_state ) ) {
00737                         /* Just ignore what might be old duplicate ACKs */
00738                         return 0;
00739                 } else {
00740                         /* Send RST if an out-of-range ACK is received
00741                          * on a not-yet-established connection, as per
00742                          * RFC 793.
00743                          */
00744                         return -EINVAL;
00745                 }
00746         }
00747 
00748         /* Ignore ACKs that don't actually acknowledge any new data.
00749          * (In particular, do not stop the retransmission timer; this
00750          * avoids creating a sorceror's apprentice syndrome when a
00751          * duplicate ACK is received and we still have data in our
00752          * transmit queue.)
00753          */
00754         if ( ack_len == 0 )
00755                 return 0;
00756 
00757         /* Stop the retransmission timer */
00758         stop_timer ( &tcp->timer );
00759 
00760         /* Determine acknowledged flags and data length */
00761         len = ack_len;
00762         acked_flags = ( TCP_FLAGS_SENDING ( tcp->tcp_state ) &
00763                         ( TCP_SYN | TCP_FIN ) );
00764         if ( acked_flags )
00765                 len--;
00766 
00767         /* Update SEQ and sent counters, and window size */
00768         tcp->snd_seq = ack;
00769         tcp->snd_sent = 0;
00770         tcp->snd_win = win;
00771 
00772         /* Remove any acknowledged data from transmit queue */
00773         tcp_process_queue ( tcp, len, NULL, 1 );
00774                 
00775         /* Mark SYN/FIN as acknowledged if applicable. */
00776         if ( acked_flags )
00777                 tcp->tcp_state |= TCP_STATE_ACKED ( acked_flags );
00778 
00779         /* Start sending FIN if we've had all possible data ACKed */
00780         if ( list_empty ( &tcp->queue ) && tcp->xfer_closed )
00781                 tcp->tcp_state |= TCP_STATE_SENT ( TCP_FIN );
00782 
00783         return 0;
00784 }
00785 
00786 /**
00787  * Handle TCP received data
00788  *
00789  * @v tcp               TCP connection
00790  * @v seq               SEQ value (in host-endian order)
00791  * @v iobuf             I/O buffer
00792  * @ret rc              Return status code
00793  *
00794  * This function takes ownership of the I/O buffer.
00795  */
00796 static int tcp_rx_data ( struct tcp_connection *tcp, uint32_t seq,
00797                          struct io_buffer *iobuf ) {
00798         uint32_t already_rcvd;
00799         uint32_t len;
00800         int rc;
00801 
00802         /* Ignore duplicate or out-of-order data */
00803         already_rcvd = ( tcp->rcv_ack - seq );
00804         len = iob_len ( iobuf );
00805         if ( already_rcvd >= len ) {
00806                 free_iob ( iobuf );
00807                 return 0;
00808         }
00809         iob_pull ( iobuf, already_rcvd );
00810         len -= already_rcvd;
00811 
00812         /* Deliver data to application */
00813         if ( ( rc = xfer_deliver_iob ( &tcp->xfer, iobuf ) ) != 0 ) {
00814                 DBGC ( tcp, "TCP %p could not deliver %08x..%08x: %s\n",
00815                        tcp, seq, ( seq + len ), strerror ( rc ) );
00816                 return rc;
00817         }
00818 
00819         /* Acknowledge new data */
00820         tcp_rx_seq ( tcp, len );
00821 
00822         return 0;
00823 }
00824 
00825 /**
00826  * Handle TCP received FIN
00827  *
00828  * @v tcp               TCP connection
00829  * @v seq               SEQ value (in host-endian order)
00830  * @ret rc              Return status code
00831  */
00832 static int tcp_rx_fin ( struct tcp_connection *tcp, uint32_t seq ) {
00833 
00834         /* Ignore duplicate or out-of-order FIN */
00835         if ( ( tcp->rcv_ack - seq ) > 0 )
00836                 return 0;
00837 
00838         /* Mark FIN as received and acknowledge it */
00839         tcp->tcp_state |= TCP_STATE_RCVD ( TCP_FIN );
00840         tcp_rx_seq ( tcp, 1 );
00841 
00842         /* Close connection */
00843         tcp_close ( tcp, 0 );
00844 
00845         return 0;
00846 }
00847 
00848 /**
00849  * Handle TCP received RST
00850  *
00851  * @v tcp               TCP connection
00852  * @v seq               SEQ value (in host-endian order)
00853  * @ret rc              Return status code
00854  */
00855 static int tcp_rx_rst ( struct tcp_connection *tcp, uint32_t seq ) {
00856 
00857         /* Accept RST only if it falls within the window.  If we have
00858          * not yet received a SYN, then we have no window to test
00859          * against, so fall back to checking that our SYN has been
00860          * ACKed.
00861          */
00862         if ( tcp->tcp_state & TCP_STATE_RCVD ( TCP_SYN ) ) {
00863                 if ( ( seq - tcp->rcv_ack ) >= tcp->rcv_win )
00864                         return 0;
00865         } else {
00866                 if ( ! ( tcp->tcp_state & TCP_STATE_ACKED ( TCP_SYN ) ) )
00867                         return 0;
00868         }
00869 
00870         /* Abort connection */
00871         tcp->tcp_state = TCP_CLOSED;
00872         tcp_dump_state ( tcp );
00873         tcp_close ( tcp, -ECONNRESET );
00874 
00875         DBGC ( tcp, "TCP %p connection reset by peer\n", tcp );
00876         return -ECONNRESET;
00877 }
00878 
00879 /**
00880  * Process received packet
00881  *
00882  * @v iobuf             I/O buffer
00883  * @v st_src            Partially-filled source address
00884  * @v st_dest           Partially-filled destination address
00885  * @v pshdr_csum        Pseudo-header checksum
00886  * @ret rc              Return status code
00887   */
00888 static int tcp_rx ( struct io_buffer *iobuf,
00889                     struct sockaddr_tcpip *st_src,
00890                     struct sockaddr_tcpip *st_dest __unused,
00891                     uint16_t pshdr_csum ) {
00892         struct tcp_header *tcphdr = iobuf->data;
00893         struct tcp_connection *tcp;
00894         struct tcp_options options;
00895         size_t hlen;
00896         uint16_t csum;
00897         uint32_t start_seq;
00898         uint32_t seq;
00899         uint32_t ack;
00900         uint32_t win;
00901         unsigned int flags;
00902         size_t len;
00903         int rc;
00904 
00905         /* Sanity check packet */
00906         if ( iob_len ( iobuf ) < sizeof ( *tcphdr ) ) {
00907                 DBG ( "TCP packet too short at %zd bytes (min %zd bytes)\n",
00908                       iob_len ( iobuf ), sizeof ( *tcphdr ) );
00909                 rc = -EINVAL;
00910                 goto discard;
00911         }
00912         hlen = ( ( tcphdr->hlen & TCP_MASK_HLEN ) / 16 ) * 4;
00913         if ( hlen < sizeof ( *tcphdr ) ) {
00914                 DBG ( "TCP header too short at %zd bytes (min %zd bytes)\n",
00915                       hlen, sizeof ( *tcphdr ) );
00916                 rc = -EINVAL;
00917                 goto discard;
00918         }
00919         if ( hlen > iob_len ( iobuf ) ) {
00920                 DBG ( "TCP header too long at %zd bytes (max %zd bytes)\n",
00921                       hlen, iob_len ( iobuf ) );
00922                 rc = -EINVAL;
00923                 goto discard;
00924         }
00925         csum = tcpip_continue_chksum ( pshdr_csum, iobuf->data,
00926                                        iob_len ( iobuf ) );
00927         if ( csum != 0 ) {
00928                 DBG ( "TCP checksum incorrect (is %04x including checksum "
00929                       "field, should be 0000)\n", csum );
00930                 rc = -EINVAL;
00931                 goto discard;
00932         }
00933         
00934         /* Parse parameters from header and strip header */
00935         tcp = tcp_demux ( tcphdr->dest );
00936         start_seq = seq = ntohl ( tcphdr->seq );
00937         ack = ntohl ( tcphdr->ack );
00938         win = ntohs ( tcphdr->win );
00939         flags = tcphdr->flags;
00940         tcp_rx_opts ( tcp, ( ( ( void * ) tcphdr ) + sizeof ( *tcphdr ) ),
00941                       ( hlen - sizeof ( *tcphdr ) ), &options );
00942         iob_pull ( iobuf, hlen );
00943         len = iob_len ( iobuf );
00944 
00945         /* Dump header */
00946         DBGC2 ( tcp, "TCP %p RX %d<-%d           %08x %08x..%08zx %4zd",
00947                 tcp, ntohs ( tcphdr->dest ), ntohs ( tcphdr->src ),
00948                 ntohl ( tcphdr->ack ), ntohl ( tcphdr->seq ),
00949                 ( ntohl ( tcphdr->seq ) + len +
00950                   ( ( tcphdr->flags & ( TCP_SYN | TCP_FIN ) ) ? 1 : 0 )), len);
00951         tcp_dump_flags ( tcp, tcphdr->flags );
00952         DBGC2 ( tcp, "\n" );
00953 
00954         /* If no connection was found, send RST */
00955         if ( ! tcp ) {
00956                 tcp_xmit_reset ( tcp, st_src, tcphdr );
00957                 rc = -ENOTCONN;
00958                 goto discard;
00959         }
00960 
00961         /* Handle ACK, if present */
00962         if ( flags & TCP_ACK ) {
00963                 if ( ( rc = tcp_rx_ack ( tcp, ack, win ) ) != 0 ) {
00964                         tcp_xmit_reset ( tcp, st_src, tcphdr );
00965                         goto discard;
00966                 }
00967         }
00968 
00969         /* Handle SYN, if present */
00970         if ( flags & TCP_SYN ) {
00971                 tcp_rx_syn ( tcp, seq, &options );
00972                 seq++;
00973         }
00974 
00975         /* Handle RST, if present */
00976         if ( flags & TCP_RST ) {
00977                 if ( ( rc = tcp_rx_rst ( tcp, seq ) ) != 0 )
00978                         goto discard;
00979         }
00980 
00981         /* Handle new data, if any */
00982         tcp_rx_data ( tcp, seq, iobuf );
00983         seq += len;
00984 
00985         /* Handle FIN, if present */
00986         if ( flags & TCP_FIN ) {
00987                 tcp_rx_fin ( tcp, seq );
00988                 seq++;
00989         }
00990 
00991         /* Update timestamp, if present and applicable */
00992         if ( ( seq == tcp->rcv_ack ) && options.tsopt )
00993                 tcp->ts_recent = ntohl ( options.tsopt->tsval );
00994 
00995         /* Dump out any state change as a result of the received packet */
00996         tcp_dump_state ( tcp );
00997 
00998         /* Send out any pending data.  We force sending a reply if either
00999          *
01000          *  a) the peer is expecting an ACK (i.e. consumed sequence space), or
01001          *  b) either end of the packet was outside the receive window
01002          *
01003          * Case (b) enables us to support TCP keepalives using
01004          * zero-length packets, which we would otherwise ignore.  Note
01005          * that for case (b), we need *only* consider zero-length
01006          * packets, since non-zero-length packets will already be
01007          * caught by case (a).
01008          */
01009         tcp_xmit ( tcp, ( ( start_seq != seq ) ||
01010                           ( ( seq - tcp->rcv_ack ) > tcp->rcv_win ) ) );
01011 
01012         /* If this packet was the last we expect to receive, set up
01013          * timer to expire and cause the connection to be freed.
01014          */
01015         if ( TCP_CLOSED_GRACEFULLY ( tcp->tcp_state ) ) {
01016                 tcp->timer.timeout = ( 2 * TCP_MSL );
01017                 start_timer ( &tcp->timer );
01018         }
01019 
01020         return 0;
01021 
01022  discard:
01023         /* Free received packet */
01024         free_iob ( iobuf );
01025         return rc;
01026 }
01027 
01028 /** TCP protocol */
01029 struct tcpip_protocol tcp_protocol __tcpip_protocol = {
01030         .name = "TCP",
01031         .rx = tcp_rx,
01032         .tcpip_proto = IP_TCP,
01033 };
01034 
01035 /***************************************************************************
01036  *
01037  * Data transfer interface
01038  *
01039  ***************************************************************************
01040  */
01041 
01042 /**
01043  * Close interface
01044  *
01045  * @v xfer              Data transfer interface
01046  * @v rc                Reason for close
01047  */
01048 static void tcp_xfer_close ( struct xfer_interface *xfer, int rc ) {
01049         struct tcp_connection *tcp =
01050                 container_of ( xfer, struct tcp_connection, xfer );
01051 
01052         /* Close data transfer interface */
01053         tcp_close ( tcp, rc );
01054 
01055         /* Transmit FIN, if possible */
01056         tcp_xmit ( tcp, 0 );
01057 }
01058 
01059 /**
01060  * Check flow control window
01061  *
01062  * @v xfer              Data transfer interface
01063  * @ret len             Length of window
01064  */
01065 static size_t tcp_xfer_window ( struct xfer_interface *xfer ) {
01066         struct tcp_connection *tcp =
01067                 container_of ( xfer, struct tcp_connection, xfer );
01068 
01069         /* Not ready if data queue is non-empty.  This imposes a limit
01070          * of only one unACKed packet in the TX queue at any time; we
01071          * do this to conserve memory usage.
01072          */
01073         if ( ! list_empty ( &tcp->queue ) )
01074                 return 0;
01075 
01076         /* Return TCP window length */
01077         return tcp_xmit_win ( tcp );
01078 }
01079 
01080 /**
01081  * Deliver datagram as I/O buffer
01082  *
01083  * @v xfer              Data transfer interface
01084  * @v iobuf             Datagram I/O buffer
01085  * @v meta              Data transfer metadata
01086  * @ret rc              Return status code
01087  */
01088 static int tcp_xfer_deliver_iob ( struct xfer_interface *xfer,
01089                                   struct io_buffer *iobuf,
01090                                   struct xfer_metadata *meta __unused ) {
01091         struct tcp_connection *tcp =
01092                 container_of ( xfer, struct tcp_connection, xfer );
01093 
01094         /* Enqueue packet */
01095         list_add_tail ( &iobuf->list, &tcp->queue );
01096 
01097         /* Transmit data, if possible */
01098         tcp_xmit ( tcp, 0 );
01099 
01100         return 0;
01101 }
01102 
01103 /** TCP data transfer interface operations */
01104 static struct xfer_interface_operations tcp_xfer_operations = {
01105         .close          = tcp_xfer_close,
01106         .vredirect      = ignore_xfer_vredirect,
01107         .window         = tcp_xfer_window,
01108         .alloc_iob      = default_xfer_alloc_iob,
01109         .deliver_iob    = tcp_xfer_deliver_iob,
01110         .deliver_raw    = xfer_deliver_as_iob,
01111 };
01112 
01113 /***************************************************************************
01114  *
01115  * Openers
01116  *
01117  ***************************************************************************
01118  */
01119 
01120 /** TCP socket opener */
01121 struct socket_opener tcp_socket_opener __socket_opener = {
01122         .semantics      = TCP_SOCK_STREAM,
01123         .family         = AF_INET,
01124         .open           = tcp_open,
01125 };
01126 
01127 /** Linkage hack */
01128 int tcp_sock_stream = TCP_SOCK_STREAM;
01129 
01130 /**
01131  * Open TCP URI
01132  *
01133  * @v xfer              Data transfer interface
01134  * @v uri               URI
01135  * @ret rc              Return status code
01136  */
01137 static int tcp_open_uri ( struct xfer_interface *xfer, struct uri *uri ) {
01138         struct sockaddr_tcpip peer;
01139 
01140         /* Sanity check */
01141         if ( ! uri->host )
01142                 return -EINVAL;
01143 
01144         memset ( &peer, 0, sizeof ( peer ) );
01145         peer.st_port = htons ( uri_port ( uri, 0 ) );
01146         return xfer_open_named_socket ( xfer, SOCK_STREAM,
01147                                         ( struct sockaddr * ) &peer,
01148                                         uri->host, NULL );
01149 }
01150 
01151 /** TCP URI opener */
01152 struct uri_opener tcp_uri_opener __uri_opener = {
01153         .scheme         = "tcp",
01154         .open           = tcp_open_uri,
01155 };
01156 

Generated on Tue Apr 6 20:01:10 2010 for gPXE by  doxygen 1.5.7.1