#include #include #include #include "tcp.h" #include "demux.h" // TCP in user space, for common case // ASSUMPTIONS // Host machine uses network byte ordering // Q operations. Put here, instead of header file, because demux // file uses same ops but with pointers to the Q structure #define HEAD(Q) (Q.head) #define TAIL(Q) (Q.tail) #define NOT_EMPTY(Q) (Q.head!=Q.tail) #define Q_FULL(Q) (Q.head==(Q.tail+1)%MAX_Q_SIZE) #define ENQ(Q) (Q.tail=(Q.tail+1)%MAX_Q_SIZE) #define DEQ(Q) (Q.head=(Q.head+1)%MAX_Q_SIZE) // TCP/IP Headers // 0 1 2 3 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |Version| IHL |Type of Service| Total Length | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Identification |Flags| Fragment Offset | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Time to Live | Protocol | Header Checksum | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Source Address | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Destination Address | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Options | Padding | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Source Port | Destination Port | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Sequence Number | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Acknowledgment Number | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Data | |U|A|E|R|S|F| | // | Offset| Reserved |R|C|O|S|Y|I| Window | // | | |G|K|L|T|N|N| | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Checksum | Urgent Pointer | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Options | Padding | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | data | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // TCP/IP Headers, Common case values to check for at receiver // 0 1 2 3 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |0 1 0 0|0 1 0 1|X X X X X X X X| Total Length | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |X X X X X X X X X X X X X X X X|X X 0|0 0 0 0 0 0 0 0 0 0 0 0 0| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |X X X X X X X X|*TODO: FILL IN*| Header Checksum | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |***********************Source Address**************************| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |********************Destination Address************************| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |**********Source Port**********|*******Destination Port********| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Sequence Number | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Acknowledgment Number | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | | | |A| | | | | | // |0 1 0 1|X X X X X X|0|C|0|0|0|0| Window | // | | | |K| | | | | | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Checksum |X X X X X X X X X X X X X X X X| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // Fields with ***** should be handled by the network interface or coprocessor // Fields with ????? may be handled by network interface or by TCP/IP code // Sender Fields %%%%%=Cached, doesn't change between packets for common // case packets. // 0 1 2 3 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |%%%%%%%%%%%%%%%|%%%%%%%%%%%%%%%| Total Length | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%|%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |%%%%%%%%%%%%%%%|%%%%%%%%%%%%%%%| Header Checksum | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // |%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%|%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Sequence Number | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Acknowledgment Number | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | | | |A| | | | | | // |%%%%%%%|%%%%%%%%%%%|%|C|%|%|%|%| Window | // | | | |K| | | | | | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | Checksum |%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%| // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | data | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // Questions/TODOS // What about 32-bit checksum option? // When does an ACK get sent? Costs a lot. // Check about IP Sequence number...should be able to discard this // Worry about protection, possible change in buffer semantics // Which of the NI fields should we check against cached values? // In MINI, what do you do with the second page? Do they have to be // contiguous? How much overhead is there to freeing up that page? // Crossing the protection boundary...how long does it take? Have to // do this once for the sender // Do we need to worry about semaphores on user space Q? // // Main things to do at receiver: // Send ACK (not always) // Demultiplexing (done by NI?) // S&R -- even if this is necessary, shouldn't hurt too much // Verify checksum (done by NI?) // Free packets which have been acknowledged, update window // Calculate and verify header checksum // Main things to do at sender: // Fill in fields (mostly cached) // Set timeouts on packets // Periodically verify that no retrans needed // Calculate checksum // S&R -- shouldn't happen // Congestion control Q outstanding; // Non-stable packets Q Q_for_app; // Packets delivered to application Q Q_for_net; // Do we need this? SocketInfo socket; // This handles an incoming packet, sends out an ACK if necessary, and // pushes it on a queue, if necessary. Returns whether or not there are // any packets in the outgoing queue // How is the application going to get these packets without copying them? // TCP interface is all wrong...causes another copy. // So we have to propose another interface, where we pass the shared buffers // right up to the app. #define MIN_BYTES_TO_ACK 1024 #define FREE(X) (X=X) // TODO: Update this char *HandleUncommonPacket(TCPIP_Packet *InPacket) { return NULL; } char *HandleErrorPacket(TCPIP_Packet *InPacket) { return NULL; } void SendACK(TCPIP_Packet *InPacket) { return; } // Handle a packet that can't be sent immediately void BlockOnSend(char *data, // Part of a shared buffer unsigned short int length, // Length of buffer in use by data SocketInfo *socket) // Pointer to socket to send out on { } void Send(char *data, // Part of a shared buffer unsigned short int length, // Length of buffer in use by data SocketInfo *socket) // Pointer to socket to send out on { // Header space has already been provided at beginning of buffer // Get address of beginning of header register TCPIP_Packet *outPacket = (TCPIP_Packet *)(data - TCPIP_HEADER_SIZE); register i; register DWORD sum; register unsigned short *header; register WORD receiveWindow; // Check window and congestionWindow to see if we can send this now if (socket->window < length || socket->congestionWindow < length) { BlockOnSend(data, length, socket); return; } // Fill in cached values memcpy(outPacket, &socket->sendHeader, TCPIP_HEADER_SIZE); // Fill in length outPacket->length=length+20; // add on size of TCP header // Calculate window to advertise, fill it in // Have to worry about silly window effect // socket->maxReceiveWindow is incremented when a buffer is freed, // which is not simulated in this code. receiveWindow = socket->receiveWindow; if (receiveWindow * 4 < socket->maxReceiveWindow || receiveWindow < socket->maxSegmentSize) receiveWindow = 0; outPacket->window = receiveWindow; // Checksum field gets written whenever the single copy occurs // Fill in sequence number, increment outPacket->seq_number = socket->senderSeqNumber; socket->senderSeqNumber += length; // Fill in acknowledgement value and flag // TODO: Could optimize a lot of these copies by manipulating the // value in the sendHeader directly outPacket->ack_number = socket->highest_seq_number; // Decrement window we can send socket->window -= length; // Calculate header checksum, fill in header=(unsigned short *)outPacket; for (sum=0, i=0; i<8; i++) { sum += *header++; } sum = (sum >> 16) + (sum & 0xffff); // add in carry sum += (sum >> 16); // maybe one more (?) outPacket->header_checksum = (short)~sum; // TODO: Actually send packet to net / NI } char *Receive(TCPIP_Packet *inPacket, SocketInfo *socket, DWORD checksum) { register length; register i; register DWORD sum; register unsigned short *header; // Check to make sure this is a common packet // We do not have to check for segmented packets here, because // this is done in the demux if (((inPacket->ver_len_qos & 0xFF00) != 0x4500) || ((inPacket->ttl_proto & 0x00FF) != TCP_PROTO_NUMBER) || ((inPacket->off_flags & 0xF02F) != 0x5000) || (inPacket->seq_number != socket->next_seq_number)) { return HandleUncommonPacket(inPacket); } // Calculate length, update next expected sequence number length=inPacket->length - 40; socket->next_seq_number += length; // Calculate and verify header checksum // Code for this comes from Comer, vol.2, pg. 70 header=(unsigned short *)inPacket; for (sum=0, i=0; i<8; i++) { sum += *header++; } sum = (sum >> 16) + (sum & 0xffff); // add in carry sum += (sum >> 16); // maybe one more (?) if ((short)~sum != inPacket->header_checksum) return HandleErrorPacket(inPacket); // Verify checksum -- TODO: What about pseudo-header? if (checksum != inPacket->checksum) return HandleErrorPacket(inPacket); // Put packet on outgoing Q // TODO: Simulate cost for user of pulling packet off queue and // sending it back? Q_for_app.data[TAIL(Q_for_app)]=&(inPacket->data); Q_for_app.header[TAIL(Q_for_app)]=inPacket; // Update highest received sequence number, maxReceiveWindow socket->highest_seq_number = inPacket->seq_number; socket->maxReceiveWindow -= length; // Send ACK (not always...only if >some fraction of window?) if (length > MIN_BYTES_TO_ACK || socket->bytes_unacked > 0) { SendACK(inPacket); } else socket->bytes_unacked += length; // Update (amount we can send) window size var socket->window=inPacket->window; // Update congestion control var // If we are below slow start threshold, increment our // our window by a segment if (socket->congestionWindow < socket->slowStartThreshold) socket->congestionWindow += socket->maxSegmentSize; // Else do a slower linear increase else socket->congestionWindow += (socket->maxSegmentSize * socket->maxSegmentSize) / socket->congestionWindow; // Free packets which have been acknowledged, update window if ((inPacket->off_flags & 0x0010) > 0) { // Is ACK flag set? while (NOT_EMPTY(outstanding)) { // are there packets left to be acked? // if ack number is smaller than highest byte in this packet, we are done if (LESS_THAN(inPacket->ack_number, outstanding.number[HEAD(outstanding)])) break; // free up buffer ... TODO // This should be a single write to a local queue FREE(outstanding.data[HEAD(outstanding)]); // look at next outstanding packet DEQ(outstanding); } } return NULL; } main() { TCPIP_Packet temp; Receive(&temp, &socket, 0); return 0; }