http_connect.c

Go to the documentation of this file.
00001 /*
00002 http_connect
00003 
00004 Copyright July 5, 2001, The University of North Carolina at Chapel Hill
00005 
00006 Redistribution and use in source and binary forms, with or without
00007 modification, are permitted provided that the following conditions are met:
00008 
00009   1. Redistributions of source code must retain the above copyright notice,
00010      this list of conditions and the following disclaimer.
00011   2. Redistributions in binary form must reproduce the above copyright
00012      notice, this list of conditions and the following disclaimer in the
00013      documentation and/or other materials provided with the distribution.
00014   3. The name of the author may not be used to endorse or promote products
00015      derived from this software without specific prior written permission.
00016 
00017 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
00018 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
00019 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
00020 EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00021 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00022 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
00023 OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00024 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
00025 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
00026 ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00027 
00028 Contact person:
00029 
00030     Frank D. Smith, University of North Carolina at Chapel Hill
00031         email: smithfd@cs.unc.edu
00032     phone: 919-962-1884
00033     fax:   919-962-1799
00034 */
00035 
00036 /* 
00037    This program performs an analysis of tcpdump output (the ASCII print
00038    lines that tcpdump generates to stdout) and produces a summary of the
00039    TCP connections used for HTTP.  It assumes that the tcpdump has been
00040    filtered for packets that are from TCP source port 80 and that
00041    the result has been sorted so that packets are in ascending time
00042    order within each TCP connection.  The script given below will properly
00043    prepare the input for this program given a tcpdump binary file that 
00044    may contain more than just HTTP packets (the file extensions are
00045    just examples, the program does not make any assumptions about input
00046    file names).  Note that this filtering produces a UNI-DIRECTIONAL 
00047    trace containing only those TCP segments sent from the server to the
00048    client.
00049 
00050    The output is a file with summary records for each connection (basically
00051    the data data summarized into response and request lengths).  There is
00052    also a <file name>.log file with summary counts.
00053 
00054    To get usage information, invoke the program with the -h switch.
00055 
00056    WARNING! THIS PROGRAM DEPENDS ON THE FORMAT OF THE OUTPUT OF TCPDUMP
00057    AS PRINTED ON STDOUT (ASCII).  IT HAS BEEN TESTED ONLY ON THE 
00058    FREEBSD VERSION OF TCPDUMP CORRESPONDING TO RELEASES UP THROUGH 4.1
00059 
00060    KNOWN BUG: Does not process tcpdump output for ECN flags correctly.
00061 
00062 #! /bin/sh
00063 tcpdump -n -tt -r $1 tcp src port 80 > $1.http-srv
00064 sort -s -o $1.http-srv-sort +1 -2 +3 -4 +0 -1 -T /tmp $1.http-srv
00065 
00066 */
00067 
00068 #include <stdlib.h>
00069 #include <stdio.h>
00070 #include <math.h>
00071 #include <sys/time.h>
00072 
00073 #define min(a,b) ((a) <= (b) ? (a) : (b))
00074 #define max(a,b) ((a) >= (b) ? (a) : (b))
00075 
00076 int report_ACK_err = 1;
00077 
00078 void Usage(char *s)
00079 {
00080   fprintf (stderr,"\nUsage: %s\n", s);
00081   fprintf (stderr,"    [-w file_name] (name for output file)\n");
00082   fprintf (stderr,"    [-r file_name] (name for input file)\n");
00083   fprintf (stderr,"If either -w or -r is omitted, stdout(stdin) is used\n");
00084   fprintf (stderr,"\n");
00085   exit(-1);
00086 }
00087 
00088   FILE *dumpFP, *outFP;
00089   FILE *logFP;
00090 
00091   struct timeval recvTime;
00092   struct timeval lastTime = {0,0};
00093 
00094   char ts[20]; /* ASCII timestamp in tcpdump output */
00095                /* max. ssssssssss.mmmmmm characters + EOL */
00096   char sh[25]; /* ASCII source host.port in tcpdump output */
00097                /* max. hhh.hhh.hhh.hhh.ppppp + EOL */
00098   char gt[3];  /* ">" symbol in tcpdump output */
00099   char lt[3];  /* "<" symbol in tcpdump output */
00100   char dh[25]; /* ASCII destination host.port in tcpdump output */
00101                /* max. hhh.hhh.hhh.hhh.ppppp + EOL */
00102   char fl[5];  /* ASCII TCP flags field in tcpdump output */
00103   char p1[50]; /* ASCII first field after flags in tcpdump output */
00104   char p2[50]; /* ASCII second field after flags */
00105   char p3[50]; /* ASCII third field after flags */
00106 
00107 /* These are read from the tcpdump records */
00108   unsigned long begin_seq, end_seq, seq_bytes, new_ack;
00109   unsigned long current_synseq;
00110 
00111   int has_seq, has_ack;
00112 
00113 /* In TCP ACKs should be monotonically increasing so the length of the current
00114    request in a sequence can be computed as the difference between the
00115    ACK value marking the end of the previous request and the ACK value
00116    marking the end of the current request.  Unfortunately, out-of-order
00117    segments can cause ACKs to appear as if they "go backward".  For segments
00118    without data (ACK_ONLY), simply ignoring the "backward" ACK is fine. 
00119    In many cases where there is data along with a "backward" ACK,
00120    this is the result of (un-necessary) retransmission of segments
00121    containing data and those cases are handled properly.  In other cases,
00122    the results can be incorrect for HTTP 1.1 connections.  Each case of
00123    suspected ACK mis-ordering that might lead to erroneous request or
00124    response lengths is noted for off-line investigation.
00125    */
00126 
00127   unsigned long current_request_end, last_request_end;
00128 
00129 /* In TCP segments the sequence numbers we see may NOT be monotonically
00130    increasing (retransmission, out-of-order).  Instead we record the 
00131    largest value seen at any point.  Then the length of the current
00132    response in a sequence can be computed as the difference between the
00133    (largest) sequence at the end of the previous response and the
00134    (largest) sequence at the end of the current response.  */
00135 
00136   unsigned long current_response_end, last_response_end;
00137 
00138 /* Various counters for logging connection summary information */
00139 
00140   int syn_count = 0; /* connections beginning with SYN in trace */
00141   int req_count = 0; /* number of identified requests */
00142   int rsp_count = 0; /* number of identified responses */
00143   int fin_count = 0; /* connections ending with FIN in trace */
00144   int rst_count = 0; /* connections ending with Reset (no FIN) */
00145   int trm_count = 0; /* connections ending with no Reset or FIN */
00146   int err_count = 0; /* connections with at least one suspected error */
00147   int act_req_count = 0; /* connections beginning in a request */
00148   int act_rsp_count = 0; /* connections beginning in a response */
00149   int pending_fin_count = 0; /* partial with only FIN(s) */
00150   int pending_rst_count = 0; /* partial with only Reset(s) */
00151   int pending_ack_count = 0; /* partial with only ACK(s) */
00152   int pending_oth_count = 0; /* partial -- others */
00153   int pending_cmb_count = 0; /* partials with FIN, Reset, ACK combinations */
00154 
00155 /* Various counters for events within a single connection */
00156   /* the following apply only when state == PENDING; see pending_xxx_counts
00157      above for explanations */
00158   int have_pending_acks = 0;
00159   int have_pending_fins = 0;
00160   int have_pending_rsts = 0;
00161   int have_pending_othr = 0;
00162   /* the following apply only when state != PENDING */
00163   int have_ACK_error = 0;   /* seen "backwards" ACK */
00164   int have_value_error = 0; /* seen suspect ACK or sequence # */
00165   int have_FINdata_error = 0; /* seen data after FIN */
00166 
00167   enum states {PENDING, SYN_SENT, FIN_SENT, RESET, IN_REQUEST, IN_RESPONSE};
00168 
00169   enum states connection_state = PENDING;
00170   enum states last_state = PENDING;
00171 
00172   enum inputs {SYN, FIN, RST, ACK_ONLY, DATA_ACK};
00173   enum inputs input_type;
00174 
00175   char current_src[25] = "";
00176   char src_host[25];
00177   char src_port[10];
00178 
00179   char current_dst[25] = "";
00180   char dst_host[25];
00181   char dst_port[10];
00182 
00183 
00184 /* A request is considered to start at the timestamp on the tcpdump record
00185    containing the first advance in the ACK field (a) following the connection
00186    establishment or (b) while the connection is sending response data. */
00187 
00188   char start_request_time[20];
00189 
00190 /* A response is considered to start at the timestamp on the tcpdump record
00191    containing the first advance in the sequence number field following a
00192    period when the ACK has been advanced (receiving request data).  */
00193 
00194   char start_response_time[20];
00195 
00196 /* A response is considered to end at the timestamp on the tcpdump record 
00197    that LAST advanced the data sequence number before the response ends 
00198    (a new request starts, a FIN is sent, etc.).  Since we would have to
00199    look ahead in the trace to be sure the current record has the LAST 
00200    advance, we go ahead and assume it is the last and record its 
00201    timestamp as response_end_time.  This way, when we know the sequence
00202    number will not advance more for this response from processsing some 
00203    subsequent record in the trace (FIN or ACK advance), the last timestamp
00204    has already been saved.  Similarly, a request is considered to end at
00205    the timestamp on the last record of the request. */
00206   
00207   char response_end_time[20];
00208   char request_end_time[20];
00209 
00210   char FIN_sent_time[20];   
00211   char RST_sent_time[20];
00212   char last_connection_time[20];
00213 
00214   char input_name[256] = "";
00215   char output_name[256] = "";
00216   char log_name[256] = "";
00217 
00218   char new_line[500];  /* assumes no tcpdump output line is longer */
00219 
00220   long elapsed;
00221 
00222   int new_address = 0;
00223   int rc = 0;
00224 
00225 void error_line(char *s);  
00226 void error_state(char *s);
00227 int parse_dump_record(void);
00228 void init_connection(void);
00229 void init_active(void);
00230 void check_tuple_reuse(void);
00231 int check_ACK_advance(unsigned long old_ack);
00232 void begin_REQ(void);
00233 void more_REQ(void);
00234 void begin_RSP(void);
00235 void more_RSP(void);
00236 void log_REQ(void);
00237 void log_RSP(void);
00238 void log_SYN(void);
00239 void log_END(char *how);
00240 void log_ACT(char *how);
00241 void log_nosyn(void);
00242 void log_connection(void);
00243 void log_log(void);
00244 long elapsed_ms(char *end, char *start);
00245 void get_host_port(char *adr, char *host, char *port);
00246 int  get_sequence(char *p, unsigned long *begin, unsigned long *end,
00247                            unsigned long *bytes);
00248 
00249 void main (int argc, char* argv[])
00250 {
00251   int i;
00252 
00253   /* Parse the command line */
00254   i = 1;
00255   while (i < argc) {
00256     if (strcmp (argv[i], "-r") == 0) {
00257       /* -r flag is followed by name of file to read */
00258       if (++i >= argc) Usage (argv[0]);
00259       strcpy (input_name, argv[i]);
00260     }
00261     else if (strcmp (argv[i], "-w") == 0) {
00262       /* -w flag is followed by the name of file to write */
00263       if (++i >= argc) Usage (argv[0]);
00264       strcpy (output_name, argv[i]);
00265     }
00266     else 
00267       Usage (argv[0]);
00268     i++;
00269   }
00270 
00271   /* Open files */
00272   /* Note: program is written to also be used as a filter */
00273 
00274   if (strcmp(output_name, "") == 0) 
00275     /* if no explicit output file named with -w, use stdout */
00276      outFP = stdout;
00277   else 
00278      {
00279       if ((outFP = fopen (output_name, "w")) == NULL) {
00280           fprintf (stderr, "error opening %s\n", output_name);
00281           exit (-1);
00282           }
00283      }
00284 
00285   if (strcmp(input_name, "") == 0)
00286     /* if no explicit input file named with -r, use stdin */
00287      dumpFP = stdin;
00288   else 
00289      {
00290       if ((dumpFP = fopen (input_name, "r")) == NULL) {
00291           fprintf (stderr, "error opening %s\n", input_name);
00292           exit (-1);
00293          }
00294      }
00295   
00296   strcpy(log_name, output_name);
00297   strcat(log_name, ".log");
00298   if ((logFP = fopen (log_name, "w")) == NULL) {
00299       fprintf (stderr, "error opening %s\n", log_name);
00300       exit (-1);
00301      }
00302 
00303   /* begin main loop; once through loop for each line in the tcpdump */
00304   /* a <continue> anywhere in the loop (usually after an error case)
00305      implies beginning of processing a new tcpdump record */
00306 
00307   while (!feof (dumpFP)) {
00308 
00309 /* printf("State is %d, last_state is %d\n", connection_state, last_state); */
00310 
00311     /* Get and parse line of tcpdump file */
00312 
00313     fgets (new_line, sizeof(new_line), dumpFP);
00314 
00315     /* get line pieces; this works because there are always 8 or more 
00316        fields separated by white space in tcpdump ASCII-format lines */
00317 /*    sscanf (new_line, "%s %s %s %s %s %s %s %s %s",
00318                       &ts, &lt, &sh, &gt, &dh, &fl, &p1, &p2, &p3);
00319 */
00320     sscanf (new_line, "%s %s %s %s %s %s %s %s",
00321                       &ts, &sh, &gt, &dh, &fl, &p1, &p2, &p3);
00322 
00323    /* If any part of the connection tuple (source host.port,destination
00324       host.port) differs from the current values, there are no more 
00325       tcpdump records for that connection so treat as end of connection.
00326       The action taken at the end of a connection depends on the current
00327       state of processing in that connection */  
00328 
00329    if ((strcmp(current_src, sh) != 0) ||  /* new source host/port */
00330        (strcmp(current_dst, dh) != 0))    /* new dest. host/port */
00331        {
00332         log_connection(); 
00333 
00334         /* begin processing this record as being from a potential new
00335            TCP connection so initialize connection's state */
00336 
00337         strcpy(current_src, sh);  /* new connection tuple */
00338         strcpy(current_dst, dh);  /* host and port for src & dest */
00339 
00340         have_pending_acks = 0;
00341         have_pending_fins = 0;
00342         have_pending_rsts = 0;
00343         have_pending_othr = 0;
00344 
00345         current_synseq = 0;
00346         connection_state = PENDING;  /* unconnected pending a SYN */
00347         last_state = PENDING;
00348         new_address = 1;        /* true only for very first record
00349                                    from a different TCP connection -- 
00350                                    avoids multiple error messages */
00351        }
00352    
00353    /* break dump record into essential data fields; initializes the
00354       following variables: begin_seq, end_seq, seq_bytes, new_ack,
00355                            has_ack, has_seq, input_type.
00356       Also checks for suspect sequence and ACK values.
00357    */
00358 
00359    if ((rc = parse_dump_record()) < 0) 
00360        continue;
00361 
00362    /* processing records from a TCP connections is based on a notion of
00363       the current "state" of the connection.  The defined states are
00364         PENDING := record is from different connection than before but
00365                    a beginning SYN has not yet been identified.
00366         SYN_SENT:= have identified SYN sent from source port 80 (server)
00367         FIN_SENT:= have identified a FIN sent from source port 80 (server)
00368                    This terminates any data from server.
00369         RESET   := have identified a Reset sent from source port 80 (server)
00370                    This means that the server should not accept any more
00371                    client data.
00372         IN_REQUEST := processing ACKs sent from source port 80 (server)
00373                       in response to data (request) from the client.  In
00374                       this state, need to identify end of client data (request)
00375                       and start of server data (response).
00376         IN_RESPONSE := processing data sequence #s from source port 80
00377                        In this state, need to identify the end of server data
00378                        (response) and, possibly the beginning of a new request.
00379       Whenever the state changes, the prior state is also noted.
00380 
00381       Fundamental to all of this is the notion that a server cannot  
00382       possibly be sending data in response to a request unless that
00383       data is accompanied or preceeded by an advance in the ACK sequence
00384       indicating receipt of the request data.  Similarly, we assume that
00385       any new data sent by a server that follows or is accompanied by an
00386       advance in the ACK sequence number is a response to the request 
00387       that caused the ACK sequence to advance.  Put another way, response
00388       data (sequence # advance) marks the end of a request and ACK 
00389       advance marks the end of a response.  Of course other events such
00390       as FIN or Reset can mark ends also.  The use of ACK advance to mark
00391       the end of a response assumes that HTTP/1.1 browsers don't overlap 
00392       requests on a single TCP connection even if they may "batch" requests, 
00393       i.e., a new request will not be generated until the response has 
00394       been received. If requests and responses are batched but not 
00395       overlapped this will understate the number of objects requested 
00396       and overstate request and response sizes.
00397 
00398       This use of advancing sequence numbers (ACK or data) to mark
00399       requests and responses is disturbed by segment reorderings in 
00400       the network.  In some cases, such as reordering of only data segments
00401       in a response, there is no problem since only the highest value seen
00402       is used.  Reordering of ACKs (especially with data) presents real
00403       problems since boundaries between requests and responses are missed
00404       which can result in overstating request and response sizes.  For this
00405       reason, all such cases of ACK misordering are logged and reported.
00406    */
00407 
00408    switch (connection_state)
00409 
00410      /* a <break> anywhere ends processing of the current tcpdump record 
00411         by ending the switch (which continues the main read loop) */
00412 
00413       {
00414        case PENDING:
00415      {
00416           switch (input_type)
00417              {
00418          case FIN:
00419             {
00420          /* Ignore random FIN before something useful */ 
00421 
00422                  have_pending_fins++;
00423                  break;
00424                 }
00425 
00426          case SYN:
00427             {
00428           /* normal connection start, initialize connection state */
00429 
00430                  init_connection();
00431                  break;
00432         }
00433 
00434          case RST:
00435                 {
00436           /* Ignore random Reset before something useful */
00437 
00438                  have_pending_rsts++;
00439                  break;
00440                 }
00441 
00442              /* The trace may start after a connection is established and we
00443                 do not see a SYN.  Determine if the connection is most likely
00444                 in a request or in a response and log its status.  As before,
00445                 a request is indicated if the ACK sequence (after the first
00446                 one in the trace) is advancing and a response is indicated if
00447                 the data sequence number is advancing. */
00448 
00449              case ACK_ONLY:
00450             {
00451                   /* ignore initial ACK (has absolute value, not relative) */
00452 
00453                  if (new_address == 1)
00454            {
00455                     new_address = 0;
00456                     have_pending_acks++;
00457                    }
00458                  else
00459             {
00460              /* If ACK advances, the connection is in a request */
00461 
00462                      if ((new_ack > 2) &&
00463                          (new_ack < 16384))
00464                 {
00465                          log_ACT("REQ");
00466 
00467                          last_request_end = 1;
00468                          current_request_end = new_ack;
00469 
00470                          last_response_end = 1;
00471                          current_response_end = 1;
00472 
00473                        /* record the request start time as beginning at the 
00474                           tcpdump time stamp on this record */
00475 
00476                           strcpy(start_request_time, ts);
00477                           strcpy(request_end_time, ts);
00478 
00479                           last_state = connection_state;
00480                           connection_state = IN_REQUEST;
00481  
00482                           init_active();  /* initialize connection state */
00483              }
00484                      else /* ignore ACKs that are not advancing */
00485                         have_pending_acks++;
00486             }
00487                  break;
00488                 }
00489 
00490          case DATA_ACK:          
00491             {
00492          /* If data sequence advances, connection is in response */
00493 
00494                  if ((seq_bytes > 1) &&
00495                      (seq_bytes < 65535))
00496             {
00497                      log_ACT("RSP");
00498 
00499                      /* assume tcpdump relative addressing and initialize */
00500 
00501                      last_request_end = 1;    
00502                      current_request_end = 1;
00503 
00504                      last_response_end = 0;     
00505                      current_response_end = seq_bytes;
00506 
00507                      /* record timestamp of tcpdump record as current
00508                         value of both start and end times of response
00509                         (in case no later end time is found) */
00510 
00511                      strcpy(start_response_time, ts); 
00512                      strcpy(response_end_time, ts);
00513 
00514                      last_state = connection_state;
00515                      connection_state = IN_RESPONSE;
00516  
00517                      init_active();
00518                     }
00519                  else /* ignore data lengths of 0 or 1 */
00520                     have_pending_othr++;
00521                  break;
00522                 }
00523          default:
00524                 break;
00525              }  /* end switch on input_type */
00526           break;
00527      } /* end case PENDING */
00528 
00529        case SYN_SENT:
00530          {
00531 
00532          /* Treat this case as the establishment of a connection.  Usually
00533             the first activity on the connection will be request data from
00534             the client, but some servers appear to "pre-send" data (maybe
00535             the headers) speculatively before ACKing any client data. */
00536 
00537           switch (input_type)
00538              {
00539          case FIN:
00540 
00541               /* A FIN marks the end of either or both request and response */
00542 
00543             {
00544                  if ((has_ack == 1) &&
00545                      (new_ack > (current_request_end + 1)))
00546                                /* ignore possible ACK of FIN */
00547 
00548                { /* this record had advanced the ACK sequence so
00549                         save current request data and log it (since
00550                         the FIN ends the connection it also ends the 
00551                         request). */
00552                
00553                      begin_REQ();
00554                      log_REQ();
00555                     }
00556 
00557                  /* If the data sequence number advances, then there was
00558                     response data.  Save the current response info. and
00559                     log it (since the FIN ends the connection, it also
00560                     ends the response). */
00561 
00562                  if ((has_seq == 1) &&
00563                      (end_seq > current_response_end))  
00564             {
00565                      begin_RSP();
00566                      log_RSP();
00567                     }
00568                  last_state = SYN_SENT;
00569                  connection_state = FIN_SENT;
00570 
00571                  /* record timestamp of first FIN seen on connection */
00572                  if (strcmp(FIN_sent_time, "") == 0)
00573                     strcpy(FIN_sent_time, ts);
00574                  break;
00575                 }
00576 
00577          case SYN:
00578             {
00579           /* In some cases the same host/port pairs are reused in a
00580                      single trace; check for plausible reuse */
00581 
00582                  check_tuple_reuse();
00583                  break;
00584         }
00585 
00586          case RST:
00587                 {
00588                  connection_state = RESET;
00589                  last_state = SYN_SENT;
00590 
00591                  /* record timestamp of first Reset on the connection */
00592 
00593                  if (strcmp(RST_sent_time, "") == 0)
00594                     strcpy(RST_sent_time, ts);
00595                  break;
00596                 }
00597 
00598              case ACK_ONLY:
00599             {
00600                  /* since there is no data sequence # present in the record
00601                     and, therefore, the server is not sending data so
00602                     all the client's data may not have arrived.  Note the
00603                     current extent of ACKed client data and change state */
00604 
00605                  if (new_ack > (current_request_end + 1))
00606                     begin_REQ();
00607                  break;
00608                 }
00609 
00610          case DATA_ACK:          
00611             {
00612                 /* check for presence of data sequence # in the common cases
00613                    in SYN_SENT state, data presence indicates that 
00614                    request data has been received and server is sending data
00615                    that should be a response */
00616 
00617                  if (new_ack > (current_request_end + 1)) 
00618                 {/* this record had advanced the ACK sequence so
00619                      save current request info. and change state */
00620 
00621                      begin_REQ();
00622 
00623                      /* the server's data sequence may not have advanced;
00624                         but if it has, treat it as the start of a response
00625                         and the end of the client (request) data */
00626 
00627                      if (end_seq > current_response_end)
00628                 {
00629                           /* start of response ends the request, log
00630                           it and change state to look for end of response */  
00631 
00632                          log_REQ();
00633                          begin_RSP();
00634                 }
00635             }
00636                  else 
00637            /* some servers appear to send response data immediately
00638                       on completing the TCP connection without receiving 
00639                       any request data */
00640 
00641                     if ((end_seq > last_response_end) && 
00642                         (seq_bytes > 0))
00643                        begin_RSP();
00644                 break;
00645         }
00646          default:
00647                 break;
00648          } /* end switch on input type */
00649           break;
00650      } /* end case SYN_SENT */
00651 
00652        case FIN_SENT:
00653       {
00654            switch (input_type)
00655               {
00656            case SYN:
00657               {
00658           /* In some cases the same host/port pairs are reused in a
00659                      single trace; check for plausible reuse */
00660 
00661                    check_tuple_reuse();
00662                    break;
00663           }
00664 
00665             case FIN:  /* ignore multiple FINs */
00666                    break;
00667 
00668                 case RST:  /* ignore reset after FIN */
00669                    break;
00670 
00671             case ACK_ONLY:
00672                     /* If there is an ACK (only) coming after a FIN is sent, it
00673                       is ether the normal one in the 4-way termination or it
00674                       is to ACK more request data which will be ignored (there
00675                       can be no response).  
00676                       In either case, it is ignored here. */
00677 
00678                    break;
00679 
00680             case DATA_ACK:
00681           {
00682 
00683               /* All others may have sequence number fields.  Treat them as
00684                  errors if they advance the sequence number after a FIN.  
00685                  The usual case here is just retransmissions (including 
00686                  retransmission of the FIN) which should not advance it
00687                  because the highest sequence number possible was on the FIN */
00688 
00689                    if ((end_seq > (current_response_end + 2)) &&
00690                        (have_FINdata_error == 0))
00691               {
00692                        error_state("new data in FIN_SENT state");
00693                        have_FINdata_error = 1;
00694                       }
00695                    break;
00696           }
00697             default:
00698                    break;
00699           } /* end switch on input type */
00700             break; 
00701       } /* end case FIN_SENT */
00702 
00703        case RESET:
00704        {
00705         /* A Reset nominally means an abnormal close of the connection 
00706                with any queued data discarded before transmitting it.  We
00707                observe, however, that under some (unknown) circumstances
00708                segments that advance the data sequence number continue in 
00709                the trace after the Reset.  If these are part of a response, 
00710                it makes sense to count them in the response size since 
00711                the server obviously sent them and the network has to handle 
00712                them.  This also means that a FIN following the Reset may be
00713                a valid indication of the end of a response. Just ignore 
00714                anything else unexpected (e.g., Reset) */
00715 
00716             switch (input_type)
00717                {
00718             case RST:
00719                    break;
00720 
00721             case SYN:
00722               {
00723           /* In some cases the same host/port pairs are reused in a
00724                      single trace; check for plausible reuse */
00725 
00726                    check_tuple_reuse();
00727                    break;
00728           }
00729 
00730             case ACK_ONLY:
00731 
00732                /* an ACK might advance possibly indicating a request.  However,
00733                   since the connection is Reset and there should be no new
00734                   response, just ignore it */
00735 
00736                     break;
00737 
00738             case FIN:
00739           {
00740 
00741             /* Only if the state is IN_RESPONSE do we try to continue looking
00742                for the end of the response.  FIN is treated as the true end
00743                of a response. */
00744 
00745                    if (last_state == IN_RESPONSE)
00746                   {/* ends the current response, ignore any ACK */
00747                        if ((has_seq == 1) &&
00748                            (end_seq > current_response_end)) 
00749                           more_RSP();
00750                        log_RSP();
00751           
00752                        last_state = RESET;
00753                        connection_state = FIN_SENT;
00754 
00755                        /* record timestamp of first FIN on connection */
00756 
00757                        if (strcmp(FIN_sent_time, "") == 0)
00758                           strcpy(FIN_sent_time, ts);
00759               }
00760                     break;
00761            }
00762 
00763            case DATA_ACK:
00764                  {
00765                 /* segments with data may advance the data sequence number as 
00766                    more parts of the response.  They may also advance the ACK
00767                    which normally would indicate a new request.  However,
00768                    after a Reset, it probably will be ignored by the 
00769                    server so it is also ignored here except to mark the end
00770                    of the response. */
00771 
00772                    if (last_state == IN_RESPONSE)
00773               {
00774                        if (new_ack > (last_request_end + 1)) /* new request */
00775                           { /* implies end of current response */
00776                            log_RSP(); 
00777                            last_state = RESET; /* will ignore all else */
00778                            break; 
00779                           }
00780  
00781                      /* As long as the data sequence # advances, continue to
00782                         save info about the current response. */
00783 
00784                        if ((end_seq > current_response_end) &&
00785                            (seq_bytes > 0))
00786                           more_RSP();
00787                       }
00788                   break;
00789          }
00790                default:
00791                   break;
00792            } /* end switch on input type */
00793             break;
00794            } /* end case RESET */
00795 
00796        case IN_RESPONSE:
00797        {
00798             /* In this state, look for events that will indicate the end of
00799                the response data (ACK advances for request, FIN, Reset).  If
00800                the event is ACK advance, this also initiates the start of a
00801                new request.  */
00802 
00803             switch (input_type)
00804                {
00805             case FIN:
00806           {         
00807                    /* FIN is complicated since the segment that carries it may
00808                     also advance the ACK (new request), advance the data
00809                     sequence # and end the connection.  If the ACK advances,
00810                     any sequence # advance is for the new request; otherwise
00811                     a sequence # advance extends and completes the current 
00812                     response. */
00813 
00814                    if (has_ack == 1) 
00815               {
00816                if ((rc = check_ACK_advance(current_request_end)) < 0)
00817                           break;
00818                       }
00819 
00820            /* The ACK advance amount has to be greater than 1 to be
00821                      considered a real new request.  This is primarily to
00822                     filter out just an ACK for a FIN from the client.  Note
00823                     the ACK must advance so duplicate ACKs are ignored. */
00824 
00825                    if ((has_ack == 1) &&
00826                        (new_ack > (current_request_end + 1)))
00827                       { /* implies end of current response, begin request 
00828                            Log the info. for the current response. */
00829 
00830                        log_RSP();  /*end of previous response */
00831 
00832                        /* on FIN, assume end of request also since server is
00833                           closing the connection. */
00834 
00835                        begin_REQ();
00836                        log_REQ(); 
00837 
00838                        /* if the data sequence # also advances, this segment
00839                           (only -- because of the FIN) carries the last part
00840                           of the response */
00841  
00842                        if ((has_seq == 1) &&
00843                            (end_seq > last_response_end)) 
00844                           {/* response begins and ends in the FIN segment */
00845                            begin_RSP();
00846                            log_RSP();  /* also end of any response */
00847                           }
00848 
00849                        /* Note that the null "else" here is the case that the
00850                           data sequence # does not advance (there is no 
00851                           response to the request) -- probably an HTTP/1.1
00852                           broswer attempting multiple requests on a TCP 
00853                           connection where the server doesn't play nice. */
00854 
00855                       }
00856                    else 
00857                       /* the ACK did not advance so no request; if the data
00858                        sequence # advances, it extends the current response */
00859 
00860               {
00861                        if ((has_seq == 1) &&
00862                            (end_seq > current_response_end)) 
00863                           more_RSP();
00864                        log_RSP();    /* FIN always implies end of response */
00865                       }
00866 
00867                    last_state = IN_RESPONSE;
00868                    connection_state = FIN_SENT;
00869 
00870                    if (strcmp(FIN_sent_time, "") == 0)
00871                       strcpy(FIN_sent_time, ts);
00872                    break;
00873           }
00874 
00875                case RST:
00876 
00877                  /* Look for a Reset and only change state to RESET to
00878                    continue processing.  This is explained in comments in 
00879                    processing for the RESET state when the last_state is
00880                    IN_RESPONSE. */
00881 
00882              {/* ignore any advance in seq# on Reset */
00883                   /* it may not be wise to trust ACK on RST */
00884 
00885                   last_state = IN_RESPONSE;
00886                   connection_state = RESET;   
00887 
00888                   if (strcmp(RST_sent_time, "") == 0)
00889                      strcpy(RST_sent_time, ts);
00890                   break;
00891                  }
00892 
00893                case SYN:
00894              {
00895           /* In some cases the same host/port pairs are reused in a
00896                      single trace; check for plausible reuse */
00897 
00898                   check_tuple_reuse();
00899                   break;
00900          }
00901 
00902                case ACK_ONLY:
00903 
00904              /* Begin checking for cases where the ACK may advance
00905                     and really indicate that a new request starts. */
00906 
00907                  {
00908           /* The ACK advance amount has to be greater than 1 to be
00909                      considered a real new request.  This is primarily to
00910                      filter out just an ACK for a FIN from the client.  Note
00911                      the ACK must advance so duplicate or OOO
00912                      ACKs are ignored. */
00913 
00914                   if (new_ack > (last_request_end + 1)) 
00915              { /* implies end of current response, begin request.
00916                          Log the info. for the current response and change
00917                          states. */
00918                       log_RSP();
00919 
00920                     /* since there is no data sequence # present in the record
00921                     and, therefore, the server is not sending data so
00922                     all the client's data may not have arrived.  Note the
00923                     current extent of ACKed client data and start time */
00924 
00925                       begin_REQ();
00926                      }
00927                   break;
00928                  }
00929 
00930                case DATA_ACK:
00931 
00932             /* cases with data and ACKs have two important sub-cases: (a) 
00933                ACK sequence number advances -- implies the start of a
00934                new request and the end of the current response; if the
00935                data sequence number also advances, it is assumed that
00936                all the request data has been received and the first of
00937                the response data is contained in the segment. (b) the
00938                ACK sequence number does not advance so any advance in
00939                the data sequence number is more of the current response. */
00940 
00941            {
00942         if ((rc = check_ACK_advance(last_request_end)) < 0)
00943                    break;
00944 
00945          /* The ACK advance amount has to be greater than 1 to be
00946                     considered a real new request.  This is primarily to
00947                     filter out just an ACK for a FIN from the client.  Note
00948                     the ACK must advance so duplicate ACKs are ignored. */
00949 
00950                 if (new_ack > (last_request_end + 1)) 
00951                { /* implies end of current response, begin request 
00952                          Log the info. for the current response. */
00953 
00954                     log_RSP();  
00955                     begin_REQ();
00956 
00957                     /* If there is also new data (sequence # advances), then
00958                        the request is considered completed and a new response
00959                        has started but not necessarily completed. */
00960 
00961                     if ((end_seq > current_response_end) &&
00962                         (seq_bytes > 0))
00963                {/* Log info. about this request */
00964                         log_REQ();
00965                         begin_RSP();
00966                }
00967            }
00968                 else
00969                     /* the ACK did not advance so no request; if the data
00970                        sequence # advances, it extends the current response */
00971 
00972                     if (end_seq > current_response_end) 
00973                        more_RSP();
00974                 break;
00975                }
00976          default:
00977                 break;
00978         } /* end switch on input type */
00979            break;
00980       }  /* end case IN_RESPONSE */
00981 
00982        case IN_REQUEST:
00983        {
00984         /* In this state check for events that indicate the end of the
00985                request data (advance of data sequence #, FIN, Reset).  An
00986                advance in the data sequence # also marks the beginning of
00987                the response to the request */
00988 
00989             switch (input_type)
00990                {
00991             case FIN:
00992 
00993             /* FIN is somewhat less complicated here since the segment that
00994                implies that the server will send no more data.  Treat this
00995                as effectively ending the request.  If the ACK advances, it
00996                is considered as part of the request and if the data 
00997                sequence # advances, it is the response data (all in this
00998                segment). */
00999 
01000             { /* FIN can ACK more of current request */
01001 
01002           /* The ACK advance amount has to be greater than 1 to be
01003                     considered as extending the request.  This is primarily 
01004                     to filter out just an ACK for a FIN from the client.  Note
01005                     the ACK must advance so duplicate ACKs are ignored. */
01006 
01007                  if ((has_ack == 1) &&
01008                      (new_ack > (current_request_end + 1)))
01009                     more_REQ();
01010                  else
01011                     if (has_ack == 1)
01012                {
01013                 if ((rc = check_ACK_advance(current_request_end)) < 0)
01014                            break;
01015                        }
01016 
01017                  log_REQ();  /* on FIN, assume end of request */
01018 
01019                 /* the server's data sequence may not have advanced;
01020                    but if it has, treat it as the complete response */
01021 
01022                  if ((has_seq == 1) &&
01023                      (end_seq > last_response_end))  
01024                     {
01025                      begin_RSP();
01026                      log_RSP();  /* also end of any response */
01027                     }
01028 
01029                  last_state = IN_REQUEST;
01030                  connection_state = FIN_SENT;
01031 
01032                  if (strcmp(FIN_sent_time, "") == 0)
01033                     strcpy(FIN_sent_time, ts);
01034                  break;
01035         }
01036 
01037          case RST:
01038                {
01039                 /* Treat a Reset as ending the request with no response */
01040             /* ignore any advance in ack on Reset */
01041         /* treat as ending any request */
01042 
01043                 log_REQ();                
01044                 last_state = IN_REQUEST;
01045                 connection_state = RESET;
01046 
01047                 if (strcmp(RST_sent_time, "") == 0)
01048                    strcpy(RST_sent_time, ts);
01049                 break;
01050            }
01051 
01052          case SYN:
01053             {
01054          /* In some cases the same host/port pairs are reused in a
01055                     single trace; check for plausible reuse */
01056 
01057                  check_tuple_reuse();
01058                  break;
01059         }
01060 
01061          case ACK_ONLY:
01062             {
01063          /* The ACK advance amount has to be greater than 1 to be
01064                     considered as extending the requesst.  This is primarily 
01065                     to filter out just an ACK for a FIN from the client.  Note
01066                     the ACK must advance so duplicate or OOO 
01067                     ACKs are ignored. */
01068 
01069                  if (new_ack > (current_request_end + 1))
01070                     more_REQ();
01071                  break;
01072                 }
01073 
01074          case DATA_ACK:
01075                {
01076                 /* cases with data and ACKs have two important sub-cases: 
01077                   (a) ACK sequence number advances which extends the amount
01078                   of request data, and (b) the data sequence # advances
01079                   which ends the request and is the beginning of the 
01080                   response to that request. */
01081         /* The ACK advance amount has to be greater than 1 to be
01082                    considered as extending the request.  This is primarily 
01083                    to filter out just an ACK for a FIN from the client.  Note
01084                    the ACK must advance so duplicate ACKs are ignored. */
01085 
01086                 if (new_ack > (current_request_end + 1))
01087                    more_REQ();
01088                 else
01089             if ((rc = check_ACK_advance(current_request_end)) < 0)
01090                        break;
01091 
01092                 /* the server's data sequence may not have advanced;
01093                    but if it has, treat it as the start of a response
01094                    and the end of the request data */
01095 
01096                 if ((end_seq > last_response_end) && 
01097                     (seq_bytes > 0))
01098                    {
01099             /* record info. for current request and change state 
01100                        to look for the end of the response*/
01101 
01102                     log_REQ();
01103                     begin_RSP();
01104                    }
01105                 break;
01106                }
01107          default:
01108                 break;
01109         } /* end switch on input type */ 
01110           break;
01111      }  /* end case IN_REQUEST */
01112 
01113        default:
01114        break;
01115       } /* end switch on connection state */
01116 
01117    /* save the last known time for the connection from current record */
01118    strcpy(last_connection_time, ts); 
01119 
01120  }  /* end main loop */  
01121   log_log();
01122   close (dumpFP);
01123   close (outFP);
01124   close (logFP);
01125 } /* end main() */
01126 
01127 
01128 /* Initialize connection state when a new connection is recognized by
01129    the presence of a SYN. */ 
01130 
01131 void init_connection(void)
01132 {
01133  log_SYN();                /* log start of connection */
01134  connection_state = SYN_SENT;  /* new connection state */
01135 
01136  /* save SYN sequence number for duplicate detection */
01137  if (has_seq == 1)
01138      current_synseq = begin_seq;
01139  else
01140      error_line ("SYN without valid sequence #");
01141 
01142  /* assume tcpdump relative addressing and initialize */
01143  /* note that initializing to 1 instead of 0 adjusts for ACK
01144     being the next expected sequence number */
01145 
01146  last_request_end = 1;       /* need "last" and "current" */
01147  last_response_end = 1;      /* values since there may be */
01148  current_response_end = 1;   /* > 1 request per connection */
01149  current_request_end = 1;
01150 
01151  strcpy(FIN_sent_time, "");
01152  strcpy(RST_sent_time, "");
01153  strcpy(last_connection_time, "");
01154 
01155  have_ACK_error = 0;
01156  have_value_error = 0;
01157  have_FINdata_error = 0;
01158 }
01159 
01160 /* Initialize connection state when a new connection is recognized by a
01161    change in the host/port 4-tuple, there is no SYN but request or 
01162    response activity can be determined.  Request or response state is
01163    initialized in the main program when the type of activity is determined */ 
01164 
01165 void init_active(void)
01166 {
01167  strcpy(FIN_sent_time, "");
01168  strcpy(RST_sent_time, "");
01169  strcpy(last_connection_time, "");
01170 
01171  have_ACK_error = 0;
01172  have_value_error = 0;
01173  have_FINdata_error = 0;
01174 
01175 }
01176 
01177 /* Check for some indications of possible errors 
01178    such as the ACK appearing to move back -- may indicate
01179    out-of-order or something worse. */
01180 
01181 int check_ACK_advance(unsigned long old_ack)
01182 {
01183  
01184  if ((new_ack < old_ack) &&
01185       report_ACK_err)
01186     {
01187      if (have_ACK_error == 0)
01188         {
01189          error_state("ACK error -- backward");
01190          have_ACK_error = 1;
01191         }
01192      return(-1);
01193     }
01194  else
01195     return (0);
01196 }
01197 
01198 /* Check to see if reuse of a connection in a trace is reasonable */
01199 
01200 void check_tuple_reuse(void)
01201 {
01202  /* ignore duplicate SYNs (have same SYN sequence number) */
01203 
01204  if ((has_seq == 1) &&
01205      (current_synseq != begin_seq))
01206     {
01207       /* if a non-duplicate SYN comes before any other activity,
01208          treat it as terminating the incomplete connection and starting
01209          another */
01210      
01211      if ((connection_state == SYN_SENT) ||
01212          ((connection_state == RESET) && (last_state == SYN_SENT)))
01213         {
01214          log_END("TRM");
01215          init_connection();
01216          return;
01217         }
01218 
01219 
01220     /* It is quite possible to have a valid new
01221     connection that reuses the same source and destination
01222     IP.port address 4-tuple.  Treat this as valid if 
01223     there has been a FIN from the server or at least
01224     2*MSL has passed since seeing any previous packet
01225     from this connection (in case we missed the FIN).  If there
01226     was a FIN from the server, allow for the case it was an
01227     active close by the server (normal for HTTP 1.0), the 
01228     stack is a BSD derivative, and SO_REUSEADDR is is use
01229     (see Stevens vol. 1, pp 245) */ 
01230 
01231      if (connection_state == FIN_SENT)
01232         elapsed = 60001;  /* force beyond 2MSL test */
01233      else        
01234         elapsed = elapsed_ms(ts, last_connection_time);
01235 
01236      if (elapsed < 60000)  /* MSL of 30 seconds, minimum */
01237         error_state("Non-duplicate SYN in connection");
01238      else
01239     {
01240      /* perform all actions associated with the end of
01241             one connection and the beginning of the next 
01242             (see state PENDING) */
01243          switch (connection_state)
01244         {
01245         case FIN_SENT:
01246                  log_END("FIN");
01247                  break;
01248             case RESET:
01249                  if (last_state == IN_RESPONSE)
01250                     log_RSP();
01251 
01252                  log_END("RST");
01253                  break;
01254             case IN_RESPONSE:
01255                  log_RSP();
01256                  log_END("TRM");
01257                  break;
01258             case IN_REQUEST:
01259                  log_REQ();
01260                  log_END("TRM");
01261                  break;
01262             default:
01263                  break;
01264         }
01265          init_connection();
01266         }
01267     }
01268 }
01269 
01270 /* Record information about the beginning of a new request */
01271 
01272 void begin_REQ(void)
01273 {
01274  current_request_end = new_ack;
01275 
01276  /* record the request start time as beginning at the 
01277     tcpdump time stamp on this record */
01278 
01279  strcpy(start_request_time, ts);
01280  strcpy(request_end_time, ts);
01281 
01282  last_state = connection_state;
01283  connection_state = IN_REQUEST;
01284 }
01285 
01286 /* Record information about an in-progress request */
01287 
01288 void more_REQ(void)
01289 {
01290  current_request_end = new_ack;
01291  strcpy(request_end_time, ts);
01292 }
01293 
01294 /* Record information about the beginning of a new response */
01295 
01296 void begin_RSP(void)
01297 {
01298  current_response_end = end_seq;
01299 
01300  /* record timestamp of tcpdump record as current
01301  value of both start and end times of response
01302  (in case no later end time is found) */
01303 
01304  strcpy(start_response_time, ts); 
01305  strcpy(response_end_time, ts);
01306 
01307  last_state = connection_state;
01308  connection_state = IN_RESPONSE;
01309 }
01310 
01311 /* Record information about an in-progress response */
01312 
01313 void more_RSP(void)
01314 {
01315  current_response_end = end_seq;
01316  /* save the new (potential) response end time */
01317  strcpy(response_end_time, ts);
01318 }
01319 
01320 
01321 /* Breaks dump record into essential data fields; initializes the
01322    following variables: begin_seq, end_seq, seq_bytes, new_ack,
01323                         has_ack, has_seq, input_type.
01324    Also checks for suspect sequence and ACK values.
01325 */
01326 
01327 int parse_dump_record()
01328 {
01329     begin_seq = end_seq = seq_bytes = new_ack = 0;
01330     has_ack = has_seq = 0;
01331 
01332    /* The following flag combinations are flagged because they (a) make no
01333       real sense for SYNs and (b) should be very rare. */
01334 
01335     if ((strcmp(fl, "SFRP") == 0) ||
01336     (strcmp(fl, "SFR") == 0) ||
01337     (strcmp(fl, "SFP") == 0) ||
01338     (strcmp(fl, "SF") == 0) ||
01339     (strcmp(fl, "SRP") == 0) ||
01340     (strcmp(fl, "SR") == 0))
01341        {
01342      /* If out of PENDING state (initial SYN recognized), just note
01343             the error, ignore it and continue.  */
01344 
01345         if (connection_state != PENDING)
01346             error_line ("SYN in combination with F or R");
01347         return(-1);
01348        }
01349 
01350     /* In tcpdump format, the fields coming after flags are, in order,
01351        data-seqno (format "bbb:eee(ccc)") and ack (format "ack xxx").
01352        Both the data-seqno and ack fields may not be present if they
01353        do not have valid information.  If the data-seqno field is not
01354        present, the first field after the flag is the string "ack";
01355        if both are not present the field after the flag is the 
01356        string "win".  Also check to see if tcpdump used absolute instead
01357        of relative values */
01358 
01359     if (strcmp(p1, "ack") == 0)  /* ack and no data sequence no. */
01360        {
01361         has_seq = 0;
01362         has_ack = 1; 
01363         new_ack = strtoul(p2, (char **)NULL, 10);
01364        }
01365     else
01366        {
01367         if (strcmp(p1, "win") == 0) /* no ack, no data */
01368        {
01369             has_ack = 0;
01370             has_seq = 0;
01371            }
01372         else 
01373        {
01374             /* assume it is a valid sequence number field */
01375         /* parse sequence field in header */
01376         /* sequence field format is
01377                <begin_seq #>:<end_seq #>(<seq_bytes count>) */
01378 
01379             if ((rc = get_sequence(p1, &begin_seq, &end_seq, &seq_bytes)) < 0)
01380            {
01381                 error_line ("invalid sequence # field");
01382                 return (-1);
01383                }
01384             has_seq = 1;
01385 
01386             /* check the field following the data sequence # for an ACK */
01387 
01388             if (strcmp(p2, "ack") == 0)  
01389                {
01390                 has_ack = 1; 
01391                 new_ack = strtoul(p3, (char **)NULL, 10);
01392                }
01393             else
01394                has_ack = 0;
01395            }
01396        }
01397 
01398    /* classify flag combinations into equivalence classes for later steps */
01399 
01400       if ((strcmp(fl, "F") == 0) || 
01401           (strcmp(fl, "FP") == 0) ||
01402           (strcmp(fl, "FR") == 0) ||
01403           (strcmp(fl, "FRP") == 0))
01404          input_type = FIN;
01405       else
01406      {
01407           if ((strcmp(fl, "R") == 0) ||
01408               (strcmp(fl, "RP") == 0))
01409              input_type = RST;
01410           else
01411          {
01412               if ((strcmp(fl, "S") == 0) || 
01413                   (strcmp(fl, "SP") == 0))
01414                  input_type = SYN;
01415               else
01416                 {
01417                  if ((has_ack == 1) &&
01418                      (has_seq == 0))
01419                     input_type = ACK_ONLY;
01420                  else
01421                     if ((has_seq == 1) &&
01422                         (has_ack == 1))
01423                        input_type = DATA_ACK;
01424                     else
01425                {   
01426                         error_line("Unexpected Data/ACK combination");
01427                         return (-1);
01428                        }
01429         }
01430          }
01431      }
01432 
01433 
01434       if ((connection_state == IN_RESPONSE) ||
01435           (connection_state == IN_REQUEST) ||
01436           (connection_state == SYN_SENT) ||
01437           ((connection_state == RESET) && (last_state == IN_RESPONSE)))
01438      {   
01439        /* allow for gaps of up to one normal TCP window */
01440 
01441           if (((input_type == FIN) ||
01442                (input_type == DATA_ACK)) &&
01443                (end_seq > (current_response_end + 65535)))
01444          {
01445               if (have_value_error == 0)
01446          {
01447                   error_line ("suspect sequence # value");
01448                   have_value_error = 1;
01449                  }
01450               return (-1);
01451              }
01452 
01453           /* ACK is required for each 2 segments with space for gaps */
01454 
01455           if (((input_type == FIN) ||
01456                (input_type == ACK_ONLY) ||
01457                (input_type == DATA_ACK)) &&
01458                (new_ack > (current_request_end + 16384)))
01459          {
01460               if (have_value_error == 0)
01461          {
01462                   error_line ("suspect ACK value");
01463                   have_value_error = 1;
01464                  }
01465               return (-1);
01466              }
01467          }
01468  return(0);
01469 }
01470 
01471 /* Output data associated with ending a connection */
01472 
01473 void log_connection(void)
01474 {
01475 /* if no more tcpdump records found while processing an http
01476    request, log (perhaps incomplete) client request */  
01477 
01478    if (connection_state == IN_REQUEST)
01479       log_REQ();
01480    else
01481       {
01482        /* if no more records found while processing an http 
01483           response, log (perhaps incomplete) response information */
01484 
01485        if ((connection_state == IN_RESPONSE) ||
01486            ((connection_state == RESET) && last_state == IN_RESPONSE))
01487       { /* don't log if just ACKed 1 (assume  FIN) */
01488            if (current_response_end > (last_response_end + 1))
01489                log_RSP();
01490           }
01491       }
01492 
01493    /* make log entry indicating type of connection termination;
01494       entry for connection is made only if a valid start (SYN) was
01495       previously recognized */ 
01496 
01497    if (connection_state != PENDING)  /* saw SYN */
01498       {
01499        if (connection_state == FIN_SENT) 
01500            log_END("FIN");
01501        else
01502       {
01503            if (connection_state == RESET) 
01504                log_END("RST");
01505            else
01506                log_END("TRM");
01507       }
01508       }
01509    else
01510       {
01511        if (((have_pending_fins > 0) +
01512             (have_pending_rsts > 0) +
01513             (have_pending_othr > 0) +
01514             (have_pending_acks > 0)) > 1)
01515           pending_cmb_count++;
01516        else
01517       {
01518            pending_fin_count += (have_pending_fins > 0);
01519            pending_rst_count += (have_pending_rsts > 0);
01520            pending_ack_count += (have_pending_acks > 0);
01521            pending_oth_count += (have_pending_othr > 0);
01522           }
01523       }
01524 }
01525 
01526 
01527 void log_log(void)
01528 {
01529  fprintf(logFP, "Input tcpdump file: %s \n", input_name);
01530  fprintf(logFP, "Output connection file: %s \n", output_name);
01531  fprintf(logFP, "   SYNs     %8d \n", syn_count);
01532  fprintf(logFP, "   REQs     %8d \n", req_count);
01533  fprintf(logFP, "   ACT-REQs %8d \n", act_req_count);
01534  fprintf(logFP, "   RSPs     %8d \n", rsp_count);
01535  fprintf(logFP, "   ACT-RSPs %8d \n", act_rsp_count);
01536  fprintf(logFP, "   FINs     %8d \n", fin_count);
01537  fprintf(logFP, "   RSTs     %8d \n", rst_count);
01538  fprintf(logFP, "   TRMs     %8d \n", trm_count);
01539  fprintf(logFP, "   ERRs     %8d \n", err_count);
01540  fprintf(logFP, "Partial Connections:\n");
01541  fprintf(logFP, " FIN only   %8d \n", pending_fin_count);
01542  fprintf(logFP, " RST only   %8d \n", pending_rst_count);
01543  fprintf(logFP, " ACK only   %8d \n", pending_ack_count);
01544  fprintf(logFP, " Combos     %8d \n", pending_cmb_count);
01545  fprintf(logFP, " Other      %8d \n", pending_oth_count);
01546 }
01547 
01548 /* A set of event-specific data logging functions.  A critical part of
01549    the logging functions for Requests and Responses is to save the 
01550    "current" value of the sequence number (ACK or data) that marks the
01551    end of it as the "last" value.  This is done to tell when the 
01552    sequence number advances again for multiple request/response pairs
01553    in a connection and to allow computing its size as (current - last). */
01554 
01555 void log_REQ(void)
01556 {
01557 /* parse sourse host/port */
01558   get_host_port(current_src, src_host, src_port);
01559 
01560 /* parse destination host/port */
01561   get_host_port(current_dst, dst_host, dst_port);
01562 
01563   /* for requests we log the request start time  -- the tcpdump 
01564      timestamp on the first record associated with a request -- 
01565      along with the TCP connection information and the size of the 
01566      request data */
01567 
01568   fprintf(outFP, "%s %-15s %5s > %-15s %4s: REQ %12d  %s\n", 
01569                                     start_request_time,  
01570                                     dst_host, dst_port, src_host, src_port,  
01571                                     current_request_end - last_request_end,
01572                                     request_end_time);
01573   /* IMPORTANT */
01574   last_request_end = current_request_end;
01575   req_count++;
01576 }
01577 
01578 void log_RSP(void)
01579 {
01580 /* parse sourse host/port */
01581   get_host_port(current_src, src_host, src_port);
01582 
01583 /* parse destination host/port */
01584   get_host_port(current_dst, dst_host, dst_port);
01585 
01586   /* for responses we log the response end time  -- the tcpdump 
01587      timestamp on the last record associated with a response -- 
01588      along with the TCP connection information, the size of the 
01589      response data, and the response start time -- the tcpdump
01590      timestamp on the first record associated with the response. */
01591 
01592   fprintf(outFP, "%s %-15s %5s > %-15s %4s: RSP %12d  %s\n", 
01593                                    response_end_time, 
01594                                    dst_host, dst_port, src_host, src_port,  
01595                                    current_response_end - last_response_end,
01596                                    start_response_time);
01597 #ifdef FOO
01598   fprintf(outFP, "%s %-15s %5s > %-15s %4s RSP %d %s\n", start_response_time, 
01599                                    src_host, src_port, dst_host, dst_port, 
01600                                    current_response_end - last_response_end,
01601                                    response_end_time);
01602   fprintf(outFP, "%s %s > %s RSP %d\n", start_response_time, current_src, 
01603                                    current_dst, 
01604                                    current_response_end - last_response_end);
01605 #endif
01606   /* IMPORTANT */
01607   last_response_end = current_response_end;
01608   rsp_count++;
01609 }
01610 
01611 void log_SYN(void)
01612 {
01613 /* parse sourse host/port */
01614   get_host_port(current_src, src_host, src_port);
01615 
01616 /* parse destination host/port */
01617   get_host_port(current_dst, dst_host, dst_port);
01618 
01619   fprintf(outFP, "%s %-15s %5s > %-15s %4s: SYN\n", ts, 
01620                                      dst_host, dst_port, src_host, src_port);  
01621   syn_count++;
01622 }
01623 
01624 void log_END(char *how)
01625 {
01626   char logical_end_time[20];
01627   
01628 /* parse sourse host/port */
01629   get_host_port(current_src, src_host, src_port);
01630 
01631 /* parse destination host/port */
01632   get_host_port(current_dst, dst_host, dst_port);
01633 
01634   if (strcmp(how, "FIN") == 0)
01635      {
01636       fin_count++;
01637       strcpy(logical_end_time, FIN_sent_time);
01638      }
01639   else
01640     {
01641      if (strcmp(how, "RST") == 0)
01642         {
01643          rst_count++;
01644          strcpy(logical_end_time, RST_sent_time);
01645         }
01646      else
01647         if (strcmp(how, "TRM") == 0)
01648        {
01649             trm_count++;
01650             strcpy(logical_end_time, last_connection_time);
01651            }
01652     }
01653 
01654   /* for termination of a connection we record the tcpdump timestamp of
01655      the last record of any kind associated with that conneciton along
01656      with the TCP connection 4-tuple and the way the connection ended
01657      (FIN, Reset, or just no more records in the trace). */
01658 
01659   fprintf(outFP, "%s %-15s %5s > %-15s %4s: %s               %s\n", 
01660                                     last_connection_time, 
01661                                     dst_host, dst_port, src_host, src_port,  
01662                                     how, logical_end_time);
01663 }
01664 
01665 void log_ACT(char *how)
01666 {
01667 /* parse sourse host/port */
01668   get_host_port(current_src, src_host, src_port);
01669 
01670 /* parse destination host/port */
01671   get_host_port(current_dst, dst_host, dst_port);
01672 
01673   /* for activity on a SYN-less connection we record the tcpdump timestamp
01674      of the first record of activiy associated with that conneciton along
01675      with the TCP connection 4-tuple and the way the connection started
01676      (Request or Response). */
01677 
01678   fprintf(outFP, "%s %-15s %5s > %-15s %4s: ACT-%s\n", ts, 
01679                                     dst_host, dst_port, src_host, src_port,  
01680                                     how);
01681   if (strcmp(how, "REQ") == 0)
01682      act_req_count++;
01683   else
01684      if (strcmp(how, "RSP") == 0)
01685         act_rsp_count++;
01686 }
01687 
01688 void error_line(char * s)
01689 {
01690 /* parse sourse host/port */
01691   get_host_port(sh, src_host, src_port);
01692 
01693 /* parse destination host/port */
01694   get_host_port(dh, dst_host, dst_port);
01695 
01696   fprintf(outFP, "%s %-15s %5s > %-15s %4s: ERR: %s\n", ts, 
01697                                    dst_host, dst_port, src_host, src_port, s);
01698   err_count++;
01699 }
01700 
01701 void error_state(char * s)
01702 {
01703 /* parse sourse host/port */
01704   get_host_port(sh, src_host, src_port);
01705 
01706 /* parse destination host/port */
01707   get_host_port(dh, dst_host, dst_port);
01708 
01709   fprintf(outFP, "%s %-15s %5s > %-15s %4s: ERR: %s\n", ts, 
01710                                    dst_host, dst_port, src_host, src_port, s);
01711   err_count++;
01712 }
01713 
01714 void get_host_port(char *adr, char *host, char *port)
01715 {
01716  char *fp;
01717  char *fpx;
01718  char adr_field[50];
01719 
01720  strcpy(adr_field, adr);
01721  /* break string at '.' separating host and port fields (last in string) */
01722  fp = (char *)rindex(adr_field, '.');
01723  *fp = '\0';   /* replace '.' with string terminator */
01724  strcpy(host, adr_field); /* copies host name up to terminator */ 
01725 
01726  fp++;  /* move pointer past terminator to 1st char in port field */
01727  fpx = (char *)index(fp, ':');   /* see if we have the ':' after a dst port */
01728  if (fpx != NULL)
01729      *fpx = '\0';  /* if so, replace with string terminator */
01730  strcpy(port, fp); 
01731 }
01732 
01733 int get_sequence(char *p, unsigned long *begin, unsigned long *end,
01734                            unsigned long *bytes)
01735 {
01736  char seq_field[50];
01737  char *cursor = seq_field;
01738  char *fp;
01739 
01740  strcpy (seq_field, p);
01741 
01742  fp = (char *)strsep(&cursor, ":" );
01743  if ((cursor == (char *)NULL) ||
01744      (fp == (char *)NULL))
01745     return (-1);
01746  else
01747     *begin = strtoul(fp, (char **)NULL, 10);
01748 
01749  fp = (char *)strsep(&cursor, "(" );
01750  if ((cursor == (char *)NULL) ||
01751      (fp == (char *)NULL))
01752     return (-1);
01753  else
01754     *end = strtoul(fp, (char **)NULL, 10);
01755 
01756  fp = (char *)strsep(&cursor, ")" );
01757  if ((cursor == (char *)NULL) ||
01758      (fp == (char *)NULL))
01759     return (-1);
01760  else
01761     *bytes = strtoul(fp, (char **)NULL, 10);
01762  return(0);
01763 }
01764 
01765 /*--------------------------------------------------------------*/ 
01766 /* subtract two timevals (t1 - t0) with result in tdiff         */
01767 /* tdiff, t1 and t0 are all pointers to struct timeval          */
01768 /*--------------------------------------------------------------*/ 
01769 static void
01770 tvsub(tdiff, t1, t0)
01771 struct timeval *tdiff, *t1, *t0;
01772 {
01773 
01774         tdiff->tv_sec = t1->tv_sec - t0->tv_sec;
01775         tdiff->tv_usec = t1->tv_usec - t0->tv_usec;
01776         if (tdiff->tv_usec < 0)
01777            {
01778             tdiff->tv_sec--;
01779             tdiff->tv_usec += 1000000;
01780            }
01781 }
01782 
01783 /*--------------------------------------------------------------*/ 
01784 /* compute the elapsed time in milliseconds to end_time         */
01785 /* from some past time given by start_time (both formatted timevals) */
01786 /*--------------------------------------------------------------*/ 
01787 long elapsed_ms(char *end, char *start)
01788 {
01789  struct timeval delta, end_time, start_time;
01790  long elapsed_time;
01791 
01792  char end_tmp[20];
01793  char start_tmp[20];
01794 
01795  char *cursor;
01796  char *cp;
01797 
01798  strcpy(end_tmp, end);
01799  cursor = end_tmp;
01800  cp = (char *)strsep(&cursor, "." );
01801  end_time.tv_sec = atoi(end_tmp);
01802  end_time.tv_usec = atoi(cursor);
01803 
01804  strcpy(start_tmp, start);
01805  cursor = start_tmp;
01806  cp = (char *)strsep(&cursor, "." );
01807  start_time.tv_sec = atoi(start_tmp);
01808  start_time.tv_usec = atoi(cursor);
01809 
01810  tvsub(&delta, &end_time, &start_time);
01811  /* express as milliseconds */
01812  elapsed_time = (delta.tv_sec * 1000) + (delta.tv_usec/1000);
01813  return (elapsed_time);
01814 }
01815 

Generated on Tue Mar 6 16:47:46 2007 for ns2 Network Simulator 2.29 by  doxygen 1.4.6