dec/tr-stat.cc

Go to the documentation of this file.
00001 // Generate statistics from UCB traces
00002 // All we need to know: 
00003 // 
00004 // (1) client request streams: 
00005 //     <time> <clientID> <serverID> <URL_ID> 
00006 // (2) server page mod stream(s):
00007 //     <serverID> <URL_ID> <PageSize>
00008 //
00009 // Part of the code comes from Steven Gribble's UCB trace-parsing code
00010 // 
00011 // $Header: /nfs/jade/vint/CVSROOT/ns-2/indep-utils/webtrace-conv/dec/tr-stat.cc,v 1.3 2005/09/18 23:33:32 tomh Exp $
00012 
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <string.h>
00016 #include <ctype.h>
00017 #include <time.h>
00018 #include <sys/types.h>
00019 #include <sys/socket.h>
00020 #include <netinet/in.h>
00021 #include <arpa/inet.h>
00022 #include <tcl.h>
00023 
00024 #include "proxytrace.h"
00025 
00026 FILE *cf, *sf;
00027 double initTime = -1;
00028 double duration = -1;
00029 double startTime = -1;
00030 
00031 Tcl_HashTable cidHash;  // Client id (IP, port) hash
00032 static int client = 0;  // client sequence number
00033 
00034 Tcl_HashTable sidHash;  // server id (IP, port) hash
00035 static int server = 0;  // server sequence number
00036 
00037 Tcl_HashTable urlHash;  // URL id hash
00038 static int url = 0; // URL sequence number
00039 static int* umap;   // URL mapping table, used for url sort
00040 
00041 ReqLog* rlog = NULL;
00042 unsigned int num_rlog = 0, sz_rlog = 0;
00043 
00044 static int compare(const void *a1, const void *b1)
00045 {
00046     const ReqLog *a = (const ReqLog*)a1, *b = (const ReqLog*)b1;
00047     return (a->time > b->time) ? 1 : 
00048         (a->time == b->time) ? 0 : -1;
00049 }
00050 
00051 void sort_rlog()
00052 {
00053     qsort((void *)rlog, num_rlog, sizeof(ReqLog), compare);
00054     double t = rlog[0].time;
00055     for (unsigned int i = 0; i < num_rlog; i++) {
00056         rlog[i].time -= t;
00057         fprintf(cf, "%f %d %d %d\n", rlog[i].time, 
00058             rlog[i].cid, rlog[i].sid, umap[rlog[i].url]);
00059     }
00060     // Record trace duration and # of unique urls
00061     fprintf(cf, "i %f %u\n", rlog[num_rlog-1].time, url);
00062 
00063     fprintf(stderr, 
00064         "%d unique clients, %d unique servers, %d unique urls.\n", 
00065         client, server, url);
00066 }
00067 
00068 static int compare_url(const void* a1, const void* b1)
00069 {
00070     const URL **a = (const URL**)a1, **b = (const URL**)b1;
00071     return ((*a)->access > (*b)->access) ? -1:
00072         ((*a)->access == (*b)->access) ? 0 : 1;
00073 }
00074 
00075 void sort_url()
00076 {
00077     // XXX use an interval member of Tcl_HashTable
00078     URL** tbl = new URL*[urlHash.numEntries];
00079     Tcl_HashEntry *he;
00080     Tcl_HashSearch hs;
00081     int i = 0, sz = urlHash.numEntries;
00082     for (he = Tcl_FirstHashEntry(&urlHash, &hs);
00083          he != NULL;
00084          he = Tcl_NextHashEntry(&hs))
00085         tbl[i++] = (URL*)Tcl_GetHashValue(he);
00086     Tcl_DeleteHashTable(&urlHash);
00087 
00088     // sort using access frequencies
00089     qsort((void *)tbl, sz, sizeof(URL*), compare_url);
00090     umap = new int[url];
00091     // write sorted url to page table
00092     for (i = 0; i < sz; i++) {
00093         umap[tbl[i]->id] = i;
00094         fprintf(sf, "%d %d %d %u\n", tbl[i]->sid, i,
00095             tbl[i]->size, tbl[i]->access);
00096         delete tbl[i];
00097     }
00098     delete []tbl;
00099 }
00100 
00101 const unsigned long MAX_FILESIZE = 10000000;
00102 
00103 double lf_analyze(TEntry& lfe)
00104 {
00105     double time;
00106     int ne, cid, sid, uid;
00107     Tcl_HashEntry *he;
00108 
00109     // Filter out entries with 'post', 'head' etc. only keep 'get'
00110     // Also filter out 
00111     if (lfe.tail.method != METHOD_GET)
00112         return -1;
00113     if ((lfe.tail.flags & QUERY_FOUND_FLAG) || 
00114         (lfe.tail.flags & CGI_BIN_FLAG))
00115         return -1;
00116     if ((lfe.tail.status != 200) && (lfe.tail.status != 304))
00117         return -1;
00118 
00119     // We don't consider pages with size 0
00120     if (lfe.head.size == 0)
00121         return -1;
00122     // We don't consider file size larger than 10MB
00123     if (lfe.head.size > MAX_FILESIZE)
00124         return -1;
00125 
00126     time = (double)lfe.head.time_sec + (double)lfe.head.time_usec/(double)1000000.0;
00127 
00128     if (initTime < 0) {
00129         initTime = time;
00130         time = 0;
00131     } else 
00132         time -= initTime;
00133 
00134     // If a trace start time is required, don't do anything
00135     if ((startTime > 0) && (time < startTime)) 
00136         return -1;
00137 
00138     // check client id
00139     long clientKey = lfe.head.client;
00140     if (!(he = Tcl_FindHashEntry(&cidHash, (const char *)clientKey))) {
00141         // new client, allocate a client id
00142         he = Tcl_CreateHashEntry(&cidHash, (const char *)clientKey, &ne);
00143         client++;
00144         long clientValue = client;
00145         Tcl_SetHashValue(he, clientValue);
00146         cid = client;
00147     } else {
00148         // existing entry, find its client seqno
00149         cid = (long)Tcl_GetHashValue(he);
00150     }
00151 
00152     // check server id
00153     long serverKey = lfe.head.server;
00154     if (!(he = Tcl_FindHashEntry(&sidHash, (const char *)serverKey))) {
00155         // new client, allocate a client id
00156         he = Tcl_CreateHashEntry(&sidHash, (const char *)serverKey, &ne);
00157         server++;
00158         long serverValue = server;
00159         Tcl_SetHashValue(he, serverValue);
00160         sid = server;
00161     } else {
00162         // existing entry, find its client seqno
00163         sid = (long)Tcl_GetHashValue(he);
00164     }
00165 
00166     // check url id
00167     long urlKey = lfe.url;
00168     if (!(he = Tcl_FindHashEntry(&urlHash, (const char*)urlKey))) {
00169         // new client, allocate a client id
00170         he = Tcl_CreateHashEntry(&urlHash, (const char*)urlKey, &ne);
00171         URL* u = new URL(++url, sid, lfe.head.size);
00172         Tcl_SetHashValue(he, (const char*)u);
00173         uid = u->id;
00174     } else {
00175         // existing entry, find its client seqno
00176         URL* u = (URL*)Tcl_GetHashValue(he);
00177         u->access++;
00178         uid = u->id;
00179     }
00180 
00181     rlog[num_rlog++] = ReqLog(time, cid, sid, uid);
00182     //fprintf(cf, "%f %d %d %d\n", time, cid, sid, uid);
00183 
00184     if (startTime > 0) 
00185         return time - startTime;
00186     else 
00187         return time;
00188 }

Generated on Tue Mar 6 16:47:53 2007 for ns2 Network Simulator 2.29 by  doxygen 1.4.6