nlanr/tr-stat.cc

Go to the documentation of this file.
00001 // Generate statistics from UCB traces
00002 // All we need to know: 
00003 // 
00004 // (1) client request streams: 
00005 //     <time> <clientID> <serverID> <URL_ID> 
00006 // (2) server page mod stream(s):
00007 //     <serverID> <URL_ID> <PageSize> <access count>
00008 //
00009 // Parts of this code come from Steven Gribble's UCB trace-parsing code
00010 // 
00011 // $Header: /nfs/jade/vint/CVSROOT/ns-2/indep-utils/webtrace-conv/nlanr/tr-stat.cc,v 1.3 2005/09/18 23:33:33 tomh Exp $
00012 
00013 #include <stdio.h>
00014 #include <stdlib.h>
00015 #include <string.h>
00016 #include <ctype.h>
00017 #include <time.h>
00018 #include <sys/types.h>
00019 #include <sys/socket.h>
00020 #include <netinet/in.h>
00021 #include <arpa/inet.h>
00022 #include <tcl.h>
00023 
00024 #include "logparse.h"
00025 
00026 Tcl_HashTable cidHash;  // one-word-key hash: client key (lfe.cid) -> client sequence number
00027 int client = 0;     // last client sequence number handed out (== number of unique clients)
00028 
00029 Tcl_HashTable sidHash;  // string-key hash: server id string (lfe.sid) -> server sequence number
00030 int server = 0;     // last server sequence number handed out (== number of unique servers)
00031 
00032 Tcl_HashTable urlHash;  // string-key hash: URL string (lfe.url) -> heap-allocated URL record
00033 int url = 0;        // last URL sequence number handed out (== number of unique URLs)
00034 int* umap;      // URL id -> rank after sorting by access count; built in sort_url(), freed in sort_rlog()
// Per-URL record stored as the value of each urlHash entry.
struct URL {
    // A newly created record starts with one recorded access.
    URL(int urlId, int serverId, int pageSize)
        : access(1), id(urlId), sid(serverId), size(pageSize) {}

    int access; // number of requests recorded for this URL
    int id;     // URL sequence number given at construction
    int sid;    // server sequence number given at construction
    int size;   // page size given at construction
};
00041 
00042 FILE *cf, *sf;  // cf: request log ("reqlog"); sf: page log ("pglog"); both opened in main()
00043 double initTime = -1;  // timestamp of the first entry analysed; negative until that entry is seen
00044 double duration = -1;  // optional limit parsed from argv[2]; main() stops once exceeded; -1 = no limit
00045 double startTime = -1;  // optional start offset parsed from argv[3]; earlier entries are skipped; -1 = none
00046 
// One accepted client request, buffered in rlog until it is sorted and written.
struct ReqLog {
    // Default construction leaves every field uninitialised (used for array allocation).
    ReqLog() {}

    ReqLog(double when, unsigned int clientId, unsigned int serverId,
           unsigned int urlId)
        : time(when), cid(clientId), sid(serverId), url(urlId) {}

    double time;       // request time as stored by the caller
    unsigned int cid;  // client sequence number
    unsigned int sid;  // server sequence number
    unsigned int url;  // URL sequence number
};
00054 ReqLog* rlog = NULL;  // buffer of accepted requests; allocated in main(), filled by lf_analyze()
00055 unsigned int num_rlog = 0, sz_rlog = 0;  // num_rlog: entries in use; sz_rlog: capacity, initialised from argv[1]
00056 
00057 int compare(const void *a1, const void *b1)
00058 {
00059     const ReqLog *a = (const ReqLog*)a1, *b = (const ReqLog*)b1;
00060     return (a->time > b->time) ? 1 : 
00061         (a->time == b->time) ? 0 : -1;
00062 }
00063 
00064 void sort_rlog()
00065 {
00066     qsort((void *)rlog, num_rlog, sizeof(ReqLog), compare);
00067     double t = rlog[0].time;
00068     for (unsigned int i = 0; i < num_rlog; i++) {
00069         rlog[i].time -= t;
00070         fprintf(cf, "%f %d %d %d\n", rlog[i].time, 
00071             rlog[i].cid, rlog[i].sid, umap[rlog[i].url]);
00072     }
00073     delete []umap;
00074     // Record trace duration and # of unique urls
00075     fprintf(cf, "i %f %u\n", rlog[num_rlog-1].time, url);
00076 }
00077 
00078 int compare_url(const void* a1, const void* b1)
00079 {
00080     const URL **a = (const URL**)a1, **b = (const URL**)b1;
00081     return ((*a)->access > (*b)->access) ? -1:
00082         ((*a)->access == (*b)->access) ? 0 : 1;
00083 }
00084 
00085 void sort_url()
00086 {
00087     // XXX use an interval member of Tcl_HashTable
00088     URL** tbl = new URL*[urlHash.numEntries];
00089     Tcl_HashEntry *he;
00090     Tcl_HashSearch hs;
00091     int i = 0, sz = urlHash.numEntries;
00092     for (he = Tcl_FirstHashEntry(&urlHash, &hs);
00093          he != NULL;
00094          he = Tcl_NextHashEntry(&hs))
00095         tbl[i++] = (URL*)Tcl_GetHashValue(he);
00096     Tcl_DeleteHashTable(&urlHash);
00097 
00098     // sort using access frequencies
00099     qsort((void *)tbl, sz, sizeof(URL*), compare_url);
00100     umap = new int[url];
00101     // write sorted url to page table
00102     for (i = 0; i < sz; i++) {
00103         umap[tbl[i]->id] = i;
00104         fprintf(sf, "%d %d %d %u\n", tbl[i]->sid, i,
00105             tbl[i]->size, tbl[i]->access);
00106         delete tbl[i];
00107     }
00108     delete []tbl;
00109 }
00110 
00111 double lf_analyze(lf_entry& lfe)
00112 {
00113     double time;
00114     int ne, cid, sid, uid;
00115     Tcl_HashEntry *he;
00116 
00117     time = lfe.rt;
00118 
00119     if (initTime < 0) {
00120         initTime = time;
00121         time = 0;
00122     } else 
00123         time -= initTime;
00124 
00125     // If a trace start time is required, don't do anything
00126     if ((startTime > 0) && (time < startTime)) 
00127         return -1;
00128 
00129     // Ignore pages with size 0
00130     if (lfe.size == 0) 
00131         return -1;
00132 
00133     // check client id
00134     long clientKey = lfe.cid;
00135     if (!(he = Tcl_FindHashEntry(&cidHash, (const char *)clientKey))) {
00136         // new client, allocate a client id
00137         he = Tcl_CreateHashEntry(&cidHash, (const char *)clientKey, &ne);
00138         Tcl_SetHashValue(he, (long)++client);
00139         cid = client;
00140     } else {
00141         // existing entry, find its client seqno
00142         cid = (long)Tcl_GetHashValue(he);
00143     }
00144 
00145     // check server id
00146     if (!(he = Tcl_FindHashEntry(&sidHash, lfe.sid))) {
00147         // new server, assign a server id
00148         he = Tcl_CreateHashEntry(&sidHash, lfe.sid, &ne);
00149         server++;
00150         long serverValue = server;
00151         Tcl_SetHashValue(he, serverValue);
00152         sid = server;
00153     } else {
00154         // existing entry, find its client seqno
00155         sid = (long)Tcl_GetHashValue(he);
00156     }
00157 
00158     // check url id
00159     if (!(he = Tcl_FindHashEntry(&urlHash, lfe.url))) {
00160         // new client, allocate a client id
00161         he = Tcl_CreateHashEntry(&urlHash, lfe.url, &ne);
00162         URL* u = new URL(++url, sid, lfe.size);
00163         Tcl_SetHashValue(he, (const char*)u);
00164         uid = u->id;
00165         //fprintf(sf, "%d %d %ld\n", sid, u->id, lfe.rhl+lfe.rdl);
00166     } else {
00167         // existing entry, find its client seqno
00168         URL* u = (URL*)Tcl_GetHashValue(he);
00169         u->access++;
00170         uid = u->id;
00171     }
00172 
00173     rlog[num_rlog++] = ReqLog(time, cid, sid, uid);
00174     //fprintf(cf, "%f %d %d %d\n", time, cid, sid, uid);
00175 
00176     if (startTime > 0) 
00177         return time - startTime;
00178     else 
00179         return time;
00180 }
00181 
00182 int main(int argc, char**argv)
00183 {
00184     lf_entry lfntree;
00185     int      ret;
00186     double   ctime;
00187 
00188     // Init tcl
00189     Tcl_Interp *interp = Tcl_CreateInterp();
00190     if (Tcl_Init(interp) == TCL_ERROR) {
00191         printf("%s\n", interp->result);
00192         abort();
00193     }
00194     Tcl_InitHashTable(&cidHash, TCL_ONE_WORD_KEYS);
00195     Tcl_InitHashTable(&sidHash, TCL_STRING_KEYS);
00196     Tcl_InitHashTable(&urlHash, TCL_STRING_KEYS);
00197 
00198     if ((cf = fopen("reqlog", "w")) == NULL) {
00199         printf("cannot open request log.\n");
00200         exit(1);
00201     }
00202     if ((sf = fopen("pglog", "w")) == NULL) {
00203         printf("cannot open page log.\n");
00204         exit(1);
00205     }
00206 
00207     if ((argc < 2) || (argc > 4)) {
00208         printf("Usage: %s <trace size> [<time duration>] [<start_time>]\n", argv[0]);
00209         return 1;
00210     }
00211     if (argc >= 3) {
00212         duration = strtod(argv[2], NULL);
00213         if (argc == 4) {
00214             startTime = strtod(argv[3], NULL);
00215             printf("start time = %f\n", startTime);
00216         }
00217     }
00218 
00219     sz_rlog = strtoul(argv[1], NULL, 10);
00220     rlog = new ReqLog[sz_rlog];
00221 
00222     while(1) {
00223         ret = lf_get_next_entry(stdin, lfntree);
00224         if (ret > 0) {
00225             if (ret == 1) {
00226                 /* EOF */
00227                 break;
00228             }
00229             fprintf(stderr, "Failed to get next entry.\n");
00230             exit(1);
00231         } else if (ret < 0) {
00232             // Unusable entry, i.e., cache miss, cgi-bin, etc.
00233             continue;
00234         }
00235         // Analyse one log entry
00236         ctime = lf_analyze(lfntree);
00237         delete []lfntree.url;
00238         delete []lfntree.sid;
00239         if ((duration > 0) && (ctime > duration))
00240             break;
00241     }
00242     Tcl_DeleteHashTable(&cidHash);
00243     Tcl_DeleteHashTable(&sidHash);
00244 
00245     fprintf(stderr, "sort url\n");
00246     sort_url();
00247     fclose(sf);
00248 
00249     fprintf(stderr, "sort requests\n");
00250     sort_rlog();
00251     fclose(cf);
00252 
00253     fprintf(stderr, 
00254         "%d unique clients, %d unique servers, %d unique urls.\n", 
00255         client, server, url);
00256     return 0;
00257 }

Generated on Tue Mar 6 16:47:53 2007 for ns2 Network Simulator 2.29 by  doxygen 1.4.6