00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include <stdio.h>
00016 #include <stdlib.h>
00017 #include <string.h>
00018 #include <ctype.h>
00019 #include <time.h>
00020 #include <sys/types.h>
00021 #include <sys/socket.h>
00022 #include <netinet/in.h>
00023 #include <arpa/inet.h>
00024 #include <tcl.h>
00025
00026 #include "logparse.h"
00027
00028 Tcl_HashTable cidHash;
00029 int client = 0;
00030
00031 Tcl_HashTable sidHash;
00032 int server = 0;
00033
00034 Tcl_HashTable urlHash;
00035 int url = 0;
00036 int* umap;
/*
 * Bookkeeping record for one distinct URL.  Instances are stored as the
 * values of urlHash entries.
 */
struct URL {
	int access;	/* request count; a freshly created record starts at 1 */
	int id;		/* identifier given by the creator of the record */
	int sid, size;	/* server number and page size given at creation */

	/* Build a record for a URL that has just been requested once. */
	URL(int url_id, int server_id, int page_size)
		: access(1),
		  id(url_id),
		  sid(server_id),
		  size(page_size)
	{
	}
};
00043
/* Output streams: cf receives the request log, sf the page log (opened in main()). */
FILE *cf;
FILE *sf;

/* Timestamp of the first accepted entry; negative until one has been seen. */
double initTime = -1.0;
/* Optional time limit taken from the command line; negative means "not set". */
double duration = -1.0;
/* Optional start offset taken from the command line; negative means "not set". */
double startTime = -1.0;
00048
/*
 * One request record: when it happened and which client, server and URL
 * it involved.
 */
struct ReqLog {
	double time;
	unsigned int cid, sid, url;

	/* Default construction leaves every member unset (used for array allocation). */
	ReqLog()
	{
	}

	/* Build a fully populated record. */
	ReqLog(double when, unsigned int client_id, unsigned int server_id,
	       unsigned int url_id)
		: time(when),
		  cid(client_id),
		  sid(server_id),
		  url(url_id)
	{
	}
};
00056 ReqLog* rlog = NULL;
00057 unsigned int num_rlog = 0, sz_rlog = 0;
00058
00059 int compare(const void *a1, const void *b1)
00060 {
00061 const ReqLog *a, *b;
00062 a = (const ReqLog*)a1, b = (const ReqLog*)b1;
00063 return (a->time > b->time) ? 1 :
00064 (a->time == b->time) ? 0 : -1;
00065 }
00066
00067 void sort_rlog()
00068 {
00069 qsort((void *)rlog, num_rlog, sizeof(ReqLog), compare);
00070 double t = rlog[0].time;
00071 for (unsigned int i = 0; i < num_rlog; i++) {
00072 rlog[i].time -= t;
00073 fprintf(cf, "%f %d %d %d\n", rlog[i].time,
00074 rlog[i].cid, rlog[i].sid, umap[rlog[i].url]);
00075 }
00076 delete []umap;
00077
00078 fprintf(cf, "i %f %u\n", rlog[num_rlog-1].time, url);
00079 }
00080
00081 int compare_url(const void* a1, const void* b1)
00082 {
00083 const URL **a, **b;
00084 a = (const URL**)a1, b = (const URL**)b1;
00085 return ((*a)->access > (*b)->access) ? -1:
00086 ((*a)->access == (*b)->access) ? 0 : 1;
00087 }
00088
00089 void sort_url()
00090 {
00091
00092 URL** tbl = new URL*[urlHash.numEntries];
00093 Tcl_HashEntry *he;
00094 Tcl_HashSearch hs;
00095 int i = 0, sz = urlHash.numEntries;
00096 for (he = Tcl_FirstHashEntry(&urlHash, &hs);
00097 he != NULL;
00098 he = Tcl_NextHashEntry(&hs))
00099 tbl[i++] = (URL*)Tcl_GetHashValue(he);
00100 Tcl_DeleteHashTable(&urlHash);
00101
00102
00103 qsort((void *)tbl, sz, sizeof(URL*), compare_url);
00104 umap = new int[url];
00105
00106 for (i = 0; i < sz; i++) {
00107 umap[tbl[i]->id] = i;
00108 fprintf(sf, "%d %d %d %u\n", tbl[i]->sid, i,
00109 tbl[i]->size, tbl[i]->access);
00110 delete tbl[i];
00111 }
00112 delete []tbl;
00113 }
00114
/* Largest header + data length lf_analyze() accepts; bigger entries are skipped. */
const unsigned long MAX_FILESIZE = 10UL * 1000UL * 1000UL;
00116
00117 double lf_analyze(lf_entry& lfe)
00118 {
00119 double time;
00120 int id[2], ne, cid, sid, uid;
00121 Tcl_HashEntry *he;
00122
00123
00124
00125
00126
00127
00128 if ((lfe.cprg != 0xff) && (lfe.cprg & PB_CLNT_NO_CACHE))
00129 return -1;
00130 if ((lfe.sprg != 0xff) && (lfe.sprg & PB_SRVR_NO_CACHE))
00131 return -1;
00132
00133
00134 lf_convert_order(&lfe);
00135
00136
00137 if (lfe.rhl + lfe.rdl == 0)
00138 return -1;
00139
00140 if (lfe.rhl + lfe.rdl > MAX_FILESIZE)
00141 return -1;
00142
00143
00144
00145
00146 char *str, *idx = (char *)lfe.url, *tmp;
00147 tmp = strtok(idx, " ");
00148 if (strcmp(tmp, "GET") != 0) {
00149
00150 return -1;
00151 }
00152 str = strtok(NULL, ".");
00153
00154 time = (double)lfe.crs + (double)lfe.cru/(double)1000000.0;
00155
00156 if (initTime < 0) {
00157 initTime = time;
00158 time = 0;
00159 } else
00160 time -= initTime;
00161
00162
00163 if ((startTime > 0) && (time < startTime))
00164 return -1;
00165
00166
00167 if (!(he = Tcl_FindHashEntry(&cidHash, (const char *)lfe.cip))) {
00168
00169 he = Tcl_CreateHashEntry(&cidHash, (const char *)lfe.cip, &ne);
00170 client++;
00171 long clientValue = client;
00172 Tcl_SetHashValue(he, clientValue);
00173 cid = client;
00174 } else {
00175
00176 cid = (long)Tcl_GetHashValue(he);
00177 }
00178
00179
00180 id[0] = lfe.sip;
00181 id[1] = lfe.spt;
00182 if (!(he = Tcl_FindHashEntry(&sidHash, (const char *)id))) {
00183
00184 he = Tcl_CreateHashEntry(&sidHash, (const char *)id, &ne);
00185 server++;
00186 long serverValue = server;
00187 Tcl_SetHashValue(he, serverValue);
00188 sid = server;
00189 } else {
00190
00191 sid = (long)Tcl_GetHashValue(he);
00192 }
00193
00194
00195 if (!(he = Tcl_FindHashEntry(&urlHash, str))) {
00196
00197 he = Tcl_CreateHashEntry(&urlHash, str, &ne);
00198 URL* u = new URL(++url, sid, lfe.rhl+lfe.rdl);
00199 Tcl_SetHashValue(he, (const char*)u);
00200 uid = u->id;
00201
00202 } else {
00203
00204 URL* u = (URL*)Tcl_GetHashValue(he);
00205 u->access++;
00206 uid = u->id;
00207 }
00208
00209 rlog[num_rlog++] = ReqLog(time, cid, sid, uid);
00210
00211
00212 if (startTime > 0)
00213 return time - startTime;
00214 else
00215 return time;
00216 }
00217
00218 int main(int argc, char**argv)
00219 {
00220 lf_entry lfntree;
00221 int ret;
00222 double ctime;
00223
00224
00225 Tcl_Interp *interp = Tcl_CreateInterp();
00226 if (Tcl_Init(interp) == TCL_ERROR) {
00227 printf("%s\n", interp->result);
00228 abort();
00229 }
00230 Tcl_InitHashTable(&cidHash, TCL_ONE_WORD_KEYS);
00231 Tcl_InitHashTable(&sidHash, 2);
00232 Tcl_InitHashTable(&urlHash, TCL_STRING_KEYS);
00233
00234 if ((cf = fopen("reqlog", "w")) == NULL) {
00235 printf("cannot open request log.\n");
00236 exit(1);
00237 }
00238 if ((sf = fopen("pglog", "w")) == NULL) {
00239 printf("cannot open page log.\n");
00240 exit(1);
00241 }
00242
00243 if ((argc < 2) || (argc > 4)) {
00244 printf("Usage: %s <trace size> [<time duration>] [<start_time>]\n", argv[0]);
00245 return 1;
00246 }
00247 if (argc >= 3) {
00248 duration = strtod(argv[2], NULL);
00249 if (argc == 4) {
00250 startTime = strtod(argv[3], NULL);
00251 printf("start time = %f\n", startTime);
00252 }
00253 }
00254
00255 sz_rlog = strtoul(argv[1], NULL, 10);
00256 rlog = new ReqLog[sz_rlog];
00257
00258 while(1) {
00259 if ((ret = lf_get_next_entry(0, &lfntree, 0)) != 0) {
00260 if (ret == 1) {
00261
00262 break;
00263 }
00264 fprintf(stderr, "Failed to get next entry.\n");
00265 exit(1);
00266 }
00267
00268 ctime = lf_analyze(lfntree);
00269 free(lfntree.url);
00270 if ((duration > 0) && (ctime > duration))
00271 break;
00272 }
00273 Tcl_DeleteHashTable(&cidHash);
00274 Tcl_DeleteHashTable(&sidHash);
00275
00276 fprintf(stderr, "sort url\n");
00277 sort_url();
00278 fclose(sf);
00279
00280 fprintf(stderr, "sort requests\n");
00281 sort_rlog();
00282 fclose(cf);
00283
00284 fprintf(stderr,
00285 "%d unique clients, %d unique servers, %d unique urls.\n",
00286 client, server, url);
00287 return 0;
00288 }