00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include <stdio.h>
00016 #include <stdlib.h>
00017 #include <string.h>
00018 #include <ctype.h>
00019 #include <time.h>
00020 #include <sys/types.h>
00021 #include <sys/socket.h>
00022 #include <netinet/in.h>
00023 #include <arpa/inet.h>
00024 #include <tcl.h>
00025
// Per-URL record. Instances are heap-allocated and stored as the value of
// a urlHash entry.
struct URL {
	int access;	// request count for this URL; a new record starts at 1
	int id;		// identifier supplied by the creator
	int sid;	// server identifier supplied by the creator
	int size;	// size supplied by the creator

	// Builds a record for a URL that has just been seen for the first time.
	URL(int i, int sd, int sz)
		: access(1),
		  id(i),
		  sid(sd),
		  size(sz)
	{
	}
};
00032
// One accepted request: when it happened, who issued it, and what it asked for.
struct ReqLog {
	double time;		// request time
	unsigned int cid;	// client identifier
	unsigned int sid;	// server identifier
	unsigned int url;	// URL identifier

	// Default construction leaves every field uninitialized; it exists so
	// that arrays of ReqLog can be allocated.
	ReqLog()
	{
	}

	// Builds a fully populated record.
	ReqLog(double t, unsigned int c, unsigned int s, unsigned int u)
		: time(t),
		  cid(c),
		  sid(s),
		  url(u)
	{
	}
};
00040
00041 FILE *cf, *sf;
00042 double initTime = -1;
00043 double duration = -1;
00044 double startTime = -1;
00045
00046 Tcl_HashTable cidHash;
00047 static int client = 0;
00048
00049 static int server = 1;
00050
00051 Tcl_HashTable urlHash;
00052 static int url = 0;
00053 static int* umap;
00054
00055 ReqLog* rlog = NULL;
00056 unsigned int num_rlog = 0, sz_rlog = 0;
00057
// One parsed line of the input trace.
struct Entry {
	char *client;		// client token of the line
	unsigned int time;	// timestamp of the line, folded into a single number
	char *url;		// requested URL token
	int size;		// size field of the line
};
00064
00065 static int compare(const void *a1, const void *b1)
00066 {
00067 const ReqLog *a, *b;
00068 a = (const ReqLog*)a1, b = (const ReqLog*)b1;
00069 return (a->time > b->time) ? 1 :
00070 (a->time == b->time) ? 0 : -1;
00071 }
00072
00073 void sort_rlog()
00074 {
00075 qsort((void *)rlog, num_rlog, sizeof(ReqLog), compare);
00076 double t = rlog[0].time;
00077 for (unsigned int i = 0; i < num_rlog; i++) {
00078 rlog[i].time -= t;
00079 fprintf(cf, "%f %d %d %d\n", rlog[i].time,
00080 rlog[i].cid, rlog[i].sid, umap[rlog[i].url]);
00081 }
00082
00083 fprintf(cf, "i %f %u\n", rlog[num_rlog-1].time, url);
00084
00085 fprintf(stderr,
00086 "%d unique clients, %d unique servers, %d unique urls.\n",
00087 client, server, url);
00088 }
00089
00090 static int compare_url(const void* a1, const void* b1)
00091 {
00092 const URL **a, **b;
00093 a = (const URL**)a1, b = (const URL**)b1;
00094 return ((*a)->access > (*b)->access) ? -1:
00095 ((*a)->access == (*b)->access) ? 0 : 1;
00096 }
00097
00098 void sort_url()
00099 {
00100
00101 URL** tbl = new URL*[urlHash.numEntries];
00102 Tcl_HashEntry *he;
00103 Tcl_HashSearch hs;
00104 int i = 0, sz = urlHash.numEntries;
00105 for (he = Tcl_FirstHashEntry(&urlHash, &hs);
00106 he != NULL;
00107 he = Tcl_NextHashEntry(&hs))
00108 tbl[i++] = (URL*)Tcl_GetHashValue(he);
00109 Tcl_DeleteHashTable(&urlHash);
00110
00111
00112 qsort((void *)tbl, sz, sizeof(URL*), compare_url);
00113 umap = new int[url];
00114
00115 for (i = 0; i < sz; i++) {
00116 umap[tbl[i]->id] = i;
00117 fprintf(sf, "%d %d %d %u\n", tbl[i]->sid, i,
00118 tbl[i]->size, tbl[i]->access);
00119 delete tbl[i];
00120 }
00121 delete []tbl;
00122 }
00123
00124 double lf_analyze(Entry& lfe)
00125 {
00126 double time;
00127 int ne, cid, sid, uid;
00128 Tcl_HashEntry *he;
00129
00130 time = (double)lfe.time;
00131
00132 if (initTime < 0) {
00133 initTime = time;
00134 time = 0;
00135 } else
00136 time -= initTime;
00137
00138
00139 if ((startTime > 0) && (time < startTime))
00140 return -1;
00141
00142
00143 if (lfe.size == 0)
00144 return -1;
00145
00146
00147 if (!(he = Tcl_FindHashEntry(&cidHash, (const char *)lfe.client))) {
00148
00149 he = Tcl_CreateHashEntry(&cidHash, (const char *)lfe.client, &ne);
00150 client++;
00151 long clientValue = client;
00152 Tcl_SetHashValue(he, clientValue);
00153 cid = client;
00154 } else {
00155
00156 cid = (long)Tcl_GetHashValue(he);
00157 }
00158
00159
00160 sid = 0;
00161
00162
00163 if (!(he = Tcl_FindHashEntry(&urlHash, (const char*)lfe.url))) {
00164
00165 he = Tcl_CreateHashEntry(&urlHash, (const char*)lfe.url, &ne);
00166 URL* u = new URL(++url, sid, lfe.size);
00167 Tcl_SetHashValue(he, (const char*)u);
00168 uid = u->id;
00169 } else {
00170
00171 URL* u = (URL*)Tcl_GetHashValue(he);
00172 u->access++;
00173 uid = u->id;
00174 }
00175
00176 rlog[num_rlog++] = ReqLog(time, cid, sid, uid);
00177
00178
00179 if (startTime > 0)
00180 return time - startTime;
00181 else
00182 return time;
00183 }
00184
00185 int get_next_entry(Entry& lfe)
00186 {
00187 char buf[1024];
00188
00189 if (feof(stdin))
00190 return 0;
00191
00192 fgets(buf, 1024, stdin);
00193 if (feof(stdin) || ferror(stdin))
00194 return 0;
00195
00196 char *tmp = buf, *code, *method, *date;
00197 lfe.client = strtok(tmp, " ");
00198 date = strtok(NULL, " ");
00199 method = strtok(NULL, " ");
00200 *(method++) = 0;
00201 if (strcmp(method, "GET") != 0)
00202
00203 return -1;
00204
00205 lfe.url = strtok(NULL, " ");
00206 if (strchr(lfe.url, '?') != NULL)
00207
00208 return -1;
00209 strtok(NULL, " ");
00210 code = strtok(NULL, " ");
00211 if ((atoi(code) != 200) && (atoi(code) != 304))
00212 return -1;
00213
00214 tmp = strtok(NULL, " ");
00215 lfe.size = atoi(tmp);
00216
00217
00218
00219
00220 tmp = new char[strlen(date)+1];
00221 strcpy(tmp, date);
00222 date = tmp + 1;
00223 lfe.time = 0;
00224 date = strtok(date, ":");
00225 lfe.time = atoi(date);
00226 date = strtok(NULL, ":");
00227 lfe.time = lfe.time*24 + atoi(date);
00228 date = strtok(NULL, ":");
00229 lfe.time = lfe.time*60 + atoi(date);
00230 date = strtok(NULL, "]");
00231 lfe.time = lfe.time*60 + atoi(date);
00232 delete []tmp;
00233
00234 return 1;
00235 }
00236
00237 int main(int argc, char**argv)
00238 {
00239 Entry lfntree;
00240 int ret;
00241 double ctime;
00242
00243
00244 Tcl_Interp *interp = Tcl_CreateInterp();
00245 if (Tcl_Init(interp) == TCL_ERROR) {
00246 printf("%s\n", interp->result);
00247 abort();
00248 }
00249 Tcl_InitHashTable(&cidHash, TCL_STRING_KEYS);
00250 Tcl_InitHashTable(&urlHash, TCL_STRING_KEYS);
00251
00252 if ((cf = fopen("reqlog", "w")) == NULL) {
00253 printf("cannot open request log.\n");
00254 exit(1);
00255 }
00256 if ((sf = fopen("pglog", "w")) == NULL) {
00257 printf("cannot open page log.\n");
00258 exit(1);
00259 }
00260
00261 if ((argc > 4) || (argc < 2)) {
00262 printf("Usage: %s <trace size> [<time duration>] [<start_time>]\n", argv[0]);
00263 return 1;
00264 }
00265 if (argc >= 3) {
00266 duration = strtod(argv[2], NULL);
00267 if (argc == 4) {
00268 startTime = strtod(argv[3], NULL);
00269 printf("start time = %f\n", startTime);
00270 }
00271 }
00272
00273 sz_rlog = strtoul(argv[1], NULL, 10);
00274 rlog = new ReqLog[sz_rlog];
00275
00276 while((ret = get_next_entry(lfntree)) != 0) {
00277
00278 if (ret < 0)
00279 continue;
00280 ctime = lf_analyze(lfntree);
00281 if ((duration > 0) && (ctime > duration))
00282 break;
00283 }
00284 Tcl_DeleteHashTable(&cidHash);
00285
00286 fprintf(stderr, "sort url\n");
00287 sort_url();
00288 fclose(sf);
00289
00290 fprintf(stderr, "sort requests\n");
00291 sort_rlog();
00292 fclose(cf);
00293
00294 fprintf(stderr,
00295 "%d unique clients, %d unique servers, %d unique urls.\n",
00296 client, server, url);
00297 return 0;
00298 }