/*** analog 3.11      http://www.statslab.cam.ac.uk/~sret1/analog/  ***/

/*** process.c; process some data ***/

#include "analhea2.h"

/* Process one already-parsed log line.
 *
 * The parsed fields are not passed in directly: they are read from the
 * globals year, month, date, hr, min, am, code and bytes, and the item
 * strings are read from the per-item memory managers mm[] (mm[i].curr_pos).
 * count[] is indexed by input field / item; in this function a value of 0
 * causes the field to be treated as absent and a value of 2 causes it to be
 * scored (see also strtoinfmt(), which is not in this file).
 *
 * The function works in two phases:
 *   1. Decide whether the line is wanted (ignored status code, date range,
 *      then each item in turn).  An unwanted or corrupt line bumps the
 *      relevant logfilep -> data[] counter and returns at once, after
 *      rewinding (next_pos = curr_pos) any mm[] strings that should not be
 *      kept.
 *   2. Classify the line by status code and add it to the logfile totals,
 *      the status code, date and size reports, any wanted trees, and the
 *      hash table entries found in phase 1.
 *
 * lowmem[i] selects how item i is looked up: 0, 1, 2, or higher (see the
 * three branches below).
 */
void process_data(Filelist *logfilep, FILE *lf, Hashtable **hash,
		  choice *count, Include **wanthead, Include *ispagehead,
		  Alias **aliashead, Include *argshead, Include *refargshead,
		  Dateman *dman, Sizedata *sizes, Tree **tree,
		  choice *alltrees, choice *lowmem, logical case_insensitive,
		  char *dirsuffix, unsigned int dirsufflength,
		  unsigned int granularity) {
  extern unsigned int year, month, date, hr, min, code;
  extern char am;
  extern double bytes;
  extern Memman mm[], mmq, *amemman;
  extern choice *rep2type;
  extern Hashentry *unwanted_entry, *blank_entry;
  extern Hashindex *dummy_item;

  /* gp[] is static: it remembers the result of the previous hashfind() for
     each item so that a line repeating the same name can skip the lookup */
  static Hashindex *gp[ITEM_NUMBER];
  Hashentry *item[ITEM_NUMBER];   /* the entry found for each item */
  logical wanttree[ITEM_NUMBER];  /* item to be added to trees only */
  logical isitpage, last7;
  choice ispage = UNSET;  /* only set in the loop by the lowmem >= 2 branch */
  choice wanted = TRUE, outcome;
  timecode_t timecode = FIRST_TIME;  /* stays so unless wantdate() is called */
  char *name, *namestart, *nameend;
  size_t len;
  choice i, j;

  /*** check whether this line is wanted ***/

  /* a status code marked to be ignored: discard all the strings and count
     the line under LOGDATA_UNKNOWN */
  if (code == IGNORE_CODE && count[INP_CODE] != 0) {
    for (j = 0; j < ITEM_NUMBER; j++) {   /* reset strings */
      if (count[j] != 0)
	mm[j].next_pos = mm[j].curr_pos;
    }
    logfilep -> data[LOGDATA_UNKNOWN]++;
    return;
  }
  else if (count[INP_DATE] > 0) {
    /* convert a 12-hour clock reading to 24-hour: 12 becomes 0, then 'p'
       adds 12; an hour above 12 is treated as a corrupt line */
    if (count[INP_AM]) {
      if (hr > 12) {
	corrupt_line(lf, logfilep);
	return;
      }
      else if (hr == 12)
	hr = 0;
      if (am == 'p')
	hr += 12;
    }
    wanted = wantdate(&timecode, dman, hr, min, date, month, year,
		      logfilep -> tz);
    if (wanted == ERR) { /* corrupt date */
      corrupt_line(lf, logfilep);
      return;
    }
  }
  /* look up each item in turn; as soon as one is unwanted, the check at the
     top of the next iteration discards the remaining strings and returns */
  for (i = 0; i < ITEM_NUMBER; i++) {
    wanttree[i] = FALSE;
    if (!wanted) {
      for (j = i; j < ITEM_NUMBER; j++) {  /* reset not-yet-hashed strings */
	if (count[j] != 0)                 /* NB i is now (unwanted i) + 1 */
	  mm[j].next_pos = mm[j].curr_pos;
      }
      logfilep -> data[LOGDATA_UNWANTED]++;
      return;
    }
    name = (char *)(mm[i].curr_pos);
    /* absent field, empty string or a lone "-" all count as blank */
    if (count[i] == 0 || IS_EMPTY_STRING(name) ||
	(name[0] == '-' && name[1] == '\0'))
      item[i] = blank_entry;
    else {
      /* files and referrers are passed through prealias() first; only files
	 get the case_insensitive setting and the logfile prefix.
	 NOTE(review): this reads item[ITEM_VHOST], so it presumably relies
	 on ITEM_VHOST being processed before these items -- confirm */
      if (i == ITEM_FILE || i == ITEM_REFERRER) {
	if (prealias(&(mm[i]), &(mm[ITEM_VHOST]), item[ITEM_VHOST], &mmq,
		     (logical)((i == ITEM_FILE)?case_insensitive:FALSE),
		     (i == ITEM_FILE)?(logfilep -> prefix):NULL,
		     logfilep -> prefixlen, logfilep -> pvpos,
		     (i == ITEM_FILE)?argshead:refargshead) == ERR) {
	  corrupt_line(lf, logfilep);
	  return;
	}
      }
      /* lowmem 0: hashfind() is given the alias list, and the result is
	 cached in gp[i]; the entry used is the index's "other" */
      if (lowmem[i] == 0) {
	if (gp[i] == NULL || !STREQ(name, gp[i] -> name)) {
	  gp[i] = hashfind(&mm[i], &(hash[i]), wanthead[i], UNSET, ispagehead,
			   aliashead[i], dirsuffix, dirsufflength, i, FALSE);
	}     /* if name the same as last time, don't need */
	else  /* to hashfind again, or save the name */
	  mm[i].next_pos = mm[i].curr_pos;
	item[i] = (Hashentry *)(gp[i] -> other);	  
	wanted = (choice)(ENTRY_WANTED(item[i]));
      }

      /* lowmem 1: alias first with do_alias(), then hashfind() with no alias
	 list, on the original string (outcome FALSE) or on the aliased
	 string held in amemman (outcome TRUE) */
      else if (lowmem[i] == 1) {
	if ((outcome = do_alias(name, amemman, aliashead[i], dirsuffix,
				dirsufflength, i)) == FALSE) {
	  item[i] = hashfind(&mm[i], &(hash[i]), wanthead[i], UNSET,
			     ispagehead, NULL, dirsuffix, dirsufflength, i,
			     TRUE) -> own;
	}
	else if (outcome == TRUE) {
	  mm[i].next_pos = mm[i].curr_pos;  /* don't save string */
	  item[i] = hashfind(amemman, &(hash[i]), wanthead[i], UNSET,
			     ispagehead, NULL, dirsuffix, dirsufflength, i,
			     TRUE) -> own;
	}
	else { /* outcome == ERR */
	  mm[i].next_pos = mm[i].curr_pos;
	  item[i] = blank_entry;
	}
	wanted = (choice)(ENTRY_WANTED(item[i]));
      }

      /* lowmem 2 or more: alias, then test page-ness and inclusion here,
	 before deciding whether to hash the name at all */
      else { /* lowmem[i] >= 2 */
	if ((outcome = do_alias(name, amemman, aliashead[i], dirsuffix,
				dirsufflength, i)) == TRUE) {
	  /* copy the aliased name from amemman into mm[i] in place of the
	     original, then release it from amemman */
	  mm[i].next_pos = mm[i].curr_pos;  /* don't save old string */
	  len = strlen((char *)(amemman -> curr_pos));
	  (void)memcpy(submalloc(&(mm[i]), len + 1),
		       (void *)(amemman -> curr_pos), len + 1);
	  name = (char *)(mm[i].curr_pos); /* which might have changed */
	  amemman -> next_pos = amemman -> curr_pos;
	}
	if (outcome == ERR) {
	  item[i] = blank_entry;
	  mm[i].next_pos = mm[i].curr_pos;
	  if (i == ITEM_FILE)
	    ispage = FALSE;
	}
	else {
	  isitpage = pageq(name, ispagehead, i);
	  if (i == ITEM_FILE)
	    ispage = (choice)isitpage;
	  if (included(name, isitpage, wanthead[i])) {
	    if (lowmem[i] == 2) {
	      item[i] = hashfind(&(mm[i]), &(hash[i]), wanthead[i], isitpage,
				 ispagehead, NULL, dirsuffix, dirsufflength,
				 i, TRUE) -> own;
	    }
	    else {
	      /* lowmem > 2: no hash entry; just flag the item so that it is
		 added to the trees below, reading the string from
		 mm[i].curr_pos */
	      item[i] = blank_entry;
	      wanttree[i] = TRUE;
	      mm[i].next_pos = mm[i].curr_pos;
	    }
	  }
	  else {
	    /* item[i] is left unset here; the line is rejected before any
	       item[] is read */
	    wanted = FALSE;
	    mm[i].next_pos = mm[i].curr_pos;
	  }
	}
      }  /* end lowmem[i] >= 2 */

    }
  }      /* end for i */
  /* catches the case where the last item was the unwanted one */
  if (!wanted) {
    logfilep -> data[LOGDATA_UNWANTED]++;
    return;
  }

  /*** now add it to the hash tables ***/

  /* add to logfile from and to if wanted, whatever status code */
  if (timecode != FIRST_TIME)
    logfilep -> from = MIN(logfilep -> from, timecode);
  logfilep -> to = MAX(logfilep -> to, timecode);
  last7 = (timecode > dman -> last7);
  if (ispage == UNSET)            /* NB blank_entry has ispage FALSE */
    ispage = (choice)(item[ITEM_FILE] -> ispage);
  if (count[INP_BYTES] == 0)
    bytes = 0;
  /* classify the line into outcome (TRUE, INFO, REDIRECT or FALSE) and bump
     the matching logfile totals; the "7" totals are only incremented when
     last7 is true */
  if (count[INP_CODE] == 0) {
    /* no status code in the input: treated as a success, but only counted as
       one if count[ITEM_FILE] is 2; otherwise counted under UNKNOWN */
    outcome = TRUE;
    if (count[ITEM_FILE] == 2) {
      logfilep -> data[LOGDATA_SUCC]++;
      logfilep -> data[LOGDATA_SUCC7] += (unsigned long)last7;
      logfilep -> data[LOGDATA_PAGES] += (unsigned long)ispage;
      logfilep -> data[LOGDATA_PAGES7] +=
	(unsigned long)((logical)ispage && last7);
    }
    else {
      logfilep -> data[LOGDATA_UNKNOWN]++;
      logfilep -> data[LOGDATA_UNKNOWN7] += (unsigned long)last7;
    }
  }
  else if (code <= 199) {
    outcome = INFO;
    logfilep -> data[LOGDATA_INFO]++;
    logfilep -> data[LOGDATA_INFO7] += (unsigned long)last7;
  }
  else if (code <= 299 || code == 304) {  /* 304 is counted as a success */
    outcome = TRUE;
    logfilep -> data[LOGDATA_SUCC]++;
    logfilep -> data[LOGDATA_SUCC7] += (unsigned long)last7;
    logfilep -> data[LOGDATA_PAGES] += (unsigned long)ispage;
    logfilep -> data[LOGDATA_PAGES7] +=
      (unsigned long)((logical)ispage && last7);
  }
  else if (code <= 399) {
    outcome = REDIRECT;
    logfilep -> data[LOGDATA_REDIR]++;
    logfilep -> data[LOGDATA_REDIR7] += (unsigned long)last7;
  }
  else {
    outcome = FALSE;
    logfilep -> data[LOGDATA_FAIL]++;
    logfilep -> data[LOGDATA_FAIL7] += (unsigned long)last7;
  }

  /* NB any change in what to count when will require corresponding change to
     end of strtoinfmt() and to fmt munching in correct() */
  if (count[INP_CODE] == 2)
    codescore(code, 1, timecode);
  /* INFO lines contribute only to the totals and status code report above */
  if (outcome != INFO) {
    /* the date, size and tree reports only count successes */
    if (outcome == TRUE) {
      if (count[INP_DATE] == 2)  /* only if file present: see strtoinfmt() */
	datehash(timecode, dman, 1, (unsigned long)ispage, bytes, granularity);
      if (count[INP_BYTES] == 2) {
	sizescore(sizes, bytes, timecode, (logical)ispage);
	logfilep -> bytes += bytes;
	if (last7)
	  logfilep -> bytes7 += bytes;
      }
      /* alltrees is a list of reports terminated by REP_NUMBER; for each one
	 whose item type was flagged above, load this request's data into
	 dummy_item and add it to that report's tree */
      for (i = 0; alltrees[i] != REP_NUMBER; i++) {
	if (wanttree[rep2type[alltrees[i]]]) {
	  /* NB these trees only count successes */
	  dummy_item -> name = mm[rep2type[alltrees[i]]].curr_pos;
	  /* mm.curr_pos is marked for deletion, but still intact at present */
	  dummy_item -> own -> data[REQUESTS] = 1;
	  dummy_item -> own -> data[PAGES] = (unsigned long)ispage;
	  dummy_item -> own -> data[SUCCDATE] = timecode;
	  dummy_item -> own -> bytes = bytes;
	  namestart = NULL;
	  tree[G(alltrees[i])] -> cutfn(&namestart, &nameend,
					dummy_item -> name, FALSE);
	  (void)treefind(namestart, nameend, &(tree[G(alltrees[i])] -> tree),
			 dummy_item, tree[G(alltrees[i])] -> cutfn, FALSE,
			 TRUE, tree[G(alltrees[i])] -> space);
	}
      }
    }    /* end if outcome == TRUE */
    /* score every non-blank item entry, whatever the (non-INFO) outcome */
    for (i = 0; i < ITEM_NUMBER; i++) {
      if (count[i] == 2 && !ENTRY_BLANK(item[i]))
	hashscore(item[i], outcome, (logical)ispage, timecode, bytes);
    }
  }      /* end if outcome != INFO */
}

/* Deal with a corrupt log line: report the line (or its first 500
   characters) via debug(), count it under LOGDATA_CORRUPT for this logfile,
   and then move on past the line's newline. */
void corrupt_line(FILE *lf, Filelist *logfilep) {
  extern char *record_start, *pos;

  enum { SHOWN_MAX = 500 };  /* debug might only handle 509 chars */
  char *line;

  /* go back to the start of the record and run forward to its end */
  pos = record_start;
  parsenonnewline(lf);  /* skip to after next newline */

  /* terminate the line where parsing stopped so that it can be printed,
     cutting it short if it is too long for debug() */
  line = record_start;
  pos[0] = '\0';
  if (pos - line > SHOWN_MAX)
    line[SHOWN_MAX] = '\0';
  debug('C', "%s", line);

  logfilep -> data[LOGDATA_CORRUPT]++;

  /* put a newline back where we stopped, and consume it */
  pos[0] = '\n';
  (void)parsenewline(lf, NULL, '\0');
}

/* Labels for the file size bins, in the same order as the bin numbers
   assigned by sizescore() */
char *sizestr[SIZEBINS] = {
  "0",
  "1b-  10b",
  "11b- 100b",
  "101b-  1kb",
  "1kb- 10kb",
  "10kb-100kb",
  "100kb-  1Mb",
  "1Mb- 10Mb",
  "10Mb-100Mb",
  "100Mb-  1Gb",
  "> 1Gb"
};
/* Add one request of the given size to the size data: find the size bin
   that bytes falls into, then update that bin's request count, page count,
   byte total and last date. */
void sizescore(Sizedata *sizes, double bytes, timecode_t timecode,
	       logical ispage) {
  /* Exclusive upper limit of every bin but the last; anything not below
     the final limit goes in the last bin.  The number of bins (one more
     than the number of limits) determines SIZEBINS in analhea2.h */
  static const double limit[] = {
    0.5,
    10.5,
    100.5,
    1024.5,
    10240.5,
    102400.5,
    1048576.5,     /* 1MB */
    10485760.5,
    104857600.5,
    1073741824.5   /* 1GB */
  };
  choice nlimits = (choice)(sizeof(limit) / sizeof(limit[0]));
  choice bin = 0;

  /* step past each bin whose limit bytes is not below */
  while (bin < nlimits && !(bytes < limit[bin]))
    bin++;

  sizes -> reqs[bin]++;
  sizes -> pages[bin] += ispage;
  sizes -> bytes[bin] += bytes;
  sizes -> lastdate[bin] = MAX(sizes -> lastdate[bin], timecode);
}

/* Add reqs requests with the given status code to the status code table.
   The requests are credited to the first entry of scs[] whose code is not
   less than the given code; the table is scanned only as far as the first
   entry with code 600 or more, and if that is reached nothing is recorded. */
void codescore(unsigned int code, unsigned long reqs, timecode_t timecode) {
  extern Statuscodes scs[];

  Statuscodes *sc = scs;

  /* skip the entries that lie below the code we are looking for */
  while (sc -> code < 600 && sc -> code < code)
    sc++;

  if (sc -> code < 600) {   /* stopped on a real entry, not the end marker */
    sc -> n += reqs;
    sc -> lastdate = MAX(sc -> lastdate, timecode);
  }
}
