/*** analog 3.11 ***/
/* Please read Readme.html, or http://www.statslab.cam.ac.uk/~sret1/analog/  */

/*** cache.c; reading and writing cache files ***/

#include "analhea2.h"

/* Write the accumulated statistics to a "type 3" cache file.
 *
 *   outfile   -- name of the file to write; "stdout" or "-" selects
 *                standard output
 *   gooditems -- array of ITEM_NUMBER lists of items to be written
 *   dman      -- supplies the first/last date and the per-day records
 *   sizes     -- per-bin size data, SIZEBINS bins
 *   data      -- summary counters, indexed by the LOGDATA_* constants
 *   bys       -- byte total written on the summary line
 *
 * The file consists of a header line, one "T" summary line, then
 * tab-separated date lines, item lines, "z" size lines and "c" status code
 * lines.  Records whose request count is zero are omitted.
 *
 * A named output file is closed before returning, and a warning is given if
 * the close reports an error; standard output is only flushed, never closed.
 */
void outputcache(char *outfile, Hashindex **gooditems, Dateman *dman,
                 Sizedata *sizes, unsigned long *data, double bys) {
  extern Statuscodes scs[];
  FILE *outf;
  Daysdata *dp;
  Hashindex *p;
  int i, j;
  datecode_t k;

  /* first open output file as in output.c */
  if (STREQ(outfile, "stdout") || STREQ(outfile, "-")) {
    outf = stdout;
    debug('F', "Opening stdout as cache output file");
  }
  else if ((outf = fopen(outfile, "w")) == NULL)
    error("failed to open cache output file %s for writing", outfile);
  else
    debug('F', "Opening %s as cache output file", outfile);
  (void)fprintf(outf, "CACHE type 3 produced by analog%s. Do not modify or delete!\n",
                VERSION);

  /* output the summary data */
  (void)fprintf(outf, "T\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%.0f\n",
                data[LOGDATA_UNKNOWN], data[LOGDATA_INFO], data[LOGDATA_SUCC],
                data[LOGDATA_PAGES], data[LOGDATA_REDIR], data[LOGDATA_FAIL],
                bys);

  /* output the date info: one line per non-empty slot, 288 slots per day */
  for (k = dman -> firstdate, dp = dman -> firstdp; k <= dman -> lastdate;
       k++, TO_NEXT(dp)) {
    for (i = 0; i < 288; i++) {
      if (dp -> reqs[i] != 0)
        (void)fprintf(outf, "%u\t%d\t%lu\t%lu\t%.0f\n", k, i, dp -> reqs[i],
                      dp -> pages[i], dp -> bytes[i]);
    }
  }

  /* output the items: only those with some requests, failures or redirects */
  for (i = 0; i < ITEM_NUMBER; i++) {
    for (p = gooditems[i]; p != NULL; TO_NEXT(p)) {
      if (p -> own != NULL && (p -> own -> data[REQUESTS] != 0 ||
                               p -> own -> data[FAIL] != 0 ||
                               p -> own -> data[REDIR] != 0)) {
        (void)fprintf(outf, "%d\t", i);
        for (j = 0; j < DATA_NUMBER; j++)
          (void)fprintf(outf, "%lu\t", p -> own -> data[j]);
        (void)fprintf(outf, "%.0f\t%s\n", p -> own -> bytes, p -> name);
      }
    }
  }

  /* output the sizes */
  for (i = 0; i < SIZEBINS; i++) {
    if (sizes -> reqs[i] > 0)
      (void)fprintf(outf, "z\t%d\t%lu\t%lu\t%lu\t%.0f\n", i, sizes -> reqs[i],
                    sizes -> pages[i], sizes -> lastdate[i],
                    sizes -> bytes[i]);
  }

  /* output the status codes; the table is ended by an entry with code >= 600 */
  for (i = 0; scs[i].code < 600; i++) {
    if (scs[i].n > 0)
      (void)fprintf(outf, "c\t%u\t%lu\t%lu\n", scs[i].code, scs[i].n,
                    scs[i].lastdate);
  }

  /* Release the stream.  Buffered data is only committed on flush/close, so
     this is also where a full disk or similar write failure shows up. */
  if (outf == stdout)
    (void)fflush(outf);
  else if (fclose(outf) != 0)
    warn('F', "Failed to finish writing cache output file %s", outfile);
}

/* Merge one item record read from a cache file into the hash tables (or,
 * for the highest lowmem setting, straight into the report trees).  This is
 * a cut-down relative of process_data(): the name is not aliased again in
 * the lowmem >= 2 paths.
 *
 *   code      -- item type of the record
 *   data, bys -- counters and byte total read from the cache line
 *   mm        -- memory manager whose curr_pos holds the item name
 *   hash      -- hash table for this item type
 *   wanthead, ispagehead, aliashead -- include/alias lists for this type
 *   tree, alltrees -- tree reports; alltrees is terminated by REP_NUMBER
 *   lowmem    -- low-memory level for this item type
 *   dirsuffix, dirsufflength -- passed through to hashfind()
 */
void hashbuild(unsigned int code, unsigned long *data, double bys,
               Memman *mm, Hashtable **hash, Include *wanthead,
               Include *ispagehead, Alias *aliashead, Tree **tree,
               choice *alltrees, choice lowmem, char *dirsuffix,
               unsigned int dirsufflength) {
  extern Hashentry *unwanted_entry;
  extern Hashindex *dummy_item;
  extern choice *rep2type;

  Hashentry *entry;
  Tree *t;
  choice *rep;
  char *seg_start, *seg_end;
  logical is_page, into_trees = FALSE;
  int k;

  /* Step 1: locate the entry to update, depending on the lowmem level */
  if (lowmem <= 1) {
    entry = hashfind(mm, hash, wanthead, UNSET, ispagehead, aliashead,
                     dirsuffix, dirsufflength, (choice)code, TRUE) -> own;
  }
  else {
    is_page = pageq(mm -> curr_pos, ispagehead, (choice)code);
    if (!included(mm -> curr_pos, is_page, wanthead)) {
      /* excluded item: give the name's storage back */
      entry = unwanted_entry;
      mm -> next_pos = mm -> curr_pos;
    }
    else if (lowmem == 2) {
      entry = hashfind(mm, hash, wanthead, is_page, ispagehead, NULL,
                       dirsuffix, dirsufflength, (choice)code, TRUE) -> own;
    }
    else {
      /* nothing is hashed at this level; feed the trees directly below */
      entry = unwanted_entry;
      into_trees = TRUE;
    }
  }

  /* Step 2a: a wanted entry -- counts are summed, the rest keep the maximum */
  if (ENTRY_WANTED(entry)) {
    for (k = 0; k < COUNT_NUMBER; k++)
      entry -> data[k] += data[k];
    for ( ; k < DATA_NUMBER; k++) {
      if (data[k] > entry -> data[k])
        entry -> data[k] = data[k];
    }
    entry -> bytes += bys;
    return;
  }

  if (!into_trees)
    return;

  /* Step 2b: add the record to every tree report built from this item type */
  for (rep = alltrees; *rep != REP_NUMBER; rep++) {
    if (rep2type[*rep] != (choice)code)
      continue;
    t = tree[G(*rep)];
    dummy_item -> name = mm -> curr_pos;
    dummy_item -> own -> data[REQUESTS] = data[REQUESTS];
    dummy_item -> own -> data[PAGES] = data[PAGES];
    dummy_item -> own -> data[SUCCDATE] = data[SUCCDATE];
    dummy_item -> own -> bytes = bys;
    seg_start = NULL;
    t -> cutfn(&seg_start, &seg_end, dummy_item -> name, FALSE);
    (void)treefind(seg_start, seg_end, &(t -> tree), dummy_item, t -> cutfn,
                   FALSE, TRUE, t -> space);
  }
}

/* Report a bad line from a cache file and mark it as rejected.
 *
 *   line -- start of the offending record; modified in place
 *   rc   -- set to FALSE on return
 *   type -- 0: NUL bytes inside the line are turned back into tabs;
 *           otherwise they are turned back into colons.  2 additionally
 *           selects the "Incomplete line" wording of the warning.
 *
 * NUL bytes within the first 76 characters (and before pos - 2) are replaced
 * so the line prints whole, and the line is cut at 76 characters if the
 * global read position pos lies further on than that.
 */
void corruptcacheline(char *line, choice *rc, choice type) {
  extern char *pos;
  char separator = (type == 0)?'\t':':';
  int idx = 0;

  while (idx < 76 && line + idx < pos - 2) {
    if (line[idx] == '\0')
      line[idx] = separator;
    idx++;
  }

  if (pos - line > 76)
    line[76] = '\0';

  if (type != 2)
    warn('F', "Ignoring corrupt line in cache file looking like\n%s", line);
  else
    warn('F', "Incomplete line in cache file looking like\n%s", line);

  *rc = FALSE;
}

/* Parse the tab-separated fields of one cache line.
 *
 *   linestart  -- start of the whole record (only used for error reports)
 *   parsestart -- where the numeric code field begins
 *   code       -- receives the code; it must be less than maxcode.  It is
 *                 left untouched if the code field itself is rejected.
 *   no_items   -- number of unsigned integer fields to read into data[]
 *   bys        -- if non-NULL, a further numeric field is read into *bys
 *   mm         -- if non-NULL, a final text field is copied into memory
 *                 obtained from submalloc(mm, ...)
 *
 * Returns TRUE if the line had exactly the expected fields.  Otherwise the
 * line is reported through corruptcacheline() and FALSE is returned.
 *
 * Note that the line is split with strtok(), so it is modified in place and
 * must not be parsed while another strtok() scan is in progress.
 */
logical parsecacheline(char *linestart, char *parsestart, unsigned int *code,
                       unsigned int maxcode, int no_items, unsigned long *data,
                       double *bys, Memman *mm) {
  choice rc = TRUE;
  char *temps, *temps2;
  unsigned long codeval = 0;
  size_t namelen;
  int i;

  /* isdigit() is only defined for unsigned char values and EOF, hence the
     casts.  strtoul() is used for the code so that an absurdly long number
     saturates and fails the range check instead of overflowing. */
  if (!isdigit((unsigned char)*parsestart) ||
      (codeval = strtoul(parsestart, (char **)NULL, 10)) >= maxcode ||
      (temps2 = strchr(parsestart, '\t')) == NULL)
    corruptcacheline(linestart, &rc, 0);
  else {
    *code = (unsigned int)codeval;
    temps = strtok(temps2 + 1, "\t");

    /* the integer fields */
    for (i = 0; i < no_items && rc == TRUE; i++) {
      if (temps == NULL || !isdigit((unsigned char)*temps))
        corruptcacheline(linestart, &rc, 0);
      else {
        data[i] = strtoul(temps, (char **)NULL, 10);
        temps = strtok((char *)NULL, "\t");
      }
    }

    /* the optional bytes field */
    if (rc == TRUE && bys != NULL) {
      if (temps == NULL || !isdigit((unsigned char)*temps))
        corruptcacheline(linestart, &rc, 0);
      else {
        *bys = strtod(temps, (char **)NULL);
        temps = strtok((char *)NULL, "\t");
      }
    }

    /* the optional name field, copied including its terminating NUL */
    if (rc == TRUE && mm != NULL) {
      if (temps == NULL)
        corruptcacheline(linestart, &rc, 0);
      else {
        namelen = strlen(temps) + 1;
        (void)memcpy(submalloc(mm, namelen), (void *)temps, namelen);
        temps = strtok((char *)NULL, "\t");
      }
    }

    /* anything left over means the line had too many fields */
    if (rc == TRUE && temps != NULL)
      corruptcacheline(linestart, &rc, 0);
  }
  return((logical)rc);
}

/* Read the body of an old-style (type 1 or type 2) cache file.
 *
 *   cachefilep  -- record for this cache file; its totals and from/to range
 *                  are updated
 *   lf          -- the open cache file
 *   dman        -- date manager receiving the data through datehash()
 *   type        -- 1 or 2; type 2 records carry a pages field as well
 *   granularity -- passed through to datehash()
 *
 * Each record starts "YYYYMMDDHH:" and is followed by colon-separated
 * fields for each hour from HH up to 23: requests, pages (type 2 only) and
 * bytes.  A field starting with '*' where a request count is expected ends
 * the reading.  Each hour's totals are spread as evenly as possible over
 * twelve slots, five apart, within the hour.
 *
 * A record which stops short is reported as incomplete and the hour in
 * which it stops is not counted.
 */
void readoldcache(Filelist *cachefilep, FILE *lf, Dateman *dman, choice type,
                  unsigned int granularity) {
  extern char *block_start, *block_end, *record_start, *pos;
  unsigned int y, m, d, h;
  unsigned long reqs, pages = 0;
  double bys = 0.0;
  timecode_t timecode;
  choice rc = TRUE;
  char *temps;
  char tempc;
  int i;

  while (rc != EOF) {
    rc = TRUE;
    if (pos >= block_end) {
      rc = getmoredata(lf, block_start, BLOCKSIZE);
      pos = block_start;
    }
    if (rc == TRUE) {
      (void)parsenewline(lf, NULL, '\0');
      record_start = pos;
      parsenonnewline(lf);
      if (pos == record_start)  /* nothing more to read */
        rc = EOF;
      *(pos++) = '\0';
      if (rc == TRUE) {
        if (sscanf(record_start, "%4u%2u%2u%2u%c", &y, &m, &d, &h, &tempc)
            != 5 || tempc != ':' ||
            wantdate(&timecode, dman, h, 0, d, m - 1, y, 0) == ERR)
          corruptcacheline(record_start, &rc, 1);
        else {
          (void)strtok(record_start, ":");  /* step over the date stamp */
          for ( ; h < 24 && rc == TRUE; h++) {
            temps = strtok((char *)NULL, ":");
            if (temps == NULL || !isdigit((unsigned char)*temps)) {
              /* temps is NULL when the record simply ran out, so it must be
                 tested before looking for the '*' marker */
              if (temps != NULL && *temps == '*')
                rc = EOF;
              else
                corruptcacheline(record_start, &rc, 2);
            }
            else {
              reqs = strtoul(temps, (char **)NULL, 10);
              if ((temps = strtok((char *)NULL, ":")) == NULL ||
                  !isdigit((unsigned char)*temps))
                corruptcacheline(record_start, &rc, 2);
              else if (type == 2) {
                pages = strtoul(temps, (char **)NULL, 10);
                if ((temps = strtok((char *)NULL, ":")) == NULL ||
                    !isdigit((unsigned char)*temps))
                  corruptcacheline(record_start, &rc, 2);
                else
                  bys = strtod(temps, (char **)NULL);
              }
              else
                bys = strtod(temps, (char **)NULL);

              /* Only count the hour if all its fields were read; otherwise
                 pages/bys would be stale or never set. */
              if (rc == TRUE) {
                for (i = 0; i < 12; i++) {
                  /* the differences of the running shares sum exactly to the
                     hour's total over the twelve slots */
                  if (wantdate(&timecode, dman, h, (unsigned int)(5 * i), d,
                               m - 1, y, 0))
                    datehash(timecode, dman,
                             ((i + 1) * reqs) / 12 - (i * reqs) / 12,
                             (type == 2)?
                             (((i + 1) * pages) / 12 - (i * pages) / 12):0,
                             ((i + 1) * bys) / 12 - (i * bys) / 12,
                             granularity);
                  /* as before, the range is widened for every slot, whether
                     or not it was passed to datehash() */
                  cachefilep -> from = MIN(cachefilep -> from, timecode);
                  cachefilep -> to = MAX(cachefilep -> to, timecode);
                }
                cachefilep -> data[LOGDATA_SUCC] += reqs;
                if (type == 2)
                  cachefilep -> data[LOGDATA_PAGES] += pages;
                cachefilep -> bytes += bys;
                if (timecode > dman -> last7) {
                  cachefilep -> data[LOGDATA_SUCC7] += reqs;
                  if (type == 2)
                    cachefilep -> data[LOGDATA_PAGES7] += pages;
                  cachefilep -> bytes7 += bys;
                }
              }
            }
          }
        }
      }
    }
  }
}

/* Read one cache file and merge its contents into the current statistics.
 *
 *   cachefilep -- record for this cache file; its name is used in warnings
 *                 and its totals and from/to range are updated
 *   lf         -- the open cache file
 *   hash, wanthead, aliashead, lowmem -- arrays indexed by item type; the
 *                 entries for the type found on an item line are passed to
 *                 hashbuild()
 *   ispagehead, tree, alltrees, dirsuffix, dirsufflength -- passed through
 *                 to hashbuild()
 *   dman       -- date manager: supplies the from/to/last7 limits and
 *                 receives date data through datehash()
 *   sizes      -- size data updated from "z" lines
 *   granularity -- passed through to datehash() and readoldcache()
 *
 * The first line must read "CACHE type N produced by analog..." with N from
 * 1 to 3.  Types 1 and 2 are handed to readoldcache().  A type 3 file must
 * continue with a "T" summary line, after which each line is one of:
 *   - a date line, starting with at least two digits
 *   - an item line, starting with one digit and a tab
 *   - a size line, starting "z<tab>"
 *   - a status code line, starting "c<tab>"
 * Anything else is reported by corruptcacheline() and skipped.
 */
void readcache(Filelist *cachefilep, FILE *lf, Hashtable **hash,
	       Include **wanthead, Include *ispagehead, Alias **aliashead,
	       Dateman *dman, Sizedata *sizes, Tree **tree, choice *alltrees,
	       choice *lowmem, char *dirsuffix, unsigned int dirsufflength,
	       unsigned int granularity) {
  /* compare with nextdnsline etc. in input.c */
  extern Memman mmq;  /* use mmq for another purpose here */
  extern char *block_start, *block_end, *record_start, *pos;
  extern unsigned int *rep2gran;

  unsigned long data[DATA_NUMBER];
  double bys;
  datecode_t datecode;
  timecode_t timecode;
  unsigned int code;
  choice rc = TRUE;
  char *temps;
  char tempc1, tempc2;

  /* read the first block and check the header line; tempc1 gets the cache
     type digit and tempc2 must be the final 'g' of "analog" */
  rc = getmoredata(lf, block_start, BLOCKSIZE);
  pos = block_start;
  if (sscanf(pos, "CACHE type %c produced by analo%c", &tempc1, &tempc2) != 2
      || tempc1 < '1' || tempc1 > '3' || tempc2 != 'g') {
    warn('F', "%s is not an analog cache file: ignoring it",
	 cachefilep -> name);
    return;
  }
  /* presumably moves pos to the end of the header line -- see input.c */
  parsenonnewline(lf);

  /* types 1 and 2 are the old formats */
  if (tempc1 < '3')
    readoldcache(cachefilep, lf, dman, (choice)(tempc1 - '0'), granularity);

  else {
    /* the summary line: six counters and a byte total, then end of line */
    (void)parsenewline(lf, NULL, '\0');
    record_start = pos;
    parsenonnewline(lf);
    if (sscanf(record_start, "T\t%lu\t%lu\t%lu\t%lu\t%lu\t%lu\t%lf%c",
	       &(data[0]), &(data[1]), &(data[2]), &(data[3]), &(data[4]),
	       &(data[5]), &bys, &tempc1) != 8 ||
	(tempc1 != '\r' && tempc1 != '\n')) {
      warn('F', "Cache file %s is corrupt: ignoring it", cachefilep -> name);
      return;
    }
    cachefilep -> data[LOGDATA_UNKNOWN] += data[0];
    cachefilep -> data[LOGDATA_INFO] += data[1];
    cachefilep -> data[LOGDATA_SUCC] += data[2];
    cachefilep -> data[LOGDATA_PAGES] += data[3];
    cachefilep -> data[LOGDATA_REDIR] += data[4];
    cachefilep -> data[LOGDATA_FAIL] += data[5];
    cachefilep -> bytes += bys;
    /* now the remaining lines, one per pass; rc is TRUE for a line to be
       parsed, FALSE for one to be skipped, and EOF when there are no more */
    while (rc != EOF) {
      rc = TRUE;
      if (pos >= block_end) {
	rc = getmoredata(lf, block_start, BLOCKSIZE);
	pos = block_start;
      }
      if (rc == TRUE) {
	(void)parsenewline(lf, NULL, '\0');
	record_start = pos;
	parsenonnewline(lf);
	/* over-long lines are skipped; only their first 70 characters are
	   shown in the warning */
	if (pos - record_start >= 511) {
	  *(record_start + 70) = '\0';
	  warn('F', "Ignoring long line in cache file starting\n%s",
	       record_start);
	  rc = FALSE;
	}
	/* an empty record means there is nothing more to read */
	if (pos == record_start)
	  rc = EOF;
      }
      if (rc == TRUE) {
	/* terminate the record so it can be handled as a string */
	*(pos++) = '\0';
	if (isdigit(*record_start)) {
	  if (isdigit(*(record_start + 1))) {
	    /* date line: date code, slot code, then two counters and bytes */
	    temps = strchr(record_start, '\t');
	    if (temps == NULL)
	      corruptcacheline(record_start, &rc, 0);
	    else {
	      datecode = (unsigned int)atoi(record_start);
	      if (parsecacheline(record_start, ++temps, &code,
				 rep2gran[REP_FIVE], 2, data, &bys, NULL)) {
		/* 1440 units per day and 5 per slot: presumably minutes */
		timecode = (timecode_t)datecode * 1440 + (timecode_t)code * 5;
		/* data outside the dman from/to range is dropped */
		if (timecode >= dman -> from && timecode <= dman -> to) {
		  datehash(timecode, dman, data[0], data[1], bys, granularity);
		  cachefilep -> from = MIN(cachefilep -> from, timecode);
		  cachefilep -> to = MAX(cachefilep -> to, timecode);
		  if (timecode > dman -> last7) {
		    cachefilep -> data[LOGDATA_SUCC7] += data[0];
		    cachefilep -> data[LOGDATA_PAGES7] += data[1];
		    cachefilep -> bytes7 += bys;
		  }
		}
	      }
	    }
	  }
	  else if (*(record_start + 1) == '\t') {
	    /* item line: item type, DATA_NUMBER counters, bytes and the name,
	       which parsecacheline() copies into memory from mmq */
	    if (parsecacheline(record_start, record_start, &code, ITEM_NUMBER,
			       DATA_NUMBER, data, &bys, &mmq))
	      hashbuild(code, data, bys, &mmq, &(hash[code]), wanthead[code],
			ispagehead, aliashead[code], tree, alltrees,
			lowmem[code], dirsuffix, dirsufflength);
	  }
	  else
	    corruptcacheline(record_start, &rc, 0);
	}
	else if (*record_start == 'z' && *(record_start + 1) == '\t') {
	  /* size line: bin number, then requests, pages, last date, bytes */
	  if (parsecacheline(record_start, record_start + 2, &code, SIZEBINS,
			     3, data, &bys, NULL)) {
	    sizes -> reqs[code] += data[0];
	    sizes -> pages[code] += data[1];
	    sizes -> lastdate[code] = MAX(sizes -> lastdate[code], data[2]);
	    sizes -> bytes[code] += bys;
	  }
	}
	else if (*record_start == 'c' && *(record_start + 1) == '\t') {
	  /* status code line: code below 600, then two values for codescore()
	     (written by outputcache() as the count and the last date) */
	  if (parsecacheline(record_start, record_start + 2, &code, 600, 2,
			     data, NULL, NULL))
	    codescore(code, data[0], data[1]);
	}
	else
	  corruptcacheline(record_start, &rc, 0);
      }
    }
  }
}
