/* foremost.c
 *
 * (C) Copyright 2001 Air Force Office of Special Investigations 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "foremost.h"

#include <unistd.h>


void usage() {
  /* Emit the usage text (FOREMOST_USAGE_STRING) on standard output. */
  printf (FOREMOST_USAGE_STRING);
}


/* catch_alarm and clean_up are used for signal handling...
   If the user sends a SIGINT (CTRL-C) or SIGTERM (kill) we 
   would like to have a chance to finish the current operation
   and close the audit file.

   catch_alarm is the signal handler, which sets the global variable
   signal_caught.

   At the top of each loop through digBuffer and digImageFile we check
   to see if signal_caught has been set.  If so, we call clean_up.
 */

void catch_alarm (int signum) {
  /* Only async-signal-safe calls are allowed in a handler, so the notice
     is emitted with write() rather than stdio, and errno is preserved for
     whatever code was interrupted. */
  static const char notice[] = "\nKill signal detected. Cleaning up...\n";
  int saved_errno = errno;

  signal_caught = signum;

  /* Re-arm the handler in case the platform resets the disposition
     once a signal has been delivered. */
  signal(signum,catch_alarm);

#ifdef DEBUG
  /* Debug builds only: fprintf and strsignal are NOT async-signal-safe. */
  fprintf(stderr,"\nCaught signal: %s.\n",(char*) strsignal(signum));
#endif

  if (write(STDERR_FILENO, notice, sizeof(notice) - 1) < 0) {
    /* Nothing useful can be done about a failed write from here. */
  }

  errno = saved_errno;
}

void clean_up (struct foremostState* state, int signum) {
  fprintf(stderr,"Cleaning up...\n");
  fprintf(state->auditFile,
	  "\nCaught signal: %s. Program is terminating early\n",
	  (char*) strsignal(signum));  
  if(closeFile(state->auditFile)){
    fprintf(stderr,"Error closing %s/audit.txt -- %s",state->outputdirectory,
	    (char*) strerror(ferror(state->auditFile))); 
  }  
  exit(1);
}



/* display Position: Tell the user how far through the infile we are */
int displayPosition(int *units,
		    unsigned long long pos,
		    unsigned long long size, 
		    char *fn) {

  /* Name of the unit the position is reported in; stays NULL when
     *units is not one of the known UNITS_* values. */
  const char *label = NULL;
  double percentDone = (((double)pos)/(double)(size) * 100);
  double scaled = pos;
  int remaining;

  /* Bring the raw byte count down to the unit used on the last call */
  for (remaining = *units; remaining > 0; remaining--) {
    scaled = scaled / 1024;
  }

  /* Promote to the next larger unit for as long as the value is too big;
     the caller's *units is updated so the next call starts from there */
  while (scaled > 1023) {
    scaled = scaled / 1024;
    ++(*units);
  }

  switch (*units) {
  case UNITS_BYTES:  label = "bytes";  break;
  case UNITS_KILOB:  label = "KB";     break;
  case UNITS_MEGAB:  label = "MB";     break;
  case UNITS_GIGAB:  label = "GB";     break;
  case UNITS_TERAB:  label = "TB";     break;
  case UNITS_PETAB:  label = "PB";     break;
  case UNITS_EXAB:   label = "EB";     break;
  default:           label = NULL;     break;
  }

  if (label == NULL) {
    /* Beyond the largest unit we know about: admit defeat, but still
       report success so the caller keeps going. */
    fputs ("Unable to compute progress.\n", stdout);
    return FOREMOST_OK;
  }

  printf ("\n%s: %5.1f%% done (%.1f %s read)\n",
	  fn, percentDone, scaled, label);
  return FOREMOST_OK;
}




/*
  digBuffer: looks for any of our tags within a buffer and extracts and writes 
             them to disk...
  Args:
  state           -- The foremost state object
  buf             -- the buffer to dig
  infile          -- the file we are reading
  shortestneedle  -- shortest needle we are looking for
  longestneedle   -- longest needle we are looking for
  maxchars        -- size of the largest extraction we would make
  lengthofbuf     -- length of buf
  offset          -- where does buf start within the infile
*/

int digBuffer(struct foremostState* state, char* buf, FILE* infile, 
	      int shortestneedle, int longestneedle, int maxchars,
	      unsigned long long lengthofbuf, unsigned long long offset) {
  
  /* Returns FOREMOST_OK once the whole buffer has been searched, or
     FOREMOST_ERROR_FILE_WRITE as soon as an extraction cannot be written.
     The extraction buffer is released on every return path. */

  unsigned long long startLocation = 0;
  unsigned long long consumed = 0;
  int status = FOREMOST_OK;
  struct CharBucket extractbuf = {0,NULL};
  struct SearchSpecLine matchedtype;
  static struct NeedlesinHaystack_ret currnh = {-1,NULL};
  struct NeedlesinHaystack_ret* nhret = &currnh;

  /* Accepted for interface compatibility; not used by this function. */
  (void) shortestneedle;
  (void) longestneedle;
    
  extractbuf.str = (char*) malloc(maxchars*sizeof(char));
  if (extractbuf.str == NULL && maxchars > 0) {
    /* NOTE(review): no dedicated out-of-memory return code is visible from
       this file, so the write error is reused -- nothing can be extracted
       without this buffer. */
    fprintf(stderr,
	    "Out of memory: couldn't allocate a %d byte extraction buffer\n",
	    maxchars);
    return FOREMOST_ERROR_FILE_WRITE;
  }
  
  /* Repeatedly call needlesinhaystack to move through the buffer finding
     matches for the filetypes we are interested in...
     nhret->foundat points to the section within buf that matched a needle.
     nhret->needlenum tells us which needle matched at nhret->foundat.
     */
  
  nhret->foundat = buf;
  while (nhret->foundat){
    
    /*  First check to make sure we haven't caught a signal */
    if (signal_caught == SIGTERM || signal_caught == SIGINT){
      clean_up(state,signal_caught);
    }

    /* The skip at the bottom of the loop (a whole block in quick mode) can
       carry us to or past the end of the buffer. Stop there instead of
       letting the unsigned "bytes remaining" subtraction wrap around. */
    consumed = (unsigned long long) (nhret->foundat - buf);
    if (consumed >= lengthofbuf) {
      break;
    }
    
    /*  Go on with searching...  */
    nhret = (struct NeedlesinHaystack_ret*) 
      needlesinhaystack(state->SearchSpec, 
			nhret->foundat, 
			lengthofbuf-consumed, 
			state->modeQuick);
    
    if(nhret->foundat != NULL){
      
      /*  We've found a header!
	  now we extract it into extractbuf and write to a file. */
      
      matchedtype = state->SearchSpec[nhret->needlenum];
      startLocation = offset + (nhret->foundat-buf);	

      /* Zero out the extraction buffer before we load new data into it */
      memset(extractbuf.str,'\x00',maxchars*sizeof(char));
      extractbuf.length = 0;
      
      /* The "startLocation" variable represents the position of the 'found' 
	 file inside of the image we're digging. Originally this
	 function had a "startLocation + 1" in the function call, which 
	 skewed the on-screen output. I've removed the +1  (JK) */

      if (state->modeVerbose) {
	printf("A %s was found at: %llu\n",
	       matchedtype.suffix,startLocation);
      }
            
      extractString(&extractbuf,startLocation,infile,matchedtype);
      
      if (writeToDisk(matchedtype.suffix,state,&extractbuf,startLocation)) {
	status = FOREMOST_ERROR_FILE_WRITE;
	break;
      }
      
      /* Skip forward past the header we just found and move on...
	 If we are in quick mode we skip to the next sector boundary */
      if (state->modeQuick){
	nhret->foundat = nhret->foundat + FOREMOST_BLOCK_SIZE;
      }else{
	nhret->foundat = nhret->foundat + matchedtype.beginlength;
      }
    }
  }      

  free(extractbuf.str);  
  return status;
}



/* digImageFile is the engine for the program. It uses the foremostState
   variable passed to it to find the image file to use, looks for the
   specified headers to find, and writes any files it finds to the disk
   Return values are defined in foremost.h                                */

int digImageFile(struct foremostState* state) {
  
  /* Every exit after the image file has been opened goes through the
     "done" label so that the file and the read buffer are always released. */

  FILE* infile = NULL;
  char *buf = NULL;
  int longestneedle = 0, shortestneedle = 0;
  unsigned long long filesize = 0, bytesread = 0, 
    fileposition = 0, filebegin = 0, beginreadpos = 0;
  long maxchars=0;
  int status = FOREMOST_OK, displayUnits = UNITS_BYTES;
  int saved_errno = 0;
  
  if (state->SearchSpec[0].suffix == NULL) {
    fputs("No valid extensions were found in your search specification.",
	  stderr);
    return FOREMOST_ERROR_NO_SEARCH_SPEC;
  }
  
  /* Figure out which is the longest needle and which is the shortest...
     We also need to know which needle snarfs the most data, so we can set 
     extractbuf.str to be that size */
  
  longestneedle  = findLongestNeedle  (state->SearchSpec);
  shortestneedle = findShortestNeedle (state->SearchSpec);
  maxchars       = findLongestLength  (state->SearchSpec);
    
  /*  Now we're ready to open the image file and start digging */
  infile = fopen(state->imagefile,"r");
  if (infile == NULL){
    fprintf(stderr, "Couldn't open input file: %s -- %s\n", 
	    (*(state->imagefile)=='\0')?"<blank>":state->imagefile,
	    strerror(errno));
    return FOREMOST_ERROR_FILE_OPEN;
  }
  fcntl(fileno(infile),F_SETFL, O_LARGEFILE);

  buf = (char*) malloc(SIZE_OF_BUFFER*sizeof(char));
  if (buf == NULL) {
    /* NOTE(review): no dedicated out-of-memory return code is visible from
       this file, so the read error is reused. */
    fprintf(stderr, "Out of memory: couldn't allocate the read buffer for %s\n",
	    state->imagefile);
    status = FOREMOST_ERROR_FILE_READ;
    goto done;
  }
  
  /* Seek to the position specified in state->skip... */
  if(state->skip > 0){
    if ((fseeko(infile,state->skip,SEEK_SET))) {

      fprintf(stderr,
	      "Couldn't skip %lld bytes at the beginning of image file %s\n",
	      (long long) state->skip,state->imagefile);
      status = FOREMOST_ERROR_FILE_READ;
      goto done;
    }

    fprintf(stderr,"Skipped the first %lld bytes of %s...\n",
	    (long long) state->skip,state->imagefile);
  }
  
  filebegin = ftello(infile);
  
  /* measureOpenFile reports failure as -1, which shows up here as the
     largest unsigned value once stored in filesize. */
  filesize = measureOpenFile(infile);
  if (filesize == (unsigned long long) -1) {
    fprintf (stderr,
	     "Couldn't measure size of image file %s\n", 
	     state->imagefile);
    status = FOREMOST_ERROR_FILE_READ;
    goto done;
  }
  
  if (state->modeVerbose) {
    fprintf (stdout, "Total file size is %llu bytes\n", filesize);
  }
  
  
  /* Now we can get started reading the image file.
     We read SIZE_OF_BUFFER bytes into memory and then
     look for headers    */   
  
  for (;;) {

    bytesread = fread(buf,1,SIZE_OF_BUFFER,infile);

    /* Check for read errors, including on a short final read. errno is
       captured before any other library call can overwrite it. */
    if (ferror(infile)) {
      saved_errno = errno;
      fprintf (stderr, 
	       "A read error occured at position %lld -- %s.\nAborting.\n",
	       (long long int)ftello(infile),strerror(saved_errno));
      status = FOREMOST_ERROR_FILE_READ;
      break;
    }

    /* A read shorter than the longest needle cannot contain a header we
       have not already examined, so we are done. */
    if (!(bytesread > longestneedle-1)) {
      break;
    }
    
    /* Find out where we are in the file and tell the user */
    fileposition = ftello(infile);
    displayPosition(&displayUnits,fileposition-filebegin,filesize,state->imagefile);
    beginreadpos = fileposition - bytesread;
    
    /* Check to see if the program has been interrupted by the user */
    if (signal_caught == SIGTERM || signal_caught == SIGINT)
      clean_up(state,signal_caught);
    
    /* Let's dig through the current buffer and write the results to disk */
    status = digBuffer(state,buf,infile,shortestneedle,longestneedle,
		       maxchars,bytesread,beginreadpos);
    
    if (status != FOREMOST_OK) {

      /* Uh oh. Something bad happened. Maybe someday we should have 
	 a nicer error handler */
      break;
    }
    
    /* We seek back a little bit in the file so that we don't miss
       any headers that bridge the gap between buffers */
    fseeko(infile, ftello(infile) - (longestneedle - 1), SEEK_SET);

  }  

 done:
  closeFile(infile);
    
  /* Clean up the memory we're using here */
  free(buf);
  return status;
}

int readSearchSpecFile(struct foremostState *state) {

  int i=0, currline=0,len=0;
  FILE *f;


  /* The buffer holds one line of the input file at a time
     The length should be more than enough for the whitespace on one line */

  char* buffer = malloc(    MAX_SUFFIX_LENGTH  * sizeof(char) +
                       (2 * MAX_STRING_LENGTH) * sizeof(char) +
		                            16 * sizeof(char) + 
		                           256 * sizeof(char) + 1);
		                              
  char* buf;
  char* token;
  char** tokenarray = malloc(6*sizeof(token));

  
  f = fopen(state->conffile,"r");  
  if (f == NULL) {
    fprintf (stderr,
	     "ERROR: Couldn't open configuration file: %s -- %s\n", 
	     state->conffile,strerror(errno));
    return FOREMOST_ERROR_FILE_OPEN;
  }
  
  /*
    We read through the file one line at a time
    skipping any lines that start with # or contain
    no tokens (as delimited by tabs and spaces...)
  */
    
  while(fgets(buffer,MAX_SUFFIX_LENGTH+2*MAX_STRING_LENGTH+16+256,f)){
    buf = buffer;
    i=0;

    len = strlen(buffer);
    
    if(buffer[len-2] == 0x0d && buffer[len-1] == 0x0a){
      buffer[len-2] = buffer[len-1];
      buffer[len-1] = buffer[len];
#ifdef DEBUG
      fprintf(stderr,"Found a CTL-M before a newline....Chomping the CTL-M\n");
#endif
      
    }

    buf = (char*) skipWhiteSpace(buf);
    token = strtok(buf," \t\n");

    /* Comments start with #, skip any line that starts with # */
    if(token == NULL || token[0] == '#'){  
      continue;
    }
    
    /* Check for the wildcard line */
    if (strcasecmp(token,"wildcard") == 0){
      token = strtok(NULL," \t\n");
      if (token != NULL){
	translate(token);
      }
      if (strlen(token) > 1){
	fprintf(stderr,"Invalid argument for wildcard option.\nWildcard must be one character long. Skipping custom wildcard\n");
      } 
      wildcard = token[0];
      continue;
    }
    
    /* Process a normal line with 3-4 tokens on it
       token[0] = suffix
       token[1] = case sensitive
       token[2] = size to snarf
       token[3] = begintag
       token[4] = endtag (optional)
    */
    currline++;
    while(token && (i < NUM_SEARCH_SPEC_ELEMENTS)){
      tokenarray[i] = token;
      i++;
      token = strtok(NULL," \t\n");
    }
    if(!(i==NUM_SEARCH_SPEC_ELEMENTS)){
      if (i==NUM_SEARCH_SPEC_ELEMENTS-1){
	tokenarray[NUM_SEARCH_SPEC_ELEMENTS-1] = "";
      }else{
	fprintf(stderr, "Error in line %d of search spec file:\n %d or %d tokens expected, %d tokens found",
		currline,NUM_SEARCH_SPEC_ELEMENTS,NUM_SEARCH_SPEC_ELEMENTS,i);
	return FOREMOST_ERROR_NO_SEARCH_SPEC;
      }
    }  
    
    /* Allocate the memory for these lines.... */
    state->SearchSpec[currline-1].suffix = malloc(MAX_SUFFIX_LENGTH*sizeof(char));
    state->SearchSpec[currline-1].begin = malloc(MAX_STRING_LENGTH*sizeof(char));
    state->SearchSpec[currline-1].end = malloc(MAX_STRING_LENGTH*sizeof(char));    
    
    /* Assign the current line to the SearchSpec object */
    memcpy(state->SearchSpec[currline-1].suffix,tokenarray[0],MAX_SUFFIX_LENGTH);
    
    if (tokenarray[1][0] == 'y' || tokenarray[1][0] == 'Y' || !strcasecmp(tokenarray[1],"yes")){
      state->SearchSpec[currline-1].casesensitive = 1;
    }else{
      state->SearchSpec[currline-1].casesensitive = 0;
    }
      
    
    state->SearchSpec[currline-1].length = atoi(tokenarray[2]);

    /* Translate returns the length of the translation, and by side-effect
       translates its arg..... */
    state->SearchSpec[currline-1].beginlength = translate(tokenarray[3]);
    memcpy(state->SearchSpec[currline-1].begin,tokenarray[3],MAX_STRING_LENGTH);

    state->SearchSpec[currline-1].endlength = translate(tokenarray[4]);
    memcpy(state->SearchSpec[currline-1].end,tokenarray[4],MAX_STRING_LENGTH);

  }
  /* Fill in one last null object so we can loop later without worrying
     about dereferencing a NULL pointer...  */
  state->SearchSpec[currline].suffix = NULL;
  state->SearchSpec[currline].casesensitive = 0;
  state->SearchSpec[currline].length = 0;
  state->SearchSpec[currline].begin = NULL;
  state->SearchSpec[currline].beginlength = 0;
  state->SearchSpec[currline].end = NULL;
  state->SearchSpec[currline].endlength = 0;
  
  fclose(f);
  free(buffer);
  return FOREMOST_OK;
}


/* 
   The default state values are:
     Output to the current directory
     Use configuration file "foremost.conf" from the current directory
*/   
   
void setStateDefaults(struct foremostState *state) {
  char *outdir = state->outputdirectory;
  const char *defaultconf = FOREMOST_DEFAULT_CONFIG_FILE;

  /* Output directory: "." padded with NUL bytes to three bytes in total */
  outdir[0] = '.';
  outdir[1] = '\0';
  outdir[2] = '\0';

  /* Configuration file: the default name, terminator included */
  memcpy(state->conffile, defaultconf, strlen(defaultconf) + 1);
}


/* copyArgvtoState: append every argument of the NULL-terminated argv
   array, each followed by a single space, to state->invocation.

   state->invocation must already hold a NUL-terminated string and is
   treated as a MAX_STRING_LENGTH-byte buffer; text that does not fit is
   silently dropped.
*/
void copyArgvtoState(char** argv, struct foremostState *state){
  const size_t capacity = MAX_STRING_LENGTH;
  char** arg;
  size_t used, room;

  for (arg = argv; *arg != NULL; ++arg) {

    /* strncat writes up to "room" characters PLUS a terminating NUL, so
       one byte of the buffer has to be held back for the terminator. */
    used = strlen(state->invocation);
    room = (used + 1 < capacity) ? capacity - used - 1 : 0;
    strncat(state->invocation, *arg, room);

    used = strlen(state->invocation);
    room = (used + 1 < capacity) ? capacity - used - 1 : 0;
    strncat(state->invocation, " ", room);
  }
}


int main (int argc, char **argv){

  char c;
  int userspecifiedoutdir = 0;
  int i = 0, j=0; 
  int readlistoffilesfromfile = 0;
  FILE* listoffiles = NULL;
  char* filename = NULL;  

  struct foremostState state = {
    (char*) malloc(MAX_STRING_LENGTH * sizeof(char)),       // imagefile
    (char*) malloc(MAX_STRING_LENGTH * sizeof(char)),       // conffile
    (char*) malloc(MAX_STRING_LENGTH * sizeof(char)),       // outputdir
    (struct SearchSpecLine*) malloc(MAX_FILE_TYPES*(3*sizeof(char *)+3*(sizeof(int)))), // specline
    0,                                                      // fileswritten
    0,                                                      // modeVerbose
    NULL,                                                   // auditFile
    (char*) malloc(MAX_STRING_LENGTH * sizeof(char)),       //invocation str  
    0,                                                      //num bytes to skip
    0                                                       // modeQuick
 }; 


  /* Before we do *anything*, we must check that SIZE_OF_BUFFER is
     divisible by FOREMOST_BLOCK_SIZE. If it's not quick mode won't work. 
     This should only happen if somebody messes with foremost.h */

  if (ldiv(SIZE_OF_BUFFER,FOREMOST_BLOCK_SIZE).rem != 0) {
    fprintf (stderr, FOREMOST_SIZEOFBUFFER_PANIC_STRING);
    exit (-1);
  }

  fprintf (stdout,FOREMOST_BANNER_STRING);

  /* Initialize our global variables */
  signal_caught = 0;
  wildcard = FOREMOST_DEFAULT_WILDCARD;

  /* Initialize the state variable */
  setStateDefaults(&state);    
  copyArgvtoState(argv,&state);

  while ((i = getopt(argc, argv, "hvVqf:c:o:s:i:")) != -1)
    {
      switch (i)
	{

	case 'V':
	  
	  fprintf (stdout,FOREMOST_COPYRIGHT_STRING);
	  exit (1);

	case 'h':

	  usage();
	  exit (1);
	  
	case 'v':
	  
	  state.modeVerbose = 1;
	  fprintf (stdout,"Verbose mode on\n");
	  break;
	  
	case 'q':

	  state.modeQuick = 1;
	  fprintf (stdout,"Quick mode on\n");
	  break;

	case 's':
	  
	  state.skip = strtoull(optarg,NULL,10);
	  fprintf (stdout,"Skipping the first %Ld bytes of each infile\n",
		   state.skip);
	  break;

	  
	case 'c':

	  /* If we use something from argv, we can free what we
	     malloc()'ed ourselves */
	  free (state.conffile);
	  state.conffile = optarg;
	  break;
	  
	case 'o':
	  
	  /* If we use something from argv, we can free what we
	     malloc()'ed ourselves */
	  free(state.outputdirectory);
	  state.outputdirectory = optarg;

	  fprintf(stdout, 
		  "Using output directory: %s\n",
		  state.outputdirectory);

	  userspecifiedoutdir = 1;
	  break;
	  
	case 'i':
	  readlistoffilesfromfile = 1;
	  filename = optarg;  
	  break;	  
	}
    }
  
  /* We're done with the command line switches. Anything else that's on
     the commmand line represents image files the user wants us to
     process. Now let's read the user's configuration file. */

  fprintf (stdout,"Using configuration file: %s\n", state.conffile);
  if (readSearchSpecFile(&state)) {
    exit(-1);
  }
  

  argv += optind;

  if (*argv != NULL || readlistoffilesfromfile) {
    
    /* Check with the user before using the default output directory */
    if (!userspecifiedoutdir){
      fprintf(stdout,"You didn't specify an output directory, using default: %s\n",state.outputdirectory);
      fprintf(stdout,"Is this OK? (Y/N)\n");
      c = getc(stdin);
      if(c != 'y' && c != 'Y'){
	exit(-1);
      }
    }
    
    /* Open the audit file */
    if(openAuditFile(&state)){
      fprintf (stderr, "Couldn't create file \"audit.txt\" in %s. Aborting.\n\n",state.outputdirectory);
      exit(-1);
    }

    /* Register the signal-handler that will write to 
       the audit file and close it if we catch a SIGINT or SIGTERM */
    if(signal (SIGINT, catch_alarm) == SIG_IGN)
      signal (SIGINT, SIG_IGN);
    if(signal (SIGTERM,catch_alarm) == SIG_IGN)
      signal (SIGTERM, SIG_IGN);
    
    /* Now we finally start reading the image files */
    
    if (readlistoffilesfromfile){
      fprintf(stdout, "Using batch mode: reading list of files from %s instead of command line",filename);
      listoffiles = fopen(filename,"r");
      if (listoffiles == NULL){
	fprintf(stderr, "Couldn't open file: %s -- %s\n", 
		(*(filename)=='\0')?"<blank>":filename,strerror(errno));
	exit(-1);
      }
      j=0;
      do {
	j++;
	
	/* Because we don't use the return value for anything else,
	   I removed the 'temp' variable (and therefore the compiler
	   warning about not casting it to an int value)  -- Jesse */

	if (fgets(state.imagefile,MAX_STRING_LENGTH,listoffiles) == NULL) {
	  
	  fprintf(stderr,"Error reading line %d of %s. Skipping line.\n", j,filename);
	  continue;
	}
	if(state.imagefile[strlen(state.imagefile)-1] == '\n'){
	  state.imagefile[strlen(state.imagefile)-1] = '\x00';
	}
	fprintf(stdout,"Opening %s.\n", state.imagefile);

	if ((i = digImageFile(&state))) {
	  handleError(&state,i);
	}
      } while (!feof(listoffiles));
      closeFile(listoffiles);
    }
    else{
      do {
      
	state.imagefile = *argv;
	fprintf(stdout,"Opening %s.\n", state.imagefile);

	if ((i = digImageFile(&state))) {
	  handleError(&state,i);
	}		

	++argv;  
      } while (*argv);
    }
    
    closeFile(state.auditFile);
  }

  else {      
    fprintf(stderr,"\nError: No image files specified.\n");
    usage();
    
  }
  

  printf ("\nForemost is done.\n");
  
  /* We don't do any free() calls here because we're about to exit.
     The system will automatically return all used memory. Yes, this
     is lazy and bad, but I'm lazy and bad too. It all works out.         */

  return i;
}









