/* showmesh.c

   12-FEB-1999, David Mathog, Biology Division, Caltech

   mathog@seqaxp.bio.caltech.edu

   Copyright Caltech and David Mathog, 1999.



   This software is free.  However, it may not be redistributed,

   or incorporated into other software or packages, either in part or 

   in its entirety,  without first obtaining permission from the author.



   V1.01 16-FEB-1999,  made name comparisons case insensitive.  If

   you want the old behavior, change all strcasecmp back to strcmp.



   ------------------------------------------------------------



   Showmesh is a program which examines the files in the relations

   directory of a GCG assembly project, and compares them to a file

   "ends.txt", which contains lines like:



   p234f,p234r !comment



   That is, pairs of sequences which came from the ends of a single

   clone, arbitrarily called "left" and "right".



   The program then finds all of the fragments in the ends file

   within the relation files of a GCG assembly project, and for

   each clone outputs a status line.  The status line tells where

   each end was found, or if an end was missing.  IntraContig means

   that both ends were within a single Contig.  InterMesh (printed as "IntraMesh" in the output file) means that

   the ends were in different contigs, which should therefore be able

   to mesh together by filling in the interior of that clone through

   directed sequencing.



   Program usage



   $ showmesh relations_directory ends_file output_file

 

   For more information, run the program with no arguments.



Compilation directions:



  VMS:  $ cc/standard=ansi89/prefix=all/warn=enable=all showmesh.c

        $ link showmesh.exe



  Irix: % cc -ansi -o showmesh showmesh.c



(not tested on Unix, should work though)





*/

#include <stdlib.h>

#include <stdio.h>

#include <string.h>

#include <strings.h> /* for strcasecmp() */

#include <dirent.h>



/* One record per clone listed in the ends file.  Records are chained
   into a doubly linked list through the last/next members. */
typedef struct isclone {
  char *left;               /* name of the sequence from the LEFT end */
  char *right;              /* name of the sequence from the RIGHT end */
  char *c_left;             /* contig holding LEFT, NULL until one is found */
  char *c_right;            /* contig holding RIGHT, NULL until one is found */
  struct isclone *last;     /* previous clone record, NULL if none */
  struct isclone *next;     /* following clone record, NULL if none */
} CLONENODE;



/* Anchors of the doubly linked clone list.  All three start out NULL,
   i.e. the list is empty until main() has read a clone from the ends file. */

CLONENODE * firstcnode=NULL;  /* head of the clone list; set when the first clone record is allocated */

CLONENODE * lastcnode=NULL;  /* most recently allocated clone record, i.e. the tail of the list while the ends file is being read */

CLONENODE * prevcnode=NULL;  /* record that was the tail just before lastcnode was appended (the next-to-last node), NULL if none */



/* initcnode: put a freshly allocated clone record into its empty state.

   lastcnode - record to reset; must point to valid storage.

   Every member is set to NULL: no sequence names, no contigs assigned,
   and no neighbours in the list.  Nothing is freed, so the record must
   not own any strings when this is called. */
void initcnode(CLONENODE *lastcnode){
  /* names and contigs */
  lastcnode->left = lastcnode->right = NULL;
  lastcnode->c_left = lastcnode->c_right = NULL;

  /* list links */
  lastcnode->last = lastcnode->next = NULL;
}



int main(int argc, char *argv[]){



char *relfilename;

char *endsfilename;

char *outfilename;

#define MAXINPLINE 1000

char cinpline[MAXINPLINE];

char *inpline=&cinpline[0];

char cfrag1[200];

char *frag1=&cfrag1[0];

char cfrag2[200];

char *frag2=&cfrag2[0];

char crelfilename[MAXINPLINE];

char *rrelfilename=&crelfilename[0];

char *cwork;

char *twork;

char *clonename;

FILE *relfile;

FILE *endsfile;

FILE *outfile;

struct dirent *dp;

DIR *  dir_pointer;

int  clonecount=0;



/* process the input parameters */



  if(argc != 4){

    (void)fprintf(stderr,"\n\n"\

    "$ showmesh relations_directory ends_file output_file\n\n"\

    "  Where:\n"\

    "    relations_directory is the path to the GCG relations directory\n"\

    "    end_file is the name/location of the ends file\n"\

    "    output_file (optional) is the output file\n\n\n"\

    "The input file records have the format:\n"\

    "  Left,right !comments\n"\

    "    Left is the name of the sequence from the left end of the clone\n"\

    "    Right is the sequence from the right end\n"\

    "    Comments is any string\n\n"\

    "    The cases of the names used for Left, and Right need not match those\n"\

    "       used in the assembly project itself\n\n"\

    "The output file records consist of 5 columns, which contain:\n"\

    "  Left sequence name\n"\

    "  Contig holding left sequence or MISSING\n"\

    "  Right sequence name\n"\

    "  Contig holding right sequence or MISSING\n"\

    "  Status for clone, one of:\n"\

    "    left_messing, right_missing, IntraContig, or IntraMesh\n\n");

    exit(EXIT_FAILURE);

  }



/* set up relations directory */



  relfilename=malloc(strlen(argv[1]) * sizeof(char));

  if(relfilename != NULL){

     (void) strcpy(relfilename,argv[1]);

  }

  else {

    (void)fprintf(stderr,"showmesh: fatal error: could not store relationsfile name\n");

    exit(EXIT_FAILURE);

  }



  for(twork=cwork=relfilename,twork++ ; *twork != '\0' ; cwork++,twork++){}





/* set up ends file */



  endsfilename=malloc(strlen(argv[2]) * sizeof(char));

  if(endsfilename != NULL){

     (void) strcpy(endsfilename,argv[2]);

  }

  else {

    (void)fprintf(stderr,"showmesh: fatal error: could not store ends file name\n");

    exit(EXIT_FAILURE);

  }



  (void) fprintf(stderr," about to open %s\n",endsfilename);

  endsfile=fopen(endsfilename,"r");

  if(endsfile == NULL){

    (void)fprintf(stderr,"showmesh: fatal error: could not open the ends file\n");

    exit(EXIT_FAILURE);

  }



/* load data from ends file */



  for(;fgets(inpline,MAXINPLINE,endsfile) != NULL;){

     if(*inpline != '!'){

       if(lastcnode != NULL)prevcnode=lastcnode;

       lastcnode=malloc(sizeof(CLONENODE));

       if(lastcnode == NULL){

         (void)fprintf(stderr,"showmesh: fatal error: out of memory while reading ends file\n");

         exit(EXIT_FAILURE);

       }

       if(firstcnode==NULL)firstcnode=lastcnode;

       initcnode(lastcnode);



       frag1 = strtok(inpline,"! ,   "); /*exclam,space, comma, and tab*/

       frag2 = strtok(NULL,"! ,   "); /*exclam,space, comma, and tab*/



       if(frag1 == NULL || frag2 == NULL){

         (void)fprintf(stderr,"showmesh: fatal error: bad line in ends file: %s\n",inpline);

         exit(EXIT_FAILURE);

       }



       lastcnode->left = malloc((1+strlen(frag1))*sizeof(char));

       lastcnode->right = malloc((1+strlen(frag2))*sizeof(char));

       if(lastcnode->left==NULL || lastcnode->right == NULL){

         (void)fprintf(stderr,"showmesh: fatal error: out of memory during read of ends file\n");

         exit(EXIT_FAILURE);

       }



       (void) strcpy(lastcnode->left,frag1);

       (void) strcpy(lastcnode->right,frag2);



       if(prevcnode != NULL){

          lastcnode->last = prevcnode;

          prevcnode->next = lastcnode;

       }



       clonecount++;

    }

  }

  (void) fprintf(stderr,"Read in ends for %d clones \n",clonecount);



/* set up output file */



  outfilename=malloc(strlen(argv[3]) * sizeof(char));

  if(outfilename != NULL){

     (void) strcpy(outfilename,argv[3]);

  }

  else {

    (void)fprintf(stderr,"showmesh: fatal error: could not store output file name\n");

    exit(EXIT_FAILURE);

  }



  outfile=fopen(outfilename,"w");

  if(outfile == NULL){

    (void)fprintf(stderr,"showmesh: fatal error: could not open a output file\n");

    exit(EXIT_FAILURE);

  }

  



/* */



  dir_pointer = opendir(relfilename);

  for (dp = readdir(dir_pointer); dp != NULL; dp = readdir(dir_pointer)){



     clonename = malloc((1+strlen(dp->d_name))*sizeof(char));

     if(clonename == NULL){

       (void)fprintf(stderr,"showmesh: fatal error: out of memory while processing contigs\n");

       exit(EXIT_FAILURE);

     }

     (void) strcpy(clonename,dp->d_name);



     (void) strcpy(rrelfilename,relfilename);

     switch (*cwork){

       case ':':     /* VMS logical name */

       case ']':     /* VMS directory specification */

         (void) strcat(rrelfilename,dp->d_name);

         break;

       default:      /* Unix directory specification */

         (void) strcat(rrelfilename,"/");

         (void) strcat(rrelfilename,dp->d_name);

         break;

     }

  

     relfile = fopen(rrelfilename,"r");

     if(relfile == NULL){

       (void) fprintf(stderr,"showmesh: fatal error: could not open relations file: %s\n",rrelfilename);

       exit(EXIT_FAILURE);

     }



/* scan down, ignoring lines up to ".." */



     for(;fgets(inpline,MAXINPLINE,relfile) != NULL;){

       if(strstr(inpline,"..") != NULL) break;

     }



/* process the rest of the lines */



     for(;fgets(inpline,MAXINPLINE,relfile) != NULL;){

       frag1 = strtok(inpline,"! ,   "); /*exclam,space, comma, and tab*/

       if(frag1 == NULL){

         (void) fprintf(stderr,"showmesh: fatal error: bad line in relationfile: %s\n",rrelfilename);

         (void) fprintf(stderr,"showmesh: fatal error: line is: %s\n",inpline);

         exit(EXIT_FAILURE);

       }

       for(lastcnode=firstcnode; lastcnode != NULL; lastcnode=lastcnode->next){

         if(lastcnode->c_left == NULL){

           if(strcasecmp(lastcnode->left,frag1)==0)lastcnode->c_left = clonename;

         }

         if(lastcnode->c_right == NULL){

           if(strcasecmp(lastcnode->right,frag1)==0)lastcnode->c_right = clonename;

         }

       }

     }

/* */



     if(fclose(relfile) != 0){

       (void) fprintf(stderr,"showmesh: fatal error: could not close relations file: %s\n",rrelfilename);

       exit(EXIT_FAILURE);

     }

     else {

        (void) fprintf(stderr,"opened file %s\n",dp->d_name);

     }

  }

  (void) closedir(dir_pointer);



/* Now look through the cnode list and output only those which have ends in 

different contigs */



  for(lastcnode=firstcnode; lastcnode != NULL; lastcnode=lastcnode->next){

    if(lastcnode->c_left == NULL){

      if(lastcnode->c_right == NULL){

        (void) fprintf(outfile,"%15.15s %15.15s %15.15s %15.15s both_missing\n",

           lastcnode->left,"MISSING",

           lastcnode->right,"MISSING");

      }

      else {

        (void) fprintf(outfile,"%15.15s %15.15s %15.15s %15.15s left_missing\n",

           lastcnode->left,"MISSING",

           lastcnode->right,lastcnode->c_right);

      }

    }

    else {

      if(lastcnode->c_right == NULL){

        (void) fprintf(outfile,"%15.15s %15.15s %15.15s %15.15s right_missing\n",

           lastcnode->left,lastcnode->c_left,

           lastcnode->right,"MISSING");

      }

      else {

        if(strcasecmp(lastcnode->c_right,lastcnode->c_left) != 0){ 

         (void) fprintf(outfile,"%15.15s %15.15s %15.15s %15.15s IntraMesh\n",

           lastcnode->left,lastcnode->c_left,

           lastcnode->right,lastcnode->c_right);

        }

        else {

         (void) fprintf(outfile,"%15.15s %15.15s %15.15s %15.15s IntraContig\n",

           lastcnode->left,lastcnode->c_left,

           lastcnode->right,lastcnode->c_right);

        }

      }

    }



  }



} /* end of main() */

