#include "sites.h"

st_type	Sites(int ntyp,int *len_elem, int nseq, int *len_seq)
/*******************************************************************
 Allocate and return a sites structure for ntyp element types and
 nseq sequences.  No sites are placed; only the storage is created.

   ntyp      number of element types (fatal error if it exceeds
             MAX_NO_TYPESITES)
   len_elem  element lengths, read at indices 1..ntyp and copied
   nseq      number of sequences
   len_seq   sequence lengths, read at indices 1..nseq; the pointer
             itself is stored in the structure (it is NOT copied)

 Per sequence: a type[] array of len_seq[n]+1 chars and an ordered
 position list.  Per type and sequence: a site counter (set to 0)
 and a pos_prob[] array of len_seq[n]+1 doubles.
 maxinc ends up as the smallest (len_elem[t] - 1), but never below 1.
*********************************************************************/
{
    st_type	S;
    int		seq, typ, longest = 0;

    if(ntyp > MAX_NO_TYPESITES) sites_error("Too many types.");

    NEW(S,1,sites_type);
    S->ntyp = ntyp;
    S->nseq = nseq;
    S->len_seq = len_seq;		/* caller's array is kept as is */

    /* per-sequence storage; remember the longest sequence for tmp[] */
    NEWP(S->type, nseq+1, char);
    NEW(S->pos, nseq+1, ol_type);
    for(seq = 1; seq <= nseq; seq++){
        longest = MAX(int,longest,S->len_seq[seq]);
        NEW(S->type[seq],(len_seq[seq]+1),char);
        S->pos[seq] = Olist(S->len_seq[seq]+1);
    }
    NEW(S->tmp,longest+1,int);

    /* per-type storage */
    NEW(S->len_elem, ntyp+1, int);
    NEWP(S->nsites,ntyp+1,int);
    NEWPP(S->pos_prob,ntyp+1,double);
    for(typ = 1; typ <= ntyp; typ++) {
        S->len_elem[typ] = len_elem[typ];
        NEW(S->nsites[typ], nseq+1,int);
        NEWP(S->pos_prob[typ],nseq+1,double);
        for(seq = 1; seq <= nseq; seq++) {
            S->nsites[typ][seq] = 0;
            NEW(S->pos_prob[typ][seq],(len_seq[seq]+1),double);
        }
        /* running minimum of (element length - 1) */
        if(typ == 1) S->maxinc = S->len_elem[typ] - 1;
        else S->maxinc = MIN(int,S->maxinc,(S->len_elem[typ]-1));
    }
    S->maxinc = MAX(int,1,S->maxinc);
    return S;
}

void	InitSites(st_type S)
/* Reset S so that it holds no sites: every position list is cleared,
   every type[n][0..len_seq[n]] cell becomes VACANT, every site count
   becomes 0, and each pos_prob[t][n] row is set to 1.0 at positions
   1..len_seq[n] with the row total (len_seq[n]) stored at index 0. */
{
    int		seq, typ, p;

    for(seq = 1; seq <= S->nseq; seq++) {
        ClearOlist(S->pos[seq]);
        for(p = 0; p <= S->len_seq[seq]; p++) S->type[seq][p] = VACANT;
    }

    for(typ = 1; typ <= S->ntyp; typ++) {
        for(seq = 1; seq <= S->nseq; seq++) {
            double	*row = S->pos_prob[typ][seq];

            S->nsites[typ][seq] = 0;
            row[0] = (double) S->len_seq[seq];
            for(p = 1; p <= S->len_seq[seq]; p++) row[p] = 1.0;
        }
    }
}

st_type CopySites(st_type S)
/* Build an empty structure shaped like S: same number of sequences and
   element types, same sequence and element lengths.  The sequence-length
   array is duplicated so the new structure gets its own copy.
   WARNING: no sites are carried over; the result is whatever Sites()
   returns for these dimensions. */
{
    int	*lengths, seq;

    NEW(lengths,S->nseq+1,int);
    seq = 1;
    while(seq <= S->nseq) {
        lengths[seq] = S->len_seq[seq];
        seq++;
    }
    return Sites(S->ntyp,S->len_elem, S->nseq, lengths);
}

void	NilSites(st_type S)
/* Destroy S and everything it points to.  Note that this includes
   S->len_seq, i.e. the sequence-length array that was handed to
   Sites() when the structure was created. */
{
    int	typ, seq;

    /* per-sequence storage */
    for(seq = 1; seq <= S->nseq; seq++) {
        NilOlist(S->pos[seq]);
        free(S->type[seq]);
    }
    free(S->pos);
    free(S->type);

    /* per-type storage */
    for(typ = 1; typ <= S->ntyp; typ++) {
        for(seq = 1; seq <= S->nseq; seq++) free(S->pos_prob[typ][seq]);
        free(S->pos_prob[typ]);
        free(S->nsites[typ]);
    }
    free(S->nsites);
    free(S->pos_prob);

    /* flat arrays, then the structure itself */
    free(S->tmp);
    free(S->len_seq);
    free(S->len_elem);
    free(S);
}

int	AddRandomSite(int t,int n, st_type S)
/* Add a type t site at a randomly drawn, unoccupied start position in
   sequence n and return that position.

   Start positions are drawn from 1..seqlen, where
   seqlen = len_seq[n] - len_elem[t] + 1.  A draw is repeated while
   OccupiedSite() reports the position as taken; after more than
   100000 such rounds print_error() is called.

   If the element does not fit into the sequence at all (seqlen < 1)
   no position can ever be drawn, so this is reported through
   print_error() instead of looping forever. */
{
    int	newsite,seqlen,iter=0;
    double	r,ran;

    seqlen = S->len_seq[n] - S->len_elem[t] + 1;
    if(seqlen < 1) {
        print_error("Element is longer than the sequence.");
        return 0;	/* only reached if print_error() returns */
    }
    do {
        /* draw until the position lies in 1..seqlen; the retry covers
           the case r == 1.0, which would give seqlen + 1 */
        do {
            ran = (double) RandomNum();
            r = ran/(double) LONG_MAX;
            newsite = (int)(r*(double) seqlen)+1;
        } while(newsite > seqlen);
        if(iter++ > 100000)
            print_error("Too many sites. Try a smaller size.");
    } while(OccupiedSite(t,n,newsite,S));
    AddSite(t,n,newsite,S);
    return newsite;
}

void	ShiftSitesM(st_type S, int t, int d)
/* Shift all type t sites |d| positions by calling ShiftSites() |d|
   times.  d > 0 passes left = TRUE, d < 0 passes left = FALSE, and
   d == 0 does nothing.  Progress is traced on stderr as "[", one '+'
   or '-' per single-step shift, then "]". */
{
    int	step, mark;
    boolean	left;

    if(d == 0) return;

    if(d < 0) {
        mark = '-';
        d = -d;
        left = FALSE;
    } else {
        mark = '+';
        left = TRUE;
    }

    fprintf(stderr,"[");
    for(step = d; step > 0; step--) {
        fprintf(stderr,"%c",mark);
        ShiftSites(S, t, left);
    }
    fprintf(stderr,"]");
}

void	ShiftSites(st_type S, int t, boolean left)
/*********************** shift sites ********************
 Move every type t site by one position, in every sequence.
 (w = len_elem[t]; 'A' = site start, 'x' = BLOCKED, 'o' = VACANT)

 left != 0:  the site start moves from site to site + 1

     site
      |
      A   x   x   x   ...   x   o       	<- before
    |+0 |+1 |+2 |+3 | ... |w-1| w |
      o   A   x   x   ...   x   x       	<- after
          |
        site + 1

     type[n][site]     = VACANT
     type[n][site + w] = BLOCKED
     type[n][site + 1] = t

 left == 0:  the site start moves from site to site - 1

         site
          |
      o   A   x   x   ...   x   x       	<- before
    |-1 |+0 |+1 |+2 | ... |w-2|w-1|
      A   x   x   x   ...   x   o       	<- after
      |
     site - 1

     type[n][site - 1]     = t
     type[n][site]         = BLOCKED
     type[n][site + w - 1] = VACANT

 The move is refused with a fatal "shift operation is blocked."
 error when the cell that has to be claimed is not VACANT, or when
 it lies outside positions 1..len_seq[n].  The range test matters:
 type[n] only has cells 0..len_seq[n], and position 0 cannot be a
 site start because 0 terminates the position arrays scanned here.
*******************************************************/
{
    int	n,k,site,w;

    w = S->len_elem[t];
    for(n=1; n<= S->nseq; n++){
        /* work from a snapshot, since the list is edited below */
        GetOlist(S->tmp,S->pos[n]);
        for(k=1;(site=S->tmp[k]) != 0; k++){
            if(S->type[n][site] != t) continue;

            if(left){
                /* cell just past the site must exist and be free */
                if(site + w > S->len_seq[n]
                        || S->type[n][site + w] != VACANT)
                    sites_error("shift operation is blocked.");
                RmOlist(site,S->pos[n]);
                S->type[n][site] = VACANT;
                /* BLOCKED is written before t so that for w == 1,
                   where both cells coincide, the start marker wins */
                S->type[n][site + w] = BLOCKED;
                S->type[n][site + 1] = t;
                InsertOlist(site + 1, S->pos[n]);
            } else {
                /* cell just before the site must exist and be free */
                if(site - 1 < 1 || S->type[n][site - 1] != VACANT)
                    sites_error("shift operation is blocked.");
                RmOlist(site,S->pos[n]);
                S->type[n][site - 1] = t;
                S->type[n][site] = BLOCKED;
                S->type[n][site + w - 1] = VACANT;
                InsertOlist(site - 1, S->pos[n]);
            }
        }
    }
}

void    GrowSites(int t, st_type S)
/*********************** grow right *************************
 Lengthen all type t sites by one position on the right.

     site
      |
      A   x   x   x   ...   x   o    	<- before (w = old length)
    |+0 |+1 |+2 |+3 | ... |w-1|	w |	 len_elem[t]++;
      A   x   x   x   ...   x   x    	<- after
           		 type[n][site + w] = BLOCKED

 Fatal "grow operation is blocked." error if the cell to be claimed
 is not VACANT or lies beyond len_seq[n] (type[n] has no cell there).

 Afterwards maxinc is recomputed as the smallest (len_elem - 1) over
 all types and kept at 1 or more, matching what Sites() does; a
 value of 0 would stop the stride loop in OccupiedSite() from
 advancing.
***************************************************************/
{
    int	w,n,k,site;

    w = S->len_elem[t]++;		/* w = old length */
    for(n=1; n<= S->nseq; n++){
        GetOlist(S->tmp,S->pos[n]);
        for(k=1;(site=S->tmp[k]) != 0; k++){
            if(S->type[n][site]==t){
                if(site + w > S->len_seq[n]
                        || S->type[n][site + w] != VACANT)
                    sites_error("grow operation is blocked.");
                S->type[n][site + w] = BLOCKED;
            }
        }
    }
    /* t is reused as the loop index from here on */
    for(S->maxinc=w+1,t=1; t<=S->ntyp; t++)
        S->maxinc = MIN(int, S->maxinc,(S->len_elem[t]-1));
    if(S->maxinc < 1) S->maxinc = 1;
}

void    ShrinkSites(int t, st_type S)
/*********************** shrink right *******************
 Shorten all type t sites by one position on the right.

     site
      |
      A   x   x   x   ...   x   x      	<- before (w = old length)
    |+0 |+1 |+2 |+3 | ... |w-2|w-1|		 len_elem[t]--;
      A   x   x   x   ...   x   o      	<- after
           		 type[n][site + w - 1] = VACANT

 Fatal error if the old length is below 3.  maxinc is lowered to
 the new (len_elem[t] - 1) when that is smaller.
*******************************************************/
{
    int	old_len, seq, idx, start;

    old_len = S->len_elem[t];
    S->len_elem[t] = old_len - 1;
    if(old_len < 3) sites_error("cannot shrink element to length < 3");

    for(seq = 1; seq <= S->nseq; seq++){
        GetOlist(S->tmp,S->pos[seq]);
        idx = 1;
        while((start = S->tmp[idx]) != 0){
            /* release the last cell of each type t site */
            if(S->type[seq][start] == t)
                S->type[seq][start + old_len - 1] = VACANT;
            idx++;
        }
    }
    S->maxinc = MIN(int, S->maxinc,(S->len_elem[t]-1));
}

void	VacateSite(int t, int n, int site, st_type S)
/*********************** vacate site *******************
 Remove the type t site that starts at `site' in sequence n.

     site
      |
      A   x   x   x   ...   x   x      	<- before
    |+0 |+1 |+2 |+3 | ... |w-2|w-1|
      o   o   o   o   ...   o   o      	<- after

 All w = len_elem[t] cells from site on become VACANT, the position
 is taken out of the sequence's list and nsites[t][n] is decremented.
 Fatal error (after a diagnostic line on stderr) if type[n][site]
 is not t.
*******************************************************/
{
    int	p, last;

    if(S->type[n][site] != t){
        fprintf(stderr,"ELEMENT %c; seq %d; site %d\n",
            'A' +t -1, n,site);
        sites_error("attempt to remove site where none exists.");
    }

    RmOlist(site,S->pos[n]);
    last = site + S->len_elem[t] - 1;
    p = site;
    while(p <= last) {
        S->type[n][p] = VACANT;
        p++;
    }
    S->nsites[t][n]--;
}

void	AddSite(int t, int n, int site, st_type S)
/*************************** add site ********************************
 Place a type t site starting at `site' in sequence n.

     site
      |
      o   o   o   o   ...   o   o      	<- before
    |+0 |+1 |+2 |+3 | ... |w-2|w-1|
      A   x   x   x   ...   x   x      	<- after

 type[n][site] becomes t, the following w - 1 cells (w = len_elem[t])
 become BLOCKED, the position is inserted into the sequence's list
 and nsites[t][n] is incremented.
 Fatal error (after a diagnostic line on stderr) if OccupiedSite()
 reports the location as taken.
***********************************************************************/
{
    int	p, last;

    if(OccupiedSite(t, n, site, S)){
        fprintf(stderr,"ELEMENT %c; seq %d; site %d\n",
            'A' +t -1, n,site);
        sites_error("attempt to add site where one exists.");
    }

    InsertOlist(site, S->pos[n]);
    S->type[n][site] = t;
    last = site + S->len_elem[t] - 1;
    p = site + 1;
    while(p <= last) {
        S->type[n][p] = BLOCKED;
        p++;
    }
    S->nsites[t][n]++;
}

boolean OccupiedSite(register int t, register int n, register int site, 
	register st_type S)
/* Return TRUE if a type t element starting at `site' in sequence n
   would touch a non-vacant cell, FALSE otherwise.

   Only a sample of the cells is inspected: site, site + maxinc,
   site + 2*maxinc, ... while below the last cell, and then the last
   cell (site + len_elem[t] - 1) itself.  Any non-zero type[][] value
   counts as occupied. */
{
    register int	p, last;

    last = site + S->len_elem[t] - 1;
    p = site;
    while(p < last) {
        if(S->type[n][p]) return TRUE;
        p += S->maxinc;
    }
    return S->type[n][last] ? TRUE : FALSE;
}

int     ChooseSite(int t, int n, st_type S)
/* Sample a start position for a type t site in sequence n, add the
   site with AddSite() and return the position.

   A random value is scaled by pos_prob[t][n][0] and the first position
   in 1..len_seq[n]-len_elem[t]+1 whose cumulative pos_prob reaches that
   value is chosen.  If no position qualifies, sites_error() is called.

   WARNING: Assumes that BLOCKED sites have zero probability.
   NOTE(review): when the random draw is exactly 0, position 1 is chosen
   even if its probability is 0 - confirm whether callers can hit this. */
{
    double  rand_no, cum_prob;
    int     site,seqlen;

    seqlen = S->len_seq[n] - S->len_elem[t] + 1;
    rand_no  =  (double) RandomNum()/(double) LONG_MAX;
    rand_no *= S->pos_prob[t][n][0];
    for(site=1,cum_prob = 0.0; site <= seqlen; site++){
        cum_prob += (double) S->pos_prob[t][n][site];
        if(cum_prob >= rand_no){
            AddSite(t,n,site,S);
            return site;
        }
    }
    sites_error("ChooseSite( ) - this should not happen!?");
    return 0;	/* not reached: sites_error() exits */
}

void	GetSitePos(int ***site_pos, st_type S)
/* Fill the caller-supplied array so that site_pos[t][n][s] is the start
   position of the s-th type t site in sequence n (s counts from 1, in
   the order the positions come out of the sequence's list).
   No terminator is written and the array is assumed large enough. */
{
    int	seq, typ, idx, slot, start;

    for(seq = 1; seq <= S->nseq; seq++){
        GetOlist(S->tmp,S->pos[seq]);
        for(typ = 1; typ <= S->ntyp; typ++){
            slot = 1;
            for(idx = 1; (start = S->tmp[idx]) != 0; idx++){
                if(S->type[seq][start] != typ) continue;
                site_pos[typ][seq][slot] = start;
                slot++;
            }
        }
    }
}

double	MissInfoSites(int typ, st_type S)
/* Return the missing position information, in bits, for element type
   typ, computed from pos_prob[typ].

   For each sequence s the probabilities at positions 1..end
   (end = len_seq[s] - SiteLen(typ,S) + 1) are normalised to zeta[s][.].
   With lambda = 1/end the result is
       -1.4427 * ( sum zeta*log(lambda) - sum zeta*log(zeta) )
   where terms with zeta == 0 are left out of the second sum.

   NOTE(review): a sequence whose probabilities sum to 0 makes the
   normalisation divide by zero - confirm callers never pass that. */
{
    double	term2,term3;
    double	d;
    double	**zeta,lambda;
    int	s,t,end;
    double  **pos_prob=S->pos_prob[typ];

    NEWP(zeta, S->nseq+1,double);
    for(s=1; s<=S->nseq; s++) {
        /* indices 1..end are used and end can equal len_seq[s],
           so one extra cell is needed */
        NEW(zeta[s], S->len_seq[s]+1,double);
        end = S->len_seq[s] - SiteLen(typ,S) + 1;
        for(d=0.0, t=1; t<= end; t++) d += pos_prob[s][t];
        for(t=1; t<= end; t++)  zeta[s][t] = pos_prob[s][t]/d;
    }
    for(term2=0.0,s=1; s<=S->nseq; s++) {
        end = S->len_seq[s] - SiteLen(typ,S) + 1;
        lambda = 1.0/(double) end;
        for(t=1; t<= end; t++)
            term2 += zeta[s][t]*log(lambda);
    }
    for(term3=0.0,s=1; s<=S->nseq; s++) {
        end = S->len_seq[s] - SiteLen(typ,S) + 1;
        for(t=1; t<= end; t++)
            if(zeta[s][t]>0.0) term3 += zeta[s][t]*log(zeta[s][t]);
    }
    for(s=1; s<=S->nseq; s++) free(zeta[s]);
    free(zeta);
    return (-1.4427*(term2 - term3));
}

void	OrderSites(int n, int *order, st_type S)
/* Write into order[1..] the element type of each site in sequence n,
   in the order the site positions come out of the sequence's list,
   followed by a terminating 0.
   WARNING: order[] is assumed long enough for all sites plus the 0. */
{
    int	k = 1;

    GetOlist(S->tmp, S->pos[n]);
    while(S->tmp[k] != 0) {
        order[k] = S->type[n][S->tmp[k]];
        k++;
    }
    order[k] = 0;
}

void	PosTSites(int t, int n, int *pos, st_type S)
/* Write into pos[1..] the start positions of the type t sites in
   sequence n, followed by a terminating 0.  pos[] is assumed to be
   long enough. */
{
    int	k, out = 1, start;

    GetOlist(S->tmp, S->pos[n]);
    for(k = 1; (start = S->tmp[k]) != 0; k++) {
        if(S->type[n][start] != t) continue;
        pos[out] = start;
        out++;
    }
    pos[out] = 0;
}

void	PosSites(int n, int *pos, st_type S)
/* Write into pos[1..] the start positions of all sites in sequence n
   (any type), followed by a terminating 0.  pos[] is assumed to be
   long enough. */
{
    int	k = 1;

    GetOlist(S->tmp, S->pos[n]);
    while(S->tmp[k] != 0) {
        pos[k] = S->tmp[k];
        k++;
    }
    pos[k] = 0;
}

void    PutSites(FILE *fptr, st_type S)
/* Print the sites of every sequence that has at least one, one line
   per sequence:
  	e.g.    3: D(24)-A(35)-C(45)
   i.e. the sequence number, then each site as type(position) joined by
   '-'.  Types are printed as letters ('A' for type 1) when there are
   at most 26 types, otherwise as numbers.  A blank line is written
   before and after the listing. */
{
    int	n, i, typ, total, kind, first;

    fprintf(fptr,"\n");
    for(n = 1; n <= S->nseq; n++){
        total = 0;
        for(typ = 1; typ <= S->ntyp; typ++) total += S->nsites[typ][n];
        if(total <= 0) continue;	/* nothing to show for this one */

        fprintf(fptr,"%3d: ",n);
        first = 1;
        for(i = 1; i <= S->len_seq[n]; i++){
            kind = S->type[n][i];
            if(kind <= 0) continue;	/* only positive cells are site starts */
            if(!first) fprintf(fptr,"-");
            first = 0;
            if(S->ntyp <= 26) fprintf(fptr,"%c(%d)",('A'+kind-1),i);
            else fprintf(fptr,"%d(%d)",kind,i);
        }
        fprintf(fptr,"\n");
    }
    fprintf(fptr,"\n");
}

void	sites_error(char *s)
/* Report a fatal error: print "sites error: " followed by s on stderr
   and terminate the program with exit status 1. */
{
    fprintf(stderr,"sites error: %s\n",s);
    exit(1);
}

