#include "data.h"

/*
 * Pop1 - create a population holding the single sequence E.
 *
 * filename: passed to String(); the result becomes the population name.
 * E:        stored as entity[1] (the handle itself is stored).
 * A:        alphabet, stored in the population.
 *
 * Residue counts are tallied over E; max_leng, min_leng and total are all
 * set to LenSeq(E); calcpopfreq() then fills in the residue frequencies.
 * Returns the new population.
 */
p_type	Pop1(char *filename, e_type E,a_type A)
{
	p_type	P;
	int	s,r;

	NEW(P,1,population_type);
	P->name = String(filename); P->A = A;
	P->nent = 1; 
	NEW(P->entity,2,e_type);	/* slot 0 unused; entities start at 1 */
	/* NOTE(review): the tally below assumes NEW() zero-fills -- confirm */
	NEW(P->counts,nAlpha(A)+1,long);
	P->entity[1]=E;
	for(s=1; s<= (int) LenSeq(E); s++){
		r = ResSeq(s,E);
		P->counts[r]++;
	}
	P->max_leng = P->min_leng = P->total = LenSeq(E);
	P->tfreq = NULL;	/* makes calcpopfreq() allocate the array */
	calcpopfreq(P);
	return (P);
}

/* Pop - build a population from the named file; forwards to population(). */
p_type	Pop(char *filename,a_type A)
{
	p_type	result = population(filename,A);

	return result;
}

/* Pop_fptr - build a population from an open stream; forwards to */
/* fptr_population(). */
p_type	Pop_fptr(FILE *fptr,a_type A) 
{
	p_type	result = fptr_population(fptr,A);

	return result;
}

/*
 * fptr_population - create a population over alphabet A from the
 * sequences in the already opened stream fptr.
 *
 * The population name is set to "temp_file".  count_pop_entities() is run
 * from the stream's current position to count the sequences and record
 * their lengths; the stream is then rewound and each sequence is read with
 * ReadSeq().  Residue counts and the total residue count are accumulated
 * while reading, and calcpopfreq() derives the frequencies.
 * The stream is not closed here.
 */
p_type	fptr_population(FILE *fptr,a_type A) 
{
	p_type	pop;
	e_type	E;
	int	k,s,r;
	int	seq_len[MAX_NUMBER_SEQS+1];

	NEW(pop,1,population_type);
	NEW(pop->name,25,char);
	strcpy(pop->name,"temp_file");
	pop->A = A;

	/* pass 1: count sequences and record their lengths */
	pop->nent = count_pop_entities(fptr,pop,seq_len);

	NEW(pop->entity,pop->nent+1,e_type);
	NEW(pop->counts,nAlpha(A)+1,long);
	pop->total = 0;

	/* pass 2: go back to the start and read the sequences */
	rewind(fptr);
	k = 1;
	while(k <= pop->nent){
		E = ReadSeq(fptr,k,seq_len[k],A);
		pop->entity[k] = E;
		for(s=1; s<=(int)LenSeq(E); s++){
			r = ResSeq(s,E);
			pop->counts[r]++;
		}
		pop->total += LenSeq(E);
		k++;
	}

	pop->tfreq = NULL;
	calcpopfreq(pop);
	return (pop);
}

/*
 * population - create a population over alphabet A from the sequences in
 * the file named filename.
 *
 * The file is opened twice through OpenPopFile(): once so that
 * count_pop_entities() can count the sequences and record their lengths,
 * and a second time so that ReadSeq() can read each sequence.  Residue
 * counts and the total residue count are accumulated while reading, and
 * calcpopfreq() derives the frequencies.
 */
p_type	population(char *filename,a_type A) 
{
	p_type	pop;
	FILE	*in;
	e_type	E;
	int	k,s,r;
	int	seq_len[MAX_NUMBER_SEQS+1];

	NEW(pop,1,population_type);
	NEW(pop->name,strlen(filename)+2,char);
	strcpy(pop->name,filename);
	pop->A = A;

	/* pass 1: count sequences and record their lengths */
	in = OpenPopFile(pop);
	pop->nent = count_pop_entities(in,pop,seq_len);
	fclose(in);

	NEW(pop->entity,pop->nent+1,e_type);
	NEW(pop->counts,nAlpha(A)+1,long);
	pop->total = 0;

	/* pass 2: reopen the file and read the sequences */
	in = OpenPopFile(pop);
	for(k=1; k<=pop->nent; k++){
		E = ReadSeq(in,k,seq_len[k],A);
		pop->entity[k] = E;
		s = 1;
		while(s <= (int)LenSeq(E)){
			r = ResSeq(s,E);
			pop->counts[r]++;
			s++;
		}
		pop->total += LenSeq(E);
	}
	fclose(in);

	pop->tfreq = NULL;
	calcpopfreq(pop);
	return (pop);
}

/*
 * OpenPopFile - open the file named P->name for reading.
 *
 * On failure a message naming the file is written to stderr and
 * pop_error() is called.  Returns the stream from fopen().
 */
FILE    *OpenPopFile(p_type P)
{
        FILE    *in = fopen(P->name,"r");

        if(in != NULL) return in;

        fprintf(stderr,"Could not open file \"%s\"\n",P->name);
        pop_error("File does not exist!\n");
        return in;
}

/*
 * NilPop - destroy population P.
 *
 * Calls NilSeq() on every entity 1..nent, then frees the name, the entity
 * array, the counts array, the frequency array and P itself.
 * A NULL P is accepted and ignored.
 * Always returns a NULL p_type.
 */
p_type	NilPop(p_type P)
{
	int i;

	if(P == NULL) return (p_type) NULL;
	for(i=1; i<=P->nent; i++){ NilSeq(P->entity[i]); }
	free(P->name);
	free(P->entity);
	free(P->counts);
	free(P->tfreq);		/* free(NULL) is a no-op, no guard needed */
	free(P);
	return (p_type) NULL;
}

/*
 * LogL0Pop - returns 1.4427 times the sum, over residues b = 1..nAlpha
 * with a positive count, of count(b) * ln(freq(b)), where freq is the
 * array returned by tFreqPop(P).
 *
 * NOTE(review): 1.4427 is a rounded 1/ln(2), so the result is presumably
 * meant to be in bits; PopEntropy() uses the longer 1.442695041.
 */
double  LogL0Pop(p_type P)
{
        double  *freq = tFreqPop(P);
        double  sum = 0.0;
        double  n,r;
        int     b;

        for(b=1; b<= nAlpha(P->A); b++){
                if(CountsPop(b,P) <= 0) continue;   /* skip absent residues */
                n = (double) CountsPop(b,P);
                r = freq[b];
                sum += n * log(r);
        }
        return (1.4427*sum);
}

/*
 * PopSeqLengths - returns a newly allocated array of NSeqsPop(P)+1 ints
 * in which element n (1..NSeqsPop(P)) is LenSeqP(n,P).  Element 0 is not
 * assigned here.
 */
int	*PopSeqLengths(p_type P)
{
	int	*lengths;
	int	n;

	NEW(lengths,NSeqsPop(P) +1,int);
	n = 1;
	while(n <= NSeqsPop(P)){
		lengths[n] = LenSeqP(n,P);
		n++;
	}
	return lengths;
}

/******************** Counting and Numbering Operations *******************/
/*
 * count_pop_entities - scan fptr and count the sequences it contains.
 *
 * Everything before the first '>' is skipped.  Each '>' starts a header
 * line, which is skipped up to its newline; the alphabetic characters that
 * follow, up to the next '>' or end of file, are counted as the length of
 * that sequence.  Characters that are neither alphabetic nor whitespace
 * are reported on stderr together with the rest of their line, and are
 * not counted.
 *
 * P:     max_leng and min_leng are reset (0 and 1000000) and then updated
 *        with each sequence length.
 * nsize: nsize[k] receives the length of the k-th sequence (k from 1).
 *
 * Calls pop_error() once the sequence index reaches MAX_NUMBER_SEQS.
 * Returns the number of sequences found.
 */
int     count_pop_entities(FILE *fptr,p_type P, int nsize[])
{
	int	i=0,length;
	int	c;	/* int, not char: fgetc() can return EOF */

	P->max_leng = 0; P->min_leng = 1000000; 
	/* skip ahead to the first header */
	while((c=fgetc(fptr))!=EOF){ if(c=='>') break; }
	for(i=1,length=0;c!=EOF;length=0,i++) { 
		/* discard the remainder of the header line */
		if(c=='>') while((c=fgetc(fptr))!=EOF){ if(c=='\n') break; }
		/* c is EOF here only if the header line was cut short by */
		/* end of file; the sequence then has length 0 */
		while(c!=EOF && c!='>') {
		   if(isalpha(c)) length++;
		   else if(!isspace(c)) {
			fprintf(stderr,"seq %d: illegal character -> %c",i,c);
			/* echo the rest of the offending line */
			while((c=fgetc(fptr)) != EOF) {
				fprintf(stderr,"%c",c);
				if(c == '\n') break;
			}  
			fprintf(stderr,"\n");
		   } 
		   if((c=fgetc(fptr))==EOF) break; 
		}
		if(i >= MAX_NUMBER_SEQS) 
		   pop_error("too many sequences; reset MAX_NUMBER_SEQS");
		P->max_leng = MAX(int,P->max_leng,length);
		P->min_leng = MIN(int,P->min_leng,length);
		nsize[i] = length;
	}
	i--;	/* the loop leaves i one past the last sequence */
	return i;
}

/*
 * PutPop - write a short summary of P to fptr: the population name, the
 * number of sequences (nent) and the min_leng-max_leng length range.
 * Returns P.
 */
p_type	PutPop(FILE *fptr,p_type P)
{
	/* heading */
	fprintf(fptr,"\n  input file:\n");

	/* name and sequence count */
	fprintf(fptr,"\tname: \"%s\"\n\ttotal sequences: %d",P->name,P->nent);

	/* length range */
	fprintf(fptr,"\n\tsequence lengths: %d-%d residues\n",P->min_leng,P->max_leng);

	return P;
}

/*********************** Put Pop Entities Operations **********************/
/*
 * PutPopEs - call PutPopE() for every entity of P whose SeqI() value is
 * non-zero, then write two newlines to fptr.  Returns P.
 */
p_type	PutPopEs(FILE *fptr,p_type P)
{
	int     i = 1;

	while(i <= P->nent) {
	   if(SeqI(P->entity[i]) != 0) {
		PutPopE(fptr, i,P);
	   }
	   i++;
	}
	fprintf(fptr,"\n\n");
	return P;
}

/*
 * PutPopE - print the ith entity of P to fptr with PutSeq().
 * Nothing is printed when i is outside 1..nent.  Returns P.
 */
p_type	PutPopE(FILE *fptr, int i, p_type P)
{
	e_type	E;

	if(i <= 0 || i > P->nent) return P;	/* index out of range */

	E = P->entity[i];
	PutSeq(fptr,E,P->A);
	return P;
}

/*
 * PutPopPIDs - for every entity of P whose SeqI() value is non-zero,
 * write "#<SeqI> " followed by the output of PutSeqID() to fptr; then
 * write two newlines.  Returns P.
 */
p_type	PutPopPIDs(FILE *fptr, p_type P)
{
	e_type	E;
	int     i = 1;

	while(i <= P->nent) {
	   E = P->entity[i];
	   i++;
	   if(SeqI(E) == 0) continue;	/* not selected */
	   fprintf(fptr,"#%-3d ",SeqI(E));
	   PutSeqID(fptr,E);
	}
	fprintf(fptr,"\n\n");
	return P;
}

/********************** Population Frequency Operations ********************/

/*
 * calcpopfreq - calculate the residue frequencies for population P.
 *
 * Allocates P->tfreq (nAlpha+1 doubles) when it is NULL, then sets
 * tfreq[s] = counts[s] / total for s = 0..nAlpha.  When the population
 * holds no residues (total is not positive) every frequency is set to 0.0
 * rather than dividing by zero.  Returns P.
 */
p_type	calcpopfreq(p_type P)
{
    int		s;
    a_type	A=P->A;

    if(P->tfreq==NULL) NEW(P->tfreq,nAlpha(A)+1,double);
    for(s=0;s<=(int) nAlpha(A);s++) {
	if(P->total > 0)
	    P->tfreq[s] = (double) P->counts[s]/(double) P->total;
	else
	    P->tfreq[s] = 0.0;	/* empty population: avoid x/0 */
    }
    return P;
}

/*
 * PopEntropy - returns -1.442695041 times the sum of freq[i]*ln(freq[i])
 * over residues i = 1..nAlpha whose frequency is positive, where freq is
 * the array returned by tFreqPop(P).
 */
double	PopEntropy(p_type P)
{
	double	*freq = tFreqPop(P);
	double	H = 0.0;
	int	i = 1;

	while(i <= nAlpha(P->A)){
		/* zero-frequency residues are skipped */
		if(freq[i] > 0.0) {
			H += freq[i] * log(freq[i]);
		}
		i++;
	}
	return (-1.442695041*H);
}

/*
 * PutPoptFreqs - write a table of residue counts and frequencies to fptr.
 *
 * One row is printed per residue code 0..nAlpha (character, code, count,
 * frequency), followed by a TOTAL row holding P->total and the sum of the
 * printed frequencies.  Returns P.
 */
p_type	PutPoptFreqs(FILE *fptr,p_type P)
{
	int i; double T=0.0;

	fprintf(fptr,"RES    %-6s %-s\n","NUM","FREQ");
	for(i=0;i<=nAlpha(P->A);i++) {
	    /* counts[] holds longs: convert explicitly and print with %ld */
	    fprintf(fptr,"%c(%2d): %-6ld %-2.3f\n",
			AlphaChar(i,P->A),i,(long) P->counts[i],P->tfreq[i]);
	    T += P->tfreq[i];
	}
	/* the cast keeps the argument matched to %ld whatever total's type */
	fprintf(fptr,"TOTAL: %-6ld %-2.3f\n\n", (long) P->total, T);
	return P;
}

/************************* Randomization Routines ***********************/

/*
 * ShufflePop2 - redistribute the residues of P among all of its sequences.
 *
 * Every residue of every sequence is copied into a scratch array and its
 * array position is inserted into a heap under a RandomNum() key.  The
 * sequences are then refilled position by position with the residues
 * taken in delminHeap() order, so sequence lengths are unchanged while
 * residues may move between sequences.  Each entity i also gets
 * EqSeqI(i, ...) applied.  Returns P.
 */
p_type	ShufflePop2(p_type P)
{ 
	int	r,s,i,item;
	int	total = 0;
	dh_type	heap;
	e_type	E;
	char	*pool;

	/* total number of residues in the population */
	for(i=1;i<=P->nent;i++) {
		E = P->entity[i];
		total += LenSeq(E);
	}
	heap = dheap(total+2,4);
	NEW(pool,total+2,char);

	/* collect the residues, giving each slot a random key */
	item = 1;
	for(i=1;i<=P->nent;i++) {
		E = P->entity[i];
		for(s=1; s<= (int) LenSeq(E); s++){
			r = ResSeq(s,E);
			insrtHeap(item,((keytyp)RandomNum()),heap);
			pool[item] = r;
			item++;
		}
	}

	/* hand the residues back out in key order */
	for(i=1;i<=P->nent;i++) {
		E = P->entity[i];
		for(s=1; s<= (int) LenSeq(E); s++){
			item = delminHeap(heap);
			if(item==0) pop_error("shufflepop2 error");
			r = pool[item];
			EqSeq(s,r,E);
		}
		EqSeqI(i,P->entity[i]);
	}

	Nildheap(heap);
	free(pool);
	return P;
}

/*
 * ShufflePop - call ShuffleSeq() on each entity of P in turn and apply
 * EqSeqI(i, ...) to entity i.  Returns P.
 */
p_type	ShufflePop(p_type P)
{ 
	int i = 1;

	while(i <= P->nent) {
		ShuffleSeq(P->entity[i]);
		EqSeqI(i,P->entity[i]);
		i++;
	}
	return P;
}

/*
 * pop_error - write "Input Data Error: <s>" to stderr and terminate the
 * program with exit status 1.  Does not return.
 */
void	pop_error(char *s)
{
	fprintf(stderr,"Input Data Error: %s\n",s);
	exit(1);
}

