
/* Render a LOGICAL value as a one-letter string: "T" when it equals
   LOGICAL_TRUE, "F" for anything else. */
#define LOG_TO_STR(flag) ((LOGICAL_TRUE == (flag)) ? "T" : "F")

/*
 * MAKE_TREE_SCAN(datatype, op, id)
 *
 * Expands to the definition of
 *
 *   int tree_<op>_scan_<datatype>(desc, numdim, proc, scanout, scanin,
 *                                 scanblocks, totalblocks, scandim)
 *
 * which combines per-block values across the processors of one dimension
 * using a binary tree of point-to-point messages.
 *
 *   datatype     element type of scanout/scanin; also pasted into the name
 *   op           two-argument combiner, invoked as op(a,b); must be a plain
 *                identifier because it is pasted into the function name
 *   id           value used to initialise scanin and the running prefix
 *                (presumably the identity element of op)
 *
 * Parameters of the generated function:
 *   desc         descriptor table; only desc[d][TNUMPROC] is read, for
 *                d = 0 .. scandim
 *   numdim       not used by this routine
 *   proc         linear id of the calling processor
 *   scanout      totalblocks elements; combined in place with the values
 *                received from partner processors during the upward phase
 *   scanin       totalblocks elements; overwritten with the result
 *   scanblocks   length of each group used for the local prefix step
 *                (assumes totalblocks is a multiple of it -- TODO confirm)
 *   totalblocks  number of elements in scanout/scanin
 *   scandim      index of the dimension being scanned
 *
 * Returns 0.  Exits the process with status -1 if the scratch buffer
 * pointer is NULL.
 *
 * Relies on ceil_log(), init_stack(), push_stack_newlist(),
 * add_stack_list(), pop_stack_list(), get_stack_list(), deinit_stack(),
 * SEND(), RECV() and TNUMPROC being provided where the macro is expanded.
 */
#define MAKE_TREE_SCAN(datatype,op,id) \
int tree_##op##_scan_##datatype(int        desc[][6],\
                                int        numdim,\
                                int        proc,\
                                datatype   *scanout,\
                                datatype   *scanin,\
                                int        scanblocks,\
                                int        totalblocks,\
                                int        scandim)\
{\
  int i, j, prochop;\
  int procsinscandim;\
  int level, levels;\
  int m1, m2;\
  int procindim;\
  datatype *commbuf;\
  datatype base;\
\
  (void)numdim;\
  for (i=0;i<totalblocks;i++)\
    scanin[i]=(id);\
  /* NOTE(review): alloca() is not documented to return NULL on failure,\
     so this test is most likely a formality. */\
  if ((commbuf=(datatype*)alloca(sizeof(datatype)*totalblocks))==NULL)\
    exit(-1);\
  procsinscandim=desc[scandim][TNUMPROC];\
  levels=ceil_log(procsinscandim);\
  /* Processor-id stride between neighbours along the scan dimension:\
     the product of the processor counts of all lower dimensions. */\
  prochop=1;\
  for (i=0;i<scandim;i++) {\
    prochop*=desc[i][TNUMPROC];\
  }\
  /* Position of this processor along the scan dimension. */\
  procindim=(proc/prochop)%procsinscandim;\
  init_stack(totalblocks,sizeof(datatype),procsinscandim);\
  /* Upward phase.  At each level m2 is the single bit 2^level and m1 is\
     the mask of the low level+1 bits.  A processor whose low level+1 bits\
     are all zero saves its current scanout on the stack, receives from the\
     partner m2 positions above and folds that in; a processor whose low\
     level+1 bits equal m2 sends its scanout to the partner m2 positions\
     below. */\
  m1=m2=1;\
  for (level=0;level<levels;level++) {\
    if ((procindim&m1)==0 && (procindim+m2)<procsinscandim) {\
      push_stack_newlist();\
      add_stack_list(scanout,sizeof(datatype),totalblocks);\
      RECV(proc+m2*prochop,commbuf,totalblocks*sizeof(datatype));\
      for (i=0;i<totalblocks;i++)\
        scanout[i]=op(scanout[i],commbuf[i]);\
    } else {\
      if (((procindim|m2)&m1)==m2 && (procindim-m2)>=0) {\
        SEND(proc-m2*prochop,scanout,totalblocks*sizeof(datatype));\
      }\
    }\
    m1=(m1<<1)+1;\
    m2=(m2<<1)+0;\
  }\
/*  printf("%d: Scanout: ",proc);\
  for (i=0;i<totalblocks;i++) {\
    printf("%3.0f ",scanout[i]);\
  }\
  puts(""); */\
  /* Local step: within every group of scanblocks consecutive blocks,\
     fold the running prefix of the preceding scanout values of that\
     group into scanin. */\
  for (i=0;i<totalblocks;i+=scanblocks) {\
     base=(id);\
     for (j=i;j<(i+scanblocks);j++) {\
       scanin[j]=op(scanin[j],base);\
       base=op(base,scanout[j]);\
     }\
  }\
  push_stack_newlist();\
  /* Downward phase: the levels are revisited in reverse order with the\
     same masks.  A processor that received at a level now retrieves the\
     scanout it saved there, combines it with its scanin and sends the\
     result to that partner; a processor that sent at a level now receives\
     its scanin. */\
  for (level=levels-1;level>=0;level--) {\
    m1=m1>>1;\
    m2=m2>>1;\
    if ((procindim&m1)==0 && (procindim+m2)<procsinscandim) {\
      pop_stack_list();\
      get_stack_list(commbuf,sizeof(datatype),totalblocks);\
      for (i=0;i<totalblocks;i++)\
        commbuf[i]=op(commbuf[i],scanin[i]);\
      SEND(proc+m2*prochop,commbuf,totalblocks*sizeof(datatype));\
    } else {\
      if (((procindim|m2)&m1)==m2 && procindim-m2>=0) {\
        RECV(proc-m2*prochop,scanin,totalblocks*sizeof(datatype));\
      }\
    }\
  }\
/*  printf("%d: Scanin: ",proc);\
  for (i=0;i<totalblocks;i++) {\
    printf("%3.0f ",scanin[i]);\
  }\
  puts(""); */\
  deinit_stack();\
  return 0;\
}


/*
 * MAKE_TREE_SCAN_SEG(datatype, op, id)
 *
 * Expands to the definition of
 *
 *   int tree_<op>_scan_<datatype>_seg(desc, numdim, proc, scanout, scanin,
 *                                     lrout, scanblocks, totalblocks,
 *                                     scandim)
 *
 * the segmented counterpart of the function generated by MAKE_TREE_SCAN:
 * the same binary tree of point-to-point messages along one processor
 * dimension, with each block additionally carrying segment information so
 * that the combination restarts at segment boundaries.
 *
 * lrout holds one (l,m,r) triple of LOGICAL values per block, stored
 * consecutively (3*totalblocks entries in all):
 *   l  is the segment start value,
 *   m  is LOGICAL_TRUE if the scan restarted in the block,
 *   r  is the segment end value.
 *
 *   datatype     element type of scanout/scanin; also pasted into the name
 *   op           two-argument combiner, invoked as op(a,b); must be a plain
 *                identifier because it is pasted into the function name
 *   id           value used to initialise scanin and to restart a prefix
 *                (presumably the identity element of op)
 *
 * Parameters of the generated function:
 *   desc         descriptor table; only desc[d][TNUMPROC] is read, for
 *                d = 0 .. scandim
 *   numdim       not used by this routine
 *   proc         linear id of the calling processor
 *   scanout      totalblocks elements; updated in place during the upward
 *                phase
 *   scanin       totalblocks elements; overwritten with the result
 *   lrout        3*totalblocks LOGICAL values as described above; modified
 *                during both phases
 *   scanblocks   length of each group used for the local prefix step
 *                (assumes totalblocks is a multiple of it -- TODO confirm)
 *   totalblocks  number of elements in scanout/scanin
 *   scandim      index of the dimension being scanned
 *
 * Returns 0.  Exits the process with status -1 if the scratch buffer
 * pointer is NULL.
 *
 * Relies on LOGICAL, LOGICAL_TRUE, LOGICAL_FALSE, TNUMPROC, ceil_log(),
 * init_stack(), push_stack_newlist(), add_stack_list(), pop_stack_list(),
 * get_stack_list(), deinit_stack(), SEND() and RECV() being provided where
 * the macro is expanded.
 */
#define MAKE_TREE_SCAN_SEG(datatype,op,id) \
int tree_##op##_scan_##datatype##_seg(int        desc[][6],\
                                      int        numdim,\
                                      int        proc,\
                                      datatype   *scanout,\
                                      datatype   *scanin,\
                                      LOGICAL    *lrout,\
                                      int        scanblocks,\
                                      int        totalblocks,\
                                      int        scandim)\
{\
  int i, j, k, prochop;\
  int procsinscandim;\
  int level;\
  int m1, m2;\
  int procindim;\
  char *commbuf;\
  datatype *databuf;\
  LOGICAL  *lrbuf, baselr;\
  datatype base;\
  int levels;\
\
  (void)numdim;\
  for (i=0;i<totalblocks;i++)\
    scanin[i]=(id);\
  /* One message buffer: totalblocks data values followed by totalblocks\
     (l,m,r) triples.\
     NOTE(review): alloca() is not documented to return NULL on failure,\
     so this test is most likely a formality. */\
  if ((commbuf=(char*)alloca((sizeof(datatype)+3*sizeof(LOGICAL))\
              *totalblocks))\
         ==NULL)\
    exit(-1);\
  procsinscandim=desc[scandim][TNUMPROC];\
  levels=ceil_log(procsinscandim);\
  /* Processor-id stride between neighbours along the scan dimension:\
     the product of the processor counts of all lower dimensions. */\
  prochop=1;\
  for (i=0;i<scandim;i++) {\
    prochop*=desc[i][TNUMPROC];\
  }\
  /* Position of this processor along the scan dimension. */\
  procindim=(proc/prochop)%procsinscandim;\
  /* Each level saves one data value and two triples per block. */\
  init_stack(totalblocks,(sizeof(datatype)+2*3*sizeof(LOGICAL)),procsinscandim);\
  /* Upward phase.  m2 is the single bit 2^level, m1 the mask of the low\
     level+1 bits.  A processor whose low level+1 bits are all zero saves\
     its scanout and lrout, receives data plus triples from the partner m2\
     positions above, saves the received triples as well, and merges; a\
     processor whose low level+1 bits equal m2 packs scanout and lrout\
     into commbuf and sends them to the partner m2 positions below. */\
  m1=m2=1;\
  for (level=0;level<levels;level++) {\
    if ((procindim&m1)==0 && (procindim+m2)<procsinscandim) {\
      push_stack_newlist();\
      add_stack_list(scanout,sizeof(datatype),totalblocks);\
      add_stack_list(lrout,sizeof(LOGICAL)*3,totalblocks);\
      RECV(proc+m2*prochop,commbuf,totalblocks*(sizeof(datatype)+3*sizeof(LOGICAL)));\
      databuf=(datatype *)commbuf;\
      lrbuf=(LOGICAL *) (databuf+totalblocks);\
      add_stack_list(lrbuf,sizeof(LOGICAL)*3,totalblocks);\
      for (i=0,j=0;i<totalblocks;i++,j+=3) {\
         /* If our end value differs from the partner's start value, or\
            the partner restarted, the partner's value replaces ours;\
            otherwise the two values are combined. */\
         if (lrout[j+2]!=lrbuf[j] || lrbuf[j+1]==LOGICAL_TRUE) {\
            scanout[i]=databuf[i];\
            lrout[j+1]=LOGICAL_TRUE;\
         } else {\
            scanout[i]=op(scanout[i],databuf[i]);\
            /* NOTE(review): this clears the restart flag even if it was\
               already set locally -- confirm that is intended. */\
            lrout[j+1]=LOGICAL_FALSE;\
         }\
         lrout[j+2]=lrbuf[j+2];\
      }\
    } else {\
      if (((procindim|m2)&m1)==m2 && (procindim-m2)>=0) {\
        databuf=(datatype*)commbuf;\
        lrbuf=(LOGICAL *) (databuf+totalblocks);\
        for (i=0;i<totalblocks;i++) {\
           databuf[i]=scanout[i];\
        }\
        for (j=0;j<totalblocks*3;j+=1) {\
           lrbuf[j]=lrout[j];\
        }\
        SEND(proc-m2*prochop,commbuf,totalblocks*(sizeof(datatype)+\
                   3*sizeof(LOGICAL)));\
      }\
    }\
    m1=(m1<<1)+1;\
    m2=(m2<<1)+0;\
  }\
/*  printf("%d: Scanout: ",proc);\
  for (i=0;i<totalblocks;i++) {\
    printf("%3.0f ",scanout[i]);\
  }\
  puts("");\
  printf("%d: Lrout: ",proc);\
  for (j=0;j<totalblocks*3;j+=3){\
    printf("(%s,%s,%s) ",LOG_TO_STR(lrout[j]),LOG_TO_STR(lrout[j+1]),\
                         LOG_TO_STR(lrout[j+2]));\
  }\
  puts(""); */\
  /* Local step: within every group of scanblocks consecutive blocks keep\
     a running prefix (base) together with the segment value it belongs to\
     (baselr).  A block whose start value differs from baselr gets id and\
     restarts the prefix from its own scanout; otherwise it gets the\
     current prefix, which is then either restarted (the block restarted\
     or ends in a different segment) or extended with the block's\
     scanout. */\
  for (i=0;i<totalblocks;i+=scanblocks) {\
     base=(id);\
     baselr=lrout[i*3];\
     for (j=i,k=i*3;j<i+scanblocks;j+=1,k+=3) {\
        if (lrout[k]!=baselr) {\
           scanin[j]=(id);\
           base=scanout[j];\
           baselr=lrout[k+2];\
        } else {\
           scanin[j]=base;\
           if (lrout[k+1]==LOGICAL_TRUE || lrout[k+2]!=baselr) {\
              base=scanout[j];\
              baselr=lrout[k+2];\
           } else {\
              base=op(base,scanout[j]);\
           }\
        }\
     }\
  }\
  push_stack_newlist();\
  /* Downward phase: the levels are revisited in reverse order with the\
     same masks.  A processor that received at a level retrieves what it\
     saved there, derives the value for that partner and sends only the\
     data part of commbuf; a processor that sent at a level now receives\
     its scanin. */\
  for (level=levels-1;level>=0;level--) {\
    m1=m1>>1;\
    m2=m2>>1;\
    if ((procindim&m1)==0 && (procindim+m2)<procsinscandim) {\
      pop_stack_list();\
      databuf=(datatype *)commbuf;\
      lrbuf=(LOGICAL *) (databuf+totalblocks);\
      /* Which saved list lands in lrbuf and which in lrout depends on\
         the retrieval order of get_stack_list(), which is defined\
         elsewhere. */\
      get_stack_list(databuf,sizeof(datatype),totalblocks);\
      get_stack_list(lrbuf,sizeof(LOGICAL)*3,totalblocks);\
      get_stack_list(lrout,sizeof(LOGICAL)*3,totalblocks);\
      for (i=0,j=0;i<totalblocks;i++,j+=3) {\
        if (lrbuf[j+2]!=lrout[j]) {\
           databuf[i]=(id);\
        } else if (lrbuf[j+1]!=LOGICAL_TRUE) {\
           databuf[i]=op(databuf[i],scanin[i]);\
        }\
        /* otherwise the retrieved value is forwarded unchanged */\
      }\
      SEND(proc+m2*prochop,commbuf,totalblocks*sizeof(datatype));\
    } else {\
      if (((procindim|m2)&m1)==m2 && procindim-m2>=0) {\
        RECV(proc-m2*prochop,scanin,totalblocks*sizeof(datatype));\
      }\
    }\
  }\
/*  printf("%d: Scanin: ",proc);\
  for (i=0;i<totalblocks;i++) {\
    printf("%3.0f ",scanin[i]);\
  }\
  puts("");*/\
  deinit_stack();\
  return 0;\
}


/* Instantiate both tree-scan variants for one (type, operator, identity)
   combination: first the plain scan generated by MAKE_TREE_SCAN, then
   the segmented scan generated by MAKE_TREE_SCAN_SEG. */
#define MAKE_TREE_FAMILY(elem_type, combine, identity) \
  MAKE_TREE_SCAN(elem_type, combine, identity) \
  MAKE_TREE_SCAN_SEG(elem_type, combine, identity)

