 /*  *				s o r t c . c   *  * Sort utility   */    /*)BUILD 		$(PROGRAM)	= sortc 		$(FILES)	= { sortc qksort }  		$(TKBOPTIONS) = {  			TASK 	= ...SOR 
 			ACTFIL	= 6  			UNITS	= 6 		}  */   #ifdef	DOCUMENTATION   title	sort	Sort Data Files index		Sort Data Files   synopsis  ( 	sortc [-options] [-oOUTFILE] [file ...]   description   B 	Sortc sorts all of the named files together and writes the result> 	to the standard output or to the file named in the -o option.A 	The standard input is sorted if no file names are supplied; sort  	may thus be used as a filter. 	.s ; 	The output file may be the same as one of the input files.  	.s : 	The default sort is ascending in ASCII collating sequence< 	using the entire line.  Upper and lower case are considered; 	different.  Using optional arguments, up to ten key fields = 	may be specified.  Lines with equal keys are further ordered - 	using the entire line as a single ASCII key.  	.s 0 	The following options apply to the entire sort:	 	.s.lm +4 : 	.s.i-4;-o##The sorted output is written to the named file8 	instead of to the standard output.  The file may be the  	same as one of the input files.5 	.s.i-4;-u##(Unique) only the first of a set of lines : 	having equal keys is output.  Only the specified keys are* 	considered in the definition of 'unique.'. 	.s.i-4;-v##(Verbose) Print elapsed time, etc.	 	.s.lm -4 < 	The following options define the form of the sort.  If they@ 	preceed the first key field definition, they define the default= 	for all keys.  If the option includes a key definition, only  	that key is affected.	 	.s.lm +4,8 	.s.i-4;-b##(Blank) causes leading whitespace in the key 	field to be ignored.R9 	.s.i-4;-d##(Dictionary) sorts in dictionary order;  only ; 	letters, digits, and blanks are considered in the compare, C 	all else is ignored.  (Note: national letters (with values greater 0 	than 128 decimal) are not processed correctly.); 	.s.i-4;-f##(Fold case) folds all letters to lower-case for 
 	comparisons. B 	.s.i-4;-i##(Ignore whitespace) causes all non-printing characters 	to be ignored. ; 	.s.i-4;-k##(Key) selects a field to be used as a sort key. > 	Up to ten key fields can be specified.  The keys are in order@ 	of decreasing significance.  Key fields are compared, beginning? 	with the most significant one, until a not-equal key is found.m 	.si? 	The format of the -k option is "-kM1.N1,M2.N2"  The formatting	B 	flags "bdfinrt?" may be applied to a particular key by preceeding8 	the 'k' appropriately as will be shown in the examples. 	.se= 	The values following the "-k" define the starting and ending	0 	position of the key using a field/offset value: 	.lm+8> 	.s.i-8;M1.N1##Start of the key position (first character that 	is to be compared).= 	.s.i-8;M2.N2##End of the key position (first character aftern
 	the key). 	.lm-8= 	The 'M' values define the number of fields to skip, from theP5 	start of the data line.  (0 means "skip no fields").n 	.sA9 	The 'N' values define the number of bytes to skip in thef 	selected field. 	.s	< 	If "M1.N1" are omitted, start at the beginning of the line.4 	If "M2.N2" are omitted, end at the end of the line.; 	Except for "M2.N2" being omitted, omitting any of the fourf( 	values means "use zero" for that value. 	.s*@ 	Neither of these specifiers will go beyond the end of the line.: 	A record can have a null key if the start-of-key is on or5 	after the end-of-key or at or after the end-of-line.f+ 	A null key is lower than any non-null key.9 	.s)9 	If -b was specified, leading whitespace is skipped after 8 	advancing to the M1 (M2) field, but before advancing by 	N1 (N2) characters.< 	.s.i-4;-n##(Numeric) changes the sort ordering to ascending7 	arithmetic on a leading signed integer numeric string.o> 	.s.i-4;-r##(Reverse) changes the sort ordering from ascending 	to descending.n; 	.s.i-4;-t?#(Terminator) changes the field terminator.  Theb 	new definition is:	 	.s.lm+4A 	String of zero or more characters ending with the '?' character.n 	.s.lm-4? 	The terminator character may be escaped by using the backslash	@ 	convention.  By default, whitespace (blanks or tabs) terminates	 	a field.p 	.lm-4   Examples of Key Field Selectionb  + 	.br;Let a record be "ABC#DEF##########GHI"n 	.s.nf 	Flag		Key field 	-k1		" DEF         GHI" 	-bk1		"DEF         GHI" 	-bk1.1		"EF         GHI"f 	-bk1,2		"DEF         "	 	-k1,1		"" (Null field)N 	-k1.0,2.1	" DEF " 	-bk1.0,2.1	"DEF         G"  	-k0.1,1.3	"BC DE" 	-k1.0,2.0	" DEF"t 	-k0.5,1.0	"" (Null field) 	.s.fu" 	Let a record be "ABC,DEFGHIJ,KLM" 	.s.nf 	Flag		Key field 	-t,k1		"DEFGHIJ,KLM"r 	-t,k1,2		"DEFGHIJ," 	-t,k0.1,0.6	"BC,DE" 	-t,k1.0,0.10	"DEFGHI" 	.f	   Files   	 	sort.tmp	   diagnostics   : 	The following messages occur on a non-severe error. 
SORTC 	will exit with "error" status.	   	.lm +8F+ 	.s.i -8;"?SORT-F-Cannot create temp. file"e 	.br7 	The required temporary file cannot be created  in  thei 	current directory. * 	.s.i -8;"?SORT-F-Cannot open input file." 	.br. 	An input file cannot be accessed for reading.- 	.s.i -8;"?SORT-F-Cannot create output file."m 	.br0 	An output file cannot be  created  for writing.  	.s.i -8;"?SORT-F-Out of space." 	.br2 	There was insufficient memory space for the sort. 	.lm -8	: 	The following messages occur on a severe error. SORT will+ 	exit with "severe error" status. Get help.t 	.st 	.nf! 	"?SORT-U-Unexpected end of file"S 	"?SORT-U-Empty run" 	"?SORT-U-temp. file"f 	.fo author  
 	David Conroyt 	.ss' 	Very slightly modified by Martin Minowe 	.s.nf 	Extensively modified by 	  Ray Van Tasslea 	  Motorolah 	  1301 E. Algonquin Rd. 	  Room 4135 	  Shaumburg, Ill. 	  (312)-576-6017i   Bugs   Internal  B 	See the source of sortc.c for a discussion of Workfile strategies 	and sort timings.   #endif  C /*--- EDIT # 0492	27 Apr 1982   13:01:26	DB1:[21,6]SORTX.C;1077  */)E /*--- PREVIOUS EDIT	27 Apr 1982   12:59:18	DB1:[21,6]SORTX.C;1076  */: /*  * Edit history (after 0492)B  * 22-Jun-82	MM	Changed systime call to use library ftime routine.D  * 13-May-84	TEK	Make subtraction and conversion of millisec signed.  */2   /*  *  *      * Work-file(s)dG  *   This program uses a Quicksort for the distribution phase (creating.J  *   runs of sorted lines) and puts each run into a work file. In essence,J  *   the merge is one n-way merge onto the output file. ("n" is the numberL  *   of runs created in the 1st pass, as the file position is saved for each
  *   run).
 *
 * During the distribution (input) phase, the sorted array (line)
 * is used as a heap while putting the run out to the work file.
 * We replace the lines with input lines, to attempt to increase the size
 * of the run. It is muchly and widely claimed that this doubles the
 * average run size.
 * My experiments bear this out. This should make the merge phase
 * go faster (fewer runs).
 * Oh hell. It turns out that doing so greatly increases the time
 * of the distribution phase (50% to 100%) but decreases the time of the
 * merge phase only a little bit. Perhaps it would help
 * in a very specialized environment, where you could make several
 * merge work-files, all on different devices, and have them contiguous,
 * so that you wouldn't have the overhead of arm seeks.
 * The absolute best you could ever hope to do would require 2 passes
 * over the data: one for the distribution phase, and one for
 * the merge phase. According to one of my books, about the best
 * thing to use with several work files is a "three file polyphase merge",
 * with a Fibonacci series for the number of runs on each file.
 * The quoted figures are 2.7-4.6 equivalent passes over the entire data
 * (polyphase is rough to figure exactly, because it does not pass the
 * entire file on each pass).
 * My figures on several varied files are the entire sort taking anywhere
 * from 3 to 5 times as long as just passing the file thru a pgm which
 * converts it to lower case. This seems quite good, as the time spent
 * in doing comparisons is hefty. Maybe changing the way we do the
 * merge could improve things a lot, but I don't think so. Anyway, this
 * is a pretty neat trick, cramming all the runs into one file.
 *
 *
 * I also pre-read several lines from each run whenever I do a seek
 * to that run (this is H_P_READ). This improves the time of the merge
 * phase by making it as much as ten (!!!) times faster. Evidently,
 * the seek time is the dominating factor of this phase, and
 * reading in several lines at each seek buys us a whole lot. Note that
 * each actual read operation gets 512 bytes, which is usually several
 * lines, so we have done all the work to get many lines, so
 * we might as well take advantage of that fact and save some of them.
 * It turns out that there is very little to be gained by increasing the
 * size of the pre-read from 5 to 10 lines. I suspect that most
 * of the improvement is in going from none to 1, and each incremental line
 * that is added gains less than the last.
 *
 * Doing all these things, and increasing the size of the in-core sort
 * area (MX_RLINE) will make this a pretty fast sort program.
 *
 * Some timings:
 * which	distr	merge	runs (distr,merge times in seconds 11/70)
 *	Orig	327.2	146.1	214	Joe Sventeks DICT 45000 words
 *	This	171	177	75	same (The file is already sorted)
 *	pass	117			convert it to lower case (pass it)
 *
 *	orig	96.4	2038	161	32000 random numbers (ascii sort)
 *	this	174	271	54	same
 *	pass	111
 *
 *	orig	17.3	100.7	49	words.doc (9628 words)
 *	this	40	61	17
  *	pass	23  *  */  #include <stdio.h> #include <ctype.h> #define	FALSE	0  #define	TRUE	1
 #define	EOS	0  #ifdef unix	 #include	<sys/types.h> #include	<sys/timeb.h> #else( #include	<timeb.h> #endif
 #ifdef vms #include		<ssdef.h>S #include		<stsdef.h>1 #define	IO_SUCCESS	(SS$_NORMAL | STS$M_INHIB_MSG)e #define	IO_ERROR	SS$_ABORT #endif /*H  * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file  */a #ifndef	IO_SUCCESS #define	IO_SUCCESS	0 #endif #ifndef	IO_ERROR #define	IO_ERROR	1 #endif  3 #define MX_RLINE 600	/* number of lines in a run */s #define TEMP	"sort.tmp"y= #define MAX_WK_FILES 1	/* # of different work-files to use */aH #define MAX_KEYS 11	/* Max number of key fields that can be specified */6 #define H_P_READ 8	/* # of lines a heapelt can hold */   typedef char BYTE;   /*********** */ 4 /* These change the way internal routines perform */< #define TREEVER	/* verify the tree after doing the reheap */ #undef TREEVER8 #define REPLPUT 	/* read replacement lines in putline */ #undef REPLPUT   struct work_file {    char w_filename[35];o    FILE *w_fp; };   struct	run {    struct run *r_rp;    long r_seek;e    int r_size; };   struct heap_s {n    char *h_lp_a[H_P_READ];    struct run *h_rp; };  . struct	run *crp  = NULL;	/* current run ptr */* struct	run *frp  = NULL;	/* 1st run ptr */+ struct	run *lrp  = NULL;	/* last run ptr */   # struct work_file wkf[MAX_WK_FILES];T   char **line = NULL;f7 int first_out = TRUE;	/* This is the 1st output line */y   FILE	*ofp = NULL;e FILE	*tfp	= NULL;t FILE	*ifp	= NULL;  char	*ofn	= NULL;  struct	heap_s *heap; int	nline	= 0; int	nruns	= 0;I int max_mline = 0;	/* max # of lines we can hold in memory during merge*/ A int pre_lines = 0;	/* # of lines actually pre-read into memory */   E long int in_records = 0;	/* # of records read from the input files */ 0 long int in_bytes = 0;		/* # of bytes in them */  A /*** This doubles as the input buffer and the detailed usage help 5  * info. It had better be at least 200 bytes long. 
*/ A char	lbuf[] = "-b  ignore leading blanks\n-d  dictionary order\n\ , -f  fold letters to lowercase for compare\n\  -i  ignore non-printing chars\n\F -km1.n1,m2.n2 select key field (max 9 keys)\n-n  numeric comparison\n\6 -oOUT  specify output file (otherwise it is stdout)\n\" -r  reverse order to descending\n\$ -t? specify field terminator char\n\+ -u  output only 1st line with equal keys\n\  -v  verbose\n"; > int nlbuf = -1;	/* length of string in lbuf (<0 means none) */5 char	**llout = NULL;	/* last line put out (for -u) */   / int	dflag[MAX_KEYS] = 0;	/* dictionary order */ 4 int	iflag[MAX_KEYS] = 0;	/* ignore all whitespace */< int	fflag[MAX_KEYS] = 0;	/* fold upper-case to lower-case */- int	vflag = 0;		/* give elapsed times, etc */ @ int	hard_way[MAX_KEYS] = 0;	/* if any of the above are set, this- 				* comparison must be done the hard way.*/ 7 int	uflag = 0;		/* only output 1st of set of lines with  				* equal keys */ C int	nflag[MAX_KEYS] = 0;	/* numeric comparison (else alphabetic) */ . int	rflag[MAX_KEYS] = 0;	/* reverse sort sense* 				* (descending instead of ascending) */8 int	bflag[MAX_KEYS] = 0;	/* ignore leading whitespace */> char term_char[MAX_KEYS] = 0;	/* field terminator character */; int	kflag = 0;		/* number of field specifiers (0 = none) */   4 /***** m1/n1 specify the 1st character of the key */: int	m1[MAX_KEYS] = 0;	/* fields to skip to start-of-key */= int	n1[MAX_KEYS] = 0;	/* chars to skip from start-of-field */   E /***** m2/n2 specify the 1st character after the key. This char isn't   *	included in the key. 
*/A int	m2[MAX_KEYS] = -1;	/* Number of fields to skip to end-of-key.   				 * -1 means 'end of line' */? int	n2[MAX_KEYS] = 0;	/* chars to skip from end-of-key field */    extern	char *getline();  extern  char *xalloc(); + extern int compare();	/* compare routine */ * extern qksort();	/* the sorting routine */6 struct heap_s heap_tmp;	/* temp area for the reheap */  < extern long int systime();	/* time in msec since midnight */ long int start_time;   main(argc, argv)
 char *argv[];  {     register struct run *rp;     register char *cp;     char *cptmp;     char *argsave;     struct heap_s *hp;     int c, i, nf;  
 #ifdef vms" 	argc = getredirection(argc,argv); #endif  7    start_time = systime();		/* init for elapsed time */     nf = argc - 1;     for (i=1; i<argc; ++i) {        cp = argsave = argv[i];        if (*cp == '-') {  	 --nf;  	 argv[i] = NULL;  	 ++cp;  	 while (c = *cp++) {  	    switch (tolower(c)) {   	    case 'v': 	       vflag++; 	       break;   	    case 'n': 	       ++nflag[kflag];  	       break;   	    case 'b': 	       ++bflag[kflag];  	       break;   	    case 'd': 	       ++dflag[kflag];  	       ++hard_way[kflag]; 	       break;   	    case 'i': 	       ++iflag[kflag];  	       ++hard_way[kflag]; 	       break;   	    case 'f': 	       ++fflag[kflag];  	       ++hard_way[kflag]; 	       break;   	    case 'u': 	       ++uflag; 	       break;   	    case 'o':3 	       if (*cp == NULL) { 	/* value is next arg */  		  if (++i >= argc)0 		     usage("argument need after", argsave, c); 					/* no next arg, error! */  		  if (*(ofn = argv[i]) == '-')? 		     usage("next argument may not be an option", argsave, c); & 					/* next arg is itself an arg!! */+ 		  argv[i] = NULL;	/* eliminate the arg	*/ 	 	       } * 	       else {		/* value is in this arg */
 		  ofn = cp; 	 	       } ? 	       cp = argv[i];	/* re-examine current arg (to skip it) */ 
 	       --nf;  	       break;   	    case 'r': 	       ++rflag[kflag];  	       break;   	    case 'k': 	       defin_key(cp); 	       kflag++;> 	       m1[kflag] = n1[kflag] = n2[kflag] = 0; m2[kflag] = -1;7 	       bflag[kflag] = rflag[kflag] = nflag[kflag] = 0; 7 	       iflag[kflag] = fflag[kflag] = dflag[kflag] = 0; / 	       hard_way[kflag] = term_char[kflag] = 0;  	       cp = &argv[i]; 	       break;   	    case 't': 	       if ((c = *cp++) == EOS) / 			usage("no field separator after 't' option",  				argsave, c);, 	       if (c == '\\') {		/* escaped char */ 		  if (*cp == NULL)1 			usage("no field separator after 't\\' option",  				argsave, c); 		  cptmp = cp; ' 		  c = esc_char(&cptmp);   cp = cptmp; 	 	       }  	       term_char[kflag] = c;  	       break;  
 	    default: , 	       usage("unknown option", argsave, c); 	    } 	 }        }     }    if (nf == 0)        ifp = stdin;*    if ((tfp = fopen(TEMP, "w")) == NULL) {4       fprintf(stderr, "Cannot create temp file.\n");       exit(IO_ERROR);     }$    fgetname(tfp, wkf[0].w_filename);,    line = xalloc(MX_RLINE * sizeof(char *));$    crp = xalloc(sizeof(struct run));    crp->r_size = 0; B /* There is always an implied last key of: ascii, ascending order,  *	using the entire line.   */     if (kflag == 0) {       kflag++;    }9    m1[kflag] = n1[kflag] = n2[kflag] = 0; m2[kflag] = -1; 2    bflag[kflag] = rflag[kflag] = nflag[kflag] = 0;2    iflag[kflag] = fflag[kflag] = dflag[kflag] = 0;*    hard_way[kflag] = term_char[kflag] = 0;>    if (m1[kflag] == m1[kflag-1] && n1[kflag] == n1[kflag-1] &&6 	m2[kflag] == m2[kflag-1] && n2[kflag] == n2[kflag-1])J    if (bflag[kflag] == bflag[kflag-1] && rflag[kflag] == rflag[kflag-1] &&B 	nflag[kflag] == nflag[kflag-1] && iflag[kflag] == iflag[kflag-1])J    if (fflag[kflag] == fflag[kflag-1] && dflag[kflag] == dflag[kflag-1] &&( 	term_char[kflag] == term_char[kflag-1])    kflag--; B /*** Distribution phase. Create sorted runs from the input file(s)  * to the temp file(s).  */
    for (;;) {        if (ifp == NULL) {; 	 for (i=1; i<argc; ++i)	/* find next file-name argument */   	    if ((cp = argv[i]) != NULL) 	    break;  	 if (i >= argc) 	    break;  	 argv[i] = NULL; ' 	 if ((ifp = fopen(cp, "r")) == NULL) { / 	    fprintf(stderr, "%s: cannot open.\n", cp);  	    quit(); 	 }        } /       get_in_line();		/* get line from input */        if (nlbuf >= 0) {  	 if (nline >= MX_RLINE 4 	  || (cp = malloc(nlbuf + sizeof(char))) == NULL) {	 	    do { 7 	       qksort(line, nline, sizeof(line[0]), &compare);  	       saverun(); 	       putline(tfp); ) 	       crp = xalloc(sizeof(struct run));a 	       crp->r_size = 0;! 	    } while (nline == MX_RLINE);. 	    if (nlbuf >= 0)% 		cp = xalloc(nlbuf + sizeof (char));d 	 }e 	 if (nlbuf >= 0) {d 	    strcpy(cp, lbuf); 	    nlbuf = -1; 	    line[nline++] = cp; 	 }l       }c    }3    qksort(line, nline, sizeof (line[0]), &compare);aD    if (frp == NULL) { /* We have only 1 run, so put it right out. */       openoutput();.       if (uflag) 	 llout = xalloc(sizeof(lbuf));        putline(ofp); 4       pr_eltim("Completed, all data fit in memory");
       quit();	    }  : /*** Merge phase. We are all done with the input files. */    if (nline > 0) {i       saverun();       putline(tfp);s    }    fclose(tfp);s  +    pr_eltim("Distribution phase complete");I    start_time = systime();    free(line);
    if (uflag):#       llout = xalloc(sizeof(lbuf));i    openoutput();5    if ((tfp = fopen(wkf[0].w_filename, "r")) == NULL)0*       panic("Cannot reopen temp file.\n");0    heap = xalloc(nruns * sizeof(struct heap_s));  H /* See how many lines can be pre-read form the various runs. All this isG  * in an attempt to read several lines from a run into memory each timefD  * we read, because the seek time is most of the time expense of the  * merge phase. G  * We are limited by: 1) the size of the h_lps array in a heap element,lI  * and 2) the amount of memory we have available to put these lines into.hE  * To arrive at the memory we have/need, allocate as much as we need.uC  * This will most likely fail (no way is a very large file going to]H  * fit in memory). This failure point defines the max # of lines we willE  * be able to hold. There is a safety margin here, but bad luck could;E  * cause the alloc to fail during the merge. If this happens, bump uptG  * the safety margin. With any luck, using the average record size will;  * be good enough. */    i = nruns * H_P_READ;&    if (i > in_records) i = in_records;    c = in_bytes / in_records;e*    if (c < (sizeof(int))) c = sizeof(int);4    lrp = xalloc(25 * c);	/* alloc a safety margin */    lrp->r_rp = NULL;-    while (i-- && (crp = malloc(c)) != NULL) {l       crp->r_rp = lrp;       lrp = crp;       max_mline++;    }"    /* now free the space all up */    while (lrp != NULL) {       crp = lrp;       lrp = lrp->r_rp;       free(crp);    }  >    /* Read the same number of lines initially for all runs. */    i = max_mline / nruns;t    if (i <= 0) i = 1;d"    if (i > H_P_READ) i = H_P_READ;    rp = frp;    hp = &heap[0]; 0    while (rp != NULL) { /* init for the merge */       hp->h_rp = rp;       run_read(hp, i);        if (hp->h_lp_a[0] == NULL) 	 panic("Empty run.\n");       rp = rp->r_rp;       hp++;\    }3    /* Sort it, to get the heap initially set up. 
*/e:    qksort (heap, nruns, sizeof (struct heap_s), &compare);      while (nruns) {       cp = heap[0].h_lp_a[0];        if (llout) 	 sp_fputs(cp, ofp);
       else 	 fputss(cp, ofp);       free(cp);u8       copy(&heap_tmp, &heap[0], sizeof (struct heap_s));:       pre_lines--;	/* shift up the other pre-read lines */4       copy(&heap_tmp.h_lp_a[0], &heap_tmp.h_lp_a[1],+ 	   sizeof(heap.h_lp_a[0]) * (H_P_READ-1));u+       heap_tmp.h_lp_a[H_P_READ - 1] = NULL;*?       if (heap_tmp.h_lp_a[0] == NULL)	/* we used them all up */n  	 run_read(&heap_tmp, H_P_READ);A       if (heap_tmp.h_lp_a[0] == NULL) { /* Done with this run. */  	 pr_eltim("Run complete");t
 	 --nruns;= 	 reheap (heap, nruns, sizeof (struct heap_s), &heap[nruns]);l       }e
       else: 	 reheap (heap, nruns, sizeof (struct heap_s), &heap_tmp);    }    if (vflag) {gD       i = (systime() - start_time) /100;	/* get&print elapsed time*/B       fprintf (stderr,"Merge Elapsed: %d.%01d sec\n", i/10, i%10);    }
    quit(); }e    / /*********************************************/t
 pr_eltim(why)r
 char	*why; { 	    int i;K      if (!vflag)
       return;aA    i = (systime() - start_time) /100;	/* get&print elapsed time*/fB    fprintf(stderr, "%s, %ld records, %ld bytes, run number %d.\n",# 	why, in_records, in_bytes, nruns);y>    fprintf (stderr,"Elapsed time: %d.%01d sec\n", i/10, i%10); }k  / /*********************************************/K3 /* Get the next line from the input file to "lbuf". *  * ifp == NULL means input file is closed.B  * nlbuf == length of the line in lbuf (<0 means nothing in lbuf). */
 get_in_line()  {t    if (ifp == NULL) return;p0    if (fgetss(lbuf, sizeof lbuf, ifp) == NULL) {$       if (ifp != stdin) fclose(ifp);       ifp = NULL;s       nlbuf = -1;a    }	    else {*       nlbuf = strlen(lbuf);u&       in_records++; in_bytes += nlbuf;    } }t   /**  * Open the output file and stash its file)  * pointer in 'ofp'. If no output file isr%  * given 'ofp' is a dup. of 'stdout'.a  */m openoutput() {*    if (ofn == NULL)e       ofp = stdout;1.    else if ((ofp = fopen(ofn, "w")) == NULL) {3       fprintf(stderr, "%s: cannot create.\n", ofn); 
       quit();L    } }p  , /******************************************/5 /* Special fputs, used for '-u' flag, to put out onlya  * unique lines.A  * The very first line is deemed 'unique', and is always put out.  */ sp_fputs(buf, usr_file) 
 char *buf; FILE *usr_file;  { D    if (com_par(&llout, &buf, kflag? kflag-1 : kflag) || first_out) {@       fputss(buf, usr_file); /* this line different from last */       first_out = FALSE;       strcpy (llout, buf);    } }   ( /**************************************/, /* read some lines from a run into memory */ run_read(helt, m_lines) 8 struct heap_s *helt;	/* the heap element for this run */* int m_lines;		/* max # of lines to read */ {r	    int i;   <    for (i = 0; i < m_lines && pre_lines <= max_mline; i++) {:       if ((helt->h_lp_a[i] = getline(helt->h_rp)) == NULL) 	 break;       pre_lines++;    },    if (i < H_P_READ) helt->h_lp_a[i] = NULL; }a    2 /************************************************/6 /* convert escaped char to a char value. Update cp. */ esc_char (cpp) char **cpp;t {k1    register char c;	/* the converted character */c    register char c1;%    register char *p;	/* buffer ptr */s  &    p = *cpp;	/* point to the string */    c = tolower(*p++);g    if (c == 't')!       c = '\t';			/* \t is tab */     else if (c == 's')k"       c = ' ';			/* \s is space */A    else if (c >= '0' && c <= '7') { /*  \digits is the obvious */k       c -= '0';[-       while ((c1 = *p++) >= '0' && c1 <= '7')r 	 c = (c<<3) + (c1 - '0');+       p--;			/* back up, we went too far */e    }'    else			/* anything else is itself */c       c = *(p-1);=-    *cpp = p;		/* update the buffer pointer */L    return (c); }e s= /******************** routines passed into quicksort *******/p /*  * Compare routine.m  */  static int     compare(sa, sb) char *sa[], *sb[]; { #    return (com_par(sa, sb, kflag));  }       ( static int     com_par(sa, sb, num_keys) char *sa[], *sb[];1 int num_keys;	/* max number of keys to examine */  {o    extern char *get_to_key();     register char *a, *b;    register c;    char ch;     long d, atol();3    int field_num;			/* field number loop counter */aE    char *aeok_ptr, *beok_ptr;	/* for saving the char 
after the key */     char aeok_char, beok_char;i  	    c = 0;mD    for (field_num = 0; !c && (field_num <= num_keys); field_num++) {(       /* find the key-field addresses */8       aeok_ptr = beok_ptr = &ch;	/* in case EOK = EOL */2       a = get_to_key(m1[field_num], n1[field_num],! 	   m2[field_num], n2[field_num],[< 	   bflag[field_num], *sa, &aeok_ptr, term_char[field_num]);2       b = get_to_key(m1[field_num], n1[field_num],! 	   m2[field_num], n2[field_num], < 	   bflag[field_num], *sb, &beok_ptr, term_char[field_num]);C       aeok_char = *aeok_ptr; *aeok_ptr = NULL;	/* terminate keys */ C       beok_char = *beok_ptr; *beok_ptr = NULL;	/* terminate keys */l,       /*fprintf(stderr,"a key:'%s'\n", a);*/         if (nflag[field_num]) {=  	 if ((d = atol(a)-atol(b)) < 0)	 	    --c;r 	 else if (d > 0)t	 	    ++c;n       }e#       else if (hard_way[field_num]))8 	 c = hard_cmp(a, b, iflag[field_num], dflag[field_num], 	   fflag[field_num]);
       else 	 c = strcmp(a, b); +       /* restore the char after the keys */        *aeok_ptr = aeok_char;       *beok_ptr = beok_char;       if (rflag[field_num]) 	 	 c = -c;n    }    return (c); }e   /***************/i2 /* return pointer to the next field of the string.9  * A field is defined as: optional whitespace followed by=-  *	non-whitespace; the next field is the nexth  *	whitespace or NULL.  *;  * The returned result is a pointer to this 1st char of the   * next field.0  * It will never advance past the trailing NULL. */ char *nxt_fld(s, tch)  register char *s; 6 register char tch;	/* field terminator char or NULL */ {p    register char c;e      if (tch != NULL) {()       while (*s != NULL && *s++ != tch) ;     }    else        {l7       while (((c = *s) != EOS) && isspace(c) != 0) s++;,7       while (((c = *s) != EOS) && isspace(c) == 0) s++;W    }    return (s); }   + /*****************************************/f- /********* get the definition for a key field  */
 defin_key(cp) 	 char *cp;m {o    if (kflag >= MAX_KEYS - 1)t*       panic ("Too many keys specified\n");  -    n1[kflag] = n2[kflag] = 0; m2[kflag] = -1;     m1[kflag] = atodl(&cp);    if (*cp == '.') {/       cp++;		/* char adv from start of field */        n1[kflag] = atodl(&cp);     }*    if (!*cp++)	return;	/* no end-of-key */      m2[kflag] = atodl(&cp);    if (!*cp) return;    if (*cp++ != '.')+       panic ("Invalid key field format\n");d    n2[kflag] = atodl(&cp);  
    return; }a   /*****************/ ( /* unsigned ascii to decimal conversion.5  * stops on a non-digit. Update the buffer pointer tos'  * point to this terminating non-digit.u */
 int atodl(cp);
 char **cp; {     register int n;    register char *p;    register char ch;  	    n = 0;E%    p = *cp;	/* point to the string */     while (isdigit((ch = *p++)))'       n = 10*n + (ch - '0');-    *cp = p-1;	/* update the buffer pointer */l    return (n); }p  2 /************************************************/4 /* get pointers to the start-of-key, and end-of-key.&  * returns ptr to 1st char of the key.K  * "eok_ptr" points to next char after the key, unless eok = "end-of-line".c,  * The pointers will never go past the NULL.(  * eok-ptr will never be before sok-ptr. */> char *get_to_key (sok_fld, sok_adv, eok_fld, eok_adv, skip_ws,      str, eok_ptr, term_ch) > int sok_fld, sok_adv;	/* start-of-key fields & chars to skip*/< int eok_fld, eok_adv;	/* end-of-key fields & chars to skip*/D int skip_ws;		/* skip leading whitespace before advancing by chars*/ char *str;		/* the string */> char **eok_ptr;	/* set to point to the 1st char after the key, 		 * if any eok was specified.   		 * otherwise not modified. */a- char term_ch;		/* field terminator or NULL */t {g2    register char *kptr;	/* start-of-key pointer */0    register char *eptr;	/* end-of-key pointer */    register int i;    char ch;a  - /* skip over fields to get to start-of-key */n*    kptr = str;	/* init to start of line */    i = sok_fld; -    while (i--) kptr = nxt_fld(kptr, term_ch);pE    /* probably we can keep going forward from the SOK to EOK field */s&    i = eok_fld - sok_fld;	eptr = kptr;/ /* advance SOK ptr by chars (never past end) */a    if (skip_ws)E6       while ((ch = *kptr) && isspace(ch) != 0) kptr++;     while (sok_adv-- && *kptr++);1    if (eok_fld >= 0) { /* position to eok field*/        if (i < 0) {2 	 i = eok_fld;	/* eok search from start of line */
 	 eptr = str;0       }C0       while (i--) eptr = nxt_fld(eptr, term_ch);5       /* advance eok ptr by chars (never past end) */        if (skip_ws)2 	 while ((ch = *eptr) && isspace(ch) != 0) eptr++;#       while (eok_adv-- && *eptr++); 9       *eok_ptr = eptr;	/* return ptr to char after key */i       if (eptr < kptr)' 	 *eok_ptr = kptr;	/* force null key */e    }    return (kptr);F }.  ? /*************************************************************/p= /* string comparisons the hard-way, one character at a time, .,  * because one of the special flags was set. */ hard_cmp(a, b, ifl, dfl, ffl) 6 register char *a, *b;	/* the two strings to compare */$ int ifl;	/* ignore all whitespace */= int dfl;	/* dictionary order, ignore all but letters, digits,  		* and blanks 		*/; int ffl;	/* fold upper-case to lower case before compare */e {c    register char achar, bchar;    int c;	/* result */  	    c = 0;e/    while (!c && (achar = *a) && (bchar = *b)) { '       if (dfl) {	/* dictionary-style */e, 	 while ((achar = *a) && !(isdigit(achar) ||. 	      isalpha(achar) || (achar == ' '))) a++;, 	 while ((bchar = *b) && !(isdigit(bchar) ||. 	      isalpha(bchar) || (bchar == ' '))) b++;       }5,       if (ifl) {	/* ignore all whitespace */- 	 while ((achar = *a) && isspace(achar)) a++;r- 	 while ((bchar = *b) && isspace(bchar)) b++;        } )       if (ffl) {	/* fold to lower-case */  	 achar = tolower(achar);1 	 bchar = tolower(bchar);I       }1       c = achar - bchar;       if (!c && achar) a++;        if (!c && bchar) b++;u    }    if (!c) c = achar - bchar;i
    return(c);t }. * /*  * Rebuild a heap.   * This is essentially TREESORT.+  * Used to reorder the heap when a new iteme  * is read into it.aG  * The 'heap' is a binary tree, such that for each node 'i', the key atrF  * 'i' is the lowest of it and it's two sons, (2*i + 1) and (2*i + 2).8  * The new element is to be inserted where it should go.D  * We can move things up in the heap, as array element [0] is empty.  * At each step, h[i] is empty.   */r   reheap(h, n, elt_size, new_elt) , BYTE h[];		/* The array which is the heap */+ int n;			/* number of elements in array h*/ 3 int elt_size;		/* size of array element in bytes */t5 struct heap_s *new_elt;	/* new element to be added */i {y    register int i, j;l0    BYTE *ip, *jp, *jp1;	/* ptrs for i, j, j+1 */  9    for (i = 0, ip = h; (j = 2*i+1) < n; i = j, ip = jp) {(       jp = h+(j*elt_size);       jp1 = jp + elt_size;0       if ((j+1 < n) && (compare(jp1, jp) < 0)) { 	 jp = jp1;1 	 ++j;       }s2       /* j now points to the smaller child of i */$       if (compare(new_elt, jp) <= 0) 	 break;       copy(ip, jp, elt_size);a    }    copy(ip, new_elt, elt_size); 2 #ifdef	TREEVER	/**** verify that the tree is ok */*    for (i = 0; (j = 2*i+1) < nruns; i++) {       if (h != heap) break;yE       if ((j < nruns && compare(&heap[i].h_lp, &heap[j].h_lp) > 0) || B 	   (j+1 < nruns && compare(&heap[i].h_lp,&heap[j+1].h_lp) > 0)) {> 	 fprintf (stderr, "Tree out of order. n = %d. Index = %d.\n", 	      nruns, i);a> 	 fprintf (stderr,"addresses: %o %o %o\n", &heap[i], &heap[j], 	      &heap[j+1]);e9 	 fprintf (stderr,"nruns: %d\n%d->%s%d->%s%d->%s", nruns,s) 	      i,heap[i].h_lp,j,heap[j].h_lp,j+1,r* 	      j+1<nruns ? heap[j+1].h_lp : NULL);, 	 fprintf (stderr,"New->%s", new_elt->h_lp);& 	 fprintf (stderr,"At %o\n", new_elt);- 	 fprintf (stderr,"%d->%s", 0, heap[0].h_lp);t 	 for (i = 0; i <n; i++) {1 	    fprintf (stdout, "%d->%s", i, heap[i].h_lp);  	 } 
 	 abort();       },    }*    for (i = 0; (j = 2*i+1) < nline; i++) {       if (h != line) break; ;       if ((j < nline && compare(&line[i], &line[j]) > 0) ||k8 	   (j+1 < nline && compare(&line[i],&line[j+1]) > 0)) {> 	 fprintf (stderr, "Tree out of order. n = %d. Index = %d.\n", 	      nline, i);w> 	 fprintf (stderr,"addresses: %o %o %o\n", &line[i], &line[j], 	      &line[j+1]);e9 	 fprintf (stderr,"nline: %d\n%d->%s%d->%s%d->%s", nline,s 	      i,line[i],j,line[j],j+1,e% 	      j+1<nline ? line[j+1] : NULL);, 	 for (i = 0; i <n; i++) {, 	    fprintf (stdout, "%d->%s", i, line[i]); 	 } 
 	 abort();       }p    } #endif }b   /*  * Save a run.&  * The run block has been preallocated(  * because there may not be enough space  * to allocate it now.  */h	 saverun()  {e    long ftell();      crp->r_rp = NULL;    crp->r_seek = ftell(tfp);    if (frp == NULL)s       frp = crp;    elset       lrp->r_rp = crp;
    lrp = crp;     ++nruns;n }s   /*$  * Get a line from the specified run  * on the temp. file.d'  * Pack the line into allocated storageg  * and return a pointer to it.)  * Return NULL if there are no lines left_%  * in the run; real end of file is ant  * internal botch.  */i char *getline(rp)s register struct run *rp; {m    register char *cp;*    long ftell();      if (rp->r_size == 0)0       return (NULL);    fseek(tfp, rp->r_seek, 0);s.    if (fgetss(lbuf, sizeof lbuf, tfp) == NULL)(       panic("Unexpected end of file\n");    rp->r_seek = ftell(tfp);     --rp->r_size;,    cp = xalloc(strlen(lbuf) + sizeof(char));    strcpy(cp, lbuf);    return (cp);	 }    /*************************/<. /* Dump the lines in the array to the file.	*/ putline(fp)  FILE *fp;	/* the output fp */p {eE    register int ilater;	/* ptr for lines that can't go in this run */     register int i;    register char *cp;f #ifdef REPLPUT
    char *ncp;s #endif      crp->r_size = nline;a #ifndef REPLPUT(    for (i=0; i<nline; i++) {       cp = line[i];a       if (llout) 	 sp_fputs(cp, fp); 
       else 	 fputss(cp, fp);l       free(cp);U    }
    nline = 0;n #elsep   /*G  * To improve the length of the runs, replace each line as it goes out. C  * If it can, it will become part of this run, otherwise it will bel'  * saved up to be part of the next run.s  *G  * In general, when this routine returns, the array will be filled withhH  * lines that wouldn't go in this run. This will be the case except when  * we hit EOF on the input.(  **  * The next input line is already in lbuf. */    ilater = MX_RLINE;*    while (nline > 0) {       cp = line[0];*       if (llout) 	 sp_fputs(cp, fp);l
       else 	 fputss(cp, fp);p.       if (nlbuf >= 0 && crp->r_size < 30000 &&3 	   (ncp = malloc(nlbuf + sizeof(char))) != NULL) {n 	 strcpy(ncp, lbuf); 	 get_in_line();B 	 if (compare(&ncp, &cp) >= 0) { /* the line can be in this run */ 	    crp->r_size++; 0 	    reheap(line, nline, sizeof(line[0]), &ncp); 	 }r. 	 else {	/* it will be part of the next run */
 	    nline--; 9 	    reheap (line, nline, sizeof(line[0]), &line[nline]); - 	    line[--ilater] = ncp;	/* tuck it away */p 	 }        };*       else {	/* nothing to replace with */
 	 nline--;5 	 reheap(line, nline, sizeof(line[0]), &line[nline]);f       }r       free(cp);     }  9 /* The new replacement lines that couldn't become part of D  * this run are at the end of the array. Move them up and adjust the	  * index.= */    nline = MX_RLINE - ilater; !    if (nline != 0 && ilater != 0) =       copy(&line[0], &line[ilater], nline * sizeof(line[0]));i #endif }    /*  * Allocate space."  * If no space, abort with a nasty  * little message.  */  char *xalloc(n)e {a    register char *p;  !    if ((p = malloc(n)) == NULL) {")       fprintf(stderr, "Out of space.\n");        exit(IO_ERROR);n    }    return (p); }l   /*  * Quit.  * Get rid of the temp. file.   * Exit.  */t quit() {r    if (tfp != NULL),       fmkdl(tfp);e    exit(IO_SUCCESS); }r   /*&  * Tell the user just what is expected
  * of him.  */l usage(why, argsave, c) char		*why;k char		*argsave;e char		c; {a    if (c != '?') {-       fprintf(stderr, "Sort error: %s", why);c       if (c == EOS)*6          fprintf(stderr, "\nat end of option string");       else if (c < ' ') 1          fprintf(stderr, " at CTRL/%c", c + '@');i*       else fprintf(stderr, " at '%c'", c);8       fprintf(stderr, ", argument = \"%s\"\n", argsave);    }J    fprintf(stderr, "Usage: sort [-bdfinrt?uv] [-[bdfinrt?]km1.n1,m2.n2]");@    fprintf (stderr, " [-k...]\n\t    [-oOUTFILE] [file ...]\n");    if (c == '?') {       printf(lbuf);     }    exit(IO_ERROR); }  /*
  * Errors.9  * Print a message and die with "error" status on RT/RSX,g  * "error" on UNIX (I think).K  */d	 errxit(a)=    {$    fprintf(stderr,"?SORT-F-%r", &a);    exit(IO_ERROR);    }   /*  * Fatal errors.  * Print a message and die.p  */& panic(a) {!$    fprintf(stderr, "Panic: %r", &a);    exit(IO_ERROR); }d   static int		time_first = TRUE; static struct timeb	first_time;_   long int	 systime()  /*'  * Returns elapsed time in milliseconds   */h {  	long		time;2 	int		millisec;	/* To get signed compare -- TEK	*/ 	struct timeb	time_buffer;   	if (time_first) { 		/*@ 		 * This makes sure we can store enough milliseconds in 32 bits 		 */t 		ftime(&first_time);= 		time_first = FALSE;  	} 	ftime(&time_buffer);f+ 	time = time_buffer.time - first_time.time;u5 	millisec = time_buffer.millitm - first_time.millitm;* 	time *= 1000; 	return (time + millisec); }*