/*: ==============================================================================*/
/*:*/
/*: Subroutine Hyphenate*/
/*: April 1988*/
/*:*/
/*:                                                                     Greg Janee*/
/*:                                                   General Research Corporation*/
/*:                                                                  P.O. Box 6770*/
/*:                                                   Santa Barbara, CA 93160-6770*/
/*:*/
/*: This  subroutine locates the legal hyphenation points within a given word.  It*/
/*: accepts two read-only arguments -- a word to be  hyphenated  and  a  character*/
/*: conversion table -- and one write-only argument, a hyphen location array.  The*/
/*: word to be hyphenated is a string of zero or more characters passed by  fixed-*/
/*: length  descriptor.  If the word is longer than 255 characters, only the first*/
/*: 255 characters are read.  The character conversion table is a  256-byte  table*/
/*: passed  by  reference  that  describes any transformations that are to be per-*/
/*: formed on the word before it is hyphenated.  The table may not map any charac-*/
/*: ters  to  '\0'  or  '\377'.   The  hyphen location array is an array of bytes,*/
/*: passed by reference, of the same length as the word to be  hyphenated.   If  a*/
/*: byte  in  the  array  is  odd, a hyphen may be placed before the corresponding*/
/*: character in the word.  No error checking is done on the arguments.  No  error*/
/*: can arise from calling this routine.*/
/*:*/
/*: This file must be passed through a preprocessor which converts lines having  a*/
/*: colon  in column 1 to comment lines.  Subroutine Hyphenate must be linked with*/
/*: MOVTC.MAR and a HYPHENATION_TABLES module produced by  program  Build_hyphena-*/
/*: tion_tables.*/
/*:*/
/*: The  hyphenation  algorithm used by this subroutine and program Build_hyphena-*/
/*: tion_tables was adapted from that used by TeX82, the document compiler written*/
/*: by  Donald  E.  Knuth  at  Stanford  University.  References are "The TeXbook"*/
/*: (Reading, Mass.:  Addison Wesley, 1984)  and  "TeX:   The  Program"  (Reading,*/
/*: Mass.:  Addison Wesley, 1986), both by Knuth; and "Word Hy-phen-a-tion by Com-*/
/*: pu-ter" (Stanford Computer Science Report 977,  Stanford,  California,  August*/
/*: 1983) by Franklin Mark Liang.*/
/*:*/
/*: ==============================================================================*/

#include descrip

/*: We define here an interface to the exception and pattern data structures  that*/
/*: are  produced by program Build_hyphenation_tables.  We use ETABLE and EPOOL to*/
/*: refer to the exception table and exception pool, respectively.*/

/*: Exception table:  a hash table whose entries are indexes into the exception*/
/*: pool.  Hyphenate treats a zero entry as an empty slot that ends the probe.*/
globalref unsigned short int HYPH$AW_EXCEPTION_TABLE[];
/*: Size of the exception table in entries; Hyphenate uses it as  the  hash  mod-*/
/*: ulus and to wrap the probe index from 0 back to the last entry.*/
globalvalue int HYPH$S_EXCEPTION_TABLE;
#define etable(x) HYPH$AW_EXCEPTION_TABLE[(x)]

/*: Exception pool.  As read by Hyphenate, the entry at index I is a length byte,*/
/*: followed by that many word characters, followed by a  zero-terminated  list*/
/*: of HYPHEN_LOC_ARRAY indexes at which a hyphen is allowed.*/
globalref unsigned char HYPH$AB_EXCEPTION_POOL[];
#define epool(x) HYPH$AB_EXCEPTION_POOL[(x)]

/*: For brevity we refer to the components of the weight location table and packed*/
/*: trie as separate arrays.*/

/*: Weight location table.  Each entry is one element of a linked weight list; a*/
/*: list is identified by the index of its first entry.*/
globalref struct {
   /*: Index of the next entry in the same list; zero ends the list.*/
   unsigned char _next_weight;
   /*: Weight that is maximized into HYPHEN_LOC_ARRAY.*/
   unsigned char _weight;
   /*: Distance subtracted from the current word position to obtain the*/
   /*: HYPHEN_LOC_ARRAY index that this weight applies to.*/
   unsigned short int _offset;
} HYPH$AL_WEIGHT_LOCATION_TABLE[];

/*: Accessors for the fields of weight location table entry X.*/
#define next_weight(x) HYPH$AL_WEIGHT_LOCATION_TABLE[(x)]._next_weight
#define weight(x) HYPH$AL_WEIGHT_LOCATION_TABLE[(x)]._weight
#define offset(x) HYPH$AL_WEIGHT_LOCATION_TABLE[(x)]._offset

/*: Packed pattern trie.  Hyphenate starts at base 0 (the root) and  reaches  the*/
/*: node for a character by adding the character code to the current base.*/
globalref struct {
   /*: Base to which the next word character is added to find the next node.*/
   unsigned short int _next_node;
   /*: Index of the first weight location table entry for the pattern that ends*/
   /*: at this node, or zero if no weight list is attached.*/
   unsigned char _weight_list;
   /*: Character owning this slot; the node is valid for a lookup only if it*/
   /*: equals the word character used to reach it.*/
   unsigned char _character;
} HYPH$AL_PACKED_TRIE[];

/*: Accessors for the fields of packed trie node X.*/
#define next_node(x) HYPH$AL_PACKED_TRIE[(x)]._next_node
#define weight_list(x) HYPH$AL_PACKED_TRIE[(x)]._weight_list
#define character(x) HYPH$AL_PACKED_TRIE[(x)]._character

/*: ------------------------------------------------------------------------------*/

/*: Forward declaration of the file-local word comparison routine.  It  must  be*/
/*: made  at file scope with internal linkage:  a block-scope "int compare();" in*/
/*: Hyphenate would give the identifier external linkage, which  conflicts  with*/
/*: the static definition of compare in this file.*/

static int compare();

/*: Hyphenate marks the legal hyphenation points of a word.*/
/*:*/
/*:    word_arg           descriptor of the word to be hyphenated (read only)*/
/*:    conversion_table   256-byte translation table handed to movtc (read only)*/
/*:    hyphen_loc_array   one byte per character of the word (write only); it is*/
/*:                       cleared first, and on return an odd byte at index J*/
/*:                       means a hyphen may be placed before character J*/
/*:*/
/*: The word is first looked up in the exception table; the pattern trie is  con-*/
/*: sulted only if the word is not found there.*/

void Hyphenate ( word_arg, conversion_table, hyphen_loc_array )

   struct dsc$descriptor_s *word_arg;
   unsigned char conversion_table[256];
   unsigned char hyphen_loc_array[];
{
   int c;
   int cur_node;
   int cur_pos;
   int i, j;
   int length;
   int start_pos;
   unsigned char word[1+255+1];

   int movtc();
   void ots$move5();

/*:  -----------------------------------------------------------------------------*/

/*:  Argument processing.  We  place  the  transformed  word  in  WORD[1..LENGTH],*/
/*:  leaving WORD[0] and WORD[LENGTH+1] free to (later) identify the boundaries of*/
/*:  the word.  HYPHEN_LOC_ARRAY is cleared.*/

   length = movtc( word_arg->dsc$w_length, word_arg->dsc$a_pointer, 0,
      conversion_table, 255, &word[1] );

   ots$move5( 0, 0, 0, word_arg->dsc$w_length, hyphen_loc_array );

/*:  -----------------------------------------------------------------------------*/

/*:  We  first  attempt  to  find  the word in the exception table.  To agree with*/
/*:  Build_hyphenation_tables' hash function we must "mod" after  each  addition;*/
/*:  we  would otherwise not have to do this since C does not signal integer over-*/
/*:  flows.*/

   j = 0;
   for ( i = 1; i <= length; ++i ) {
      j = ((int) ( 2*j + word[i] )) % HYPH$S_EXCEPTION_TABLE;
   }

/*:  The exception table is an ordered hash table, and hence we can terminate  our*/
/*:  search  not  only when we reach a zero entry, but also when we reach a lesser*/
/*:  word.*/

   while ( (i = etable(j)) != 0 ) {

      if ( (c = compare( epool(i), &epool(i+1), length, &word[1] )) == 0 ) {

/*:        The  WORD  indexes of the characters of the caller's word run from 1 to*/
/*:        LENGTH, and the HYPHEN_LOC_ARRAY indexes run from 0 to  LENGTH-1;  thus*/
/*:        we  should modify HYPHEN_LOC_ARRAY[EPOOL(I)-1] below.  But by switching*/
/*:        the meaning of a weight from "may place a hyphen after the  correspond-*/
/*:        ing  character"  to  "may place a hyphen before the corresponding char-*/
/*:        acter" we can get away with modifying HYPHEN_LOC_ARRAY[EPOOL(I)].*/

/*:        Skip the length byte and the word's characters to reach  the  zero-ter-*/
/*:        minated list of hyphen positions.*/

         i += epool(i) + 1;
         while ( epool(i) != 0 ) hyphen_loc_array[epool(i++)] = 1;

         return;

      }
      else if ( c < 0 ) break;

/*:     Probe the previous slot, wrapping from slot 0 to the last slot.*/

      j = ( j == 0 ? HYPH$S_EXCEPTION_TABLE-1 : j-1 );

   }

/*:  -----------------------------------------------------------------------------*/

/*:  The  word does not match any of the exception words, and thus to hyphenate it*/
/*:  we must use the pattern tables.  Note that there are no algorithmic  restric-*/
/*:  tions  on where a hyphen may be placed within a word; however, it is intended*/
/*:  that this subroutine be used with the TeX82 patterns, which erroneously iden-*/
/*:  tify  hyphens  near word boundaries.  To correct this we restrict where a hy-*/
/*:  phen may be placed:  it must leave at least two characters to the left and at*/
/*:  least  three  characters  to the right.  Thus a four-character word cannot be*/
/*:  hyphenated, and a five-character word can be hyphenated only as xx-xxx.*/

   word[0] = word[length+1] = 255;

/*:  At  this  point  we  have  a word of length LENGTH+2 whose first character is*/
/*:  WORD[0].  For each iteration of this outermost loop we find all  patterns  in*/
/*:  WORD  that  start  at WORD[START_POS].  Because of the above restriction, and*/
/*:  because a pattern cannot affect potential hyphens previous  to  its  location*/
/*:  within WORD, we can stop after START_POS = LENGTH-2.*/

   for ( start_pos = 0; start_pos <= length-2; ++start_pos ) {

      cur_node = 0 /* the root of the packed trie */ + word[start_pos];
      cur_pos  = start_pos;

      while ( character(cur_node) == word[cur_pos] ) {

/*:        The current node's character matches the current character,  indicating*/
/*:        that  WORD[START_POS..CUR_POS]  partially matches at least one pattern.*/
/*:        If the current node has an attached weight list, we've fully matched  a*/
/*:        pattern, and we maximize the pattern's weights into HYPHEN_LOC_ARRAY.*/

         if ( (i = weight_list(cur_node)) != 0 ) {

/*:           The  WORD  indexes of the characters of the caller's word run from 1*/
/*:           to LENGTH, and the HYPHEN_LOC_ARRAY indexes run from 0 to  LENGTH-1;*/
/*:           thus we should modify HYPHEN_LOC_ARRAY[J-1] below.  But by switching*/
/*:           the meaning of a weight from "may place a hyphen  after  the  corre-*/
/*:           sponding  character" to "may place a hyphen before the corresponding*/
/*:           character" we can get away with modifying HYPHEN_LOC_ARRAY[J].*/

/*:           The bounds on J enforce the two-left/three-right  restriction  given*/
/*:           above.*/

            do {
               j = cur_pos - offset(i);
               if ( j > 1 && j < length-2 && hyphen_loc_array[j] < weight(i) ) {
                  hyphen_loc_array[j] = weight(i);
               }
            } while ( (i = next_weight(i)) != 0 );

         }

/*:        We  advance  to  the  next character in WORD and to the next node.  The*/
/*:        following check keeps us from looping past the end of WORD.*/

         if ( cur_pos++ > length ) break;
         cur_node = next_node(cur_node) + word[cur_pos];

      }

   }

   return;
}
/*: ==============================================================================*/

/*: ==============================================================================*/

/*: We  use  the following local function to compare two words.  The result of the*/
/*: comparison is returned as a signed integer.  Note that the words  are  ordered*/
/*: primarily on length and secondarily on lexicographic ordering.*/

/*: Returns a negative, zero, or positive value according to whether word 1 sorts*/
/*: before,  the same as, or after word 2.  Words of unequal length are ordered by*/
/*: length alone; words of equal length are ordered by the first  differing  byte,*/
/*: compared as unsigned values.*/

static int compare ( len1, s1, len2, s2 )

   int len1;
   unsigned char s1[];
   int len2;
   unsigned char s2[];
{
   int delta;
   int remaining;

/*:  Length is the primary sort key.*/

   delta = len1 - len2;
   if ( delta != 0 ) {
      return delta;
   }

/*:  Equal lengths:  walk both words in step until a byte differs.*/

   for ( remaining = len1; remaining > 0; --remaining, ++s1, ++s2 ) {
      delta = *s1 - *s2;
      if ( delta != 0 ) {
         return delta;
      }
   }

   return 0;
}
/*: ==============================================================================*/

/*: end*/
