/*****************************************************************************/
/*
                                Spinneret.c

Given a URL for an HTML document will retrieve all or selected components 
comprising that document.


QUALIFIERS and PARAMETERS
-------------------------
/[NO]ALL                all subdocuments of the original specified
/ALWAYS_GET=            comma-separated list of file types to always get
/DBUG                   turns on all "if (Debug)" statements
/DEPTH=                 integer, depth of subdocuments to be retrieved
/EXCLUDE=               comma-separated list of file types
/INCLUDE=               comma-separated list of file types
/RANGE=                 integer, distance of documents to be retrieved
/[NO]VERBOSE            output lots of status messages (NO default)


BUILD DETAILS
-------------
See BUILD_SPINNERET.COM procedure.


VERSION HISTORY
---------------
28-AUG-94  MGD  v1.0.0, initial development
*/
/*****************************************************************************/

/* utility name and version string (see VERSION HISTORY above) */
char SoftwareID [] = "SPINNERET v1.0.0";

/* standard C header files */
#include <stdio.h>
#include <ctype.h>
#include <time.h>
#include <errno.h>
#include <types.h>
#include <file.h>

/* VMS-related header files */
#include <brkdef.h>
#include <descrip.h>
#include <iodef.h>
#include <jpidef.h>
#include <lnmdef.h>
#include <prcdef.h>
#include <psldef.h>
#include <ssdef.h>
#include <stsdef.h>
#include <syidef.h>
#include <rms.h>
#include <unixio.h>

/* Internet-related header files */

#ifdef UCX_TCPIP

#include <socket.h>
#include <in.h>
#include <netdb.h>
#include <inet.h>
#include <ucx$inetdef.h>

#define IP_errno vaxc$errno

#define IP_close_socket close

#else

#include "multinet_root:[multinet.include.sys]types.h"
#include "multinet_root:[multinet.include.sys]socket.h"
#include "multinet_root:[multinet.include.netinet]in.h"
#include "multinet_root:[multinet.include.vms]inetiodef.h"
#include "multinet_root:[multinet.include]errno.h"
#include "multinet_root:[multinet.include]netdb.h"

#define IP_errno vmserrno

#define IP_close_socket socket_close

#endif

/* home-grown boolean type and its two values */
#define boolean int
#define true 1
#define false 0

/* test the STS$M_SUCCESS bit of a VMS status value */
#define VMSok(x) ((x) & STS$M_SUCCESS)
#define VMSnok(x) (!((x) & STS$M_SUCCESS))
/* compare the low three bits of a VMS status value against a severity */
#define VMSwarning(x) (((x) & 0x7) == STS$M_WARNING)
#define VMSerror(x) (((x) & 0x7) == STS$M_ERROR)
#define VMSinfo(x) (((x) & 0x7) == STS$M_INFO)
#define VMSfatal(x) (((x) & 0x7) == STS$M_SEVERR)

/* port ProcessUrl() falls back to when neither URL nor context supply one */
#define HttpPort 80

/* maximum number of passes through RetrieveUrl()'s connect loop */
#define ConnectFailRetries 3

/*
   URL that ProcessUrl() ignores and FlattenUrl() leaves unflattened.
   May be predefined at build time to override this default.
*/
#ifndef ImportedExplanationUrl
#   define ImportedExplanationUrl "$IMPORTED$.HTML"
#endif

/*
   Per-document state.  One of these exists for each URL being processed;
   ProcessUrl() builds a new one for every link and receives a pointer to
   the one belonging to the document that contained the link.
*/
struct SourceData
{
   /* link-scanning state maintained by ProcessLine() and ProcessUrl() */
   boolean  AnchorEncountered,   /* an "<A" tag has been seen */
            HrefEncountered,     /* an HREF= was seen inside the anchor */
            InsideAnchorTag,     /* between "<A" and its closing '>' */
            InsideImgTag,        /* between "<IMG" and its closing '>' */
            /* content classification, set by RetrieveUrl() from 'FileType' */
            IsAnHtmlFile,
            IsAPostScriptFile,
            IsATextFile,
            LineModified,        /* current file record needs updating */
            LinkObtained,        /* the most recent link was retrieved */
            PartialLink;         /* URI was relative (see ParseUri()) */

   /*
      NOTE(review): 'BlockNumber', 'ClientSocket' and 'ServerSocket' are not
      referenced in the part of the file reviewed; presumably used by the
      network/file helper functions -- confirm.
   */
   int  BlockNumber,
        ClientSocket,
        ServerPort,              /* port of the server holding this URL */
        ServerSocket,
        SubdocumentDepth;        /* 0 for absolute links, parent's + 1 for partial */

   /* NOTE(review): not referenced in the part of the file reviewed */
   unsigned short  ClientChannel;

   char  FileType [128],         /* ".TYPE" (leading period) or "?" if none */
         HostName [128],
         Protocol [128],         /* includes the trailing colon, e.g. "http:" */
         VmsFileName [128],      /* output file name derived from URI or host */
         Uri [512],              /* path portion of the URL */
         Url [512];              /* full "protocol//host:port/uri" string */

   /* RMS structures for the output file (used with sys$get(), sys$close(), etc.) */
   struct FAB  FileFab;
   struct RAB  FileRab;
   struct NAM  FileNam;

   /*
      NOTE(review): presumably filled in by ConnectToServer() (not shown
      in the part of the file reviewed) -- confirm.
   */
   struct hostent  RemoteHostEntry;
   struct hostent  *RemoteHostEntryPtr;
   struct sockaddr_in  SocketName;
};

/*
   This data structure is a bit shonky!  The 'Url' member is declared
   as holding only a single character but in fact will hold as many as
   are required to accommodate the URL.  It is done this way so it can be
   accessed as a string.  The structure is dynamically allocated using
   "malloc (strlen (Url) + 1 + sizeof (struct ObtainedData*))".
*/
struct ObtainedData
{
   /* link to the next entry in the list */
   struct ObtainedData  *NextPtr;
   /* first character of the variable-length, null-terminated URL */
   char  Url [1];
};

/* facility name used in "%SPINNERET-E-..." style messages */
char Utility [] = "SPINNERET";

/*
   Comma-separated file types (without the leading period) that
   RetrieveUrl() classifies as HTML, plain text and PostScript.
*/
char HtmlTypeList [] = "HTML,HTM";
char TextTypeList [] = "TEXT,TXT";
char PostScriptTypeList [] = "PS";

/*
   Run-time option flags.
   NOTE(review): presumably set by ParseCommandLine(), which is not in the
   part of the file reviewed -- confirm.
*/
boolean  AllRootSubdocuments,
         Debug,
         DoConfirm,
         DoShowHelp,
         Verbose;

/*
   'Depth' is the limit GetThisUrl() applies to the subdocument depth of
   partial links, 'Range' the limit it applies to 'DocumentRange' for
   absolute links.  'DocumentRange' is incremented by ProcessUrl() while an
   absolute link is being processed and decremented again afterwards.
*/
int  Depth,
     DocumentRange = 0,
     Range;

/*
   'RootUri' is established via GetRootUri() by the first call to
   ProcessUrl(); 'UrlParameter' is the URL main() starts from.  The
   file-type lists are consulted by GetThisUrl().
*/
char  AlwaysGetTheseFileTypes [256],
      CommandLine [256],
      ExcludeFileTypes [256],
      IncludeFileTypes [256],
      RootUri [256],
      UrlParameter [256];

/* head and tail of the list of 'struct ObtainedData' entries */
struct ObtainedData  *ObtainedHead = NULL,
                     *ObtainedTail = NULL;

/* required prototypes */
/* (returns a pointer, so it cannot be left to an implicit "int" declaration) */
char* SysGetMsg (int);

/*****************************************************************************/
/*
*/

int main ()

{
   register char  *cptr, *sptr;
   int  status;
   struct SourceData  Source;

   if (VMSnok (status = ParseCommandLine ()))
      exit (status);

   if (DoShowHelp) exit (ShowHelp ());

   Source.SubdocumentDepth = 0;

   ProcessUrl (&Source, UrlParameter, NULL);

   exit (status);
}

/*****************************************************************************/
/*
The URL represents a link.  Parse the URL into its component parts.  Make a 
decision (or ask the user) if this link should be retrieved.  Recursively 
retrieve the link.

'sdptr' is the context of the document containing the link; protocol, host 
and port not supplied by 'Url' are inherited from it and partial URIs are 
resolved against its URI.  'OriginalUrl' points at the link text within the 
file record being scanned (NULL for the root URL) and is passed on to 
FlattenUrl() when the link has been, or already was, obtained.

On return 'sdptr->LinkObtained' is true if the link was retrieved, was 
already obtained, or was just a local "#fragment" reference.  'DocumentRange' 
is incremented while an absolute link is being processed and is always 
restored before returning.
*/ 

ProcessUrl
(
struct SourceData *sdptr,
char *Url,
char *OriginalUrl
)
{
   /* static so that an "All" reply persists across calls */
   static char  LinkReply [16] = "";

   int  status;
   char  Uri [256];
   struct SourceData  Source;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "ProcessUrl() |%s|\n", Url);

   Source.AnchorEncountered =
   Source.HrefEncountered =
   Source.InsideAnchorTag =
   Source.InsideImgTag =
   Source.IsATextFile =
   Source.IsAnHtmlFile =
   Source.IsAPostScriptFile =
   Source.LineModified =
   Source.LinkObtained = false;

   if (strsame (Url, ImportedExplanationUrl, -1)) return;

   ParseUrl (Url, Source.Protocol, Source.HostName, &Source.ServerPort, Uri);

   /* ignore if URL contains just a local link (e.g. HREF="#there") */
   if (Uri[0] == '#')
   {
      sdptr->LinkObtained = true;
      return;
   }

   /* copy any non-supplied components from the current source */
   if (!Source.Protocol[0]) strcpy (Source.Protocol, sdptr->Protocol);
   if (!Source.HostName[0]) strcpy (Source.HostName, sdptr->HostName);
   if (!Source.ServerPort) Source.ServerPort = sdptr->ServerPort;
   if (!Source.ServerPort) Source.ServerPort = HttpPort;

   if (ParseUri (Uri, sdptr->Uri, Source.Uri))
   {
      /* absolute */
      Source.PartialLink = false;
      Source.SubdocumentDepth = 0;
      DocumentRange++;
   }
   else
   {
      /* relative (partial) */
      Source.PartialLink = true;
      Source.SubdocumentDepth = sdptr->SubdocumentDepth + 1;
   }
   if (!RootUri[0]) GetRootUri (Source.Uri);

   if (IgnoreThis (Source.Uri))
   {
      /* every exit after the increment above must restore the range */
      if (!Source.PartialLink) DocumentRange--;
      return;
   }

   sprintf (Source.Url, "%s//%s:%d%s",
            Source.Protocol,
            Source.HostName, Source.ServerPort,
            Source.Uri);

   VmsFileNameFromUri (Source.Uri, Source.VmsFileName, Source.FileType);
   if (!Source.VmsFileName[0]) VmsFileNameFromHostName (&Source);

   if (!GetThisUrl (&Source))
   {
      if (!Source.PartialLink) DocumentRange--;
      return;
   }

   /************************************************/
   /* check if this file has already been obtained */
   /************************************************/

   if (AlreadyObtained (&Source))
   {
      if (Verbose)
         fprintf (stdout, "[%d.%d][Already obtained: \"%s\"]\n",
                  DocumentRange, Source.SubdocumentDepth, Source.Url);

      FlattenUrl (sdptr, Url, OriginalUrl);
      sdptr->LinkObtained = true;
      if (!Source.PartialLink) DocumentRange--;
      return;
   }

   if (DoConfirm)
   {
      /**********************************/
      /* check if this link is required */
      /**********************************/

      /* an 'A' in the link reply means "get all" */
      if (LinkReply[0] != 'A')
      {
         fprintf (stdout, "[%d.%d] Get \"%s\"? (Yes,No,Quit,All) [Y]: ",
                  DocumentRange, Source.SubdocumentDepth, Source.Url);
         fgets (LinkReply, sizeof(LinkReply), stdin);
         LinkReply[0] = toupper(LinkReply[0]);
         if (LinkReply[0] == 'Q') exit (SS$_NORMAL);
         /* an empty line (just the new-line) takes the default of "yes" */
         if (!(LinkReply[0] == '\n' ||
               LinkReply[0] == 'Y' ||
               LinkReply[0] == 'A'))
         {
            if (!Source.PartialLink) DocumentRange--;
            return;
         }
      }
   }

   /**********************************/
   /* recursively process the source */
   /**********************************/

   if (VMSok (status = RetrieveUrl (&Source)))
   {
      FlattenUrl (sdptr, Url, OriginalUrl);
      sdptr->LinkObtained = true;
   }
   if (!Source.PartialLink) DocumentRange--;
   if (DocumentRange)
      fprintf (stdout, "[%d.%d][Continuing: \"%s\"]\n",
               DocumentRange, sdptr->SubdocumentDepth, sdptr->VmsFileName);

   return;
}

/*****************************************************************************/
/*
Decide whether this link should be retrieved.  A link is rejected when it is 
not an HTTP URL, when its file type is not one of the specifically included 
types, when its file type is one of the specifically excluded types, or when 
it lies beyond the permitted subdocument depth (partial links) or document 
range (absolute links).  A file type in the "always get" list bypasses every 
check except the protocol one, and a URI sharing the root URI bypasses the 
depth/range check when all root subdocuments were requested.

Return true if it should be retrieved, false if not.
*/ 

boolean GetThisUrl (struct SourceData *sdptr)

{
   char  *TypePtr;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "GetThisUrl()\n");

   /* only HTTP can be retrieved */
   if (!strsame (sdptr->Protocol, "http:", -1))
   {
      if (Verbose)
         fprintf (stdout, "[%d.%d][Not HTTP: \"%s\"]\n",
                  DocumentRange, sdptr->SubdocumentDepth, sdptr->Url);
      return (false);
   }

   /* the type lists do not carry the period (or "?") leading the file type */
   TypePtr = sdptr->FileType + 1;

   /* a type that is always retrieved needs no further consideration */
   if (AlwaysGetTheseFileTypes[0])
      if (InCommaList (TypePtr, AlwaysGetTheseFileTypes))
         return (true);

   /* when an include list exists the type must be in it */
   if (IncludeFileTypes[0])
   {
      if (!InCommaList (TypePtr, IncludeFileTypes))
      {
         if (Verbose)
            fprintf (stdout, "[%d.%d][Not included: \"%s\"]\n",
                     DocumentRange, sdptr->SubdocumentDepth, sdptr->Url);
         return (false);
      }
   }

   /* when an exclude list exists the type must not be in it */
   if (ExcludeFileTypes[0])
   {
      if (InCommaList (TypePtr, ExcludeFileTypes))
      {
         if (Verbose)
            fprintf (stdout, "[%d.%d][Excluded: \"%s\"]\n",
                     DocumentRange, sdptr->SubdocumentDepth, sdptr->Url);
         return (false);
      }
   }

   /*
      Comparing only the URI (not the host, etc.) is not a foolproof way
      of deciding whether this belongs to the root document's hierarchy,
      but it is practical, as some servers redirect the client to a
      different host (name, possibly) for the same hierarchy.
   */
   if (AllRootSubdocuments)
      if (SameRootUri (sdptr->Uri))
         return (true);

   if (!sdptr->PartialLink)
   {
      /* absolute link, must be within document range */
      if (DocumentRange > Range)
      {
         if (Verbose)
            fprintf (stdout, "[%d.%d][Too far: \"%s\"]\n",
                     DocumentRange, sdptr->SubdocumentDepth, sdptr->Url);
         return (false);
      }
      return (true);
   }

   /* partial link, must be within subdocument depth */
   if (sdptr->SubdocumentDepth > Depth)
   {
      if (Verbose)
         fprintf (stdout, "[%d.%d][Too deep: \"%s\"]\n",
                  DocumentRange, sdptr->SubdocumentDepth, sdptr->Url);
      return (false);
   }

   return (true);
}

/*****************************************************************************/
/*
This overly-long function is recursively called to process each URL.  It 
connects with the server, opens the output file, retrieves each network record 
from the server, if a "binary" file writes the records to the output file in 
block mode, if a "text" file breaks that record up into lines and writes each 
line/record to the output file.

Then if it's an HTML file rewinds and looks through each line for links to 
other documents which it accesses and if required retrieves recursively.

While storing an HTML file every "</A>" is written as "^</A>"; ProcessLine() 
later uses the caret as a marker it can blank out once the link is resolved.

Returns a VMS status value.  Connect and file-create failures are reported 
here and returned with STS$M_INHIB_MSG set.

NOTE(review): the early error returns after a successful connect do not call 
DisconnectFromServer(), and the StampFile() failure return does not close the 
file -- confirm whether the helpers (not visible here) clean up themselves.
*/ 

int RetrieveUrl (struct SourceData *sdptr)

{
   register char  *cptr, *lptr, *rptr;
   boolean  ErrorResponse = false,
            HtmlHeader = true;
   int  status,
        ByteCount = 0,
        ConnectAttempts,
        LineCount = 0,
        RecordLength,
        TotalBytes = 0;
   char  ErrorReply [16],
         Line [2048],
         Record [2048];

   /*********/
   /* begin */
   /*********/

   fprintf (stdout, "[%d.%d][Retrieving: \"%s\"]\n",
            DocumentRange, sdptr->SubdocumentDepth, sdptr->Url);

   if (InCommaList (sdptr->FileType+1, HtmlTypeList))
      sdptr->IsATextFile = sdptr->IsAnHtmlFile = true;
   else
   if (InCommaList (sdptr->FileType+1, TextTypeList))
      sdptr->IsATextFile = true;
   else
   if (InCommaList (sdptr->FileType+1, PostScriptTypeList))
      sdptr->IsATextFile = sdptr->IsAPostScriptFile = true;

   /*********************/
   /* connect to server */
   /*********************/

   status = STS$K_ERROR;
   for (ConnectAttempts = 0;
        ConnectAttempts < ConnectFailRetries;
        ConnectAttempts++)
   {
      fprintf (stdout, "[%d.%d][Connecting: %s:%d]\n",
               DocumentRange, sdptr->SubdocumentDepth,
               sdptr->HostName, sdptr->ServerPort);

      if (VMSok (status = ConnectToServer (sdptr))) break;

      if (!status)
      {
         fprintf (stdout, "%%%s-E-CONNECT, failed\n", Utility);
         status = STS$K_ERROR;
      }
      else
         fprintf (stdout, "%%%s-E-CONNECT, failed\n-%s\n",
                  Utility, SysGetMsg(status)+1);
      sleep (5);
   }
   /*
      Decide on the status of the last attempt, not on the loop counter;
      a counter cannot distinguish "connected on the final attempt" from
      "every attempt failed".
   */
   if (VMSnok (status)) return (status | STS$M_INHIB_MSG);

   /* send request to server */
   sprintf (Line, "GET %s HTTP/1.0\r\n\r\n", sdptr->Uri);
   WriteData (sdptr, Line, strlen(Line));

   /**********************/
   /* create output file */
   /**********************/

   if (VMSnok (status = CreateFile (sdptr)))
   {
      fprintf (stdout, "%%%s-E-CREATE, file create failed\n-%s\n",
               Utility, SysGetMsg(status)+1);
      return (status | STS$M_INHIB_MSG);
   }
   if (VMSnok (status = StampFile (sdptr)))
      return (status);

   /*********************************/
   /* loop reading data from source */
   /*********************************/

   fprintf (stdout, "\r[%d.%d][Received: 0 bytes]",
            DocumentRange, sdptr->SubdocumentDepth);

   /* set the pointers to the start of the line */
   lptr = sdptr->FileRab.rab$l_rbf = Line;

   while (VMSok (status =
          ReadData (sdptr, Record, sizeof(Record)-1, &RecordLength)))
   {
      if (!RecordLength) break;

      ByteCount += RecordLength;
      if (TotalBytes)
         fprintf (stdout, "\r[%d.%d][Received: %d of %d bytes]",
                  DocumentRange, sdptr->SubdocumentDepth,
                  ByteCount, TotalBytes);
      else
         fprintf (stdout, "\r[%d.%d][Received: %d bytes]",
                  DocumentRange, sdptr->SubdocumentDepth, ByteCount);

      /* null-terminate the record so the look-ahead tests below are safe */
      Record[sdptr->FileRab.rab$w_rsz = RecordLength] = '\0';
      if (Debug && sdptr->IsATextFile)
         fprintf (stdout, "Record |%s|\n", Record);

      rptr = Record;
      while (RecordLength)
      {
         if (sdptr->IsATextFile || HtmlHeader)
         {
            /***************************/
            /* process textual content */
            /***************************/

            /*
               Isolate a carriage-control delimited line.  A line may span
               network records and may grow by a caret per "</A>", so stop
               while there is still room for two characters plus the
               terminating null; an over-long line is then written out as
               more than one record rather than overrunning 'Line'.
            */
            while (RecordLength && *rptr && *rptr != '\n' &&
                   lptr < Line + sizeof(Line) - 2)
            {
               RecordLength--;

               /* absorb carriage-returns, line ends at new-line (line-feed) */
               if (*rptr == '\r')
                  rptr++;
               else
               {
                  if (sdptr->IsAnHtmlFile &&
                      rptr[0] == '<' && rptr[1] == '/' &&
                      toupper(rptr[2]) == 'A' && rptr[3] == '>')
                  {
                     /* mark the end-of-anchor for ProcessLine() */
                     *lptr++ = '^';
                     *lptr++ = *rptr++;
                  }
                  else
                     *lptr++ = *rptr++;
               }
            }

            /* if we ran out of characters from the source get some more! */
            if (!RecordLength) continue;

            /* otherwise, terminate and write the line */
            *lptr = '\0';
            /*
               Consume the line terminator.  A null character embedded in
               the data is consumed in the same way, otherwise nothing
               would ever step over it and this loop would never end.
            */
            if (*rptr == '\n' || !*rptr)
            {
               rptr++;
               RecordLength--;
            }
            LineCount++;
            if (Debug) fprintf (stdout, "Line |%s|\n", Line);
         }

         else

         if (!sdptr->IsATextFile)
         {
            /**************************/
            /* process binary content */
            /**************************/

            /* accumulate 512 byte blocks, getting more data if not yet full */
            while (RecordLength && lptr < Line+512)
            {
               *lptr++ = *rptr++;
               RecordLength--;
            }
            if (lptr < Line+512) continue;
         }

         if (HtmlHeader)
         {
            /**********************/
            /* process the header */
            /**********************/

            if (Line[0])
            {
               if (LineCount == 1)
               {
                  /* status line, the code is the second field */
                  for (cptr = Line; *cptr && !isspace(*cptr); cptr++);
                  while (isspace(*cptr)) cptr++;
                  if (*cptr != '2')
                  {
                     ErrorResponse = true;
                     fprintf (stdout,
                     "\n%%%s-W-RESPONSE, server error reponse\n \\%s\\\n",
                     Utility, Line);
                     fprintf (stdout, "Skip? (Yes,No,Quit) [Y]: ");
                     fgets (ErrorReply, sizeof(ErrorReply), stdin);
                     ErrorReply[0] = toupper(ErrorReply[0]);
                     if (ErrorReply[0] == 'Q') exit (SS$_NORMAL);
                     if (!(ErrorReply[0] == '\n' || ErrorReply[0] == 'Y'))
                        continue;
                  }
               }
               else
               {
                  if (toupper(Line[0]) == 'C' &&
                      strsame (Line, "Content-Length:", 15))
                  {
                     for (cptr = Line + 15; *cptr && !isdigit(*cptr); cptr++);
                     TotalBytes = strtol (cptr, NULL, 10);
                  }
                  if (Debug)
                     fprintf (stdout, "TotalBytes %d\n", TotalBytes);
               }
            }
            else
            {
               /* empty line ends the header, start counting the body */
               HtmlHeader = false;
               if (TotalBytes) ByteCount = 0;
            }
         }
         else
         {
            /*************************/
            /* store content to file */
            /*************************/

            if (VMSnok (status = WriteFile (sdptr, Line, lptr-Line)))
            {
               if (Debug) fprintf (stdout, "sys$put() %%X%08.08X\n", status);
               sys$close (&sdptr->FileFab, 0, 0);
               return (status);
            }
         }

         /* reset the pointer to the start of the line */
         lptr = Line;
      }

      /* if "Content-Length:" bytes have been received then break */
      if (TotalBytes && ByteCount >= TotalBytes) break;
   }

   /****************************/
   /* end get source data loop */
   /****************************/

   fputc ('\n', stdout);

   if (lptr > Line)
   {
      /* if any characters remain in the line buffer then flush to file */
      if (VMSnok (status = WriteFile (sdptr, Line, lptr-Line)))
      {
         if (Debug) fprintf (stdout, "sys$put() %%X%08.08X\n", status);
         sys$close (&sdptr->FileFab, 0, 0);
         return (status);
      }
   }

   /**************************/
   /* disconnect from server */
   /**************************/

   DisconnectFromServer (sdptr);

   /***********************/
   /* update the database */
   /***********************/

   if (VMSok (status) && !ErrorResponse) UpdateObtained (sdptr);

   /******************************/
   /* find links in an HTML file */
   /******************************/

   if (sdptr->IsAnHtmlFile && VMSok (status) && !ErrorResponse)
   {
      if (VMSnok (status = sys$rewind (&sdptr->FileRab, 0, 0)))
      {
         if (Debug) fprintf (stdout, "sys$rewind() %%X%08.08X\n", status);
         sys$close (&sdptr->FileFab, 0, 0);
         return (status);
      }

      sdptr->FileRab.rab$l_ubf = Line;
      sdptr->FileRab.rab$w_usz = sizeof(Line)-1;
      while (VMSok (status = sys$get (&sdptr->FileRab, 0, 0)))
      {
         Line[sdptr->FileRab.rab$w_rsz] = '\0';
         sdptr->LineModified = false;
         /* find and recursively process any links in this line */
         ProcessLine (sdptr, Line);
         if (sdptr->LineModified)
         {
            if (VMSnok (status =
                UpdateFile (sdptr, Line, sdptr->FileRab.rab$w_rsz)))
            {
               if (Debug) fprintf (stdout, "sys$update() %%X%08.08X\n", status);
               sys$close (&sdptr->FileFab, 0, 0);
               return (status);
            }
         }
      }
      /* running off the end of the file is the normal way out of the loop */
      if (status == RMS$_EOF) status = SS$_NORMAL;
   }

   /*****************************/
   /* end processing HTML links */
   /*****************************/

   sys$close (&sdptr->FileFab, 0, 0);

   return (status);
}

/*****************************************************************************/
/*
Helper for ProcessLine().  'cptr' points at the name of a tag parameter (e.g. 
the 'H' of HREF= or the 'S' of SRC=).  Steps over the name, the equate symbol 
and any white-space, then copies the (optionally quoted) parameter value into 
'Url' as a null-terminated string.

No more than 'SizeOfUrl'-1 characters are stored; if the value is longer than 
that '*TooLongPtr' is set true (otherwise false) and the stored string is only 
the leading part of the value.  '*OriginalUrlPtr' is pointed at the first 
character of the value within the line.  Returns a pointer to the first 
character in the line following the value.
*/ 

static char* ProcessLineGetUrl
(
char *cptr,
char *Url,
int SizeOfUrl,
char **OriginalUrlPtr,
boolean *TooLongPtr
)
{
   register char  *sptr, *zptr;
   boolean  Quoted;

   *TooLongPtr = false;

   /* step over the parameter name, then the equate symbol and white-space */
   while (*cptr && *cptr != '=' && *cptr != '>' ) cptr++;
   while (*cptr && *cptr != '>' && (*cptr == '=' || isspace(*cptr)))
       cptr++;

   Quoted = (*cptr == '\"');
   if (Quoted) cptr++;
   *OriginalUrlPtr = cptr;

   sptr = Url;
   zptr = Url + SizeOfUrl - 1;
   while (*cptr)
   {
      /* a quoted value ends at the quote, an unquoted at space or '>' */
      if (Quoted)
      {
         if (*cptr == '\"') break;
      }
      else
      if (isspace(*cptr) || *cptr == '>') break;

      /* always step over the whole value, but never overrun the buffer */
      if (sptr < zptr)
         *sptr++ = *cptr;
      else
         *TooLongPtr = true;
      cptr++;
   }
   *sptr = '\0';

   /* step over the closing quote */
   if (Quoted && *cptr) cptr++;

   return (cptr);
}

/*
Scan one line (record) of a stored HTML file for links, passing each HREF= 
of an anchor tag and each SRC= of an image tag to ProcessUrl().  Tag state is 
kept in the source data structure so that a tag may continue onto following 
lines.  A "^</A>" marker (written by RetrieveUrl()) has its caret replaced by 
a space when the anchor had no HREF= or its link was obtained.

The opening tag (e.g. <A) may be on a separate line to the reference parameter 
(e.g. HREF=), but the reference itself cannot be broken across lines (e.g. 
cannot be HREF="/this/is/\r\nthe/reference").  Spaces may exist between the 
parameter, the equate symbol and the parameter value (e.g. HREF = "example").

A reference too long for the local URL buffer is not processed at all (it is 
left as-is in the line) rather than being allowed to overrun the buffer.

No meaningful value is returned.
*/ 

int ProcessLine
(
struct SourceData *sdptr,
char *Line
)
{
   register char  *cptr;
   boolean  LinkObtainedBuffer,
            UrlTooLong;
   char  *luptr;
   char  Url [256];

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "ProcessLine() |%s|\n", Line);

   /*********************/
   /* loop through line */
   /*********************/

   cptr = Line;
   while (*cptr)
   {
      if (sdptr->InsideAnchorTag)
      {
         /*******************/
         /* inside "<A" tag */
         /*******************/

         if (*cptr == '>')
         {
            sdptr->InsideAnchorTag = false;
            cptr++;
            continue;
         }
         else
         if (toupper(cptr[0]) == 'H' &&
             toupper(cptr[1]) == 'R' &&
             toupper(cptr[2]) == 'E' &&
             toupper(cptr[3]) == 'F' &&
             (cptr[4] == '=' || isspace(cptr[4])))
         {
            /* get the URL */
            cptr = ProcessLineGetUrl (cptr, Url, sizeof(Url),
                                      &luptr, &UrlTooLong);
            if (Debug) fprintf (stdout, "Url |%s|\n", Url);
            if (!UrlTooLong) ProcessUrl (sdptr, Url, luptr);
            sdptr->HrefEncountered = true;
            continue;
         }
      }

      if (sdptr->InsideImgTag)
      {
         /*********************/
         /* inside "<IMG" tag */
         /*********************/

         if (*cptr == '>')
         {
            sdptr->InsideImgTag = false;
            cptr++;
            continue;
         }
         else
         if (toupper(cptr[0]) == 'S' &&
             toupper(cptr[1]) == 'R' &&
             toupper(cptr[2]) == 'C' &&
             (cptr[3] == '=' || isspace(cptr[3])))
         {
            /* get the URL */
            cptr = ProcessLineGetUrl (cptr, Url, sizeof(Url),
                                      &luptr, &UrlTooLong);
            if (Debug) fprintf (stdout, "Url |%s|\n", Url);
            if (!UrlTooLong)
            {
               /* an image must not disturb the state of an enclosing anchor */
               LinkObtainedBuffer = sdptr->LinkObtained;
               ProcessUrl (sdptr, Url, luptr);
               sdptr->LinkObtained = LinkObtainedBuffer;
            }
            continue;
         }
      }

      /*******************/
      /* check for caret */
      /*******************/

      if (cptr[0] == '^' && cptr[1] == '<' && cptr[2] == '/' &&
          toupper(cptr[3]) == 'A' && cptr[4] == '>')
      {
         if (sdptr->AnchorEncountered &&
             (!sdptr->HrefEncountered || sdptr->LinkObtained))
         {
            cptr[0] = ' ';
            sdptr->LineModified = true;
            sdptr->AnchorEncountered = false;
         }
         cptr += 5;
         continue;
      }

      /********************************/
      /* check for tag, ignore if not */
      /********************************/

      if (cptr[0] == '<')
      {
         if (toupper(cptr[1]) == 'A' &&
             (isspace(cptr[2]) || !cptr[2]))
         {
            /**************************/
            /* anchor tag ("<A ...>") */
            /**************************/

            sdptr->InsideAnchorTag = sdptr->AnchorEncountered = true;
            sdptr->HrefEncountered = sdptr->LinkObtained = false;
            cptr += 2;
            continue;
         }
         else
         if (toupper(cptr[1]) == 'I' &&
             toupper(cptr[2]) == 'M' &&
             toupper(cptr[3]) == 'G' &&
             (isspace(cptr[4]) || !cptr[4]))
         {
            /***************************/
            /* image tag ("<IMG ...>") */
            /***************************/

            sdptr->InsideImgTag = true;
            cptr += 4;
            continue;
         }
         else
         {
            cptr++;
            continue;
         }
      }

      cptr++;
   }
}

/*****************************************************************************/
/*
'Url' points to a buffer containing a null-terminated string.
'OriginalUrl' points to the URL in the file line itself (NULL if there is 
no line, in which case nothing is done).

This function flattens a hierarchical URL down to just the 'file-name' itself 
so that it may be all contained in one directory.  The name (everything after 
the last slash, or the whole URL if it has none) is written over the start of 
the original URL in the line and whatever is left of the original length is 
filled with spaces, so the line does not change length.  The source data is 
flagged so that the caller knows the file record needs updating.

The URL of the "imported explanation" is never flattened.
*/ 

FlattenUrl
(
struct SourceData *sdptr,
char *Url,
char *OriginalUrl
)
{
   register int  PadCount;
   register char  *FromPtr, *NamePtr, *ToPtr;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "FlattenUrl() |%s|", Url);

   if (OriginalUrl == NULL) return;

   if (strsame (Url, ImportedExplanationUrl, -1))
   {
      /* leave the "imported explanation" URL exactly as it is */
      if (Debug) fprintf (stdout, "->|%s|\n", OriginalUrl);
      return;
   }

   /* the name begins immediately after the last slash (if any) */
   NamePtr = Url;
   for (FromPtr = Url; *FromPtr; FromPtr++)
      if (*FromPtr == '/') NamePtr = FromPtr + 1;

   /* whatever preceded the name must be made up for with spaces */
   PadCount = NamePtr - Url;

   /* overwrite the URL in the actual line/record */
   ToPtr = OriginalUrl;
   FromPtr = NamePtr;
   while (*FromPtr) *ToPtr++ = *FromPtr++;
   while (PadCount-- > 0) *ToPtr++ = ' ';

   /* indicate that the file record should be updated */
   sdptr->LineModified = true;

   if (Debug) fprintf (stdout, "->|%s|\n", OriginalUrl);
}

/*****************************************************************************/
/*
Separates a URL into its component protocol, port, host and URI.  For any 
components not present it either nulls or zeros the respective variable.

'Protocol' receives the scheme including its trailing colon (e.g. "http:").  
A leading component is only accepted as a protocol if it begins with a letter, 
continues with letters, digits, '+', '-' or '.', and is immediately followed 
by a colon.  This stops a colon occurring later in a relative URL (e.g. 
"/dir/a:b.html" or "page.html?t=10:30") from turning the front of the URL 
into a bogus protocol, and means nothing is stored into 'Protocol' until a 
genuine one has been identified.

'Host' and '*PortPtr' are only supplied when "//" follows the protocol.  
'Uri' receives everything that remains.

None of the output buffers have their size checked here; the caller must 
supply buffers large enough for the components of 'Url'.  No meaningful 
value is returned.
*/ 

int ParseUrl
(
char *Url,
char *Protocol,
char *Host,
int *PortPtr,
char *Uri
)
{
   register char  *sptr, *uptr, *zptr;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "ParseUrl() |%s|->", Url);

   /* get the protocol (e.g. "http:" from "http://host:port/uri") */
   uptr = Url;
   if (isalpha((unsigned char)*uptr))
      while (isalnum((unsigned char)*uptr) ||
             *uptr == '+' || *uptr == '-' || *uptr == '.')
         uptr++;

   if (uptr > Url && *uptr == ':')
   {
      /* copy the protocol, up to and including the colon */
      sptr = Protocol;
      for (zptr = Url; zptr <= uptr; *sptr++ = *zptr++);
      *sptr = '\0';
      uptr++;
   }
   else
   {
      /* no protocol was specifically supplied */
      Protocol[0] = '\0';
      uptr = Url;
   }

   if (uptr[0] == '/' && uptr[1] == '/')
   {
      while (*uptr == '/') uptr++;

      /* get the host/port (e.g. "host:port" from "http://host:port/uri") */
      sptr = Host;
      while (*uptr && *uptr != '/' && *uptr != ':') *sptr++ = *uptr++;
      *sptr = '\0';
      *PortPtr = 0;
      if (*uptr == ':')
      {
         /* specific port number was supplied */
         if (Debug) fprintf (stdout, "Port uptr |%s|\n", uptr);
         *PortPtr = strtol (uptr+1, NULL, 10);
         while (*uptr && *uptr != '/') uptr++;
      }
   }
   else
   {
      Host[0] = '\0';
      *PortPtr = 0;
   }

   /* get the URI (e.g. "/uri" from "http://host:port/uri") */
   sptr = Uri;
   if (*uptr == '/') *sptr++ = *uptr++;
   while (*uptr) *sptr++ = *uptr++;
   *sptr = '\0';

   if (Debug)
      fprintf (stdout, "|%s|%s|%d|%s|\n",
               Protocol, Host, *PortPtr, Uri);
}

/*****************************************************************************/
/*
Create a new URI from a context URI (the current absolute URI) and a supplied 
URI, that can be absolute or partial.

A URI beginning with a slash is absolute and is used as supplied (a URI 
beginning with consecutive slashes has leading components of the context URI 
substituted, one for each extra slash).  Any other non-empty URI is partial 
and replaces whatever follows the last slash of the context URI.  In all cases 
the new URI ends at any '#' fragment.  Finally "name/../" and "/./" constructs 
are collapsed.

'NewUri' must be large enough for the result; no size is checked here.

Returns true if the URI was absolute (or empty), false if it was partial.
*/ 

boolean ParseUri
(
register char *Uri,
register char *ContextUri,
register char *NewUri
)
{
   register char  *cptr, *sptr, *uptr, *zptr;
   boolean  AbsoluteLink;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "ParseUri() |%s|->", Uri);

   if (Uri[0] == '/')
   {
      if (Uri[1] == '/')
      {
         /***************************/
         /* "consecutive slash" URI */
         /***************************/

         sptr = NewUri;
         uptr = Uri;
         cptr = ContextUri;
         while (uptr[0] == '/' && uptr[1] == '/')
         {
            if (*cptr == '/')
            {
               /* substitute the next component of the context URI */
               *sptr++ = *cptr++;
               while (*cptr && *cptr != '/') *sptr++ = *cptr++;
               *sptr = '\0';
            }
            else
               *sptr++ = *uptr;
            uptr++;
         }
         /* append the parameter URI to new URI, terminating at any fragment */
         while (*uptr && *uptr != '#') *sptr++ = *uptr++;
         *sptr = '\0';
         if (Debug) fprintf (stdout, "(consecutive) NewUri |%s|\n", NewUri);
      }
      else
      {
         /****************/
         /* absolute URI */
         /****************/

         sptr = NewUri;
         for (cptr = Uri; *cptr && *cptr != '#'; *sptr++ = *cptr++);
         *sptr = '\0';
         if (Debug) fprintf (stdout, "(full) NewUri |%s|\n", NewUri);
      }
      AbsoluteLink = true;
   }
   else
   if (Uri[0])
   {
      /***************/
      /* partial URI */
      /***************/

      zptr = NULL;
      sptr = NewUri;
      for (cptr = ContextUri; *cptr; *sptr++ = *cptr++)
         if (*cptr == '/') zptr = sptr;
      /* reset the new URI pointer to the last slash from the context URI */
      if (zptr != NULL) sptr = zptr+1;
      /* append the parameter URI to new URI, terminating at any fragment */
      for (cptr = Uri; *cptr && *cptr != '#'; *sptr++ = *cptr++);
      *sptr = '\0';
      if (Debug) fprintf (stdout, "(partial) NewUri |%s|\n", NewUri);
      AbsoluteLink = false;
   }
   else
   {
      /************/
      /* null URI */
      /************/

      NewUri[0] = '\0';
      AbsoluteLink = true;
   }

   /*****************************************/
   /* process any "../" and "./" constructs */
   /*****************************************/

   uptr = NewUri;
   while (*uptr)
   {
      if (*uptr == '/')
      {
         /* 'uptr' is at the slash before a component, find the one after */
         for (cptr = uptr+1; *cptr && *cptr != '/'; cptr++);
         if (*cptr)
         {
            if (cptr[1] == '.' && cptr[2] == '.' && cptr[3] == '/')
            {
               /* "/name/../rest" becomes "/rest" */
               sptr = cptr + 3;
               cptr = uptr;
               while (*sptr) *cptr++ = *sptr++;
               *cptr = '\0';
               /*
                  Start again from the very first character.  Falling
                  through to the increment below would skip a leading
                  slash, and so miss a construct such as the "/b/../"
                  left at the front of "/a/../b/../c".
               */
               uptr = NewUri;
               continue;
            }
            else
            if (cptr[1] == '.' && cptr[2] == '/')
            {
               /* "/name/./rest" becomes "/name/rest" */
               sptr = cptr + 2;
               while (*sptr) *cptr++ = *sptr++;
               *cptr = '\0';
               /* start again from the very first character (as above) */
               uptr = NewUri;
               continue;
            }
         }
      }
      uptr++;
   }

   if (Debug) fprintf (stdout, "NewUri |%s|\n", NewUri);
   return (AbsoluteLink);
}

/*****************************************************************************/
/*
Create a VMS file name from the last part of the URI, making sure it is RMS 
compliant (i.e: URIs often contain Unix style names, containing all sorts of 
characters/combinations, munge these to something acceptable to RMS; e.g: 
"name_version-2.0.html" to "NAME_VERSION-2$0.HTML")

Alphanumerics are upper-cased, hyphens, underscores and dollars are kept, and 
everything else becomes a dollar symbol.  Only the last period survives, as 
the name/type separator.  'FileType' receives the type including that period 
(e.g. ".HTML"), or "?" if the name had no period at all.

'VmsFileName' is returned empty if nothing follows the last slash.

NOTE(review): it is also returned empty when the URI contains no slash at 
all, rather than being built from the whole URI -- confirm this is intended.
*/ 

VmsFileNameFromUri
(
char *Uri,
char *VmsFileName,
char *FileType
)
{
   register char  *NamePtr, *OutPtr, *PeriodPtr, *ScanPtr;

   /* the name is whatever follows the last "/" in the URI */
   NamePtr = NULL;
   for (ScanPtr = Uri; *ScanPtr; ScanPtr++)
      if (*ScanPtr == '/') NamePtr = ScanPtr + 1;
   /* no slash, use the end of the string (i.e. an empty name) */
   if (NamePtr == NULL) NamePtr = ScanPtr;

   /* munge the name so it is acceptable to RMS as a file name */
   PeriodPtr = NULL;
   OutPtr = VmsFileName;
   for (ScanPtr = NamePtr; *ScanPtr; ScanPtr++, OutPtr++)
   {
      if (*ScanPtr == '.')
      {
         /* every period becomes a dollar, noting where the latest was */
         PeriodPtr = OutPtr;
         *OutPtr = '$';
         continue;
      }

      /* acceptable characters are upper-cased, the rest become dollars */
      if (isalnum(*ScanPtr) ||
          *ScanPtr == '-' || *ScanPtr == '_' || *ScanPtr == '$')
         *OutPtr = toupper(*ScanPtr);
      else
         *OutPtr = '$';
   }
   *OutPtr = '\0';

   if (PeriodPtr != NULL)
   {
      /* turn the last substituted dollar back into a real period */
      *PeriodPtr = '.';
      strcpy (FileType, PeriodPtr);
   }
   else
   {
      /* without any period at all the file type is undetermined */
      strcpy (FileType, "?");
   }

   if (Debug) fprintf (stdout, "VmsFileName |%s|\n", VmsFileName);
}

/*****************************************************************************/
/*
Generate a VMS file name using the host name.  Replace the periods between the 
host name components with dollar symbols and append a file type of ".HTML".
*/ 

int VmsFileNameFromHostName (struct SourceData *sdptr)

{
   register char  *cptr, *sptr;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "VmsFileNameFromHostName()\n");

   sptr = sdptr->VmsFileName;
   for (cptr = sdptr->HostName; *cptr; cptr++)
      if (*cptr == '.') *sptr++ = '$'; else *sptr++ = *cptr;
   /* ensure the file name is no longer than 40 characters */
   sdptr->VmsFileName[39] = '\0';
   strcat (sdptr->VmsFileName, ".HTML");
   strcpy (sdptr->FileType, ".HTML");

   if (Debug) fprintf (stdout, "|%s|%s|", sdptr->HostName, sdptr->VmsFileName);
}

/*****************************************************************************/
/*
Sets the global 'RootUri' to the supplied URL with anything following its 
final slash removed (the final slash itself is kept).  A URL containing no 
slash is copied unchanged.

'RootUri' storage is not bounds-checked here, it must be able to hold 'Uri'.
*/ 

GetRootUri (char *Uri)

{
   register char  *cptr, *rptr, *sptr;

   if (Debug) fprintf (stdout, "GetRootUri() |%s|\n", Uri);

   /* find the final slash, 'cptr' finishes at the terminating null */
   rptr = NULL;
   for (cptr = Uri; *cptr; cptr++)
      if (*cptr == '/') rptr = cptr;

   /* 'rptr' becomes the first character NOT to be copied */
   if (rptr == NULL)
      rptr = cptr;
   else
      rptr++;

   /* copy exactly that much (strcpy() cannot take a length) and terminate */
   sptr = RootUri;
   for (cptr = Uri; cptr < rptr; *sptr++ = *cptr++);
   *sptr = '\0';

   if (Debug) fprintf (stdout, "RootUri |%s|\n", RootUri);
}

/*****************************************************************************/
/*
Returns true if 'Uri' begins with the current contents of the global 'RootUri' 
(the comparison is case-sensitive), otherwise false.  An empty 'RootUri' 
matches every 'Uri'.

'Uri' is only read, never modified, and no character beyond its terminating 
null is examined even when it is shorter than 'RootUri'.
*/ 

boolean SameRootUri (char *Uri)

{
   register char  *rptr, *uptr;

   if (Debug) fprintf (stdout, "SameRootUri |%s|%s", Uri, RootUri);

   /* show the leading part of the URI that is being compared to the root */
   if (Debug) fprintf (stdout, "|%.*s|\n", (int)strlen(RootUri), Uri);

   /*
      Step through both strings while they agree.  If 'Uri' ends first its
      null cannot equal a non-null root character, so the loop stops there.
   */
   rptr = RootUri;
   uptr = Uri;
   while (*rptr && *rptr == *uptr)
   {
      rptr++;
      uptr++;
   }

   /* the root is a prefix only if all of it was consumed */
   if (*rptr)
      return (false);
   else
      return (true);
}

/*****************************************************************************/
/*
Returns true if the URI should not be retrieved, that is if the text following 
its final slash (or the whole URI when it contains no slash) begins with 
"mailto:" or "news:".  The match is case-insensitive.  Otherwise returns false.
*/

boolean IgnoreThis (char *Uri)

{
   register char  *cptr, *uptr;

   if (Debug) fprintf (stdout, "IgnoreThis() |%s|\n", Uri);

   /* locate whatever follows the last "/" in the URI */
   cptr = NULL;
   for (uptr = Uri; *uptr; uptr++)
      if (*uptr == '/') cptr = uptr;
   if (cptr == NULL)
   {
      /* no slash at all, so the scheme (if any) is at the very start */
      cptr = Uri;
   }
   else
      cptr++;

   if (strsame (cptr, "mailto:", 7)) return (true);
   if (strsame (cptr, "news:", 5)) return (true);
   return (false);
}

/*****************************************************************************/
/*
If the character string in 'String' is found in the comma-separated list of 
strings in 'List' then return true, else return false.  Example of the list of 
strings: "string1,string2,string3,etc".  Obviously a comma cannot be part of 
the searched-for 'String'.  The match is case-insensitive and must cover a 
complete list element, not just the leading part of one.
*/ 

boolean InCommaList
(
char *String,
char *List
)
{
   register char  *item, *want;

   if (Debug) fprintf (stdout, "InCommaList() |%s|%s|\n", String, List);

   for (item = List; *item; )
   {
      /* match as much of 'String' as possible against this element */
      for (want = String; *want && *item != ','; want++, item++)
         if (toupper(*want) != toupper(*item)) break;

      /* a hit needs 'String' used up exactly where the element ends */
      if (*want == '\0' && (*item == '\0' || *item == ','))
         return (true);

      /* step over what is left of this element and its trailing comma */
      while (*item && *item != ',') item++;
      if (*item == ',') item++;
   }

   return (false);
}

/*****************************************************************************/
/*
Keeps track of all URLs obtained.  Checks the supplied source URL 
('sdptr->Url') against the database, returning true if it is already there 
and false if it is not.  The comparison of URLs is case-insensitive.

The database comprises a linked list of URLs beginning at 'ObtainedHead'.
*/ 

boolean AlreadyObtained (struct SourceData *sdptr)

{
   register struct ObtainedData  *entry;

   if (Debug) fprintf (stdout, "AlreadyObtained() |%s|\n", sdptr->Url);

   /* walk the list from the head, finishing at the first matching URL */
   for (entry = ObtainedHead; entry != NULL; entry = entry->NextPtr)
   {
      if (Debug) fprintf (stdout, "old odptr %d |%s|\n", entry, entry->Url);
      if (strsame (entry->Url, sdptr->Url, -1)) return (true);
   }

   /* ran off the end of the list without a match */
   return (false);
}

/*****************************************************************************/
/*
Update the list of obtained URLs by appending a copy of 'sdptr->Url' to it.

The database comprises a linked list of URLs; 'ObtainedHead' is the first 
entry and 'ObtainedTail' the last.  If memory for the new entry cannot be 
allocated an error message is reported and the image exits.
*/ 

UpdateObtained (struct SourceData *sdptr)

{
   register struct ObtainedData  *newptr;
   int  status;

   if (Debug) fprintf (stdout, "UpdateObtained() |%s|\n", sdptr->Url);

   /*
      Space for the link pointer plus the URL and its terminating null.
      NOTE(review): this sizing presumes 'Url' is character storage placed
      directly after 'NextPtr' in struct ObtainedData -- confirm against
      the structure declaration.
   */
   newptr = malloc (sizeof(struct ObtainedData*) + strlen(sdptr->Url)+1);
   if (newptr == NULL)
   {
      status = vaxc$errno;
      fprintf (stdout, "%%%s-E-OBTAINED, database update failed\n-%s\n",
               Utility, SysGetMsg(status)+1);
      exit (status | STS$M_INHIB_MSG);
   }

   /* complete the new entry, it will be the last in the list */
   newptr->NextPtr = NULL;
   strcpy (newptr->Url, sdptr->Url);

   /* an empty list gets a new head, otherwise hang it off the old tail */
   if (ObtainedHead == NULL)
      ObtainedHead = newptr;
   else
      ObtainedTail->NextPtr = newptr;
   ObtainedTail = newptr;

   if (Debug) fprintf (stdout, "new odptr %d |%s|\n", newptr, newptr->Url);
}

/*****************************************************************************/
/*
Open the output file.  "Text" files (e.g. .HTML, .PS, .TXT) are opened for 
record I/O.  "Binary" files (e.g. .XBM, .GIF) are opened for block I/O.
*/ 

int CreateFile (struct SourceData *sdptr)

{
   int  status;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "CreateFile() |%s|\n", sdptr->VmsFileName);

   /***************/
   /* create file */
   /***************/

   fprintf (stdout, "[%d.%d][Creating: %s]\n",
            DocumentRange, sdptr->SubdocumentDepth, sdptr->VmsFileName);

   sdptr->FileFab = cc$rms_fab;
   sdptr->FileFab.fab$b_fac = FAB$M_PUT | FAB$M_GET | FAB$M_UPD | FAB$M_BRO;
   sdptr->FileFab.fab$l_fna = sdptr->VmsFileName;  
   sdptr->FileFab.fab$b_fns = strlen(sdptr->VmsFileName);
   /* deferred write performance option */
   sdptr->FileFab.fab$l_fop = FAB$M_DFW | FAB$M_SQO;
   if (sdptr->IsATextFile)
   {
      sdptr->FileFab.fab$b_rat = FAB$M_CR;
      sdptr->FileFab.fab$b_rfm = FAB$C_VAR;
   }
   else
   {
      sdptr->FileFab.fab$b_rat = 0;
      sdptr->FileFab.fab$b_rfm = FAB$C_STMLF;
   }

   if (VMSnok (status = sys$create (&sdptr->FileFab, 0, 0)))
   {
      if (Debug) fprintf (stdout, "sys$create %%X%08.08X\n", status);
      return (status);
   }

   sdptr->FileRab = cc$rms_rab;
   sdptr->FileRab.rab$l_fab = &sdptr->FileFab;
   /* 2 buffers, write behind performance option */
   sdptr->FileRab.rab$b_mbf = 2;
   if (sdptr->IsATextFile)
      sdptr->FileRab.rab$l_rop = RAB$M_WBH;
   else
   {
      sdptr->FileRab.rab$l_rop = RAB$M_WBH | RAB$M_BIO;
      sdptr->BlockNumber = 0;
   }

   if (VMSnok (status = sys$connect (&sdptr->FileRab, 0, 0)))
   {
      if (Debug) fprintf (stdout, "sys$connect %%X%08.08X\n", status);
      sys$close (&sdptr->FileFab, 0, 0);
      return (status);
   }

   return (status);
}

/*****************************************************************************/
/*
Stamp files where the information can be hidden within comment structures.  
The stamp shows the URL used to obtain the file, the utility version, and the 
date the file was obtained on.

Only HTML and PostScript files are stamped, for any other file nothing is 
written.  An HTML stamp is enclosed by "<!--" and "-->" records and is followed 
by a visible "Imported" line linking to 'ImportedExplanationUrl', then a blank 
line.  A PostScript stamp is preceded by a "%!" record.

Always returns SS$_NORMAL, the status values from the sys$fao() and sys$put() 
calls are not checked.
*/ 

int StampFile (struct SourceData *sdptr)

{
   /* sys$fao() control strings for each of the stamp lines */
   static $DESCRIPTOR (FromFaoDsc, "%!AZ-I-FROM, \"!AZ\"");
   static $DESCRIPTOR (ByFaoDsc, "%!AZ-I-BY, !AZ");
   static $DESCRIPTOR (DateFaoDsc, "%!AZ-I-DATE, !11%D");
   static $DESCRIPTOR (ImportedFaoDsc,
   "<P><I>[<A HREF=\"!AZ\">Imported</A> from the Internet WWW]</I><P>");

   int  status;
   unsigned short  Length;
   /* buffer each formatted stamp line is built in before being written */
   char  Line [256];
   $DESCRIPTOR (LineDsc, Line);

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "StampFile() |%s|\n", sdptr->VmsFileName);

   /* nowhere to hide a stamp unless it is HTML or PostScript */
   if (!(sdptr->IsAnHtmlFile || sdptr->IsAPostScriptFile))
      return (SS$_NORMAL);

   if (sdptr->IsAnHtmlFile)
   {
      /* open an HTML comment */
      sdptr->FileRab.rab$l_rbf = "<!--";
      sdptr->FileRab.rab$w_rsz = 4;
      sys$put (&sdptr->FileRab, 0, 0);
   }

   /*
      PostScript comment lines begin with a percentage, hence PostScript
      files do not require any other commentary indicator for these lines.
      Also make sure the "magic number" is the first in the file!
   */
   if (sdptr->IsAPostScriptFile)
   {
      sdptr->FileRab.rab$l_rbf = "%!";
      sdptr->FileRab.rab$w_rsz = 2;
      sys$put (&sdptr->FileRab, 0, 0);
   }

   /* the three stamp lines are all formatted into, and put from, 'Line' */
   sdptr->FileRab.rab$l_rbf = Line;

   /* each line: format, set the record size and terminate, then write */
   sys$fao (&FromFaoDsc, &Length, &LineDsc, Utility, sdptr->Url);
   Line[sdptr->FileRab.rab$w_rsz = Length] = '\0';
   sys$put (&sdptr->FileRab, 0, 0);

   sys$fao (&ByFaoDsc, &Length, &LineDsc, Utility, SoftwareID);
   Line[sdptr->FileRab.rab$w_rsz = Length] = '\0';
   sys$put (&sdptr->FileRab, 0, 0);

   /*
      NOTE(review): the zero supplied for the !%D directive presumably
      selects the current date/time -- confirm against the $FAO documentation.
   */
   sys$fao (&DateFaoDsc, &Length, &LineDsc, Utility, 0);
   Line[sdptr->FileRab.rab$w_rsz = Length] = '\0';
   sys$put (&sdptr->FileRab, 0, 0);

   if (sdptr->IsAnHtmlFile)
   {
      /* close the HTML comment */
      sdptr->FileRab.rab$l_rbf = "-->";
      sdptr->FileRab.rab$w_rsz = 3;
      sys$put (&sdptr->FileRab, 0, 0);

      /* a visible line pointing at the explanation of imported documents */
      sdptr->FileRab.rab$l_rbf = Line;
      sys$fao (&ImportedFaoDsc, &Length, &LineDsc, ImportedExplanationUrl);
      Line[sdptr->FileRab.rab$w_rsz = Length] = '\0';
      sys$put (&sdptr->FileRab, 0, 0);

      /* blank line */
      sdptr->FileRab.rab$l_rbf = "";
      sdptr->FileRab.rab$w_rsz = 0;
      sys$put (&sdptr->FileRab, 0, 0);
   }

   return (SS$_NORMAL);
}

/*****************************************************************************/
/*
Write a buffer to the file.  If a "text" file then write using record I/O, if 
a "binary" file then write using block I/O.  The block must be 512 bytes in 
size (or less if it is the final block).

For a binary file 'sdptr->BlockNumber' is incremented and used as the block 
the buffer is written to.  Returns the status of the sys$put() or sys$write().
*/ 

int WriteFile
(
struct SourceData *sdptr,
char *Buffer,
int BufferLength
)
{
   if (Debug) fprintf (stdout, "WriteFile()\n");

   /* the same buffer description serves both kinds of write */
   sdptr->FileRab.rab$w_rsz = BufferLength;
   sdptr->FileRab.rab$l_rbf = Buffer;

   if (!sdptr->IsATextFile)
   {
      /* binary file, block I/O to the block after the last one written */
      sdptr->BlockNumber++;
      sdptr->FileRab.rab$l_bkt = sdptr->BlockNumber;
      return (sys$write (&sdptr->FileRab, 0, 0));
   }

   /* text file, record I/O */
   return (sys$put (&sdptr->FileRab, 0, 0));
}

/*****************************************************************************/
/*
Hand a buffer to the RMS sys$update() service using the file's record access 
block.  'Buffer' and 'BufferLength' describe the record contents.  Returns the 
status of the sys$update().
*/ 

int UpdateFile
(
struct SourceData *sdptr,
char *Buffer,
int BufferLength
)
{
   int  status;

   if (Debug) fprintf (stdout, "UpdateFile()\n");

   /* describe the record contents to RMS */
   sdptr->FileRab.rab$w_rsz = BufferLength;
   sdptr->FileRab.rab$l_rbf = Buffer;

   status = sys$update (&sdptr->FileRab, 0, 0);
   return (status);
}

/*****************************************************************************/
/*
Open a TCP connection to port 'sdptr->ServerPort' on host 'sdptr->HostName'.

The host is looked up with gethostbyname() and 'sdptr->HostName' is replaced 
by the name returned from that lookup.  On success 'sdptr->ClientSocket' is the 
connected socket, 'sdptr->ClientChannel' is the channel used for sys$qiow() I/O 
on it, and SS$_NORMAL is returned.  On failure the 'IP_errno' value from the 
failing call is returned and no socket is left open.
*/ 

int ConnectToServer (struct SourceData *sdptr)

{
   int  status;

   /*********/
   /* begin */
   /*********/

   if (Debug)
      fprintf (stdout, "ConnectToServer() |%s:%d|\n",
              sdptr->HostName, sdptr->ServerPort);

   /* get the remote host details */
   if ((sdptr->RemoteHostEntryPtr =
        gethostbyname (sdptr->HostName)) == NULL)
   {
      /* capture the error before fprintf() has any chance to disturb it */
      status = IP_errno;
      if (Debug) fprintf (stdout, "gethostbyname() %%X%08.08X\n", status);
      return (status);
   }
   /* keep a private copy of the entry and build the socket address from it */
   sdptr->RemoteHostEntry = *sdptr->RemoteHostEntryPtr;
   strcpy (sdptr->HostName, sdptr->RemoteHostEntry.h_name);
   sdptr->SocketName.sin_family = sdptr->RemoteHostEntry.h_addrtype;
   sdptr->SocketName.sin_port = htons (sdptr->ServerPort);
   sdptr->SocketName.sin_addr =
      *((struct in_addr *)sdptr->RemoteHostEntry.h_addr);

   if ((sdptr->ClientSocket = socket (AF_INET, SOCK_STREAM, 0)) < 0)
   {
      status = IP_errno;
      if (Debug) fprintf (stdout, "socket() %%X%08.08X\n", status);
      return (status);
   }
   if (connect (sdptr->ClientSocket,
                &sdptr->SocketName,
                sizeof(sdptr->SocketName)))
   {
      status = IP_errno;
      if (Debug) fprintf (stdout, "connect() %%X%08.08X\n", status);
      /* release it with the package's own close, as when disconnecting */
      IP_close_socket (sdptr->ClientSocket);
      return (status);
   }

   /* obtain the channel that the sys$qiow() reads and writes will use */
#  ifdef UCX_TCPIP
      sdptr->ClientChannel = vaxc$get_sdc (sdptr->ClientSocket);
#  else
      /* multinet */
      sdptr->ClientChannel = sdptr->ClientSocket;
#  endif

   return (SS$_NORMAL);
}

/*****************************************************************************/
/*
Shut down and close the socket 'sdptr->ClientSocket'.  Always returns 
SS$_NORMAL, the results of the shutdown and close are not checked.
*/
 
int DisconnectFromServer (struct SourceData *sdptr)

{
   if (Debug) fprintf (stdout, "DisconnectFromServer()\n");

   /* shut the connection down (how = 2), then release the socket */
   shutdown (sdptr->ClientSocket, 2);
   IP_close_socket (sdptr->ClientSocket);

   return (SS$_NORMAL);
}

/*****************************************************************************/
/*
Read whatever data is available from the server, up to 'SizeOfDataBuffer' 
bytes, into 'DataBuffer'.  The number of bytes read is returned through 
'DataLengthPtr' and the data is null-terminated.

If the sys$qiow() or its I/O status block report failure that status is 
returned, with the length left at zero and the buffer an empty string.  
Otherwise the (successful) sys$qiow() status is returned.

NOTE(review): the terminating null is stored at DataBuffer[bytes read], so a 
read that fills all 'SizeOfDataBuffer' bytes puts it one beyond that size -- 
presumably callers pass one less than the real size of their buffer, confirm.
*/ 
 
int ReadData
(
struct SourceData *sdptr,
char *DataBuffer,
int SizeOfDataBuffer,
int *DataLengthPtr
)
{
   int  status,
        ReadFunction;
   struct {
      unsigned short  Status;
      unsigned short  Count;
      char *Buffer;
   } IOsb;

   if (Debug) fprintf (stdout, "ReadData()\n");

   /* until shown otherwise nothing has been read */
   DataBuffer[0] = '\0';
   *DataLengthPtr = 0;

   /* the read function code depends on the TCP/IP package built against */
#  ifdef UCX_TCPIP
      ReadFunction = IO$_READVBLK;
#  else
      /* multinet */
      ReadFunction = IO$_RECEIVE;
#  endif

   status = sys$qiow (0, sdptr->ClientChannel, ReadFunction, &IOsb, 0, 0,
                      DataBuffer, SizeOfDataBuffer, 0, 0, 0, 0);

   if (Debug)
      fprintf (stdout, "sys$qiow() %%X%08.08X IOsb: %%X%08.08X Count: %d\n",
               status, IOsb.Status, IOsb.Count);

   /* a failure to queue the request takes precedence over the I/O status */
   if (VMSnok (status)) return (status);
   if (VMSnok (IOsb.Status)) return (IOsb.Status);

   /* report how much arrived and make the data a null-terminated string */
   *DataLengthPtr = IOsb.Count;
   DataBuffer[*DataLengthPtr] = '\0';
   if (Debug) fprintf (stdout, "recv() %d\n", *DataLengthPtr);

   return (status);
}

/*****************************************************************************/
/*
Send 'DataLength' bytes beginning at 'DataPtr' to the server.  A negative 
'DataLength' means 'DataPtr' is a null-terminated string and its length is to 
be used.

Returns the sys$qiow() status if that indicates failure, otherwise the I/O 
status block status if that indicates failure, otherwise the (successful) 
sys$qiow() status.
*/
 
int WriteData
(
struct SourceData *sdptr,
char *DataPtr,
int DataLength
)
{
   int  status,
        WriteFunction;
   struct {
      unsigned short  Status;
      unsigned short  Count;
      char *Buffer;
   } IOsb;

   if (Debug) fprintf (stdout, "WriteData() %d |%s|\n", DataLength, DataPtr);

   /* a negative length requests that the string length be worked out here */
   if (DataLength < 0) DataLength = strlen(DataPtr);

   /* the write function code depends on the TCP/IP package built against */
#  ifdef UCX_TCPIP
      WriteFunction = IO$_WRITEVBLK;
#  else
      /* multinet */
      WriteFunction = IO$_SEND;
#  endif

   status = sys$qiow (0, sdptr->ClientChannel, WriteFunction, &IOsb, 0, 0,
                      DataPtr, DataLength, 0, 0, 0, 0);

   if (Debug)
      fprintf (stdout, "sys$qiow() %%X%08.08X IOsb: %%X%08.08X Count: %d\n",
               status, IOsb.Status, IOsb.Count);

   /* a failure to queue the request takes precedence over the I/O status */
   if (VMSnok (status)) return (status);
   if (VMSnok (IOsb.Status)) return (IOsb.Status);

   return (status);
}

/*****************************************************************************/
/*
Returns a pointer to the message text associated with the VMS status value 
'StatusValue', as supplied by sys$getmsg().  If no text could be obtained a 
substitute containing the status value in hexadecimal is returned instead, so 
the result always begins with a "%" followed by further text (callers step 
over that first character).

The text is held in static storage which is overwritten by the next call.
*/
 
char* SysGetMsg (int StatusValue)
 
{
   static char  Message [256];
   int  status;
   /* zeroed so it is never used uninitialised if sys$getmsg() fails */
   unsigned short  Length = 0;
   $DESCRIPTOR (MessageDsc, Message);
 
   status = sys$getmsg (StatusValue, &Length, &MessageDsc, 0, 0);
   if (VMSnok (status) || !Length)
   {
      /* nothing usable was returned, build something in the same style */
      sprintf (Message, "%%NONAME-E-NOMSG, message number %08.08X",
               StatusValue);
   }
   else
      Message[Length] = '\0';

   if (Debug) fprintf (stdout, "SysGetMsg() |%s|\n", Message);
   return (Message);
}
 
/****************************************************************************/
/*
Does a case-insensitive, character-by-character string compare and returns 
true if two strings are the same, or false if not.  If a maximum number of 
characters are specified only those will be compared, if the entire strings 
should be compared then specify the number of characters as 0.

When a maximum is specified and either string ends before that many characters 
have been compared, the strings are the same only if both end together.
*/ 
 
boolean strsame
(
register char *sptr1,
register char *sptr2,
register int  count
)
{
   for (;;)
   {
      /* either string exhausted, the same only if both are exhausted */
      if (!*sptr1 || !*sptr2)
      {
         if (!*sptr1 && !*sptr2) return (true);
         return (false);
      }

      /* compare without regard to case, then move along both strings */
      if (toupper (*sptr1) != toupper (*sptr2)) return (false);
      sptr1++;
      sptr2++;

      /* a non-zero count is run down, reaching zero ends the comparison */
      if (count != 0)
      {
         count--;
         if (count == 0) return (true);
      }
   }
}
 
/****************************************************************************/

/*
Write a summary of the utility's purpose, qualifiers and example commands to 
standard output.  The heading carries the 'Utility' name and the 'SoftwareID' 
version string.  Returns SS$_NORMAL.

The example commands use the verb SPINNERET, this utility's own name.
*/

int ShowHelp ()

{
   fprintf (stdout,
"%%%s-I-HELP, Basic usage of the Spinneret Utility (%s)\n\
\n\
def: \"the body part used by an arachnid to extrude gossamer\"\n\
\n\
Spinneret allows the retrieval and local compilation of documents from the\n\
Internet WWW.  Files comprising the document(s) are created in the local\n\
directory.  URLs within the files are modified for compilation within a single\n\
directory.  URLs not retrieved are marked with a trailing caret (^).\n\
\n\
$ SPINNERET \"url\" [qualifiers]\n\
\n\
/[NO]ALL(d) /ALWAYS_GET=string /CONFIRM /DEPTH=integer /EXCLUDE=string\n\
/HELP /INCLUDE=string /RANGE=integer /[NO(d)]VERBOSE\n\
\n\
$ SPINNERET \"http://host/file.type\"  !retrieve document with this URL\n\
$ SPINNERET \"url\" /ALL               !get all files from child directories\n\
$ SPINNERET \"url\" /ALWAYS=GIF,XBM    !always retrieve this file type(s)\n\
$ SPINNERET \"url\" /INCLUDE=HTML,HTM  !only retrieve this file type(s)\n\
$ SPINNERET \"url\" /EXCLUDE=GIF,XBM   !do not retrieve this file type(s)\n\
$ SPINNERET \"url\" /RANGE=integer     !get other documents up to 'integer' away\n\
$ SPINNERET \"url\" /DEPTH=integer     !from child directories to 'integer' deep\n\
$ SPINNERET \"url\" /CONFIRM           !ask before retrieving any file\n\
\n",
   Utility, SoftwareID);
   return (SS$_NORMAL);
}
 
/****************************************************************************/
/*
This function allows images activated by a "foreign verb" to behave in a way 
that approximates the CLI$ (Command Line Interpreter) utility calls.  Get the 
entire command Line following the verb that activated the image, using 
lib$get_foreign(), and have ParseCommand() process it.  The variable 
CommandLine is global.

Returns the lib$get_foreign() status if that call fails, SS$_NORMAL if the 
command Line was parsed without error, otherwise an error status with the 
"inhibit message" bit set.
*/ 
 
int ParseCommandLine ()
 
{
   int  status;
   unsigned short  Length;
   unsigned long  Flags = 0;
   struct dsc$descriptor_s 
          CommandLineDsc = { sizeof(CommandLine)-1, DSC$K_DTYPE_T,
                             DSC$K_CLASS_S, CommandLine };
 
   /* get the entire command Line following the verb */
   status = lib$get_foreign (&CommandLineDsc, 0, &Length, &Flags);
   if (VMSnok (status)) return (status);

   /* make what was returned a null-terminated string */
   CommandLine[Length] = '\0';
 
   if (!ParseCommand (CommandLine))
      return (STS$K_ERROR | STS$M_INHIB_MSG);

   return (SS$_NORMAL);
}
 
/****************************************************************************/
/*
This function allows images activated by a "foreign verb" to behave in a way 
that approximates the CLI$ (Command Line Interpreter) utility calls.

The command Line is split into "entities" (a parameter, or a qualifier along 
with any "=value"), each of which is passed to ParseCommandEntity().  Outside 
of quotation marks an entity is ended by white space, by a "/" (which begins a 
new qualifier) or by the end of the Line.  When white space is followed by an 
"=" then the "=", and any white space following it, are taken into the current 
entity, so "/QUAL = value" is a single entity.

Characters outside of quotation marks are converted to upper case.  Characters 
inside quotation marks are copied as they are.  The quotation marks themselves 
are retained in the entity, ParseCommandString() removes them.

ParseCommandEntity() is first called with NULL to set up argument defaults.  
Returns true if every entity was accepted, or false if any were rejected (the 
rest of the Line is still processed after a rejection).

NOTE(review): nothing limits the number of characters copied into 'Entity', 
this presumably relies on the command Line never supplying an entity of more 
than 255 characters -- confirm against the size of the global CommandLine.
*/ 
 
boolean ParseCommand (char *CommandLine)
 
{
   /* count of quotation marks met so far, odd while inside quoted text */
   register int  QuoteCount = 0;
   /* 'cptr' scans the command Line, 'eptr' is where the entity is built */
   register char  *cptr, *eptr;
   boolean  CommandLineOK = true;
   char  Entity [256] = "";
 
   /* set up any argument defaults */
   ParseCommandEntity (NULL);
 
   cptr = CommandLine;
   eptr = Entity;
 
   for (;;)
   {
      if (*cptr == '\"')
      {
         /* the quotation mark is kept as part of the entity */
         QuoteCount++;
         *eptr++ = *cptr++;
         continue;
      }
 
      if (QuoteCount & 1 && *cptr)
      {
         /* inside quoted text, copy all characters as literals */
         *eptr++ = *cptr++;
         continue;
      }
 
      /* a delimiter (or the end of an unterminated quoted string) */
      if (*cptr == '/' || isspace (*cptr) || !*cptr)
      {
         if (isspace (*cptr))
         {
            /* span the white space */
            while (*cptr && isspace (*cptr)) cptr++;
            if (*cptr == '=')
            {
               /* part of a qualifier, continue to get the value */
               *eptr++ = *cptr++;
               /* span any intervening white space */
               while (*cptr && isspace (*cptr)) cptr++;
               continue;
            }
         }
 
         /* something has been collected, terminate it and have it checked */
         if (Entity[0])
         {
            *eptr = '\0';
            if (!ParseCommandEntity (Entity)) CommandLineOK = false;
         }
 
         /* if end of command Line then break from loop */
         if (!*cptr) break;
 
         /* start of new entity */
         eptr = Entity;
         /* if start of qualifier ensure slash is copied */
         if (*cptr == '/') *eptr++ = *cptr++;
 
         continue;
      }
 
      /* any other character, just copy, ensure upper case */
      *eptr++ = toupper(*cptr++);
   }
 
   return (CommandLineOK);
}
 
/*****************************************************************************/
/*
Get a string value from a qualifier, e.g. '/EXAMPLE=TEST', or from a parameter.

'Entity' is the text of the qualifier or parameter.  If 'Qualifier' is true the 
value is whatever follows the first "=", otherwise it is the whole of 'Entity'.
'String' receives the null-terminated value.  The storage belongs to the caller 
and is NOT bounds-checked here, it must be able to hold the value.
'ReportErrors' true has a message reported when a qualifier has no value.
'EnsureUpperCase' true has the value converted to upper case as it is copied.

Quotation marks enclosing text are removed.  Within quoted text a pair of 
quotation marks ("") represents a single literal quotation mark.

Returns true if a value was stored, false if a qualifier's value was missing 
(in which case 'String' is left untouched).
*/
 
boolean ParseCommandString
(
char *Entity,
char *String,
boolean Qualifier,
boolean ReportErrors,
boolean EnsureUpperCase
)
{
   register int  QuoteCount = 0;
   register char  *eptr, *sptr;
 
   if (Debug) fprintf (stdout, "ParseCommandString()\nEntity: '%s'\n", Entity);
 
   eptr = Entity;
 
   if (Qualifier)
   {
      /* scan down to equate symbol */
      while (*eptr && *eptr != '=') eptr++;
      if (*eptr) eptr++;
      if (!*eptr)
      {
         if (ReportErrors)
         {
            fprintf (stdout,
            "%%%s-E-VALREQ, missing qualifier or keyword value\n \\%s\\\n",
            Utility, Entity+1);
         }
         return (false);
      }
   }
 
   sptr = String;
   while (*eptr)
   {
      if (*eptr == '\"')
      {
         if (QuoteCount & 1)
         {
            /* are inside quotes, check for escaped quotes ("") */
            if (*++eptr != '\"')
            {
               /* now outside quotes */
               QuoteCount++;
               /*
                  Have the loop look at this character afresh.  It could be
                  the terminating null, and copying that then stepping over
                  it would carry the scan on beyond the end of the entity.
               */
               continue;
            }
            /* escaped quote, drop thru to copy the one quotation mark */
         }
         else
         {
            /* now inside quotes */
            QuoteCount++;
            eptr++;
            continue;
         }
      }
 
      /* toupper() is only defined for unsigned char values, hence the cast */
      if (EnsureUpperCase)
         *sptr++ = toupper((unsigned char)*eptr);
      else
         *sptr++ = *eptr;
      eptr++;
   }
   *sptr = '\0';
 
   if (Debug) fprintf (stdout, "String: '%s'\n", String);
 
   return (true);
}
 
/*****************************************************************************/
/*
Get an integer value from a qualifier, e.g. '/EXAMPLE=99'.

'Entity' is the text of the qualifier, the value being whatever follows the 
first "=".  'IntegerPtr' is where the converted value is stored and 'Base' is 
the number base given to strtol().  'ReportErrors' true has a message reported 
when the value is missing or is not entirely a valid number.

Returns true if the whole of the value was converted, otherwise false.  When 
the value is present but invalid 'IntegerPtr' has still been written to.
*/
 
boolean ParseCommandInteger
(
char *Entity,
int *IntegerPtr,
int Base,
boolean ReportErrors
)
{
   register char  *vptr;
   char  *endptr;
 
   if (Debug)
      fprintf (stdout, "ParseCommandInteger() '%s' Base: %d\n", Entity, Base);
 
   /* find the start of the value, the character following the "=" */
   vptr = Entity;
   while (*vptr && *vptr != '=') vptr++;
   if (*vptr) vptr++;

   if (!*vptr)
   {
      /* either there was no "=" or there was nothing after it */
      if (ReportErrors)
      {
         fprintf (stdout,
         "%%%s-E-VALREQ, missing qualifier or keyword value\n \\%s\\\n",
         Utility, Entity+1);
      }
      return (false);
   }

   /* valid only if something was converted and nothing was left over */
   *IntegerPtr = strtol (vptr, &endptr, Base);
   if (endptr > vptr && !*endptr) return (true);

   if (ReportErrors)
   {
      fprintf (stdout,
      "%%%s-E-BADVALUE, '%s' is an invalid keyword value\n",
      Utility, vptr);
   }
   return (false);
}
 
/*****************************************************************************/
/*
A single command Line "entity" has been parsed, check if its recognised.  This 
function is the one modified for the individual requirements of each program.

Called with NULL the argument defaults are set up and true is returned.  
Otherwise an entity beginning with "/" is checked against the known 
qualifiers (most need only their first four characters to match), and any 
other entity is taken to be the URL parameter, of which there can be only one.

Returns true if the entity was recognised and any value it requires was 
acceptable, otherwise false (after reporting the problem).
*/
 
boolean ParseCommandEntity (char *Entity)
 
{
   if (Entity == NULL)
   {
      /* set up any argument defaults */
      Debug = false;
      DoConfirm = false;
      DoShowHelp = false;
      Verbose = false;
      AllRootSubdocuments = true;
      AlwaysGetTheseFileTypes[0] = '\0';
      ExcludeFileTypes[0] = '\0';
      IncludeFileTypes[0] = '\0';
      Depth = 1;
      Range = 1;
      return (true);
   }
 
   if (Debug) fprintf (stdout, "ParseCommandEntity() Entity: '%s'\n", Entity);
 
   /**************/
   /* parameters */
   /**************/

   if (Entity[0] != '/')
   {
      /* the first (and only) parameter is the URL, kept in its own case */
      if (!UrlParameter[0])
         return (ParseCommandString (Entity, UrlParameter, false, true, false));

      fprintf (stdout,
      "%%%s-E-MAXPARM, too many parameters\n \\%s\\\n", Utility, Entity);
      return (false);
   }

   /**************/
   /* qualifiers */
   /**************/

   if (strsame (Entity, "/ALL", 4))
   {
      AllRootSubdocuments = true;
      return (true);
   }
   if (strsame (Entity, "/NOALL", 6))
   {
      AllRootSubdocuments = false;
      return (true);
   }

   if (strsame (Entity, "/ALWAYS_GET=", 4))
      return (ParseCommandString (Entity, AlwaysGetTheseFileTypes,
                                  true, true, true));

   if (strsame (Entity, "/CONFIRM", 4))
   {
      DoConfirm = true;
      return (true);
   }

   /* turns on all "if (Debug)" statements, must be given in full */
   if (strsame (Entity, "/DBUG", -1))
   {
      Debug = true;
      return (true);
   }

   if (strsame (Entity, "/DEPTH=", 4))
      return (ParseCommandInteger (Entity, &Depth, 10, true));

   if (strsame (Entity, "/EXCLUDE=", 4))
      return (ParseCommandString (Entity, ExcludeFileTypes,
                                  true, true, true));

   if (strsame (Entity, "/INCLUDE=", 4))
      return (ParseCommandString (Entity, IncludeFileTypes,
                                  true, true, true));

   if (strsame (Entity, "/HELP", 4))
   {
      DoShowHelp = true;
      return (true);
   }

   if (strsame (Entity, "/RANGE=", 4))
      return (ParseCommandInteger (Entity, &Range, 10, true));

   if (strsame (Entity, "/VERBOSE", 4))
   {
      Verbose = true;
      return (true);
   }
   if (strsame (Entity, "/NOVERBOSE", 6))
   {
      Verbose = false;
      return (true);
   }

   /* nothing above matched */
   fprintf (stdout,
   "%%%s-E-IVQUAL, unrecognised qualifier\n \\%s\\\n", Utility, Entity+1);
   return (false);
}
   
/*****************************************************************************/

