/*
 *  Author     : Arne Vajhj
 *
 *  Programmed : may 1995
 *
 *  Purpose    : scan HTML files for HREF's and check that the URL's exist
 *
 *  Modified   : may 1995
 *               cut off # as part of URL's.
 *
 *               june 1995
 *               accept URL's without protocol and node
 *               handle only nodename cases
 *
 *               june 1995
 *               handle case when HTTP_DEFAULT_HOST not defined
 *               accept URL's without filename
 *               NOVERBOSE switch
 *
 *               june 1995
 *               check if node/port works for gopher and ftp/file links
 *               removes extra spaces
 *
 *               june 1995
 *               add prefix/suffix part
 *               more comments
 *
 *               june 1995
 *               VMS style command-line
 *
 *               july 1995
 *               support for news: URL
 *
 *               april 1996
 *               /BASEURL qualifier to deal with relative URL's
 *
 *
 *  Usage      : $ URLSCAN:==$disk:[dir]URLSCAN.EXE
 *               $ URLSCAN file-specification
 *               (where file-specification could be [.WWW...]*.HTML)
 *
 *               see HLP-file for more detailed description of usage
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <descrip.h>
#include <rmsdef.h>

#include <socket.h>
#include <netdb.h>

#include <unixio.h>
#include <errno.h>

/* VMS library routines; process() uses them to step through the files
   matching a wildcard file specification */
int lib$find_file();
int lib$find_file_end();

/* command-line access routines used by main(); they are not defined in
   this part of the file */
void cli_dcl_parse();
int cli_present();
char *cli_get_value();

/* output styles selected with /STYLE; main() maps each style to a set of
   message prefix and suffix strings */
#define BACKstyle 1
#define FRONTstyle 2
#define VMSISHstyle 3

/* message classes in increasing order of severity; a message is normally
   printed when verbose is less than or equal to the level of its class */
#define OKlev 1
#define FNDlev 2
#define CVTlev 3
#define ASSlev 4
#define ERR2lev 5
#define ERRlev 6

/* host inserted into URL's that have neither protocol nor node */
char defaulthost[65];
/* value of /BASEURL (empty string when absent); used for relative URL's */
char baseurl[257];
/* text printed before (pref) and after (suf) a message of each class */
char OKpref[33],FNDpref[33],CVTpref[33],ASSpref[33],ERR2pref[33],ERRpref[33];
char OKsuf[33],FNDsuf[33],CVTsuf[33],ASSsuf[33],ERR2suf[33],ERRsuf[33];
/* non-zero when /NONHTTP is present: also connect to gopher/ftp servers */
int checknonhttp;
/* message level threshold set from /VERBOSE (one of the ...lev values) */
int verbose;

/* send HEAD request for HTTP-server
   or connect to node/port for GOPHER/FTP-server

   hostname : node name, looked up with gethostbyname
   port     : TCP port number in host byte order
   fnm      : path put in the HEAD request (only used when head is non-zero)
   head     : non-zero -> send "HEAD fnm HTTP/1.0" and require status 200
              zero     -> a successful connect is sufficient

   returns 1 on success and 0 on any failure; failures are reported on
   stdout with the ERR2 prefix/suffix when verbose<=ERR2lev */
int test_head(char *hostname,int port,char *fnm,int head)
{
   int sd,status,len,used,ok;
   char cmd[512],resp[2048],*code;
   struct sockaddr local,remote;
   struct hostent *hostinfo;
   ok=0;
   /* create socket */
   sd=socket(AF_INET,SOCK_STREAM,0);
   if(sd<0) {
      if(verbose<=ERR2lev) printf("%sError creating socket: %s%s\n",
                                  ERR2pref,strerror(errno),ERR2suf);
      /* there is no descriptor to close, so do not go through fin */
      return ok;
   }
   /* bind socket */
   local.sa_family=AF_INET;
   memset(local.sa_data,0,sizeof(local.sa_data));
   status=bind(sd,&local,sizeof(local));
   if(status<0) {
      if(verbose<=ERR2lev) printf("%sError binding socket: %s%s\n",
                                  ERR2pref,strerror(errno),ERR2suf);
      goto fin;
   }
   /* lookup host */
   hostinfo=gethostbyname(hostname);
   if((hostinfo==NULL)||(hostinfo->h_length<=0)||
      (hostinfo->h_length>(int)sizeof(remote.sa_data)-2)) {
      /* also rejects addresses that would not fit after the port bytes */
      if(verbose<=ERR2lev) printf("%sError looking up host: %s%s\n",
                                  ERR2pref,hostname,ERR2suf);
      goto fin;
   }
   /* connect to host */
   memset(remote.sa_data,0,sizeof(remote.sa_data));
   remote.sa_family=hostinfo->h_addrtype;
   /* port goes first in sa_data, most significant byte first (network
      byte order); storing the bytes one by one is correct on any host */
   remote.sa_data[0]=(char)((port>>8)&0xFF);
   remote.sa_data[1]=(char)(port&0xFF);
   memcpy(remote.sa_data+2,hostinfo->h_addr_list[0],hostinfo->h_length);
   status=connect(sd,&remote,sizeof(remote));
   if(status!=0) {
      if(verbose<=ERR2lev) printf("%sError connecting to host: %s port: %d%s\n",
                                  ERR2pref,hostname,port,ERR2suf);
      goto fin;
   }
   /* finish if not necessary to send HEAD request */
   if(!head) {
      ok=1;
      goto fin;
   }
   /* send HEAD request, refusing paths that would overflow cmd */
   if(strlen(fnm)+sizeof("HEAD  HTTP/1.0\r\n\r\n")>sizeof(cmd)) {
      if(verbose<=ERR2lev) printf("%sError sending HEAD request%s\n",
                                  ERR2pref,ERR2suf);
      goto fin;
   }
   sprintf(cmd,"HEAD %s HTTP/1.0\r\n\r\n",fnm);
   status=send(sd,cmd,strlen(cmd),0);
   if(status<0) {
      if(verbose<=ERR2lev) printf("%sError sending HEAD request%s\n",
                                  ERR2pref,ERR2suf);
      goto fin;
   }
   /* read response until the status line is complete; it may arrive in
      several pieces, so the pieces are accumulated in resp */
   used=0;
   resp[0]='\0';
   while(used<(int)sizeof(resp)-1) {
      len=recv(sd,resp+used,sizeof(resp)-1-used,0);
      if(len<=0) break;
      used+=len;
      resp[used]='\0';
      if(strchr(resp,'\n')!=NULL) break;
   }
   /* status line is "HTTP/<version> <code> <reason>"; any version is
      accepted because servers may answer a 1.0 request with HTTP/1.1 */
   if(strncmp(resp,"HTTP/",5)==0) {
      code=resp+strcspn(resp," \r\n");
      if((code[0]==' ')&&(strncmp(code+1,"200",3)==0)) ok=1;
   }
fin:
   close(sd);
   return ok;
}

/* convert URL to node/port/filename
   and send HEAD request for HTTP-server

   url : URL starting with "http://" (either case); it is modified in
         place when a trailing "/" has to be added, so the buffer must
         have room for one more character

   the result is reported on stdout; malformed URL's and URL's whose
   parts do not fit the local buffers are reported as errors instead of
   being parsed */
void processhttp(char *url)
{
   int port;
   size_t l,urllen;
   char node[65],fnm[256],*p,*host,*slash;
   /* the parsing below skips 7 characters, so make sure they are there */
   urllen=strlen(url);
   if((urllen<=7)||(strncmp(url+4,"://",3)!=0)) {
      if(verbose<=ERRlev) printf("%sMalformed or too long URL: %s%s\n",
                                 ERRpref,url,ERRsuf);
      return;
   }
   host=url+7;
   /* append "/" if no filename at all */
   slash=strchr(host,'/');
   if(slash==NULL) {
      slash=url+urllen;
      slash[0]='/';
      slash[1]='\0';
      if(verbose<=CVTlev) printf("%sConverted to URL: %s%s\n",
                                 CVTpref,url,CVTsuf);
   }
   /* get node and filename, but only if they fit */
   l=slash-host;
   if((l>=sizeof(node))||(strlen(slash)>=sizeof(fnm))) {
      if(verbose<=ERRlev) printf("%sMalformed or too long URL: %s%s\n",
                                 ERRpref,url,ERRsuf);
      return;
   }
   memcpy(node,host,l);
   node[l]='\0';
   strcpy(fnm,slash);
   /* get port */
   port=80;
   p=strchr(node,':');
   if(p!=NULL) {
      *p='\0';
      p++;
      /* "node:" with nothing after the colon means the default port */
      if(*p!='\0') {
         port=atoi(p);
         if((port<1)||(port>65535)) {
            if(verbose<=ERRlev) printf("%sMalformed or too long URL: %s%s\n",
                                       ERRpref,url,ERRsuf);
            return;
         }
      }
   }
   /* strip # part of URL */
   p=strchr(fnm,'#');
   if(p!=NULL) *p='\0';
   /* send HEAD request to HTTP-server */
   if(!test_head(node,port,fnm,1)) {
      if(verbose<=ERRlev) printf("%sHEAD request not successfull for URL: %s%s\n",
                                 ERRpref,url,ERRsuf);
   } else {
      if(verbose<=OKlev) printf("%sHEAD request successfull for URL: %s%s\n",
                                OKpref,url,OKsuf);
   }
   return;
}

/* convert URL to node/port
   and connect to node/port for GOPHER/FTP-server

   url       : URL to check; modified in place when a trailing "/" has
               to be added, so the buffer must have room for one more
               character
   prefixlen : length of the "protocol://" part, e.g. 6 for "ftp://"
   defport   : port used when the URL does not name one

   nothing is contacted unless checknonhttp is set; in that case the URL
   is only reported as assumed valid */
void processnonhttp(char *url,int prefixlen,int defport)
{
   int port;
   size_t l,urllen;
   char node[65],*p,*host,*slash;
   if(!checknonhttp) {
      if(verbose<=ASSlev) printf("%sAssuming valid URL: %s%s\n",
                                  ASSpref,url,ASSsuf);
      return;
   }
   /* the parsing below skips prefixlen characters ending in "://",
      so make sure they are really there */
   urllen=strlen(url);
   if((prefixlen<3)||(urllen<=(size_t)prefixlen)||
      (strncmp(url+prefixlen-3,"://",3)!=0)) {
      if(verbose<=ERRlev) printf("%sMalformed or too long URL: %s%s\n",
                                 ERRpref,url,ERRsuf);
      return;
   }
   host=url+prefixlen;
   /* append "/" if no filename at all */
   slash=strchr(host,'/');
   if(slash==NULL) {
      slash=url+urllen;
      slash[0]='/';
      slash[1]='\0';
      if(verbose<=CVTlev) printf("%sConverted to URL: %s%s\n",
                                 CVTpref,url,CVTsuf);
   }
   /* get node, but only if it fits */
   l=slash-host;
   if(l>=sizeof(node)) {
      if(verbose<=ERRlev) printf("%sMalformed or too long URL: %s%s\n",
                                 ERRpref,url,ERRsuf);
      return;
   }
   memcpy(node,host,l);
   node[l]='\0';
   /* get port */
   port=defport;
   p=strchr(node,':');
   if(p!=NULL) {
      *p='\0';
      p++;
      /* "node:" with nothing after the colon means the default port */
      if(*p!='\0') {
         port=atoi(p);
         if((port<1)||(port>65535)) {
            if(verbose<=ERRlev) printf("%sMalformed or too long URL: %s%s\n",
                                       ERRpref,url,ERRsuf);
            return;
         }
      }
   }
   /* connect to node/port for GOPHER/FTP-server */
   if(!test_head(node,port,"",0)) {
      if(verbose<=ERRlev) printf("%sConnect to node not successfull for URL: %s%s\n",
                                 ERRpref,url,ERRsuf);
   } else {
      if(verbose<=OKlev) printf("%sConnect to node successfull for URL: %s%s\n",
                                OKpref,url,OKsuf);
   }
   return;
}

/* check a gopher URL: the node starts after the 9 characters of
   "gopher://" and the default port is 70 */
void processgopher(char *url)
{
   enum { PREFIXLEN=9, DEFPORT=70 };
   processnonhttp(url,PREFIXLEN,DEFPORT);
}

/* check an ftp URL: the node starts after the 6 characters of
   "ftp://" and the default port is 21 */
void processftp(char *url)
{
   enum { PREFIXLEN=6, DEFPORT=21 };
   processnonhttp(url,PREFIXLEN,DEFPORT);
}

/* check a file URL: the node starts after the 7 characters of
   "file://"; the node is contacted on port 21 like an ftp server */
void processfile(char *url)
{
   enum { PREFIXLEN=7, DEFPORT=21 };
   processnonhttp(url,PREFIXLEN,DEFPORT);
}

/* analyze URL and act according to type */
void processurl(char *url)
{
   int i;
   char tmp[256];
   if(verbose<=FNDlev) printf("%sFound URL: %s%s\n",FNDpref,url,FNDsuf);
   /* ignore URL with only # */
   if(url[0]=='#') {
      if(verbose<=ASSlev) printf("%sAssuming valid URL: %s%s\n",
                                 ASSpref,url,ASSsuf);
      return;
   }
   /* for URL's without protocol and node add http protocol + default node
      or baseurl except for mailto and news URL's */
   if((strstr(url,"://")==NULL)&&(strstr(url,"mailto:")!=url)&&
      (strstr(url,"news:")!=url)) {
      if((url[0]!='/')&&(strlen(baseurl)>0)) {
         strcpy(tmp,baseurl);
         i=strlen(baseurl)-1;
         while((i>=0)&&(baseurl[i]!='/')) i--;
         tmp[i+1]='\0';
         strcat(tmp,url);
         strcpy(url,tmp);
      } else {
         strcpy(tmp,url);
         sprintf(url,"http://%s%s",defaulthost,tmp);
      }
      if(verbose<=CVTlev) printf("%sConverted to URL: %s%s\n",
                                 CVTpref,url,CVTsuf);
   }
   /* http protocol */
   if((strstr(url,"http:")==url) ||
      (strstr(url,"HTTP:")==url)) {
      processhttp(url);
      return;
   }
   /* gopher protocol */
   if((strstr(url,"gopher:")==url) ||
      (strstr(url,"GOPHER:")==url)) {
      processgopher(url);
      return;
   }
   /* ftp protocol */
   if((strstr(url,"ftp:")==url) ||
      (strstr(url,"FTP:")==url)) {
      processftp(url);
      return;
   }
   /* telnet protocol */
   if((strstr(url,"telnet:")==url) ||
      (strstr(url,"TELNET:")==url)) {
      if(verbose<=ASSlev) printf("%sAssuming valid URL: %s%s\n",
                                 ASSpref,url,ASSsuf);
      return;
   }
   /* mail protocol */
   if((strstr(url,"mailto:")==url) ||
      (strstr(url,"MAILTO:")==url)) {
      if(verbose>ASSlev) printf("%sAssuming valid URL: %s%s\n",
                                ASSpref,url,ASSsuf);
      return;
   }
   /* nntp protocol */
   if((strstr(url,"news:")==url) ||
      (strstr(url,"NEWS:")==url)) {
      if(verbose>ASSlev) printf("%sAssuming valid URL: %s%s\n",
                                ASSpref,url,ASSsuf);
      return;
   }
   /* ftp protocol */
   if((strstr(url,"file:")==url) ||
      (strstr(url,"FILE:")==url)) {
      processftp(url);
      return;
   }
}

/* copy l to l2 leaving out every space character; other white space such
   as tabs is kept.  l2 must have room for strlen(l)+1 characters and may
   be the same buffer as l. */
void removespaces(char *l2,char *l)
{
   char *src,*dst;
   dst=l2;
   for(src=l;*src!='\0';src++) {
      if(*src==' ') continue;
      *dst++=*src;
   }
   *dst='\0';
}

/* parse one line for URL's */
void processline(char *line)
{
   char *p1,*p2,*p3;
   char url[512],line2[1024];
   removespaces(line2,line);
   p1=line2;
   p3=strstr(p1,"HREF=\"");
   if(p3==NULL) p3=strstr(p1,"href=\"");
   p1=p3;
   while(p1!=NULL) {
      p1=p1+6;
      p2=p1;
      p2=strstr(p2,"\"");
      strncpy(url,p1,p2-p1);
      url[p2-p1]='\0';
      processurl(url);
      p1=p2;
      p3=strstr(p1,"HREF=\"");
      if(p3==NULL) p3=strstr(p1,"href=\"");
      p1=p3;
   }
   return;
}

/* read all lines from one file

   fnm : name of the file to scan

   each line is passed to processline without its newline.  Lines longer
   than the buffer are handed over in pieces, so a URL that straddles
   two pieces is not recognized. */
void processone(char *fnm)
{
   FILE *fp;
   size_t n;
   char line[1024];
   fp=fopen(fnm,"r");
   if(fp==NULL) {
      if(verbose<=ERRlev) printf("%sError opening file: %s%s\n",
                                 ERRpref,fnm,ERRsuf);
   } else {
      if(verbose<=FNDlev) printf("%sProcessing file: %s%s\n",FNDpref,fnm,FNDsuf);
      /* testing fgets itself avoids processing the last line twice,
         which happens when feof is tested before the read */
      while(fgets(line,sizeof(line),fp)!=NULL) {
         /* strip the newline only if there is one; the last line of a
            file and the pieces of an over-long line have none */
         n=strlen(line);
         if((n>0)&&(line[n-1]=='\n')) line[n-1]='\0';
         processline(line);
      }
      fclose(fp);
   }
   return;
}

/* find all files matching wildcard specification

   fnms : file specification, possibly with wildcards

   lib$find_file is called until it no longer returns RMS$_NORMAL and
   every file name it delivers is passed to processone */
void process(char *fnms)
{
   int contxt;
   int status;
   char *s;
   char res[256];
   $DESCRIPTOR(fnmdes,"");
   $DESCRIPTOR(resdes,res);
   fnmdes.dsc$w_length=strlen(fnms);
   fnmdes.dsc$a_pointer=fnms;
   contxt=0;
   do {
      status = lib$find_file(&fnmdes,&resdes,&contxt,0,0,0,0);
      if (status == RMS$_NORMAL) {
         /* the result is presumably blank padded to the descriptor length
            and not NUL terminated (confirm against the LIB$FIND_FILE
            documentation), so terminate the buffer before searching it */
         res[sizeof(res)-1] = '\0';
         /* cut the name at the first blank, if there is one */
         s = strchr(res,' ');
         if (s != NULL) *s = '\0';
         processone(res);
      }
   } while (status == RMS$_NORMAL);
   lib$find_file_end(&contxt);
   return;
}

/* copy a command-line or environment value into a buffer of size bytes,
   truncating it if necessary; a NULL value gives an empty string */
static void copyvalue(char *dst,size_t size,const char *src)
{
   if(src==NULL) src="";
   strncpy(dst,src,size-1);
   dst[size-1]='\0';
}

/* program entry: read the qualifiers (/BASEURL, /HTTPSERVER, /STYLE,
   /NONHTTP, /VERBOSE, /OUTPUT) into the global settings and then scan
   every FILE parameter.  argc and argv are not used directly; the
   command line is obtained through the cli_ routines.
   returns EXIT_SUCCESS, or EXIT_FAILURE when the /OUTPUT file cannot be
   opened */
int main(int argc,char *argv[])
{
   int style;
   size_t len;
   char *p,tmp[257];
   /* parse command line arguments */
   cli_dcl_parse();
   /* /BASEURL, with surrounding quotes removed */
   if(cli_present("BASEURL")) {
      copyvalue(tmp,sizeof(tmp),cli_get_value("BASEURL"));
      if(tmp[0]=='"') {
         /* baseurl is as large as tmp, so these copies always fit */
         strcpy(baseurl,tmp+1);
         len=strlen(baseurl);
         if((len>0)&&(baseurl[len-1]=='"')) baseurl[len-1]='\0';
      } else {
         strcpy(baseurl,tmp);
      }
   } else {
      strcpy(baseurl,"");
   }
   /* /HTTPSERVER */
   if(cli_present("HTTPSERVER")) {
      copyvalue(defaulthost,sizeof(defaulthost),cli_get_value("HTTPSERVER"));
   } else {
      p=getenv("HTTP_DEFAULT_HOST");
      if(p!=NULL) {
         copyvalue(defaulthost,sizeof(defaulthost),p);
      } else {
         printf("Enter default hostname> ");
         /* the field width keeps the input within defaulthost[65] */
         if(scanf("%64s",defaulthost)!=1) defaulthost[0]='\0';
      }
   }
   /* /STYLE; an unrecognized value leaves all prefixes and suffixes
      empty */
   style=0;
   copyvalue(tmp,sizeof(tmp),cli_get_value("STYLE"));
   if(strcmp(tmp,"BACK")==0) style=BACKstyle;
   if(strcmp(tmp,"FRONT")==0) style=FRONTstyle;
   if(strcmp(tmp,"VMSISH")==0) style=VMSISHstyle;
   switch (style) {
      case BACKstyle:
         strcpy(OKpref,"");
         strcpy(OKsuf,"");
         strcpy(FNDpref,"");
         strcpy(FNDsuf,"");
         strcpy(CVTpref,"");
         strcpy(CVTsuf,"");
         strcpy(ASSpref,"");
         strcpy(ASSsuf,"");
         strcpy(ERRpref,"");
         strcpy(ERRsuf," <----------");
         strcpy(ERR2pref,"");
         strcpy(ERR2suf,"");
         break;
      case FRONTstyle:
         strcpy(OKpref,"    ");
         strcpy(OKsuf,"    ");
         strcpy(FNDpref,"    ");
         strcpy(FNDsuf,"    ");
         strcpy(CVTpref,"    ");
         strcpy(CVTsuf,"    ");
         strcpy(ASSpref,"    ");
         strcpy(ASSsuf,"    ");
         strcpy(ERRpref,"--->");
         strcpy(ERRsuf,"    ");
         strcpy(ERR2pref,"    ");
         strcpy(ERR2suf,"    ");
         break;
      case VMSISHstyle:
         strcpy(OKpref,"%URLSCAN-OK-");
         strcpy(OKsuf,"");
         strcpy(FNDpref,"%URLSCAN-FND-");
         strcpy(FNDsuf,"");
         strcpy(CVTpref,"%URLSCAN-CVT-");
         strcpy(CVTsuf,"");
         strcpy(ASSpref,"%URLSCAN-ASS-");
         strcpy(ASSsuf,"");
         strcpy(ERRpref,"%URLSCAN-ERR-");
         strcpy(ERRsuf,"");
         strcpy(ERR2pref,"%URLSCAN-ERR2-");
         strcpy(ERR2suf,"");
         break;
      default:
         /* the global prefix/suffix buffers start out empty */
         break;
   }
   /* /NONHTTP */
   checknonhttp=0;
   if(cli_present("NONHTTP")) checknonhttp=1;
   /* /VERBOSE; an unrecognized value leaves verbose at its initial 0,
      which prints every message */
   copyvalue(tmp,sizeof(tmp),cli_get_value("VERBOSE"));
   if(strcmp(tmp,"OK")==0) verbose=OKlev;
   if(strcmp(tmp,"FND")==0) verbose=FNDlev;
   if(strcmp(tmp,"CVT")==0) verbose=CVTlev;
   if(strcmp(tmp,"ASS")==0) verbose=ASSlev;
   if(strcmp(tmp,"ERR2")==0) verbose=ERR2lev;
   if(strcmp(tmp,"ERR")==0) verbose=ERRlev;
   /* /OUTPUT */
   if(cli_present("OUTPUT")) {
      p=cli_get_value("OUTPUT");
      if((p==NULL)||(freopen(p,"w",stdout)==NULL)) {
         /* stdout is unusable after a failed freopen */
         fprintf(stderr,"Error opening output file: %s\n",(p!=NULL)?p:"");
         return EXIT_FAILURE;
      }
   }
   /* FILE; cli_get_value returns NULL when there are no more values */
   while((p=cli_get_value("FILE"))!=NULL) process(p);
   return EXIT_SUCCESS;
}
