/* Parsing FTP DIR output.
   Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.

This file is part of Wget.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif
#include <sys/types.h>
#include <ctype.h>
#include <errno.h>

#include "wget.h"
#include "utils.h"
#include "ftp.h"

#ifdef VMS
#include "vms.h"
#endif /* VMS */

/* Converts symbolic permissions to number-style ones, e.g. string
   rwxr-xr-x to 755.  S must point at the nine permission characters
   of an `ls -l' mode field, i.e. just past the file-type character.
   If fewer than nine characters are available, 0 is returned.

   Only the read/write/execute bits are produced.  The setuid/setgid
   marker `s' and the sticky marker `t' are recognized solely because
   they take the place of `x' in the listing and imply that the
   execute bit is set; the special bits themselves are not encoded.
   The upper-case forms `S' and `T' mean "special bit set, execute bit
   clear" and therefore contribute nothing.  ACLs are ignored.  */
static int
symperms (const char *s)
{
  int perms = 0, i;

  if (strlen (s) < 9)
    return 0;
  /* One iteration per rwx triplet: user, group, other.  */
  for (i = 0; i < 3; i++, s += 3)
    {
      perms <<= 3;
      perms += (((s[0] == 'r') << 2) + ((s[1] == 'w') << 1) +
                (s[2] == 'x' || s[2] == 's' || s[2] == 't'));
    }
  return perms;
}


/* Convert the Un*x-ish style directory listing stored in FILE to a
   linked list of fileinfo (system-independent) entries.  The contents
   of FILE are considered to be produced by the standard Unix `ls -la'
   output (whatever that might be).  BSD (no group) and SYSV (with
   group) listings are handled.

   Each line is split on spaces.  The first token gives the file type
   and the permissions; the month-name token then serves as an anchor
   from which the size (the token before it), the day, the time or
   year, and finally the file name are located.  Lines starting with
   "total", the `.' and `..' entries, and lines that cannot be parsed
   are skipped.

   The time stamps are stored in a separate variable, time_t
   compatible (I hope).  The timezones are ignored, except that the
   value of the environment variable WGET_TIMEZONE_DIFFERENTIAL
   (seconds, may be negative) is added to every time stamp.

   Returns the head of a doubly linked list whose `name' and `linkto'
   members are allocated with xmalloc/xstrdup, or NULL if FILE cannot
   be opened or yields no usable entry.  */
static struct fileinfo *
ftp_parse_unix_ls (const char *file)
{
  FILE *fp;
  static const char *months[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
  int dt, next, len, i, error, ignore;
  int year, month, day;         /* for time analysis */
  int hour, min, sec;
  int now_mon, now_year;        /* current month/year (struct tm units) */
  struct tm timestruct;
  time_t timenow;

  char *line, *tok;             /* tokenizer */
  struct fileinfo *dir, *l, cur; /* list creation */

  fp = fopen (file, "r");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  dir = l = NULL;

  /* Line loop to end of file: */
  while ((line = read_whole_line (fp)))
    {
      len = strlen (line);
      /* Destroy <CR> if there is one.  */
      if (len && line[len - 1] == '\r')
        line[--len] = '\0';

      DEBUGP (("line: >%s<\n", line));

      /* Skip if total...  */
      if (!strncasecmp (line, "total", 5))
        {
          free (line);
          continue;
        }
      /* Get the first token (permissions).  */
      tok = strtok (line, " ");
      if (!tok)
        {
          free (line);
          continue;
        }

      cur.name = NULL;
      cur.linkto = NULL;

      /* Decide whether we deal with a file or a directory.  */
      switch (*tok)
        {
        case '-':
          cur.type = FT_PLAINFILE;
          DEBUGP (("PLAINFILE; "));
          break;
        case 'd':
          cur.type = FT_DIRECTORY;
          DEBUGP (("DIRECTORY; "));
          break;
        case 'l':
          cur.type = FT_SYMLINK;
          DEBUGP (("SYMLINK; "));
          break;
        default:
          cur.type = FT_UNKNOWN;
          DEBUGP (("UNKOWN; "));
          break;
        }

      /* The permission characters follow the file-type character.  */
      cur.perms = symperms (tok + 1);
      DEBUGP (("perms %0o; ", cur.perms));

      error = ignore = 0;       /* Erroneous and ignored entries are
                                   treated equally for now.  */
      year = hour = min = sec = 0; /* Silence the compiler.  */
      month = day = 0;
      next = -1;
      /* While there are tokens on the line, parse them.  Next is the
         number of tokens left until the filename.

         Use the month-name token as the "anchor" (the place where the
         position wrt the file name is "known").  When a month name is
         encountered, `next' is set to 5; because `next' is decremented
         before it is examined, the following tokens are seen with
         next == 4 (day), 3 (time or year) and 2 (file name).  Also,
         the preceding characters are parsed to get the file size.

         This tactic is quite dubious when it comes to
         internationalization issues (non-English month names), but it
         works for now.  */
      while ((tok = strtok (NULL, " ")))
        {
          --next;
          if (next < 0)         /* a month name was not encountered */
            {
              for (i = 0; i < 12; i++)
                if (!strcmp (tok, months[i]))
                  break;
              /* If we got a month, it means the token before it is the
                 size, and the filename is three tokens away.  */
              if (i != 12)
                {
                  /* tok[-1] is the NUL strtok wrote over the separator,
                     so tok - 2 is the last character of whatever
                     precedes the month.  Walk backwards over the digits,
                     accumulating the size from the least significant
                     digit upwards.  */
                  char *t = tok - 2;
                  long mul = 1;

                  for (cur.size = 0; t > line && ISDIGIT (*t); mul *= 10, t--)
                    cur.size += mul * (*t - '0');
                  if (t == line)
                    {
                      /* Something is seriously wrong.  */
                      error = 1;
                      break;
                    }
                  month = i;
                  next = 5;
                  DEBUGP (("month: %s; ", months[month]));
                }
            }
          else if (next == 4)   /* days */
            {
              if (tok[1])       /* two-digit... */
                day = 10 * (*tok - '0') + tok[1] - '0';
              else              /* ...or one-digit */
                day = *tok - '0';
              DEBUGP (("day: %d; ", day));
            }
          else if (next == 3)
            {
              /* This ought to be either the time, or the year.  Let's
                 be flexible!

                 If we have a number x, it's a year.  If we have x:y,
                 it's hours and minutes.  If we have x:y:z, z are
                 seconds.  */
              year = 0;
              min = hour = sec = 0;
              /* We must deal with digits.  */
              if (ISDIGIT (*tok))
                {
                  /* Suppose it's year.  */
                  for (; ISDIGIT (*tok); tok++)
                    year = (*tok - '0') + 10 * year;
                  if (*tok == ':')
                    {
                      /* This means these were hours!  */
                      hour = year;
                      year = 0;
                      ++tok;
                      /* Get the minutes...  */
                      for (; ISDIGIT (*tok); tok++)
                        min = (*tok - '0') + 10 * min;
                      if (*tok == ':')
                        {
                          /* ...and the seconds.  */
                          ++tok;
                          for (; ISDIGIT (*tok); tok++)
                            sec = (*tok - '0') + 10 * sec;
                        }
                    }
                }
              if (year)
                DEBUGP (("year: %d (no tm); ", year));
              else
                DEBUGP (("time: %02d:%02d:%02d (no yr); ", hour, min, sec));
            }
          else if (next == 2)    /* The file name */
            {
              int fnlen;
              char *p;

              /* Since the file name may contain a SPC, it is possible
                 for strtok to handle it wrong.  If the token ends
                 before the end of the line, strtok cut it at a space.  */
              fnlen = strlen (tok);
              if (fnlen < len - (tok - line))
                {
                  /* So we have a SPC in the file name.  Restore the
                     original; TOK now extends to the end of the line.  */
                  tok[fnlen] = ' ';
                  /* If the file is a symbolic link, it should have a
                     ` -> ' somewhere.  */
                  if (cur.type == FT_SYMLINK)
                    {
                      p = strstr (tok, " -> ");
                      if (!p)
                        {
                          error = 1;
                          break;
                        }
                      cur.linkto = xstrdup (p + 4);
                      DEBUGP (("link to: %s\n", cur.linkto));
                      /* And separate it from the file name.  */
                      *p = '\0';
                    }
                }
              /* If we have the filename, add it to the list of files or
                 directories.  */
              /* "." and ".." are an exception!  */
              if (!strcmp (tok, ".") || !strcmp (tok, ".."))
                {
                  DEBUGP (("\nIgnoring `.' and `..'; "));
                  ignore = 1;
                  break;
                }
              /* Some FTP sites choose to have ls -F as their default
                 LIST output, which marks the symlinks with a trailing
                 `@', directory names with a trailing `/' and
                 executables with a trailing `*'.  This is no problem
                 unless encountering a symbolic link ending with `@',
                 or an executable ending with `*' on a server without
                 default -F output.  I believe these cases are very
                 rare.  */
              fnlen = strlen (tok); /* re-calculate `fnlen' */
              cur.name = (char *)xmalloc (fnlen + 1);
              memcpy (cur.name, tok, fnlen + 1);
              if (fnlen)
                {
                  if (cur.type == FT_DIRECTORY && cur.name[fnlen - 1] == '/')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `/' on dir.\n"));
                    }
                  else if (cur.type == FT_SYMLINK && cur.name[fnlen - 1] == '@')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `@' on link.\n"));
                    }
                  else if (cur.type == FT_PLAINFILE
                           && (cur.perms & 0111)
                           && cur.name[fnlen - 1] == '*')
                    {
                      cur.name[fnlen - 1] = '\0';
                      DEBUGP (("trailing `*' on exec.\n"));
                    }
                } /* if (fnlen) */
              else
                error = 1;
              /* The name is the last thing we need from this line.  */
              break;
            }
          else
            /* Not reachable: the file-name branch always leaves the
               loop, so `next' never drops below 2 once it was set.  */
            abort ();
        } /* while */

      /* No name means the month anchor was never found (or the line
         was truncated); a symlink without a target is useless too.  */
      if (!cur.name || (cur.type == FT_SYMLINK && !cur.linkto))
        error = 1;

      DEBUGP (("\n"));

      if (error || ignore)
        {
          DEBUGP (("Skipping.\n"));
          FREE_MAYBE (cur.name);
          FREE_MAYBE (cur.linkto);
          free (line);
          continue;
        }

      /* Append a copy of CUR to the list; L always points at the tail.  */
      if (!dir)
        {
          l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          memcpy (l, &cur, sizeof (cur));
          l->prev = l->next = NULL;
        }
      else
        {
          cur.prev = l;
          l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          l = l->next;
          memcpy (l, &cur, sizeof (cur));
          l->next = NULL;
        }
      /* Get the current time.  */
      timenow = time (NULL);
      /* Build the time-stamp (the idea by zaga@fly.cc.fer.hr).  Start
         from the broken-down current time and remember the current
         month and year before the fields are overwritten.  */
      localtime_r (&timenow, &timestruct);
      now_mon = timestruct.tm_mon;
      now_year = timestruct.tm_year;
      timestruct.tm_sec   = sec;
      timestruct.tm_min   = min;
      timestruct.tm_hour  = hour;
      timestruct.tm_mday  = day;
      timestruct.tm_mon   = month;
      if (year == 0)
        {
          /* Some listings will not specify the year if it is "obvious"
             that the file was from the previous year.  E.g. if today
             is 97-01-12, and you see a file of Dec 15th, its year is
             1996, not 1997.  Thanks to Vladimir Volovich for
             mentioning this!  */
          if (month > now_mon)
            timestruct.tm_year = now_year - 1;
          else
            timestruct.tm_year = now_year;
        }
      else
        timestruct.tm_year = year;
      /* A year taken from the listing is a calendar year; tm_year
         counts from 1900.  */
      if (timestruct.tm_year >= 1900)
        timestruct.tm_year -= 1900;
      timestruct.tm_wday  = 0;
      timestruct.tm_yday  = 0;

      /* tm_isdst keeps the value localtime_r computed for the current
         time.  Forcing it to -1 was tried and judged a bad idea: it
         causes local time to differ according to DST status of remote
         time.  */

      /* Convert struct tm local time to time_t local time. */
      timenow = mktime (&timestruct);
      /* Offset local time according to environment variable (seconds). */
      if ((tok = getenv ("WGET_TIMEZONE_DIFFERENTIAL")) != NULL)
        {
          dt = atoi (tok);
          DEBUGP (("Time differential = %d.\n", dt));
        }
      else
        {
          dt = 0;
        }

      /* A negative differential is subtracted as a positive amount
         rather than added as a negative one (presumably to stay correct
         where time_t is an unsigned type).  */
      if (dt >= 0)
        {
          timenow += dt;
        }
      else
        {
          timenow -= (-dt);
        }
      l->tstamp = timenow; /* Store the time-stamp. */
      /* time_t is not necessarily int; convert explicitly for printing.  */
      DEBUGP (("tstamp = %ld.\n", (long) timenow));

      free (line);
    }

  fclose (fp);
  return dir;
}

/* Convert the VMS-style directory listing stored in "file" to a
   linked list of fileinfo (system-independent) entries.  The contents
   of FILE are considered to be produced by the standard VMS
   "DIRECTORY [/SIZE [= ALL]] /DATE [/OWNER] [/PROTECTION]" command.
   (Different FTP servers do not supply the same data, but all should be
   subsets of this.)

   The first token of an entry line is the file name; a name ending in
   ".DIR;1" marks a directory (the suffix is removed), anything else is
   a plain file.  The remaining tokens are classified by their shape:
   date (contains `-'), time (contains `:'), owner (contains `[',
   ignored), protection (contains `('), anything else is ignored.  If
   a line holds only the name, the data is expected on the following
   line, which must then begin with a space; otherwise the name-only
   entry is dropped and that following line is parsed as a new entry.

   Define (logical name) "WGET_TIMEZONE_DIFFERENTIAL" (seconds) to
   adjust receiving local times if different from remote local times.

   Returns the head of a doubly linked list whose `name' members are
   allocated with xmalloc, or NULL if FILE cannot be opened or yields
   no entry.  */

/* Permissions used when the listing carries no protection field.  */
#define VMS_DEFAULT_PROT 0644

static struct fileinfo *
ftp_parse_vms (const char *file)
{
  char *line, *tok;             /* tokenizer */
  char date_str[32];
  int dt, have_line, i, j, len, perms, reading, state;
  FILE *fp;
  time_t timenow;
  struct tm timestruct;
  struct fileinfo *dir, *l, cur; /* list creation */

  fp = fopen (file, "r");
  if (!fp)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }
  dir = l = NULL;
  line = NULL;

  /* Line loop to end of file.  HAVE_LINE is set when LINE already
     holds an unprocessed line that was read ahead as a possible
     continuation; STATE becomes non-zero once the "Directory ..."
     heading has been seen.  */
  have_line = 0;
  reading = 1;
  state = 0;
  while (reading)
    {
      if (!have_line)
        {
          line = read_whole_line (fp);
          if (!line)
            {
              reading = 0;
              continue;
            }
        }
      have_line = 0;

      len = strlen (line);
      /* Destroy <CR> if there is one.  */
      if (len && line[len - 1] == '\r')
        line[--len] = '\0';

      DEBUGP (("1 line: >%s<\n", line));

      /* Skip (once) if "Directory ..."  (The prefix match guarantees
         len >= 10, so line[len - 1] is a valid access.)  */
      if ((state == 0) &&
          (!strncmp (line, "Directory ", 10)) &&
          (line[len - 1] == ']'))
        {
          DEBUGP (("Skipping \"Directory \" heading.\n"));
          state = 1;
          free (line);
          continue;
        }

      /* Skip (finally) if "Total of ..." */
      if ((state != 0) &&
          (!strncmp (line, "Total of ", 9)))
        {
          DEBUGP (("Skipping \"Total of \" footing.\n"));
          state = 1;
          free (line);
          continue;
        }

      /* Get token 1, file_name. */
      tok = strtok (line, " ");
      if (!tok)
        {
          free (line);
          continue;
        }
      DEBUGP (("Name: >%s<\n", tok));
      cur.linkto = NULL;
      len = strlen (tok);
      cur.type = FT_UNKNOWN;
      if (len >= 6)
        {
          if (!strncmp ((tok + (len - 6)), ".DIR;1", 6))
            {
              cur.type = FT_DIRECTORY;
              /* Strip the ".DIR;1" suffix from the name.  */
              *(tok + (len -= 6)) = '\0';
              DEBUGP (("DIRECTORY; "));
            }
        }
      if (cur.type == FT_UNKNOWN)
        {
          cur.type = FT_PLAINFILE;
          DEBUGP (("PLAINFILE; "));
        }
      /* Store the name. */
      cur.name = (char *)xmalloc (len + 1);
      memcpy (cur.name, tok, (len + 1));
      /* Null the date and time string. */
      *date_str = '\0';
      /* Set default permissions (which may be overridden later). */
      cur.perms = VMS_DEFAULT_PROT;
      /* Set unknown size. */
      cur.size = 0;

      /* Get token 2, if any.  A long name may force all other data onto
         a second line. */
      tok = strtok (NULL, " ");
      if (!tok)
        {
          free (line);
          /* Expecting more than just the name.  Try another line. */
          line = read_whole_line (fp);
          if (!line)
            {
              /* End of file: the name-only entry is abandoned, so its
                 name must be released.  */
              free (cur.name);
              reading = 0;
              continue;
            }

          len = strlen (line);
          /* Destroy <CR> if there is one.  */
          if (len && line[len - 1] == '\r')
            line[--len] = '\0';

          DEBUGP (("2 line: >%s<\n", line));

          /* Second line must begin with " ". */
          if ((len <= 0) || (line[0] != ' '))
            {
              /* Not a continuation: abandon the name-only entry and
                 reprocess this line as a fresh entry.  */
              free (cur.name);
              have_line = 1;
              continue;
            }

          tok = strtok (line, " ");
          if (!tok)
            {
              /* Continuation line holds nothing but spaces.  */
              free (cur.name);
              free (line);
              continue;
            }
        }

      /* Classify every remaining token on the line.  TOK is non-null
         on entry.  */
      while (tok)
        {
          DEBUGP (("Token: >%s<: ", tok));

          if ((strlen (tok) < 12) && (strchr (tok, '-') != NULL))
            {
              /* Date.  (At most 11 characters plus the blank, so it
                 fits in date_str.)  */
              DEBUGP (("Date.\n"));
              strcpy (date_str, tok);
              strcat (date_str, " ");
            }
          else if ((strlen (tok) < 12) && (strchr (tok, ':') != NULL))
            {
              /* Time. */
              DEBUGP (("Time. "));
              strncat (date_str,
                       tok,
                       (sizeof (date_str) - strlen (date_str) - 1));
              DEBUGP (("Date time: >%s<\n", date_str));
            }
          else if (strchr (tok, '[') != NULL)
            {
              /* Owner.  (Ignore.) */
              DEBUGP (("Owner.\n"));
            }
          else if (strchr (tok, '(') != NULL)
            {
              /* Protections: comma-separated groups of R/W/E/D
                 letters.  Everything before the first comma is
                 discarded; each later comma starts the next group,
                 three bits further down.  */
              perms = 0;
              j = 0;
              for (i = 0; tok[i] != '\0'; i++)
                {
                  switch (tok[i])
                    {
                    case '(':
                    case ')':
                      break;
                    case ',':
                      if (j == 0)
                        {
                          /* End of the first group: drop its bits.  */
                          perms = 0;
                          j = 1;
                        }
                      else
                        {
                          perms <<= 3;
                        }
                      break;
                    case 'R':
                      perms |= 4;
                      break;
                    case 'W':
                      perms |= 2;
                      break;
                    case 'E':
                      perms |= 1;
                      break;
                    case 'D':
                      /* Delete access is mapped onto the write bit.  */
                      perms |= 2;
                      break;
                    default:
                      break;
                    }
                }
              cur.perms = perms;
              DEBUGP (("Prot.  perms = %0o.\n", cur.perms));
            }
          else
            {
              /* Nondescript.  Probably size(s), probably in blocks.
                 (Therefore, ignore.) */
              DEBUGP (("Ignored (size?).\n"));
            }

          tok = strtok (NULL, " ");
        }
      /* All tokens consumed; the line buffer is no longer needed.  */
      free (line);

      /* Start from the broken-down current time and let strptime
         overwrite the fields it finds in the listing.  The result of
         strptime is not checked: an empty or unparsable date leaves
         (at least part of) the current time in place.
         NOTE(review): the contents of timestruct after a failed
         strptime are implementation-specific -- confirm on the target
         platforms.  */
      timenow = time (NULL);
      localtime_r (&timenow, &timestruct);
      strptime (date_str, "%d-%b-%Y %H:%M:%S", &timestruct);

      /* Append a copy of CUR to the list; L always points at the tail.  */
      if (!dir)
        {
          l = dir = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          memcpy (l, &cur, sizeof (cur));
          l->prev = l->next = NULL;
        }
      else
        {
          cur.prev = l;
          l->next = (struct fileinfo *)xmalloc (sizeof (struct fileinfo));
          l = l->next;
          memcpy (l, &cur, sizeof (cur));
          l->next = NULL;
        }

      /* Convert struct tm local time to time_t local time. */
      timenow = mktime (&timestruct);
      /* Offset local time according to environment variable (seconds). */
      if ((tok = getenv ("WGET_TIMEZONE_DIFFERENTIAL")) != NULL)
        {
          dt = atoi (tok);
          DEBUGP (("Time differential = %d.\n", dt));
        }
      else
        {
          dt = 0;
        }

      /* A negative differential is subtracted as a positive amount
         rather than added as a negative one (presumably to stay correct
         where time_t is an unsigned type).  */
      if (dt >= 0)
        {
          timenow += dt;
        }
      else
        {
          timenow -= (-dt);
        }
      l->tstamp = timenow; /* Store the time-stamp. */
      /* time_t is not necessarily int; convert explicitly for printing.  */
      DEBUGP (("tstamp = %ld.\n", (long) timenow));
    }

  fclose (fp);
  return dir;
}

/* Attempt to determine FTP DIR output format, UNIX or VMS, and parse
   FILE with the matching routine.

   Not over-clever.  The file is opened and read line by line until a
   line identifies the format: a line starting with "total" (in any
   letter case) means UNIX `ls', a line starting with "Directory " and
   ending in `]' means VMS DIRECTORY.  The file is then closed and the
   specific routine reads it again from the start.  Sensing system
   type using FTP SYST is unreliable, as some VMS servers return
   listings in a UNIX format.  If no line is conclusive, the default
   is UNIX.

   Returns whatever the selected parser returns, or NULL if FILE
   cannot be opened.  */

struct fileinfo *
ftp_parse_ls (const char *file)
{
  enum { UNIX_LS, VMS_DIR } detected = UNIX_LS;
  FILE *listing;
  char *text;
  int decided = 0;

  listing = fopen (file, "r");
  if (listing == NULL)
    {
      logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
      return NULL;
    }

  /* Stop reading as soon as one line has settled the format.  */
  while (!decided && (text = read_whole_line (listing)) != NULL)
    {
      int n = strlen (text);

      /* Drop a trailing <CR>, if any.  */
      if (n > 0 && text[n - 1] == '\r')
        text[--n] = '\0';

      DEBUGP (("Sensing. >%s<\n", text));

      if (strncasecmp (text, "total", 5) == 0)
        {
          DEBUGP (("Sensed UNIX \"ls\" format.\n"));
          decided = 1;
        }
      else if (strncmp (text, "Directory ", 10) == 0 && text[n - 1] == ']')
        {
          /* The prefix match guarantees n >= 10, so text[n - 1] is a
             valid access.  */
          DEBUGP (("Sensed VMS \"DIRECTORY\" format.\n"));
          detected = VMS_DIR;
          decided = 1;
        }

      free (text);
    }

  fclose (listing);

  if (detected == VMS_DIR)
    return ftp_parse_vms (file);

  return ftp_parse_unix_ls (file);
}
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   