                                     ?                                         HFRD Hypertext Services <                                         - Technical Overview      $                    1st December 1995  >                    (with minor revisions for freeware release)  <                    Supercedes: 9th August 1995, 1st May 1995                      Abstract   I                    This document is a technical overview of the implemen- H                    tation of the HFRD VMS HyperText Transport Daemon. ItG                    contains information on server configuration and CGI H                    scripting, as well as brief descriptions of the major.                    code modules of the server.  G                    It covers the environment supported by the version 3 I                    release of the HFRD VMS Hypertext Services HTTP server #                    (December 1995).   H                    Also see "HFRD Hypertext Environment" for information<                    on using the HFRD VMS Hypertext Services.  H                    It is strongly suggested those using printed versionsI                    of this document also access the Hypertext version. It @                    provides online access to some examples, etc.                      Author   !                    Mark G. Daniel 2                    Senior Information Technologist  2                    Mark.Daniel@dsto.defence.gov.au  (                    +61 (8) 2596031 (bus)(                    +61 (8) 2596673 (fax)                 !                    Mark G. Daniel 0                    High Frequency Radar Division>                    Defence Science and Technology Organisation                    PO Box 1500                    Salisbury'                    South Australia 5108                         Printed Copy   F                    This book is available for printing to a PostScriptE                    printer. 
Use a hypertext browser to access a print .                    menu in this same location.  H                    Some of the online demonstrations may not work due toF                    the local organisation of the hypertext environmentG                    differing from HFRD where it was originally written.                                                                                ii                                                                                     F                HFRD VMS Hypertext Services, Copyright (C) 1996 Mark G.                Daniel.  L                This package is free software; you can redistribute it and/orJ                modify it under the terms of the GNU General Public LicenseM                as published by the Free Software Foundation; version 2 of the -                License, or any later version.   F                This package is distributed in the hope that it will beI                useful, but WITHOUT ANY WARRANTY; without even the implied F                warranty of MERCHANTABILITY or FITNESS FOR A PARTICULARL                PURPOSE. 
See the GNU General Public License for more details.  H                You should have received a copy of the GNU General PublicI                License along with this package; if not, write to the Free L                Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139,                USA.                        P                Contents_________________________________________________________  P                Chapter_1__Introduction__________________________________________      P                Chapter_2__HyperText_Transport_Protocol_Daemon_-_Overview________      P                Chapter_3__HTTPd_Server_Account_and_Environment__________________  P                3.1    HTTPd Command Line.....................................3-5  P                       3.1.1     Server Startup...............................3-5  P                       3.1.2     Server Control...............................3-5  P                         3.1.2.1       Server Shutdown........................3-6  P                         3.1.2.2       Mapping Reload.........................3-6  P                         3.1.2.3       Counter Reset..........................3-6  P                         3.1.2.4       Logging................................3-6  P                         3.1.2.5       Authentication.........................3-7  P                Chapter_4__HTTPd_Configuration___________________________________  P                4.1    Authentication.........................................4-1  P                4.2    Current Directives.....................................4-1  P                Chapter_5__HTTPd_Mapping_Rules___________________________________  P                5.1    Mapping User Directories (tilde character ("~")).......5-6  P                Chapter_6__HTTPd_Scripting_______________________________________  P                6.1    CGI Compliance.........................................6-1  P                       6.1.1     Example DCL 
Scripts..........................6-2  P                6.2    Non-CGI Compliance Output..............................6-3  P                6.3    Raw HTTP Input.........................................6-4  1                                               iii                  M             Chapter_7__Utilities_____________________________________________   M             7.1    HTTPd Monitor..........................................7-1   M             7.2    Mapping Rule Checker...................................7-2   M             7.3    Server Workout (stress-test)...........................7-3   M             Chapter_8__Organizing_Data_Areas_________________________________     M             Chapter_9__Brief_Introduction_to_HTTPd_Code______________________   M             9.1    HTTPD.C................................................9-5   M             9.2    REQUEST.C..............................................9-5   M             9.3    FILE.C.................................................9-6   M             9.4    MENU.C.................................................9-8   M             9.5    DIR.C..................................................9-9   M             9.6    DCL.C.................................................9-10   M             9.7    SHTML.C...............................................9-12   M             9.8    ISMAP.C...............................................9-13   M             9.9    LOGGING.C.............................................9-14   M             Chapter_10__References___________________________________________                                     .                                             iv                     M             Chapter__1_______________________________________________________                Introduction    G                This document provides an technical overview of the HFRD J                VMS HyperText Transport Daemon environment. 
It is primarily
                written for use internal to HFRD and assumes that perspective
                without apology. Any additional usage is subordinate to its
                role within HFRD. The software has been written expressly for
                supporting an intra-organisational hypertext environment on a
                VMS platform. It, too, is unreservedly tailored to this purpose
                and the requirements of HFRD. All programs were designed only
                to specifically comply with the requirements of VAX C and DEC
                C, within a DEC TCP/IP Services for VMS environment.

                The document assumes a basic understanding of the hypertext
                technologies and uses terms without explaining them (e.g.
                HTTP, HTML, URL, CGI, etc.). The reader is referred to documents
                specifically on these topics (these are often best consulted
                on-line, on the Internet WWW).

                Also see "HFRD Hypertext Environment" for information on using
                the HFRD VMS hypertext facilities.

                It is strongly suggested those using printed versions of this
                document also access the Hypertext version. It provides online
                demonstrations of some concepts.

                Reasons For a Local HTTPd

                Reasons for developing a local HTTPd server are few but
                compelling:

                o  It was preferred to support the hypertext environment on a
                   VMS platform.
This is currently the most widely used and
                   accessible environment within HFRD.

                o  Existing servers (and there are quite a few variations) are
                   largely Unix based, although it is being supported (to a
                   greater or lesser extent) across a wide range of platforms.
                   Ports to VMS, if they exist, are often in progress or half-
                   baked, employing Unixisms that don't translate elegantly to
                   the VMS environment.

                                                              Introduction  1-1


                o  The VMS version of the CERN server (3.0-6) was evaluated
                   during mid-1994:

                   -  It is not multi-threaded under VMS (i.e. cannot support
                      concurrent clients). For example, a lengthy search may
                      delay other clients for unacceptable periods.

                   -  Its performance was good with document transfers, but
                      became poor when running a script.

                   -  It is acknowledged in the release notes that it cannot
                      handle a client cancelling a data transfer (a not-
                      uncommon action). This was confirmed experimentally.

                   Future versions of this server should be evaluated
                   periodically as it is receiving a good deal of support
                   and attention.

                o  HyperText Transport Protocol, in its current form, is
                   relatively simple to implement to the level required to
                   support intra-Divisional requirements.

                o  As of December 1995 the HFRD HTTPd has worked extremely
                   well and has a number of facilities tailored for the VMS
                   environment.
It can continue to be utilized until there are
                   overwhelming reasons for implementing something else.

             1-2  Introduction


             Chapter__2_______________________________________________________

             HyperText Transport Protocol Daemon - Overview


                The most fundamental component of the HFRD VMS Hypertext
                Services environment is the HTTPd, or HyperText Transport
                Protocol Daemon, or HTTP server. It provides full multi-
                threaded support. VMS ASTs (Asynchronous System Traps) are
                used to construct an I/O event-driven server.

                It provides a complete implementation of a basic HTTP server,
                including:

                o  concurrent, multi-threaded client support

                o  "GET", "HEAD", and "POST" (albeit limited by the lack of
                   authentication) HTTP method support

                o  versatile directory listing (generic and VMS-style)

                o  CGI-compliant scripting (with configurable, automatic, MIME
                   content-type initiated activation)

                o  HTML pre-processing (server-side includes)

                o  clickable-image support (NCSA and CERN formats)

                o  "If-Modified-Since:"/"304 Not Modified" functionality
                   (document is only sent if modified since time specified
                   by client)

                o  Web-standard, "common"-format access log (allowing
                   processing by most log-analysis tools)

                o  host-level access control based on per-host or per-domain
                   acceptance or rejection

                It executes permanently on the server host, listening for
                client connection requests on TCP/IP port 80 (by default).
It
                provides concurrent services for a (technically) unlimited
                number of clients (constrained only by the server resources).
                When a client connects, HTTPd performs the following functions:

                1. creates a thread for this request

                2. reads and analyzes the HTTP request sent,
                   depending on the nature of the request . . .

                   o  initiates I/O-driven transfer of the requested file

                   o  initiates I/O-driven interpretation of a menu file

                            HyperText Transport Protocol Daemon - Overview  2-1


                   o  initiates I/O-driven processing of a pre-processable HTML
                      file

                   o  initiates I/O-driven processing of a clickable-image
                      mapping file

                   o  initiates I/O-driven directory listing

                   o  spawns a subprocess to execute a script (DCL procedure)
                      with:

                     -  SYS$INPUT and SYS$OUTPUT assigned to intermediate
                        mailboxes (essentially pipes)

                     -  HTTP$INPUT logical name providing a mailbox allowing
                        the script to read the raw HTTP data stream from the
                        client

                     -  CGI-compliant symbols representing the important CGI
                        variables of the request

                     -  for the life of the subprocess HTTPd:

                        o  controls the essential behaviour of the subprocess
                           via its SYS$INPUT mailbox

                        o  receives data written by the subprocess to its
                           SYS$OUTPUT via the associated mailbox, writing this
                           to the client

                        o  receives data sent by the client, writing this
                  to the mailbox associated with the subprocess'$                           HTTP$INPUT  I                3. closes the connection to the client and disposes of the (                   thread data structures  L                For I/O intensive activities like file transfer and directoryI                listing, the AST-driven code provides an efficient, multi- J                threaded environment for the concurrent serving of multiple                clients.   J                For scripts, the technique of using multi-threaded, concur-L                rent, spawned subprocesses, attached to standard input/outputL                streams, provides a versatile, extensible, powerful scriptingK                environment. Any DCL procedure or image executing within the J                subprocess can behave as an HTTP server. This capability isK                employed to easily extend the basic services provided by the K                core daemon code. An HTTP script/server for this environment J                does not need to concern itself with network activities, itE                merely reads and writes from the standard I/O streams.   ?             2-2  HyperText Transport Protocol Daemon - Overview                      M             Chapter__3_______________________________________________________   0             HTTPd Server Account and Environment    E                The HTTPd server account should be a standard account, K                preferably in a group of its own (definitely at least a non- L                system, non-user group), with sufficient quotas to handle the                 expected traffic.                    VMS Account  =                The following provides a guide to the account:   U                   Username: HTTP$SERVER                      Owner:  HyperText Daemon d                   Account:  HTTPD                            UIC:    [377,377] ([HTTPD,HTTP$SERVER])N                   CLI:      DCL               
               Tables: DCLTABLES1                   Default:  HT_ROOT:[HTTP$SERVER]v!                   LGICMD:   LOGINt/                   Flags:  Restricted DisNewMailr5                   Primary days:   Mon Tue Wed Thu Frip=                   Secondary days:                     Sat SunsX                   Primary   000000000011111111112222  Secondary 000000000011111111112222X                   Day Hours 012345678901234567890123  Day Hours 012345678901234567890123X                   Network:  ##### Full access ######            ##### Full access ######X                   Batch:    ##### Full access ######            ##### Full access ######X                   Local:    -----  No access  ------            -----  No access  ------X                   Dialup:   -----  No access  ------            -----  No access  ------X                   Remote:   -----  No access  ------            -----  No access  ------V                   Expiration:            (none)    Pwdminimum:  6   Login Fails:     0P                   Pwdlifetime:         90 00:00    Pwdchange:      (pre-expired)b                   Last Login:            (none) (interactive), 11-MAY-1995 08:44 (non-interactive)K                   Maxjobs:         0  Fillm:       300  Bytlm:       300000 K                   Maxacctjobs:     0  Shrfillm:      0  Pbytlm:           0dK                   Maxdetach:       0  BIOlm:       512  JTquota:       1024 K                   Prclm:         100  DIOlm:       512  WSdef:         1000nK                   Prio:            4  ASTlm:       600  WSquo:         2000fK                   Queprio:         0  TQElm:       100  WSextent:     20000 K                   CPU:        (none)  Enqlm:       256  Pgflquo:     200000 (                   Authorized Privileges:$                     NETMBX    TMPMBX%                   Default Privileges: $                     NETMBX    TMPMBX        N                                      HTTPd Server Account and Environment  3-1 f  
e                             Account LOGIN.COM  I                The following is suggested as the LOGIN.COM for the server K                account. It provides a secure DCL environment for the server '                image to execute within.   #                   $ SET NOCONTROL=Y                    $ SET NOON-                   $ IF F$MODE() .EQS. "OTHER"                    $ THEN7                   $    IF F$TRNLNM("MULTINET") .EQS. "" 6                   $       THEN HTTPD = "$HT_EXE:HTTPD"?                   $       ELSE HTTPD = "$HT_EXE:HTTPD_MULTINET"t                   $    ENDIF"                   $    HTTPD_LOOP:G                   $       HTTPD /LOG=HT_LOGS:'F$GETSYI("NODENAME")'.LOGLO                   $!      (non-error exit, must be a restart, loop immediately)h9                   $       IF $STATUS THEN GOTO HTTPD_LOOPAP                   $!      (error exit, wait, then try to start the server again)'                   $       WAIT 00:01:00 )                   $       GOTO HTTPD_LOOPn&                   $!   END_HTTPD_LOOP:                    $    STOP/ID=0                   $ ENDIF -                   $ IF F$MODE() .EQS. "BATCH"n                   $ THENP                   $    PURGE /KEEP=3 HT_SERVER_LOGS:'F$GETSYI("NODENAME")'80.LOG0                   $    RUN SYS$SYSTEM:LOGINOUT -1                            /DETACHED /AUTHORIZE -_]                            /INPUT=NL: /OUTPUT=HT_SERVER_LOGS:'F$GETSYI("NODENAME")'_HTTPD.LOG_                    $    STOP/ID=0                   $ ENDIF_>                   $!(interactive and network modes stop here!)
                   $ STOP/ID=0v                  Privileged Image   L                As this image is to be installed with privileges unauthorizedH                use should be prevented by applying an ACL similar to the6                following against the executable image:  3                   $ SET SECURITY HT_EXE:HTTPD.EXE - O                     /ACL=((IDENT=HTTP$SERVER,ACCESS=R+E),(IDENT=*,ACCESS=NONE))   K                This can be done once, at installation, or for peace-of-mind @                (a.k.a. VMS-ish paranoia) at each server startup.  L                As the HTTP$SERVER account should be completely unprivileged,K                and the HTTPd image requires NETMBX, TMPMBX, PRMMBX, PSWAPM,.I                SYSNAM and SYSPRV privileges, it must be installed using a_0                command similar to the following:  5             3-2  HTTPd Server Account and Environment                  3                   $ INSTALL = "$SYS$SYSTEM:INSTALL".Y                   $ INSTALL ADD HT_EXE:HTTPD.EXE /PRIVILEGE=(PRMMBX,PSWAPM,SYSPRV,SYSNAM)                     Logical Names  M                The following logical names are essential for the operation of_C                the HTTPd server and must be defined before startup:1  L                o  HTTPD$CONFIG - location of the configuration file (definedF                   system-wide, or in the job table if server-specific)  H                o  HTTPD$MAP - location of the mapping rule file (definedF                   system-wide, or in the job table if server-specific)  G                o  HTTPD$GMT - offset from GMT (e.g. 
"+10:30", "-01:15")   H                o  HTTPD$LOG - if logging is enabled and no log file nameM                   specified on the command line, this logical must be defined.$                   to locate the file  E                o  HT_EXE - directory containing the executable images   H                o  HT_LOGS - optional definition, for convenient log file                   specificationa  F                o  HT_SERVER_LOGS - optional definition, for convenient@                   detached server process log file specification  K                The following logical name is created by the executing HTTPd B                server and defines the name of the control mailbox:  #                o  HTTPDport$CONTROL.  M                The following logical names are created by the executing HTTPd.>                server if the HTTPd monitor utility is enabled:  !                o  HTTPDport$COUNT.                  o  HTTPDport$PID   #                o  HTTPDport$REQUEST.  /                Server Process Logging Directory   H                The server process log directory (output for the detachedK                HTTPd server processes) may require explicit access controls I                for the HTTPd account. This can be done by applying an ACL (                similar to the following:  :                   $ SET SECURITY HT_ROOT:[LOG]SERVER.DIR -N                     /ACL=((IDENT=HTTP$SERVER,ACCESS=R+W+E, OPTIONS=DEFAULT), -=                           (IDENT=HTTP$SERVER,ACCESS=R+W+E), -_C                           (IDENT=*,ACCESS=NONE, OPTIONS=DEFAULT), - 0                           (IDENT=*,ACCESS=NONE))  N                                      HTTPd Server Account and Environment  3-3                 H                As with the ACL on the server executable this can be doneH                once, at installation (or, if right over the top, at eachK                server startup). 
Appropriate disk quotas may also need to be
                applied.

                Startup

                Putting all this together the HTTPd server startup procedure
                becomes something similar to the following:

                   $ DEFINE /SYSTEM /TRANSLATION=CONCEALED HT_ROOT DSA811:[HT_ROOT.]
                   $!
                   $ SET SECURITY HT_ROOT:[LOG]SERVER.DIR -
                     /ACL=((IDENT=HTTP$SERVER,ACCESS=R+W+E, OPTIONS=DEFAULT), -
                           (IDENT=HTTP$SERVER,ACCESS=R+W+E), -
                           (IDENT=*,ACCESS=NONE, OPTIONS=DEFAULT), -
                           (IDENT=*,ACCESS=NONE))
                   $!
                   $ IF F$GETSYI("ARCH_NAME") .EQS. "VAX"
                   $    THEN DEFINE /SYSTEM HT_EXE HT_ROOT:[VAX]
                   $    ELSE DEFINE /SYSTEM HT_EXE HT_ROOT:[AXP]
                   $ ENDIF
                   $!
                   $ DEFINE /SYSTEM HT_LOGS HT_ROOT:[LOG]
                   $ DEFINE /SYSTEM HT_SERVER_LOGS HT_ROOT:[LOG.SERVER]
                   $ DEFINE /SYSTEM HTTPD$CONFIG HT_ROOT:[000000]HTTPD$CONFIG.CONF
                   $ DEFINE /SYSTEM HTTPD$MAP HT_ROOT:[000000]HTTPD$MAP.CONF
                   $ DEFINE /SYSTEM HTTPD$GMT "+10:30"
                   $!
                   $ INSTALL = "$SYS$SYSTEM:INSTALL"
                   $ IF F$TRNLNM("MULTINET") .EQS. ""
                   $ THEN
                   $    SET SECURITY HT_EXE:HTTPD.EXE -
                        /ACL=((IDENT=HTTP$SERVER,ACCESS=R+E),(IDENT=*,ACCESS=NONE))
                   $    INSTALL ADD HT_EXE:HTTPD.EXE -
                                    /PRIVILEGE=(SYSPRV,SYSNAM,PRMMBX,PSWAPM)
                   $ ELSE
                   $    SET SECURITY HT_EXE:HTTPD_MULTINET.EXE -
                        /ACL=((IDENT=HTTP$SERVER,ACCESS=R+E),(IDENT=*,ACCESS=NONE))
                   $    INSTALL ADD HT_EXE:HTTPD_MULTINET.EXE -
                                    /PRIVILEGE=(SYSPRV,SYSNAM,PRMMBX,PSWAPM)
                   $ ENDIF
                   $ SUBMIT /QUEUE=SYS$BATCH /USER=HTTP$SERVER /NOLOG /NOPRINT -
                            HT_ROOT:[HTTP$SERVER]LOGIN.COM


             3-4  HTTPd Server Account and Environment


             3.1 HTTPd Command Line

             3.1.1 Server Startup

                When starting up the server several characteristics of the
                server may be specified using qualifiers on the command line.
                If not specified appropriate defaults are employed.

                o  /CGI_PREFIX= the prefix to the CGI symbol names created
                   for a script (defaults to "WWW_", similar to the CERN VMS
                   HTTPd, see Chapter 6)

                o  /FILBUF= number of bytes in the read buffer for files
                   open for processing (i.e.
menu files, image mappingyJ                   configuration files, pre-processed HTML files, etc., not(                   direct file transfers)  D                o  /[NO]LOG[=name] either disables logging (overridesM                   configuration directive), or enables logging and optionally G                   specifies the log file name (also see section Logical 8                   Names, logging is disabled by default)  M                o  /NETBUF= minimum number of bytes in the network read bufferh  K                o  /OUTBUF= number of bytes in the output buffer (for directtK                   file transfers, buffered output from menu interpretation,_+                   HTML-preprocessing, etc.)   C                o  /PRIORITY= server process priority (default is 4)   J                o  /SUBBUF= number bytes in a subprocess' SYS$OUTPUT buffer  G                o  /[NO]SWAP= controls whether the server process may besE                   swapped out of the balance set (default is swappingr                   disabled)c  M                Note:  buffer sizes apply on a per-request (thread) basis, andsK                may be tailored for specific environments at server startup.P                3.1.2 Server Control  I                A foreign command for the HTTPD control functionality will M                need to be assigned in the adminstration users' LOGIN.COM, for                 example:P  *                   HTTPD == "$HT_EXE:HTTPD"  D                The control functionality (via the /DO= qualifier) isH                available to the privileged user. 
If a non-default server
                port, or multiple servers on the one system are being used,
                then it will be necessary to append a /PORT= qualifier to
                any command, and issue it multiple times.

                                      HTTPd Server Account and Environment  3-5


             3.1.2.1 Server Shutdown

                The running server may be elegantly shut down, without loss of
                existing client requests, using the following command on the
                server system:

                   $ HTTPD /DO=EXIT

                The running server may be immediately and unconditionally shut
                down using the following command on the server system:

                   $ HTTPD /DO=ABORT

                The running server may be elegantly restarted, without loss of
                existing client requests, using the following command on the
                server system:

                   $ HTTPD /DO=RESTART

             3.1.2.2 Mapping Reload

                The mapping rules may be reloaded into the running server
                using the following command on the server system:

                   $ HTTPD /DO=MAP

             3.1.2.3 Counter Reset

                The server counter values are carried over when a server
                (re)starts (provided the system has stayed up).
To reset these
                counters use the following command on the server system:

                   $ HTTPD /DO=ZERO

                Note that the count of the number of times the counters have
                been zeroed is not reset by this command.

             3.1.2.4 Logging

                The HTTPd server request log may be opened (enabled) or closed
                (disabled) using the following commands:

                   $ HTTPD /DO=LOG=OPEN

                and

                   $ HTTPD /DO=LOG=CLOSE

                Any records still in the RMS buffers may be flushed using:

                   $ HTTPD /DO=LOG=FLUSH

             3-6  HTTPd Server Account and Environment


             3.1.2.5 Authentication

                Also see Section 4.1.

                The HTTPd's authentication facility tracks failures and after
                a given threshold consistently rejects authentication requests
                for the given username without actually validating the
                username/password. This is done to prevent password attacks
                via the HTTPd server.

                The authentication database may be displayed using:

                   $ HTTPD /DO=AUTH=ALL

                Authentication database failure records only may be displayed
                using:

                   $ HTTPD /DO=AUTH=FAIL

                Any failure records may have their count reset to zero using:

                   $ HTTPD /DO=AUTH=RESET=realm:username


                                      HTTPd Server Account and Environment  3-7


             Chapter__4_______________________________________________________

             HTTPd Configuration


                HFRD's HTTPd configuration is a subset based on the CERN
                HTTPd.
By default, the system-table logical name HTTPD$CONFIG
                locates a common configuration file, unless an individual
                configuration file is specified using a job-table logical
                name. Simple editing of this file changes the configuration.
                Comment lines may be included by prefixing them with the hash
                "#" character. Configuration file directives are not case-
                sensitive. Any changes to the configuration file can only be
                enabled by restarting the HTTPd process using the following
                command on the server system:

                   $ HTTPD /DO=RESTART

                The reader is also referred to the CERN HTTPd documentation for
                background information on HTTP daemon configuration. Also see
                Chapter 5 for mapping rule configuration.

             4.1 Authentication

                The HFRD VMS HTTPd provides little in the way of authorization
                and path access control. It does have a scheme providing
                user name authentication from the host's VMS authorization
                database (SYSUAF.DAT). This is not recommended for Internet,
                WAN or insecure LAN use. Passwords are transmitted encoded
                but unencrypted from client to server, and so on anything but
                a carefully secured LAN pose a distinct and real security
                threat. Even on an internal LAN a privileged account should
                never, ever be authenticated in this fashion. Hence, by
                default, this facility is disabled. You have been warned!

                The author acknowledges this to be a significant deficiency and
                would be willing to remedy it if appropriate demand exists.

             4.2 Current Directives

                Note that all boolean directives are disabled (OFF) by
                default.
This is done so that there can be no confusion aboutH                what is enabled and disabled by default. To use a directive-A                controlled facility it must be explicitly enabled.   8                1. Accept host/domain name (default: all)    N                                                       HTTPd Configuration  4-1                 L                   One or more (comma-separated if on the same line) internetL                   host/domain names, with "*" wildcarding for host/subdomainI                   matching, to be explicitly allowed access. Also see the4E                   Reject directive. Reject directives have precedence3J                   over Accept directives. The Accept directive may be used?                   multiple times. Also see DNSLookup directive.                      Examples:   0                      Accept *.remote.dsto.gov.au.                      Accept *.hfrd.dsto.gov.au  E                2. AddType .suffix representation encoding script-name +                   description  (no default)   I                   Binds a file suffix (extension, type) to a mime content K                   type and encoding. The script name is used to auto-script M                   (see Auto-Scripting) a specified file type. The description B                   is used as documentation for directory listings.                     Examples:m  \                      AddType  .html  text/html   8bit      -       HyperText Markup LanguageM                      AddType  .txt   text/plain  8bit      -       plain texteN                      AddType  .gif   image/gif   binary    -       image (GIF)S                      AddType  .hlb   text/x-script 7bit    /Conan  VMS Help library b                      AddType  .decw$book   text/x-script   8bit    /HyperReader    Bookreader book  B                3. 
AddIcon icon-URL ALT-text template  (no default)  M                   Specifies a directory listing icon and alternative text for B                   the mime content type specified in the template.                     Examples:O  A                      AddIcon  /icon/-/doc.xbm    [HTM]  text/htmleB                      AddIcon  /icon/-/text.xbm   [TXT]  text/plainA                      AddIcon  /icon/-/image.xbm  [IMG]  image/gifr  '                4. AddBlankIcon icon-URLg.                   AddDirIcon icon-URL ALT-text1                   AddParentIcon icon-URL ALT-text @                   AddUnknownIcon icon-URL ALT-text (no defaults)  K                   Specifies a directory listing icon for these non-content- ,                   type parts of the listing.                     Examples:G  6                      AddBlankIcon    /icon/-/blank.xbm?                      AddDirIcon      /icon/-/dir.xbm      [DIR] ?                      AddParentIcon   /icon/-/back.xbm     [<--]'?                      AddUnknownIcon  /icon/-/unknown.xbm  [???]0  3                5. AuthLocal ON | OFF (default: OFF)r  $             4-2  HTTPd Configuration                 J                   Enables or disables SYSUAF user name authentication. See                   Section 4.1.  ,                6. Busy integer (default: 10)  L                   The maximum number of concurrent client connections beforeK                   a "server too busy right now ... try again shortly" error ,                   is returned to the client.  7                7. CommentedInfo ON | OFF (default: OFF)   L                   Includes, commented at the beginning of any HTML document,I                   the software ID of the server and any relevant VMS file 1                   specification for the document.   ?                8. DirAccess ON | OFF | SELECTIVE (default: OFF).  K                   Controls directory listings. 
SELECTIVE allows access only H                   to those directories containing a file .WWW_BROWSABLE.I                   The HFRD HTTPd directory access facility always ignoresTK                   directories containing a file named .WWW_HIDDEN. Also seeE,                   the DirWildcard directive.  ;                9. DirLayout string (default: I__L__R__S__D)l  L                   Allows specification of the directory listing layout. ThisG                   is a short string that specifies the included fields,-D                   relative placement and optionally the width of theI                   fields in a directory listing. Each field is controlledAH                   by a single letter and optional leading decimal numberF                   specifying its width. If a width is not specified anG                   appropriate default applies. An underscore is used toPL                   indicate a single space and is used to separate the fields/                   (two consecutive works well).e  &                   o  C - creation date  M                   o  D - description (having no field-width attribute this is 0                      always best specified last)  >                   o  I - icon (takes no field-width attribute)  M                   o  L - link (highlighted anchor using the name of the file)a  ?                   o  N - name (no link, why bother? who knows!)l  0                   o  O - owner (can be disabled)  &                   o  R - revision date                     o  S - size   ;                   As illustrated in the following examples:i  1                      DirLayout       I__15L__S__D /                      DirLayout       15L__9R__Sy0                      DirLayout       15N_9C_9R_S  N                                                       HTTPd Configuration  4-3                 3               10. 
DirOwner ON | OFF  (default: OFF)e  H                   Allows specification and display of the RMS file owner                   information.  =               11. DirReadme TOP | BOTTOM | OFF (default: OFF)n  F                   If any of the files provided using the DirReadMeFileI                   directive are located in the directory the contents are I                   included at the top or bottom of the listing (or not at G                   all). Plain-text are included as plain-text, HTML are G                   included as HTML allowing markup tags to be employed.   8               12. DirReadMeFile FILE.SUFFIX (no default)  G                   Specifies the names and order in which a directory is L                   checked for read-me files. This can be enabled or disabledK                   using the DirReadme directive. Plain-text are included asrL                   plain-text, HTML are included as HTML allowing markup tags!                   to be employed.                      Examples:i  .                      DirReadMeFile readme.html-                      DirReadMeFile readme.htmo*                      DirReadMeFile readme.-                      DirReadMeFile readme.txtO-                      DirReadMeFile readme.1st   5               13. DirWildcard OFF | ON (default: OFF)   J                   This enables the facility to force the server to provideK                   a directory listing by providing a wildcard file specifi-EK                   cation, even if there is a home (welcome) document in theCK                   directory. This should not be confused with the DirAccess)D                   directive which controls directory listing itself.  3               14. DNSLookup ON | OFF (default: OFF)_  J                   Enables or disables connection request host name resolu-H                   tion. 
This functionality may be expensive (in terms ofK                   processing overhead) and make serving granularity coarserYL                   if DNS is involved. If not enabled and logging is, the en-L                   try is logged against the numeric internet address. If notM                   enabled any accept or reject directive must be expressed asL$                   numeric addresses.  3               15. InputTimeout integer (default: 2)   G                   Number of minutes to allow a connection request to be J                   in progress without submitting a complete request header(                   before terminating it.  1               16. Logging ON | OFF (default: OFF)R  7                   Enables or disables the activity log.   $             4-4  HTTPd Configuration T  E            5               17. OutputTimeout integer (default: 10)R  J                   Number of minutes to allow a request to be output before!                   terminating it.   ,               18. Port integer (default: 80)  7                   IP port number for server to bind to.   3               19. Recommend ON | OFF (default: OFF)G  M                   Provides a short message recommending action when reporting L                   an error to a client. For example, if a document cannot be#                   found it may say:   ?                      (document, or bookmark, requires revision)   9               20. Reject host/domain name (default: none)y  L                   One or more (comma-separated if on the same line) internetL                   host/domain names, with "*" wildcarding for host/subdomainH                   matching, to be explicitly denied access. Also see theE                   Accept directive. Reject directives have precedencesH                   over Accept directives. The Reject directive may be used?                   multiple times. 
Also see DNSLookup directive.u                     Example:  C                      Reject *.remote.dsto.gov.au,*.hfrd.dsto.gov.au   *               21. Search path (no default)  I                   Specifies the physical path to the default query-string (                   keyword search script.                     Examples:   1                      Search /ht_root/script/queryt  /               22. sHTML ON | OFF (default: OFF)i  :                   Enables or disables HTML pre-processing.  7               23. sHTMLaccesses ON | OFF (default: OFF)   E                   Enables or disables HTML pre-processing file access                    counter.  3               24. sHTMLexec ON | OFF (default: OFF)T  G                   Enables or disables HTML pre-processing DCL executiono                    functionality.  2               25. Welcome file.suffix (no default)  G                   Specifies the names and order in which a directory iseI                   checked for home page files. If no home page is found aa1                   directory listing is generated.   N                                                       HTTPd Configuration  4-5 y                                  Examples:v  '                      Welcome  home.htmln&                      Welcome  home.htm'                      Welcome  home.menu &                      Welcome  home.mnu  K                The example configuration file for the HFRD HTTPd server can                 be viewed.T  &                <online hypertext link>                                                                              $             4-6  HTTPd Configuration                     M             Chapter__5_______________________________________________________c               HTTPd Mapping Rules     L                URLs paths are mapped to physical file system locations usingH                mapping similar to other HTTPd implementations. 
HFRD's isJ                based on the CERN HTTPd implementation, with one additionalH                script mapping rule. By default, the system-table logicalK                name HTTPD$MAP locates a common mapping rule file, unless anrK                individual rule file is specified using a job- table logicaliJ                name. Simple editing of the mapping file changes the rules.L                Comment lines may be included by prefixing them with the hashM                "#" character. Although, there is no fixed limit on the numberiK                of rules there are the processing implications of scanning ac&                large, linear database.  J                Rules are given a basic consistency check when loaded (i.e.G                server startup, map reload, etc.) If there is an obviousnH                problem (unknown rule, missing component, etc.) a warningG                message is generated and the rule is not loaded into theeH                database. This will not cause the server startup to fail.M                These warning messages may be found in the server process log.   D                A command-line rule checking utility is available forG                assessing rule database files and the mappings generatedt7                against supplied paths. See Section 7.2.   I                Any changes to the mapping file may be (re)loaded into thegM                running HTTPd server using the following command on the serverc                system:  !                   $ HTTPD /DO=MAPs  ;                Also see Chapter 4 for daemon configuration.a  $                MAP, PASS, FAIL Rules  %                1. map template resultl  F                   If the URL path matches the template, substitute theJ                   result string for the path and use that for further rule                   processing..                  2. 
pass template &                   pass template result  F                   If the URL path matches the template, substitute theL                   result if present (if not just use the original URL path),.                   processing no further rules.  N                                                       HTTPd Mapping Rules  5-1 t  s            J                   The result should be a physical VMS file system specifi-K                   cation in URL format. If there is a direct correspondence L                   between the template and result the result may be omitted.  I                   The PASS directive is also used to reverse-map VMS filef>                   specifications to the URL format equivalent.                  3. fail templateT  H                   If the URL path matches the template, prohibit access,.                   processing no further rules.                  REDIRECT Rule  *                1. redirect template result  M                   If the URL path matches the template, substitute the resultgK                   string for the path. Process no further rules. The resulttG                   must be a full URL (http://host/path), and is used to I                   redirect requests to another server on a separate host.   I                This will probably not be used extensively on the internall                HFRD network.  4                EXEC and SCRIPT, Script Mapping Rules  "                Also see Chapter 6.  :                The EXEC rule maps CGI script directories.  J                The SCRIPT rule maps CGI script file names. It is a littleM                different to the EXEC rule and an extension to the CERN rules.   L                Both rules must have a template and result, and both must endM                in a wildcard asterisk. The placement of the wildcards and therF                subsequent functionality is slightly different however.  &                1. 
exec template result  I                   The EXEC rule requires the template's asterisk to imme-oJ                   diately follow the slash terminating the directory spec-K                   ification containing the scripts. The script name followsnI                   immediately as part of the wildcard-matched string. Fort                   example:  4                      exec /htbin/* /ht_root/script/*  K                   If the URL path matches the template, the result, includ-oM                   ing the first slash-terminated part of the wildcard-matchedvL                   section, becomes the URL format physical VMS file specifi-K                   cation for the DCL procedure of the script to be executediL                   (the default file extension of .COM is not required). WhatM                   remains of the original URL path is used to create the pathe8                   information. Process no further rules.  $             5-2  HTTPd Mapping Rules                 H                   Hence, the EXEC rule will match multiple script speci-H                   fications without further rules, the script name beingK                   supplied with the URL path. Hence any script (i.e. proce-nM                   dure, executable) in the specified directory is accessible,rJ                   a possible security concern if script management is dis-                   tributed.s  (                2. script template result  E                   The SCRIPT rule requires the template's asterisk to F                   immediately follow the unique string identifying theL                   script in the URL path. The wildcard-matched string is theJ                   following path, and supplied to the script. 
For example:  :                      script /conan* /ht_root/script/conan*  J                   If the URL path matches the template, the result becomesL                   the URL format physical VMS file specification for the DCLJ                   procedure of the script to be executed (the default fileI                   extension of .COM is not required). What remains of the K                   original URL path is used to create the path information. +                   Process no further rules.   4                                                 Note  E                         The wildcard asterisk is best located immedi-sD                         ately after the unique script identifier. InC                         this way there does not need to be any pathiF                         supplied with the script. If even a slash fol-C                         lows the script identifier it may be mappedrH                         into a file specification that may or may not be1                         meaningful to the script.g  M                   Hence, the SCRIPT rule will match only the script specifiedbL                   in the result, making for finely-granular scripting at theK                   expense of a rule for each script thus specified. 
It also J                   implies that only the script name need precede any other#                   path information.n  I                   It may be thought of as a more efficient implementationcI                   of the equivalent functionality using two CERN rules, as 7                   illustrated in the following example:   /                      map /conan* /script/conan*t5                      exec /script/* /ht_root/script/*           N                                                       HTTPd Mapping Rules  5-3 i  -            "                Rule Interpretation  K                The rules are scanned from first towards last, until a pass,tL                exec or fail is encountered, when processing ceases and finalL                substitution occurs. Map rules substitute the template with8                the result and continue to the next rule.  7                Use of wildcards in template and result:   E                o  The template may contain one or more asterisk ("*")-J                   wildcard symbols. These match zero or more characters upH                   until the character following the wildcard (or end-of-M                   string). If no wildcard is present then the path must matchF'                   the template exactly.   L                o  The result may contain one or more asterisk ("*") wildcardF                   symbols. It must not contain more wildcards than theL                   template. The result wildcards are expanded to replace theK                   matching characters of the respective template wildcards. I                   Characters represented by wildcards in the template notgG                   represented by a corresponding wildcard in the resultrJ                   are ignored. Non-wildcard result characters are directlyI                   inserted in reconstructed path. Non-wildcard characters H                   in the template are ignored. 
If the result contains no@                   wildcards it completely replaces the URL path.  M                The example mapping rule file for the HFRD HTTPd server can beg                viewed.  &                <online hypertext link>  #                Examples of Map Ruleh  L                The result string of these rules may or may not correspond toK                a VMS physical file system path. Either way the resultingaC                rule is further processed before passing or failing.c  B                o  MAP /HYPERDATA/UNIX/* /HYPERDATA/SOFTWARE/UNIX/*  L                   If a URL path /HYPERDATA/UNIX/SHELLS/C is being mapped theX                   path would be replaced by /HYPERDATA/SOFTWARE/UNIX/SHELLS/C,9                   and this used to process further rules.f  $                Examples of Pass Rule  M                The result string of these rules should correspond to a VMSc"                physical file path.  =                o  PASS /HYPERDATA/RTS/* /USER$RTS/HYPERDATA/*   L                   If a URL path /HYPERDATA/RTS/HOME.HTML is being mapped theJ                   path would be replaced by /USER$RTS/HYPERDATA/HOME.HTML,7                   and this returned as the mapped path.   $             5-4  HTTPd Mapping Rules    g            :                o  PASS /ICON/BHTS/* /HYPERDATA/ICON/BHTS/*  K                   If a URL path /ICON/BHTS/DIR.XBM is being mapped the path M                   would be replaced by /HYPERDATA/ICON/BHTS/DIR.XBM, and thisS.                   returned as the mapped path.  $                Examples of Fail Rule  +                o  FAIL /HYPERDATA/PRIVATE/*   L                   If a URL path /HYPERDATA/PRIVATE/HOME.HTML is being mapped7                   the path would immediately be failed. 
                  o  FAIL /*   G                   To ensure all access fails, other than that explicitlyFF                   passed, this entry should be included in the rules.  0                Examples of Exec and Script Rules  1                o  EXEC /HTBIN/* /HT_ROOT/SCRIPT/*   F                   If a URL path /HTBIN/ISMAP/HYPERDATA/EXAMPLE.CONF isE                   being mapped the "/ht_root/script/" must be the URLrI                   format equivalent of the physical VMS specification foraF                   the directory locating the script DCL procedure. TheH                   /HYPERDATA/EXAMPLE.CONF that followed the /HTBIN/ISMAPI                   in the original URL becomes the translated path for the K                   script. See Chapter 6 for other information on scripting.   7                o  SCRIPT /CONAN* /HT_ROOT/SCRIPT/CONAN*a  E                   If a URL path /CONAN/HYPERDATA/EXAMPLE.HLB is beinglK                   mapped the "/ht_root/script/conan" must be the URL formatrJ                   equivalent of the physical VMS specification for the DCLI                   procedure. The /HYPERDATA/EXAMPLE.HLB that followed therI                   /CONAN/ in the original URL becomes the translated path H                   for the script. See Chapter 6 for other information on                   scripting.  (                Examples of Redirect Rule  >                o  REDIRECT /ANOTHERGROUP/* HTTP://HOST/GROUP/*  B                   If a URL path /ANOTHERGROUP/THIS/THAT/OTHER.HTML@                   is being mapped the URL would be redirected to2                   HTTP://HOST/GROUP/THIS/THAT/OTHER.HTML      N                                                       HTTPd Mapping Rules  5-5    t            @             5.1 Mapping User Directories (tilde character ("~"))  C                This server will map user directories using the samesC                mechanisms as for any other. 
No reference is made toeI                SYSUAF.DAT, user support is accomplished via a combination J                of mapping rule and logical name. This approach relies on aJ                correspondence between the user name and the home directoryF                name. Hence users are made known by the HTTPd using theG                name of their top-level directory. As the naming of home H                directories using the user name is a common practice thisM                mechanism should suffice in the majority of cases. Where thereoK                is no such correspondence individual rules could be used fort                each user.   I                The PASS rule provides a wildcard representation of users'bF                directory paths. As part of this mapping a subdirectoryM                specifically for the hypertext data should be always included. F                Never map users' top-level directories. For instance ifF                a user's account home directory was located in the areaL                USER$DISK:[DANIEL] the following rule would potentially allowM                the user DANIEL to provide hypertext information from the hometI                subdirectory [.WWW] (if the user has created it) using the                  accompanying URL:  /                   pass /~*/* /user$disk/*/www/*   &                   http://host/~daniel/  H                It is recommended that a separate logical name be createdJ                for locating user directories. This helps hide the internalJ                organisation of the file system. The following logical nameA                definition and mapping rule illustrate this point.e  W                   $ DEFINE /SYSTEM /EXEC /TRANSLATION=CONCEALED WWW_USER DSA811:[USER.]c  .                   pass /~*/* /www_user/*/www/*  G                Where users are grouped into different areas of the fileo;                system a logical search list may be defined. 
  A                   $ DEFINE /SYSTEM /EXEC /TRANSLATION=CONCEALED - %                            WWW_USER - -                            DISK1:[GROUP1.], -e-                            DISK1:[GROUP2.], -r-                            DISK2:[GROUP3.], - *                            DISK2:[GROUP4.]  .                   pass /~*/* /www_user/*/www/*  J                As logical search lists have specific uses and some compli-J                cations this is the only use for them recommended with thisL                server, although it is specifically coded to allow for search0                lists in document specifications.  $             5-6  HTTPd Mapping Rules d  b            H                If only a subset of all users are to be provided with WWWH                publishing access either their account directories can beL                individually mapped (best used only with a small number) or aL                separate area of the file system be provided for this purpose5                and specifically mapped as user space.   D                Of course, user mapping is amenable to all other ruleK                processing so it is a simple matter to redirect or otherwise M                process user paths. For instance, the published user name does J                not need to, or need to continue to, correspond to any realA                user area, or the user's actual name or home area:   @                   redirect /~doej/* http://a.nother.host/~doej/*;                   pass /~doej/* /www/messages/deceased.htmlH>                   pass /~danielm/* /special$www$area/danielm/*?                   pass /~Mark.Daniel/* /user$disk/danielm/www/* .                   pass /~*/* /www_user/*/www/*  L                A user directory is always presented as a top-level directoryM                (i.e. 
no parent directory is shown), although any subdirectoryo.                tree is accessible by default.                                                          N                                                       HTTPd Mapping Rules  5-7 i                   M             Chapter__6_______________________________________________________                HTTPd Scriptingf    K                Scripts are mechanisms for creating simple "servers" sendingdM                data to a client, extending the services provided by the basic H                server. Anything that can write to SYS$OUTPUT can be usedJ                to generate script output. A DCL procedure or an executableK                can be the basis for a script. Simply TYPE-ing a file can.%                provide script output.l  H                Scripts are enabled using the exec or script rules in theM                mapping file (see Chapter 5). The script portion of the resultuH                must be a URL equivalent of the physical VMS procedure orJ                executable specification. It is not necessary to supply theJ                .COM or .EXE file type (although not forbidden either), theM                server will first check for a procedure and if none found thene'                check for an executable.n               6.1 CGI Compliance  J                The HTTPd scripting mechanism is designed to be largely WWW8                CGI (Common Gateway Interface) compliant.  &                CGI Compliant Variables  H                Environment variables are created in a similar way to theM                CERN VMS HTTPd implementation, where CGI environment variables J                are provided to the script via DCL global symbols. 
Each CGIH                variable symbol name is prefixed with "WWW_" (by default,L                although this can be changed using the /CGI_PREFIX qualifier,G                see Section 3.1, this is not recommended if the HFRD VMS J                scripts are to be used, as they expect CGI variable symbols.                to be prefixed in this manner).  *                Extensions to CGI Variables  F                In line with other CGI implementations, additional, non-H                compliant variables are provided to ease CGI interfacing.J                These provide the various components of the query string. AK                keyword query string and a form query string are parsed intoe)                separated variables, named                       WWW_KEY_number                   WWW_KEY_COUNTr,                   WWW_FORM_form-element-name  N                                                           HTTPd Scripting  6-1                 %                See the example below.c    $                CGI Variable Capacity  J                DCL symbol values are limited to approximately 1000 charac-J                ters. The CGI interface will provide symbols with values upL                to that limit if required. This should be sufficient for most                circumstances.   K                The basic CGI symbol names are demonstrated here with a calleG                to a script that simply executes the following DCL code:T  %                   $ SHOW SYMBOL WWW_* !                   $ SHOW SYMBOL *R  K                Note how the request components are represented for ISINDEX- K                style searching (third item) and a forms-based query (fourthe                item).   /                <online hypertext demonstration>i  #                CGI Compliant Output   K                Script output must behave in a CGI-compliant fashion (by way G                of contrast, see Section 6.2). 
That is, a CGI script mayiG                redirect the location of the document, using a Location:sH                header line, or may supply a data stream beginning with aJ                Content-Type: header line. Both must be followed by a blank                line.  M                If the script output begins with either of these two lines L                HTTPd assumes that output will be line-oriented, without HTTPJ                carriage-control (each line terminated by a carriage-returnH                then a line-feed), and will thereafter ensure each recordK                it receives is correctly terminated before passing it to the K                client. In this way DCL procedure output (and the VMS CLI inr3                general) is supported transparently.L  %             6.1.1 Example DCL Scripts   C                A simple script to provide the system time might be:   ,                   $ say = "write sys$output"=                   $! the next two lines make it CGI-complianto2                   $ say "Content-Type: text/plain"                   $ say ""6                   $! start of plain-text script output                   $ show time                 6-2  HTTPd Scripting    c            J                A script to provide the system time more elaborately (using                HTML):   ,                   $ say = "write sys$output"=                   $! the next two lines make it CGI-compliante1                   $ say "Content-Type: text/html"                    $ say ""0                   $! 
start of HTML script output                    $ say "<HTML>"                   $ say "Hello ''WWW_REMOTE_HOST'"  !(CGI variable)                   $ say "<P>"                    $ say "System time on node ''f$getsyi("nodename")' is:"                    $ say "<H1>''f$cvtime()'</H1>"                   $ say "</HTML>"               6.2 Non-CGI Compliance Output                   A script does not have to output a CGI-compliant data stream.                If it begins with an HTTP header status line (e.g. "HTTP/1.0                200 OK"), HTTPd assumes it will supply a raw HTTP data stream,                 containing all the HTTP requirements.                  Any such script must observe HyperText Transfer Protocol                (for detailed information on HTTP see Chapter 10). Every                line must be terminated by a carriage-return and line-feed                 (represented as "\r""\n"), or as a minimum by a single line-                feed. In particular, the type of the data being returned by                the scripts must be included in an HTTP header sent prior to                 the data itself. Headers for the two most common data types                will be illustrated here. Note that the blank line is strictly                 necessary; it terminates the header.                  Plain-Text                     HTTP/1.0 200 ok\r\n                   Content-Type: text/plain\r\n                   \r\n                  HTML                      HTTP/1.0 200 ok\r\n                    Content-Type: text/html\r\n                    \r\n                                                                         HTTPd Scripting  6-3                                 Non-CGI-Compliant DCL script                   The following example shows a non-CGI-compliant DCL script                similar in function to the CGI-compliant one above. 
Note the                 full HTTP header and each line explicitly terminated with a                carriage-return and line-feed pair.                     $ cr[0,8] = %x0d                   $ lf[0,8] = %x0a                   $ say = "write sys$output"                   $! the next line makes it non-CGI-compliant                    $ say "HTTP/1.0 200 Time follows.''cr'''lf'"                   $ say "Content-Type: text/html''cr'''lf'"                   $ say "''cr'''lf'"                   $! start of HTML script output                   $ say "<HTML>''cr'''lf'"                   $ say "Hello ''WWW_REMOTE_HOST'''cr'''lf'"  !(CGI variable)                    $ say "<P>"                   $ say "System time on node ''f$getsyi("nodename")' is:''cr'''lf'"                   $ say "<H1>''f$cvtime()'</H1>''cr'''lf'"                   $ say "</HTML>''cr'''lf'"               6.3 Raw HTTP Input                  The logical name HTTP$INPUT defines a mailbox providing the                raw HTTP input stream from the client. This is available for                procedures and executables to explicitly open and read.                  Note that this is a raw stream, and HTTP lines (carriage-                return/line-feed terminated sequences of characters) may have                been blocked together for network transport. 
These would need to                be explicitly parsed by the program.                  To make this stream implicitly available to an executable as                the standard input stream the following DCL command should be                executed immediately before invoking the image:                     DEFINE /USER_MODE SYS$INPUT HTTP$INPUT                                      6-4  HTTPd Scripting                                  Chapter__7_______________________________________________________                Utilities                    Foreign commands for these utilities (and the HTTPD control                functionality) will need to be assigned in the administration                users' LOGIN.COM, for example:                     CHKMAP == "$HT_EXE:CHKMAP"                   HTTPD == "$HT_EXE:HTTPD"                   HTTPDMON == "$HT_EXE:HTTPDMON"                   WWWRKOUT == "$HT_EXE:WWWRKOUT"               7.1 HTTPd Monitor                   The HTTPd server may be monitored using the HTTPDMON utility.                This utility continuously displays a screen of information                 comprising three sections:                   1. Process Information                   HTTPd process information includes its up-time, CPU-time                   consumed (excluding any subprocesses), I/O counts, and                   memory utilization.                  2. Server Counters                    The server counters keep track of the total connections                    received, accepted, rejected, etc., totals for each request                    type (file transfer, directory listing, image mapping,                   etc.).                   3. 
Latest RequestK                   This section provides the originating host, HTTP request,NH                   response status code, and some transaction statistics.  <                The following example provides sample output:  a                    Port: 80              HTTPDMON v1.0.0 AXP         Monday, 11-DEC-1995 11:12:12d  H                    Process: HTTPd:80    PID: 2DE0143C  User: HTTP$SERVER<                      Start: 1  Exit Status: n/a  (Zeroed: 0)=                         Up: 0 04:05:55.47  CPU: 0 00:00:22.57yJ                        BIO: 21493  DIO: 2760  Pg.Flts.: 3921  Pg.Used: 14%X                     WsSize: 1240 (620kB)  WsPeak: 4368 (2184kB)  PeakVirt: 6848 (3424kB)5                      Files: 1/100  Subprocesses: 0/50       N                                                                 Utilities  7-1 t  u            S                    Connect: 19  Accept: 19  Reject: 0  Busy: 0  Current: 1  Peak: 2pL                      Error: 0  Parsed: 19 (1 redirect)  Forbidden: 0  RMS: 00                        GET: 16  HEAD: 2  POST: 1=                       File: 13 (0)  Menu: 0 (0)  Directory: 5c4                      sHTML: 0  IsMap: 1  Internal: 0@                     Script: 0  Auto.Script: 0  Subprocess: 0 (0)W                        1xx: 0  2xx: 18  3xx: 0  4xx: 1  5xx: 0  (0 errors)  Redirect: 0 5                         Rx: 22806  Tx: 94757  (bytes)a  D                       Time: 11 11:12:08  Host: beta.hfrd.dsto.gov.au3                     Status: 200  Rx: 1198  Tx: 2274X>                    Request: GET /ht_root/doc/htd/htd_0700.html  B                The /HELP qualifier provides a brief usage summary.  H                This information is, in part, obtained from the following                logical names:d  !                o  HTTPDport$COUNTE                  o  HTTPDport$PIDP  #                o  HTTPDport$REQUESTA  G                The server counter values are carried over when a server K                (re)starts 
(provided the system has stayed up). To reset the                 counters use the following command on the server system:                     $ HTTPD /DO=ZERO               7.2 Mapping Rule Checker                  The mapping rule database may be verified using the CHKMAP                utility. This utility will load the rule database, reporting                on any rules that have an incorrect format, and then                optionally map a URL format path to a VMS file specification,                or reverse-map a VMS file specification to a URL format, or                report an error! A script path will be mapped into script                and derived path components. Various rules may be checked                from the command-line for correct mapping to and from VMS                specifications before committing them to a working server.                  By default the utility will load any defined rule mapping                database (i.e. HTTPD$MAP) via the appropriate logical name.                If a specific, non-default rule file is required it can be                specified using the /MAP= qualifier.                  URL format paths begin with, and contain, forward-slashes. For                this reason, when being specified on the command line, they must be                 enclosed within double-quotation marks. 
This does not apply to                VMS file specifications.               7-2  Utilities                                    $ CHKMAP                    $ CHKMAP /MAP=TEST.CONF                    $ CHKMAP "/hyperdata/home.html"                    $ CHKMAP USER$DISK:[DANIELM.WWW]HOME.HTML                  The /HELP qualifier provides a brief usage summary.               7.3 Server Workout (stress-test)                  The WWWRKOUT ("World Wide Web Workout") utility exercises an                HTTP server, both in the number of simultaneous connections                maintained and in the number of back-to-back sequential                connection requests and data transfers.                  This utility can be used to stress-test the HFRD VMS HTTP                server (or any other), or to make comparisons between it and                other servers.                   It sets up and maintains a specified number of simultaneous                connections to a server. It reads a buffer of data from each                 connection in turn, where data is waiting (does not block),                until the document transfer is complete and the connection                 closed by the server. It then closes the local end and                immediately reuses the now-free socket to initiate another                sequence. If enabled (it is by default), the utility attempts                to reflect the real-world in varying the data transfer rate                for each connection, by setting the number of bytes read                during each read loop differently for each connection. All                 transferred data is discarded.                  The data transfer rate for each connection is displayed                at connection close. By default it is the effective                transfer rate, that is the rate from opening the connection                to closing it, and so includes request processing time, etc. 
H                If the /NOEFFECTIVE qualifier is employed it measures the0                document data transfer rate only.  F                Although a single document path may be specified on theJ                command line it is preferable to supply a range of documentK                paths, one per line in a plain text file. Each document path J                and/or type specified should be different to the others, toM                exercise the server and file system cache. Any number of paths/L                may be specified in the file. If the file is exhausted beforeL                the specified number of connections have been established theK                file contents are recycled from the first path. If a path or L                a file of paths is not specified the utility just continually-                requests the welcome document..      N                                                                 Utilities  7-3                 K                To assess a server's total throughput choose paths that lead_K                to large documents (> 50K), where the overhead of connectionPF                setup, rule processing and transfer initiation are lessI                significant than the data transfer itself. The buffer sizeiK                variation functionality should be disabled using the /NOVARY K                qualifier when assessing data transfer rates. 
ResponsivenessdE                is better assessed using small documents (< 2K), wherepG                the overhead of the non-data-transfer activities is more                 significant.d  E                WWWRKOUT [server_host_name[:port]] [path] [qualifiers]C  K                o  /BUFFER= number of bytes to be read from server each timevI                   (default is 1024, will be modified by the default /VARYt                   qualifier)  M                o  /COUNT= total number of connections and requests to be donet"                   (default is 100)  I                o  /[NO]EFFECTIVE measures data transfer rate from requestxK                   to close (if /NOEFFECTIVE is applied the rate is measuredp+                   during data transfer only   B                o  /FILEOF= file name containing paths to documents  7                o  /HELP display brief usage informationv  /                o  /OUTPUT= file name for output   @                o  /PATH= single path to document to be retrieved  F                o  /PORT= IP port number of HTTP server (default is 80)  0                o  /SERVER= HTTP server host name  M                o  /SIMULTANEOUS= number of simultaneous connections to be seth4                   up at any one time (default is 10)  G                o  /[NO]VARY varies the size of the read buffer for each .                   connection (default is vary)                  Examples:                     $ WWWRKOUTC                   $ WWWRKOUT www.server.host "/hyperdata/home.html"GC                   $ WWWRKOUT www.server.host:8080 /FILEOF=PATHS.TXTsQ                   $ WWWRKOUT /SERVER=www.server.host /PORT=8080 /FILEOF=PATHS.TXTmX                   $ WWWRKOUT www.server.host:8080 /FILEOF=PATHS.TXT /NOEFFECTIVE /NOVARY_                   $ WWWRKOUT www.server.host /FILEOF=PATHS.TXT /COUNT=500 /SIMUL=20 /BUFFER=512r  B                The /HELP qualifier provides a brief usage summary.               7-4  Utilities     
                M             Chapter__8_______________________________________________________t  !             Organizing Data Areas     K                This section deals with the provision of physical file spacemK                for HFRD hypertext-related data, and the integration of thatlJ                within the logical organization of the HFRD hypertext data.  L                The logical organisation of served data is largely hierarchi-7                cal, and is achieved via two mechanisms.   F                1. The natural hierachy provided by a hierarchical file                   system.   G                2. The logical hierarchy possible using rules within theeG                   mapping file to place disparate physical areas into ac;                   single logical structure (see Chapter 5).   H                HFRD has a single section of the file system for the coreK                hypertext environment files, such as the Division home page, L                help areas, documentation, etc., it can be accessed using theJ                logical area HYPERDATA:[000000]. Physically integrated withG                this are directories providing data storage for specificaE                groupings of data, such as CEAC minutes, adminstrationhM                announcements, etc. Physically distinct areas are also logicaliK                grouped into the hypertext environment, areas such as the IEs-                group area, JFAS minutes, etc.   J                The reason for some areas being physically distinct are forL                legitimate physical reasons (e.g. the area can best be hostedG                on a group-local disk), for historical reasons (e.g. thehH                area existed before any hypertext environment existed) orJ                for reasons of convenience (e.g. 
let's put this where access                controls already allow the maintainers to manage it).                  The reasons for an area being physically integrated with the                 core hypertext data area can be legitimate (e.g. there is                really nowhere else it reasonably belongs), convenience (e.g.                let's quickly put it here) or by logical necessity (it really                 does belong as part of the core hypertext environment).                                                                                 Organizing Data Areas  8-1                                 Guidelines                   In general, only hypertext environment files need to go into                the core physical hypertext file system. All other groupings                should, if possible, be decentralised into the portion of                the file system they represent and logically placed in the                 hypertext area using rules in the mapping file. That is,                a given project's hypertext files should be located in the                 project's part of the file system. If it doesn't have any then                 it may be a candidate for location in the core area.                  When locating a hypertext area in a physically distinct area                 it is possible the managers of that data will already have                the correct access controls. If locating an area in the core                hypertext environment it will be necessary to give the manager                ownership of the directory area, or provide ACL access if                multiple managers are involved.                  When locating a hypertext area in a physically distinct area                it will be necessary to update the mapping file with a new                rule (see Chapter 5). 
If located within the core hypertextt9                area the rules do not need to be adjusted.t                                                        &             8-2  Organizing Data Areas                     M             Chapter__9_______________________________________________________   ,             Brief Introduction to HTTPd Code    J                This section is designed to be only a broad overview of theH                basic functionality of the HTTPd server. It also does notL                cover the full suite of HFRD VMS Hypertext Services software.8                The source code should also be consulted.  &                <online hypertext link>                  Multi-ThreadedT  H                The HFRD HTTPd is written to exploit VMS operating systemK                characteristics allowing the straight-forward implementationlF                of multi-threaded code. The server is written to be I/OJ                event driven. Asynchronous System Traps (ASTs), or softwareG                interrupts, at the conclusion of an I/O (or other) eventoM                allow functions to be activated to post-process the event. The M                event traps are automatically queued on a FIFO basis, allowing H                a series of events to be sequentially processed. When notL                responding to an event the process is quiescent, or otherwiseI                occupied, effectively interleaving I/O and processing, andf?                allowing a sophisticated client multi-threading. 
  J                When VMS supports kernel-threads (beginning v7.0 I believe)J                this may be enhanced to optionally support multiple threadsK                over multiple CPUs within the one process, further extendingT!                server throughput.a  J                Multi-threaded code is inherently more complex than single-K                threaded code, and there are issues involved in the synchro-yI                nization of some activities in such an environment. Fortu- H                nately VMS handles many of these issues internally. AfterM                connection acceptance, all of the processing done within HTTPd J                is at USER mode AST delivery level, and for all intents andH                purposes the processing done therein is atomic, implictly7                handling its own synchronization issues.g  H                HTTPd is written to make longer duration activities, suchI                as the transfer of a file's contents, event-driven. Other, E                shorter duration activites, such as accepting a client =                connection request, are handled synchronously._        N                                          Brief Introduction to HTTPd Code  9-1 s  t            H                It is worth noting that with asynchronous, and AST-drivenI                output, the data being written must be guaranteed to exist H                without modification for the duration of the write (untilH                completion AST delivery). This means data written must beK                static or in buffers that persist with the thread. Function- I                local (automatic) storage cannot be used. The HTTPd server L                allocates dynamic storage for general (e.g. output buffering)8                or specific (e.g. response headers) uses.                    Tasks  L                Each request can have one or more tasks executed sequentiallyJ                to fullfil the request. 
This occurs most obviously with theK                HTML pre-processor, but also, to a more limited extent, with J                directory listing and its read-me file inclusion. A task is!                defined as one of:e                  o  send fileu                  o  send menui  #                o  directory listinga                  o  DCL execution   K                Each one of these modules executes relatively independently.rJ                Before commencing a task, a next-task pointer can be set toI                the function required to execute at the conclusion of thateK                task. At that conclusion, the next-task functionality checksAH                for a specified task to start or continue. If it has beenL                specified control is passed to that next-task function via an                AST.d                   Memory Management  E                Per-Thread memory is managed as two distinct portions.   M                1. A fixed-size structure of dynamic memory is used to containeJ                   the core request thread data. This is released at thread                   disposal.   M                2. A heap of dynamically allocated memory is maintained duringt1                   the life of a thread structure.c  L                   When a dynamic structure is required this heap is expandedM                   by calloc()ing memory, placing this in a double-linked list6J                   structure, and returning a pointer to the usable portionI                   of the newly allocated memory. 
This list is released in                    one operation at thread disposal, by traversing the list                   and free()ing each individual chunk (making it easier to                   avoid the memory leaks associated with making autonomous                   allocations for each dynamic memory structure required).               9-2  Brief Introduction to HTTPd Code                                  Output Buffering                  To reduce the number of individual network writes, and thus                provide significant improvements in efficiency, output                generated from all modules except File(), is buffered into                larger packets before sending to the client. All modules,                including File(), work to implement a seamless integration of                output via this mechanism (best seen in the Shtml() module).                  The AST-driven nature of the server means this functionality                 is moderately complex. A form of double buffering is employed,                allowing the buffer to overflow and be flushed to the network                asynchronously, without overwriting, losing or needing to re-                request data. Two buffer spaces are employed. When one fills                it is written to the network and a pointer to the two areas                exchanged, allowing the supplied (and overflowed) data to                be immediately buffered (without a synchronous wait for the                network write to complete), and an immediate return to further                AST-driven processing. The alternate buffer continues to be                used until it fills, when the process is repeated.                  The possibility of an asynchronous write with every buffered                output introduces complexity. 
Every buffered output call mustH                be used as if it is an asynchronous network write, an ASTJ                function address supplied with every call on the off-chanceL                (and eventuality) that an actual network write will occur. IfK                a network write does not occur (most of the time) the AST isyK                explicitly declared for delivery. This need to supply an AST K                function with every buffered write, basically means only one B                buffered write may occur per AST-executed function.                  Rule-MappingR  F                A fundamental aspect of any HTTPd implementation is theF                rule mapping used to create a logical structure for theL                hypertext file system. The HTTPd mapping function is designedF                to be flexible enough that script programs can also useI                it. As a result it is text-file based, and opened and readiH                when mapping. This method of mapping provides a good dealG                of flexibility, coupled with acceptable performance. The K                function has received a high level of attention in an effort                 to optimize it.                N                                          Brief Introduction to HTTPd Code  9-3 e  r                            Auto-Scripting   I                The HFRD VMS HTTP server has the facility to automatically J                invoke a script to process a non-HTML document (file). ThisH                facility is based on detecting the MIME content data typeJ                (via the file's extension) and causing a transparent, localM                redirection, invoking the script as if it was specified in the                  original request.  "                Internal Directives  K                The HTTPd server detects certain strings as directives about F                its behaviour. 
These directives are passed in the query                string component of the request, and as reserved sequences                cannot occur in normal requests (an unlikely combination of                characters has been selected).                   o  ?httpd=report                   Returns an HTML format report on server status and activity                    (see Section 7.1).                  o  ?httpd=index                   Controls aspects of the directory listing module's                   behaviour:                     o  &autoscript=no                       o  &format=vms                     o  &layout=string                      o  &readme=no                      o  &script=string                   HTTPd Modules                  The HTTPd server comprises eight main modules, implementing                the obvious functionality of the server, and other, smaller,                support modules.                  o  HTTPD.C (Section 9.1)                  o  REQUEST.C (Section 9.2)                   o  FILE.C (Section 9.3)                  o  MENU.C (Section 9.4)                  o  DIR.C (Section 9.5)                  o  DCL.C (Section 9.6)                  o  SHTML.C (Section 9.7)                   o  ISMAP.C (Section 9.8)                9-4  Brief Introduction to HTTPd Code                                  o  LOGGING.C (Section 9.9)                 9.1 HTTPD.C                  <online hypertext link>                  This is the main module of the server. It handles all TCP/IP                network activities, from creating the server socket and                listening on the port, to reading and writing network I/O. The                 network read and write functions have provision for specifying                I/O completion AST function addresses. If these are provided                then the function is called upon completion of the network                I/O. 
If not provided then the I/O completes without calling anp                AST routine.   F                The server begins by creating a network socket and thenG                binding that to the HTTP port. The server then enters anV9                infinite loop, waiting for IP connections.U  G                When a connection request is received the remote host is K                checked as an allowed connection. If allowed, a request dataaL                structure is created from dynamic memory, and an asynchronousJ                read is queued from the network client. The pointer to thisH                dynamic data structure becomes the request thread, and isL                passed from function to function, AST routine to AST routine.I                The AST completion routine of the network read specifies arJ                request analysis function. The function then returns to the*                connection acceptance loop.  L                When the network read completes an AST completion function inK                the Request()  module is called to process the HTTP request.                9.2 REQUEST.Cs  &                <online hypertext link>  K                This module reads the request header from the client, parses L                this, and then calls the appropriate task function to executeG                the request (i.e. send a file, pre-process an HTML file,OD                generate a directory listing, execute a script, etc.)  J                The request header is contained in the network read buffer.K                If it cannot be completely read in the first chunk, the readeJ                buffer is dynamically expanded so as to be read in multipleL                chunks. 
The request header is addressed by a specific pointer                that allows the parse-and-execute function to process either                a genuine, initial client request header, or a pseudo-header                 generated to effect a redirection.                                                             Brief Introduction to HTTPd Code  9-5                                 The method, path information and query string are parsed from                the first line of the header. Other, specific request header                 fields are also parsed out and stored for later reference.                 Once this has been done the header is not further used.                  Once the relevant information is obtained from the request                header, processing commences on implementing the request.                This comprises the rule-mapping of any path information,                the RMS parsing of any resulting VMS file specification and                decision-making on how to execute the request.                  o  If an internal directive, that is executed.                  o  If the content-type of a supplied file specification is                   auto-scripting, an automatic redirection is generated.                  o  If a file specification and no wildcards, the file is sent                    to the client.                  o  If a wildcarded file specification, and no query string, a                   directory listing is generated.                  o  If a directory specification (no file name), one of                   multiple, possible, home pages are attempted to be sent.                   If no home page is found in the directory then a directory                   listing is generated.                  o  If a script specification prefixed the path information                   that script is executed.                  o  If a query string is supplied (and it is not a server                   directive), and 
no script name was included in the pathaG                   information, the server query script is automaticallya                   activated.  G                This functionality is used to parse and execute both theiF                initial client request and any pseudo-request generated2                internally to effect a redirection.               9.3 FILE.C  &                <online hypertext link>  I                This module implements the file transfer functionality. It M                obtains the file specification, mime content type and encodingiI                (binary/text) information from the request data structure.lK                It handles record-oriented (text) files slightly differentlyrH                to binary (e.g. image) files (specified using the AddTypeG                configuration directive, see Chapter 4). Record-orientednG                files will have multiple records buffered before writingfG                them collectively to the network (improving efficiency). I                Binary file reads are by Virtual Block, and are written tofJ                the network immediately. The essential behaviour however is                much the same.e  1             9-6  Brief Introduction to HTTPd Codet t  i            D                1. The primary function attempts to open the file. IfI                   unsuccessful it immediately returns the error status togK                   the calling routine for further action (this behaviour isAL                   used to try each of multiple home pages by detecting file-*                   not-found, for example).  J                2. After successfully opening the file it generates an HTTPJ                   response header if required. It then calls one of eitherJ                   two functions to queue the first read from the file, oneH                   for text files (record-oriented transfer), another forK                   binary files (block-oriented transfer). 
After the read is_?                   queued it returns with a success status code.r  I                3. When the asynchronous file read completes one of eitheroF                   two AST completion functions (one for text the otherJ                   for binary) is called to post-process the I/O. Status isJ                   checked for success or otherwise. If an error the statusM                   is reported to the client, the file closed, and the requestM#                   thread concluded.H  I                   If end-of-file, the file is closed, for record-orientedrK                   (text) files the buffer checked and if necessary flushed.-M                   If an end task function was specified control is now passedy=                   to that, otherwise the thread is concluded.r  L                   If not end-of-file, for text files multiple records may beJ                   buffered before writing to the network. If the buffer isL                   full (the read was unsuccessful due to insufficient space)I                   the contents are asynchronously written to the network,dH                   with the network write completion routine specifying aK                   function to re-read the file record that just failed.wL                   If there is still space in the buffer another asynchronousH                   read of the file is queued in an attempt to append theK                   next record into the buffer. After the read is queued theC%                   function completes.e  H                   If not end-of-file, for binary files a successful readI                   results in a call to the network write function to send K                   this to the client. This call contains the address of themJ                   function to read the next blocks from the file as an ASTM                   completion routine. 
After the asynchronous network write ise0                   queued the function completes.  M                For text files the contents can be encapsulated as plain text. J                This involves prefixing the file send with a <PRE> HTML tagK                and postfixing it with a </PRE> tag. The buffer is filled as L                per normal but when ready to output a function is called thatJ                escapes all HTML-forbidden characters first (e.g. "<", ">",                "&", etc.)c  N                                          Brief Introduction to HTTPd Code  9-7                              9.4 MENU.C  &                <online hypertext link>  H                This module implements the HFRD menu interpretation func-L                tionality. It obtains the file specification from the requestL                data structure. Output from this module is buffered to reduce8                network writes, improving I/O efficiency.  #                Essential behaviour:i  D                1. The primary function attempts to open the file. IfI                   unsuccessful it immediately returns the error status to K                   the calling routine for further action (this behaviour is @                   used to try multiple home pages, for example).  J                2. After successfully opening the file it generates an HTTPE                   response header if required. A call is then made to I                   asynchronously read a record from the file opened. ThistM                   call contains the address of a function to count the numbertL                   of menu sections (blank-line delimited groups of lines) inJ                   the file. After the asynchronous file read is queued the>                   function returns with a success status code.  K                3. When the asynchronous file read completes the AST comple-aJ                   tion function is called to count the sections. 
Status isM                   checked for success or otherwise. If an error the status isoM                   reported to the client, the file closed, and the processings                   concluded.  K                   If end-of-file, or up to three sections counted, the filenL                   is repositioned to the start and then another asynchronousM                   file read is queued (starting with the first record again),iG                   with the AST completion routine specified as the menu I                   interpretation function. If still counting sections the F                   completion AST routine specified is the same sectionH                   counting function. After the asynchronous file read is0                   queued the function completes.  K                4. When the asynchronous file read completes the AST comple-gJ                   tion function is called to interpret the line, dependentK                   on the section number it occurs in. Status is checked forrJ                   success or otherwise. If an error the status is reportedL                   to the client, the file closed, and the request concluded.L                   If end-of-file, the file is closed and the processing con-J                   cluded. For a successful record read the line can eitherM                   be title, description or menu item. When the line is inter-_K                   preted and written to the network another read is queued,eJ                   with an AST completion routine again specifying the con-M                   tents interpretation function. The function then completes.o  1             9-8  Brief Introduction to HTTPd Code                               9.5 DIR.Ch  &                <online hypertext link>  M                This module implements the HTTPd directory listing functional-uI                ity. Directories are listed first, then files. 
File detailTG                format is customizable, with the default resembling the de- I                fault CERN and NCSA server layout. Output from this module H                is buffered to reduce network writes, improving I/O effi-L                ciency. HTML files have the <TITLE></TITLE> element extracted'                as a "Description" item.s  #                Essential behaviour:   J                1. The primary function obtains the file specification fromL                   the request data structure. Server directives, controllingH                   some features of the directory listing behaviour, areF                   checked for and parsed out if present. The directoryL                   listing layout is initialized. The directory specificationJ                   (path information) is parsed to obtain the directory andG                   file name/type components. After successfully parsing H                   the specification it generates an HTTP response header                   if required.  L                2. Column headings and (possibly) a parent directory item areM                   buffered in an asynchronous function call. An RMS structureaI                   is initialized to allow the asynchronous search for alluD                   files in the specified directory ending in ".DIR".  H                3. For each directory file found the directory search ASTF                   completion function is called. Status is checked forJ                   success or otherwise. If an error the status is reportedL                   to the client and the request processing concluded. 
If theJ                   directory contained no directory files, or the directoryM                   files are exhausted a call to a function to begin a listing F                   of non-directory files is made and the function then                   completes.  M                   If a directory file was returned a synchronous call to listnH                   the details of that directory is made and then anotherF                   asynchronous search call made with an AST completion7                   function again back to this function.i  M                4. When the directory files are exhausted the RMS structure isoJ                   reinitialized to allow the search for all specification-D                   matching, non-directory files in the directory. An3                   asynchronous search call is made.L  M                5. For each matching file found the file search AST completionnF                   function is called. Status is checked for success orM                   otherwise. If an error the status is reported to the clientsM                   and the processing concluded. If the directory contained no   N                                          Brief Introduction to HTTPd Code  9-9 b               L                   matching files, or the files are exhausted, the processingF                   is concluded and the function immediately completes.  H                   If a file was returned a call is made to a function toH                   check whether a file description can be obtained (HTMLL                   files only). If it can then a function to initiate this isM                   called and the function completes. If no description can beiK                   obtained a synchronous call is made to a function to list M                   the file details. After the file details are listed another J                   asynchronous search call is made, with the same functionM                   specified for AST completion. 
The function then immediately                    completes.  I                6. To asynchronously locate a description in an HTML file, H                   the file is opened and then each record asynchronouslyJ                   read and examined for the <TITLE> element. Once obtainedF                   a synchronous call is made to a function to list theI                   file details. After the file details are listed another H                   asynchronous search call is made, with the file searchJ                   function specified for AST completion. The function then(                   immediately completes.               9.6 DCL.C   &                <online hypertext link>  L                The DCL execution functionality must interface and coordinateK                with an external subprocess. It too is asynchronously driven K                by I/O once the subprocess has been created and is executing F                independently. Communication with the subprocess is via                mailboxes.n  5                The DCL facility is used in two modes:   F                1. To execute independent DCL commands. This is used toI                   provide DCL output for pre-processed HTML. In this mode F                   multiple DCL commands may be executed within the one                   request.  K                2. To execute CGI scripts. In this mode only one DCL command 1                   is executed during the request.e  F                DCL related structures and devices (e.g. mailboxes) areE                retained for the life of the request, and may be reused iftH                multiple commands are required. This reduces the overhead                 of DCL execution.  D                1. The primary DCL function ensures any required fileI                   specification exists (e.g. script procedure). 
The first M                   time it is executed during an individual request it createss)                   two or three mailboxes:t  2                   1. for the subprocess' SYS$INPUT  2             9-10  Brief Introduction to HTTPd Code e  a            3                   2. for the subprocess' SYS$OUTPUT   J                   3. if CGI-script execution, available for the subprocessK                      to explicitly open providing direct read access to thedM                      HTTP data stream (this could be done with a DEFINE /USER<+                      SYS$INPUT HTTPD$INPUT)   J                   A function writes to the SYS$INPUT, creating a number ofI                   CGI-compliant symbol names and executing the command oreA                   invoking the execution of a DCL procedure, etc.   L                   A subprocess is spawned. Input and output are then I/O ASTM                   completion routine driven, and the primary function returnsn)                   to the calling routine.t  M                2. When the subprocess writes to the SYS$OUTPUT stream the I/OpM                   completion AST routine associated with reading that mailboxr                   is called.  L                   If CGI-script execution, the first I/O from this stream isM                   analyzed for CGI-compliance. It is determined whether a raweM                   HTTP data stream will be supplied by the script, or whetheroM                   the script will be CGI-compliant (requiring the addition ofmL                   HTTP header, etc.) and whether HTTP carriage-control needs6                   to be checked/added for each record.  K                   A CGI local redirection header (partial URL) is a special L                   case. When this is received all output from the subprocessJ                   is suppressed until the script processing is ready to beL                   concluded. 
At that time the "Location:" information of theJ                   header is used to reinitiate the request, using the same(                   thread data structure.  J                   When normal SYS$OUTPUT processing is complete the recordJ                   received can be handled in one of two ways. If it is rawK                   HTTP it is asynchronously written to the network. The ASToJ                   completion routine specified with the network write willJ                   queue another read from subprocess' SYS$OUTPUT. If it isJ                   record-oriented I/O (e.g. from DCL output), it is outputM                   buffered, into larger chunks, before these multiple records K                   are written to the network at the one time. This improves !                   I/O efficiency.a  K                   The SYS$OUTPUT stream is a little problematic. At subpro-eK                   cess exit there may be one or more records waiting in the J                   mailbox for reading and subsequent writing to the clientM                   over the network, delaying processing conclusion. DetectionlL                   of completion is accomplished by making each QIO sensitiveL                   to mailbox status via the SS$_NOWRITER status, which indi-L                   cates there is no channel assigned to the mailbox, and theM                   mailbox buffer is empty. It then becomes safe to dispose ofn9                   the client thread without loss of data.i  N                                         Brief Introduction to HTTPd Code  9-11 e  p            D                3. If CGI-script execution, the HTTP data stream madeK                   available is also AST driven. 
If the subprocess opens thedM                   stream and reads from it, the I/O completion routine calledcK                   queues another asynchronous read from the network client.r                 9.7 SHTML.Ca  &                <online hypertext link>  K                The HTML pre-processor module provides this functionality aseK                an integrated part of the server. Output from this module is K                buffered to reduce network writes, improving I/O efficiency.   #                Essential behaviour:t  D                1. The primary function attempts to open the file. IfI                   unsuccessful it immediately returns the error status to K                   the calling routine for further action (this behaviour is @                   used to try multiple home pages, for example).  J                2. After successfully opening the file it generates an HTTPE                   response header if required. A call is then made to H                   asynchronously read a record from the file opened. TheJ                   record read AST function scans the record (line) lookingG                   for pre-processor directives embedded in HTML commenthK                   directives. If no directive is found the record is outputn9                   buffered and another queued to be read.   H                3. If a directive is detected any part of the line up to theG                   directive is output buffered and a function called to L                   parse the directive. This function reports an error if theK                   directive specified is not supported (unknown, etc.) If aSM                   supported directive a specific function is called accordingsI                   to the directive specified. These functions provide theu@                   pre-processor information in one of four ways:                     1. 
Internallys  J                      Information such as the system time, current documentH                      information, etc., can be provided from informationK                      contained in the request data, etc., or in the case of N                      specified document/file information obtained via the fileK                      system. These directives have the relevant informationeL                      buffered and then the function returns to the directive&                      parsing function.  &                   2. Via DCL Execution  L                      Information that must be obtained through DCL executionH                      is obtained using an asynchronous call to the Dcl()G                      module. The next-task function is specified as theeL                      line parsing function. When the DCL module has finished  2             9-12  Brief Introduction to HTTPd Code                 M                      executing the required command control is passed back too#                      this function.   #                   3. Sending a Filet  D                      If a file is #included this is provided with anJ                      asynchronous call to the File() module. The next-taskM                      function is specified as the line parsing function. WhenoL                      the File() module has finished transferring the includedB                      file control is passed back to this function.  &                   4. Directory Listing  M                      If a directory listing is requested this is provided viaaL                      an asynchronous call to the Dir() module. The next-taskM                      function is specified as the line parsing function. When I                      the Dir() module has finished generating the listingf=                      control is passed back to this function.e  G                4. 
Directives continue to be parsed, and executed, asyn- J                   chronously if necessary (as just described), from withinH                   a line until the end-of-line is reached. Any remainingK                   characters are output buffered. Lines continue to be readnJ                   from the file using the AST mechanism until end-of-file.               9.8 ISMAP.Ct  &                <online hypertext link>  M                The clickable-image support module provides this functionality E                as an integrated part of the server. It supports imageaF                configuration file directives in either of NCSA or CERNM                formats. Extensive configuration specification error reportingt$                has been implemented.  6                                        Acknowledgement:  A                      Three coordinate mapping functions have beens@                      plagiarized from the NCSA IMAGEMAP.C scriptC                      program. These have been inserted unaltered inhB                      the module and an infrastructure built around?                      the essential processing they provide. Dueo?                      acknowledgement to the original authors andhH                      maintainers of that application. Any copyright over?                      portions of that code is also acknowledged:   $                        ** mapper 1.2K                        ** 7/26/93 Kevin Hughes, kevinh@pulua.hcc.hawaii.educc                        ** "macmartinized" polygon code copyright 1992 by Eric Haines, erich@eye.com   N                                         Brief Introduction to HTTPd Code  9-13                 #                Essential behaviour:e  I                1. The primary function attempts to open the configurationeH                   file. If unsuccessful it generates an error report and'                   concludes processing.   F                2. 
After successfully opening the configuration file itH                   extracts the client-supplied coordinate from the queryM                   string. A call is then made to asynchronously read a recordiH                   (line) from the configuration file. Configuration file=                   processing is asynchronous from that point.   H                3. The record (line) read AST function checks for end-of-J                   file, when it will return the default URL (if supplied).L                   After end-of-file the file is closed and the processing is                   concluded.  G                   If not end-of-file, a function is called to parse theeL                   record for an image mapping directive. When the componentsK                   have been parsed the NCSA IMAGEMAP.C routines are used tocM                   determine if the click coordinates are within the specified %                   region coordinates.   F                   If it is within the region the click has been mappedG                   and the URL is placed in heap memory and the thread's E                   redirection location pointer set to it. The file isnL                   closed and the processing conclusion function called. ThisJ                   function detects the redirection location and if a localI                   URL instead of disposing of the thread generates a new,nI                   internal request from the redirection information. 
If atM                   non-local URL the client is sent a redirection response and ,                   then the thread concluded.  I                   If not within the region a call is made to asynchronouslyiC                   read the next record from the configuration file.<               9.9 LOGGING.Ct  &                <online hypertext link>  F                The logging module provides an access log (server logs,K                including error messages are generated by the detached HTTPd I                process, see sections Server Process Logging Directory andu                 Section 3.1.2.4).  K                The access log format is that of the Web-standard, "common"-oK                format, allowing processing by most log-analysis tools. Eachi.                entry (record, line) comprises:  Z                   client_host r_ident auth_user [time] "request" response_status bytes_sent                  where:   B                o  client_host is from where the request originated  2             9-14  Brief Introduction to HTTPd Code                 E                o  r_ident is the user identified by the client host'suK                   authentication daemon (RFC931), this is not available and *                   is always a hyphen ("-")  K                o  auth_user the authenticated user-name associated with the ,                   request, or a hyphen ("-")  H                o  time the following format: dd/mmm/yyyy:hh:mm:ss +/-GMT6                   (e.g. 
"16/Dec/1995:21:15:34 +10:30")  J                o  request the method, a space, then the path and any query                   string  L                o  response_status the three digit response status code (e.g.                   200, 302)s  I                o  bytes_sent the number of bytes sent to the client, or ao                   hyphen ("-")  H                In addition to legitimate request entries the server addsL                bogus entries to time-stamp server startup, shutdown, and theK                log being explicitly opened or closed (see Section 3.1.2.4). J                These entries are correctly formatted so as to be processedK                by a log analysis tool, and are recognisable as being "POST"eM                method and coming from user "HTTPd". The request path contains M                the event and a hexadecimal VMS status code, that represents ay7                valid exit status only in "END" entries.L  L                Clickable-image requests are logged as "302" entries, and theB                resulting, redirected request entry logged as well.  I                When a log entry is required the file is opened if closed.eM                The file is again closed one minute after the initial request.aE                This flushes the contents of the write-behind buffers. 
                                N                                         Brief Introduction to HTTPd Code  9-15 g                   M             Chapter__10______________________________________________________f               References    K                The following Hypertext documents can be found online within                 HFRD:  ,                o  A beginner's Guide to HTML  *                o  Beginner's Guide to URLs  ,                o  HFRD Hypertext Environment  ?                o  HFRD Hypertext Management Primer (on menuing)a  @                o  HTTP Hypertext Services - A Technical Overview  #                o  URI Specification                                                           N                                                               References  10-1