#!/usr/bin/perl -w
#
#  script name:   dir
#  used from  :   /data1/cmarz/htmlbin
#

my $version = "November 14, 2012, V1.26";

# Modified November 14, 2012. V1.26. Check for empty entry 
#	in get_entry routine before chop is done and sends
#	an error.  Add version output. rcg
# Modified October 15, 2012. Continue to add the style
#	options from data.bcodmo.org dir version
#	including the new routines write_page_header
#	and display_related_data. Replace reference to
#	cmarz to dmoserv3.bco-dmo.org. Replace references
#	from dmoserv3.whoi.edu to dmoserv3.bco-dmo.org. rcg
# Modified August 2, 2012. Add style sheet call to 
#	display_data_entries routine. Needs to be optional. rcg
# Modified August 2, 2012.  Increased display widths.  mda
# Modified May 15, 2012. Change infor server to be dmoserv2. rcg/smr
# Modified April 18, 2012 for use on dmoserv3. Increase display widths
#	for the Investigator name(s) by 3. rcg
# Local Modification November 2, 2006. V1.20a-2 Remove original
#	page header showing location and "Data System".  rcg
# Local Modification December 29, 2004. V1.20a-1 Add two spaces before 
#	Investigator header and three spaces before the Description header.  
#	rcg
# Modified May 7, 2002.  V1.20a.  Add dir server name to print statements
#	since new version of Netscape (V6) defaults to its own web site.  RCG
# mod January 14, 2000, CLH
#    fixed final </pre> tag  - had been incorrectly typed as <\pre>
#
# modified November 12, 1999, CLH
#    corrected use of infoserver, commented correct form - this value
#    must have the script attached to the host, ex: host.domain.edu/jg/info
#
# modified September 8, 1999, CLH
#    corrected HTML tag <pre> usage so that LYNX would properly 
#    align columns regardless of placement before or after H2, H4, etc.
#    also, center the identification of DMO data system text
#
# modified May 5, 1998, clh - restructure main section, placing bulk of 
#     code in subroutines display_data_entries and display_data_categories
#     add .almost - provides ability to display non-objects in directories
#
# Modified January 21, 1998.  Add test for test environment so as to
#   follow and display symbolic links in the object directory tree
#   Idea is to only display some directories that are in some kind of
#   test environment, defined via port number.  now set to port 8081 rcg/wjs
#
# modified to accept a .remoteobjects entry such as
#           stuff=//globec.whoi.edu/test(station=7,press<100)
#     and turn it into
#        http://globec.whoi.edu/jg/serv/test.html0?station=7,press<100
#   CLH, per GRF's suggestion
#   December 13, 1996
#
# added documentation for buttons
#   CLH
#   May 14,1996
#
# converted from dir (/bin/csh script) originally written by Glenn Flierl
#   Christine Hammond 
#   April 12, 1996
#
#  v1.5 distribution - new features
#   1) EXTENDED URL
#     creates a long extension to the object-serving URL, 
#     for example:  {dir=hostname/path/of/directory,
#                    info=hostname/jg/info/path/to/documentation}
#     used by methods to assign URL to DIRECTORY and DOCUMENTATION buttons.
#     similar extension to the info-serving URL, for linking to DIRECTORY
#     and DATA DISPLAY buttons.
#
#   2) A CUSTOM HEADER USED THROUGHOUT YOUR DIRECTORIES
#     looks for a file named 
#         '/data1/cmarz/htmlbin/htdocs/dir_header.html'
#     to use in displaying buttons or additional links to be presented
#     on all directory pages.  This file is HTML code and might contain 
#     reference to a logo image or custom navigation buttons which you
#     want displayed throughout your system, after the line which reads:
#
#        cmarz.whoi.edu --  Data System
#
#   3) CUSTOM HEADERS FOR EACH SUB-DIRECTORY
#    looks for a file named 
#        'dir_header.html' 
#    in each of this machine's /data1/cmarz/objects/CMarZ sub-directories,
#    to customize the top of each page displayed.  For example:
#        /data1/cmarz/objects/CMarZ/dir_header.html
#    would customize the look of the top-level page of your data system.
#    Examples of how this might be used are: welcome, policy notices, 
#    roadmap to the directory scheme, additional documentation about
#    your implementation of the JGOFS system.
#
#------------------------------------------------------------------------------
# Note:  An optional value is requested by the INSTALL script, which is the 
#        location where images or buttons can be found on this machine.  This
#        directory is one that the WWW server can locate easily, often a 
#        sub-directory of the DocumentRoot named '/images'.  If the installer
#        provides a value for this location, it will be used to find the GIF
#        image:
#
#        'youhere.gif'
#
#        which can be downloaded from anonymous ftp
#        on dataone.whoi.edu (128.128.80.28) in pub/images
#
############ Customized values for this site ###################
#
# jghome is top of data serving tree
#
$jghome="/data302/data_server/dmoserv3";
#
# topdir is uppermost level below jghome/objects (may be /)
#   where objects are defined
#
$topdir="/BCO-DMO/";
#
# infoserver is host and program (usually host.domain/jg/info)
#
$infoserver="dmoserv3.bco-dmo.org/jg/info";
#$infoserver="data.bcodmo.org/jg/info";
#
# DMO name, if appropriate
#
$office="Biological and Chemical Oceanography (Server 3)";
#
# Location where the image(s) are located, if any
#
$imagedir="http://dmoserv3.bco-dmo.org/images";

# Location where the stylesheet(s) are located, if any
#
$cssdir="http://data.bco-dmo.org/css";
#
#
############ End of Custom values for this site ################
#
$dictionary=".remoteobjects";
$logentry=".log";
$customhtmldir="$jghome"."/htmlbin/htdocs/";
$customhtml="dir_header.html";
$dirspacer = "     ";
$jgroot=$jghome."/objects";
$test_port="8081";  #accesses from this port considered test environment
$test_env=($ENV{"SERVER_PORT"} == $test_port);
#
#
# 
print "Content-type: text/html\n\n";
#
# strip the leading '//' from hostnames used by this script 
#
if ("$infoserver" =~ /\/\//) {
  $infoserver=substr($infoserver,2);
}

#
# get name of this server from env of httpd
#
if ($ENV{'SERVER_NAME'}) {
   $fromhost = $ENV{'SERVER_NAME'};
   if ($ENV{'SERVER_PORT'}) {
      $p=$ENV{'SERVER_PORT'};
      if ($p != 80) {
         $fromhost .= ":".$p;
      }
   }
} else {
   $fromhost = "dmoserv3.bco-dmo.org";
}

if ($fromhost =~ /\/\//) {
  $fromhost=substr($fromhost,2);
}
#
# Locate directory to display
#      NOTE: dir allows for either form: /jg/dir?/path/
#                                        /jg/dir/path/   <--- newer
#
if ($ENV{'PATH_INFO'}) {
   $arg = $ENV{'PATH_INFO'};   
} elsif ($ENV{'QUERY_STRING'}) {
   $arg = $ENV{'QUERY_STRING'};
} else {
   $arg = $topdir;
}

$currentdir = $arg;

$curdir = $currentdir;
chop($curdir);

&write_page_header;
&display_data_entries;
&display_data_categories;
&display_related_data;

# spacer at page bottom
#
print STDOUT ("\n", '<p class="sepLn">&nbsp;</p>', "\n");

# end page cleanly
#
print STDOUT ("\n", '</body>', "\n", '</html>', "\n");


# -------------------------Subroutines----------------------------
# write_page_header - display current directory for user,
#    display all customized headings
#
sub write_page_header {

print STDOUT ('<html>', "\n", '<head>', "\n");
print STDOUT ('<link rel="stylesheet" ',
	'type="text/css" href="', $cssdir,
	'/dataSystem.css">', "\n");
print STDOUT ('</head>', "\n", '<body>', "\n");

print STDOUT ("<title>", $currentdir,
	' data directory page</title>', "\n");

chdir ("$jgroot$currentdir");

#
# Always check for a file of HTML code to customize whole system
#
print "<center>";
print "<H1 class=\"titleCatSep\"> $office Data System</H1>\n";
print "</center>";

if (-e $customhtmldir && 
       -f "$customhtmldir$customhtml") {
    open(INFILE,"$customhtmldir$customhtml");
    while (<INFILE>) {
       print;
    }
    close(INFILE);
}
#
# At each directory level, check for a file to display to customize it
#
if (-r $customhtml) {
   open(INFILE,"$customhtml") || die "Can't open input file: $!\n";
   while (<INFILE>) {
     print;
   }
   close(INFILE);
}
}

# display_data_entries - display current directory for user,
#    display all customized headings, display all entries in 
#    either log dictionary or remoteobjects dictionary. Special
#    case entry displayed at end - for non-JGOFS entries.

sub display_data_entries {

# Check that there is some data to display (.remoteobjects or .log)
#
if (-r $dictionary || -r $logentry || -r ".almost") {

   print "\n<H2 class=\"titleCatSep\">Directory of Data in $currentdir</H2>\n";
   if (-r $logentry) {
      print "<pre class=\"text\">\n";
      &readdct($logentry);
      print "\n</pre>";
   }
# Widened field of data to accomodate longer dataset names. - mda -- 28 March 2011 
   if (-r $dictionary) {
     print "<pre><strong><em class=\"sepLn\">Data                          Investigator               Description and Documentation</em></strong>";
     print "<br>\n";
     &readdct($dictionary);
     print "</pre>\n<p class=\"sepLn\">&nbsp;</p>\n";
   }
#
# Special case for non-objects or almost-objects
#
   if (-r ".almost") {
     print "\n<pre class=\"text\">\n";
     open(INFILE,".almost");
     while (<INFILE>) {
      print;
     }
     close(INFILE);
     print "\n</pre>";
   }
   print "\n<p class=\"sepLn\">&nbsp;</p>\n";

} else {

  print "<p class=\"hilite\">No data found at this level. Select another category to view data.</p>\n";
}	# end of if block and subr (not sure how)


# -------------------------------------------
# display_related_data - display related data,
#    Special case entry displayed at end - for related entries.
#
sub display_related_data {

foreach $dirLevel ( "$currentdir", "$topdir" ) {
next if (("$currentdir" eq "$topdir") && ($second));
chdir("$jgroot$dirLevel");

# Check that there is related data to display (.related)
#

if ( -r ".related" ) {
#
# Special case for related data sources
#
   print "\n<H2 class=\"titleCatSep\">Directory of Data Related to $dirLevel</H2>\n";
   open(INFILE,".related");
     while (<INFILE>) {
       print;
     }
   close(INFILE); 
   print "\n\n";

}	# end if related
$second = "TRUE";
} 	# end foreach dirLevel to check for related
}	# end subr



# -------------------------------------------
#  readdct - gets a dictionary entry from 'get_entry' routine,
#            prints it, using 'print_entry' routine
#
sub readdct{
   local($whichone) = pop(@_);
   local($ndx) = 0;

   open(INFILE,"$whichone") || die "Can't open input file: $!\n";
#
# extended URL includes reference back to this directory, same for all entries
#
   $dirstring="$fromhost"."/jg/dir"."$currentdir";
   while (&get_entry()) {
      &analyze_entry();
      &print_entry();
   }
   close(INFILE);
}

# -------------------------------------------
# get_entry - read the dictionary entry,
#             return false if no more entries
#
sub get_entry{
   local($i);
   local($true) = "1";
   local($false) = "0";

   @dictentry = "";

   if (eof(INFILE)) {
      $i = $false;
   } else {
      foreach $i (1,2,3) {
        $dictentry[$i] = <INFILE>;
        chop($dictentry[$i]) if defined $dictentry[$i];
      }
      $i = $true;
   }
}

# -------------------------------------------------------
# analyze_entry - populate the fields that are displayed, 
#                 based on these rules governing entries:
##############################################################################
#  Rules
#   0. entries consist of 3 lines, as:
#
#      line 1    displayed_object_name=information
#      line 2    - PI_name
#      line 3    - [optional_info] description
#                ^Note: the carot indicates column 1 start
#
#   1. 3rd line contains a brief description of the data, which will
#      become the link to the documentation file.  Optionally, a URL
#      can be included, BEFORE the brief description and separated from it
#      by a space, to indicate a change from the system default documentation
#      scheme (see below).  Syntax for this optional_info is a URL (can omit 
#      hostname, if local host) - only exception applies if documentation file 
#      name is x.info, where x is displayed_object_name:  can end URL in '/'
#      after the path and omit the displayed_object_name.
#           For example, this .remoteobjects entry:
#      phys_params=//another_machine/pacific/cruise77/phys_params
#      - J.Investigator
#      - http://another_machine/jg/info/pacific/cruise77/
#           directs dir to another_machine's /jg/info script which will
#      read a file named 'phys_params.info' in the systems's objects/ tree
#      subdirectory names pacific/cruise77. 
#
#   2. 2nd line contains the Investigator's name responsible for data
#
#   3. 1st line identifies an object name and some information required to 
#      reach it, as:
#
#      displayed_object_name=information_for_hypertext_link
#
#      2 possible forms for syntax of right hand side are
#
#     i.     http:[host]/jg/serv/path/to/object.html0[Xsub-selectionsY]
#    ii.     //host/path/to/objectname[Xsub-selectionsY]
#
#        where [ and ] indicate an optional portion of the syntax,
#          X is a "(", then a comma-separated list of sub-selection parameters 
#          Y is a ")", used to complete the list
##############################################################################
#
sub analyze_entry{
#
#  check that anything is written on 3rd line
#

 $descript = "";
 $addobjtoinfo = "n";
 if (length($dictentry[3]) > 2) {
#
#  when supplied, the optional info-serving information is a complete URL
#
    if ($dictentry[3] =~ /http:\//) {
#
#  a space separates the info serving host and path from the description
#
       $ndx = index($dictentry[3]," ",2);
       $infostring = substr($dictentry[3],2,$ndx-2);
       $descript = substr($dictentry[3],$ndx+1);
       $ndx = index($infostring,"//");
       if ($ndx != -1) {
          $infostring = substr($infostring,$ndx+2);
       } else {
         $ndx = index($infostring,"http:/");
         $infostring = substr($infostring,$ndx+6);
       }
#
#  option to omit the objectname from end of URL, signalled by ending with '/'
#
       if (substr($infostring,-1,1) eq '/') {
          $addobjtoinfo = "y";
       } else {
          $addobjtoinfo = "n";
       }
    } else {
#
#  optional info-serving information is not supplied, must create it
#
       $descript = substr($dictentry[3],2);
       if ($infoserver =~ /\/jg\//) {
         $infostring="$infoserver"."$currentdir";
       } else {
         $infostring="$infoserver"."$currentdir";
       }
       $addobjtoinfo = "y";
    }
 } else {
#
# no information provided on line
#
     $descript = "";
     $infostring="$infoserver"."$currentdir";
     $addobjtoinfo = "y";
 }
#
# the PI name is on the 2nd line of a dictionary entry
#
 $PI="";
 if (length($dictentry[2]) > 2) {
    $PI=substr($dictentry[2],2);
 } 

#
# 1st line has object=serving-host entry
#

 $URL4obj="";

 $dispobj=substr($dictentry[1],0,index($dictentry[1],"="));
 $rightside=substr($dictentry[1],(index($dictentry[1],"=")+1));

#
# the serving host is provided, strip off anything before it
#
  $ndx = index($rightside,"//");
  if ($ndx != -1) { 
    $rightside = substr($rightside,$ndx+2);
    $haveserver = "y";
  } else {  
    $haveserver = "";
  }

#
# enhancement to catch entries with subselections and htmlify them
#
 $subselections="";
 if ($rightside =~ /\/.+\(.+\)/ ) {
   $ndx = index($rightside,"(");
   $subselections=substr($rightside,$ndx+1,length($rightside)-$ndx-2);
   $rightside=substr($rightside,0,$ndx);
 }

#
# syntax of rightside can include full url: http:[hostname]/jg/serv/test.html0
#
 if ($rightside =~ /jg\/serv/) {
    if ($haveserver) {
      $URL4obj="$rightside";
    } else {
#
# might be form, http:/jg/serv/path/obj
#
      $ndx = index($rightside,"http:");
      if ($ndx != -1) {
         $rightside = substr($rightside,$ndx+5);
         $URL4obj="$fromhost"."$rightside";
      }
    }
    $datastring="$URL4obj";
    $URL4obj.="{dir="."$dirstring";
 } else {
#
# it is a pseudo-URL, with no serving process listed - insert /jg/serv
#

    $servhost=substr($rightside,0,index($rightside,"/",2));
    $fullobj=substr($rightside,index($rightside,"/",2));

    if ($servhost =~ /\/\//) {
       $servhost = substr($servhost,2);
    }
    $URL4obj="$servhost"."/jg/serv"."$fullobj".".html0";
    $datastring=$URL4obj;
    $URL4obj.="{dir="."$dirstring";
 }

 if ($addobjtoinfo eq "y") {
    $infostring .= $dispobj;
 }

 $URL4obj.=",info="."$infostring"."}";
#
# Dont be tempted to place a defined() around following test
#
 if ($subselections) {
    $URL4obj.="?"."$subselections";
    $datastring.="?"."$subselections";
 }

#
#  create link to info, now that the other URLs are completed
# 
 if ($datastring =~ /http/) {
   $URL4doc="$infostring"."{dir="."$dirstring".",data="."$datastring"."}";
 } else {
   $URL4doc="$infostring"."{dir="."$dirstring".",data="."$datastring"."}";
 }
}

# --------------------------------
# print_entry -  format the output
#
sub print_entry{
#
# print the object name as a hypertext link
#
# widened the field from 18 to 30 to give more space to dataset names - mda - 28 March 2011
#
   printf("<a href=\"http://%s\">%-30s</a>",$URL4obj, $dispobj);
#
# print the PI name
#
   if (defined($PI)) {
      printf("<strong>%-26s</strong>", $PI);
   } else {
      printf("%-20s", "  ");
   }
#
# finally, print description linked to the /jg/info script - or as supplied
#
   printf(" <a href=\"http://%s\">%s</a>\n", $URL4doc, $descript);

}
# --------------------------------
# display_data_categories - list all JGOFS categories of data
#   read subdirectories from topdir and down, displaying the
#   final element (tail) of the directory list.
#
sub display_data_categories {
#
# Always print the directory categories of data in the jgroot 
#
print "<H2 class=\"titleCatSep\">$office Data Categories</H2>";
print "<H4>Go to the indicated category of data by clicking on its name</H4>\n";
print "<pre class=\"text\">\n";
#
# Find all subdirectories in the given tree
#   check first if its the test environment
#
$find_command="find $jgroot$topdir -type d";
if ($test_env) {
   $find_command .= " -follow";
}
$find_command .= " -print";
open(FIND,"$find_command | sort |");

$|=1;
while ($dir = <FIND>) {
  chop $dir;
#
#  temporary addition to allow only test port access to merged_objects
#  but will list merged_products directory
#
  if (!$test_env) {
     if ($dir =~ "merged_obj") {
        next; 
     }
  }

#
# find all subdirectories containing 'objects' and the top of the tree
#
  if ($dir =~ /\/objects\// && $dir =~ /$topdir/) {
#
# the url pathitem for the link starts at $topdir
#
    $urldir = substr($dir,(rindex($dir,$topdir)));
#
# strip final '/' from $topdir (only one that has it), printf will insert it.
#
    if ($urldir eq $topdir) {
       $urldir = substr($topdir,0,(rindex($topdir,"/")));
    }
    @subdirs = split(/\//,$dir);
    foreach $diritem (@subdirs) {
     print "${dirspacer} ";
    }
    @revlist = reverse(@subdirs);
#
# get tail of directory list - last item in array of directories
#
    $diritem = shift(@revlist);
#
# mark the current directory being displayed by an arrow
#
    if ($curdir eq $urldir) {
      if (defined($imagedir)) {
          printf "<a href=\"http://%s/jg/dir%s/\">%s</a>    <img \
          src=\"%s/youhere.gif\">\n",$fromhost,$urldir,$diritem,$imagedir;
      } else {
          printf "<a href=\"http://%s/jg/dir%s/\">%s</a>    <-----\n",
                   $fromhost,$urldir,$diritem;
      }
    } else {
       printf "<a href=\"http://%s/jg/dir%s/\">%s</a>\n",$fromhost,$urldir,$diritem;
    }
  }
}
close(FIND);
print "</pre>\n<p>&nbsp;</p>\n";

print STDOUT ("\n",'<p><font size="-2">DIR version: ',$version,
	"</font></p>\n");
                                                                                                                                   
}
}	# end subr
