#!/usr/bin/perl

$version = "V2.00c -March 11, 2011";

#  script name:   dir-join2
#  used from  :   /users/jgofs/htmlbin
#  taken from dir to allow user to select an object for join
#   Christine Hammond 
#   September 8, 1997
#
#  March 11, 2011 rcg
#	Hand edeit for GLOBEC version. Began changes to include BCO-DMO,
#	but these are not complete nor tested.
#  August 17, 2006  rcg
#	Hand edit for CMarZ use from NEC version.
#  August 15, 2003  rcg
#	"Hand edit" the following changes:
#		replace program name from US GLOBEC Georges Bank to US GLOBEC
#		allow access to other two programs, not just Georges Bank 
#			directory
#		replace display_data_categories subroutine with the one from
#			dir so it will handle multiple directories in a table
#  January 18, 2000  rcg
#	"Hand edit" variables for use on default server as per CLH
#	instructions.  Then again latter in the day, per email.
#  mod: January 14, 2000 clh
#    same change as to 'dir' wherein all <pre> tags are better
#    switched off after any switch on, to enable LYNX to properly
#    align columns
#
#  mod: November 13, 1998 clh
#    assign variable refhost to be the Optionserver calling this prog
#    checks HTTP_REFERER on first pass, then, uses a default value
#    passes value of refhost on to next dir-join using QUERY_STRING
#  
############ Customized values for this site ###################
#$jghome="/users/jgofs";
$jghome="/data5/globec";
#$jghome="/data1/cmarz";
#$jghome = "whatever it is for BCO-DMO";
#$topdir="/jgofs/";
#$topdir="/globec/gb/";
$topdir="/globec/";
#$topdir="/CMarZ/";
#$topdir="/whatever it is for BCO-DMO/";

$default_option_server="optserv1.whoi.edu";
#
# DMO name, if appropriate
#
#$office="US JGOFS";
$office="US GLOBEC";
#$office="CMarZ";
#$office = "BCO-DMO";
#
# Location where the image(s) are located, if any
#
#$imagedir="http://usjgofs.whoi.edu/images";
$imagedir="http://globec.whoi.edu/images";
#$imagedir="http://nec.whoi.edu/images";
$test_port="8081";  #accesses from this port considered test environment

# Specify subdirectory strings that should begin new columns
# 	These correspond to the top directory name for each "program" or element
#	you want to show in a different column
$start_new_col_strings = "gb,nep,soglobec,nec,CMarZ,BCO-DMO";
@start_new_col_string = split /,/, $start_new_col_strings;	
for ($i=0; $i<=$#start_new_col_string; $i++) {
	$start_new_col_string[$i] =~ s/\s//g;
}	

# At which subdirectory level do you want to start "counting" object
#	subdirectories.  For /data5/globec/objects/globec, it is 4.
$start_count_subdirectory_level = "4";

############ End of Custom values for this site ################
#
$dictionary=".remoteobjects";
$logentry=".log";
$dirspacer = "  ";
$dirspacer = "&nbsp;&nbsp;";
$jgroot=$jghome."/objects";
$test_env=($ENV{"SERVER_PORT"} == $test_port);

# Define column width for directory table
$column_width = "&nbsp;" x 30;

#
# 
print "Content-type: text/html\n\n";
#
# strip the leading '//' from hostnames used by this script 
#
if ("$infoserver" =~ /\/\//) {
  $infoserver=substr($infoserver,2);
}
#
# get name of this server from env of httpd
#
if ($ENV{'SERVER_NAME'}) {
   $fromhost = $ENV{'SERVER_NAME'};
   if ($ENV{'SERVER_PORT'}) {
      $p=$ENV{'SERVER_PORT'};
      if ($p != 80) {
         $fromhost .= ":".$p;
      }
   }
} else {
   $fromhost = "usjgofs.whoi.edu";
}

if ($fromhost =~ /\/\//) {
  $fromhost=substr($fromhost,2);
}
#
# Get referring host - Optionserver
#
#  if there is a QS, it is the referring host, having been sent
#  by a previous dir-join click - see printf lines sending the 
#  directory categories display
#
if ($ENV{'QUERY_STRING'}) {
  $refhost = $ENV{'QUERY_STRING'};
#
# otherwise, check for an environment var which is present on
#   some but not all httpd environments.
#
} elsif ($ENV{'HTTP_REFERER'}) {
    $refhost = $ENV{'HTTP_REFERER'};
    if (index($refhost,"//") != -1) {
      $r = substr($refhost,7);
      $refhost = substr($r,0,index($r,'/'));
    }
} else {
#    $refhost="$default_option_server";  Change per CH email 2000/1/18
    $refhost=$default_option_host;
}

#
if ($ENV{'PATH_INFO'}) {
   $arg = $ENV{'PATH_INFO'};   
} else {
   $arg = $topdir;
}

$currentdir = $arg;

$curdir = $currentdir;
chop($curdir);

&display_data_entries;
&display_data_categories;

# -------------------------Subroutines----------------------------
# display_data_entries - display current directory for user,
#    display all customized headings, display all entries in 
#    either log dictionary or remoteobjects dictionary. Special
#    case entry displayed at end - for non-JGOFS entries.
#
sub display_data_entries {

print "<title>$currentdir data directory page</title>\n";

chdir("$jgroot$currentdir");
#
# Always check for a file of HTML code to customizes whole system
#
print "<center>";
print "<font size=\"7\"><b>$office Data System</b></font>\n";
print "</center>";

#
# Check that there is some data to display (.remoteobjects or .log)
#

if (-r $dictionary || -r $logentry) {

   print "\n<H2>Directory of Data in $currentdir</H2>\n";
   print "<pre>\n";
   if (-r $logentry) {
      &readdct($logentry);
       print "\n</pre>";
   }
 
   if (-r $dictionary) {
     print "<pre><strong><em>Data                Investigator           Description and Documentation</em></strong>\n";
     print "<hr>\n";
     &readdct($dictionary);
   }
   print "<hr>\n</pre>";

}
#
# Special case for non-objects or almost-objects
#	*** commented out for dir-join2
#	*** because we cannot necessarily
#	*** join with 'almost-objects'
#
#   if (-r ".almost") {
#       print "<pre><hr>\n";
#       open(INFILE,".almost");
#       while (<INFILE>) {
#        print;
#       }
#       print "\n</pre>";
#   }

}

# ------------------------------------------
#  readdct - gets a dictionary entry from 'get_entry' routine,
#            prints it, using 'print_entry' routine
#
sub readdct{
   local($whichone) = pop(@_);
   local($ndx) = 0;

   open(INFILE,"$whichone") || die "Can't open input file: $!\n";

   while (&get_entry()) {
      &analyze_entry();
      &print_entry();
   }
   close(INFILE);
}

# -------------------------------------------
# get_entry - read the dictionary entry,
#             return false if no more entries
#
sub get_entry{
   local($i);
   local($true) = "1";
   local($false) = "0";

   @dictentry = "";

   if (eof(INFILE)) {
      $i = $false;
   } else {
      foreach $i (1,2,3) {
        $dictentry[$i] = <INFILE>;
        chop($dictentry[$i]);
      }
      $i = $true;
   }
}


# -------------------------------------------------------
# analyze_entry - populate the fields that are displayed, 
#                 based on these rules governing entries:
##############################################################################
#  Rules
#   0. entries consist of 3 lines, as:
#
#      line 1    displayed_object_name=information
#      line 2    - PI_name
#      line 3    - [optional_info] description
#                ^Note: the carot indicates column 1 start
#
#   1. 3rd line contains a brief description of the data, which will
#      become the link to the documentation file.  Optionally, a URL
#      can be included, BEFORE the brief description and separated from it
#      by a space, to indicate a change from the system default documentation
#      scheme (see below).  Syntax for this optional_info is a URL (can omit 
#      hostname, if local host) - only exception applies if documentation file 
#      name is x.info, where x is displayed_object_name:  can end URL in '/'
#      after the path and omit the displayed_object_name.
#           For example, this .remoteobjects entry:
#      phys_params=//another_machine/pacific/cruise77/phys_params
#      - J.Investigator
#      - http://another_machine/jg/info/pacific/cruise77/
#           directs dir to another_machine's /jg/info script which will
#      read a file named 'phys_params.info' in the systems's objects/ tree
#      subdirectory names pacific/cruise77. 
#
#   2. 2nd line contains the Investigator's name responsible for data
#
#   3. 1st line identifies an object name and some information required to 
#      reach it, as:
#
#      displayed_object_name=information_for_hypertext_link
#
#      2 possible forms for syntax of right hand side are
#
#     i.     http:[host]/jg/serv/path/to/object.html0[Xsub-selectionsY]
#    ii.     //host/path/to/objectname[Xsub-selectionsY]
#
#        where [ and ] indicate an optional portion of the syntax,
#          X is a "(", then a comma-separated list of sub-selection parameters 
#          Y is a ")", used to complete the list
##############################################################################
#
sub analyze_entry{
#
#  check that anything is written on 3rd line
#

 $descript = "";
 $addobjtoinfo = "n";
 if (length($dictentry[3]) > 2) {
#
#  when supplied, the optional info-serving information is a complete URL
#
    if ($dictentry[3] =~ /http:\//) {
#
#  a space separates the info serving host and path from the description
#
       $ndx = index($dictentry[3]," ",2);
       $descript = substr($dictentry[3],$ndx+1);

    } else {

       $descript = substr($dictentry[3],2);
    }
 } else {
#
# no information provided on line
#
     $descript = "";
 }
#
# the PI name is on the 2nd line of a dictionary entry
#
 $PI="";
 if (length($dictentry[2]) > 2) {
    $PI=substr($dictentry[2],2);
 } 

#
# 1st line has object=serving-host entry
#

 $URL4obj="";

 $dispobj=substr($dictentry[1],0,index($dictentry[1],"="));
 $rightside=substr($dictentry[1],(index($dictentry[1],"=")+1));

#
# the serving host is provided, strip off anything before it
#
  $ndx = index($rightside,"//");
  if ($ndx != -1) { 
    $rightside = substr($rightside,$ndx+2);
    $haveserver = "y";
  } else {  
    $haveserver = "";
  }

#
# enhancement to catch entries with subselections and htmlify them
#
 $subselections="";
 if ($rightside =~ /\/.+\(.+\)/ ) {
   $ndx = index($rightside,"(");
   $subselections=substr($rightside,$ndx+1,length($rightside)-$ndx-2);
   $rightside=substr($rightside,0,$ndx);
 }

#
# syntax of rightside can include full url: http:[hostname]/jg/serv/test.html0
#
 if ($rightside =~ /jg\/serv/) {
    if ($haveserver) {
      $URL4obj="$rightside";
    } else {
#
# might be form, http:/jg/serv/path/obj
#
      $ndx = index($rightside,"http:");
      if ($ndx != -1) {
         $rightside = substr($rightside,$ndx+5);
         $URL4obj="$fromhost"."$rightside";
      }
    }
#    $datastring="$URL4obj";
#    $URL4obj.="{dir="."$dirstring";
 } else {
#
# it is a pseudo-URL, with no serving process listed - insert /jg/serv
#

    $servhost=substr($rightside,0,index($rightside,"/",2));
    $fullobj=substr($rightside,index($rightside,"/",2));

    if ($servhost =~ /\/\//) {
       $servhost = substr($servhost,2);
    }
    $URL4obj="$servhost"."$fullobj".".html0";
 }

 if ($subselections) {
    $URL4obj.="?"."$subselections";
 }

}

# --------------------------------
# print_entry -  format the output
#
sub print_entry{
#
# print the object name as a hypertext link
#
   $URL4obj =~ s/\/jg\/serv//;
   printf("<a href=\"http://%s/jg/getobj2//%s\">%-18s</a>",
             $refhost, $URL4obj, $dispobj);
#
# print the PI name
#
   if ($PI) {
      printf("<b>%-20s</b>", $PI);
   } else {
      printf("%-20s", "  ");
   }
#
# finally, print description of the object
#
   print "$descript\n";

}


# --------------------------------
# display_data_categories - list all JGOFS categories of data
#   read subdirectories from topdir and down, displaying the
#   final element (tail) of the directory list.

# Replace this subroutine with the one from dir so it displays multiple
# directories in a table rather than a long line.  August 15, 2003

sub display_data_categories {
#
my ($c, @col, @col_pointer, $entry, 
	$j, $k, $line_out, $max, $max_col, $max_line_out, $more);
	
$max_col = $#start_new_col_string;
for ($c=0; $c <= $max_col; $c++) {
	$col_pointer[$c] = -1;
}

#
# Always print the directory categories of data in the jgroot 
#
print "<H2>$office Data Categories</H2>\n";
print "<b>Go to the indicated category of data by clicking on its name</b><br>\n";
#print "<p><pre>\n";
#print "<p>\n";
#
# Find all subdirectories in the given tree
#   check first if its the test environment
#
$find_command="find $jgroot$topdir -type d";
if ($test_env) {
   $find_command .= " -follow";
}
$find_command .= " -print";
open(FIND,"$find_command | sort |");

$|=1;

#Do special multicolumn printing 

while ($dir = <FIND>) {
  chop $dir;
  $entry = "";
#
# find all subdirectories containing 'objects' and the top of the tree
#
  if ($dir =~ /\/objects\// && $dir =~ /$topdir/) {
#
# the url pathitem for the link starts at $topdir
#
    $urldir = substr($dir,(rindex($dir,$topdir)));
#
# strip final '/' from $topdir (only one that has it), printf will insert it.
#
    if ($urldir eq $topdir) {
       $urldir = substr($topdir,0,(rindex($topdir,"/")));
    }
    @subdirs = split(/\//,$dir);
#    print "***debug, subdirs=@subdirs\n";
#    foreach $diritem (@subdirs) {
#     print $dirspacer;
    for ($j=0; $j <= $#subdirs-$start_count_subdirectory_level; $j++) {
     $entry = $entry . $dirspacer;
    }
    @revlist = reverse(@subdirs);
#
# get tail of directory list - last item in array of directories
#
    $diritem = shift(@revlist);
#
# mark the current directory being displayed by an arrow
#
    if ($curdir eq $urldir) {
      if (defined($imagedir)) {
#          printf "     <a href=\"http://$fromhost/jg/dir%s/\">%s</a>    <img \
#          src=\"%s/youhere.gif\">\n",$urldir,$diritem,$imagedir;
           $entry = $entry . 
          	sprintf "<a href=\"http://$fromhost/jg/dir%s/\">%s</a>    <img \
          src=\"%s/youhere.gif\">",$urldir,$diritem,$imagedir;
      } else {
#          printf "     <a href=\"http://$fromhost/jg/dir%s/\">%s</a>    <-----\n",
#                   $urldir,$diritem;
           $entry = $entry . 
          	sprintf "<a href=\"http://$fromhost/jg/dir%s/\">%s</a>    <-----",
                   $urldir,$diritem; 
      }
    } else {
#       printf "     <a href=\"http://$fromhost/jg/dir%s/\">%s</a>\n",$urldir,$diritem;
        $entry = $entry . 
       	     sprintf "<a href=\"http://$fromhost/jg/dir%s/\">%s</a>",$urldir,$diritem;
    }


    
    for ($c = 0; $c <= $max_col; $c++) {
#    	print "***debug, start_new_col_string[$c]=$start_new_col_string[$c]\n";
    	$k = index $entry, $start_new_col_string[$c];
#    	print "***debug, index=$k\n";
    	if ( $k > 0 ) {
    		$col_pointer[$c] = $col_pointer[$c] + 1;
    		$j = $col_pointer[$c];
    		$col[$c]->[$j] = $entry;
#    		print "***debug, col entry=$col[$c]->[$j]\n";
   		last;
    	}
    	else {
    		next;
    	}
    }
  }
}
close(FIND);

#print "***debug, col_pointer[0]=$col_pointer[0], col_pointer[1]=$col_pointer[1]\n";
#print "***debug, col[0][26]=$col[0]->[26]\n";

$max_line_out = -1;
for ($c = 0; $c <= $max_col; $c++) {
	if ( $max_line_out < $col_pointer[$c] ) { $max_line_out = $col_pointer[$c] }
}

#LINEOUT:
print "<table border=0>\n<tr>\n";
for ($c = 0; $c <= $max_col; $c++) {
	print "<td>$column_width</td>\n";
}
print "</tr>\n";
	
for ($line_out = 0; $line_out<=$max_line_out; $line_out++) {
  $entry = "";
  print "<tr>\n";
#  print "***debug, line_out=$line_out\n";
  for ($c = 0; $c <= $max_col; $c++) {
#  	print "***debug, c=$c\n";
#  	print "***debug, col_pointer[$c]=$col_pointer[$c]\n";
  	$max = $line_out;
  	if ( $col_pointer[$c] < $max ) {$max = $col_pointer[$c] } 	
	for ($j=$line_out; $j<=$max; $j++) {
#		print "***debug, col $c of $j=$col[$c]->[$j]\n";
  		$entry = $entry . "<td>" . $col[$c]->[$j] . "</td>";
  	}
  }
  $entry =~ s/^\s//g;
  print "$entry\n";
  if ($c == $max_col ) { print "</tr>"; }
#  if ( $entry == "" ) { last LINEOUT}
}
print "</table>\n";    	
#print "</pre>\n";
print "<p><hr><i>dir version $version</i>\n";
}

