#!/usr/bin/perl -w
#
#  script name:  info
#  used from  :  jgofsdir/htmlbin
#  converted from info (/bin/csh script) originally written by Glenn Flierl
#  modified to read from .info files by Christine Hammond
#
# NOTE: /images/dir.gif and /images/datadisp.gif are used by this script
#  if you do not have these buttons, you can get them on anonymous ftp at
#  dataone.whoi.edu (128.128.80.28) in /pub/images [date of this note
#  unknown  WJS Jul 10]

#	Versioning (invented by WJS Jan 10)
#    v 1 - G Flierl csh script
#    v 2 - C Hammond mod to perl
#    v 3 - (several) addition of BCO-DMO database as source for info

{
  $version = "info v 4.1  4 Aug 2010";
#  4 Aug 10  v 4.1  WJS
#	Honor env vars over values from build-env.pl
#	Add REMOTE_TRADITIONAL INFO_SOURCE
#	  [begin v 4.1]

#  2 Jul 10  v 4.0b  WJS
#	Bug fix: [hunh - I think this is perl being naughty]  Explicitly
#    "clear" @ARGV before require of build-env.pl.  Under unknown
#    circumstances, ARGV is defined to be ENV{'QUERY_STRING'} 
#	  [begin v 4.0b]
# 28 Jun 10  v 4.0a  WJS
#	Bug fix: info= portion of URL for data button should not
#    include extended URL info
#	Remove leading and trailing blanks from keywords in lists.
#    Let's see how many lists I miss.
#	  [begin v 4.0a]
#  3 Mar 10  v 4.0  WJS
#	Rewrite using OOserver parsing utilities
#	Generalize idea of looking in various places to get sources for info
#	Stiffen error checking
#	Pre v 4.0 "up top" comments moved to info_revision.doc
#	  [needs build-env.pl]
#	  [needs perl_utils.pl]
#	  [needs parse_path_info]
#	  [begin v 4.0]

#   General control switches and their defaults
  $action_on_source_error_env_var = 'INFO_ACTION_ON_DATA_SOURCE_ERROR';
  $action_on_logging_failure_env_var = 'INFO_ACTION_ON_LOGGING_FAILURE';
  $log_misses_env_var = 'INFO_LOG_MISSES'; 
  $log_all_sources_env_var = 'INFO_LOG_ALL_SOURCES';
  $default_action_on_source_error = 'CONTINUE';
  $default_action_on_logging_failure = 'CONTINUE'; 
  $default_log_misses_sink_list = "";
  $default_log_all_sink_list = "";
#   Also, old USEDATABASE and INFO_USEDATABASE env vars are hardcoded
#   into sub get_sources_from_old_booleans

#   BCO-DMO control switches and their defaults
  $BCODMO_host_port_env_var = 'INFO_BCO-DMO_HOST_PORT';
  $default_BCODMO_host_port = 'access1.bco-dmo.org';
  $BCODMO_access_techniques_env_var = 'INFO_BCO-DMO_ACCESS_TECHNIQUES';
  $default_BCODMO_access_techniques = 
			'DATASET_URL,MANUFACTURED_DATASET_URL,DDIR_OBJ';
  $BCODMO_attempt_all_techniques_env_var='INFO_BCO-DMO_ATTEMPT_ALL_TECHNIQUES';
  $default_BCODMO_attempt_all_techniques = 0;

#   REMOTE_TRADITIONAL control switches and their defaults
  $remote_traditional_servers_env_var = 'INFO_REMOTE_TRADITIONAL_SERVERS';
  $remote_traditional_servers_default = 'INFOSERVER';
  $remote_traditional_GLOBEC_server = "http://globec.whoi.edu/jg/info_v2";
  $attempt_all_remote_traditional_servers_env_var = 
				'INFO_ATTEMPT_ALL_REMOTE_TRADITIONAL_SERVERS';
  $default_attempt_all_remote_traditional_servers = 0;

  $file_spec_this_program = $0;
  ($file_name_this_program) = ($file_spec_this_program =~ q|.*/(.*)|);
  $file_name_this_program ||
			($file_name_this_program = $file_spec_this_program);

  require ("cgi-lib.pl");

#   We must get these headers in place in case we die trying to
#   things up (else die writes to stdout w/o headers, causing 500 error)
#   Code below is from printheader in perl_utils.pl, which we'd love to
#   use, except we don't have it yet!  Note we don't check to see if
#   the require for cgi-lib, which defines PrintHeader, succeeded.  Even
#   I give up sometimes.
  print &PrintHeader();
  $version && (print "<!--This page prepared by $version//-->\n");
  $print_header_done = 1;

#   Set up this JGOFS' server's env vars.  Save what we have first, however,
#   to allow JGOFS' server's info control params to be overridden by
#   "special-case" env vars.  Implemented in sub env_var_or_default
  %pre_build_env_environment = %ENV;
#   NB: name of next variable is NOT arbitrary - see build-env.doc
  $build_env_file_name = "../build-env.pl";
  ((-e $build_env_file_name) && (-r $build_env_file_name)) ||
			&CgiDie("Cannot find readable $build_env_file_name");
#   build-env.pl sensitive to @ARGV.  Under some circumstances, perl is
#   nice enough to receive $ENV{'QUERY_STRING'} as $ARGV[0] even if a human
#   doesn't specify it
  @save_argv = @ARGV;
  @ARGV = ();
  require $build_env_file_name;

  ($jgofs_root = $ENV{'JGOFSDIR'}) ||
	die "Did not get valid JGOFSDIR env var from $build_env_file";
#   Reflect difference in name of optbin directory between OO & data
#   servers.  Sigh.
  $optbin = ($ENV{'OPT'}) ? "optbin" : "htmlbin";
  $utils_file = "$jgofs_root/$optbin/perl_utils.pl";
  ((-e $utils_file) && (-r $utils_file)) ||
			&CgiDie("Cannot find readable $utils_file");
  require $utils_file;

  $object_root = "$jgofs_root/objects";

  $parse_path_info_executable = "$jgofs_root/bin/parse_path_info";
  &check_x_access ($parse_path_info_executable);

  ($imagedir = $ENV{'BUTTONIMAGESDIR'}) || 
	&quit ("Did not get BUTTONIMAGESDIR from $build_env_file");
  
#   Get error and logging options
  ($action_on_source_error,$action_on_logging_failure,
	$log_misses_sink_list,$log_all_sink_list)	=
						&get_runtime_options();
#   Set up hash of open status of various sinks.  Allows multiple sinks
#   to go to the same file (as long as the file is named the same way
#   each time)
  $log_sink_handle_and_open_status{"STDERR"} = 
	$log_sink_handle_and_open_status{"stderr"} =
	$log_sink_handle_and_open_status{"/dev/stderr"} =
							"STDERR,1";
  $log_sink_handle_and_open_status{"STDOUT"} = 
	$log_sink_handle_and_open_status{"stdout"} =
	$log_sink_handle_and_open_status{"/dev/stdout"} =
							"STDOUT,1";
  $n_sinks = 0;
  foreach (split (/\,/,$log_all_sink_list)) {
    s/^\s*//;
    s/\s*$//;
    if ( ! (defined $log_sink_handle_and_open_status{$_})  ) {
      $n_sinks++;
      $log_sink_handle_and_open_status{$_} = "SINK_$n_sinks,0";
    }
  }
  foreach (split (/\,/,$log_misses_sink_list)) {
    s/^\s*//;
    s/\s*$//;
    if ( ! (defined $log_sink_handle_and_open_status{$_})  ) {
      $n_sinks++;
      $log_sink_handle_and_open_status{$_} = "SINK_$n_sinks,0";
    }
  }

#   Get list of places we need to check for info
  @info_sources = &get_info_sources();

#   Get fields of PATH_INFO env var
  %path_info_fields = split(/[,=]/,
			    &parse_path_info($parse_path_info_executable),
			    -1);

#   Separate obj spec into its subfields
  ($status,$object_host,$object_subdir,$object_name,$object_args) =
			&parse_object_spec($path_info_fields{"object"});
  ($status eq "NG") && 
	(&quit ("Trouble parsing object spec in PATH_INFO",
		"Problem = $object_host",
		"Object spec = " . $path_info_fields{"object"})
	);
  $object_args && ( &quit ("Object spec has sel/proj list and should not",
			     "Object spec = " . $path_info_fields{"object"}) );
  (defined $object_host) || ($object_host = "");
  (defined $object_subdir) || ($object_subdir = "");

  $content_found = 0;
  $search_failure_text = "";
  foreach (@info_sources) {
    if ($_ eq "BCO-DMO") {
      ($per_source_content_found = &do_BCODMO()) || 
					&add_search_failure_text ("database");
    } elsif ($_ eq "TRADITIONAL") {
      ($per_source_content_found = &do_TRADITIONAL()) || 
			 &add_search_failure_text ("text based metadata file");
    } elsif ($_ eq "REMOTE_TRADITIONAL") {
      ($per_source_content_found = &do_REMOTE_TRADITIONAL()) || 
		  &add_search_failure_text ("remote text based metadata file");
    } else {
      &quit ("Illegal value for INFO_SOURCES ($_)",
	"Acceptable values are BCO-DMO, TRADITIONAL and REMOTE_TRADITIONAL");
    }
#     If we found something, leave ... unless we were asked to
#     look everywhere (presumably for QC purposes)
    if ($per_source_content_found != 0) {
      $content_found = $per_source_content_found;
      $log_all_sink_list ||  last;
    }
  }
  ($content_found == 0) && 
    (print STDOUT 
	"Could not find $search_failure_text\n " .
	"for object $object_name\n");

#   turn off pre tag if still active.  (Wonder if there should be
#   </body></html>, etc tags.  Not in info v 3)
  $open_pre_tag && (print "</pre>\n") && ($open_pre_tag = 0);

  $log_all_sink_list && &close_sinks ($log_all_sink_list);
  $log_misses_sink_list && &close_sinks ($log_misses_sink_list);

  undef $print_header_done;	# Avoid "only 1 use" diagnostic
  undef @save_argv;

#   Abnormal status (1; TRUE) if we found nothing ($content_found == 0)
#   Other exits via &quit, which calls CgiDie.  Forget what its status is,
#     but it's not 0, and I don't think it's 1 either
  exit ($content_found == 0);

}


sub do_TRADITIONAL
{
  my ($traditional_file);
  my ($msg,$n_recs);

  $traditional_file = $object_root;
  $object_subdir && ($traditional_file .= "/$object_subdir");
  $traditional_file .= "/$object_name.info";

  ($log_all_sink_list) && 
	(&log_attempt 
		("Going to TRADITIONAL source $traditional_file",
		$log_all_sink_list)
	);

  $n_recs = 0;
  if (open(INFILE, $traditional_file)) {
    &output_top;
    while (<INFILE>) {
      $n_recs++;
#	If data have already been found ($content_found !=0)
#	don't do any printing, etc.  We are only here to find out
#	if this source has any data, and if we're here, the answer is
#	yes and we can leave
      ($content_found == 0) || last;
      print $_;		# Be nice to know if there was a read error ...
    }
    close(INFILE);
    if ($content_found == 0) {
      $open_pre_tag && (print "</pre>\n") && ($open_pre_tag = 0);
      print "<hr>\n";
      print qq|<p><font size="-2">|;
      print "This document is created from the content of the info file.";
      print "</font></p>\n";
    }
  } else {
   $msg = "*** ERROR opening $traditional_file. \$!=$!";
   ($log_all_sink_list) && (&log_attempt ($msg,$log_all_sink_list));
   ($log_misses_sink_list) && (&log_attempt ($msg,$log_misses_sink_list));
   ($action_on_source_error eq "ABORT") && (&quit ($msg));
  }
  ($n_recs == 0) && ($log_misses_sink_list) && 
	(&log_attempt 
		("No content from TRADITIONAL access to $traditional_file",
			$log_misses_sink_list)
	);
  return $n_recs;
}



sub do_REMOTE_TRADITIONAL
{
  my ($log_all_sources);
  my ($status,$msg);
  my ($remote_traditional_server,$remote_traditional_servers);
  my ($remote_URL);
  my (@legal_remote_traditional_servers,@remote_traditional_servers);
  my ($n,$found,$length_printed_content);

  my ($info_program_name_on_dataservers) = "info";

#   env var name and default list are globals.
  $remote_traditional_servers = 
	&env_var_or_default($remote_traditional_servers_env_var,
			    $remote_traditional_servers_default); 
  $remote_traditional_servers || &quit
       ("remote traditional source requested but remote server not specified");
  $remote_traditional_servers =~ s/\s+$//;
  $remote_traditional_servers || &quit
       ("remote traditional source requested but remote server not specified");
  $remote_traditional_servers =~ s/^\s+//;
  $remote_traditional_servers || &quit
       ("remote traditional source requested but remote server not specified");
  @remote_traditional_servers = split /\,/,$remote_traditional_servers;
  @legal_remote_traditional_servers = ('DATASERVER','INFOSERVER','GLOBEC');
  foreach (@remote_traditional_servers) {
    s/^\s*//;
    s/\s*$//;
    $n = 0;
    $found = 0;
    while ($n < @legal_remote_traditional_servers) {
      if (  uc($legal_remote_traditional_servers[$n]) eq uc($_)  )   {
	$legal_remote_traditional_servers[$n] = 
	 $legal_remote_traditional_servers[$#legal_remote_traditional_servers];
	$#legal_remote_traditional_servers--;
	$found = 1;
	last;
      }
      $n++;
    }
    $found || &quit ("Illegal or duplicate remote traditional server $_");
    $_ = uc($_);
  }

#   Get value for env var from the env_var_or_default list of places,
#   then put it back in the environment so get_boolean_env_var can find
#   find it.  Do it that way to use the get_boolean regularization of
#   TRUE, 1, etc.
  $ENV{$attempt_all_remote_traditional_servers_env_var} =
	&env_var_or_default($attempt_all_remote_traditional_servers_env_var,
			    $default_attempt_all_remote_traditional_servers);
  ($attempt_all_remote_traditional_servers = 
	&get_boolean_env_var($attempt_all_remote_traditional_servers_env_var))
				||
     &quit("Internal error: $attempt_all_remote_traditional_servers_env_var " .
			"undefined/empty despite giving defn a default value");

  $content = "";
  $length_printed_content = 0;
#   $content_found is a cumulative boolean reflecting whether anything has
#   been found.  Normally, if something is found, that's it.  However,
#   if user has asked to go through all remote traditional servers, we do it
  $content_found = 0;
  foreach (@remote_traditional_servers) {
    $content_found && (! $attempt_all_remote_traditional_servers) && last;
    $remote_traditional_server = "";
    if ($_ eq 'DATASERVER') {
      if ($path_info_fields{"data"}) {
	($remote_traditional_server) = split q|/jg/|,$path_info_fields{"data"};
	$remote_traditional_server || 
		&quit("No /jg/ in data= portion of " . $ENV{'PATH_INFO'});
#	  Crude attempt at avoiding loops.  With all the possible synonymy
#	  regarding http servers, no idea how effective this is or if a
#	  loop breaker is possible
	if (
	      ($remote_traditional_server eq $ENV{'SERVER_NAME'}) 
					&&
	      ($file_spec_this_program eq $info_program_name_on_dataservers)
	   )								     {
	  $msg = "DATASERVER REMOTE_TRADITIONAL_SOURCE: problem - " .
		"info and data servers are the same, creating loop.  " .
		"Use TRADITIONAL INFO_SOURCE";
	  $log_all_sink_list  &&   &log_attempt ($msg,$log_all_sink_list);
	  $log_misses_sink_list  &&   &log_attempt($msg,$log_misses_sink_list);
	  $remote_traditional_server = "";
	} else {
	  $remote_traditional_server .= 
				"/jg/" . $info_program_name_on_dataservers;
	}
      } else {
	$msg = "DATASERVER REMOTE_TRADITIONAL_SOURCE: failure - no data= " .
								"in PATH_INFO";
	$log_all_sink_list  &&   &log_attempt ($msg,$log_all_sink_list);
	$log_misses_sink_list  &&   &log_attempt($msg,$log_misses_sink_list);
      }
    }
    if ($_ eq 'INFOSERVER') {
      ($remote_traditional_server = $ENV{'INFOSERVER'})
                        ||
      &quit("REMOTE_TRADITIONAL_SERVER INFOSERVER requested but no value for ".
            "JGOFS configuration variable INFOSERVER found");
    }
    if ($_ eq 'GLOBEC') {
      ($remote_traditional_server = $remote_traditional_GLOBEC_server)
			||
      &quit("Internal error: GLOBEC remote traditional server value ".
	    "empty or not defined");
    }
    $content = "";
    if ($remote_traditional_server) {
#	Make sure we start w/exactly 1 http://
      $remote_traditional_server =~ s/^http:\/\///;
      $remote_traditional_server = "http://" . $remote_traditional_server;

#	Send remote server everything we received.  Interesting question
#	about what happens if remote server is so old it doesn't understand
#	all of the extended URL... not sure if that ever happened, but
#	pretty sure it's of no consequence now
      $remote_URL = $remote_traditional_server . $ENV{'PATH_INFO'};

      ($status,$content) = &get_from_remote_source($_, $remote_URL);
#	Such logging as we decide to do for $status is done (for now) in
#	get_from_remote_source
      ($status eq "NG") && ($content = "");
    }
#     Print it, unless we've already printed something else, and are just
#     in this loop for checking purposes.  If we've already printed something
#     else, ($content_found != 0)
    if (  ( length($content) != 0 ) && ($content_found == 0)  )    {
      $content_found = 1;
      print "$content\n";
      $length_printed_content = length($content);
    }
  }

  return $length_printed_content;
}



sub do_BCODMO
{
  my ($host,$port,$host_port,$log_misses,$log_all_sources);
  my ($status,$dummy,$dummy1,$dummy2,$had_prefix,$original);
  my ($BCODMO_front_piece_URL);
  my ($access_techniques,@legal_access_techniques,@access_techniques);
  my ($n,$found,$length_printed_content);

#   Get BCO-DMO host spec, possibly w/a port number, then 
#   strip off leading // (and anything preceding
#   it) and handle a trailing : (and anything that follows)
  $host_port = 
      &env_var_or_default($BCODMO_host_port_env_var,$default_BCODMO_host_port);
  ($host,$port) = &parse_BCODMO_host_port($host_port);

#   Wonder if next is the URI...
#   Thought of parametrizing the cgi-bin app, but the app is associated w/
#      its own args, etc, which cannot really be parametrized either.
  $BCODMO_front_piece_URL = "http://$host";
  $port && ($BCODMO_front_piece_URL .= ":$port");
  $BCODMO_front_piece_URL .= "/bco-bin/getinfor.active?";

#   env var name and default list are globals.
  $access_techniques = 
	&env_var_or_default($BCODMO_access_techniques_env_var,
					$default_BCODMO_access_techniques); 
  $access_techniques || 
	&quit("BCODMO access requested but way to access not specified");
  $access_techniques =~ s/\s+$//;
  $access_techniques || 
	&quit("BCODMO access requested but way to access not specified");
  $access_techniques =~ s/^\s+//;
  $access_techniques || 
	&quit("BCODMO access requested but way to access not specified");
  @access_techniques = split /\,/,$access_techniques;
  @legal_access_techniques = 
			('DATASET_URL','MANUFACTURED_DATASET_URL','DDIR_OBJ');
  foreach (@access_techniques) {
    s/^\s*//;
    s/\s*$//;
    $n = 0;
    $found = 0;
    while ($n < @legal_access_techniques) {
      if (  uc($legal_access_techniques[$n]) eq uc($_)  )   {
	$legal_access_techniques[$n] = 
		$legal_access_techniques[$#legal_access_techniques];
	$#legal_access_techniques--;
	$found = 1;
	last;
      }
      $n++;
    }
    $found || &quit ("Illegal or duplicate BCO-DMO access technique $_");
    $_ = uc($_);
  }


#   Get value for env var from the env_var_or_default list of places,
#   then put it back in the environment so get_boolean_env_var can find
#   find it.  Do it that way to use the get_boolean regularization of
#   TRUE, 1, etc.
  $ENV{$BCODMO_attempt_all_techniques_env_var} =
	&env_var_or_default($BCODMO_attempt_all_techniques_env_var,
			    $default_BCODMO_attempt_all_techniques);
  ($BCODMO_attempt_all_techniques = 
		&get_boolean_env_var($BCODMO_attempt_all_techniques_env_var))
				||
     &quit("Internal error: $BCODMO_attempt_all_techniques_env_var " .
			"undefined/empty despite giving defn a default value");

#   Finally, do something!
#     Scheme: subject to user specs and in user order
#	      if we have data= in ext URL, send it in a dataset_url query
#	      if we do NOT have data= in ext URL, build one, guessing at
#	host (NB: this could be incorrect.  The info for data on this host
#	could be farmed out to a different box, and this box could be
#	serving as INFOSERVER for the data on a 3rd box)
#	      try a ddir=&obj= query
  $content = "";
  $length_printed_content = 0;
#   $content_found is a cumulative boolean reflecting whether anything has
#   been found.  Normally, if something is found, that's it.  However,
#   if user has asked to go through all techniques, we, well, go through
#   all techniques
  $content_found = 0;
  foreach (@access_techniques) {
    $content_found && (! $BCODMO_attempt_all_techniques) && last;
    ($_ eq 'DATASET_URL') && 
	($content = &get_from_BCODMO_dataset_url($BCODMO_front_piece_URL));
    ($_ eq 'MANUFACTURED_DATASET_URL') && 
	($content = &get_from_BCODMO_manufactured_dataset_url(
						$BCODMO_front_piece_URL));
    ($_ eq 'DDIR_OBJ') && ($content = &get_from_BCODMO_ddir_obj(
						$BCODMO_front_piece_URL));

#     Print it, unless we've already printed something else, and are just
#     in this loop for checking purposes.  If we've already printed something
#     else, ($content_found != 0)
    if (  ( length($content) != 0 ) && ($content_found == 0)  )    {
      $content_found = 1;
#       This block from v 3.0 (code reformatted, but output left alone)
      my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = 
							      localtime(time);
      my $date = 
	sprintf "%4d-%02d-%02d&nbsp;&nbsp;%02d:%02d:%02d", 
		$year + 1900, 
		$mon + 1, 
		$mday, 
		$hour, 
		$min, 
		$sec;
      &output_top;
      print "$content\n";
      print qq|<p><font size="-2">|;
      print "This document is created from the content " .
			"of the BCO-DMO metadata database." .
			"&nbsp;&nbsp;&nbsp;&nbsp;$date</font></p>\n";
      $length_printed_content = length($content);
    }
  }

  return $length_printed_content;
}




sub output_directory_and_data_buttons
{
#     Not at all clear what to do if info is not being run in a web
#     environment (or, for that matter, how to determine conclusively that
#     it IS being run in a web environment.
#	However, we can't build a good button w/o a REQUEST_URI (or a lot
#     of work), so let's just not put out any buttons if we don't have
#     a REQUEST_URI ...
  $ENV{'REQUEST_URI'} || return;

  if ($path_info_fields{"dir"}) {
    print qq|<a href="http://$path_info_fields{"dir"}">| .
	  qq|<img alt="[Directory]"| .
	  qq|border=0 src="$imagedir/dir.gif"></a> \n|;
  }

#     For returning to data object display, we need a new extended URL, 
#	w/fields pointing to dir and info. 
#     We need this since if we return to data server without these
#       pointers, the data server will assume it should use its default 
#	dir and info servers, which might not be appropriate
#       Of course, I can't at the moment think WHY it might not be
#	appropriate, since odds are the data server wrote the extended URL
#	we are about to use ...  OK, I found something.  It's dir that
#	puts the OBJDIR "displacement" into urls to dir.  Left to its own
#	devices, outer seems to use top level.  Don't know why this is,
#	but another fight for another time.

  if ($path_info_fields{"data"}) {

#     Rebuild the URL that pointed to this program
    $info_extended_URL_parameter = $ENV{'SERVER_NAME'};
    $ENV{'SERVER_PORT'} && 
		($info_extended_URL_parameter .= ":" . $ENV{'SERVER_PORT'});
#     Doc says that SCRIPT_NAME comes with a leading / (unless it's empty) 
#     Although an object spec need not start with a /, it does if it comes from
#     a URL, which all those in info come from
    $info_extended_URL_parameter .= 
			$ENV{'SCRIPT_NAME'} . $path_info_fields{"object"}; 
#     The link href is the relevant field of the extended URL sent to info
#     (ie, the data= portion), followed by the new extended URL, 
#     followed by the data server's selection/projection info, which 
#     was passed here as OUR QUERY_STRING even though info doesn't use it.
    $return_data_URL = "http://" . $path_info_fields{"data"} . "{";

#     Ext URL dir part is just a copy of what was sent here in our ext URL
    $path_info_fields{"dir"} && 
      ($return_data_URL .= "dir=" . $path_info_fields{"dir"} . ",");

#     Ext URL info part is the URL that got us here (set above)
    $return_data_URL .= "info=" . $info_extended_URL_parameter . "}";

    $ENV{'QUERY_STRING'} &&
      ($return_data_URL .= "?" . $ENV{'QUERY_STRING'});

    print qq|<a href="$return_data_URL"><img alt="[Data...]"| .
          qq| border=0 src="$imagedir/datadisp.gif"></a>\n|;
  }

  return;
}


sub get_info_sources
#   Settle the sources no matter how specified
#   Return comma-separated string of sources.  Sources not checked for
#      legality
{
  my ($default_info_sources) = 'TRADITIONAL';
  my ($usedatabase,$info_usedatabase);
  my ($value,$info_sources,@info_sources);

#   If either old boolean was specified, get a common, consistent value
#   (aborting if they are different).
  $value = &get_sources_from_old_booleans();

#	Now reconcile what we have w/any INFO_SOURCES.  For the moment,
#	we are not checking INFO_SOURCES syntax, just whether its value
#	matches the implied USEDATABASE sources
  $info_sources = &env_var_or_default('INFO_SOURCES',"");
  if ($info_sources) {
    $info_sources = uc($info_sources);
    ($info_sources eq $value) || ($value eq "") ||
	&quit ("Value of INFO_SOURCES ($info_sources) conflicts with value",
		" inferred from *USEDATABASE ($value)");
  } else {
    $info_sources = ($value) ? $value : $default_info_sources;
  }

  @info_sources = ();
  foreach (split (/\,/,$info_sources)) {
    s/^\s*//;
    s/\s*$//;
    push @info_sources,$_;
  }
  return @info_sources;
}


sub get_runtime_options
{
  my ($action_on_source_error,$action_on_logging_failure);
  my ($log_misses_sink_list,$log_all_sink_list);

  $log_misses_sink_list = 
        &env_var_or_default($log_misses_env_var,$default_log_misses_sink_list);
  $log_all_sink_list = 
        &env_var_or_default($log_all_sources_env_var,
				$default_log_all_sink_list);

  $action_on_source_error = 
	&env_var_or_default($action_on_source_error_env_var,
				$default_action_on_source_error);
  $action_on_logging_failure = 
	&env_var_or_default($action_on_logging_failure_env_var,
				$default_action_on_logging_failure);

  return uc($action_on_source_error),uc($action_on_logging_failure),
	 $log_misses_sink_list,$log_all_sink_list;
}

sub get_from_BCODMO_dataset_url
{
  my ($front_piece_URL) = @_;
  my ($status,$content,$msg);
  my ($thing,%thing_fields);

  if ( $thing = $path_info_fields{"data"}  )  {
#     Neither the data= value we input nor the dataset_url value we output
#     is really standard.  Sigh.  FWIW, we expect the input thing to not have a
#     leading http://, but it DOES have protocol & level fields.
#     Not clear what getinfor.active does at the tail end of its thing.
#     info v 3 produced a URL w/o the protocol and level fields, but 
#     WITH (at least sometimes) a trailing period.  I think I tried 
#     getinfor.active WITHOUT the trailing . and it worked, so that's 
#     what info v 4 is doing ... for now.
#	Note that parse_path_info does not care about an embedded /jg/serv.
#     For all it knows, there could be a jgofs_root/objects/jg/serv/.objects
#     file
    ($thing =~ m"^//") && ($thing = "http:" . $thing);
    ($thing =~ m"^http\://") || ($thing = "http://" . $thing);
    %thing_fields = split(/[,=]/,
			  &parse_path_info($parse_path_info_executable,$thing),
			  -1);
    ($status,$content) = &get_from_remote_source
      ("BCO-DMO", $front_piece_URL . "dataset_url=" . $thing_fields{"object"});
#      See comments in ddir section re NG status
    ($status eq "NG") && ($content = "");
  } else {
    $content = "";
    $msg = "BCO-DMO DATASET_URL technique: failure - no data= in PATH_INFO";
    $log_all_sink_list  &&   &log_attempt ($msg,$log_all_sink_list);
    $log_misses_sink_list  &&   &log_attempt($msg,$log_misses_sink_list);
  }

  return $content;
}



sub get_from_BCODMO_manufactured_dataset_url
{
  my ($front_piece_URL) = @_;
  my ($status,$content);
  my ($host,$url);

  if ( ! ($host = $object_host)) {
#   $object_host has port in it, while $ENV{'SERVER_NAME'} doesn't
#   Add a non-80 port (seems to have been some concern in v 3.0 about
#   port spec
    $host = $ENV{'SERVER_NAME'};
    $ENV{'SERVER_PORT'} && ($ENV{'SERVER_PORT'} != 80) &&
				($host .= ":" . $ENV{'SERVER_PORT'});
  }
#   If not in web environment, SERVER_NAME & SERVER_PORT not defined
#   Probably should set host = "the name of the host" (from a dns
#   point of view) (What is ENV{'HOST_NAME'}?  Appears in environment in
#   terminal sessions), but leave that for now (v 4.0)
  if ($host) {
    $url = "http://$host/jg/serv";
#     Should really use make_path_info for next 2 lines.  Oh well
    ($object_subdir eq "") || ($url .= "/$object_subdir");
    $url .= "/$object_name";
    ($status,$content) = &get_from_remote_source 
			("BCO-DMO", $front_piece_URL . "dataset_url=" . $url);
#     See comments below (where we try the ddir=&obj= query) re NG status
    ($status eq "NG") && ($content = "");
  } else {
    $content = "";
    $msg = "BCO-DMO MANUFACTURED_DATASET_URL technique: failure - " .
	       "Could not guess a host name (not running in web environment?)";
    $log_all_sink_list  &&   &log_attempt ($msg,$log_all_sink_list);
    $log_misses_sink_list  &&   &log_attempt($msg,$log_misses_sink_list);
  }
  return $content;
}



sub get_from_BCODMO_ddir_obj
{
  my ($front_piece_URL) = @_;
  my ($status,$content);
  my ($subdir);

#   subdir comes back from parse_object_spec w/o leading or trailing /
#   Adjust per empirical observation of URLs in access log
  $subdir = ($object_subdir eq "") ? "" : "/$object_subdir";

#   "NG" status comes back w/ 3 args.  However, if we wanted to 
#   abort on error, we wouldn't have come back.  Further, error has been
#   logged as desired.  So, we have a choice of continuing w/or w/o
#   yet another error message.  For now, w/o
  ($status,$content) = &get_from_remote_source
	      ("BCO-DMO", $front_piece_URL . "ddir=$subdir&obj=$object_name");
  ($status eq "NG") && ($content = "");
  return $content;
}



sub get_from_remote_source
#   Returns 2 or 3 element array
#	"OK", content (content may be of length 0)
#		or
#	"NG",http code,message corresponding to code
{
  my ($info_source,$URL,$dummy) = @_;
  my ($content,$status,$err_string);
  my ($http_get_object,$http_response_object);

  use LWP::UserAgent;
  $http_get_object = LWP::UserAgent->new;

  ($URL && ! $dummy) || 
	&quit ("Internal error - wrong # non-empty args to get_from_BCODMO");
  
  ($log_all_sink_list) && 
	(&log_attempt ("Going to $info_source with $URL",$log_all_sink_list));
  $http_response_object = $http_get_object->get($URL);

  $http_response_object->is_info && ($status = "INFORMATIONAL");
  $http_response_object->is_redirect && ($status = "REDIRECT");
  $http_response_object->is_error && ($status = "ERROR");
  $http_response_object->is_success && ($status = "SUCCESS");
  (defined $status) || 
    &quit ("Internal error - none of 4 expected statuses received from ->get");

#   Don't really know what to do if status not success or error
#   Decided to let error be NG and the others OK
  if ($status eq "ERROR") {
    $err_string = $status . " from $info_source; " . 
					$http_response_object->status_line;
    $log_all_sink_list  &&   &log_attempt ($err_string,$log_all_sink_list);
    $log_misses_sink_list  &&   &log_attempt($err_string,$log_misses_sink_list);
    ($action_on_source_error eq "ABORT")  &&   &quit($err_string);
    return "NG",$http_response_object->code,$http_response_object->message;
  }

  $content = $http_response_object->content;
  (length($content) == 0) && ($log_misses_sink_list) && 
	(&log_attempt ("No content from $info_source with $URL",
			$log_misses_sink_list)
	);
  return "OK",$content;
}



sub log_attempt
{
  my ($msg,$sink_list,$dummy) = @_;
  my ($handle,$status);
  ($sink_list && ! $dummy) ||
	&quit ("Internal error - wrong # non-empty args to log_attempt");

#   Next is probably an error, too, but getting tired of error handling
  ($msg eq "") && ($msg = "*** No msg in call to log_attempt ***");

  foreach (split /\,/,$sink_list) {
    ($handle,$status,$dummy) = split /\,/,$log_sink_handle_and_open_status{$_};
    ($handle && ! $dummy) ||
	&quit ("Internal error - log_sink_handle_and_open_status structure" .
		" not properly initialized"); 
    if ($status == 0) {
      $open_status = open ($handle, ">> $_");
      if ($open_status) {
	$status = 1;
	$log_sink_handle_and_open_status{$_} = "$handle,1";
      } else {
	&error ("Failure to open sink $_. \$! = $!",$sink_list);
      }
    }
    if ($status == 1) {
#       Be nice to check this for errors
      print $handle (&format_log_msg($msg) . "\n");
    }
  }

  return;
}

sub error
{
  my ($msg,$sink_list,$dummy) = @_;
  my ($handle,$status);
  ($sink_list && ! $dummy) ||
	&quit ("Internal error - wrong # non-empty args to error");

#   Next is probably an error, too, but getting tired of error handling
  ($msg eq "") && ($msg = "*** No msg in call to log_error ***");

  foreach (split /\,/,$sink_list) {
    ($handle,$status,$dummy) = split /\,/,$log_sink_handle_and_open_status{$_};
    ($handle && ! $dummy) ||
	&quit ("Internal error - log_sink_handle_and_open_status structure" .
		" not properly initialized"); 
    if ($status == 0) {
      $open_status = open ($handle, ">> $_");
      if ($open_status) {
	$status = 1;
	$log_sink_handle_and_open_status{$_} = "$handle,1";
      } else {
	($action_on_logging_failure eq "ABORT") &&
		&quit ("Failure to open sink $_. " .
			"open status = $open_status; \$! = $!");
      }
    }
    if ($status == 1) {
#       Be nice to check this for errors
      print $handle (&format_log_msg($msg) . "\n");
    }
  }

  return;
}

sub format_log_msg
{
#   For fun, emulate httpd access log format a bit... format is
#   by inspection only (the specs are probably out there on the internet)
#   Also, did not re-format the time ...
#   A difference: this code needs a thread ID, while http doesn't
  my ($msg,$dummy) = @_;
  my ($host,$thread,$time);

  ($msg && ! $dummy) ||
	&quit ("Internal error - wrong # non-empty args to format_log_msg");

#   Take a shot at ID'ing "the host calling us".  No guarantees ...
#   We could try using (better, including) ENV{"HOST_NAME"} along w/localhost
#	Would STILL be no guarantees - I have no idea where that comes from
#	or how it is associated w/dns entries, etc
  ($host = $ENV{"REMOTE_HOST"}) || ($host = $ENV{"REMOTE_ADDR"}) ||
						($host = "localhost");
#   Use PID for thread ID
  $thread = $$;

  $time = localtime();

  return "$host - - [$time] $thread: $msg";
}



sub close_sinks
{
  my ($sink_list,$dummy) = @_;
  my ($handle,$status);

  ($sink_list && ! $dummy) ||
	&quit ("Internal error - wrong # non-empty args to close_sinks");

  foreach (split /\,/,$sink_list) {
    ($handle,$status,$dummy) = split /\,/,$log_sink_handle_and_open_status{$_};
    ($handle && ! $dummy) ||
	&quit ("Internal error - log_sink_handle_and_open_status structure" .
		" not properly initialized"); 
    if (($status != 0) && ($handle ne "STDERR") && ($handle ne "STDOUT")) {
      close ($handle);
#	Mark handle closed so we don't try to close it again ... not
#	that I think perl cares, and not that we tested to see that the
#	close actually succeeded, etc.
      $log_sink_handle_and_open_status{$_} = "$handle,0";
    }
  }
  return;
}



sub parse_path_info

# Do a call to parse_path_info w/opt user-passed string (defaulting to the
# env var).  Lord knows that there ought to be enough
# code below to detect things like a plain old undefined env var!
{
  my ($executable,$PATH_INFO_string,$dummy) = @_;
  my ($command,$exit_status,$err_text,@status_info,$parsed_path_info);

  ($executable && ! $dummy) ||    
	&quit ("Internal error - wrong # non-empty args to parse_path_info");

  (defined $PATH_INFO_string) || ($PATH_INFO_string = "");

  $command = "$executable $PATH_INFO_string -nonewline";
  ($parsed_path_info,$exit_status,@status_info) = &backtick($command);
  if ($exit_status != 0) {
    $err_text = ($parsed_path_info) ?
	"parse_path_info return = $parsed_path_info" 
			: 
	"*** Empty string (or zero) returned from parse_path_info";

    &quit (&format_backtick_return_status($command,
					  $exit_status,
					  @status_info,
			        	  $err_text));
  }

  $parsed_path_info || 
	&quit ("JGOFS error of some kind. $parse_path_info completed ",
		"w/normal status but returned no output");

  return $parsed_path_info;
}


sub add_search_failure_text
#   3 states: 1st failure - set list to "new_text"
#	      2nd failure - set list to "either text_there or new_text"
#	      more	  - tack on " or new_text"
{
  my ($add_txt,$dummy) = @_;
  ($add_txt && ! $dummy)||
    &quit ("Internal error - wrong # non-empty args to add_search_failure_text");

  if ($search_failure_text) {
    ($search_failure_text =~ /^either /) ||
		($search_failure_text = "either $search_failure_text");
    $search_failure_text .= " or ";
  }

  $search_failure_text .= $add_txt;
  return;
}


sub get_sources_from_old_booleans
#   Input from global vars because of associated env var names, etc. Sloppy
{
  my ($value);
  my ($sources_represented_by_TRUE_USEDATABASE) = 'BCO-DMO,TRADITIONAL';
  my ($sources_represented_by_FALSE_USEDATABASE) = 'TRADITIONAL';

#   get_boolean_env_var returns "" for undefined env var, since we've
#   decided that these 2 states are equivalent
  $usedatabase = &get_boolean_env_var('USEDATABASE');
  $info_usedatabase = &get_boolean_env_var('INFO_USEDATABASE');

  ($usedatabase eq "") && ($info_usedatabase eq "") && return "";
  
  if ($usedatabase eq $info_usedatabase) {
    $value = $usedatabase;
  } else {
#     Get here if vals are different and at least one was specified.  
#     Still OK if ONLY one of them was specified
    ($usedatabase eq "") && ($value = $info_usedatabase);
    ($info_usedatabase eq "") && ($value = $usedatabase);
    (defined $value) || 
	&quit ("Conflicting values of USEDATABASE ($usedatabase) && ",
		"INFO_USEDATABASE ($info_usedatabase)");
  }

  ($value eq "TRUE") && ($value = $sources_represented_by_TRUE_USEDATABASE);
  ($value eq "FALSE") && ($value = $sources_represented_by_FALSE_USEDATABASE);

  return $value;
}


sub parse_BCODMO_host_port
#   This turned into a parse of the standard front half of a URL
#   Probably a standard routine for this
{
  my ($host,$dummy) = @_;
  my ($dummy1,$dummy2,$had_prefix,$original,$port);

  ($host && ! $dummy)||
    &quit ("Internal error - wrong # non-empty args to parse_BCODMO_host");

  $original = $host;
  ($host) = ($host =~ /^\s*(\S+)\s*$/);
  $host || 
	&quit("BCO-DMO host specification -->$original<-- is an empty string, ",
		"all whitespace, 0, or contains embedded whitespace");

  ($dummy,$dummy1,$dummy2) = split (m"//",$host,-1);
  (defined $dummy2) && 
	&quit ("BCO-DMO host specification ($original) contains embedded //");
  $had_prefix = defined($dummy1);
  $host = ($had_prefix) ? $dummy1 : $dummy;
  $dummy2 = ($had_prefix) ? " after //" : "";
  $host || &quit("BCO-DMO host specified as blank or 0 $dummy2");

#   Deal w/trailing port spec
  ($dummy,$port,$dummy2) = split (/\:/,$host,-1);
  (defined $dummy2) && 
	&quit ("BCO-DMO host specification ($original) contains embedded :");
  (defined ($host = $dummy)) || 
	&quit ("BCO-DMO host specification ($original) empty before :");

  if (defined $port) {
    (   ($port =~ /^\d+$/) && ($port != 0)   )   || 
      &quit("Port in BCO-DMO host spec ($original) not a positive integer");
  } else {
    $port = "";
  }

  return $host,$port;
}

sub env_var_or_default
{
  my ($env_var,$default,$dummy) = @_;
  (defined $default) && ( ! defined $dummy) || 
	&quit ("Internal error: env_var_or_default called w/wrong # args");
  $env_var ||
	&quit ("Internal error: env_var_or_default called w/empty env var");
  (defined $pre_build_env_environment{$env_var}) &&
				(return $pre_build_env_environment{$env_var});
  (defined $ENV{$env_var}) && (return $ENV{$env_var});
  return $default;
}

sub output_top
{
  &output_directory_and_data_buttons();

#   show the objectname for which we have a .info
  print "<h1> $object_name </h1>\n";
  print "<pre>\n";
#   Next so that multiple exit routes can properly close the tag
#   Used by perl_utils routines if not used in here 
  $open_pre_tag = 1;

  return;
}
