#		perl_utils.pl			WJS Jan 10

#   Subset of OOserver's wjs_web_perl_utilities.pl for data server use
#
#	... along w/backtick.pl.  On the OOserver backtick is in its own file
#       so it can be used in build-opt-env.pl w/o including all of wjs*ut*.pl
#       Not a consideration on data server (at the moment)
#	... along w/new routine get_boolean_env_var

#   Hopefully, the routines moved here are sufficiently fixed that
#     there will be no skew issues between the subset and the original
#     Hmmm considering the ...s above, we seem well on the way to skewness...

#   Suggested location for this file is htmlbin, both for consistency
#     w/location of wjs_web_perl_utilities.pl, and because there ARE
#     html-related routines here.  Alternative would be some kind of lib
#     directory

#   NB: Do NOT define a $version variable in this file.  A global version
#     of $version is implicit input to some of these routines

#########
#   Global variables:
#	quit:		$version; read-only; optional
#			$unique; read-only; optional
#			$open_pre_tag; read-only; optional
#	printheader:	$version; read-only; optional
#		    	$print_header_done; read-write; optional
#   Needs cgi-lib.pl; assumed set up in program that uses this stuff

#  26 Jul 10  WJS
#	Bug fix: get_boolean_from_env_var needs to allow Y/y/N/n 
#  28 Jan 10  WJS
#	Bug fix: parse_object_spec sometimes returns a slash in the subdir
#    and sometimes doesn't.  Make it match the doc
#	Bug fix: typo caused lower case booleans to be unacceptable
#	Accept empty boolean to mean "not specified" (rather than rejecting
#    it as "bad syntax")

#################################################

sub backtick

#	Code very similar to get_JGOFS_record stuff (in wjs*perl*routines*)
#	and should probably be unified.		WJS  Dec 08

#   Perform backtick function (read all output of a command into a perl
#   scalar) recording all available diagnostic information.
#     Returns 0 or 6 values.
#	0 vals: Bad input argument list (eg, no command or > 1 arg)
#	6 vals:
#		Val 1 = data received from child process
#		Val 2 = $? of child process
#		Val 3 = pid of child process (= return value from open)
#		Val 4 = $! of open
#		Val 5 = $! of last read from child process
#		Val 6 = return value from close
#		Val 7 = $! of close
#	Vals 2-7 returned as numbers rather than strings
#   	None of vals 4-7 seem to be documented to mean anything, and, indeed, 
#   using them has often caused trouble in the sense that a normal perl 
#   operation will alter a 0 $! to a non-zero (abnormal) one.  Vals are
#   returned anyway in case my doc reading has come up short

#     A child that completes properly (not necessarily normally) will
#   communicate its status either via its $? or what it writes to this
#   routine.  If the child always writes SOMETHING (either an answer or an
#   error message), then this non-emptiness is the best status available.
#     The perl book says to check for the successful creation of the child
#   process by looking to see if the pid is defined (if undefined, this
#   routine returns the empty string).  However, it seems to me that the
#   child could be created, write to its output, and exit before the pid
#   check has been performed.  The fork system service (presumably used by
#   perl to create the child) is documented to return 0 in the child process
#   and, in the parent, either the pid of the child or -1.  If the latter,
#   errno is set to reflect the cause of failure.  Thus one could guess that
#   either the returned pid value or the $! of the open would have failure
#   info.  However, the perl book does not document either.
#     perl does not document any method of determining a read error
#   experienced via the angle bracket operator, either.  That operator
#   returns next record or undefined.  This routine returns the $! associated
#   with the undefined angle bracket return.
#     The perl close function is what causes $? to be set.  Presumably the
#   close could fail, too, but this is not documented to happen.  The
#   $! from close is notoriously flaky (my opinion).  Under
#   normal circumstances on globec (Solaris), for example, it acquires the
#   value "Illegal seek".  In perl's defense, there is no suggestion that
#   $! is in fact useful after a close.

{
  my ($command,$dummy) = @_;
  my ($open_result,$record,$command_result,$last_record_status);
  my ($close_return,$close_status);
  my ($pid);

  $dummy && return;
  ($command) = ($command =~ /^\s*(.+)\s*$/);
  $command || return;

  $command_result = $close_return = "";
  $open_status = $last_record_status = $close_status = 0;
  $! = $? = 0;

  $pid = open (BACKTICK,"$command |");
  $open_status = $!;
  defined($pid) || ($pid = "");
  if ($pid ne "") {
    $! = 0;
    while (defined ($record = <BACKTICK>)) {
      $command_result .= $record;
      $! = 0;
    }
    $last_record_status = $!;
    $! = 0;
    $close_return = close BACKTICK;
    $close_status = $!;
    defined ($close_return) || ($close_return = "");
  }

  return $command_result,$?+0,$pid+0,$open_status+0,$last_record_status+0,
	 $close_return+0,$close_status+0;
}

sub format_backtick_return_status
{
#   Used to format info returned by backtick.  
#	If this routine thinks that everything is normal, it returns the
#   empty string.  Suggested use of this routine is to call it only when
#   caller believes something has gone wrong.  In that case, an empty
#   string return from this routine represents its own exceptional case

#   Order of args below matches return from backtick.pl, w/ command
#   in place of the return of the command.  An extra optional arg can be
#   specified at the end, to be included w/the text w/the prefix "More info:"

#   backtick users can code as follows
#	($results,@status_info) = &backtick($command);
#	($results eq "") && 
#		die (&format_backtick_return_status($command,@status_info));
#			or
#	($results,$exit_status,@status_info) = &backtick($command);
#	($exit_status == 0) || 
#		die (
#			&format_backtick_return_status
#					($command,$exit_status,@status_info)
#		    );
#   If $results contains useful diagnostic info, it can be included in the
#   format call; eg
#	&format_backtick_return_status($command,@status_info,$results));
#			or
#	&format_backtick_return_status($command,@status_info,
#				       "return from command = $results"));

  my ($command,
	$command_exit_status,
	$open_return,$open_status,
	$last_io_status,
	$close_return,$close_status,
	$extra_info,
	$dummy
      ) = @_;
  my ($bad_call) = " *** terminating error processing due to bad/incomplete " .
		   "call to format_backtick_return_status";
  my ($return_val,$initial_return_val);

  $command || return "*** format_backtick_return_status called w/no args";
  $dummy && return "*** format_backtick_return_status called too many args\n" .
		   "*** Call probably dealt with I/O on pipe to $command\n";

  $initial_return_val = " *** Problem with I/O on pipe to $command\n";
  $return_val = $initial_return_val;

  (defined $command_exit_status) || return $return_val . $bad_call;
  ($command_exit_status == 0) ||
	($return_val .= " *** Abnormal exit status from child process: " .
			$command_exit_status . "\n");

  (defined $open_return) || return $return_val . $bad_call;
#   open return is documented to be pid, which in parent process is > 0
#   (pid of child).  perl pids seem to be in decimal, not hex.
#   Not documented what comes back if open fails - guess is undefined
#   (arriving here as empty), but could be a -1 (based on fork return
#   statuses)
  if ( ! (($open_return =~ /\d+/) && ($open_return > 0))  ) {
    $return_val .= " *** Abnormal return from pipe open: ";
    $return_val .= ($open_return eq "") ? "empty/undefined" : $open_return;
    $return_val .= "\n";
  }
  (defined $open_status) || return $return_val . $bad_call;
  ($open_status == 0) ||
	($return_val .= " *** Abnormal status of open close: $open_status\n");

  (defined $last_io_status) || return $return_val . $bad_call;
  ($last_io_status == 0) ||
	($return_val .= " *** Abnormal final I/O status: $last_io_status\n");

#   close return stuff empirically determined (under VMS, no less, so
#   feel relatively free to alter it)
  (defined close_return) || return $return_val . $bad_call;
  if ($close_return != 1) {
    $return_val .= " *** Abnormal return from pipe close: ";
    $return_val .= ($close_return eq "") ? "empty/undefined" : $close_return;
    $return_val .= "\n";
  }
  (defined $close_status) || return $return_val . $bad_call;
  ($close_status == 0) ||
	($return_val .= " *** Abnormal status of pipe close: $close_status\n");

  (defined $extra_info) && ($extra_info ne "") &&
	($return_val .= " ... More info (next line et seq)\n$extra_info\n");

  ($return_val eq $initial_return_val) && ($return_val = "");
  return $return_val;
}

#################################################

sub check_r_access
#   Games w/ $! value empirically determined for -r access on synthesis
{
  my ($tmp) = $_[0];
  my ($msg);

  $tmp || &quit("No file name sent to check_r_access for checking");

  $! = 0;
  if ( ! -r $tmp) {
    $msg = ($! == 0) ? "Permission denied" : $!;
    &quit ("No read access to $tmp: $msg");
  }
}

#################################################

sub check_x_access
#   Games w/ $! value empirically determined for -r access on synthesis
#   No particular idea if this works for -x or on non-synthesis nodes
#     but I guess I think it's worth assuming!
{
  my ($tmp) = $_[0];
  my ($msg);

  $tmp || &quit("No file name sent to check_x_access for checking");

  $! = 0;
  if ( ! -x $tmp) {
    $msg = ($! == 0) ? "Permission denied" : $!;
    &quit ("No execute access to $tmp: $msg");
  }
}

#################################################

sub printheader
{
  if ( ! $print_header_done) {
    print &PrintHeader();
    $version && (print "<!--This page prepared by $version//-->\n");
    $print_header_done = 1;
  }
  return;
}

#################################################

sub dump_environment_variables
{
  &printheader();

  print "<html><h1>Environment variables</h1>\n";
  print "<pre>\n";
  foreach (sort keys(%ENV)) {
    $out_value = (defined $ENV{$_}) ? $ENV{$_} : "<undefined env var>";
    print "$_ = $out_value\n";
  }
  print "\n\n";
  print "</pre>\n";

  return;
}

#################################################

sub html_line_breaks
#   Replace any <br> and <b> tags w/ \ns.  Then go back and replace
#   every \n with <br>\n.  Result should be printable in both html
#   and non-html environments (albeit in the latter, one sees the <br>)
{
  my ($string) = @_;

  $string =~ s/\<br\>/\n/g;
  $string =~ s/\<b>\>/\n/g;
  $string =~ s/\n/\<br\>\n/g;

  return $string;
}

#################################################

sub quit
{
  my ($temp,$errmsg1,$errmsg2);

  $open_pre_tag && print "</pre>";

  $errmsg1 = "";
  foreach (@_) {
    chomp ($temp = $_);
    $errmsg1 .= " *** $temp\n";
  }
  $unique && ($errmsg1 .= " ... working on job # $unique\n");

  $errmsg2 = "This message issued " . localtime() . "\n";
  $version && ($errmsg2 .= "$version\n");

  &CgiDie(&html_line_breaks($errmsg1),&html_line_breaks($errmsg2));
}

#################################################

sub get_boolean_env_var
{
  my ($env_var,$dummy) = @_;
  my ($value);
#   Yes, the next statement incorrectly allows a 2nd arg of 0.  Not
#   worth fooling w/perl existential questions for this (today, anyway)
  ($env_var && ! $dummy)  || 
	&quit ("Internal error.  Wrong # args sent to get_boolean_env_var");
  $value = $ENV{$env_var};
  if (  (defined $value) && ($value ne "")  ) {
    ($value =~ /(T|TRUE|Y|YES|1)/i) && return "TRUE";
    ($value =~ /(F|FALSE|N|NO|0)/i) && return "FALSE";
    &quit("Illegal boolean value for $env_var.  Bad value is",$value);
  } else {
    return "";
  }
}

#################################################

sub valid_number
{
#  See if a string is a valid number.	WJS  Apr 99
#	(mod Jul 05 to pre-test for most likely strings, on hypothesis
#	 that string test is quicker than exception testing.  WJS)
#  Idea is to turn warnings on, force a numeric calculation, trap
#    any resulting warning message, and see if it's appropriate.
#    Because it's only a warning, eval does not set $@ as it does for
#    worse errors.  Therefore, the fooling with signals...
#  Of course this breaks if the message changes.  Much better would
#    be to have a perl-callable strtod function...
#  The perl manual says that numbers match /[+-]\d*\.?\d*E[+-]\d+/
#    (when it was talking about library module BigFloat).  However, that
#    description clearly doesn't reflect the optional portions of numbers...
  my ($test_item) = @_;
  my ($number);
  
  ((defined $test_item) && ($test_item ne "")) || return 0;

  ($number) = ($test_item =~ /^\s*(\d*\.?\d*)\s*$/);
  $number && ($number ne '.') && return 1;

  local ($numeric_flag) = 1;

  my ($old_val_warn) = $^W;
  $^W = 1;			# Turn on warnings

#   Used to have sub test $_[0] (the warning message) to see if it was
#   an "Argument .* not numeric" message.  Now think that if the eval gets
#   any kind of warning, there must be a problem with the putative number,
#   so just decide it's not a number.  Presumably if there were
#   a numeric warning (overflow? is this a fatal?), the this technique
#   would be incorrect.  If we know that numeric warnings have their
#   own signal, presumably we could trap that, too (and we'd get it before
#   __WARN__ or __DIE__?)
  local $SIG{__WARN__} = sub { $numeric_flag = 0; };

  eval '$test_item + 1';		# Anything that does arithmetic

  $old_val_warn || ($^W = 0);	# Reset warnings if appropriate
  $SIG{__WARN__} = 'DEFAULT';	# Return signal to normal behavior

  return $numeric_flag;
}

##################

#################################################

sub parse_object_spec
#  //node/subdir/obj_name(sel_proj--and/or--obj_specific_args)
#  All but obj_name are optional
#    Returns a list.  1st element is status; "OK" or "NG"
#	If "NG", next element tells why/where
#	If "OK", next 4 elements are node, subdir, name, & args
#	    All but name may be empty
#	    Node does not have leading //
#	    Args are returned w/o leading and trailing parens (error
#		if no trailing paren.  However, matched pairs are not
#		checked)
#	    Subdir does not have leading or trailing / 
#		Accordingly, this code does NOT distinguish between
#			/dir/name  and   dir/name
#		nor does it distinguish
#			/name(args)  and  name(args)
#		Former may be object spec & latter may be method(args);
#		anyway, for now, we don't do it!

{
  my ($obj_spec) = @_;
  my ($b4paren,$aftparen);
  my ($node,$subdir,$name,$args);

  $obj_spec || return "NG","No name";

  ($b4paren,$aftparen) = split /\(/,$obj_spec,2;
  if ($aftparen) {
    ($args) = ($aftparen =~ /(.+)\)$/);
    $args || return ("NG","Argument portion");
  } else {
    $args = "";
  }

  $b4paren || return "NG","No name";
  ($b4paren eq '/') && return "NG","No name";
  ($b4paren =~ /^[\w\-\_\/\.\:]+$/) || 
				return "NG","Illegal char in node/char/name";

  ($node_dirs,$name) = ($b4paren =~ m"(.*)/(.*)");
  if (defined ($node_dirs)) {
#     Slash present
    $name || return "NG","No name";    
    $node_dirs || return "OK","","",$name,$args;
#     Next line deals w/specs like //foof.  Not sure what diagnostic
#     is appropriate
    ($node_dirs eq '/') && return "NG","No node";
    if ($node_dirs =~ m"^//") {
      (undef,undef,$node,$subdir) = split /\//,$node_dirs,4;
      $node || return "NG","No node";
      $subdir || ($subdir = "");
      return "OK",$node,$subdir,$name,$args;
    } else {
      ($node_dirs) = ($node_dirs =~ m|^/?(.*)|);
      return "OK","",$node_dirs,$name,$args;
    }
  } else {
#     No slash
    return "OK","","",$b4paren,$args;
  }
}

1;