# Various perl utilities I've found useful for OOserver stuff WJS Aug 04
#########
# Global variables:
# $CGI_LIB_LOADED (see 30 Apr 17 mod)
# $BACKTICK_LOADED (see 30 Apr 17 mod)
# get_form_var: %form_info; read-only; required
# quit: $version; read-only; optional
# $unique; read-only; optional
# $open_pre_tag; read-only; optional
# printheader: $version; read-only; optional
# $print_header_done; read-write; optional
# Needs cgi-lib.pl; assumed set up in program that uses this stuff
# Needs backtick.pl, require'd from . directory
# As of 30 Apr 17, try to get cgi-lib.pl if it isn't loaded, and
# try NOT to require cgi-lib.pl or backtick.pl if they ARE already loaded
#
# 30 Apr 17 WJS
# Try conditionally loading backtick.pl and cgi-lib.pl
# Moved 2010 & 2011 comments to bottom
# 27 Apr 17 WJS
# Bug fix: print_select_alpha_variables was loading the menu with
# strip($varlist[$_]) instead of strip($varname). Difference is that
# $varname has the attr list stripped off. Didn't show up until somebody
# used [datatype=alpha], showing that the latter was never used since its
# coding in 2010
# 17 Feb 17 WJS
# Alter valid_number
# 27 Jul 16 WJS
# val_from_embedded_key_val_pair
# list_as_text_plain_alternate_list_program
# 22 Jan 15 WJS
# Put INCLUDE_ATTRIBUTES_SWITCH in environment for print_select_alpha_variables
# Wonder how many other functions it belongs in ...
# 19 Oct 14 WJS
# Add test for -Inf
# 16 Oct 14 WJS
# NaN and Inf and variants are "now" considered legit numbers... but not by me!
# Alter valid_number
# 21 Dec 12 WJS
# Re "Remove do_shell_command" comment of 9 Jun: bad presumption
# since it was used IN HERE (possibly among other places), and
# apparently not noticed. In any case, cannot replace do_shell_command
# with backtick in cases where command must write to stdout. Doing so
# would put all the output into the backtick return symbol.
# 9 Jun 12 WJS
# Rewrite get_JGOFS_record to begin process of properly
# detecting errors on reads. Only
# took "since the beginning" (2004 was when I apparently gave up
# and wrote the previous incorrect code)
# Correction from a "good sounding", "reputable" web article, not me -
# I only provide the opinions in the quotes.
# Remove do_shell_command - presumably no longer needed
# 16 Apr 12 WJS
# Bug fix: OBJSPEC needs to allow @s since they are in
# object specs that have an indirect file spec in them
# Any guess how many more special char mods we need to OBJSPEC?
# Any guess how many security problems there probably are?
# Bug fix: print_select_alpha_variables became too subtle after
# use of get_hash_of_attrs; to wit, it accepted get_hash_of_attrs
# assumption that illegal varnames are, in fact, standalone attrs
# p_s_a_v then took g_h_o_a's "empty" varname and reported it as
# all blanks. Move more p_s_a_v error checking, since if it occurs
# in the menu display section, the errors displayed as menu items
#sub rel_filespec
#sub abs_filespec
#sub check_r_access
#sub check_x_access
#sub get_form_var
#sub bad_form_var
#sub check_build_opt_env_var
#sub printheader
#sub print_select_alpha_variables
#sub print_choose_memory_algorithm
#sub html_line_breaks
#sub quit
#sub dump_input_info
#sub dump_form_info
#sub dump_environment_variables
#sub hex_dump
#sub hex_dump_html
#sub add_to_file
#sub adjust_for_reqd_args
#sub do_shell_command
#sub get_query_string_args
#sub let_serv_do_it
#sub make_dir
#sub parse_object_spec
#sub file_system_to_web_cgi
#sub get_this_file_as_url
#sub replace_special_char
#sub shell_protect
# See also separate file trigram.pl
#sub valid_number
#sub val_from_embedded_key_val_pair
#sub whitespace_strip
#sub format_get_JGOFS_record_return_status
#sub get_cached_varlist
#sub get_hash_of_attrs
#sub get_JGOFS_record
#sub listvar_in_def_format
#sub add_file_to_path_string
#sub list_as_text_plain
$got_cgi_lib_already = ((defined $CGI_LIB_LOADED) && $CGI_LIB_LOADED);
if ( ! $got_cgi_lib_already) {
# filename contains hyphen; variable names can't
$cgi_lib_filename = "cgi-lib.pl";
require $cgi_lib_filename;
}
$got_backtick_already = ((defined $BACKTICK_LOADED) && $BACKTICK_LOADED);
if ( ! $got_backtick_already) {
$backtick_file_name = "./backtick.pl";
require $backtick_file_name;
}
#################################################
sub check_build_opt_env_var
{
my ($env_var,$build_opt_env) = @_;
my ($val);
defined ($ENV{$env_var}) ||
&quit ("Internal problem. ",
"Did not get defined env var $env_var from $build_opt_env");
($val = $ENV{$env_var}) ||
&quit ("Internal problem. ",
"Did not get non-empty env var $env_var from $build_opt_env");
return $val;
}
#################################################
sub list_as_text_plain
# See list_as_text_plain_alternate_list_program
{
my ($opts,$build_opt_env) = @_;
# Check that build-opt-env set up things as expected
my ($topdir) = &check_build_opt_env_var("OPTHOME",$build_opt_env);
my ($bindir) = "$topdir/bin";
my ($list) = "$bindir/list";
&list_as_text_plain_alternate_list_program($opts,$build_opt_env,$list);
return;
}
sub list_as_text_plain_alternate_list_program
# Assumes we are in OOserver environment with build-opt-env.pl run
# Run list against object we are working on and send it out
# as text/plain
# Accepts a bunch of list switches as arg 1, name of build-opt-env
# file as arg 2 for error message purposes, and file spec of list
# executable as arg 3
{
my ($opts,$build_opt_env,$list,$dummy) = @_;
(defined $opts) ||
&quit ("Internal problem - no args sent to list_as_text_plain_alternate_list_program");
(defined $dummy) &&
&quit ("Internal problem - too many args sent to list_as_text_plain_alternate_list_program");
my ($object,$command,$subsels);
$build_opt_env || ($build_opt_env = "?/?/build-opt-env.pl");
# Check that build-opt-env set up things as expected
$object = &check_build_opt_env_var("OBJECT",$build_opt_env);
defined ($ENV{'SUBSELS'}) || &quit ("Internal problem. ",
"Did not get defined env var SUBSELS from $build_opt_env");
&check_x_access($list);
$subsels = $ENV{'SUBSELS'};
$command = "$list $opts \"$object";
$subsels && ($command .= "($subsels)");
$command .= '"';
print "Content-type: text/plain\n\n";
&do_shell_command($list,$command,1);
return;
}
#################################################
sub html_line_breaks
# Replace any and tags w/ \ns. Then go back and replace
# every \n with \n. Result should be printable in both html
# and non-html environments (albeit in the latter, one sees the )
{
my ($string) = @_;
$string =~ s/\ /\n/g;
$string =~ s/\\>/\n/g;
$string =~ s/\n/\ \n/g;
return $string;
}
#################################################
sub adjust_for_reqd_args
# Remove arguments from query string that are known to be inner
# args and tack them onto the object name
# Problem example: In a URL of joinu.html0?obj1,obj2,selproj_list
# obj1 and obj2 are NOT selections or projections, but are often treated
# that way
# Problem amelioration: Instead of joinu, use object name join_2_arg
# Then this code will return join_2_arg(obj1,obj2) and selproj_list
# Future versions of this may replace the _N_arg stuff w/ "smart"
# parse_query_string (pqs) routines than syntactically figure out what
# each query string argument is (after freezing "originality" at existing
# level, and expecting the use of some formal procedures in the future)
# If input object names don't have special format, this routine
# just returns its input as output (w/NOARGKEY substatus; see below)
#
# Input: parse_query_string program name
# object spec (NOT URL), w/optional arg list in parens
# query string
# Returns 3 or 5 strings.
# string 1 = "OK" or "NG".
# "OK" followed by 4 strings
# Substatus
# Object spec (possibly modified from original)
# Query string (possibly shortened from original)
# Number of arguments moved from qs to objspec
# "NG" followed by 2 strings
# Substatus
# Error text
# "OK" substatuses Input obj spec
# "NOARGKEY" joinu
# "ARGSINSPEC" join_2_arg(obj1,obj2)
# "EXTRAARGSINSPEC" join_2_arg(obj1,obj2,x)
# "MOVEDNEEDEDARGS" join_2_arg [and at least 2 qs items]
# "MOVEDSOMEARGS" join_2_arg(obj1) [or join_2_arg and
# only 1 qs item]
# "NG" substatuses
# "MULTIPLEARGKEY" join_1_arg_2_arg
# string 1 is a syntax status only. Substatuses of EXTRAARGSINSPEC
# and MOVEDSOMEARGS probably indicate problems someplace else
# Does NOT return if parse_query_string programs dies - 1 Jun 10
# mod calls get_query_string_args, which &quit's. Pre 1 Jun 10,
# returned a PQSERR substatus to an NG status
{
my ($pqs_program,$inobj,$qs,$dummy) = @_;
my ($objspec,$objnode,$objdir,$objname,$objargs);
my ($list,@qs,@objargs);
my ($needed_args,$nargs_objargs,$nargs_qs);
my ($pqs_out,$separators,$rest,$pqs_sep,$qsarg_sep,$qs_split,@seps);
my ($status,$substatus,$outobj,$args_moved);
# The chars below are fundamental (should be in some trans-language
# .h file, etc). The pqs separator characters are deliberately
# independent of these. For example, given a qs of x,(y,z), the pqs
# routines will present something like x!(y,z) (along with the !) so
# that a split will return the correct x and (y,z) pieces. However,
# when re-constituting a list from the pieces, we have to put the comma
# back in
my ($objarg_sep) = ',';
my ($qs_sep) = ',';
($inobj && ! $dummy) ||
&quit("Internal problem - adjust_for_req_args called w/wrong # args");
($status,$objnode,$objdir,$objname,$objargs) = &parse_object_spec($inobj);
($status eq "OK") || return ("NG","BADOBJPARSE",$objnode);
($needed_args) = ($objname =~ /_(\d+)_arg/);
(defined $needed_args) || return ("OK","NOARGKEY",$inobj,$qs,0);
($dummy) = ($objname =~ /_\d+_arg.*_(\d+)_arg/);
(defined $dummy) &&
return ("NG","MULTIPLEARGKEY","object name had > 1 _N_arg string");
# Count number of arguments in $objargs & $qs using the query string
# subsystem. At the tactical level, this handles commas-within-parens
# Strategically, one can argue that an object's arg list and those
# args within a qs will always be the same, so any mods can be handled
# in a single spot. See parse_query_string doc (currently comments in
# the c source - Oct 08)
if ($objargs) {
@objargs = &get_query_string_args($pqs_program,$objargs);
$nargs_objargs = @objargs;
} else {
$nargs_objargs = 0;
}
($nargs_objargs == $needed_args) && return "OK","ARGSINSPEC",$inobj,$qs,0;
($nargs_objargs > $needed_args) && return "OK","EXTRAARGSINSPEC",$inobj,$qs,0;
if ($qs) {
@qs = &get_query_string_args($pqs_program,$qs);
$nargs_qs = @qs;
} else {
$nargs_qs = 0;
}
# Move args from beginning of query string to end of object arg list
($objargs eq "") || ($objargs .= $objarg_sep);
$args_moved = 0;
foreach ($nargs_objargs .. $needed_args-1) {
# Allowing empty and 0 values in query string
defined($dummy = shift(@qs)) || last;
$objargs .= $dummy . $objarg_sep;
$args_moved++;
}
chop $objargs;
$outobj = "";
$objnode && ($outobj .= "//$objnode");
$objdir && ($outobj .= "/$objdir");
$outobj && ($outobj .= "/");
$outobj .= "$objname($objargs)";
$substatus = (($nargs_objargs == 0) && (defined $dummy)) ?
"MOVEDNEEDEDARGS" : "MOVEDSOMEARGS";
return "OK", $substatus, $outobj, join("$qs_sep",@qs), $args_moved;
}
#################################################
sub parse_object_spec
# //node/subdir/obj_name(sel_proj--and/or--obj_specific_args)
# All but obj_name are optional
# Returns a list. 1st element is status; "OK" or "NG"
# If "NG", next element tells why/where
# If "OK", next 4 elements are node, subdir, name, & args
# All but name may be empty
# Node does not have leading //
# Args are returned w/o leading and trailing parens (error
# if no trailing paren. However, matched pairs are not
# checked)
# Subdir does not have leading or trailing /
# Accordingly, this code does NOT distinguish between
# /dir/name and dir/name
# nor does it distinguish
# /name(args) and name(args)
# Former may be object spec & latter may be method(args);
# anyway, for now, we don't do it!
{
my ($obj_spec) = @_;
my ($b4paren,$aftparen);
my ($node,$subdir,$name,$args);
$obj_spec || return "NG","No name";
($b4paren,$aftparen) = split /\(/,$obj_spec,2;
if ($aftparen) {
($args) = ($aftparen =~ /(.+)\)$/);
$args || return ("NG","Argument portion");
} else {
$args = "";
}
$b4paren || return "NG","No name";
($b4paren eq '/') && return "NG","No name";
($b4paren =~ /^[\w\-\_\/\.\:]+$/) ||
return "NG","Illegal char in node/char/name";
($node_dirs,$name) = ($b4paren =~ m"(.*)/(.*)");
if (defined ($node_dirs)) {
# Slash present
$name || return "NG","No name";
$node_dirs || return "OK","","",$name,$args;
# Next line deals w/specs like //foof. Not sure what diagnostic
# is appropriate
($node_dirs eq '/') && return "NG","No node";
if ($node_dirs =~ m"^//") {
(undef,undef,$node,$subdir) = split /\//,$node_dirs,4;
$node || return "NG","No node";
$subdir || ($subdir = "");
return "OK",$node,$subdir,$name,$args;
} else {
($node_dirs) = ($node_dirs =~ m|^/?(.*)|);
return "OK","",$node_dirs,$name,$args;
}
} else {
# No slash
return "OK","","",$b4paren,$args;
}
}
#################################################
sub rel_filespec
# Assume argument is a relative file spec. It should not have
# a leading slash. Get rid of trailing slash, too
{
my ($string) = @_;
($string =~ m"//") &&
&quit ("File spec $string contains unallowed consecutive slashes");
($tmp) = ($string =~ m"^/?(.*)/?$");
# Deliberately allowing an empty string. Will this be regretted?
return $tmp;
}
#################################################
sub abs_filespec
# Assume argument is an absolute file spec. It must have
# a leading slash. Get rid of any trailing slash
{
my ($string,$string_description) = @_;
my ($tmp);
$string ||
&quit ("$string_description empty/undefined. ",
"Supposed to be an absolute file spec");
($string eq "/") && (return "/");
($string =~ m"//") &&
&quit ("$string_description (value $string) " .
"contains unallowed consecutive slashes");
($tmp) = ($string =~ m"^/(.*)/?$");
$tmp && return "/$tmp";
&quit ("$string_description (value $string) is not an absolute file spec",
"(no leading slash or emptiness where file name expected)");
}
#################################################
sub check_r_access
# Games w/ $! value empirically determined for -r access on synthesis
{
my ($tmp) = $_[0];
my ($msg);
$tmp || &quit("No file name sent to check_r_access for checking");
$! = 0;
if ( ! -r $tmp) {
$msg = ($! == 0) ? "Permission denied" : $!;
&quit ("No read access to $tmp: $msg");
}
}
#################################################
sub check_x_access
# Games w/ $! value empirically determined for -r access on synthesis
# No particular idea if this works for -x or on non-synthesis nodes
# but I guess I think it's worth assuming!
{
my ($tmp) = $_[0];
my ($msg);
$tmp || &quit("No file name sent to check_x_access for checking");
$! = 0;
if ( ! -x $tmp) {
$msg = ($! == 0) ? "Permission denied" : $!;
&quit ("No execute access to $tmp: $msg");
}
}
#################################################
sub get_form_var
{
my ($variable,$required,$check_string) = @_;
my ($list_indicator,$list_separator);
my ($tmp) = $form_info{$variable};
($required =~ /^(REQ|OPT)/) ||
&quit("Internal error - get_form_var dummy \$required does not " .
"begin with REQ or OPT. Its value is $required\n");
# Allow a binary 0 if thing we want from form is a list of values
# (signified by $check_string ending in _LIST)
(defined $check_string) || ($check_string = "STRING");
($check_string,$list_indicator) = ($check_string =~ /(.+?)(_LIST)?$/);
$list_separator = ($list_indicator) ? '\x00' : '';
(defined $tmp) || ($tmp = "");
if ($tmp eq "") {
($required =~ /^REQ/) &&
(&quit("Did not get required variable $variable from form"));
} else {
if ($check_string eq "STRING") {
($tmp =~ /^[\w\-\_\#\.$list_separator]+$/) ||
&bad_form_var($variable,$tmp,"string",'a-zA-Z0-9#._-');
} elsif ($check_string eq "SPACEOK") {
($tmp =~ /^[\w\-\_\#\. $list_separator]+$/) ||
&bad_form_var($variable,$tmp,"string",'a-zA-Z0-9#._- & space');
} elsif ($check_string eq "FILESPEC") {
($tmp =~ /^[\w\-\_\/\.$list_separator]+$/) ||
&bad_form_var($variable,$tmp,"file specification",'a-zA-Z0-9/._-');
} elsif ($check_string eq "OBJSPEC") {
# Note: although accepting chars like & % and |, deliberately leaving
# them out of the "acceptable" string in the error message. Attempt is
# to discourage a hacker who might see the error message from realizing
# that these chars are accepted
($tmp =~ /^[\w\-\_\/\.\:\(\)\,\{\}\=\<\>\%\&\@\| $list_separator]+$/) ||
&bad_form_var($variable,$tmp,"JGOFS object specification",
'a-zA-Z0-9/._-(),:{}=<> & space');
} elsif ($check_string eq "NUMBER") {
($tmp =~ /^[0-9E\+\-\.$list_separator]+$/) ||
&bad_form_var($variable,$tmp,"numeric specification",'0-9E.+-');
} elsif ($check_string ne "NOCHECK") {
&quit("Internal error - passed get_form_var argument " .
"\$check_string is not " .
"STRING, NUMBER, FILESPEC, SPACEOK, OBJSPEC or NOCHECK.",
"Its value is $check_string\n");
}
}
return $tmp;
}
#################################################
sub bad_form_var
{
my ($var,$val,$desc,$legal_string) = @_;
&quit("For form variable $var, ",
"please provide a non-empty $desc built of chars $legal_string",
"string causing problem: $val");
}
#################################################
sub make_dir
{
my ($dir) = $_[0];
if (-e $dir) {
(-d $dir) || &quit ("$dir is not a directory");
(-w $dir) || &quit ("Cannot create files in directory $dir");
} else {
mkdir ($dir,0755)|| &quit("Cannot create directory $dir: $!");
}
return;
}
#################################################
sub printheader
{
if ( ! $print_header_done) {
print &PrintHeader();
$version && (print "\n");
$print_header_done = 1;
}
return;
}
#################################################
sub add_to_file
{
my ($file,$addend) = @_;
(open (APPEND,"$file")) || &quit("Cannot open $file: \$! = $!");
(print APPEND "$addend\n") || &quit("Cannot write $file: \$! = $!");
(close APPEND) || &quit("Problem closing $file: \$! = $!");
return;
}
#################################################
sub quit
{
my ($temp,$errmsg1,$errmsg2);
$open_pre_tag && print "";
$errmsg1 = "";
foreach (@_) {
chomp ($temp = $_);
$errmsg1 .= " *** $temp\n";
}
$unique && ($errmsg1 .= " ... working on job # $unique\n");
$errmsg2 = "This message issued " . localtime() . "\n";
$version && ($errmsg2 .= "$version\n");
&CgiDie(&html_line_breaks($errmsg1),&html_line_breaks($errmsg2));
}
#################################################
sub dump_input_info
{
&dump_environment_variables();
&dump_form_info(@_);
return;
}
#################################################
sub dump_form_info
{
my ($separator_for_multiple_selections,%form_info) = @_;
my ($key,$value,$out_value);
&printheader();
print "
\n";
return;
}
#################################################
sub get_this_file_as_url
# Should use &file_system_to_web_cgi. However,
# there's evidence of trouble w/that routine. Code is fine
# as long as this routine is in top-level jg directory (and as long
# as file system is using /s)
# Other possible curiosities could occur because we assume that in the
# case of, say, multiple links to a file, $0 and SCRIPT_NAME will come
# out w/the same file name, etc. However, seems to me that return from
# this should be usable
{
my ($filespec,$dummy) = @_;
my ($jgdefn,$filename,$dir);
($filespec && ! $dummy) ||
&quit ("Internal error - get_this_file_as_url called w/wrong # args");
# In line below, jgdefn does not end up as a filespec, nor is
# JGSCRIPTDIR a directory. So much for naming things...
$jgdefn = &abs_filespec($ENV{"JGSCRIPTDIR"},"JGSCRIPTDIR env var");
($dir,$filename) = ($filespec =~ /(.*)\/(.+)/);
$filename || $dir || ($filename = $filespec); # No slashes
$filename ||
&quit ("Malformed filespec input to get_this_file_as_url",
"filespec = $filespec");
return "$jgdefn/$filename";
}
sub file_system_to_web_cgi
{
# Idea is to try to allow name & location of this file to change
# Presumably the directory portion of the file contains a string that
# has been defined to the web server as the cgi root. Substitute
# the cgi root name for that string
# Try to compensate for various abnormalities, such as running this
# script online, or the symlink'ing of the directory defined to the
# web server as the cgi root. Compensation consists of assuming that
# the script is in the top level cgi root. Compensation also means
# that if the input args are truly wrong, we will compound the error...
# Both cgi root and its defn are assumed "absolute"; eg, leading off
# w/slash. (Don't care about any trailing slash)
my ($infile,$cgi_root_name,$directory_defined_as_cgi_root) = @_;
my ($out);
$directory_defined_as_cgi_root =
&abs_filespec($directory_defined_as_cgi_root,
"Directory arg to file_system_to_web_cgi");
$cgi_root_name =
&abs_filespec($cgi_root_name,
"cgi root arg to file_system_to_web_cgi");
$out = $infile;
if ($out =~ m"^/") {
$out =~ s/$directory_defined_as_cgi_root/$cgi_root_name/;
if ( ($out eq $infile) &&
($directory_defined_as_cgi_root ne $cgi_root_name) ) {
($out) = ($out =~ m".*/(.+)$");
$out || &quit ("File spec $infile seems to be a directory " .
"(It ends with a slash)" );
$out = "$cgi_root_name/$out";
}
} else {
$out = "$cgi_root_name/$out";
}
return $out;
}
#################################################
sub get_query_string_args
# Argument 1 is file spec of parse_query_string image
# Arg 2 (optional) is a query_string to parse
# See parse_query_string.c for doc
{
my ($parse_query_string,$query_string) = @_;
my ($pqs_output,$separators,$rest,$pqs_sep,$qs_sep,$qs_arg_list);
my ($command,$exit_status,@status);
&check_x_access ($parse_query_string);
(defined $query_string) || ($query_string = "");
$query_string = &shell_protect($query_string);
$command = "$parse_query_string $query_string";
($pqs_output,$exit_status,@status) = &backtick($command);
(defined $exit_status) || &quit ("Internal error - bad call to backtick");
($exit_status == 0) ||
&quit ("parse_query_string problem",
&format_backtick_return_status
($command,$exit_status,@status,$pqs_output)
);
($separators,$rest) = split (/ /,$pqs_output,2);
(defined $rest) || return "";
($pqs_sep,$qs_sep) = split (//,$separators);
$split_string = '\\' . $pqs_sep;
($qs_arg_list) = split (/$split_string/,$rest);
$split_string = '\\' . $qs_sep;
return split (/$split_string/,$qs_arg_list);
}
#################################################
sub
val_from_embedded_key_val_pair
{
# NB: THIS ROUTINE MUST NOT USE OTHER wjs_web*.pl PROGRAMS. This routine operates before
# "the environment" is set up by build-opt-env.pl
# Input is keyword, string and separator-within-string character
# Program searches for keyword= . If found, program returns, as the 2nd return arg, characters between
# = and next separator-within-string character. An empty string is NOT returned; ie, the condition
# where the = is immediately followed by the separator is considered an error.
# User caveat: quotation marks, parentheses, etc are a problem for the caller of this routine.
# Consider using get_query_string_args ("does" parens but deemed too clumsy to do work of this routine)
# Returns 2 or 3 vals. First is a status, "NG" or "OK".
# If OK, 2nd return val is value string for keyword as described above
# If OK, 3rd return val is string with keyword=value removed. If appropriate, a
# separator-within-string character is also removed. Note that empty string return is valid
# If NG, 2nd return val is "NOTFOUND" if keyword was not in input string.
# Otherwise 2nd return val is some other amplification of the problem
my ($keyword,$in_string,$sep,$dummy) = @_;
(defined $dummy) &&
return "NG","Internal error: too many args passed to val_from_embedded_key_val_pair";
my ($val_string,$remainder,$tmp,$new_string);
my ($qm_keyword) = quotemeta($keyword);
my ($qm_sep) = quotemeta($sep);
my ($part1,$part2,$part3) = split (/$qm_keyword/,$in_string);
$part3 && return "NG", "$keyword specified more than once in input $in_string";
if ($part2) {
($tmp,$remainder) = split (/$qm_sep/,$part2,2);
$tmp || return "NG","Found keyword $keyword but could not get value from -->$part2<--";
($val_string) = ($tmp =~ /\s*=\s*(.+)/);
$val_string || return "NG","Found keyword $keyword but could not get value from -->$tmp<--";
defined ($part1) || ($part1 = "");
defined ($remainder) || ($remainder = "");
# Note that it is NOT necessary to put in a separator between $part1 & $remainder because
# there is one at the end of $part1 (assuming $part1 non-empty)
$new_string = $part1 . $remainder;
} else {
return "NG","NOTFOUND";
}
return "OK",$val_string,$new_string;
}
#################################################
sub shell_protect
# Try to put a backslash in front of everything "the shell"
# will treat as a special character. Sigh
{
my ($input) = $_[0];
my ($shell_chars) = ' "`;{}&|()\\\''; # Add more when needed...
my ($output);
$output = "";
foreach (split(//,$input)) {
/[$shell_chars]/ && ($output .= '\\');
$output .= $_;
}
return $output;
}
#################################################
sub replace_special_char
# Idea is that there is a string with an offensive special character
# which you would like replaced with another. Original problem was
# to make a filename out of an object spec, which meant the slashes were
# an issue. Not clear if something else like, say, trigramming, would
# do the job just as well if not better ...
# Routine requires 2 args. Arg 1 = char to be replaced. Arg 2 =
# string containing arg 1. Arg 1 could probably be a string if one is
# careful - this routine does not check how long it is.
# Routine returns 2 or 3 values. 1st value is a status. If "OK",
# 2nd value is replacement string, and 3rd value is char that was used
# for replacement. If status not "OK", it is the name of a problem, and
# the 2nd value is a text message describing the problem
{
my ($char_to_replace,$input,$dummy) = @_;
my ($replacement_candidates,$replacement_char,$string_with_replacement);
my ($quoted_replacement_char,$quoted_char_to_replace);
((defined $char_to_replace) && (defined $input) && ( ! defined $dummy)) ||
return "NEEDEXACTLYTWOARGS","Need exactly 2 args";
# Altering order of replacement candidates will hurt callers who expect
# strings generated by this routine to match strings generated by this
# routine "a while ago", so don't do it. Order below is approx a guess
# at chars that won't cause trouble if they get passed to shell, etc
# which is why | ; etc are not on list. If needed, add them on end...
# and run output of this routine through shell_protect!
$replacement_candidates = '#^@$&+=:?/';
$quoted_char_to_replace = quotemeta($char_to_replace);
foreach ( split (//,$replacement_candidates) ) {
($_ eq $char_to_replace) && next;
$replacement_char = $_;
$quoted_replacement_char = quotemeta($replacement_char);
($input =~ /$quoted_replacement_char/) && next;
$string_with_replacement = $input;
$string_with_replacement =~
s($quoted_char_to_replace)($replacement_char)g;
last;
}
(defined $string_with_replacement) &&
(return "OK",$string_with_replacement,$replacement_char);
return "ALLREPLCHARSFAILED",
"All replacement chars for $char_to_replace occur in input string";
}
#################################################
sub let_serv_do_it
{
my ($serv,$path_info,$query_string,$output_redirect) = @_;
my ($command);
$path_info || return "NO_PATH_INFO";
&check_x_access($serv);
$command = "PATH_INFO=" . &shell_protect($path_info) . ";export PATH_INFO;";
$query_string &&
($command .= "QUERY_STRING=" . &shell_protect($query_string)
. ";export QUERY_STRING;");
$command .= $serv;
$output_redirect && ($command .= " > $output_redirect");
$! = $? = 0;
system ($command);
(($! == 0) && ($? == 0)) || return "SERV_FAILURE",$!,$?,$command;
return "OK";
}
#################################################
sub hex_dump_html
{
my ($label,$string,$handle,$line_length_arg) = @_;
my ($default_output_handle) = ("STDOUT");
my ($stat);
$handle || ($handle = $default_output_handle);
$! = 0;
(print {$handle} "
\n") || return $!;
return $stat;
}
#################################################
sub hex_dump
# Arg 1 - name or other label for string being dumped
# 2 - string to be dumped (NB: valid binary #s will be dumped as strings, not #s
# eg; 255 gets a 3 character 32 35 35 dump, not a
# 1 char FF dump. This is perl's "pick the correct
# integer/string context")
# 3 - file handle on which to dump. Defaults to STDOUT
# 4 - max length of a dumped line. Defaults to 132. If negative, represents
# number of dumped characters on a line (results in line length of
# (length of arg 1) + 3 - 3*(arg 4) ... approx, and if arg 1 is not too long)
# Returns "OK"
# "NOSTRING" for empty arg 2. Also prints msg on arg 3
# $! if print returns 0. Don't know what gets printed on arg 3
{
my ($label,$string,$handle,$line_length_arg) = @_;
my ($default_line_length,$default_output_handle) = (132,"STDOUT");
my ($max_len_label,$default_len_per_line_prefix) = (16,4);
# 3 is a magic constant in this routine, representing a unit of 2 hex chars plus
# a blank. To parametrize it, we'd have to insert variable spacing in formats, etc
my ($iline,$nlines,$last_char,$first_char,@chars);
my ($min_line_length,$short_label,$indentation);
my ($visible,$len_string,$max_dumped_chars_per_line);
$handle || ($handle = $default_output_handle);
$label || ($label = "[dumped string]");
$line_length_arg || ($line_length_arg = $default_line_length);
$! = 0;
if ( ! $string) {
(print {$handle} $label," = [no string to dump]\n") || return $!;
return "NOSTRING";
}
# Fool w/indentation. If label is "long", indent less
$short_label = (length($label) <= $max_len_label);
if ($short_label) {
$label .= " = ";
$indentation = length($label);
} else {
$indentation = $default_len_per_line_prefix;
$label .= "\n" . ' ' x $indentation;
}
$min_line_length = $indentation + 3;
if ($line_length_arg < 0) {
$max_dumped_chars_per_line = -$line_length_arg;
} else {
# Override user-supplied line length if it's too small. The error logic to
# do it exactly, backing out our indentation, etc, if necessary, is convoluted
# and I think the total length involved is < 10 bytes
($line_length_arg < $min_line_length) && ($line_length_arg = $min_line_length);
$max_dumped_chars_per_line = ($line_length_arg - $indentation)/3;
}
$len_string = length($string);
$nlines = int(($len_string-1)/$max_dumped_chars_per_line) + 1;
$first_char = $last_char = 0;
@chars = split //,$string;
foreach $iline (0..$nlines-1) {
$last_char += $max_dumped_chars_per_line;
($last_char > $len_string) && ($last_char = $len_string);
(print {$handle} ($iline == 0) ? $label : ' ' x $indentation) || return $!;
foreach ($first_char..$last_char-1) {
if ($chars[$_] =~
/[\w\~\`\!\@\#\$\%\^\&\*\(\)\-\_\=\+\[\]\{\}\;\:\'\"\\\|\<\>\,\.\/\?]/
) {
(print {$handle} "$chars[$_] ") || return $!;
} elsif ($chars[$_] eq ' ') {
(print {$handle} " ") || return $!;
} elsif ($chars[$_] eq '\t') {
(print {$handle} "\\t ") || return $!;
} elsif ($chars[$_] =~ /[\s]/) {
(print {$handle} "\\s ") || return $!;
} else {
(print {$handle} "** ") || return $!;
}
}
(print {$handle} "\n") || return $!;
(print {$handle} ' ' x $indentation) || return $!;
foreach ($first_char..$last_char-1) {
(printf {$handle} "%x ",ord($chars[$_])) || return $!;
}
(print {$handle} "\n") || return $!;
(print {$handle} "\n") || return $!;
$first_char += $max_dumped_chars_per_line;
}
return "OK";
}
#################################################
sub add_file_to_path_string
# First arg is name of env var containing path
# Second arg is file we want in the path.
# All the rest of this stuff deals w/the possibility that "relevant
# things" are in the path before we get here. If so, this routine
# preserves the order and repetitions of the existing path.
# Rest of args (optional) is a list of library directories that might
# conflict w/arg we want to add (eg, we want to add matlab5 library
# but matlab6 library might be in path). Irrelevant if what we
# want to add is in "conflict list". Each such conflicting directory
# is replaced with the directory we want.
# Returns 3 element list.
# 1) "OK"/"NG". OK means file exists, is readable,
# and is in path (whether we fooled w/path or not)
# We don't alter the path if the file is not
# accessible
# 2) "NO_PATH_NAME"
# "NOTHING_TO_ADD"
# Calling error - one or both of 1st 2 args missing
# "FILE_DOES_NOT_EXIST"
# "FILE_CANNOT_BE_READ"
# Refers to arg 2
# "WOULD_MAKE_PATH"
# "WOULD_ADD_TO_PATH"
# "WOULD_MODIFY_PATH"
# "WOULD_NOT_AFFECT_PATH"
# describing how this routine would affect the path.
# WOULD_MODIFY_PATH means that we found elements of
# of the "conflict list" in the path, possibly
# in addition to the file we wanted.
# 3) Desired path string as calculated by this routine
# Calling routine must actually place this string in environment
#
# NOTE: Path separator character (colon), if found in input env var, is
# considered the separator of path elements. This routine does not look
# at quotation marks, etc that might indicate character is NOT a
# separator
{
my ($path_name,$file_to_add,@conflict_list) = @_;
my ($path_sep) = (':');
my ($nsubs,$already_in_path,$return2);
$path_name || return "NG","NO_PATH_NAME";
$file_to_add || return "NG","NOTHING_TO_ADD";
(-e $file_to_add) || return "NG","FILE_DOES_NOT_EXIST";
(-r $file_to_add) || return "NG","FILE_CANNOT_BE_READ";
$return2 = "WOULD_NOT_AFFECT_PATH";
$path = defined ($ENV{$path_name}) ? $ENV{$path_name} : "";
# Add $file_to_add to $path_name, replacing any
# others provided in $conflict_list. Keeping order would help in the
# unlikely situation that the file already in the path
# works only in that position in the path
if ($path) {
$path .= $path_sep;
$file_to_add .= $path_sep;
$already_in_path = ($path =~ /$file_to_add/);
$nsubs = 0;
foreach (@conflict_list) {
$_ .= $path_sep;
($_ eq $file_to_add) || ( $nsubs += ($path =~ s/$_/$file_to_add/g) );
}
( ! $already_in_path) && ($nsubs == 0)
&& ($path .= $file_to_add)
&& ($return2 = "WOULD_ADD_TO_PATH");
($nsubs == 0) || ($return2 = "WOULD_MODIFY_PATH");
chop $path;
} else {
$path = $file_to_add;
$return2 = "WOULD_MAKE_PATH";
}
return "OK",$return2,$path;
}
##################
sub do_shell_command
# backtick.pl enhanced, so this is now a "skeleton" calling backtick -
# better to use the backtick.pl entries than to use this
# Too bad that download-4 wasn't only place do_shell_command was used -
# it is used in this file!! In any case, cannot use backtick when
# command must write to stdout, so do_shell_command must stay ... but DO
# deprecate its use
# In Jun 12, read comments in download-4 that said do_shell_command
# had been removed from there in 2011. Accordingly comment it out here
# As of Nov 10, used only in download-4. Deprecate this in favor
# of backtick.pl - among other things, the ESPIPE stuff is wrong
{
my ($executable,$command_including_executable,$pipeerr_OK) = @_;
my ($return,$ok,@status,$command_to_backtick,$child_exit_status);
($executable eq "NO_X_ACCESS_CHECK") || &check_x_access($executable);
($command_to_backtick) = ($command_including_executable =~ /^`(.+)`$/);
if ($command_to_backtick) {
($return,$child_exit_status,@status) = &backtick($command_to_backtick);
} else {
($return,$child_exit_status,@status) =
&execute_command($command_including_executable);
}
(defined $return) ||
&quit ("Internal error calling backtick routines from do_shell_command");
($child_exit_status == 0) && (return $return);
&quit (&format_backtick_return_status
($command_including_executable,$child_exit_status,@status)
);
}
##################
# Email below "officially" out of date as of Jun 2012
# The perl eof function can be used to distinguish between
# a read that returns undefined because of eof and a read that
# returns undefined because of error. Supposedly in the latter
# case, $! correctly IDs the error
#From: WJS::SYSTEM_WJS1 "Warren J. Sass" 17-APR-2004 21:48:58.68
# [edited to change routine names. WJS 23 Mar 05]
#To: rgroman
#CC: cchandler@whoi.edu,wsass
#Subj: Using perl to read "JGOFS stuff" from pipes
# I guess you have been opening perl pipes to list for quite
#a bit longer than I have, but my recent tanglings with the issue
#have most recently led me to write a routine to do this. If you
#want more details about the routine, let me know and I can do a better
#write-up. The point of this email is to broadcast the issues I've
#found.
# The main problem is that a pipe read failure can be associated
#with a failure of the child process at the other end of the pipe. To
#get that info requires that the pipe be closed. The act of closing
#the pipe is itself liable to error in the parent process. Accordingly,
#when the perl <> operator returns "undefined", there are 3 statuses
#of interest: the status of the read, the status of the close, and the
#exit status of the child process.
# A sub-issue of this is the infernal habit of "something" (perl
#library? OS?) on globec to return an ESPIPE condition as the status
#of a close.
# A 2nd sub-issue is that it's nice to separate closes that happen
#following EOF from closes that do not. The former should have a "normal"
#child exit status, while the latter might well have a status of EPIPE
#(classic example here: read a few records from pipe to list, then
#close pipe)
#
# Another issue is that, as far as I can tell, perl does not
#distinguish between read error and EOF. Presumably both cases result
#in an "undefined" return from <>. I have experimentally verified that
#$! at this point is unaltered from what it was before the <>. In
#particular it is NOT set to "EOF" or some such. Presumably in the case
#of a real read error (eg, bad disk block), $! WOULD be set. However,
#to figure this out, one must know the value of $! before the <>.
#
# Because of these issues, it seems that every time <> returns
#"undefined" one must explicitly close the filehandle and execute
#the equivalent of the code at bottom. Since every program encounters
#this condition, I felt I should try to write something common! While
#I was at it, I thought I could try to handle the application-level &x
#errors. (Note: list 1.5 (Oct 03) allows redirection of application
#-level &x info. This routine will NOT handle that circumstance. See
#get_JGOFS_line in JGOFS_SQL package for mod where destination of
#error info are passed as an argument to preserve the error-handling
#capabilities of this routine. WJS Nov 06)
#
#Reference summary:
# Input: file handle, already open to the pipe
# Output: status summary string, data record, status array;
# where status array consists of
# $! of read
# $! of close
# $? of child process
# JGOFS error text
# Output side effect: file handle is closed unless I/O was "normal"
# Value of status summary is one of the strings
# OK, EOF, JGOFS_ERR, READ_ERR, CLOSE_ERR
# (*ERRs reported in that order if > 1 occurs)
# JGOFS error text consists of "all" (I put in a sanity check)
# the lines from the one starting with &x to EOF
# $! will be 0 if <> did not result in its modification (special,
# presumably impossible, case: defined data +
# altered $!. Action: READ_ERR, non-0 $!,
# JGOFS error text = "Unknown error [+more]")
#
#Sample metacode:
# open ($FH,"$command |") or die...
# ($status,$line_from_FH,@err_array) = &get_JGOFS_record($FH);
# while ($status eq "OK") {
# # process $line_from_FH
# ($status,$line_from_FH,@err_array) = &get_JGOFS_record($FH);
# }
# unless ($status eq "EOF")
# &format_get_JGOFS_record_return_status($command,@err_array)
#################################################
sub format_get_JGOFS_record_return_status
{
my ($command,$io_status,$close_status,$exit_status,$message) = @_;
my ($return_val);
$return_val = " *** Problem with I/O on pipe to $command\n";
($message eq "") ||
($return_val .= " *** Non-operating-system info follows:\n$message");
($io_status == 0) ||
($return_val .= " *** Abnormal final I/O status: $io_status\n");
($close_status == 0) ||
($return_val .= " *** Abnormal status of pipe close: $close_status\n");
($exit_status == 0) ||
($return_val .= " *** Abnormal exit status from child process: $exit_status\n");
return $return_val;
}
sub get_JGOFS_record
{
my ($FH) = @_;
my ($rec,$err_rec_sanity_count,$status,$read_bang_status,$message);
my ($err_rec_max) = 100;
my ($JGOFS_err_indicator) = "&x";
$message = "";
$status = "EOF";
$read_bang_status = 0; # For completeness - will not be
# returned for normal JGOFS I/O; will
# be set below for abnormal. We would
# LIKE to return an EOF status, but there
# does not seem to be an errno entry for that
while ( ! eof($FH) ) {
if (defined ($rec = <$FH>) ) {
($rec =~ /^[\* ]*$JGOFS_err_indicator/) || return "OK",$rec;
$status = "JGOFS_ERR";
$message = $rec;
$rec = "";
$err_rec_sanity_count = 0;
$read_bang_status = 0; # No error, since <> was defined
while ( ! eof ($FH) ) {
if (defined ($_ = <$FH>)) {
if ($err_rec_sanity_count++ > $err_rec_max) {
$message .= " *** Ignoring msgs after message #$err_rec_max";
last;
}
$message .= $_;
} else {
$read_bang_status = $!; # Error, since <> undefined and no EOF
last;
}
}
} else {
$read_bang_status = $!; # Error, since <> undefined and no EOF
$status = "READ_ERR";
$rec = "";
last;
}
}
# Coding below from my interpretation of a perldoc.org online article
# Jun 2012. Main idea is that the T/F close return needs to be tested
# in order to determine the validity of the $! setting ...
# close returns F if there was a local close problem or if
# the other end of a pipe died. According to article, if the latter,
# $! == 0 (and presumably $? != 0). I assume that in the non-pipe case,
# $! will be set.
# It is not clear what happens if we close after a read error during
# the main loop. We try to report READ_ERR rather than CLOSE_ERR if,
# in fact, close-after-error produces a close error.
# We return "the works" w/the idea that the formatting routine
# (format_get_JGOFS_record_return_status; above) sorts out all the
# choices. Modify that routine if/when needed - it is my present
# belief (2012) that the interpretations there are correct despite
# the coding changes here.
$! = $? = 0;
close ($FH) || ($status ne "EOF") || ($status = "CLOSE_ERR");
return $status,$rec,$read_bang_status,$!,$?,$message;
}
#################################################
sub print_choose_memory_algorithm
{
my ($form_var_name) = @_;
print "
Memory algorithm choice
\n";
print "" .
"Applicable only to variables whose values are non-numeric" .
" \n";
# 0 means "Use width= attribute vals" (see makemat.c for technique)
# Any other number is minimized with OOserver "system max" value
# This implementation just offers users min or max - can be altered to
# allow middle ground if a) desired b) a way can be determined to
# allow user to choose meaningfully; ie, if user doesn't know width=
# attributes or system max, tough to pick a number
print "
";
return;
}
#################################################
sub print_select_alpha_variables
{
my ($list_program,$obj,$form_var_name,$missings_are_alphas) = @_;
my ($command);
my ($pid,$save_PATH_INFO,$save_bang,$save_ques);
my ($line,@varlist,@datalist,$varname);
my ($status,@err_msg,$strip);
my ($select_count);
my (%attr_hash,%alpha_var);
# Users of the print_select_alpha_variables form generally will not
# really know what they should select until after the fact. Try to
# assist our default value-in-first-record algorithm in as general a
# way as possible. Idea is that following script, if present, will
# contain perl code with an entry called numeric_alpha_opinion
# This entry will be called with a varname and will return an opinion
# and an authoritative value (eg, can user override, etc)
# As of Nov 10, this idea never implemented. In Nov 10, implemented
# recognition of a variable attribute called datatype, which can have
# the values "alpha" or "numeric".
my ($optional_advisory_script) = "numeric_alpha_advice.pl";
my ($JGOFS_missing) = "nd";
# Chose tab here since guarantees no conflict w/embedded separator
# Coordinate this char w/list option (in this case -t) but don't blame
# me if changing this causes trouble. "Extra" \ so 2-char \t is
# sent to perl split (wonder if binary tab; eg, w/o extra \, would work...)
my ($list_split_char) = "\\t";
# Get list of variables in object statisticker will work on. Get 1st value
# of each variable and see if it's numeric. Do this by taking 1st 2 lines
# of flat, nocomment list of object. Offer menu of variables to
# be compared alphabetically, defaulting per the numeric test. If problems
# getting this info, just don't offer the menu. Drop warning into server
# error log (I hope). Someone may see it some day... (Actually, error
# appears pretty dramatically in front of user's eyes - Feb 10)
&check_x_access($list_program);
$command = $list_program .
" -f -c -t -z -l -a -nopipeerr -forceheader -errout /dev/stderr " .
"\"" . $obj . "\"";
# Clean out PATH_INFO to avoid problems w/outer used by $obj
(defined $ENV{"PATH_INFO"}) && ($save_PATH_INFO = $ENV{"PATH_INFO"});
$ENV{"PATH_INFO"} = "";
# Ensure that the -a option for list is honored no matter how list was built
# Requires list 1.8b. No point in saving a previous value of the env var - all
# OOserver work "prefers" that -a work, and I don't think this file is used in a
# non-OOserver environment; esp used by "not me"
$ENV{"INCLUDE_ATTRIBUTES_SWITCH"} = "TRUE";
$! = $? = 0;
$pid = open (IN, "$command |");
$save_bang = $!;
$save_ques = $?;
if (defined ($save_PATH_INFO)) {
$ENV{"PATH_INFO"} = $save_PATH_INFO;
undef $save_PATH_INFO;
} else {
delete $ENV{"PATH_INFO"};
}
( ($pid !=0 ) && ($save_ques == 0) ) ||
&quit ("Problem opening pipe to command '" . $command .
"'\n\$! = $save_bang; \$? = $save_ques\n");
($status,$line,@err_msg) = &get_JGOFS_record(IN);
($status eq "OK") ||
&quit ("Could not get variable list",
&format_get_JGOFS_record_return_status($command,@err_msg));
(defined $line) || &quit("Did not get variable list (but list succeeded!)");
chomp $line;
(@varlist = split (/[$list_split_char]+/,$line)) ||
&quit ("No variables returned from '" . $command . "'\n");
# vars have appended non-width attrs (-a but not also -aw switches on list)
# Parse off attr list, noting those vars w/ defined datatypes
foreach (0..$#varlist) {
($status,$varname,%attr_hash) = &get_hash_of_attrs($varlist[$_]);
($status eq "OK") || ($status eq "AMBIGUOUS") ||
&quit ("Problem w/attrib list for a variable in $obj\n",
"More info follows: $varname");
if (defined $attr_hash{"datatype"}) {
($attr_hash{"datatype"} eq "alpha") && ($alpha_var{$_} = 1);
($attr_hash{"datatype"} eq "numeric") && ($alpha_var{$_} = 0);
(defined $alpha_var{$_}) ||
&quit ("Illegal value for attribute datatype",
"Legal values are the strings alpha and numeric",
"Problem varname/attr list: $varlist[$_]",
"from object spec $obj");
}
$varname ||
&quit ("Problem w/varname[attrib-list] $varlist[$_]\n" .
"Illegal character in varname? (legal = letter,digits,underscore)");
# list output may have formatting blank space in it
$strip = &whitespace_strip($varname);
($strip eq "") && &quit ("varname all whitespace");
($strip =~ /\s/) &&
&quit ("Embedded white space in varname","varname = $varlist[$_]");
$varlist[$_] = $strip;
}
($status,$line,@err_msg) = &get_JGOFS_record(IN);
($status eq "EOF") &&
&quit ("Sorry, no data in this object as presently selected",
"Data request via command '" . $command . "'\n");
($status eq "OK") || &quit ("Could not get 1st data record",
&format_get_JGOFS_record_return_status($command,@err_msg));
# Problems at this point could be due to bad selection list
# Should not get here w/defined line - should have exited via status="EOF"
# However extra tests are cheap.
(defined $line) || &quit ("No data returned from '" . $command . "'\n");
chomp $line;
(@datalist = split (/[$list_split_char]+/,$line)) ||
&quit ("No data returned from '" . $command . "'\n");
close IN;
($#datalist == $#varlist) ||
&quit("Length mismatch between varlist and data\n",
"varlist = " . join("|",@varlist) . "\n",
"data = " . join("|",@datalist) . "\n");
print "
Select the variables whose data are to be considered " .
"non-numeric
\n";
print "";
return;
}
#################################################
sub get_cached_varlist
# Args: 1) listvar command w/switches but not object
# 2) directory where cache files are
# 3) object name
# 4) TRUE means refresh cache from net - don't use any existing cache
# Returns: TRUE/FALSE reflecting whether or not cache was used, followed by
# varlist in def format. Getting it in def format because
# that routine is already written!
# Does NOT cache varlists for objects w/sel/proj lists. Regenerates
# those each time. Esp important for "passthrough" objects like
# join, where "sel/proj lists" aren't just sel's & proj's
# Dies on error.
{
my ($listvar_w_switches,$cache_dir,$object,$force_refresh,$dummy) = @_;
my ($varlist_file,$use_cached_file,$status,$object_without_slashes);
my ($listvar,$obj_name,$suffix,@def_varlist);
my ($switch_list,$switch_addend,@switches);
((defined $object) && (! defined $dummy)) ||
&quit ("Internal error: get_cached_varlist not called w/4 args");
@switches = ();
($listvar,@switches) = split ' ',$listvar_w_switches;
$switch_list = (@switches == 0) ? "" : join " ",@switches;
$switch_addend = (@switches == 0) ? "" : '_' . join "",sort(@switches);
($obj_name,$sel_proj) = split (/\(/,$object);
if ($sel_proj) {
$suffix = "_w_sel_proj_list";
$force_refresh = 1;
} else {
$suffix = "";
$obj_name = $object;
}
($status,$object_without_slashes) = &replace_special_char('/',$obj_name);
($status eq "OK") ||
&quit("Internal error trying to replace slashes in object spec $object",
"Problem: $object_without_slashes");
$varlist_file = $cache_dir . "/" . $object_without_slashes .
$switch_addend . $suffix . ".varlist";
# 1 in next line sets cache to expire in 1 day
$use_cached_file = (-e $varlist_file) && ((-M $varlist_file) < 1) &&
( ! -z $varlist_file) && ( ! $force_refresh);
if ($use_cached_file) {
(open (VARLIST_IN,"< $varlist_file")) ||
&quit ("Cannot open $varlist_file for read. \$! = $!");
@def_varlist = ;
close VARLIST_IN || &quit ("Cannot close $varlist_file. \$! = $!");
} else {
# DO NOT put the active sub-selections on the listvar command
@def_varlist = &listvar_in_def_format($listvar,$object,$switch_list);
(open (VARLIST_OUT,"> $varlist_file")) ||
&quit ("Cannot open $varlist_file for write. \$! = $!");
foreach (@def_varlist) {
(print VARLIST_OUT "$_\n") ||
&quit ("Cannot write $varlist_file. \$! = $!");
}
close VARLIST_OUT ||
&quit ("Cannot close $varlist_file. \$! = $!");
}
return $use_cached_file,@def_varlist;
}
#################################################
sub listvar_in_def_format
{
my ($listvar_program,$obj,$listvar_switches) = @_;
my ($command);
my ($save_PATH_INFO,$save_bang,$save_ques);
my ($line,@varlist);
my ($status,@err_msg);
my ($varname,$varname_spec,$attr);
my ($new_level,$rec,$level,$varline,$open_OK);
my ($open_delim,$close_delim,$separator);
# Next is a function of how listvar is coded. It's not parametrized
# there, so there isn't even a common .h file we could use if we ever
# tried to export .h files into perl
my ($listvar_level_varname_separator) = ',';
(defined $listvar_switches) || ($listvar_switches = "");
&check_x_access($listvar_program);
# This coding assumes that $listvar_switches does not substantively
# change the format of listvar -l output and that multiple -l switches
# are acceptable to listvar. So far, so good (Sep 10)
$command = qq|$listvar_program -l $listvar_switches "$obj"|;
# Clean out PATH_INFO to avoid problems w/outer used by $obj
(defined $ENV{"PATH_INFO"}) && ($save_PATH_INFO = $ENV{"PATH_INFO"});
$ENV{"PATH_INFO"} = "";
$! = $? = 0;
$open_OK = open (IN, "$command |");
$save_bang = $!;
$save_ques = $?;
if (defined ($save_PATH_INFO)) {
$ENV{"PATH_INFO"} = $save_PATH_INFO;
undef $save_PATH_INFO;
} else {
delete $ENV{"PATH_INFO"};
}
$open_OK || &quit ("Problem with command '" . $command .
"'\n\$! = $save_bang; \$? = $save_ques\n");
$level = 0;
$varline = "";
($status,$line,@err_msg) = &get_JGOFS_record(IN);
$rec = 0;
while ($status eq "OK") {
$rec++;
# Code that follows must be coordinated w/changes in listvar
$separator = quotemeta($listvar_level_varname_separator);
($new_level,$varname_spec) = split /$separator/,$line,2;
(defined($new_level) && $varname_spec) ||
&quit ("Internal problem. Did not get expected listvar format " .
"in listvar record $rec\n\t$line\nProduced by command",
$command);
$new_level = &whitespace_strip($new_level);
$varname_spec = &whitespace_strip($varname_spec);
( ($new_level =~ /^(\d+)$/) && ($varname_spec ne "") ) ||
&quit ("Internal problem. Did not get expected listvar format " .
"in listvar record $rec\n\t$line\nProduced by command",
$command);
########Attr stuff here
if ($new_level == $level) {
# Use of tab here dictated by def's acceptance of \t as separator
$varline .= "$varname_spec\t";
} else {
# Use of > here is again what def expects
# Deliberately leaving in trailing tab.
$varline .= '>';
push @varlist,$varline;
$level = $new_level;
$varline = "$varname_spec\t";
}
($status,$line,@err_msg) = &get_JGOFS_record(IN);
}
($status eq "EOF") ||
&quit ("Problem w/ data record #" . ++$rec,
&format_get_JGOFS_record_return_status($command,@err_msg));
close IN;
# $varline has trailing tab - should be OK
return @varlist,$varline;
}
#################################################
sub valid_number
{
# See if a string is a valid number. WJS Apr 99
# (mod Jul 05 to pre-test for most likely strings, on hypothesis
# that string test is quicker than exception testing. WJS)
# (mod Feb 17 to make quick numeric test report numbers leading with + or -. WJS)
# Idea is to turn warnings on, force a numeric calculation, trap
# any resulting warning message, and see if it's appropriate.
# Because it's only a warning, eval does not set $@ as it does for
# worse errors. Therefore, the fooling with signals...
# Of course this breaks if the message changes. Much better would
# be to have a perl-callable strtod function...
# The perl manual says that numbers match /[+-]\d*\.?\d*E[+-]\d+/
# (when it was talking about library module BigFloat). However, that
# description clearly doesn't reflect the optional portions of numbers...
my ($test_item) = @_;
my ($number);
((defined $test_item) && ($test_item ne "")) || return 0;
# Quick test - numbers
($number) = ($test_item =~ /^\s*([+-]?\d*\.?\d*)\s*$/);
$number && ($number ne '.') && return 1;
# Quick test - strings. Will incorrectly reject non-decimal radix if such strings
# can be represented without quoting characters. Will correctly reject NaN
# and Inf, but more rigorous test for those later in case we pull this quick test
# Also, next test will incorrectly accept -Inf (more rigorous test, blah blah)
($test_item =~ /^[A-Za-z]*$/) && return 0;
local ($numeric_flag) = 1;
my ($old_val_warn) = $^W;
$^W = 1; # Turn on warnings
# Used to have sub test $_[0] (the warning message) to see if it was
# an "Argument .* not numeric" message. Now think that if the eval gets
# any kind of warning, there must be a problem with the putative number,
# so just decide it's not a number. Presumably if there were
# a numeric warning (overflow? is this a fatal?), the this technique
# would be incorrect. If we know that numeric warnings have their
# own signal, presumably we could trap that, too (and we'd get it before
# __WARN__ or __&quit__?)
local $SIG{__WARN__} = sub { $numeric_flag = 0; };
eval '$test_item + 1'; # Anything that does arithmetic
$old_val_warn || ($^W = 0); # Reset warnings if appropriate
$SIG{__WARN__} = 'DEFAULT'; # Return signal to normal behavior
# NaN test
$numeric_flag && ($test_item != $test_item) && ($numeric_flag = 0);
# Inf test
$numeric_flag && ($test_item == $test_item+1) && ($numeric_flag = 0);
# -Inf test
$numeric_flag && ($test_item == $test_item-1) && ($numeric_flag = 0);
return $numeric_flag;
}
sub get_hash_of_attrs
{
# Input: string that is either
# varname[attr_list]
# [attr_list]
# attr_list
# Output:
# status
# status_text or varname
# list w/alternating keys & values. May be undefined (if no attrs)
# Keys are stripped of leading and trailing whitespace. Keys
# will be non-empty strings of letters, numbers and underscores
# with at least 1 non-number
# If attr has value but no key, the key is its position in the
# attrlist (1 is the first list element)
# Values may be empty and may consist of strings of whitespace
# (unless passed to this routine in 3rd format above (ie; w/o []s)
#
# Statuses:
# OK
# Returns varname in status_text field. May be empty string
# AMBIGUOUS
# Input was a string such as "date", which could be
# a variable w/o an attr list, or a 1-element attr list
# Returns varname in status_text field. May be empty string
# AMBIGUOUS is only an issue if the caller does not know
# the format of what is being sent to get_hash_of_attrs.
# If the get_hash_of_attrs input is known to include the
# varname, then a return of AMBIGUOUS means "empty attribute list"
# If the input contains []s (with or without varname) AMBIGUOUS
# will not be the return status. If the input is attr-list
# only (ie, known NOT to include the varname), then AMBIGUOUS
# means "single-element attribute list"
# NULLINPUT
# Input was empty or all whitespace
# BADFORMAT
# Status string has more detail
# BADKEY
# Key portion of key=value is not a non-empty string
# consisting of letters, numbers, and underscores with at
# least 1 non-number
# This routine could, but does not, validate varnames with respect
# to things like embedded blanks, embedded special chars, etc
#
my ($input,$dummy) = @_;
((defined $input) && ! (defined $dummy)) ||
&quit ("Internal error: get_hash_of_attrs not called w/1 arg");
# Next is from core.h ATTRIB_SEP
my ($attrib_sep) = ';';
# Next is from core.h ATTR_DELIM
my ($attr_delim) = '[]';
# Next is NOT from core.h. Attributes can be any string.
# Made the obvious decision that an = is special when we did width=
# Formally extended the idea now (Oct 10) to allow for datatype=,
# but this is well after core.h, etc
my ($key_value_sep) = '=';
# Note that the legal chars in a key (letters, numbers, _) are NOT
# in core.h, nor do I feel like parametrizing it here and
# dealing with embedded \s, etc. Oh well, oh well...
my ($varname,$attr_list,$explicit_attr_list,@attr);
my ($count,@attr_pairs,$key,$value);
my ($open_delim,$close_delim);
my ($qm_open_delim,$qm_close_delim,$qm_attrib_sep,$qm_key_value_sep);
my ($dummy2);
$qm_attrib_sep = quotemeta($attrib_sep);
$qm_key_value_sep = quotemeta($key_value_sep);
# Too tired to test that $*delim is 100% legit
($open_delim,$close_delim) = split //,$attr_delim;
$qm_open_delim = quotemeta($open_delim);
$qm_close_delim = quotemeta($close_delim);
$input = &whitespace_strip($input);
$input || return "NULLINPUT","No nonblanks in string";
($varname,$attr_list,$dummy) = split /$qm_open_delim/,$input;
(defined $dummy) && return "BADFORMAT","More than 1 $open_delim";
if ($explicit_attr_list = (defined $attr_list)) {
($attr_list,$dummy,$dummy2) = split /$qm_close_delim/,$attr_list;
(defined $dummy) || return "BADFORMAT","Missing $close_delim";
(defined $dummy2) && return "BADFORMAT","Extra $close_delim";
($dummy eq "") || return "BADFORMAT","Non-whitespace after $close_delim";
($varname eq &whitespace_strip($varname)) ||
return "BADFORMAT","Whitespace between varname and attr list";
} else {
($varname =~ /$qm_close_delim/) &&
return "BADFORMAT","Missing $open_delim";
$attr_list = $varname;
$varname = "";
}
# [] (possibly w/whitespace in between) is OK
(&whitespace_strip($attr_list) eq "") && return "OK",$varname;
$count = 0;
@attr_pairs = split /$qm_attrib_sep/,$attr_list;
foreach (@attr_pairs) {
$count++;
# Deliberately allowing $value to contain an =
# Deliberately allowing $value to be null string
($key,$value) = split /$qm_key_value_sep/,$_,2;
if (defined $value) {
($key =~ /^[\w\d\_]+$/) ||
return "BADKEY",
"key must be non-empty and consist of letters, numbers and _. " .
"Problem in attribute -->$_<-- in list -->$attr_list<--";
($key =~ /^\d+$/) &&
return "BADKEY",
"key must have one non-number in it. " .
"Problem in attribute -->$_<-- in list -->$attr_list<--";
} else {
$value = $key;
$key = $count;
}
push @attr,$key;
push @attr,$value;
}
($count == 0) &&
&quit ("Internal error in get_hash_of_attrs. Impossible attr count of 0");
$explicit_attr_list && return "OK",$varname,@attr;
# AMBIGUOUS as far as this routine's parsing is concerned
# if there was only 1 attribute in list and we couldn't tell
# that it was an attribute because it didn't have key=value form (and
# therefore its key is 1). We can disambiguate if it's syntactically
# illegal as a varname. Rule for that set in dim past - varnames must
# be same chars as a varname in c, which happens to be letters, numbers,
# and _, which may sound familiar but is its own (non parametrized) thing
if (($count == 1) && ($attr[0] eq "1")) {
# In the non-explicit attr list case, we anticipated that there was
# no varname and we set attr_list to the varname string. This explains
# why next test is attr_list and why varname gets restored
if ($attr_list =~ /^[\w\d\_]+$/) {
$status = "AMBIGUOUS";
$varname = $attr_list;
} else {
$status = "OK";
}
} else {
$status = "OK";
}
return $status,$varname,@attr;
}
sub whitespace_strip
# Returns null if all whitespace, but consider that stripped string could be 0
{
my ($strip,$dummy) = @_;
(defined $strip && ! defined($dummy)) ||
&quit ("Internal error: whitespace_strip not called w/1 arg");
($strip eq "") || ($strip =~ s/^\s*//);
($strip eq "") || ($strip =~ s/\s*$//);
return $strip;
}
##################
$WJS_WEB_PERL_UTILITIES_LOADED = 1;
# Revision history pre 2012
# 16 Feb 11 WJS
# Bug fix: OBJSPEC needs to allow =s since they are in
# selection specs. Interesting that 3 Jun 10 fix "worked" -
# guess selections there were alpha, "eq" selections.
# While at it, throw in < and >
# 15 Nov 10 WJS
# get_hash_of_attrs
# Mod print_select_alpha_variables to use get_hash_of_attrs to
# check for datatype
# 30 Sep 10 WJS
# whitespace_strip
# Improve parsing of listvar output in listvar_in_def_format
# 14 Jun 10 WJS
# 1 Jun mod of 21 Feb mod STILL not correct. 3rd time the charm?
# 3 Jun 10 WJS
# Bug fix: OBJSPEC needs to allow &s and |s since they are in
# selection specs
# 1 Jun 10 WJS
# Mod adjust_for_reqd_args to use get_query_string_args
# instead of doing same code inline (not same really; get_
# uses backtick.pl instead of actual backticks.). Consequence:
# adjust_ no longer returns PQSERR, since get_ dies internally if
# such a thing happens
# Mod shell_protect to protect ampersands. How many more did I miss?
# 21 Feb mod not syntactically correct - fix (more good testing)
# 21 Feb 10 WJS
# Blank stripper of 7 Sep no good either. (.+) is greedy and
# includes the trailing blanks. google search for "perl whitespace
# trim" does NOT show any regex, which might be why these don't
# work. They use s/\s*$//, and so shall I (although I'm
# tempted by split ...)
# 18 Feb 10 WJS
# Bug fix: print_select_alpha_variables should diagnose "no data in
# object" situation rather than generic death
# 6 Feb 10 WJS
# Diagnostic fix: error message from check_form_var needed updating
# to mention OBJSPEC
# Bug fix: OBJSPEC needs to allow blanks and %s for character selections
# 3 Feb 10 WJS
# Bug fix: parse_object_spec sometimes returns a slash in the subdir
# and sometimes doesn't. Make it match the doc
# [Moved older comments to bottom of this file. Dec 12]
# 5 Nov 09. WJS
# Replace some ESPIPE code w/backtick.pl (which is now require'd)
# Take a shot at making replace_special_char actually work
# 17 Oct 09. WJS
# Better error checking on open in listvar_in_def_format
# 7 Sep 09. WJS
# Put better blank stripper regex in print_select_alpha_variables,
# replacing /^\s*(.+)\s*/ with /^\s*(\S+)\s*/. Thought former
# was a bug if line was all blanks, but now think both return
# the same thing. Further reflection: old form allowed
# "multi-word lines" (w/embedded blanks). That is an error,
# as is the existence of all-blank lines. Should be no
# practical effect, since in actual use, all-blank lines
# would have required other pre-existing errors
# Doubt that numeric/alpha stuff mentioned 5 Sep will fly, but leave
# comments in ...
# 5 Sep 09. WJS
# Add print_choose_memory_algorithm
# Put in comments for a hook for "outside advice" about
# numeric/alpha status of variables.
# 20 Jul 09. WJS
# Doc note: trigram.pl available, but in standalone file to avoid
# loading this whole thing when not needed. If trigram func desired
# here, too, best way might be to have this file include/require that
# one
# 23 Jan 09. WJS
# Add an OBJSPEC legal character set for get_form_var to check
# 28 Dec 08. WJS
# Bug fix: open pipe error handling in print_select_alpha_variables
# failed on fleetlink, again due to attempt to use $!. Switch
# to using return from open command. See backtick.pl (would be
# nice if that could be expanded for all pipe opening - would need
# to subroutine-ize per-record processing and pass that to backtick)
# 27 Nov 08. WJS
# Bug fix: separator chars that come back from parse_query_string
# need protection if they happen to be perl special chars.
# 1 Nov 08. WJS
# adjust_for_reqd_args
# 25 Oct 08. WJS
# get_this_file_as_url
# Mod to abs_file_spec to check for empty string
# Syntax fix to 18 Oct work
# 18 Oct 08. WJS
# varlist-via-listvar subsystem enhanced to allow attributes
# 4 Aug 08. WJS
# parse_object_spec needs to accept : as legal character
# 31 Jul 08. WJS
# Comment fix
# 25 Jul 08. WJS
# get_cached_varlist
# 26 Apr 08. WJS
# replace_special_char
# 3 Apr 08. WJS
# Fix error message in listvar_in_def_format
# 14 Mar 08. WJS
# Only mod is this comment. Decided NOT to modify this code for
# Apache 2 double slash nonsense. If the pieces of PATH_INFO
# get here via parse_path_info (which is invoked by build-opt-env.pl),
# we should be OK. This is a motivation to use build-opt-env.pl!
# 24 Oct 07. WJS
# Add -l switch to list command
# check_build_opt_env_var
# list_as_text_plain
# 23 Aug 07. WJS
# Put format_get_JGOFS_record_return_status back in here! Apparently
# pulled in Sep 05 when it and get_JGOFS_record were copied to
# the fleetlink jgofs_sql stuff. Guess we didn't have many statuses
# to format!
# 21 May 07. WJS
# Text change for user instruction in print_select_alpha_variables.
# 23 Nov 06. WJS
# valid_number should realize 0 is a number
# 6 Sep 06. WJS
# Mod to get_JGOFS_record to ignore $! unless read returns undefined
# 1 Sep 05. WJS
# do_shell_command
# make check_?_access do a better job w/null input
# 23 Aug 05. WJS
# Have parse_object_spec check the easy part of the spec for
# illegal chars. I give up on the sel/proj list (which, to be
# fair, isn't even limited to sel/proj-architecturally could be
# anything, so can't challenge any chars)
# Comment change
# Err msg update
# 21 Aug 05. WJS
# parse_object_spec (should take parse_path_info-like approach.
# However, c code already exists, distributed in pieces within
# jdbopen & routines it calls...)
# 15 Aug 05. WJS
# Add print_select_alpha_variables
# 4 Aug 05. WJS
# Allow get_form_var to get a form variable that's a list
# Add check_r_access
# 1 Aug 05. WJS
# Change add_to_library_path to add_file_to_path_string
# 27 Jul 05. WJS
# quit to use html_line_breaks. Also in quit, if $open_pre_tag,
# close the pre tag so CgiDie's formatting looks reasonable.
# Add (& modify) valid_number
# Add html_line_breaks
# 29 Jun 05. WJS
# Add add_to_library_path
# 24 Mar 05. WJS
# Add get_JGOFS_record, et al
# 18 Mar 05. WJS
# Add hex_dump
#################################################