# Various perl utilities I've found useful for OOserver stuff WJS Aug 04 ######### # Global variables: # $CGI_LIB_LOADED (see 30 Apr 17 mod) # $BACKTICK_LOADED (see 30 Apr 17 mod) # get_form_var: %form_info; read-only; required # quit: $version; read-only; optional # $unique; read-only; optional # $open_pre_tag; read-only; optional # printheader: $version; read-only; optional # $print_header_done; read-write; optional # Needs cgi-lib.pl; assumed set up in program that uses this stuff # Needs backtick.pl, require'd from . directory # As of 30 Apr 17, try to get cgi-lib.pl if it isn't loaded, and # try NOT to require cgi-lib.pl or backtick.pl if they ARE already loaded # # 30 Apr 17 WJS # Try conditionally loading backtick.pl and cgi-lib.pl # Moved 2010 & 2011 comments to bottom # 27 Apr 17 WJS # Bug fix: print_select_alpha_variables was loading the menu with # strip($varlist[$_]) instead of strip($varname). Difference is that # $varname has the attr list stripped off. Didn't show up until somebody # used [datatype=alpha], showing that the latter was never used since its # coding in 2010 # 17 Feb 17 WJS # Alter valid_number # 27 Jul 16 WJS # val_from_embedded_key_val_pair # list_as_text_plain_alternate_list_program # 22 Jan 15 WJS # Put INCLUDE_ATTRIBUTES_SWITCH in environment for print_select_alpha_variables # Wonder how many other functions it belongs in ... # 19 Oct 14 WJS # Add test for -Inf # 16 Oct 14 WJS # NaN and Inf and variants are "now" considered legit numbers... but not by me! # Alter valid_number # 21 Dec 12 WJS # Re "Remove do_shell_command" comment of 9 Jun: bad presumption # since it was used IN HERE (possibly among other places), and # apparently not noticed. In any case, cannot replace do_shell_command # with backtick in cases where command must write to stdout. Doing so # would put all the output into the backtick return symbol. # 9 Jun 12 WJS # Rewrite get_JGOFS_record to begin process of properly # detecting errors on reads. Only # took "since the beginning" (2004 was when I apparently gave up # and wrote the previous incorrect code) # Correction from a "good sounding", "reputable" web article, not me - # I only provide the opinions in the quotes. # Remove do_shell_command - presumably no longer needed # 16 Apr 12 WJS # Bug fix: OBJSPEC needs to allow @s since they are in # object specs that have an indirect file spec in them # Any guess how many more special char mods we need to OBJSPEC? # Any guess how many security problems there probably are? # Bug fix: print_select_alpha_variables became too subtle after # use of get_hash_of_attrs; to wit, it accepted get_hash_of_attrs # assumption that illegal varnames are, in fact, standalone attrs # p_s_a_v then took g_h_o_a's "empty" varname and reported it as # all blanks. Move more p_s_a_v error checking, since if it occurs # in the menu display section, the errors displayed as menu items #sub rel_filespec #sub abs_filespec #sub check_r_access #sub check_x_access #sub get_form_var #sub bad_form_var #sub check_build_opt_env_var #sub printheader #sub print_select_alpha_variables #sub print_choose_memory_algorithm #sub html_line_breaks #sub quit #sub dump_input_info #sub dump_form_info #sub dump_environment_variables #sub hex_dump #sub hex_dump_html #sub add_to_file #sub adjust_for_reqd_args #sub do_shell_command #sub get_query_string_args #sub let_serv_do_it #sub make_dir #sub parse_object_spec #sub file_system_to_web_cgi #sub get_this_file_as_url #sub replace_special_char #sub shell_protect # See also separate file trigram.pl #sub valid_number #sub val_from_embedded_key_val_pair #sub whitespace_strip #sub format_get_JGOFS_record_return_status #sub get_cached_varlist #sub get_hash_of_attrs #sub get_JGOFS_record #sub listvar_in_def_format #sub add_file_to_path_string #sub list_as_text_plain $got_cgi_lib_already = ((defined $CGI_LIB_LOADED) && $CGI_LIB_LOADED); if ( ! $got_cgi_lib_already) { # filename contains hyphen; variable names can't $cgi_lib_filename = "cgi-lib.pl"; require $cgi_lib_filename; } $got_backtick_already = ((defined $BACKTICK_LOADED) && $BACKTICK_LOADED); if ( ! $got_backtick_already) { $backtick_file_name = "./backtick.pl"; require $backtick_file_name; } ################################################# sub check_build_opt_env_var { my ($env_var,$build_opt_env) = @_; my ($val); defined ($ENV{$env_var}) || &quit ("Internal problem. ", "Did not get defined env var $env_var from $build_opt_env"); ($val = $ENV{$env_var}) || &quit ("Internal problem. ", "Did not get non-empty env var $env_var from $build_opt_env"); return $val; } ################################################# sub list_as_text_plain # See list_as_text_plain_alternate_list_program { my ($opts,$build_opt_env) = @_; # Check that build-opt-env set up things as expected my ($topdir) = &check_build_opt_env_var("OPTHOME",$build_opt_env); my ($bindir) = "$topdir/bin"; my ($list) = "$bindir/list"; &list_as_text_plain_alternate_list_program($opts,$build_opt_env,$list); return; } sub list_as_text_plain_alternate_list_program # Assumes we are in OOserver environment with build-opt-env.pl run # Run list against object we are working on and send it out # as text/plain # Accepts a bunch of list switches as arg 1, name of build-opt-env # file as arg 2 for error message purposes, and file spec of list # executable as arg 3 { my ($opts,$build_opt_env,$list,$dummy) = @_; (defined $opts) || &quit ("Internal problem - no args sent to list_as_text_plain_alternate_list_program"); (defined $dummy) && &quit ("Internal problem - too many args sent to list_as_text_plain_alternate_list_program"); my ($object,$command,$subsels); $build_opt_env || ($build_opt_env = "?/?/build-opt-env.pl"); # Check that build-opt-env set up things as expected $object = &check_build_opt_env_var("OBJECT",$build_opt_env); defined ($ENV{'SUBSELS'}) || &quit ("Internal problem. ", "Did not get defined env var SUBSELS from $build_opt_env"); &check_x_access($list); $subsels = $ENV{'SUBSELS'}; $command = "$list $opts \"$object"; $subsels && ($command .= "($subsels)"); $command .= '"'; print "Content-type: text/plain\n\n"; &do_shell_command($list,$command,1); return; } ################################################# sub html_line_breaks # Replace any
and tags w/ \ns. Then go back and replace # every \n with
\n. Result should be printable in both html # and non-html environments (albeit in the latter, one sees the
) { my ($string) = @_; $string =~ s/\/\n/g; $string =~ s/\\>/\n/g; $string =~ s/\n/\\n/g; return $string; } ################################################# sub adjust_for_reqd_args # Remove arguments from query string that are known to be inner # args and tack them onto the object name # Problem example: In a URL of joinu.html0?obj1,obj2,selproj_list # obj1 and obj2 are NOT selections or projections, but are often treated # that way # Problem amelioration: Instead of joinu, use object name join_2_arg # Then this code will return join_2_arg(obj1,obj2) and selproj_list # Future versions of this may replace the _N_arg stuff w/ "smart" # parse_query_string (pqs) routines than syntactically figure out what # each query string argument is (after freezing "originality" at existing # level, and expecting the use of some formal procedures in the future) # If input object names don't have special format, this routine # just returns its input as output (w/NOARGKEY substatus; see below) # # Input: parse_query_string program name # object spec (NOT URL), w/optional arg list in parens # query string # Returns 3 or 5 strings. # string 1 = "OK" or "NG". # "OK" followed by 4 strings # Substatus # Object spec (possibly modified from original) # Query string (possibly shortened from original) # Number of arguments moved from qs to objspec # "NG" followed by 2 strings # Substatus # Error text # "OK" substatuses Input obj spec # "NOARGKEY" joinu # "ARGSINSPEC" join_2_arg(obj1,obj2) # "EXTRAARGSINSPEC" join_2_arg(obj1,obj2,x) # "MOVEDNEEDEDARGS" join_2_arg [and at least 2 qs items] # "MOVEDSOMEARGS" join_2_arg(obj1) [or join_2_arg and # only 1 qs item] # "NG" substatuses # "MULTIPLEARGKEY" join_1_arg_2_arg # string 1 is a syntax status only. Substatuses of EXTRAARGSINSPEC # and MOVEDSOMEARGS probably indicate problems someplace else # Does NOT return if parse_query_string programs dies - 1 Jun 10 # mod calls get_query_string_args, which &quit's. Pre 1 Jun 10, # returned a PQSERR substatus to an NG status { my ($pqs_program,$inobj,$qs,$dummy) = @_; my ($objspec,$objnode,$objdir,$objname,$objargs); my ($list,@qs,@objargs); my ($needed_args,$nargs_objargs,$nargs_qs); my ($pqs_out,$separators,$rest,$pqs_sep,$qsarg_sep,$qs_split,@seps); my ($status,$substatus,$outobj,$args_moved); # The chars below are fundamental (should be in some trans-language # .h file, etc). The pqs separator characters are deliberately # independent of these. For example, given a qs of x,(y,z), the pqs # routines will present something like x!(y,z) (along with the !) so # that a split will return the correct x and (y,z) pieces. However, # when re-constituting a list from the pieces, we have to put the comma # back in my ($objarg_sep) = ','; my ($qs_sep) = ','; ($inobj && ! $dummy) || &quit("Internal problem - adjust_for_req_args called w/wrong # args"); ($status,$objnode,$objdir,$objname,$objargs) = &parse_object_spec($inobj); ($status eq "OK") || return ("NG","BADOBJPARSE",$objnode); ($needed_args) = ($objname =~ /_(\d+)_arg/); (defined $needed_args) || return ("OK","NOARGKEY",$inobj,$qs,0); ($dummy) = ($objname =~ /_\d+_arg.*_(\d+)_arg/); (defined $dummy) && return ("NG","MULTIPLEARGKEY","object name had > 1 _N_arg string"); # Count number of arguments in $objargs & $qs using the query string # subsystem. At the tactical level, this handles commas-within-parens # Strategically, one can argue that an object's arg list and those # args within a qs will always be the same, so any mods can be handled # in a single spot. See parse_query_string doc (currently comments in # the c source - Oct 08) if ($objargs) { @objargs = &get_query_string_args($pqs_program,$objargs); $nargs_objargs = @objargs; } else { $nargs_objargs = 0; } ($nargs_objargs == $needed_args) && return "OK","ARGSINSPEC",$inobj,$qs,0; ($nargs_objargs > $needed_args) && return "OK","EXTRAARGSINSPEC",$inobj,$qs,0; if ($qs) { @qs = &get_query_string_args($pqs_program,$qs); $nargs_qs = @qs; } else { $nargs_qs = 0; } # Move args from beginning of query string to end of object arg list ($objargs eq "") || ($objargs .= $objarg_sep); $args_moved = 0; foreach ($nargs_objargs .. $needed_args-1) { # Allowing empty and 0 values in query string defined($dummy = shift(@qs)) || last; $objargs .= $dummy . $objarg_sep; $args_moved++; } chop $objargs; $outobj = ""; $objnode && ($outobj .= "//$objnode"); $objdir && ($outobj .= "/$objdir"); $outobj && ($outobj .= "/"); $outobj .= "$objname($objargs)"; $substatus = (($nargs_objargs == 0) && (defined $dummy)) ? "MOVEDNEEDEDARGS" : "MOVEDSOMEARGS"; return "OK", $substatus, $outobj, join("$qs_sep",@qs), $args_moved; } ################################################# sub parse_object_spec # //node/subdir/obj_name(sel_proj--and/or--obj_specific_args) # All but obj_name are optional # Returns a list. 1st element is status; "OK" or "NG" # If "NG", next element tells why/where # If "OK", next 4 elements are node, subdir, name, & args # All but name may be empty # Node does not have leading // # Args are returned w/o leading and trailing parens (error # if no trailing paren. However, matched pairs are not # checked) # Subdir does not have leading or trailing / # Accordingly, this code does NOT distinguish between # /dir/name and dir/name # nor does it distinguish # /name(args) and name(args) # Former may be object spec & latter may be method(args); # anyway, for now, we don't do it! { my ($obj_spec) = @_; my ($b4paren,$aftparen); my ($node,$subdir,$name,$args); $obj_spec || return "NG","No name"; ($b4paren,$aftparen) = split /\(/,$obj_spec,2; if ($aftparen) { ($args) = ($aftparen =~ /(.+)\)$/); $args || return ("NG","Argument portion"); } else { $args = ""; } $b4paren || return "NG","No name"; ($b4paren eq '/') && return "NG","No name"; ($b4paren =~ /^[\w\-\_\/\.\:]+$/) || return "NG","Illegal char in node/char/name"; ($node_dirs,$name) = ($b4paren =~ m"(.*)/(.*)"); if (defined ($node_dirs)) { # Slash present $name || return "NG","No name"; $node_dirs || return "OK","","",$name,$args; # Next line deals w/specs like //foof. Not sure what diagnostic # is appropriate ($node_dirs eq '/') && return "NG","No node"; if ($node_dirs =~ m"^//") { (undef,undef,$node,$subdir) = split /\//,$node_dirs,4; $node || return "NG","No node"; $subdir || ($subdir = ""); return "OK",$node,$subdir,$name,$args; } else { ($node_dirs) = ($node_dirs =~ m|^/?(.*)|); return "OK","",$node_dirs,$name,$args; } } else { # No slash return "OK","","",$b4paren,$args; } } ################################################# sub rel_filespec # Assume argument is a relative file spec. It should not have # a leading slash. Get rid of trailing slash, too { my ($string) = @_; ($string =~ m"//") && &quit ("File spec $string contains unallowed consecutive slashes"); ($tmp) = ($string =~ m"^/?(.*)/?$"); # Deliberately allowing an empty string. Will this be regretted? return $tmp; } ################################################# sub abs_filespec # Assume argument is an absolute file spec. It must have # a leading slash. Get rid of any trailing slash { my ($string,$string_description) = @_; my ($tmp); $string || &quit ("$string_description empty/undefined. ", "Supposed to be an absolute file spec"); ($string eq "/") && (return "/"); ($string =~ m"//") && &quit ("$string_description (value $string) " . "contains unallowed consecutive slashes"); ($tmp) = ($string =~ m"^/(.*)/?$"); $tmp && return "/$tmp"; &quit ("$string_description (value $string) is not an absolute file spec", "(no leading slash or emptiness where file name expected)"); } ################################################# sub check_r_access # Games w/ $! value empirically determined for -r access on synthesis { my ($tmp) = $_[0]; my ($msg); $tmp || &quit("No file name sent to check_r_access for checking"); $! = 0; if ( ! -r $tmp) { $msg = ($! == 0) ? "Permission denied" : $!; &quit ("No read access to $tmp: $msg"); } } ################################################# sub check_x_access # Games w/ $! value empirically determined for -r access on synthesis # No particular idea if this works for -x or on non-synthesis nodes # but I guess I think it's worth assuming! { my ($tmp) = $_[0]; my ($msg); $tmp || &quit("No file name sent to check_x_access for checking"); $! = 0; if ( ! -x $tmp) { $msg = ($! == 0) ? "Permission denied" : $!; &quit ("No execute access to $tmp: $msg"); } } ################################################# sub get_form_var { my ($variable,$required,$check_string) = @_; my ($list_indicator,$list_separator); my ($tmp) = $form_info{$variable}; ($required =~ /^(REQ|OPT)/) || &quit("Internal error - get_form_var dummy \$required does not " . "begin with REQ or OPT. Its value is $required\n"); # Allow a binary 0 if thing we want from form is a list of values # (signified by $check_string ending in _LIST) (defined $check_string) || ($check_string = "STRING"); ($check_string,$list_indicator) = ($check_string =~ /(.+?)(_LIST)?$/); $list_separator = ($list_indicator) ? '\x00' : ''; (defined $tmp) || ($tmp = ""); if ($tmp eq "") { ($required =~ /^REQ/) && (&quit("Did not get required variable $variable from form")); } else { if ($check_string eq "STRING") { ($tmp =~ /^[\w\-\_\#\.$list_separator]+$/) || &bad_form_var($variable,$tmp,"string",'a-zA-Z0-9#._-'); } elsif ($check_string eq "SPACEOK") { ($tmp =~ /^[\w\-\_\#\. $list_separator]+$/) || &bad_form_var($variable,$tmp,"string",'a-zA-Z0-9#._- & space'); } elsif ($check_string eq "FILESPEC") { ($tmp =~ /^[\w\-\_\/\.$list_separator]+$/) || &bad_form_var($variable,$tmp,"file specification",'a-zA-Z0-9/._-'); } elsif ($check_string eq "OBJSPEC") { # Note: although accepting chars like & % and |, deliberately leaving # them out of the "acceptable" string in the error message. Attempt is # to discourage a hacker who might see the error message from realizing # that these chars are accepted ($tmp =~ /^[\w\-\_\/\.\:\(\)\,\{\}\=\<\>\%\&\@\| $list_separator]+$/) || &bad_form_var($variable,$tmp,"JGOFS object specification", 'a-zA-Z0-9/._-(),:{}=<> & space'); } elsif ($check_string eq "NUMBER") { ($tmp =~ /^[0-9E\+\-\.$list_separator]+$/) || &bad_form_var($variable,$tmp,"numeric specification",'0-9E.+-'); } elsif ($check_string ne "NOCHECK") { &quit("Internal error - passed get_form_var argument " . "\$check_string is not " . "STRING, NUMBER, FILESPEC, SPACEOK, OBJSPEC or NOCHECK.", "Its value is $check_string\n"); } } return $tmp; } ################################################# sub bad_form_var { my ($var,$val,$desc,$legal_string) = @_; &quit("For form variable $var, ", "please provide a non-empty $desc built of chars $legal_string", "string causing problem: $val"); } ################################################# sub make_dir { my ($dir) = $_[0]; if (-e $dir) { (-d $dir) || &quit ("$dir is not a directory"); (-w $dir) || &quit ("Cannot create files in directory $dir"); } else { mkdir ($dir,0755)|| &quit("Cannot create directory $dir: $!"); } return; } ################################################# sub printheader { if ( ! $print_header_done) { print &PrintHeader(); $version && (print "\n"); $print_header_done = 1; } return; } ################################################# sub add_to_file { my ($file,$addend) = @_; (open (APPEND,"$file")) || &quit("Cannot open $file: \$! = $!"); (print APPEND "$addend\n") || &quit("Cannot write $file: \$! = $!"); (close APPEND) || &quit("Problem closing $file: \$! = $!"); return; } ################################################# sub quit { my ($temp,$errmsg1,$errmsg2); $open_pre_tag && print ""; $errmsg1 = ""; foreach (@_) { chomp ($temp = $_); $errmsg1 .= " *** $temp\n"; } $unique && ($errmsg1 .= " ... working on job # $unique\n"); $errmsg2 = "This message issued " . localtime() . "\n"; $version && ($errmsg2 .= "$version\n"); &CgiDie(&html_line_breaks($errmsg1),&html_line_breaks($errmsg2)); } ################################################# sub dump_input_info { &dump_environment_variables(); &dump_form_info(@_); return; } ################################################# sub dump_form_info { my ($separator_for_multiple_selections,%form_info) = @_; my ($key,$value,$out_value); &printheader(); print "

Form variables

\n"; print "
\n";
  foreach $key (sort keys(%form_info)) {
    $value = $form_info{$key};
    $out_value = "";
    foreach (split("\0",$value)) {
      $out_value .= $_ . $separator_for_multiple_selections;
    }
    chop $out_value;
    print "$key = $out_value\n";
  }
  print "\n\n";
  print "
\n"; return; } ################################################# sub dump_environment_variables { &printheader(); print "

Environment variables

\n"; print "
\n";
  foreach (sort keys(%ENV)) {
    $out_value = (defined $ENV{$_}) ? $ENV{$_} : "";
    print "$_ = $out_value\n";
  }
  print "\n\n";
  print "
\n"; return; } ################################################# sub get_this_file_as_url # Should use &file_system_to_web_cgi. However, # there's evidence of trouble w/that routine. Code is fine # as long as this routine is in top-level jg directory (and as long # as file system is using /s) # Other possible curiosities could occur because we assume that in the # case of, say, multiple links to a file, $0 and SCRIPT_NAME will come # out w/the same file name, etc. However, seems to me that return from # this should be usable { my ($filespec,$dummy) = @_; my ($jgdefn,$filename,$dir); ($filespec && ! $dummy) || &quit ("Internal error - get_this_file_as_url called w/wrong # args"); # In line below, jgdefn does not end up as a filespec, nor is # JGSCRIPTDIR a directory. So much for naming things... $jgdefn = &abs_filespec($ENV{"JGSCRIPTDIR"},"JGSCRIPTDIR env var"); ($dir,$filename) = ($filespec =~ /(.*)\/(.+)/); $filename || $dir || ($filename = $filespec); # No slashes $filename || &quit ("Malformed filespec input to get_this_file_as_url", "filespec = $filespec"); return "$jgdefn/$filename"; } sub file_system_to_web_cgi { # Idea is to try to allow name & location of this file to change # Presumably the directory portion of the file contains a string that # has been defined to the web server as the cgi root. Substitute # the cgi root name for that string # Try to compensate for various abnormalities, such as running this # script online, or the symlink'ing of the directory defined to the # web server as the cgi root. Compensation consists of assuming that # the script is in the top level cgi root. Compensation also means # that if the input args are truly wrong, we will compound the error... # Both cgi root and its defn are assumed "absolute"; eg, leading off # w/slash. (Don't care about any trailing slash) my ($infile,$cgi_root_name,$directory_defined_as_cgi_root) = @_; my ($out); $directory_defined_as_cgi_root = &abs_filespec($directory_defined_as_cgi_root, "Directory arg to file_system_to_web_cgi"); $cgi_root_name = &abs_filespec($cgi_root_name, "cgi root arg to file_system_to_web_cgi"); $out = $infile; if ($out =~ m"^/") { $out =~ s/$directory_defined_as_cgi_root/$cgi_root_name/; if ( ($out eq $infile) && ($directory_defined_as_cgi_root ne $cgi_root_name) ) { ($out) = ($out =~ m".*/(.+)$"); $out || &quit ("File spec $infile seems to be a directory " . "(It ends with a slash)" ); $out = "$cgi_root_name/$out"; } } else { $out = "$cgi_root_name/$out"; } return $out; } ################################################# sub get_query_string_args # Argument 1 is file spec of parse_query_string image # Arg 2 (optional) is a query_string to parse # See parse_query_string.c for doc { my ($parse_query_string,$query_string) = @_; my ($pqs_output,$separators,$rest,$pqs_sep,$qs_sep,$qs_arg_list); my ($command,$exit_status,@status); &check_x_access ($parse_query_string); (defined $query_string) || ($query_string = ""); $query_string = &shell_protect($query_string); $command = "$parse_query_string $query_string"; ($pqs_output,$exit_status,@status) = &backtick($command); (defined $exit_status) || &quit ("Internal error - bad call to backtick"); ($exit_status == 0) || &quit ("parse_query_string problem", &format_backtick_return_status ($command,$exit_status,@status,$pqs_output) ); ($separators,$rest) = split (/ /,$pqs_output,2); (defined $rest) || return ""; ($pqs_sep,$qs_sep) = split (//,$separators); $split_string = '\\' . $pqs_sep; ($qs_arg_list) = split (/$split_string/,$rest); $split_string = '\\' . $qs_sep; return split (/$split_string/,$qs_arg_list); } ################################################# sub val_from_embedded_key_val_pair { # NB: THIS ROUTINE MUST NOT USE OTHER wjs_web*.pl PROGRAMS. This routine operates before # "the environment" is set up by build-opt-env.pl # Input is keyword, string and separator-within-string character # Program searches for keyword= . If found, program returns, as the 2nd return arg, characters between # = and next separator-within-string character. An empty string is NOT returned; ie, the condition # where the = is immediately followed by the separator is considered an error. # User caveat: quotation marks, parentheses, etc are a problem for the caller of this routine. # Consider using get_query_string_args ("does" parens but deemed too clumsy to do work of this routine) # Returns 2 or 3 vals. First is a status, "NG" or "OK". # If OK, 2nd return val is value string for keyword as described above # If OK, 3rd return val is string with keyword=value removed. If appropriate, a # separator-within-string character is also removed. Note that empty string return is valid # If NG, 2nd return val is "NOTFOUND" if keyword was not in input string. # Otherwise 2nd return val is some other amplification of the problem my ($keyword,$in_string,$sep,$dummy) = @_; (defined $dummy) && return "NG","Internal error: too many args passed to val_from_embedded_key_val_pair"; my ($val_string,$remainder,$tmp,$new_string); my ($qm_keyword) = quotemeta($keyword); my ($qm_sep) = quotemeta($sep); my ($part1,$part2,$part3) = split (/$qm_keyword/,$in_string); $part3 && return "NG", "$keyword specified more than once in input $in_string"; if ($part2) { ($tmp,$remainder) = split (/$qm_sep/,$part2,2); $tmp || return "NG","Found keyword $keyword but could not get value from -->$part2<--"; ($val_string) = ($tmp =~ /\s*=\s*(.+)/); $val_string || return "NG","Found keyword $keyword but could not get value from -->$tmp<--"; defined ($part1) || ($part1 = ""); defined ($remainder) || ($remainder = ""); # Note that it is NOT necessary to put in a separator between $part1 & $remainder because # there is one at the end of $part1 (assuming $part1 non-empty) $new_string = $part1 . $remainder; } else { return "NG","NOTFOUND"; } return "OK",$val_string,$new_string; } ################################################# sub shell_protect # Try to put a backslash in front of everything "the shell" # will treat as a special character. Sigh { my ($input) = $_[0]; my ($shell_chars) = ' "`;{}&|()\\\''; # Add more when needed... my ($output); $output = ""; foreach (split(//,$input)) { /[$shell_chars]/ && ($output .= '\\'); $output .= $_; } return $output; } ################################################# sub replace_special_char # Idea is that there is a string with an offensive special character # which you would like replaced with another. Original problem was # to make a filename out of an object spec, which meant the slashes were # an issue. Not clear if something else like, say, trigramming, would # do the job just as well if not better ... # Routine requires 2 args. Arg 1 = char to be replaced. Arg 2 = # string containing arg 1. Arg 1 could probably be a string if one is # careful - this routine does not check how long it is. # Routine returns 2 or 3 values. 1st value is a status. If "OK", # 2nd value is replacement string, and 3rd value is char that was used # for replacement. If status not "OK", it is the name of a problem, and # the 2nd value is a text message describing the problem { my ($char_to_replace,$input,$dummy) = @_; my ($replacement_candidates,$replacement_char,$string_with_replacement); my ($quoted_replacement_char,$quoted_char_to_replace); ((defined $char_to_replace) && (defined $input) && ( ! defined $dummy)) || return "NEEDEXACTLYTWOARGS","Need exactly 2 args"; # Altering order of replacement candidates will hurt callers who expect # strings generated by this routine to match strings generated by this # routine "a while ago", so don't do it. Order below is approx a guess # at chars that won't cause trouble if they get passed to shell, etc # which is why | ; etc are not on list. If needed, add them on end... # and run output of this routine through shell_protect! $replacement_candidates = '#^@$&+=:?/'; $quoted_char_to_replace = quotemeta($char_to_replace); foreach ( split (//,$replacement_candidates) ) { ($_ eq $char_to_replace) && next; $replacement_char = $_; $quoted_replacement_char = quotemeta($replacement_char); ($input =~ /$quoted_replacement_char/) && next; $string_with_replacement = $input; $string_with_replacement =~ s($quoted_char_to_replace)($replacement_char)g; last; } (defined $string_with_replacement) && (return "OK",$string_with_replacement,$replacement_char); return "ALLREPLCHARSFAILED", "All replacement chars for $char_to_replace occur in input string"; } ################################################# sub let_serv_do_it { my ($serv,$path_info,$query_string,$output_redirect) = @_; my ($command); $path_info || return "NO_PATH_INFO"; &check_x_access($serv); $command = "PATH_INFO=" . &shell_protect($path_info) . ";export PATH_INFO;"; $query_string && ($command .= "QUERY_STRING=" . &shell_protect($query_string) . ";export QUERY_STRING;"); $command .= $serv; $output_redirect && ($command .= " > $output_redirect"); $! = $? = 0; system ($command); (($! == 0) && ($? == 0)) || return "SERV_FAILURE",$!,$?,$command; return "OK"; } ################################################# sub hex_dump_html { my ($label,$string,$handle,$line_length_arg) = @_; my ($default_output_handle) = ("STDOUT"); my ($stat); $handle || ($handle = $default_output_handle); $! = 0; (print {$handle} "
\n") || return $!;
  $stat = &hex_dump(@_);
  (print {$handle} "
\n") || return $!; return $stat; } ################################################# sub hex_dump # Arg 1 - name or other label for string being dumped # 2 - string to be dumped (NB: valid binary #s will be dumped as strings, not #s # eg; 255 gets a 3 character 32 35 35 dump, not a # 1 char FF dump. This is perl's "pick the correct # integer/string context") # 3 - file handle on which to dump. Defaults to STDOUT # 4 - max length of a dumped line. Defaults to 132. If negative, represents # number of dumped characters on a line (results in line length of # (length of arg 1) + 3 - 3*(arg 4) ... approx, and if arg 1 is not too long) # Returns "OK" # "NOSTRING" for empty arg 2. Also prints msg on arg 3 # $! if print returns 0. Don't know what gets printed on arg 3 { my ($label,$string,$handle,$line_length_arg) = @_; my ($default_line_length,$default_output_handle) = (132,"STDOUT"); my ($max_len_label,$default_len_per_line_prefix) = (16,4); # 3 is a magic constant in this routine, representing a unit of 2 hex chars plus # a blank. To parametrize it, we'd have to insert variable spacing in formats, etc my ($iline,$nlines,$last_char,$first_char,@chars); my ($min_line_length,$short_label,$indentation); my ($visible,$len_string,$max_dumped_chars_per_line); $handle || ($handle = $default_output_handle); $label || ($label = "[dumped string]"); $line_length_arg || ($line_length_arg = $default_line_length); $! = 0; if ( ! $string) { (print {$handle} $label," = [no string to dump]\n") || return $!; return "NOSTRING"; } # Fool w/indentation. If label is "long", indent less $short_label = (length($label) <= $max_len_label); if ($short_label) { $label .= " = "; $indentation = length($label); } else { $indentation = $default_len_per_line_prefix; $label .= "\n" . ' ' x $indentation; } $min_line_length = $indentation + 3; if ($line_length_arg < 0) { $max_dumped_chars_per_line = -$line_length_arg; } else { # Override user-supplied line length if it's too small. The error logic to # do it exactly, backing out our indentation, etc, if necessary, is convoluted # and I think the total length involved is < 10 bytes ($line_length_arg < $min_line_length) && ($line_length_arg = $min_line_length); $max_dumped_chars_per_line = ($line_length_arg - $indentation)/3; } $len_string = length($string); $nlines = int(($len_string-1)/$max_dumped_chars_per_line) + 1; $first_char = $last_char = 0; @chars = split //,$string; foreach $iline (0..$nlines-1) { $last_char += $max_dumped_chars_per_line; ($last_char > $len_string) && ($last_char = $len_string); (print {$handle} ($iline == 0) ? $label : ' ' x $indentation) || return $!; foreach ($first_char..$last_char-1) { if ($chars[$_] =~ /[\w\~\`\!\@\#\$\%\^\&\*\(\)\-\_\=\+\[\]\{\}\;\:\'\"\\\|\<\>\,\.\/\?]/ ) { (print {$handle} "$chars[$_] ") || return $!; } elsif ($chars[$_] eq ' ') { (print {$handle} " ") || return $!; } elsif ($chars[$_] eq '\t') { (print {$handle} "\\t ") || return $!; } elsif ($chars[$_] =~ /[\s]/) { (print {$handle} "\\s ") || return $!; } else { (print {$handle} "** ") || return $!; } } (print {$handle} "\n") || return $!; (print {$handle} ' ' x $indentation) || return $!; foreach ($first_char..$last_char-1) { (printf {$handle} "%x ",ord($chars[$_])) || return $!; } (print {$handle} "\n") || return $!; (print {$handle} "\n") || return $!; $first_char += $max_dumped_chars_per_line; } return "OK"; } ################################################# sub add_file_to_path_string # First arg is name of env var containing path # Second arg is file we want in the path. # All the rest of this stuff deals w/the possibility that "relevant # things" are in the path before we get here. If so, this routine # preserves the order and repetitions of the existing path. # Rest of args (optional) is a list of library directories that might # conflict w/arg we want to add (eg, we want to add matlab5 library # but matlab6 library might be in path). Irrelevant if what we # want to add is in "conflict list". Each such conflicting directory # is replaced with the directory we want. # Returns 3 element list. # 1) "OK"/"NG". OK means file exists, is readable, # and is in path (whether we fooled w/path or not) # We don't alter the path if the file is not # accessible # 2) "NO_PATH_NAME" # "NOTHING_TO_ADD" # Calling error - one or both of 1st 2 args missing # "FILE_DOES_NOT_EXIST" # "FILE_CANNOT_BE_READ" # Refers to arg 2 # "WOULD_MAKE_PATH" # "WOULD_ADD_TO_PATH" # "WOULD_MODIFY_PATH" # "WOULD_NOT_AFFECT_PATH" # describing how this routine would affect the path. # WOULD_MODIFY_PATH means that we found elements of # of the "conflict list" in the path, possibly # in addition to the file we wanted. # 3) Desired path string as calculated by this routine # Calling routine must actually place this string in environment # # NOTE: Path separator character (colon), if found in input env var, is # considered the separator of path elements. This routine does not look # at quotation marks, etc that might indicate character is NOT a # separator { my ($path_name,$file_to_add,@conflict_list) = @_; my ($path_sep) = (':'); my ($nsubs,$already_in_path,$return2); $path_name || return "NG","NO_PATH_NAME"; $file_to_add || return "NG","NOTHING_TO_ADD"; (-e $file_to_add) || return "NG","FILE_DOES_NOT_EXIST"; (-r $file_to_add) || return "NG","FILE_CANNOT_BE_READ"; $return2 = "WOULD_NOT_AFFECT_PATH"; $path = defined ($ENV{$path_name}) ? $ENV{$path_name} : ""; # Add $file_to_add to $path_name, replacing any # others provided in $conflict_list. Keeping order would help in the # unlikely situation that the file already in the path # works only in that position in the path if ($path) { $path .= $path_sep; $file_to_add .= $path_sep; $already_in_path = ($path =~ /$file_to_add/); $nsubs = 0; foreach (@conflict_list) { $_ .= $path_sep; ($_ eq $file_to_add) || ( $nsubs += ($path =~ s/$_/$file_to_add/g) ); } ( ! $already_in_path) && ($nsubs == 0) && ($path .= $file_to_add) && ($return2 = "WOULD_ADD_TO_PATH"); ($nsubs == 0) || ($return2 = "WOULD_MODIFY_PATH"); chop $path; } else { $path = $file_to_add; $return2 = "WOULD_MAKE_PATH"; } return "OK",$return2,$path; } ################## sub do_shell_command # backtick.pl enhanced, so this is now a "skeleton" calling backtick - # better to use the backtick.pl entries than to use this # Too bad that download-4 wasn't only place do_shell_command was used - # it is used in this file!! In any case, cannot use backtick when # command must write to stdout, so do_shell_command must stay ... but DO # deprecate its use # In Jun 12, read comments in download-4 that said do_shell_command # had been removed from there in 2011. Accordingly comment it out here # As of Nov 10, used only in download-4. Deprecate this in favor # of backtick.pl - among other things, the ESPIPE stuff is wrong { my ($executable,$command_including_executable,$pipeerr_OK) = @_; my ($return,$ok,@status,$command_to_backtick,$child_exit_status); ($executable eq "NO_X_ACCESS_CHECK") || &check_x_access($executable); ($command_to_backtick) = ($command_including_executable =~ /^`(.+)`$/); if ($command_to_backtick) { ($return,$child_exit_status,@status) = &backtick($command_to_backtick); } else { ($return,$child_exit_status,@status) = &execute_command($command_including_executable); } (defined $return) || &quit ("Internal error calling backtick routines from do_shell_command"); ($child_exit_status == 0) && (return $return); &quit (&format_backtick_return_status ($command_including_executable,$child_exit_status,@status) ); } ################## # Email below "officially" out of date as of Jun 2012 # The perl eof function can be used to distinguish between # a read that returns undefined because of eof and a read that # returns undefined because of error. Supposedly in the latter # case, $! correctly IDs the error #From: WJS::SYSTEM_WJS1 "Warren J. Sass" 17-APR-2004 21:48:58.68 # [edited to change routine names. WJS 23 Mar 05] #To: rgroman #CC: cchandler@whoi.edu,wsass #Subj: Using perl to read "JGOFS stuff" from pipes # I guess you have been opening perl pipes to list for quite #a bit longer than I have, but my recent tanglings with the issue #have most recently led me to write a routine to do this. If you #want more details about the routine, let me know and I can do a better #write-up. The point of this email is to broadcast the issues I've #found. # The main problem is that a pipe read failure can be associated #with a failure of the child process at the other end of the pipe. To #get that info requires that the pipe be closed. The act of closing #the pipe is itself liable to error in the parent process. Accordingly, #when the perl <> operator returns "undefined", there are 3 statuses #of interest: the status of the read, the status of the close, and the #exit status of the child process. # A sub-issue of this is the infernal habit of "something" (perl #library? OS?) on globec to return an ESPIPE condition as the status #of a close. # A 2nd sub-issue is that it's nice to separate closes that happen #following EOF from closes that do not. The former should have a "normal" #child exit status, while the latter might well have a status of EPIPE #(classic example here: read a few records from pipe to list, then #close pipe) # # Another issue is that, as far as I can tell, perl does not #distinguish between read error and EOF. Presumably both cases result #in an "undefined" return from <>. I have experimentally verified that #$! at this point is unaltered from what it was before the <>. In #particular it is NOT set to "EOF" or some such. Presumably in the case #of a real read error (eg, bad disk block), $! WOULD be set. However, #to figure this out, one must know the value of $! before the <>. # # Because of these issues, it seems that every time <> returns #"undefined" one must explicitly close the filehandle and execute #the equivalent of the code at bottom. Since every program encounters #this condition, I felt I should try to write something common! While #I was at it, I thought I could try to handle the application-level &x #errors. (Note: list 1.5 (Oct 03) allows redirection of application #-level &x info. This routine will NOT handle that circumstance. See #get_JGOFS_line in JGOFS_SQL package for mod where destination of #error info are passed as an argument to preserve the error-handling #capabilities of this routine. WJS Nov 06) # #Reference summary: # Input: file handle, already open to the pipe # Output: status summary string, data record, status array; # where status array consists of # $! of read # $! of close # $? of child process # JGOFS error text # Output side effect: file handle is closed unless I/O was "normal" # Value of status summary is one of the strings # OK, EOF, JGOFS_ERR, READ_ERR, CLOSE_ERR # (*ERRs reported in that order if > 1 occurs) # JGOFS error text consists of "all" (I put in a sanity check) # the lines from the one starting with &x to EOF # $! will be 0 if <> did not result in its modification (special, # presumably impossible, case: defined data + # altered $!. Action: READ_ERR, non-0 $!, # JGOFS error text = "Unknown error [+more]") # #Sample metacode: # open ($FH,"$command |") or die... # ($status,$line_from_FH,@err_array) = &get_JGOFS_record($FH); # while ($status eq "OK") { # # process $line_from_FH # ($status,$line_from_FH,@err_array) = &get_JGOFS_record($FH); # } # unless ($status eq "EOF") # &format_get_JGOFS_record_return_status($command,@err_array) ################################################# sub format_get_JGOFS_record_return_status { my ($command,$io_status,$close_status,$exit_status,$message) = @_; my ($return_val); $return_val = " *** Problem with I/O on pipe to $command\n"; ($message eq "") || ($return_val .= " *** Non-operating-system info follows:\n$message"); ($io_status == 0) || ($return_val .= " *** Abnormal final I/O status: $io_status\n"); ($close_status == 0) || ($return_val .= " *** Abnormal status of pipe close: $close_status\n"); ($exit_status == 0) || ($return_val .= " *** Abnormal exit status from child process: $exit_status\n"); return $return_val; } sub get_JGOFS_record { my ($FH) = @_; my ($rec,$err_rec_sanity_count,$status,$read_bang_status,$message); my ($err_rec_max) = 100; my ($JGOFS_err_indicator) = "&x"; $message = ""; $status = "EOF"; $read_bang_status = 0; # For completeness - will not be # returned for normal JGOFS I/O; will # be set below for abnormal. We would # LIKE to return an EOF status, but there # does not seem to be an errno entry for that while ( ! eof($FH) ) { if (defined ($rec = <$FH>) ) { ($rec =~ /^[\* ]*$JGOFS_err_indicator/) || return "OK",$rec; $status = "JGOFS_ERR"; $message = $rec; $rec = ""; $err_rec_sanity_count = 0; $read_bang_status = 0; # No error, since <> was defined while ( ! eof ($FH) ) { if (defined ($_ = <$FH>)) { if ($err_rec_sanity_count++ > $err_rec_max) { $message .= " *** Ignoring msgs after message #$err_rec_max"; last; } $message .= $_; } else { $read_bang_status = $!; # Error, since <> undefined and no EOF last; } } } else { $read_bang_status = $!; # Error, since <> undefined and no EOF $status = "READ_ERR"; $rec = ""; last; } } # Coding below from my interpretation of a perldoc.org online article # Jun 2012. Main idea is that the T/F close return needs to be tested # in order to determine the validity of the $! setting ... # close returns F if there was a local close problem or if # the other end of a pipe died. According to article, if the latter, # $! == 0 (and presumably $? != 0). I assume that in the non-pipe case, # $! will be set. # It is not clear what happens if we close after a read error during # the main loop. We try to report READ_ERR rather than CLOSE_ERR if, # in fact, close-after-error produces a close error. # We return "the works" w/the idea that the formatting routine # (format_get_JGOFS_record_return_status; above) sorts out all the # choices. Modify that routine if/when needed - it is my present # belief (2012) that the interpretations there are correct despite # the coding changes here. $! = $? = 0; close ($FH) || ($status ne "EOF") || ($status = "CLOSE_ERR"); return $status,$rec,$read_bang_status,$!,$?,$message; } ################################################# sub print_choose_memory_algorithm { my ($form_var_name) = @_; print "


Memory algorithm choice

\n"; print "" . "Applicable only to variables whose values are non-numeric" . "
\n"; # 0 means "Use width= attribute vals" (see makemat.c for technique) # Any other number is minimized with OOserver "system max" value # This implementation just offers users min or max - can be altered to # allow middle ground if a) desired b) a way can be determined to # allow user to choose meaningfully; ie, if user doesn't know width= # attributes or system max, tough to pick a number print "

"; return; } ################################################# sub print_select_alpha_variables { my ($list_program,$obj,$form_var_name,$missings_are_alphas) = @_; my ($command); my ($pid,$save_PATH_INFO,$save_bang,$save_ques); my ($line,@varlist,@datalist,$varname); my ($status,@err_msg,$strip); my ($select_count); my (%attr_hash,%alpha_var); # Users of the print_select_alpha_variables form generally will not # really know what they should select until after the fact. Try to # assist our default value-in-first-record algorithm in as general a # way as possible. Idea is that following script, if present, will # contain perl code with an entry called numeric_alpha_opinion # This entry will be called with a varname and will return an opinion # and an authoritative value (eg, can user override, etc) # As of Nov 10, this idea never implemented. In Nov 10, implemented # recognition of a variable attribute called datatype, which can have # the values "alpha" or "numeric". my ($optional_advisory_script) = "numeric_alpha_advice.pl"; my ($JGOFS_missing) = "nd"; # Chose tab here since guarantees no conflict w/embedded separator # Coordinate this char w/list option (in this case -t) but don't blame # me if changing this causes trouble. "Extra" \ so 2-char \t is # sent to perl split (wonder if binary tab; eg, w/o extra \, would work...) my ($list_split_char) = "\\t"; # Get list of variables in object statisticker will work on. Get 1st value # of each variable and see if it's numeric. Do this by taking 1st 2 lines # of flat, nocomment list of object. Offer menu of variables to # be compared alphabetically, defaulting per the numeric test. If problems # getting this info, just don't offer the menu. Drop warning into server # error log (I hope). Someone may see it some day... (Actually, error # appears pretty dramatically in front of user's eyes - Feb 10) &check_x_access($list_program); $command = $list_program . " -f -c -t -z -l -a -nopipeerr -forceheader -errout /dev/stderr " . "\"" . $obj . "\""; # Clean out PATH_INFO to avoid problems w/outer used by $obj (defined $ENV{"PATH_INFO"}) && ($save_PATH_INFO = $ENV{"PATH_INFO"}); $ENV{"PATH_INFO"} = ""; # Ensure that the -a option for list is honored no matter how list was built # Requires list 1.8b. No point in saving a previous value of the env var - all # OOserver work "prefers" that -a work, and I don't think this file is used in a # non-OOserver environment; esp used by "not me" $ENV{"INCLUDE_ATTRIBUTES_SWITCH"} = "TRUE"; $! = $? = 0; $pid = open (IN, "$command |"); $save_bang = $!; $save_ques = $?; if (defined ($save_PATH_INFO)) { $ENV{"PATH_INFO"} = $save_PATH_INFO; undef $save_PATH_INFO; } else { delete $ENV{"PATH_INFO"}; } ( ($pid !=0 ) && ($save_ques == 0) ) || &quit ("Problem opening pipe to command '" . $command . "'\n\$! = $save_bang; \$? = $save_ques\n"); ($status,$line,@err_msg) = &get_JGOFS_record(IN); ($status eq "OK") || &quit ("Could not get variable list", &format_get_JGOFS_record_return_status($command,@err_msg)); (defined $line) || &quit("Did not get variable list (but list succeeded!)"); chomp $line; (@varlist = split (/[$list_split_char]+/,$line)) || &quit ("No variables returned from '" . $command . "'\n"); # vars have appended non-width attrs (-a but not also -aw switches on list) # Parse off attr list, noting those vars w/ defined datatypes foreach (0..$#varlist) { ($status,$varname,%attr_hash) = &get_hash_of_attrs($varlist[$_]); ($status eq "OK") || ($status eq "AMBIGUOUS") || &quit ("Problem w/attrib list for a variable in $obj\n", "More info follows: $varname"); if (defined $attr_hash{"datatype"}) { ($attr_hash{"datatype"} eq "alpha") && ($alpha_var{$_} = 1); ($attr_hash{"datatype"} eq "numeric") && ($alpha_var{$_} = 0); (defined $alpha_var{$_}) || &quit ("Illegal value for attribute datatype", "Legal values are the strings alpha and numeric", "Problem varname/attr list: $varlist[$_]", "from object spec $obj"); } $varname || &quit ("Problem w/varname[attrib-list] $varlist[$_]\n" . "Illegal character in varname? (legal = letter,digits,underscore)"); # list output may have formatting blank space in it $strip = &whitespace_strip($varname); ($strip eq "") && &quit ("varname all whitespace"); ($strip =~ /\s/) && &quit ("Embedded white space in varname","varname = $varlist[$_]"); $varlist[$_] = $strip; } ($status,$line,@err_msg) = &get_JGOFS_record(IN); ($status eq "EOF") && &quit ("Sorry, no data in this object as presently selected", "Data request via command '" . $command . "'\n"); ($status eq "OK") || &quit ("Could not get 1st data record", &format_get_JGOFS_record_return_status($command,@err_msg)); # Problems at this point could be due to bad selection list # Should not get here w/defined line - should have exited via status="EOF" # However extra tests are cheap. (defined $line) || &quit ("No data returned from '" . $command . "'\n"); chomp $line; (@datalist = split (/[$list_split_char]+/,$line)) || &quit ("No data returned from '" . $command . "'\n"); close IN; ($#datalist == $#varlist) || &quit("Length mismatch between varlist and data\n", "varlist = " . join("|",@varlist) . "\n", "data = " . join("|",@datalist) . "\n"); print "

Select the variables whose data are to be considered " . "non-numeric

\n"; print ""; return; } ################################################# sub get_cached_varlist # Args: 1) listvar command w/switches but not object # 2) directory where cache files are # 3) object name # 4) TRUE means refresh cache from net - don't use any existing cache # Returns: TRUE/FALSE reflecting whether or not cache was used, followed by # varlist in def format. Getting it in def format because # that routine is already written! # Does NOT cache varlists for objects w/sel/proj lists. Regenerates # those each time. Esp important for "passthrough" objects like # join, where "sel/proj lists" aren't just sel's & proj's # Dies on error. { my ($listvar_w_switches,$cache_dir,$object,$force_refresh,$dummy) = @_; my ($varlist_file,$use_cached_file,$status,$object_without_slashes); my ($listvar,$obj_name,$suffix,@def_varlist); my ($switch_list,$switch_addend,@switches); ((defined $object) && (! defined $dummy)) || &quit ("Internal error: get_cached_varlist not called w/4 args"); @switches = (); ($listvar,@switches) = split ' ',$listvar_w_switches; $switch_list = (@switches == 0) ? "" : join " ",@switches; $switch_addend = (@switches == 0) ? "" : '_' . join "",sort(@switches); ($obj_name,$sel_proj) = split (/\(/,$object); if ($sel_proj) { $suffix = "_w_sel_proj_list"; $force_refresh = 1; } else { $suffix = ""; $obj_name = $object; } ($status,$object_without_slashes) = &replace_special_char('/',$obj_name); ($status eq "OK") || &quit("Internal error trying to replace slashes in object spec $object", "Problem: $object_without_slashes"); $varlist_file = $cache_dir . "/" . $object_without_slashes . $switch_addend . $suffix . ".varlist"; # 1 in next line sets cache to expire in 1 day $use_cached_file = (-e $varlist_file) && ((-M $varlist_file) < 1) && ( ! -z $varlist_file) && ( ! $force_refresh); if ($use_cached_file) { (open (VARLIST_IN,"< $varlist_file")) || &quit ("Cannot open $varlist_file for read. \$! = $!"); @def_varlist = ; close VARLIST_IN || &quit ("Cannot close $varlist_file. \$! = $!"); } else { # DO NOT put the active sub-selections on the listvar command @def_varlist = &listvar_in_def_format($listvar,$object,$switch_list); (open (VARLIST_OUT,"> $varlist_file")) || &quit ("Cannot open $varlist_file for write. \$! = $!"); foreach (@def_varlist) { (print VARLIST_OUT "$_\n") || &quit ("Cannot write $varlist_file. \$! = $!"); } close VARLIST_OUT || &quit ("Cannot close $varlist_file. \$! = $!"); } return $use_cached_file,@def_varlist; } ################################################# sub listvar_in_def_format { my ($listvar_program,$obj,$listvar_switches) = @_; my ($command); my ($save_PATH_INFO,$save_bang,$save_ques); my ($line,@varlist); my ($status,@err_msg); my ($varname,$varname_spec,$attr); my ($new_level,$rec,$level,$varline,$open_OK); my ($open_delim,$close_delim,$separator); # Next is a function of how listvar is coded. It's not parametrized # there, so there isn't even a common .h file we could use if we ever # tried to export .h files into perl my ($listvar_level_varname_separator) = ','; (defined $listvar_switches) || ($listvar_switches = ""); &check_x_access($listvar_program); # This coding assumes that $listvar_switches does not substantively # change the format of listvar -l output and that multiple -l switches # are acceptable to listvar. So far, so good (Sep 10) $command = qq|$listvar_program -l $listvar_switches "$obj"|; # Clean out PATH_INFO to avoid problems w/outer used by $obj (defined $ENV{"PATH_INFO"}) && ($save_PATH_INFO = $ENV{"PATH_INFO"}); $ENV{"PATH_INFO"} = ""; $! = $? = 0; $open_OK = open (IN, "$command |"); $save_bang = $!; $save_ques = $?; if (defined ($save_PATH_INFO)) { $ENV{"PATH_INFO"} = $save_PATH_INFO; undef $save_PATH_INFO; } else { delete $ENV{"PATH_INFO"}; } $open_OK || &quit ("Problem with command '" . $command . "'\n\$! = $save_bang; \$? = $save_ques\n"); $level = 0; $varline = ""; ($status,$line,@err_msg) = &get_JGOFS_record(IN); $rec = 0; while ($status eq "OK") { $rec++; # Code that follows must be coordinated w/changes in listvar $separator = quotemeta($listvar_level_varname_separator); ($new_level,$varname_spec) = split /$separator/,$line,2; (defined($new_level) && $varname_spec) || &quit ("Internal problem. Did not get expected listvar format " . "in listvar record $rec\n\t$line\nProduced by command", $command); $new_level = &whitespace_strip($new_level); $varname_spec = &whitespace_strip($varname_spec); ( ($new_level =~ /^(\d+)$/) && ($varname_spec ne "") ) || &quit ("Internal problem. Did not get expected listvar format " . "in listvar record $rec\n\t$line\nProduced by command", $command); ########Attr stuff here if ($new_level == $level) { # Use of tab here dictated by def's acceptance of \t as separator $varline .= "$varname_spec\t"; } else { # Use of > here is again what def expects # Deliberately leaving in trailing tab. $varline .= '>'; push @varlist,$varline; $level = $new_level; $varline = "$varname_spec\t"; } ($status,$line,@err_msg) = &get_JGOFS_record(IN); } ($status eq "EOF") || &quit ("Problem w/ data record #" . ++$rec, &format_get_JGOFS_record_return_status($command,@err_msg)); close IN; # $varline has trailing tab - should be OK return @varlist,$varline; } ################################################# sub valid_number { # See if a string is a valid number. WJS Apr 99 # (mod Jul 05 to pre-test for most likely strings, on hypothesis # that string test is quicker than exception testing. WJS) # (mod Feb 17 to make quick numeric test report numbers leading with + or -. WJS) # Idea is to turn warnings on, force a numeric calculation, trap # any resulting warning message, and see if it's appropriate. # Because it's only a warning, eval does not set $@ as it does for # worse errors. Therefore, the fooling with signals... # Of course this breaks if the message changes. Much better would # be to have a perl-callable strtod function... # The perl manual says that numbers match /[+-]\d*\.?\d*E[+-]\d+/ # (when it was talking about library module BigFloat). However, that # description clearly doesn't reflect the optional portions of numbers... my ($test_item) = @_; my ($number); ((defined $test_item) && ($test_item ne "")) || return 0; # Quick test - numbers ($number) = ($test_item =~ /^\s*([+-]?\d*\.?\d*)\s*$/); $number && ($number ne '.') && return 1; # Quick test - strings. Will incorrectly reject non-decimal radix if such strings # can be represented without quoting characters. Will correctly reject NaN # and Inf, but more rigorous test for those later in case we pull this quick test # Also, next test will incorrectly accept -Inf (more rigorous test, blah blah) ($test_item =~ /^[A-Za-z]*$/) && return 0; local ($numeric_flag) = 1; my ($old_val_warn) = $^W; $^W = 1; # Turn on warnings # Used to have sub test $_[0] (the warning message) to see if it was # an "Argument .* not numeric" message. Now think that if the eval gets # any kind of warning, there must be a problem with the putative number, # so just decide it's not a number. Presumably if there were # a numeric warning (overflow? is this a fatal?), the this technique # would be incorrect. If we know that numeric warnings have their # own signal, presumably we could trap that, too (and we'd get it before # __WARN__ or __&quit__?) local $SIG{__WARN__} = sub { $numeric_flag = 0; }; eval '$test_item + 1'; # Anything that does arithmetic $old_val_warn || ($^W = 0); # Reset warnings if appropriate $SIG{__WARN__} = 'DEFAULT'; # Return signal to normal behavior # NaN test $numeric_flag && ($test_item != $test_item) && ($numeric_flag = 0); # Inf test $numeric_flag && ($test_item == $test_item+1) && ($numeric_flag = 0); # -Inf test $numeric_flag && ($test_item == $test_item-1) && ($numeric_flag = 0); return $numeric_flag; } sub get_hash_of_attrs { # Input: string that is either # varname[attr_list] # [attr_list] # attr_list # Output: # status # status_text or varname # list w/alternating keys & values. May be undefined (if no attrs) # Keys are stripped of leading and trailing whitespace. Keys # will be non-empty strings of letters, numbers and underscores # with at least 1 non-number # If attr has value but no key, the key is its position in the # attrlist (1 is the first list element) # Values may be empty and may consist of strings of whitespace # (unless passed to this routine in 3rd format above (ie; w/o []s) # # Statuses: # OK # Returns varname in status_text field. May be empty string # AMBIGUOUS # Input was a string such as "date", which could be # a variable w/o an attr list, or a 1-element attr list # Returns varname in status_text field. May be empty string # AMBIGUOUS is only an issue if the caller does not know # the format of what is being sent to get_hash_of_attrs. # If the get_hash_of_attrs input is known to include the # varname, then a return of AMBIGUOUS means "empty attribute list" # If the input contains []s (with or without varname) AMBIGUOUS # will not be the return status. If the input is attr-list # only (ie, known NOT to include the varname), then AMBIGUOUS # means "single-element attribute list" # NULLINPUT # Input was empty or all whitespace # BADFORMAT # Status string has more detail # BADKEY # Key portion of key=value is not a non-empty string # consisting of letters, numbers, and underscores with at # least 1 non-number # This routine could, but does not, validate varnames with respect # to things like embedded blanks, embedded special chars, etc # my ($input,$dummy) = @_; ((defined $input) && ! (defined $dummy)) || &quit ("Internal error: get_hash_of_attrs not called w/1 arg"); # Next is from core.h ATTRIB_SEP my ($attrib_sep) = ';'; # Next is from core.h ATTR_DELIM my ($attr_delim) = '[]'; # Next is NOT from core.h. Attributes can be any string. # Made the obvious decision that an = is special when we did width= # Formally extended the idea now (Oct 10) to allow for datatype=, # but this is well after core.h, etc my ($key_value_sep) = '='; # Note that the legal chars in a key (letters, numbers, _) are NOT # in core.h, nor do I feel like parametrizing it here and # dealing with embedded \s, etc. Oh well, oh well... my ($varname,$attr_list,$explicit_attr_list,@attr); my ($count,@attr_pairs,$key,$value); my ($open_delim,$close_delim); my ($qm_open_delim,$qm_close_delim,$qm_attrib_sep,$qm_key_value_sep); my ($dummy2); $qm_attrib_sep = quotemeta($attrib_sep); $qm_key_value_sep = quotemeta($key_value_sep); # Too tired to test that $*delim is 100% legit ($open_delim,$close_delim) = split //,$attr_delim; $qm_open_delim = quotemeta($open_delim); $qm_close_delim = quotemeta($close_delim); $input = &whitespace_strip($input); $input || return "NULLINPUT","No nonblanks in string"; ($varname,$attr_list,$dummy) = split /$qm_open_delim/,$input; (defined $dummy) && return "BADFORMAT","More than 1 $open_delim"; if ($explicit_attr_list = (defined $attr_list)) { ($attr_list,$dummy,$dummy2) = split /$qm_close_delim/,$attr_list; (defined $dummy) || return "BADFORMAT","Missing $close_delim"; (defined $dummy2) && return "BADFORMAT","Extra $close_delim"; ($dummy eq "") || return "BADFORMAT","Non-whitespace after $close_delim"; ($varname eq &whitespace_strip($varname)) || return "BADFORMAT","Whitespace between varname and attr list"; } else { ($varname =~ /$qm_close_delim/) && return "BADFORMAT","Missing $open_delim"; $attr_list = $varname; $varname = ""; } # [] (possibly w/whitespace in between) is OK (&whitespace_strip($attr_list) eq "") && return "OK",$varname; $count = 0; @attr_pairs = split /$qm_attrib_sep/,$attr_list; foreach (@attr_pairs) { $count++; # Deliberately allowing $value to contain an = # Deliberately allowing $value to be null string ($key,$value) = split /$qm_key_value_sep/,$_,2; if (defined $value) { ($key =~ /^[\w\d\_]+$/) || return "BADKEY", "key must be non-empty and consist of letters, numbers and _. " . "Problem in attribute -->$_<-- in list -->$attr_list<--"; ($key =~ /^\d+$/) && return "BADKEY", "key must have one non-number in it. " . "Problem in attribute -->$_<-- in list -->$attr_list<--"; } else { $value = $key; $key = $count; } push @attr,$key; push @attr,$value; } ($count == 0) && &quit ("Internal error in get_hash_of_attrs. Impossible attr count of 0"); $explicit_attr_list && return "OK",$varname,@attr; # AMBIGUOUS as far as this routine's parsing is concerned # if there was only 1 attribute in list and we couldn't tell # that it was an attribute because it didn't have key=value form (and # therefore its key is 1). We can disambiguate if it's syntactically # illegal as a varname. Rule for that set in dim past - varnames must # be same chars as a varname in c, which happens to be letters, numbers, # and _, which may sound familiar but is its own (non parametrized) thing if (($count == 1) && ($attr[0] eq "1")) { # In the non-explicit attr list case, we anticipated that there was # no varname and we set attr_list to the varname string. This explains # why next test is attr_list and why varname gets restored if ($attr_list =~ /^[\w\d\_]+$/) { $status = "AMBIGUOUS"; $varname = $attr_list; } else { $status = "OK"; } } else { $status = "OK"; } return $status,$varname,@attr; } sub whitespace_strip # Returns null if all whitespace, but consider that stripped string could be 0 { my ($strip,$dummy) = @_; (defined $strip && ! defined($dummy)) || &quit ("Internal error: whitespace_strip not called w/1 arg"); ($strip eq "") || ($strip =~ s/^\s*//); ($strip eq "") || ($strip =~ s/\s*$//); return $strip; } ################## $WJS_WEB_PERL_UTILITIES_LOADED = 1; # Revision history pre 2012 # 16 Feb 11 WJS # Bug fix: OBJSPEC needs to allow =s since they are in # selection specs. Interesting that 3 Jun 10 fix "worked" - # guess selections there were alpha, "eq" selections. # While at it, throw in < and > # 15 Nov 10 WJS # get_hash_of_attrs # Mod print_select_alpha_variables to use get_hash_of_attrs to # check for datatype # 30 Sep 10 WJS # whitespace_strip # Improve parsing of listvar output in listvar_in_def_format # 14 Jun 10 WJS # 1 Jun mod of 21 Feb mod STILL not correct. 3rd time the charm? # 3 Jun 10 WJS # Bug fix: OBJSPEC needs to allow &s and |s since they are in # selection specs # 1 Jun 10 WJS # Mod adjust_for_reqd_args to use get_query_string_args # instead of doing same code inline (not same really; get_ # uses backtick.pl instead of actual backticks.). Consequence: # adjust_ no longer returns PQSERR, since get_ dies internally if # such a thing happens # Mod shell_protect to protect ampersands. How many more did I miss? # 21 Feb mod not syntactically correct - fix (more good testing) # 21 Feb 10 WJS # Blank stripper of 7 Sep no good either. (.+) is greedy and # includes the trailing blanks. google search for "perl whitespace # trim" does NOT show any regex, which might be why these don't # work. They use s/\s*$//, and so shall I (although I'm # tempted by split ...) # 18 Feb 10 WJS # Bug fix: print_select_alpha_variables should diagnose "no data in # object" situation rather than generic death # 6 Feb 10 WJS # Diagnostic fix: error message from check_form_var needed updating # to mention OBJSPEC # Bug fix: OBJSPEC needs to allow blanks and %s for character selections # 3 Feb 10 WJS # Bug fix: parse_object_spec sometimes returns a slash in the subdir # and sometimes doesn't. Make it match the doc # [Moved older comments to bottom of this file. Dec 12] # 5 Nov 09. WJS # Replace some ESPIPE code w/backtick.pl (which is now require'd) # Take a shot at making replace_special_char actually work # 17 Oct 09. WJS # Better error checking on open in listvar_in_def_format # 7 Sep 09. WJS # Put better blank stripper regex in print_select_alpha_variables, # replacing /^\s*(.+)\s*/ with /^\s*(\S+)\s*/. Thought former # was a bug if line was all blanks, but now think both return # the same thing. Further reflection: old form allowed # "multi-word lines" (w/embedded blanks). That is an error, # as is the existence of all-blank lines. Should be no # practical effect, since in actual use, all-blank lines # would have required other pre-existing errors # Doubt that numeric/alpha stuff mentioned 5 Sep will fly, but leave # comments in ... # 5 Sep 09. WJS # Add print_choose_memory_algorithm # Put in comments for a hook for "outside advice" about # numeric/alpha status of variables. # 20 Jul 09. WJS # Doc note: trigram.pl available, but in standalone file to avoid # loading this whole thing when not needed. If trigram func desired # here, too, best way might be to have this file include/require that # one # 23 Jan 09. WJS # Add an OBJSPEC legal character set for get_form_var to check # 28 Dec 08. WJS # Bug fix: open pipe error handling in print_select_alpha_variables # failed on fleetlink, again due to attempt to use $!. Switch # to using return from open command. See backtick.pl (would be # nice if that could be expanded for all pipe opening - would need # to subroutine-ize per-record processing and pass that to backtick) # 27 Nov 08. WJS # Bug fix: separator chars that come back from parse_query_string # need protection if they happen to be perl special chars. # 1 Nov 08. WJS # adjust_for_reqd_args # 25 Oct 08. WJS # get_this_file_as_url # Mod to abs_file_spec to check for empty string # Syntax fix to 18 Oct work # 18 Oct 08. WJS # varlist-via-listvar subsystem enhanced to allow attributes # 4 Aug 08. WJS # parse_object_spec needs to accept : as legal character # 31 Jul 08. WJS # Comment fix # 25 Jul 08. WJS # get_cached_varlist # 26 Apr 08. WJS # replace_special_char # 3 Apr 08. WJS # Fix error message in listvar_in_def_format # 14 Mar 08. WJS # Only mod is this comment. Decided NOT to modify this code for # Apache 2 double slash nonsense. If the pieces of PATH_INFO # get here via parse_path_info (which is invoked by build-opt-env.pl), # we should be OK. This is a motivation to use build-opt-env.pl! # 24 Oct 07. WJS # Add -l switch to list command # check_build_opt_env_var # list_as_text_plain # 23 Aug 07. WJS # Put format_get_JGOFS_record_return_status back in here! Apparently # pulled in Sep 05 when it and get_JGOFS_record were copied to # the fleetlink jgofs_sql stuff. Guess we didn't have many statuses # to format! # 21 May 07. WJS # Text change for user instruction in print_select_alpha_variables. # 23 Nov 06. WJS # valid_number should realize 0 is a number # 6 Sep 06. WJS # Mod to get_JGOFS_record to ignore $! unless read returns undefined # 1 Sep 05. WJS # do_shell_command # make check_?_access do a better job w/null input # 23 Aug 05. WJS # Have parse_object_spec check the easy part of the spec for # illegal chars. I give up on the sel/proj list (which, to be # fair, isn't even limited to sel/proj-architecturally could be # anything, so can't challenge any chars) # Comment change # Err msg update # 21 Aug 05. WJS # parse_object_spec (should take parse_path_info-like approach. # However, c code already exists, distributed in pieces within # jdbopen & routines it calls...) # 15 Aug 05. WJS # Add print_select_alpha_variables # 4 Aug 05. WJS # Allow get_form_var to get a form variable that's a list # Add check_r_access # 1 Aug 05. WJS # Change add_to_library_path to add_file_to_path_string # 27 Jul 05. WJS # quit to use html_line_breaks. Also in quit, if $open_pre_tag, # close the pre tag so CgiDie's formatting looks reasonable. # Add (& modify) valid_number # Add html_line_breaks # 29 Jun 05. WJS # Add add_to_library_path # 24 Mar 05. WJS # Add get_JGOFS_record, et al # 18 Mar 05. WJS # Add hex_dump #################################################