#!/usr/bin/perl -w { # # selopt.pl WJS Apr 08 # (from selopt CLH (GF?) 1998?) # option server web interface to allow users to add # selections/projections to JGOFS URL # Basic input is the QUERY_STRING env var, which contains the sel/proj # list. selopt.pl is typically called recursively several times, each # time altering the QUERY_STRING. W/that idea, program structure starts # w/manipulation of the Q_S due to the previous program call, w/the first # time through being a special case. After the manipulation is done, # the form is re-displayed w/the altered info. # Tricky input is the HTTP_REFERER env var. See the 3 Apr 08 comments # Tricky output includes various trigrammed strings. The assumption # is that a trigrammed string output here will not be trigrammed again. # Aside from basic trigram issue, trickery is needed because of # wildcard issues. First, % is sql's (and therefore our) match-any # character, which confounds nicely w/the trigramming. Second, sql # allows its wildcard characters to be searched for if they are escaped # via \. Problem is to convey that \, uninterpreted, to the method. $version = "selopt.pl version 2.3b 6 Apr 2017"; # 6 Apr 17. v 2.3b wjs # Can't replace do with require if you want it "done" twice. Replace 2nd # require with do # 21 Jul 16. v 2.3b wjs # Replace do build-opt-env.pl w/ require build-opt-env.pl. Date probable since # SOMEBODY did NOT make a comment at the time! # 1 Jun 11. v 2.3a wjs # Bug fix: varlist cache refresh did not work # Bug fix: user-entered sel/proj list did not work for at least 2 reasons (so far) # Some reworking of parameter processing and some improved input checking # 31 May 10. v 2.3 wjs # ... glad you stay tuned # 17 May 10. v 2.3 wjs # Parametrize some commas being used as query string separators # Address issue that query string can contain more than sel/proj list # Using download scheme, got it to display correct varlist. Unfortunately # the base selopt 2.0 rewrite changed comma-separated query strings to # "&" combined strings, which is fine for selections but not for what # we want to do here. Stay tuned for later 2.3 ... # 5 Nov 09. v 2.2 wjs # Fool w/wildcards on form a bit more, just in case we want it # some day. # 31 Oct 09. v 2.2 wjs # Essentially disable wildcard selections. Not only are there # the deceptions due to 15 Oct implementation, but, assuming we # get the wildcard to travel to the destination outer, if the # destination method takes input from an object, we don't know if # THAT outer will do wildcards, so we have an optimization issue. # The "contains" operator really should do for us ... # Remember to escape the escape character if the user wants to # look for it w/a wildcard. # 17 Oct 09. v 2.2 wjs # Fix an otheroptN issue that came up on fleetlink (and the # format of the diagnostic which accompanied the error) # 15 Oct 09. v 2.2 wjs # Let user choose wildcard chars, carefully excluding some options. # Helps w/a couple of tricky issues # 4 Jun 09. v 2.1 wjs # Newest case sensitivity idea: radio buttons # 3 Jun 09. v 2.1 wjs # Modify formatting; text per RCG suggestion # Make case sensitivity a checkbox instead of listing the case # sensitive operators - idea offered by me & liked by RCG # 1 Jun 09. v 2.1 wjs # Add capability to "not" a selection (from the form - can always # enter free-form (so to speak!) sel/proj lists) # 25 Jul 08. v 2.0c wjs # Pull debug PRINT statements # 18 Jul 08. v 2.0b wjs # Put cached varlist stuff into library. May have addressed problem # where cache file is not defined "first time through" (if problem existed, # it is now presumably addressed) # 23 May 08. v 2.0a wjs # get rid of "work in progress" warning # 27 Apr 08. v 2.0 wjs # perl rewrite; rename to selopt.pl # Among other things, get rid of seloptN files # Use the name of the referring file to determine whether to # offer the outer 3.0 enhanced string selections. HOW to use # that name is another story. The present agreement is that data # servers that have outer 3.0 should use otheropt2. Presumably # there will be other otheroptNs, or various other strings, and # what these strings mean is a whole different story. As a guess, # assume an otheroptN "cumulative" format, so we will offer the enhancements # if N >= 2. # Bug fix: ORs could be tacked onto projections, leading to # syntactically incorrect strings. # Bug fix: PATH_INFO lost protocol/level info on exit from this # routine. Presumably caused no trouble, but we should leave here w/the # same format as we arrived (or as if selections had been done by outer, etc) # 21 Oct 05. wjs # Add more string operators # mod: September 13, 1999 rcg -- change help.gif to helpbutton.gif per # Chris Hammond request. # mod: November 2, 1998 clh -- add a combination operator button # used to indicate 'OR' or 'AND' to list # of selections # require ("cgi-lib.pl"); use HTML::Entities; require ("wjs_web_perl_utilities.pl"); # Coordinate names w/query_string pkg. Ramifications of "PREFERRED" not # really appreciated at this point $PREFERRED_QS_SEPARATOR = ','; # Shorter version of above for coding convenience $qs_sep = $PREFERRED_QS_SEPARATOR; # $PREFERRED_PQS_RETURN_ARGS_SEPARATOR = '!' # unused at moment $trigram_char = '%'; $outer_match_any = '%'; $outer_match_one = '_'; $outer_escape = '\\'; # Next string must NOT include outer's chars $wild_card_charset = "*?@~#^&+-"; foreach ( split (//,$wild_card_charset) ) { &wild_validate($_, "Internal error: illegal character in wild card offerings. Bad char is"); } ($outer_match_one eq $outer_match_any) && &quit ('Internal error: "Match-any" and "Match-one" chars must differ'); # Stole "key" line from trigram.pl rather than requiring it and calling it # OK, I'm lazy $blank_trigram = $trigram_char . sprintf ("%2.X",ord(' ')); $outer_match_any_trigram = $trigram_char . sprintf ("%2.X",ord($outer_match_any)); $outer_match_one_trigram = $trigram_char . sprintf ("%2.X",ord($outer_match_one)); $outer_escape_trigram = $trigram_char . sprintf ("%2.X",ord($outer_escape)); @numeric_ops = qw( = < > <> <= >= ); @case_sensitive_ops = qw ( eq ne lt le gt ge contains is_contained_in begins_with ends_with ); $h = q!input type="hidden"!; # Variables we can get from the form, defined as their form # variable names, including hidden variables $caseblind = 'caseblind'; $negate_selection = 'negate_selection'; $selection_variable = 'var'; $selection_operator = 'op'; $selection_value = 'val'; $and_or = 'combo'; $projection_list = 'varlist'; $user_supplied_sel_proj_list = 'usr'; $which_otheropt = 'otheropt'; $selopt_todo = 'usr_request'; $wild_any_value = 'wild_any_char'; $wild_single_value = 'wild_exact_one_char'; # Values of the $selopt_todo form variable $selopt_reset = "Clear subset list"; $selopt_refresh = "Refresh varlist cache"; $selopt_selections = "Add selection"; $selopt_projections = "Add projection(s)"; $selopt_user_text = "Replace criteria"; # Set up environment. Assume .pl routine is in our directory $build_opt_env = "./build-opt-env.pl"; &check_r_access($build_opt_env); require $build_opt_env; # Check that build-opt-env set up things as expected $topdir = &check_build_opt_env_var("OPTHOME",$build_opt_env); $bindir = "$topdir/bin"; $listvar = "$bindir/listvar"; $pqs = "$bindir/parse_query_string"; # Next is our best guess of list of qs params that are not sel/proj's & # 1) must therefore stay with qs # 2) must NOT be "logically combined" with sel/proj's user is presumably # adding, etc # Note: OBJECT env var is NOT defined after "first pass through", so don't test # for "empty" and abort (as has been done every so often) $base_qs = ""; if ($tmp_obj = $ENV{"OBJECT"}) { # selopt assumes that anything in a query string is a sel/proj # This is not necessarily true - see discussion in *perl*routine*.pl # in description of adjust_for_reqd_args. For sure the assumption # fouls up the attempt to get a varlist and the "reset" action of # wiping the query string. $tmp_sels = $ENV{"SUBSELS"}; ($status,$substatus,$adjusted_object,$adjusted_qs,$n_moved) = &adjust_for_reqd_args($pqs,$tmp_obj,$tmp_sels); ($status eq "OK") || &quit ("Problem adjusting object and subsels $tmp_obj and $tmp_sels", "$substatus: $adjusted_object"); if ($n_moved > 0) { if ($adjusted_qs) { # I wonder if it's worth continuing to guess about how to use # perl's regex ... $tail = quotemeta($qs_sep . $adjusted_qs); ($base_qs) = ($tmp_sels =~ /(.*)$tail$/); } else { $base_qs = $tmp_sels; } } (defined $base_qs) || &quit ("Internal error - failed to define $base_qs"); } # Read in all the variables set by the form &ReadParse(*form_info); #### &dump_input_info('^',%form_info); $otheropt = &get_form_var($which_otheropt,"OPT"); # Presumably undefined only on initial invocation from otheropt # NOCHECK because of $selopt_projections $selopt_opt = &get_form_var($selopt_todo,"OPT","NOCHECK"); $refresh_cache = ($selopt_opt eq $selopt_refresh); $do_selections = ($selopt_opt eq $selopt_selections); $do_projections = ($selopt_opt eq $selopt_projections); $do_reset = ($selopt_opt eq $selopt_reset); $do_user_text = ($selopt_opt eq $selopt_user_text); $first_time = ($selopt_opt eq ""); $refresh_cache || $do_selections || $do_projections || $do_reset || $do_user_text || $first_time || &quit ("Invalid value for form var $selopt_todo. Bad value is $selopt_opt"); $do_reset && ($ENV{"QUERY_STRING"} = $base_qs); $empty_user_text_warning = 0; if ($do_user_text) { $user_list = &get_form_var($user_supplied_sel_proj_list,"OPT","NOCHECK"); if ($user_list) { $ENV{'QUERY_STRING'} = ($base_qs eq "" ) ? $user_list : $base_qs . $qs_sep . $user_list; } else { $empty_user_text_warning = 1; } } # # Get varlist for this object. Get it AFTER we get $refresh_cache from form # It's possible that what we actually need to do does NOT # require the varlist (eg, a reset). However, a) most do # b) we cache things & c) it's tricky to try to figure out # when we actually need it $adjusted_object || &quit ("Internal error - undefined/empty/0 object defn to get_cached_varlist"); (($used_cached_file,@def_varlist) = &get_cached_varlist($listvar, $ENV{"USETEMPDIR"}, $adjusted_object, $refresh_cache)); if ($first_time) { # Should be here when we arrive from otheropt. Accordingly # there should be no form variables (might be QUERY_STRING # stuff if an input selection was of the form var=val. ? Could be - # ReadParse looks at QUERY_STRING for GETs, which this is first # time through). # Find out if we were called from otheropt or otheropt2, etc ($otheropt eq "") || &quit ("Internal error - form values available b4 form printed"); # HTTP_REFERER is the whole URL, not just the referring file. # Look at tail of string before PATH_INFO to get referring file. if ($ENV{"HTTP_REFERER"}) { ($otheropt) = ($ENV{"HTTP_REFERER"} =~ m!.+/(.+)$ENV{"PATH_INFO"}!); ($otheropt =~ /^otheropt/) || &quit ("HTTP_REFERER env var should contain /jg/otheroptX. HTTP_REFERER = " . $ENV{"HTTP_REFERER"}); # On fleetlink, we get back a trailing slash. Not sure if # that's fleetlink and/or part of the 'no double slash stuff' or what # If it's there, get rid of it $save_inp_rec_sep = $/; $/ = "/"; chomp $otheropt; $/ = $save_inp_rec_sep; } else { $otheropt = "otheropt"; } } # selection processing (at least) requires varlist to distinguish sels from projs # so it must be deferred until after get_cached_varlist $do_projections && &do_projections; $do_selections && &do_selections($base_qs,$adjusted_qs); # Too lazy to "calculate" jg/selopt.pl from $0, esp since I don't # think "jg" is parametrized anyplace $form_action_routine = ($ENV{"SCRIPT_NAME"}) ? $ENV{"SCRIPT_NAME"} : "/jg/selopt.pl"; # Figure out which version of otheropt we came from (and to which we # will eventually return). Figure out if that version of otheropt means # we should offer extended string selections and "not" logic $otheropt || ($otheropt = &get_form_var($which_otheropt,"REQ")); (undef,$otheropt_version) = split "otheropt",$otheropt; (defined $otheropt_version) || &quit ("html form variable $which_otheropt should be otheroptN. " . "Value = $otheropt"); if ($otheropt_version eq "") { $case_sensitive_selections = $wild_card_selections = 0; } else { ($otheropt_version =~ /^\d+$/) || &quit ("html form variable $which_otheropt should be otheroptN. " . "Value = $otheropt"); $wild_card_selections = ($otheropt_version >= 99); $case_sensitive_selections = ($otheropt_version >= 2); $server_notlogic_works = ($otheropt_version >= 3); } $exit_selopt_routine = "/jg/$otheropt"; &printheader(); print "
| EOFSEL-010 # # SELECTIONS # print qq( | XXstuffXX-020 # # PROJECTIONS # print qq( |
if the variable lists above (which are cached) look incorrect
XXstuffXX-040
}
print "
Then, " .
qq!continue using the new subset list\n!;
# Avoid "used only once" diagnostic for $version & %form_info
undef $version;
undef %form_info;
exit;
}
sub do_selections
{
my ($base_qs,$input_sel_proj_string,$dummy) = @_;
my ($op,$ok_opt,$negate,$blind,$tmp,$adding_selection,$combo_op);
my (%varlist,@query_string_args,$old_selection,$n_old_selections,$proj_list);
( ( defined($input_sel_proj_string) ) && ( ! defined($dummy) ) )
||
&quit ("Internal error: do_selections ".
"called w/wrong # args");
$blind = &get_form_var($caseblind,"OPT","STRING");
($blind eq "") && ($blind = "FALSE"); # Default is sensitive per original coding
($blind eq "TRUE") || ($blind eq "FALSE") ||
&quit("Form variable $caseblind must be TRUE or FALSE. ",
"It is $blind");
$op = &get_form_var($selection_operator,"REQ","NOCHECK");
# Wrap non-numeric ops w/a pair of trigrammed blanks
# (Orig selopt did htmlesc of numeric ops, but this does not make sense
# to me)
$ok_opt = 0;
foreach (@numeric_ops) {
($op eq $_) && ($ok_opt = 1) && last;
}
if ($ok_opt) {
$wild_card_selection = 0;
} else {
foreach ("like",@case_sensitive_ops) {
# W/3 Jun 09 mod, all ops should be lower case since that's what's
# offered on form. Choice here is to handle upper/mixed case or
# report error ... today we're handling.
if (lc($op) eq lc($_)) {
$op = ($blind eq "TRUE") ? uc($op) : lc($op);
$ok_opt = 1;
last;
}
}
$wild_card_selection = (($op eq "like") || ($op eq "LIKE"));
$op = $blank_trigram . $op . $blank_trigram;
}
$ok_opt || &quit ("Invalid selection operator. op=$op");
$negate = &get_form_var($negate_selection,"OPT","STRING");
($negate eq "TRUE") || ($negate eq "") ||
&quit("Form variable $negate_selection can only take the value TRUE. ",
"It is $negate_selection");
# Note: we do not check to see if the selection variable from the
# form is actually in the object. The rationalization is that since
# we allow a free-form sel/proj string anyway, we are admitting that
# we will allow the user to err in this fashion. Here, at least,
# user would have to interfere between menu and post. If doing that,
# who knows what's going on (and we may all shudder about the "NOCHECK"!)
$selection_string = &get_form_var($selection_value,"REQ","NOCHECK");
$wild_card_selection &&
($selection_string = &wild_escaped_selection_string($selection_string));
$adding_selection = &get_form_var($selection_variable,"REQ")
. $op .
$selection_string;
$negate && ( $adding_selection = "!(" . $adding_selection . ")" );
if ( $ENV{"QUERY_STRING"} ) {
$tmp = &get_form_var($and_or,"REQ");
($tmp eq "AND") && ($combo_op = '&');
($tmp eq "OR") && ($combo_op = '|');
(defined $combo_op) || &quit ("Illegal and/or value. Value=$tmp");
# Make a hash of varnames to help distinguish sels from projs
foreach (@def_varlist) {
@tmp = split;
foreach (@tmp) {
# > is artifact of the varlist being in "def format"
($_ eq '>') || ($varlist{$_} = 1);
}
}
# Rearrange the existing query string args. A q-s arg is a projection
# if it is a variable name, or a selection otherwise
# [v 2.3: acknowledge the falsity of the previous sentence. Since
# q-s args are arguments to a method, they can be anything and in
# any order. We now do a better job, but we cannot be algorithmically
# correct]
# Make a comma-separated list of the projections
# Combine all the comma_separated selections into a single ANDed
# selection within parentheses.
# Final query string is all the projections, followed by a comma,
# followed by the ANDed selection, followed by the selection the
# user just added, using the AND/OR that the user just added
# Note nasty problem if user wants to string-select for a comma
# (present value of $qs_sep). Another "forever" problem
@old_sel_proj_list = split /$qs_sep/,$input_sel_proj_string;
$proj_list = "";
$old_selection = "";
$n_old_selections = 0;
foreach (@old_sel_proj_list) {
if (defined $varlist{$_}) {
$proj_list .= $_ . $qs_sep;
} else {
$old_selection .= "($_)&";
$n_old_selections++;
}
}
# proj_list and old_selection (if they were in orig QS)
# both have trailing separators that we added. Leave the comma
# on proj_list to separate it from (at least) the new selection.
# Take care of old_selection as needed
$new_qs = $proj_list;
if ($n_old_selections == 0) {
# and/or specified by user irrelevant since there's nothing to and/or to
# Not an error since that's how 1st sel would be added to a projlist
$new_qs .= $adding_selection;
} else {
chop $old_selection;
# Parenthesize old_selection if it is in fact a list of selections
# It already has a set of parens to take care of the 1-selection case.
$new_qs .= ($n_old_selections == 1) ? $old_selection : "($old_selection)";
$new_qs .= $combo_op . "($adding_selection)";
}
} else {
$new_qs = $adding_selection;
}
$base_qs && ($new_qs = $base_qs . $qs_sep . $new_qs);
$ENV{"QUERY_STRING"} = $new_qs;
}
sub do_projections
{
my ($proj_string,$newprojs);
# Note that we do not check that vars from for are actually in object
# See comment in do_selections for more about this
$proj_string = &get_form_var($projection_list,"REQ","STRING_LIST");
$newprojs= join ($qs_sep,&SplitParam($proj_string));
if ($ENV{'QUERY_STRING'}) {
$ENV{'QUERY_STRING'} .= $qs_sep . $newprojs;
} else {
$ENV{'QUERY_STRING'} = $base_qs . $qs_sep . $newprojs;
}
}
sub wild_escaped_selection_string
# Called w/ a user-specified selection string containing wildcard chars
# Check that these chars are NOT the sql/outer chars. That means that
# any sql/outer chars in the string are to be searched for. These
# characters must be sql-escaped (preceded w/backslash) to avoid
# their being interpreted as wildcards once we reach outer. Use
# trigramming on the backslash to steer it through any shells we
# encounter along the way. Use trigramming on sql's % wildcard to avoid
# http issues. Use trigramming on sql's _ wildcard just because it
# seems more consistent than leaving it alone
# Finally, replace user-specified wildcard chars w/the sql wildcard
# chars. Again, sql's % requires trigramming and we do sql's _ for
# consistency
# Note that universally trigramming these chars provides some
# protection if they get changed to duplicate each other, or match
# other shell metacharacters, etc
{
my ($input_string,$dummy) = @_;
( defined($input_string) && ( ! defined($dummy) ) ) ||
&quit ("Internal error: wild_escaped_selection_string " .
"called w/wrong # args");
my ($user_match_any,$user_match_one,$output_piece,@output_pieces);
# Validation of user-spec'ed wildcard chars must happen despite the
# fact that they are only offered legit choices on the menu. They could
# fool w/things. Along those lines, there is no logical reason why
# they must HAVE wildcard chars, but if they don't, they are both using
# a wildcard search operator w/o wildcards as well as fiddling w/our menu
# to allow themselves to offer nothing. Consider than an error.
$user_match_any = &get_form_var($wild_any_value,"REQ","NOCHECK"),
&wild_validate($user_match_any,'"Match-any" wildcard character may not be');
$user_match_one = &get_form_var($wild_single_value,"REQ","NOCHECK"),
&wild_validate($user_match_one,
'"Match-exactly-one" wildcard character may not be');
($user_match_any eq $user_match_one) &&
&quit ('"Match-any" and "Match-exactly-1" wildcard characters ' .
"must differ. Both are $user_match_any");
$escaped_outer_match_one = $outer_escape_trigram . $outer_match_one_trigram;
$escaped_outer_match_any = $outer_escape_trigram . $outer_match_any_trigram;
$escaped_outer_escape = $outer_escape_trigram . $outer_escape_trigram;
# Reason for working on pieces of split string is to avoid "doing"
# the % twice. I'm not sure that the outer_match_any in the next
# statement shouldn't be trigram_char, from a logical perspective.
# (-1 in split takes care of strings that end in %)
$quoted_outer_match_any = quotemeta($outer_match_any);
foreach ( split (/$quoted_outer_match_any/,$input_string,-1) ) {
if ($_ eq "") {
push @output_pieces,$_;
} else {
$output_piece = &subst_string_for_char($_,
$outer_match_one,$escaped_outer_match_one);
$output_piece = &subst_string_for_char($output_piece,
$outer_escape,$escaped_outer_escape);
$output_piece = &subst_string_for_char($output_piece,
$user_match_any,$outer_match_any_trigram);
$output_piece = &subst_string_for_char($output_piece,
$user_match_one,$outer_match_one_trigram);
push @output_pieces,$output_piece;
}
}
return join $escaped_outer_match_any,@output_pieces;
}
sub wild_validate
{
my ($wild_char,$bad_char_message,$dummy) = @_;
( ( defined($bad_char_message) ) && ( ! defined($dummy) ) )
||
&quit ("Internal error: wild_validate ".
"called w/wrong # args");
(length($wild_char) == 1) ||
&quit ("Wild card must be single character, not a string. ",
"Objectionable value is $wild_char");
foreach ($trigram_char,$outer_match_any,$outer_match_one,$outer_escape) {
($wild_char eq $_) && &quit ("$bad_char_message $_");
}
return;
}
sub subst_string_for_char
# Can't figure out how to make the s/// work w/ possibly special characters
# in both pattern and replacement
{
my ($input_string,$char,$replacement,$dummy) = @_;
my ($output_string);
( ( defined($replacement) ) && ( ! defined($dummy) ) )
||
&quit ("Internal error: subst_string_for_char ".
"called w/wrong # args");
($input_string eq "") &&
&quit ("Internal error: subst_string_for_char: no input string");
(length($char) == 1) ||
&quit ("Internal error: subst_string_for_char not called ".
"w/single char arg. Objectionable value is $char");
$output_string = "";
foreach ( split (//,$input_string) ) {
$output_string .= ($_ eq $char) ? $replacement : $_;
}
return $output_string;
}