#!/usr/bin/perl -w # add-persistent.pl # Create a jgofs object (w/trimmings) from PATH_INFO/QUERY_STRING combo # Problem: user has gone through optionserver and done something; say, joined # 2 objects. User would like to come back tomorrow and not have to go # through the joining again. # Solution: save the results of the join as a jgofs object "somewhere" # (read on) # Architecture: # We need a directory for .objects & .remoteobjects entries for this # object. Decided to # a) keep these objects on OO server # b) have a {OO_objectroot}/persistent_object object root # c) under that root, have "many" directories # By default, use REMOTE_ADDR/REMOTE_HOST string # [Someday] allow user to create a directory # [Someday] change default directory to a "personal" directory # (after login?) # Let user name object (.objects & .remoteobjects files) and provide # .remoteobjects comment. Any temp files user needs will be kept # in a per-object subdirectory of {USETEMPDIR}/persistent_object. # A particular set of temp files should carry # a unique stamp. This will leave a trail if the object changes. # Basic object is a .remote_objects entry of what comes in, not # requiring re-serving or temporary files (or .objects file entry) # at all. Optionally, however, make a copy of the data and serve # the copy w/defgb & jgof_read.pl. This DOES require a .object # file entry and "temporary" files (how long temporary is remains # an interesting question) # Make an info entry consisting of the URL of an/the input object. # Idea is that user of saved object can reach doc for a real object # eventually. # Someday, lock .remoteobjects and .objects against other use while # either is being modified. This will be a bigger problem when we # allow changes & deletes, which have to look things up. $version = "add_persistent.pl version 1.3a 5 Sep 2014"; # *** See "someday" sentences in synopsis above for future work. # *** Remove" someday"s when work is done!. # 5 Sep 14. version 1.3a WJS # Replace `` with call to backtick # Protect path info string from shell when giving it to parse_path_info. Don't # know why this ever worked if it's needed now ... # 29 Aug 14. version 1.3a WJS # Get location of mv program from build-env subsystem # 25 Aug 07. version 1.3 WJS # Be sure per-user .objects files have entries for joinmethod, etc # Otherwise, they might not be able to save objects generated on # OOserver itself. For example, if user does join and wants to # save it, obj defn in .remoteobjects is # //optserver/jg/serv/user-persistent_dir/optjoin(obj1,obj2) # 27 Aug 05. version 1.2a WJS # Bug fix - test for existence of env vars before testing their values # Bug fix - test for existence of objects/persistent_objects # 27 Oct 04. version 1.2 WJS # Allow deletion of object. Note this does NOT clean up any persistent # data files (including timecnv defgb optional files, etc). In its # generality, that problem is intractable, since it requires parsing # a .objects file entry, knowing about defgb, etc. # 21 Sep 04. version 1.2 WJS # "Copy of data" stuff needs better parsing of QUERY_STRING # Add delete option - not yet implemented # 20 Aug 04. version 1.2 WJS # Combine w/add_persistent_driver.pl # Option to make copy of data, and serve it w/defgb(jgof_read.pl(file)) # Remove special timecnv hooks. # Change perl location # Change some debug stuff to point to optserv1 # Validate more input fields. In particular, don't trust stuff we thought # arrived as a "hidden" variable # Use algorithm instead of list to ID "local" objects # [Needs wjs_web_perl_utilities.pl] # [Begin 1.2] # 23 May 04. version 1.1b WJS # Put version in html output as a comment (although since there is no # html display from this module, don't know if it'll do much good) # Automatically change user-supplied blanks in object name to underscores # per Bob's request. Don't allow #s in name to match restriction in # download (although that's a file name and this isn't) # Make no objectname a separate error from name w/illegal character # [Begin 1.1b] # 16 Jul 00. version 1.1a WJS # Add statisticker to list of local objects. # [Begin 1.1a] # 17 May 00. version 1.1 WJS # No more distinction between "optmethod"s and "datamethod"s. # [Begin 1.1] # 13 Sep 99. version 1.0a WJS # Add time stamp to info files we generate # 9 Sep 99. version 1.0a WJS # Bug fix: datajoinumethod, etc come from OO_server's top level .objects # file. Refine logic with respect to "empty" .objects subdir - when # does this mean "not spec'd" and when does it mean "want top level" # Improve error message about allowable chars in remobj name # [Begin 1.0a] # 3 Aug 99. version 1.0 WJS # Add timecnv hook. # 22 Jul 99. version 1.0 WJS # [Needs cgi-lib.pl] # [Needs pid_file_cleanup.pl, which needs valid_number.pl] # [Begin 1.0] { require ("pid_file_cleanup.pl"); require ("cgi-lib.pl"); require ("wjs_web_perl_utilities.pl"); $this_programs_file_spec = $0; $build_opt_env = "./build-opt-env.pl"; $pid_file_cleanup_debug_env_var = "PID_FILE_CLEANUP_DEBUG"; # NB: Excellent chance that as of v 1.2, debug stuff is flaky $debug = $ENV{"ADD_PERSISTENT_DEBUG"}; defined($debug) || ($debug = "F"); $debug = ($debug =~ /^(TRUE|true|T|t|Y|y|1)$/) ? 1 : 0; $debug_driver = $ENV{"ADD_PERSISTENT_DRIVER_DEBUG"}; defined($debug_driver) || ($debug_driver = "F"); $debug_driver = ($debug_driver =~ /^(TRUE|true|T|t|Y|y|1)$/) ? 1 : 0; $debug_either = ($debug || $debug_driver); if ($debug_either) { ($test_path_info = $ENV{"PATH_INFO"}) || ($test_path_info = "//optserv1.whoi.edu:8200/mathmethod.html0"); $test_server = "optserv1.whoi.edu"; # Try to choose an "unreal" port, mostly so that JGTEMP, below, # won't have nobody-"somebody" ownership problems. $test_port = "32767"; $test_remote_host = "fleetlink.whoi.edu"; $test_path_info = "//globec.whoi.edu:8081/globec/gb/test/test.html0"; $test_server = "optserv1.whoi.edu"; $ENV{"SERVER_NAME"} = $test_server; $ENV{"SERVER_PORT"} = $test_port; $ENV{"PATH_INFO"} = $test_path_info; $ENV{"REMOTE_HOST"} = $test_remote_host; $ENV{"OPTHOME"} = "/data/wsass/port8200/experimental"; $ENV{"OPTRUNDIR"} = "optbin"; $ENV{"MYADDR"} = "$test_server:$test_port"; $ENV{"DEFAULT_PORT"} = $test_port; $ENV{"JGTEMP"} = "/tmp/$test_port"; # Purpose of next line was to point to a DIFFERENT build-opt-env.pl # However, at moment, don't have one $build_opt_env = $ENV{"OPTHOME"} . "/" . $ENV{"OPTRUNDIR"} . "/build-opt-env.pl"; &printheader(); } # Hard to tell if do command works. It's doc'ed to return the last # evaluated line of the "done" file. However, what if file isn't # there, or there are compilation errs, etc? Some testing seems to # show that $! is set to "No such file" even if do works. However, # if $! is NOT "No such file", it seems to indicate an appropriate # error; eg, "Permission denied". Wouldn't expect $? to be set # and it's not. $@ had a chance, but isn't set either... # Decided to test for read access; otherwise let chips fall... # -r does NOT set $! for "Permission denied", so set it up # ourselves. Seems to be set for "file not found"... Fun w/ # string/numeric context trying to set $!, too. # (Since we're perl "do"-ing, -r is needed rather than -x... I think) $! = 0; if ( ! -r $build_opt_env) { $msg = ($! == 0) ? "Permission denied" : $!; &quit ("Cannot read $build_opt_env: $msg"); } # Note that when invoked as action routine of form, we do NOT have # the PATH_INFO & QUERY_STRING-related output of build-opt-env require "$build_opt_env"; ### if (1) { $debug_either && &dump_environment_variables(); # Directories foreach ('OPTHOME','JGTEMP','USETEMPDIR','OPTRUNDIR','JGSCRIPTDIR','MVPROG') { # JGSCRIPTDIR does NOT come from $build_opt_env (as of Aug 05), # but we do not know that here. (It comes from opt-build-env.pl via # $build_opt_env) defined ($ENV{"$_"}) || &quit ("Internal problem. ", "Did not get defined env var $_ from $build_opt_env"); } $tmpdir_root = &abs_filespec($ENV{"JGTEMP"},"Env var JGTEMP"); $tmpdir = &abs_filespec($ENV{"USETEMPDIR"},"Env var USETEMPDIR"); $jgofs_root= &abs_filespec($ENV{"OPTHOME"},"Env var OPTHOME"); $htmlbin_dir = &rel_filespec($ENV{"OPTRUNDIR"}); $jg_cgi = &rel_filespec($ENV{"JGSCRIPTDIR"}); # object_root Directory (absolute) from which directories # that contain .objects files descend # persistent_object_root Directory (absolute) from which directories # that contain persistent objects' # .objects files descend # Must be a subdir (to some depth) of object_root $object_root = "$jgofs_root/objects"; $persistent_object_root = "$object_root/persistent_objects"; &check_r_access($persistent_object_root); # Form action routine $form_action = &file_system_to_web_cgi ($this_programs_file_spec,"/$jg_cgi",$jgofs_root); ($remote_host) = ($tmpdir =~ m"^$tmpdir_root/(.+)$"); $remote_host || ($remote_host = "?"); $htmlbin_dir = "$jgofs_root/$htmlbin_dir"; # Where dir, etc are $bin_dir = "$jgofs_root/bin"; # Where listvar, etc are $util_dir = $bin_dir; # Where parse_path_info stuff is $methods_dir = "$jgofs_root/methods"; # Where defgb is # Next 3 lengths empirically determined. $maxlen_remobj_name = 17; $maxlen_remobj_investigator = 21; $maxlen_remobj_comment = 50; ($debug) ? &get_dummy_form_info() : &ReadParse(*form_info); # Main purpose of putting form action routine on form as a hidden # variable is to distinguish between use of this program in the # "put the form up" and "process the form results" modes. # That's because ReadParse looks at QUERY_STRING for input as well as # at the form. # The consistency check is just on general principles - anyone who de- # liberately mis-routes things here can change the hidden value as well. $after_submit = &get_form_var("form_action","OPT","FILESPEC"); if ($after_submit) { ($after_submit eq $form_action) || &quit ("Submit action routine should be same as form value" , "Action = $form_action; Form value = $after_submit" ); } if ($after_submit) { &process_form; } else { &print_form; } # $version used in &quit, which gets in here via require. Hence we # have a "used only once" diagnostic undef $version; exit; } sub print_form { { # add_persistent_driver.pl WJS Jul 99 # Basic job is to copy .shtml form to stdout. While doing so, it # inserts some environment stuff as hidden variables, so that # add_persistent.pl, the form's submit routine, can have them. It also # creates a unique id for temp files used in this connection, and passes # that along. It also "handles" the .shtml file's #include statements, # while ignoring #execs $unique = $$; # Send along PATH_INFO and QUERY_STRING. Values from $build_opt_env # not always exactly what we want. Also, we may eventually want # extended URL stuff. If we don't have a PATH_INFO, odds are excellent # that we are dead. However, for all we know, $build_opt_env is smarter # than we are (as of Jul 99, it needs PATH_INFO, so this is moot), so # struggle a little bit $path_info = $ENV{"PATH_INFO"}; $path_info || ($path_info = $ENV{"OBJEXT"}); $path_info || ($path_info = $ENV{"OBJECT"}); $path_info || &quit ("No object information reached this program"); (defined ($query_string = $ENV{"QUERY_STRING"})) || ($query_string = ""); $html_dir = "."; # Where add_persistent.shtml lives # Assuming no protection problems, either user has temp # directory, but no persistent object directory, or # there is no node-specific temp directory for this user # Latter shouldn't happen, but if we're running online and # forget to set REMOTE_, that's the situation we're in. # build-opt-env.pl doesn't like this at the moment (Jul 99), # but maybe some day it will be legal. $per_host_persistent_object_root = $persistent_object_root; $remote_host && ($per_host_persistent_object_root .= "/$remote_host"); # Form action routine $form_action = &file_system_to_web_cgi ($this_programs_file_spec,"/$jg_cgi",$jgofs_root); # Data files $html_file = "$html_dir/add_persistent.shtml"; $debug_driver && &redefine_files(); # #include values in add_persistent.shtml that we will deal with. Anything # not on this list makes us issue an error $directory_key = "add_persistent.directory"; $object_key = "add_persistent.objectname"; $hidden_key = "add_persistent.hidden"; $name_text_key ="add_persistent.name_text"; $comment_text_key ="add_persistent.comment_text"; # Presumably build-opt-env.pl took care of temp dir stuff. However, # leave it here 1) in case we want to change location from that in # build-opt-env 2) build-opt-env doesn't check that dir was created OK, etc &make_dir($tmpdir); # Get "display object" out of environment (put there by build-opt-env) ($dispobj = $ENV{"DISPOBJ"}) || &quit("No object information reached this program"); &printheader(); # Copy .shtml file to stdout, detecting and acting on #include # directives and inserting proper form action routine open (FILE,$html_file) || &quit ("Cannot read $html_file : $!"); $need_to_print_form_action=1; while () { # Look for $x[0]<--\n"; # print "Node/port -->$x[1]<--\n"; # print "Cgi -->$x[2]<--\n"; # print "Subdir -->$x[3]<--\n"; # print "Obj -->$x[4]<--\n"; # print "\n"; { # /jg/serv /objname my(@x) = ( $_[0] =~ m"^(.+?\:)?(//.+?(?:\:\d+)?)?(/$jg_cgi/.+?)?(/.+)?(/.+)$" ); # http: //node:port /subdir my($jg_len) = length("/$jg_cgi/"); if ($x[0]) { chop $x[0]; } # Remove : from http: else { $x[0]=""; } $x[1] = ($x[1]) ? substr($x[1],2) : ""; # Remove // from //node-port $x[2] = ($x[2]) ? substr($x[2],$jg_len) : ""; # Remove /jg/ from /jg/cgi $x[3] = ($x[3]) ? substr($x[3],1) : ""; # Remove initial / from /subdir $x[4] = ($x[4]) ? substr($x[4],1) : ""; # Remove initial / from /object return @x; } sub merge_passthru_object_specs # Args are an input .objects file and a .objects file to be updated # Look through input file for "passthrough" object defns like # joinumethod=join # Characterization: right side has no input args (but might have # env var defns). If output file doesn't have a particular # defn, append it to the output file # Returns 2 args. First arg is status. If status is 'OK', 2nd arg is # # records written to updated file. If status not 'OK, 2nd arg is # error info. # This code not really a good idea since it is parsing .objects files # and there should be an API using dctsearch for this purpose. However, # not holding breath until such a thing is written { my ($in,$update,$dummy) = @_; my (%in_objs,%update_objs); my ($obj_name,$obj_defn,$method); $in || return "NO_INFILE_ARG",""; $update || return "NO_UPDATEFILE_ARG",""; $dummy && return "TOO_MANY_ARGS",""; # One could argue that no input file is OK; just return after # performing the "null update". However, in practice, no point in # calling this routine if no input file (-e $in) || return "NO_INFILE","File: $in"; ($status,$status_info,%in_objs) = &get_passthrough_defns($in); ($status eq 'OK') || return $status,$status_info; # If no passthrough entries in input file, no updates needed (keys(%in_objs) == 0) && return 'OK',0; # Not a problem if file to be updated does not exist if (-e $update) { ($status,$status_info,%update_objs) = &get_passthrough_defns($update); ($status eq 'OK') || return $status,$status_info; } while (($obj_name,$obj_defn) = each %in_objs) { if (exists $update_objs{$obj_name}) { ($status,$status_info) = &compare_object_defns($obj_defn,$update_objs{$obj_name}); if ($status eq 'MATCH') { delete $in_objs{$obj_name}; } elsif ($status eq 'NOMATCH') { return "OBJ_NAME_CONFLICT", "Defns for object $obj_name in $in & $update conflict\n" . $status_info; } else { return ($status,$status_info); } } } # If no unaccounted-for entries in input file, no updates needed (keys(%in_objs) == 0) && return 'OK',0; $n_updates = $! = 0; # Add the previously unfound defns to the end of the file needing updating (open (UPDATE,">> $update")) || return "BAD_UPDATE_OPEN_FOR_APPEND","File: $update; Status: $!"; while (($obj_name,$obj_defn) = each %in_objs) { $n_updates++; $status = (print UPDATE "$obj_name=$obj_defn\n"); ($status == 0) && return "BAD_UPDATE_WRITE", "File: $update; Status: $!; Problem occurred w/update #$n_updates"; } close UPDATE; return 'OK',$n_updates; } sub get_passthrough_defns { my ($in,$dummy) = @_; my ($obj_name,$obj_defn,$method,%in); ($in && (! $dummy)) || return "BAD_ARGS",""; $! = 0; (open (IN,$in)) || return "BAD_OPEN_FOR_READ","File: $in; Status: $!"; while () { chomp; # Not sure if blank lines are allowed, but let's be tolerant $_ || next; # Doing it this way finesses issue of multiple =s on line # First one is the one that "counts" ($obj_name,$obj_defn) = split '=',$_,2; if ( ! ($obj_name && $obj_defn)) { close IN; return "BAD_OBJ_DEFN","File: $in; defn_line: $_"; } # Whitespace is tricky in non-passthrough obj defns. It is # permitted within the method arguments. Since we aren't interested # in method arguments, though, we can just split away. See comment # up top about using a (as yet non-existent) API to the dct routines. ($obj_name) = ($obj_name =~ /\s*(.*)\s*/); ($method,undef) = split ' ',$obj_defn; # split off env vars if ( ! ($obj_name && $method)) { close IN; return "BAD_OBJ_DEFN","File: $in; defn_line: $_"; } # Don't want remote objects or objects w/input args # Remote objects lead off with //; input args will have open parens # before any whitespace ($method =~ q"^//") || ($method =~ q"\(") || ($in{$obj_name} = $obj_defn); } close IN; return 'OK',"",%in; } sub compare_object_defns # Test if 2 "passthrough" .objects defns are the same. The method strings # must be identical. Env var strings must be identical but can # occur in any order. Return 'BAD_ARGS', 'MATCH' or 'NOMATCH'. If the # latter, return more info about what was different. { my ($defn1,$defn2,$dummy) = @_; my (%env_vars1,$method1,%env_vars2,$method2); ($defn1 && $defn2 && (! $dummy)) || return 'BAD_ARGS',""; # Can't figure out how to make split work on "string a or string b" # bud I DO know how to make it work on "char a or char b". Sigh $defn1 =~ s/\s+/ /g; $defn2 =~ s/\s+/ /g; ($method1,%env_vars1) = split /[=\s]/,$defn1; ($method2,%env_vars2) = split /[=\s]/,$defn2; ($method1 eq $method2) || return 'NOMATCH','Method name mismatch'; # Check that each env var defined in $defn1 is defined in $defn2, and # is defined to the same value while (($env_var_name,$env_var_value) = each %env_vars1) { (exists $env_vars2{$env_var_name}) || return 'NOMATCH', "Env var $env_var_name defined in one obj defn but not the other"; ($env_var_value eq $env_vars2{$env_var_name}) || return 'NOMATCH', "Value for env var $env_var_name differs between obj defns"; delete $env_vars1{$env_var_name}; delete $env_vars2{$env_var_name}; } # All env vars defined in both objects have matched and been removed # from the hashes. Any env vars remaining in either list constitute # a mismatch (($env_var_name,undef) = each %env_vars1) && return 'NOMATCH', "Env var $env_var_name defined in one obj defn but not the other"; (($env_var_name,undef) = each %env_vars2) && return 'NOMATCH', "Env var $env_var_name defined in one obj defn but not the other"; return 'MATCH',""; }