#!/usr/bin/perl -w { $version = "download_inquiry.pl version 1.0c 18 Sep 2014"; # 18 Sep 14. v 1.0c WJS # Bug fix: conflicting ps switches now (gb11; Ubuntu) diagnosed, so fix # Note that this code has the discredited Errno processing. Not fixing at this time # 23 Jan 09. v 1.0b WJS # Bug fix: treating a passed object spec as a "filename" caused # rejection if the object spec included a :port # Note: $version for v1.0a was incorrectly dated 1 Sep 05 # 11 Jul 08. v 1.0a WJS # MY_ADDR->MYADDR typo fix (plus needs an http://) # 1 Sep 05. v 1.0 WJS require "cgi-lib.pl"; require "wjs_web_perl_utilities.pl"; # We do some calculation with age in days. For convenience, set # up a number of days that should be before any age we realistically # encounter $long_ago = 999999; use Errno; $EBADF = &Errno::EBADF; # value 9 on globec $ESPIPE = &Errno::ESPIPE; &printheader; $build_opt_env = "./build-opt-env.pl"; &check_r_access($build_opt_env); require "$build_opt_env"; $temp_root = &abs_filespec($ENV{"JGTEMP"},"Env var JGTEMP"); $this_users_temp_root = &abs_filespec($ENV{"USETEMPDIR"},"Env var USETEMPDIR"); $jgofs_root= &abs_filespec($ENV{"OPTHOME"},"Env var OPTHOME"); $jg_cgi = &rel_filespec($ENV{"JGSCRIPTDIR"}); ($this_programs_filename) = ($0 =~ /.*\/(.+)/); $this_programs_url = "http://$ENV{'MYADDR'}/$jg_cgi/$this_programs_filename"; $uptime = &get_system_uptime(); &ReadParse(*form_info); $pid = &get_form_var("process_id","OPT","NUMBER"); $object_inquiry = &get_form_var("object","OPT","OBJSPEC"); $pid && $object_inquiry && &quit("Cannot simultaneously inquire by process ID and object name"); $back_to_otheropt = ($object_inquiry) ? "http://$ENV{'MYADDR'}/$jg_cgi/otheropt$object_inquiry" : ""; $this_computer = &get_form_var("this_computer","OPT","STRING"); ($this_computer eq "TRUE") || ( ! $this_computer) || &quit("Form variable this_computer can only take the value TRUE. ", "It is $this_computer"); $other_computer = &get_form_var("computer","OPT","STRING"); $other_computer && $this_computer && &quit("Cannot check box for \'this computer\' and also specify ", "the name of a computer ($other_computer)"); ($sort = &get_form_var("sort","OPT","STRING")) || ($sort = "by_date"); ($sort eq "by_date") || ($sort eq "by_name") || &quit("Form variable sort must be either by_name or by_date. ", "It is $sort"); $list_all = &get_form_var("computer_list","OPT","STRING"); ($list_all eq "TRUE") || ( ! $list_all) || &quit("Form variable computer_list can only take the value TRUE. ", "It is $list_all"); $this_computer && $other_computer && &quit ("Please EITHER enter a computer name OR check the ", "computer-now-being-used box"); # Next line sets computer to this_computer even if the $this_computer # box wasn't selected. This covers us for a default for $pid # & $object_inquiry searches if ($other_computer) { $computer = $other_computer; } else { ($computer) = ($this_users_temp_root =~ /$temp_root\/(.*)/); } # quit's error messages print more nicely if
 is not in effect.  Too
#   bad about error messages that follow...
  print "
";

  @list = ();
  $n_printed = 0;

  if ($pid || $this_computer || $other_computer || $object_inquiry) {
#     Good chance we can find what user wants w/o going through all the
#     the directories.  Give it a shot
    &process_download_dir($computer);
    if (@list > 0) {
      if ($pid || $object_inquiry) {
	for $i (0 .. $#list) {
	  if (&match($list[$i],"PID",$pid,"OBJECT",$object_inquiry)) {
	    print "Download was initiated from host $computer\n";
	    &print_downloaded_object_info($list[$i]);
	    $n_printed++;
	    last;
	  }
	}
	$object_inquiry && ($n_printed == 1) &&
	  print "\n\nNote: object downloads can be initiated from multiple\n" .
		"computers.  If the above download is not the one you want\n" .
		"you can continue the inquiry " .
		"here\n";
      } else {
	@temp = ($sort eq "by_name") ?
			sort sort_by_object @list : sort sort_by_age @list;
	$ess = ($#temp == 0) ? "" : "s";
	print "

Download$ess initiated from host $computer

\n"; for $i (0 .. $#temp) { &print_downloaded_object_info($temp[$i]); $n_printed++; } } } if ($n_printed == 0) { # Oh well, we'll have to go through all the directories after all ($this_computer || $other_computer) && print "Sorry, could not find any downloads intiated from specified " . "computer $computer\nWill list all computers that " . "initiated downloads\n\n\n"; $list_all = "TRUE"; } else { # Next stands a chance of overriding user-spec'd "list everything" # However, it means user spec'd that along w/something else more # specific, we found something else more specific, and user is # not supposed to pick more than one choice anyway $list_all = ""; } } else { # Turns out we don't really care what user entered on form for # this choice. If they didn't ask for anything else, this is default $list_all = "TRUE"; } if ($list_all) { (opendir ROOTDIRHANDLE, $temp_root) || &quit ("Cannot open $temp_root: $!\n"); $! = 0; while (defined($downloader = readdir ROOTDIRHANDLE)) { # Skip . & .., and, while at it, hidden directories (of which there # shouldn't be any) ($downloader =~ /^\./) && next; &process_download_dir($downloader); $! = 0; # set by -e, so clear for readddir } ($! == 0) || ($! == $EBADF) || &quit ("Trouble reading $temp_root: $!\n"); (closedir ROOTDIRHANDLE) || &quit ("Trouble closing $temp_root: $!\n"); if ($#list > 0) { if ($pid || $object_inquiry) { for $i (0 .. $#list) { if (&match($list[$i],"PID",$pid,"OBJECT",$object_inquiry)) { print "Download initiated from $list[$i]{'host'}\n"; &print_downloaded_object_info($list[$i]); $n_printed++; last; } } $object_inquiry && ($n_printed == 1) && print "\n\nNote: object downloads can be initiated from multiple\n" . "computers. If the above download is not the one you want\n" . "you can continue the inquiry " . "here\n"; } else { # We could list everything here. However, to discourage fishing for # downloads that aren't one's own (as well as the more noble purpose # of not displaying stuff user doesn't want to see), just list # computers and dates and a link to come back here w/a computer name @list = sort sort_by_host_and_age @list; # Get rid of all but newest entry for each host $temp[0] = $list[0]; $j = 0; for $i (1 .. $#list) { ($temp[$j]{"host"} eq $list[$i]{"host"}) || ($temp[++$j] = $list[$i]); } ($sort eq "by_date") && (@temp = sort sort_by_age @temp); print "Click on a computer to get more info about downloads " . "initiated from there\n\n"; print "Computer initiating download\tMost recent download began\n"; for $i (0 .. $#temp) { $time = ($temp[$i]{"first_time_stamp"}) ? $temp[$i]{"first_time_stamp"} : "<unknown>"; print &recursive_url($temp[$i]{"host"}) . "\t$time\n"; $n_printed++; } } } } if ($n_printed == 0) { print "Sorry, no downloads matched your criteria.\n"; if ($pid || $object_inquiry) { print "If you'd like to do a more general search for downloads " . "you can continue the inquiry " . "here\n"; } else { print "Please note that files awaiting download are removed every few ". "days - perhaps that is what happened\n"; } } # Note that sel/proj string is lost. We can NOT use SUBSELS to get # it back since in general, we wipe QUERY_STRING (see recursive_url # below, and download_inquiry_form.pl). $back_to_otheropt && (print "\n\t\t\t\t\t" . "" . "Back to Plotting and Other Operations Menu" . "\n"); # Next line to avoid "1-time use" diagnostic (undef %form_info) && (undef $version) && (undef $jgofs_root); print "
"; print &HtmlBot; exit; } sub recursive_url { my ($host) = @_; my ($pseudo_form_variables); $pseudo_form_variables ="computer=$host&back_to_otheropt=$back_to_otheropt"; return "$host"; } sub match # Given a pointer to a hash and several key/value pairs, report # true if, for all non-empty values, hash{key}=value { my ($hash_ptr,%test_hash) = @_; my ($match_val) = ""; foreach (keys %test_hash) { if ($test_hash{$_}) { $match_val = (defined $hash_ptr->{$_}); $match_val || last; # Note we have our friendly old problem w/numerics. Punt for now; # use valid_number.pl later... once we can figure out if # 041219 is in fact supposed to be the same as 41219 $match_val = ($hash_ptr->{$_} eq $test_hash{$_}); $match_val || last; } } return $match_val; } sub print_downloaded_object_info { my ($info_ptr) = @_; my (%info); my ($day,$month,$date,$time,$year); my ($pid,$status,$error_status,$EOJ_status,$extract_status); my ($running,$process_state,$system_crashed); my ($start_time,$last_time,$hours_from_download_start_to_system_crash); %info = %{$info_ptr}; print "

Info regarding download of $info{'object'} data

\n"; # PID is first line of HISTORY file, so if it's not defined, nothing # much we can do if ($info{'PID'}) { $pid = $info{'PID'}; } else { print "Sorry, no information available\n\n\n\n\n"; return; } $info{'OBJECT'} && (print "Full object spec: $info{'OBJECT'}\n"); ($day,$month,$date,$time,$year) = split (' ',$info{'first_time_stamp'}); $start_time = "$month $date @ $time"; ($day,$month,$date,$time,$year) = split (' ',$info{'last_time_stamp'}); $last_time = "$month $date @ $time"; print "Download started $start_time\t" . "Last logged time stamp was $last_time\n"; # Non-empty EOJ is presumably an error message if (defined $info{'EOJ'}) { $EOJ_status = ($info{'EOJ'}) ? $info{'EOJ'} : "OK"; } else { $EOJ_status = "Incomplete"; } $hours_from_download_start_to_system_crash = 24 * ($info{'age'} - $uptime); if ( $system_crashed = ($hours_from_download_start_to_system_crash > 0) ) { # System crashed - job can't be running $running = 0; } else { # Should be a neater way of doing this, but I don't know what it is # Don't know how portable this command is, either. -o s asks for # the process state so we can see if it's a zombie. = after the s # represses the header line # Turns out that -l conflicts w/ -o. That makes sense, but gb6 never complained. # gb11 does. ####### $process_state = `ps -l -o s= -p $pid`; $process_state = `ps -o s= -p $pid`; chomp $process_state; $running = ($process_state && ($process_state ne "Z")); } $status = $error_status = ""; if ($EOJ_status eq "Incomplete") { if ($running) { $status = "Job still running"; } else { $status = "Job did not finish"; if ( defined($info{'END_DATA_EXTRACTION'}) ) { # empty extract status means extract succeeded, so problem # is that bundling failed or job was killed in bundling phase. # Fairly unlikely, so I'm not trying to find a non-empty string # for $error_status in that case $error_status = $info{'END_DATA_EXTRACTION'}; } else { # undefined extract status most likely means somebody killed # the thing during list, etc. If the system crashed close to # the job start time, note that. Pick your value for "close to" if ( $system_crashed && ($hours_from_download_start_to_system_crash < 5) ) { $error_status = "System crash?"; } else { $error_status = ""; } } } } else { if ($running) { $status = "Job still running although logged as complete"; } else { if ($EOJ_status eq "OK") { # Can we can get an error status here? who knows? $info{'END_DATA_EXTRACTION'} && ($error_status = $info{'END_DATA_EXTRACTION'}); } else { $error_status = $EOJ_status; } $error_status && ($status = "Job finished but there were problems"); } } if ($status) { print "Status of this download is/was not entirely normal\n"; print "$status\n"; $error_status && (print "Logged status information: $error_status\n"); $info{'READMEURL'} && (print "Suggest at least looking at README file before downloading\n"); } $info{'READMEURL'} && (print "README file" . " (info about the download process)\n"); $info{'$INFOURL'} && (print "information file" . " (info about the data downloaded)\n"); if ($info{'READMEURL'}) { print "Download\n"; } else { print "Sorry, no pointer to download package is available\n"; } print "\n\n\n\n"; return; } sub sort_by_age { my ($a_age,$b_age); $a_age = ($a->{"age"} == -1) ? $long_ago : $a->{"age"}; $b_age = ($b->{"age"} == -1) ? $long_ago : $b->{"age"}; ($a_age <=> $b_age) || ($a->{"host"} cmp $b->{"host"}) || ($a->{"object"} cmp $b->{"object"}); } sub sort_by_host_and_age { my ($a_age,$b_age); $a_age = ($a->{"age"} == -1) ? $long_ago : $a->{"age"}; $b_age = ($b->{"age"} == -1) ? $long_ago : $b->{"age"}; ($a->{"host"} cmp $b->{"host"}) || ($a_age <=> $b_age) || ($a->{"object"} cmp $b->{"object"}); } sub sort_by_object { ($a->{"object"} cmp $b->{"object"}) || ($a->{"host"} cmp $b->{"host"}); } sub process_download_dir { my ($host) = @_; my (%temp,$dir,$object,$status,$file); $dir = "$temp_root/$host/download"; (-e $dir) || return; (opendir DOWNLOADDIRHANDLE, $dir) || &quit ("Cannot open $dir: $!\n"); $! = 0; while (defined($object = readdir DOWNLOADDIRHANDLE)) { ($object =~ /^\./) && next; $file = "$dir/$object/HISTORY"; ($status,%temp) = &process_history_file($file); # Decided not to print err msg below, but if it's to be re-added, # print name of this program along w/it! # ($status eq "OK") || # print STDERR # "[" . scalar localtime() . # "] Problem with $file: $status\n"; $temp{"host"} = $host; $temp{"object"} = $object; push @list, {%temp}; $! = 0; } # EBADF test empirically determined on globec (when it was gb6; Solaris) ($! == 0) || ($! == $EBADF) || &quit ("Trouble reading $dir: $!\n"); (closedir DOWNLOADDIRHANDLE) || &quit ("Trouble closing $dir: $!\n"); return; } sub process_history_file # Returns status variable ("OK" or something else) followed by a hash of info # hash keys are history file keys, plus "age", "first_time_stamp" & # "last_time_stamp". Only "age" is guaranteed to be defined (set to -1 # if age cannot be determined) { my ($file) = @_; my (%temp,$rec,$status,$file_ok,$key,$value,$rest); $file_ok = "OK"; $! = 0; if (-e $file) { $temp{"age"} = -M _; (-z _) && ($file_ok = "EMPTYHISTFILE"); } else { $temp{"age"} = -1; $file_ok = ($! == 0) ? "file does not exist" : $!; } if ($file_ok eq "OK") { $! = 0; if (open HIST, $file) { $! = 0; while (defined($rec = )) { chomp $rec; ($temp{"last_time_stamp"},$key,$value,$rest) = split ('\|',$rec); (defined $key) && ( ($key) = ($key =~ /^\s*(.+?)\s*$/) ); if ( $rest || ! $key) { $file_ok = "BADHISTFMT"; last; } (defined $temp{"first_time_stamp"}) || ($temp{"first_time_stamp"} = $temp{"last_time_stamp"}); ($value) = ($value =~ /^\s*(.*?)\s*$/); $temp{$key} = $value; } ($file_ok eq "OK") && ($! != 0) && ($file_ok = $!); close HIST; } else { $file_ok = $!; } } return $file_ok,%temp; } sub get_system_uptime # Waste some time figuring out "age of this incarnation of system", in days # Most waste is attempting to compensate for programs which might not # exist, while simultaneously trying not to generate error messages { my ($uptime_program,$uptime_text); my ($days,$hrs,$mins); $? = $! = 0; # /dev/null & status checks here refer to the "which" program $uptime_program = `which uptime 2> /dev/null`; ( (($! == 0) || ($! == $ESPIPE)) && ($? == 0) ) || return $long_ago; # Non-existent argument to "which" does NOT set which's return status # Assume all which implementations return "no whatever" ($uptime_program =~ /^no uptime/) && return $long_ago; chomp $uptime_program; ((-e $uptime_program) && (-x _)) || return $long_ago; $? = $! = 0; $uptime_text = `uptime 2> /dev/null`; ( (($! == 0) || ($! == $ESPIPE)) && ($? == 0) ) || return $long_ago; chomp $uptime_text; # 10:05pm up 42 day(s), 7:07, 7 users, load average: 0.00, 0.01, 0.01 ($days,$hrs,$mins) = ($uptime_text =~ /^.*?up (\d+) day.*?\,\s+(\d+)\:(\d+)\,/); ($days && $hrs && $mins) || return $long_ago; return $days + $hrs/60. + $mins/3600.; }