#!/usr/bin/perl -w
{
$version = "plotxy version 2.1  12 Aug 2010";

# 12 Aug 2010. v 2.1 WJS
#	Return "overplot" behavior.  Future: allow user to overplot or not
#	  */src/bin/pl.f does a pen up when
#	  jdbread says it "has gone back up a level".  The projection of
#	  x & y vars added in plotxy 2.1 very probably confined the input
#	  object data to single level, but if fed by old versions of outer,
#	  input object still had upper, albeit null, levels.  However, outer
#	  3.0 eliminated the null levels.  Bob defined a "plot segment" as
#	  each time we go down to the level that has the plotted vars - I
#	  couldn't think of a good name

# 12 Jul 2009. v 2.0e WJS
#	Put all diagnostics into the args to quit so that they show
#	  up in httpd log file as well as on-screen
#	Software doesn't work on horizontal or vertical lines - diagnose
# 29 May 2009. v 2.0d WJS
#	Diagnose situation where user reloads (user shouldn't...)
#	Diagnose situation where user plots var against itself (if
#	  allowed, we do an object(var,var) selection which returns 1
#	  col of data but code wants 2 cols.  Blooie!)
# 18 Mar 2009. v 2.0c WJS
#	Add -forceheader and -nohelp switches to list command
# 11 Jul 2008. v 2.0b  WJS
#	Add an undef %form_info to avoid diagnostic
# 25 Oct 2007. v 2.0a  WJS
#	Add -l & -z switches to list command.
# 29 Jul 2005. v 2.0  WJS
#	Rewrite in perl.  More transcription than rewrite except for next items
#	Scan input to see if problem has a "0 or 1 unique input data 
#	  points" problem (and tell user!).  More efficiently done in one
#	  of the plotting routines that needs to read all the data anyway,
#	  but that optimization will await another time.
#	Save tek output in its own temp file.  This allows separate
#	  checking of tek production and tek-to-gif conversion.  
#	  Errors will be id'ed "tek production" errors if tek production
#	  ended w/non-zero status or there is nothing in .tek file.
#	  If there was in fact a tek error but neither of these applies
#	  (eg, exits w/zero status after writing error info to .tek file),
#	  error will end up a tek-to-gif error (I hope!!)
#	Add projection of x & y vars to object spec.  Minor win if they
#	  happened to be absent.  Expected win if object had no projections-
#	  we then reduce amount of data being sent to plot routines.  Most
#	  efficient thing to do would be to replace existing projection
#	  string with x & y vars.  However, that awaits the ability to
#	  ID projection & selection strings
#	Replace poststring stuff with ReadParse, etc
#	Replace many env vars w/local vars.  I think info was put into
#	  env for coding convenience, not "down-the-line" use.
#	Check accessibility & exit statuses of programs
#	Use process ID as "unique" indicator rather than portion of time
#	Use absolute directory specs (except for assumption that
#	  current directory is JGOFS_root/optbin & OO perl libs are
#	  in it)

# 20 Jul 2005. wjs. Use URL for ID'ing image instead of file spec
#  8 Nov 1999. clh. using the build-opt-env script to conform
#       to the same environment as all other OO scripts.  Note that
#	that script (build..) deals with formatting for WWW and for
#	trigramming/jgofs
#  6 Apr 99.  object in pathinfo received from optionserver menu has no
#       extension, no need to remove (obj:r), plus this breaks stuff that
#       has decimal points in projections/selections. clh
#  5 Mar 99.  %20s in FULLOBJ (=object(query_string) are removed by httpd
#	server between plotopt form and here.  Put  them back.  WJS
#

require ("cgi-lib.pl");
require "wjs_web_perl_utilities.pl";
require "pid_file_cleanup.pl";
require "build-opt-env.pl";

&printheader();

$bindir = "$ENV{'OPTHOME'}/bin";
$tempdir = $ENV{'USETEMPDIR'};
$tempurl = $ENV{'USETEMPADDR'};

$p = "$tempdir/p";
&check_x_access($p);
$ch = "$bindir/ch";
&check_x_access($ch);
$tekgif = "$bindir/tekgif";
&check_x_access($tekgif);

#   Accessibility checked if/when needed, in sub listvar_in_def_format
$listvar = "$bindir/listvar";

$unique = $$;

#   Next line is safe.  Not sure if it's necessary.  At minimum, consider
#   that program p reads an object, and if .html is in PATH_INFO at that
#   time, there will be trouble.  Of course, if that's the issue, better
#   to have p clear the env var...
$ENV{'PATH_INFO'} = "";

&ReadParse(*form_info);
#    Next is OPT only so we can try to diagnose a user reload
#    We don't want to prohibit GETs in case somebody figures out a neat
#    way to use this routine by supplying the 5 args in QUERY_STRING
#    REQUEST_METHOD env var is part of cgi-bin standard interface.
#    If failure was NOT due to a GET, the redo the get_form_var w/REQ to 
#    get the standard error exit.
$xvar = &get_form_var("xvar","OPT","STRING");
if ( ! $xvar) {
  ($ENV{'REQUEST_METHOD'} eq "GET") && 
    &quit("To remake plot, please return to Simple XY Plot page ",
	  "and use the Make Plot button");
  $xvar = &get_form_var("xvar","REQ","STRING");
}
$yvar = &get_form_var("yvar","REQ","STRING");
($xvar eq $yvar) && &quit("Probable error: asking to plot var against itself");

#   FILESPEC is a cheap way to allow the "/" in "rev/log"
$xopt = &get_form_var("xopt","REQ","FILESPEC");
$yopt = &get_form_var("yopt","REQ","FILESPEC");
$symbolopt = &get_form_var("sym","REQ","NOCHECK");

#   Although next files is unique to this plot, cannot add $unique to file
#   spec.  Whole plot system in program p (a script, BTW) and "after" has
#   hard-coded file names in it.
$outfile = "$tempdir/plotopt";
(open (PLOTOPT_FILE,">$outfile")) ||
	&quit ("Could not open $outfile for write","\$! = $!");
$! = 0;
(print PLOTOPT_FILE "$xvar $xopt $yvar $yopt $symbolopt\n") ||
			&quit ("Could not print to $outfile","\$! = $!");
close PLOTOPT_FILE;

if ($xopt eq "none") {
  $xopt = "";
} elsif ($xopt eq "reverse") {
  $xopt = "-r";
} elsif ($xopt eq "log") {
  $xopt = "-l";
} elsif ($xopt eq "rev/log") {
  $xopt = "-r -l";
} else {
  &quit ("Unknown x option string $xopt received from web page",
	 $poststr,
	 "Known options are 'none', 'reverse', 'log' & 'rev/log'");
}

if ($yopt eq "none") {
  $yopt = "";
} elsif ($yopt eq "reverse") {
  $yopt = "-r";
} elsif ($yopt eq "log") {
  $yopt = "-l";
} elsif ($yopt eq "rev/log") {
  $yopt = "-r -l";
} else {
  &quit ("Unknown y option string $yopt received from web page",
	 $poststr,
	 "Known options are 'none', 'reverse', 'log' & 'rev/log'");
}

if ($symbolopt eq "line") {
  $symbolopt = "";
} elsif ($symbolopt eq "+") {
  $symbolopt = "-siz 3 -sym 4";
} elsif ($symbolopt eq "x") {
  $symbolopt = "-siz 3 -sym 6";
} elsif ($symbolopt eq "o") {
  $symbolopt = "-siz 3 -sym 2";
} elsif ($symbolopt eq "+(connected)") {
  $symbolopt = "-siz 3 +sym 4";
} elsif ($symbolopt eq "x(connected)") {
  $symbolopt = "-siz 3 +sym 6";
} elsif ($symbolopt eq "o(connected)") {
  $symbolopt = "-siz 3 +sym 2";
} else {
  &quit ("Unknown symbol option string $symbolopt received from web page",
	 "Known options are '+', 'x', 'o', '+(connected), 'x(connected) " .
							  "& 'o(connected)'");
}

#   Get next switch from user at some point
$pen_up_between_plot_segments = 1;

#   Make sure x & y vars are in subselection list.  Lazily, we rely 
#   on system allowing multiple specs of same projected variable
#     Note that this explicitly overrides an input object which 
#   did NOT include the x & y vars.  However, the web interface has 
#   always ignored the projection list when it displays the list of 
#   variables available for plotting.
#     If lucky, we will reduce data flow to just x & y data
$callfull = $ENV{'OBJECT'} . '(';
$ENV{'SUBSELS'} && ($callfull .= $ENV{'SUBSELS'} . ",");
$callfull .= "$xvar,$yvar)";

#   Check input data.  Could also do this only in error situations.
#   However, it is likely that whole object must be gone through in
#     error situations, so doing it here keeps "time-to-user-output"
#     more consistent.  In OK situations, we will probably only need
#     to read 3 lines from input object to validate it, so major cost
#     is in extra object open
($status,$err_msg) = &analyze_plot_vars($xvar,$yvar);
if ($status eq "CHECK_FAILED") {
#     Some kind of problem with list object.  Problem could well kill
#     plot also (eg, bad object spec), but presumably that will produce
#     whatever error it would have produced had we not stuck this analyze
#     in.  Object is to not die because of an analyze problem - might
#     eventually change this
  print STDERR "Problem with analyze_plot_vars in plotxy\n" .
	       "Might or might not be a problem w/analyze_plot_vars itself\n".
		$err_msg;
} elsif ($status eq "NG") {
  &quit ("Problem with input data",$err_msg,"Object spec: $callfull");
} elsif ($status ne "OK") {
  print STDERR "analyze_plot_vars in plotxy returned unknown status $status\n";
}

#   To get a pen-up between plot segments, need a multi-level object
#   (see *src/bin/pl.f).  Do this by projecting in a level N-1 variable
#   where level N is "the" level above the plotted vars
#   Doesn't work if object is only 1 level, but plot segment idea is a 
#   multilevel concept anyway
if ($pen_up_between_plot_segments) {
  @def_varlist = &listvar_in_def_format($listvar,$ENV{'OBJECT'});
  $level_of_first_plotvar = -1;
  foreach (@def_varlist) {
    $level_of_first_plotvar++;
    foreach (split /\t/) {
      ($found = ($_ eq $xvar)) && last;
      ($found = ($_ eq $yvar)) && last;
    }
    $found && last;
  }
  if ($level_of_first_plotvar > 0) {
    ($var_from_level_before_plotvars) = 
		split   /\t/,  $def_varlist[$level_of_first_plotvar - 1],  2;
    $var_from_level_before_plotvars || &quit
	("Internal error - could not get variable from level above plotvars");
    chop $callfull;
    $callfull .= ",$var_from_level_before_plotvars)";
  }
}

$command = "$ch $tempdir/plot.var xvmin=15 yvmin=15 xvmax=85 yvmax=85";
$? = $! = 0;
system ($command);
(($? == 0) && ($! == 0)) || 
	&quit ("Problem executing command $command","\$? = $?; \$! = $!");

$outfile_name = "temp$unique.tek";
$tekfile = "$tempdir/$outfile_name";
push @tempfiles,$tekfile;
$tekurl = "$tempurl/$outfile_name";
$command = "cd $tempdir;" .
	   "PATH=$tempdir:$bindir:$ENV{'PATH'};" .
	   "export PATH;" .
	   "$p \"$callfull\" $xopt $xvar $yopt $yvar $symbolopt > $tekfile";
$? = $! = 0;
system ($command);
$save_sys = $!;
$save_exit = $?;
$ok_command_status = ($save_sys == 0) && ($save_exit == 0);
$ok_file_status = (-s $tekfile);
if ( ! ($ok_command_status && $ok_file_status)) {
  $errmsg = "Problem producing tektronix intermediate output\n";
  $errmsg .= "Command that had problem: $command\n";
  if ( ! $ok_command_status) {
    $errmsg .= "Abnormal system and/or exit status. " .
	  "system status = $save_sys; exit status = $save_exit\n";
  }
  if (! -e _) {
    $errmsg .= "Tektronix file $tekfile does not exist\n";
  } elsif (-z _) {
    $errmsg .= "Tektronix file $tekfile is empty\n";
  } elsif (-T _) {	# Note: most binary tek files pass the -T test
    $errmsg .= "$tekfile may have useful info.  (Link to file above)\n";
    print "<a href=\"$tekurl\"> This file </a> " .
				"may have info about problem\n";
  }
  &quit($errmsg);
}

$outfile_name = "temp$unique.gif";
$giffile = "$tempdir/$outfile_name";
push @tempfiles,$giffile;
$gifurl = "$tempurl/$outfile_name";
$command = "cd $tempdir;" .
	   "PATH=$tempdir:$bindir:$ENV{'PATH'};" .
	   "export PATH;" .
	   "$tekgif < $tekfile > $giffile";
$? = $! = 0;
system ($command);
$save_sys = $!;
$save_exit = $?;
$ok_command_status = ($save_sys == 0) && ($save_exit == 0);
$ok_file_status = (-s $giffile);
if ( ! ($ok_command_status && $ok_file_status)) {
  $errmsg = "Problem converting tektronix intermediate output to gif\n";
  $errmsg .= "Command that had problem: $command\n";
  if ( ! $ok_command_status) {
    $errmsg .= "Abnormal system and/or exit status. " .
	  "system status = $save_sys; exit status = $save_exit\n";
  }
  if (! -e _) {
    $errmsg .= "gif file $giffile does not exist\n";
  } elsif (-z _) {
    $errmsg .= print "gif file $giffile is empty\n";
  }
  if ( (-s $tekfile) && (-T $tekfile)  ) {
    $errmsg .= "$tekfile may have useful info.  User was offered URL to it\n";
    print "<a href=\"$tekurl\"> This file </a> " .
				"may have info about problem<\n";
  }
  &quit($errmsg);
}

print "<p> <img src=\"$gifurl\"> <p>\n";

&pid_file_cleanup($unique,@tempfiles);
#  Avoid '1 time use' diagnostic.  $version is used by &quit; %form_info
#  by the &get_ routines
undef $version;
undef %form_info;
exit 0;
}

sub analyze_plot_vars
{
  my ($x_var,$y_var) = @_;

  my ($list) = "$bindir/list";
  my ($JGOFS_missing) = "nd";
  my ($command,$status,$rec,@err_array);

  my ($unique_valid_pairs_x,$unique_valid_pairs_y);
  my ($n_unique_valid_pairs,$n_dup_pairs,$nrecs);
  my ($n_good_x,$n_good_y,$n_missing_x,$n_missing_y);
  my ($waste);
  my ($x,$y,$x_var_index,$y_var_index,$x_good,$y_good);
  my (@data);

  &check_x_access ($list);
  
  $command = "$list -c -f -t -nohelp -forceheader -z -l \"$callfull\" |";
  open (LIST,$command) ||
	return ("CHECK_FAILED","Problem opening pipe $command","\$! = $!");

  ($status,$rec,@err_array) = &get_JGOFS_record(LIST);
  ($status eq "OK") || 
	return "CHECK_FAILED",
	&format_get_JGOFS_record_return_status($command,@err_array);
  chomp $rec;
  $i = 0;
  for (split /\t/,$rec) {
#     Note that somebody might want to plot a var against itself,
#     so both of the next 2 statements could be executed during 1
#     pass through the loop
    (/^\s*$x_var\s*$/) && ($x_var_index = $i);
    (/^\s*$y_var\s*$/) && ($y_var_index = $i);
    (defined $x_var_index) && (defined $y_var_index) && last;
    $i++;
  }
  if (! defined $x_var_index) {
    close LIST; 
    return "NG", "Variable $x_var not found in input object as projected";
  }
  if (! defined $y_var_index) {
    close LIST; 
    return "NG", "Variable $y_var not found in input object as projected";
  }

#     Whole idea is to test hypothesis that bad plots happened because
#     there weren't as many as 2 good, distinct data points.  It is 
#     certainly true that 0 or 1 good, distinct data points causes failure
#     Turns out horizontal and vertical lines cause trouble, too ...
#     Can't use the statisticker to get this info because analysis must
#     be done on pairs, not individual variables
  $n_unique_valid_pairs = $n_dup_pairs = $nrecs = 0;
  $n_good_x = $n_good_y = $n_missing_x = $n_missing_y = 0;
  $n_different_good_x = $n_different_good_y = 0;
#   At present (2.0e), n_dup_x and _y are computed but not used
  $n_dup_x = $n_dup_y = 0;

  ($status,$rec,@err_array) = &get_JGOFS_record(LIST);
  while ($status eq "OK") {
    $nrecs++;
    chomp $rec;
    @data = split /\t/,$rec;
    $x = $data[$x_var_index];
    $y = $data[$y_var_index];
    $x_good = $y_good = 0;
    if ($x eq $JGOFS_missing) {
      $n_missing_x++;
    } elsif (&valid_number($x)) {
      $n_good_x++;
      $x_good = 1;
    }
    if ($y eq $JGOFS_missing) {
      $n_missing_y++;
    } elsif (&valid_number($y)) {
      $n_good_y++;
      $y_good = 1;
    }
    if ($x_good && $y_good) {
      if ($n_unique_valid_pairs == 0) {
	$unique_valid_pairs_x = $x;
	$unique_valid_pairs_y = $y;
	$n_unique_valid_pairs++; 
      } else {
#	  Note that this comparison must be numeric, since the pairs
#	  (1,2) and (01,2) are in fact NOT different pairs (here anyway!)
	if (($unique_valid_pairs_x == $x) && ($unique_valid_pairs_y == $y)) {
	  $n_dup_pairs++;
	} else {
#	    Since we only care about getting to 2, not important that
#	    count below could be too big (does not account for dups that
#	    are NOT unique_valid_pairs_x
	  $n_unique_valid_pairs++;
	}
      }
      if ($n_different_good_x == 0) {
	$unique_valid_x = $x;
	$n_different_good_x++;
      } else {
	if ($unique_valid_x == $x) {
	  $n_dup_x++;
	} else {
#	    Since we only care about getting to 2 ... see comment above
	  $n_different_good_x++;
	}
      }
      if ($n_different_good_y == 0) {
	$unique_valid_y = $y;
	$n_different_good_y++;
      } else {
	if ($unique_valid_y == $y) {
	  $n_dup_y++;
	} else {
#	    Since we only care about getting to 2 ... see comment above
	  $n_different_good_y++;
	}
      }
    }
    ($n_unique_valid_pairs >= 2) && 
	($n_different_good_x >= 2) &&
	($n_different_good_y >= 2) &&
		last;
    ($status,$rec,@err_array) = &get_JGOFS_record(LIST);
  }

  if ($status eq "OK") {
    close LIST;
  } elsif ($status ne "EOF") {
    return "CHECK_FAILED",
		&format_get_JGOFS_record_return_status($command,@err_array);
  }

  ($n_unique_valid_pairs >= 2) && 
	($n_different_good_x >= 2) &&
	($n_different_good_y >= 2) &&
		return "OK";

  ($nrecs == 0) && return "NG","No data in object as presently selected";
  ($nrecs == 1) && return "NG","Only 1 record in object as presently selected";

  ($n_good_x < 2) && 
	return "NG",&report_coordinate_error($n_missing_x,$nrecs,$x_var);
  ($n_good_y < 2) && 
	return "NG",&report_coordinate_error($n_missing_y,$nrecs,$y_var);

#   If we get here, both $x_good & $y_good were >= 2.  Accordingly,
  ($n_unique_valid_pairs == 0) && 
    return "NG",
	   "All valid x values were paired w/missing or invalid y values\n" .
	   "and/or vice versa";
  ($n_dup_pairs >= $nrecs - 1) &&
    return "NG",
	   "Only one distinct data point ($x_var=$unique_valid_pairs_x, " . 
	   "$y_var=$unique_valid_pairs_y) was represented in the data set";

#   Choice of errors at this point.  Not sure if the horizontal/verticals
#   are distinguishable from the "residual", one-good-coord-one-bad issues
#   Assume the horiz/verts more likely
#     Note that we have at least 2 good x's and 2 good y's at this point
#   even if we don't know how they are connected w/each other
  ($n_different_good_x < 2) && 
    return "NG",
	   "This program does not plot vertical lines\n" .
	   "All x's were the same; namely, $unique_valid_x\n";
  ($n_different_good_y < 2) && 
    return "NG",
	   "This program does not plot horizontal lines\n" .
	   "All y's were the same; namely, $unique_valid_y\n";

  $waste = ($n_dup_pairs == 0) ? 
	"Problems " :
	$n_dup_pairs+1 . 
		" of $nrecs points duplicated each other.  Rest of problems\n";
  return "NG",
    "Could not find 2 distinct data points w/good data for both variables\n" .
    $waste .
    "due to pairing valid x's with invalid or missing y's and/or vice versa";
}

sub report_coordinate_error
{
  my ($n_missing,$total,$varname) = @_;
  my ($tmp,$missing1,$missing2);
 
  if ($n_missing >= $total - 1) {
    $tmp = ($n_missing == $total) ? "" : " but 1";
    return "All$tmp values for variable $varname were considered missing data";
  }

  $missing1 = $missing2 = "";
  if ($n_missing != 0) {
    $missing1 = ", non-missing";
    $tmp = ($n_missing == 1) ? "was" : "were";
    $missing2 = "\n$n_missing of $total values $tmp missing.";
  }

  return "Could not find 2 valid (ie, numeric)" .
	 $missing1 .
	 " data for variable $varname" .
	 $missing2;
}
