#!/usr/bin/perl -w { # makeodv.pl Aug 08 $version = "makeodv.pl version 1.1a 25 Apr 2009"; # 25 Apr 09. WJS v 1.1a # Fix typo. Might have goofed up time data # 16 Apr 09. WJS v 1.1 # Tighten up format, etc checking on time variable (there were # warnings that got through to the error log from an object w/o # time info ... so we don't know what user gave makeodv as time var) # 24 Oct 08. WJS v 1.1 # Remove hardcoded /jg/makeodv # 24 Oct 08. WJS v 1.0b # Refer to time splitter # 4 Sep 08. WJS v 1.0a # Avoid "only used one time" diagnostic # 15 Aug 08. WJS v 1.0 # This software based on ODV User's Guide Version 3.4.0 found at # http://odv.awi.de" require ("cgi-lib.pl"); require "wjs_web_perl_utilities.pl"; # Set up environment. Assume .pl routine is in our directory $build_opt_env = "./build-opt-env.pl"; &check_r_access($build_opt_env); require $build_opt_env; # Define form action routine as this file. Undefined SCRIPT_NAME code # to allow possibility that this program is not being run in web environment ($form_action_routine = $ENV{"SCRIPT_NAME"}) || ($form_action_routine = &get_this_file_as_url($0)); # Check that build-opt-env set up things as expected $topdir = &check_build_opt_env_var('OPTHOME',$build_opt_env); $tempdir = &check_build_opt_env_var('USETEMPDIR',$build_opt_env); $tempdir = &abs_filespec($tempdir,"Env var USETEMPDIR"); $bindir = &abs_filespec($topdir,"Env var OPTHOME") . "/bin"; $listvar = "$bindir/listvar"; $list = "$bindir/list"; # ODV info from doc source cited in comments at top # ODV required fields # hash key is our form variable name (and our name for the "concept") # hash value is ODV column name/JGOFS thesaurus name(s) # Thesaurus names are in preferred order (= Bob said depth is better than # depth_w if both) If no var matches, will look for var whose initial # string matches the first thesaurus name provided. All this just # guesses - user gets full varlist to choose from $odv_req_varnames{"cruise"} = "Cruise"; $odv_req_varnames{"station"} = "Station"; $odv_req_varnames{"year"} = "year"; $odv_req_varnames{"month"} = "month"; $odv_req_varnames{"day"} = "day"; $odv_req_varnames{"time"} = "hhmm"; $odv_req_varnames{"lat"} = "Latitude"; $odv_req_varnames{"lon"} = "Longitude"; $odv_req_varnames{"depth"} = "Bot. Depth"; $odv_req_varnames{"station_type"} = "Type"; $max_num_ODV_variables = 50; $max_len_ODV_varname = 60; $max_len_ODV_cruise_datum = 20; $max_len_ODV_station_datum = 20; $ODV_missing_indicator = ""; $ODV_missing_depth_indicator = "0"; $ODV_comment_string_prefix = "//"; $let_ODV_choose_type_field_indicator = "*"; # If next character is changed, code must be added to test to # see if a JGOFS datum contains the character. Tab is as guaranteed # NOT to be in a JGOFS datum as we can get $ODV_col_separator = "\t"; $JGOFS_varname_guess{"cruise"} = "cruiseid"; $JGOFS_varname_guess{"station"} = "station"; $JGOFS_varname_guess{"year"} = "year"; $JGOFS_varname_guess{"month"} = "month"; $JGOFS_varname_guess{"day"} = "day"; $JGOFS_varname_guess{"time"} = "time"; $JGOFS_varname_guess{"lat"} = "lat"; $JGOFS_varname_guess{"lon"} = "lon"; $JGOFS_varname_guess{"depth"} = "depth,depth_w"; $JGOFS_comment_string_prefix = "#"; $JGOFS_missing_indicator = "nd"; # If next character changed, code must be added to check for # that character embedded in a JGOFS datum. Easiest way is # to test the # of data after splitting the record vs the # of variables. $JGOFS_col_separator = "\t"; $task_form_var_name = "Task_to_be_performed"; $data_generation_form_value = "Generate_ODV_data"; $refresh_cache_form_value = "Refresh_varlist_cache"; $object_form_var_name = "object_defn"; $job_id_form_var_name = "unique"; $include_missings_form_var_name = "include_missings"; &ReadParse(*form_info); $task = &get_form_var($task_form_var_name,"OPT"); if (($task eq "") || ($task eq $refresh_cache_form_value)) { &print_form; }elsif ($task eq $data_generation_form_value) { &process_form; } else { &quit ("Bad value for form var $task_form_var_name; namely,",$task); } # Avoid "used one time" diagnostic undef %form_info; exit; } sub print_form { &printheader(); print "\n"; $title = 'ODV download page'; print "$title\n"; print "

$title

\n"; # print < If the #JGOFS-format object does not have this date/time format, please go to the # #time conversion or # #time splitting utilities where it may be possible to generate this format. #EOT print < If the JGOFS-format object does not have this date/time format, the time conversion and/or time splitting utilities may help.
Links to these utilities are on the main "Plotting and Other Operations Menu" (most likely the previous web page) EOT $h = qq|input type="hidden"|; # First time through, get values from environment. Otherwise, # get values from form. As of now, the only way there can be a # 2nd time through is if we are doing a cache refresh # Wherever they come from, put values on the next form $refresh_cache = ($task eq $refresh_cache_form_value); if ($refresh_cache) { # Should really check object somehow after getting it - await # improvement to parse_object_spec stuff $object = &get_form_var($object_form_var_name,"REQ","NOCHECK"); # See process_form for why job_id is OPT. Reason less likely here # than there, but possible $unique = &get_form_var($job_id_form_var_name,"OPT"); } else { $object = &check_build_opt_env_var('OBJECT',$build_opt_env); $ENV{'SUBSELS'} && ( $object .= "(" . $ENV{'SUBSELS'} . ")" ); } $unique || ($unique = $$); print qq|
\n|; print qq|<$h name="$job_id_form_var_name" value="$unique">\n|; print qq|<$h name="$object_form_var_name" value="$object">\n|; ($used_cached_file,@def_varlist) = &get_cached_varlist($listvar,$tempdir,$object,$refresh_cache); # Check that varnames are short enough and that there are not too many # variables. Note that there is a more restrictive limit on the length # of a couple of varnames that cannot be checked here # There is a minimum # of variables, too. That will get indirectly # enforced since program will not allow a JGOFS variable to be mapped # into 2 ODV variables, and the program will not proceed if a "required" # required variable has nothing mapped to it. $nvar = 0; foreach (@def_varlist) { foreach (split (' ',$_)) { ($_ eq '>') && next; (length($_) > $max_len_ODV_varname) && &quit ("JGOFS variable name $_ too long. ", "ODV maximum is $max_len_ODV_varname characters"); $nvar++; } } ($nvar > $max_num_ODV_variables - 1) && &quit("JGOFS object has too many variables as presently subselected", "Object has $nvar variables. This program adds 1 variable. ", "ODV maximum is $max_num_ODV_variables"); foreach ("cruise","station","lat","lon","year","month","day","time","depth"){ $required = ($_ eq "depth") ? 0 : 1; &print_varlist ($_,$JGOFS_varname_guess{$_},$required); } print qq|

lines that are missing values for required ODV variables\n"; print qq|

\n"; if ($used_cached_file) { print << "XXstuffXX-040";


if the variable lists above (which are cached) look incorrect XXstuffXX-040 } print "

\n"; exit; } sub print_varlist { my ($req_var,$guess,$required,$dummy) = @_; my ($i,$best,$best_guess,$next_best,$default_guess,$next_best_rank); # Protect against typos in data structure defns ((defined $required) && (! defined $dummy)) || &quit ("Internal error: print_varlist not called w/3 args"); (($req_var ne "") && ($guess ne "") && ($required ne "")) || &quit ("Internal error: print_varlist called w/empty strings"); # Try to find a JGOFS variable that is likely to correspond to the # ODV variable. 1st choice: JGOFS variable which matches 1st item in # guess list we receive. If no 1st choice, use JGOFS variable which # matches highest item in guess list we receive. Default choice: # 1st JGOFS variable which begins with the 1st item in the guess list we # receive. Do not present overly-long varnames as a guess. (Allow # user to puzzle over why not, manually select them, and get officially # dinged during forms processing. We COULD just not present overly- # long varnames as a choice at all, but I think that's even more confusing) ($best_guess,$guess) = split(/,/,$guess,2); $best = $next_best = $default_guess = ""; $next_best_rank = 99999; foreach (@def_varlist) { foreach $var (split (' ',$_)) { ($var eq '>') and next; ($var eq $best_guess) && ($best = $var) && last; if ( ! $default_guess) { ($var =~ /^$best_guess/) && ($default_guess = $var); } if ($guess) { $i = 0; foreach (split (/,/,$guess)) { $i++; ($next_best_rank <= $i) && last; ($var eq $_) && ($next_best = $var) && ($next_best_rank = $i) && last; } } } $best && last; } ($guess = $best) || ($guess = $next_best) || ($guess = $default_guess); print "

Variable corresponding to ODV requirement for $req_var\n"; $name = &form_var_name($req_var); print qq|"; return; } sub process_form { &printheader(); print "\n"; # If we are here because of a web page, get unique from that page # If we were called directly (eg, args in QUERY_STRING), generate our # own unique stamp $unique = &get_form_var($job_id_form_var_name,"OPT"); $unique || ($unique = $$); $include_records_w_missing_required_fields = &get_form_var($include_missings_form_var_name,"REQ","NUMBER"); $title = 'ODV spreadsheet generation results page'; print "$title\n"; print "

$title

\n"; print "
\n";
$open_pre_tag = 1;	# Global variable used by quit

$object = &get_form_var($object_form_var_name,'REQ','NOCHECK');
($status,$errmsg,undef,$object_name) = &parse_object_spec($object);
($status eq "OK") || 
		&quit ("Parsing problem",$errmsg,"with object spec",$object);

#   Don't be deceived into thinking that the varlist we end up with after
#   the next statement has anything to do w/the varlist in the print_form
#   section.  We COULD check that the next line used the cache, but that
#   would disable stand-alone use of process_form
(undef,@def_varlist) = &get_cached_varlist($listvar,$tempdir,$object,0);
foreach (@def_varlist) {
  foreach (split (' ',$_)) {
    ($_ eq '>') && next;
    $master_varlist{$_} = 1;
  }
}


foreach ("cruise","station","lat","lon","year","month","day","time","depth") {
  $required = ($_ eq "depth") ? 'OPT' : 'REQ';
  $odv_varname = $odv_req_varnames{$_};
  $temp = &get_form_var(&form_var_name($_),$required);
  $odv_to_jgofs_varname{$odv_varname} = $temp;
  if ($temp) {
    (defined $master_varlist{$temp}) || 
		&quit ("$temp not a variable found in this JGOFS object");
    $jgofs_to_odv_varname{$temp} && 
	&quit ("Must use different JGOFS object variable name" . 
						" for each ODV variable",
		"JGOFS variable name $temp used more than once");
    $jgofs_to_odv_varname{$temp} = $odv_varname;
  }
}

#   ODV wants .txt extension
$outfile_name = "ODV_" . $object_name. "_" . $unique . ".txt";
$outfile = "$tempdir/$outfile_name";

$usetempaddr = &check_build_opt_env_var('USETEMPADDR',$build_opt_env);
$outfile_url = "$usetempaddr/$outfile_name";

if (-e $outfile) {
#   Use get_system_uptime (now in download_inquiry.pl) to check
#   $unique vs system age to see if unlink below is "proper"; eg, came
#   from a previous system incarnation
 (unlink $outfile);
}
$! = 0;
(open OUT,"> $outfile") || 
			&quit ("Cannot open output file $outfile. \$! = $!");

(defined $JGOFS_col_separator) ||
	&quit  ("Internal problem.  Undefined JGOFS_col_separator");
($JGOFS_col_separator eq "\t") && ($sep_switch = "-t");
($JGOFS_col_separator eq " ") && ($sep_switch = "-s");
($JGOFS_col_separator eq ",") && ($sep_switch = "");
(defined $sep_switch) || 
	&quit  ("Internal problem.  Illegal JGOFS_col_separator.",
		"Bad value (if printable) = $JGOFS_col_separator");
&check_x_access($list);
$command = qq|$list -f -l -z $sep_switch -forceheader "$object"|;
$! = $? = 0;
(open ($FH,"$command |")) || 
	&quit ("Failure to open pipe from $command", "\$! = $!; \$? = $?");

$n_lines = 0;

#   Copy comment lines leading up to varlist
($status,$line,@err_array) = &get_JGOFS_record($FH);
while (($status eq "OK") && ($line =~ /^$JGOFS_comment_string_prefix.*/)) {
  $n_lines++;
  $line =~ s/$JGOFS_comment_string_prefix/$ODV_comment_string_prefix/o;
  print OUT $line;
  ($status,$line,@err_array) = &get_JGOFS_record($FH);
}
($status eq "OK") ||
	&quit ("Failure before finding varlist",
	       &format_get_JGOFS_record_return_status($command,@err_array));
$n_lines++;
chomp $line;

#  do varlist
$nvar = 0;
foreach (split (' ',$line)) {
  $out_varname = $jgofs_to_odv_varname{$_} ? $jgofs_to_odv_varname{$_} : $_;
  push  @out_varname_list, $out_varname;
  $var_position{$out_varname} = $nvar++;
}
#  At this point, out_varname_list contains all the JGOFS varnames w/
#  the varnames that correspond to ODV required varnames taking their
#  ODV names.  The ODV required station_type is not in the list, and the
#  ODV required depth might not be there.  Add those, and make sure you
#  add the required data the same way later!
push @out_varname_list,$odv_req_varnames{"station_type"};
$odv_to_jgofs_varname{"depth"} || 
			push @out_varname_list,$odv_req_varnames{"depth"};
print OUT join($ODV_col_separator,@out_varname_list),"\n";


#   process data
$n_data_lines_in = $n_data_lines_out = $n_time_roundoff_abnormalities = 0;
($status,$line,@err_array) = &get_JGOFS_record($FH);
while ($status eq "OK") {
  $n_lines++;
  if ($line =~ /^$JGOFS_comment_string_prefix.*/) {
     $line =~ s/$JGOFS_comment_string_prefix/$ODV_comment_string_prefix/o;
     print OUT $line;
  } else {
    $n_data_lines_in++;
    chomp $line;
    @data = split (/$JGOFS_col_separator/,$line);
#      If needed, check that we have all required data
    if ( ! $include_records_w_missing_required_fields) {
      $skip_it = 0;
      foreach ("cruise","station","lat","lon","year","month","day","time") {
	$i = $var_position{$odv_req_varnames{$_}};
	$skip_it = ($data[$i] eq $JGOFS_missing_indicator);
	$skip_it && last;
      }
      if ($skip_it) {
	($status,$line,@err_array) = &get_JGOFS_record($FH);
	next;
      }
    }
#     Add missing required fields.  Must coordinate this code w/the
#     code above that adds the missing required field names.
    push @data,$let_ODV_choose_type_field_indicator;
    $odv_to_jgofs_varname{"depth"} || push @data,$ODV_missing_depth_indicator;
#     Change all JGOFS missing values to ODV missing values
    $i = 0;
    foreach (@data) {
      ($data[$i] eq $JGOFS_missing_indicator) && 
					($data[$i] = $ODV_missing_indicator);
      $i++;
    }
#     If depth IS in JGOFS object and was missing, it was changed to
#     ODV missing.  Further change it to the special ODV missing depth
#     indicator (hope that's what they want...)
    if ($odv_to_jgofs_varname{"depth"}) {
      $i = $var_position{$odv_to_jgofs_varname{"depth"}};
      ($data[$i] eq $ODV_missing_indicator) && 
				($data[$i] = $ODV_missing_depth_indicator);
    }       

#     Various formatting checks
    $i = $var_position{$odv_req_varnames{"cruise"}};
    $cruise_datum = $data[$i];
    (length($cruise_datum) > $max_len_ODV_cruise_datum) &&
	&quit ($odv_to_jgofs_varname{"cruise"} . " datum too long.",
		"Max length = $max_len_ODV_cruise_datum",
		"Offending datum = $cruise_datum",
		"in input data line $line");

    $i = $var_position{$odv_req_varnames{"station"}};
    $station_datum = $data[$i];
    (length($station_datum) > $max_len_ODV_station_datum) &&
	&quit ($odv_to_jgofs_varname{"station"} . " datum too long.",
		"Max length = $max_len_ODV_station_datum",
		"Offending datum = $station_datum",
		"in input data line $line");

    $i = $var_position{$odv_req_varnames{"time"}};
    if ($data[$i] ne $ODV_missing_indicator) {
      ($hhmm,$dec_min,$dummy) = split (/\./,$data[$i]);
      $dummy && &quit ("Badly formatted time info: 2 .s in datum $data[$i]" .
			" on input data line $line");
      (defined $hhmm) || 
		&quit("Could not get hhmm time data from datum $data[$i]" .
			" on input data line $line");
      &valid_number($hhmm) || 
		&quit ("Non-numeric hhmm time value: $hhmm " .
			"(derived from datum $data[$i])"     .
			" on input data line $line");
      ($hhmm >= 2400) && 
	   &quit("Badly formatted time info: hhmm >= 2400 in datum $data[$i]"
			.  " on input data line $line");
      if ((defined $dec_min) && ($dec_min ne "")) {
        ($dig1) = ($dec_min =~ /^(.)/);
        if ($dig1 >= 5) {
#	    Deliberately avoid rounding to next day ...
          if  ($hhmm < 2359) {
	    $min = $hhmm%100;
	    ($min >= 60) && &quit
	       ("Badly formatted time info: minutes >= 60 in datum $data[$i]"
			. " on input data line $line");
	    $min++;
	    $hhmm += ($min < 60) ? 1 : 40;
	    (length($hhmm) < 4) && ($hhmm = "0" . $hhmm);
	    (length($hhmm) < 4) && ($hhmm = "0" . $hhmm);
	    (length($hhmm) < 4) && ($hhmm = "0" . $hhmm);
          } else {
	    $n_time_roundoff_abnormalities++;
	  }
        }
        $data[$i] = $hhmm;
      }
    }
  }
  $cruise_station_key = $cruise_datum . $ODV_col_separator . $station_datum;
  if (defined $last_outline{$cruise_station_key}) {
    if ($last_outline{$cruise_station_key} != $n_data_lines_out) {
      $n_data_lines_out++;
      &quit("Violation of ODV requirement that all station data be together",
	    "Data for cruise $cruise_datum, station $station_datum appears" .
		" in output line $n_data_lines_out",
	    "It was previously output on line " .
					$last_outline{$cruise_station_key} 
	   )
    }
  }
  $last_outline{$cruise_station_key} = ++$n_data_lines_out; 
  print OUT join($ODV_col_separator,@data),"\n";
  ($status,$line,@err_array) = &get_JGOFS_record($FH);
}

($status eq "EOF") ||
	&quit ("Problem while processing JGOFS object",
	       &format_get_JGOFS_record_return_status($command,@err_array));
#   Better to close before handling pipe error above, since if there WAS
#   pipe error, we'll exit with OUT open.  However, pipe error much more
#   interesting, and bookkeeping on pending troubles is a drag...
$! = 0;
(close OUT) || &quit("Trouble closing output file $outfile. \$! = $!");

print "Conversion statistics: \n";
print "Number of input lines: $n_lines\n";
print "Number of input data lines: $n_data_lines_in\n";
print "Number of output data lines: $n_data_lines_out\n";
if ($n_data_lines_out == 0) {
  print "WARNING: no data in output\n";
  if ($n_data_lines_in == 0) {
    print "because no data lines were input.  Input selection too strict?\n";
  } else {
    print "because input data had missing values for ODV required variables?\n";
  }
}
if ($n_time_roundoff_abnormalities != 0) {
  print "WARNING: $n_time_roundoff_abnormalities times after 2359.5 found" .
		" and rounded down to 2359\n";
}

print "";

print qq|\n| .
      qq|Download ODV spreadsheet (Right click; | .
      qq|"Save link/target as" may work)\n|;

print "
\n"; undef $version; # Global variable used in quit undef $open_pre_tag; # Global variable used in quit return; } sub form_var_name { # Help form writer and form reader use same names my ($string,$dummy) = @_; ((defined $string) && ! (defined $dummy)) || &quit ("Internal error - form_var_name not called w/exactly 2 args"); return "odv_$string"; }