#!/usr/local/bin/perl -w
# Program fill_in.pl
# Program reads in data via a JGOFS object, fills in missing data using
# the most recent valid value for that field and outputs a new flat file.
# Run time parameters are specified via a configuration file given
# at run-time.
#    fill_in.pl run-time-parameters-file-spec
#
# The configuration file holds "parameter = value" lines.  Recognized
# parameters:
#    input_object         (required) object handed to the list program
#    fill_in_field_names  (required) comma separated field names to fill in
#    output_file_name     (optional) defaults to the last path component
#                         of input_object

use strict;
use warnings;

my $version = "V1.00, April 10, 2000";
# April 10, 2000  V1.0  R. Groman

$| = 1;

# Prefixes placed in front of every line that sendmessage() prints.
my $error   = "&x";
my $warning = "#";
my $info    = " ";

# State shared between the subroutines below.
my %config_param;           # parameter => value, from the configuration file
my @list_of_field_names;    # field names reported by listvar for the object

my $date               = scalar localtime;
my $configuration_file = $ARGV[0];

print STDOUT ("\n Program $0 Version: $version\n");

# Without an argument there is nothing to read; say so explicitly.
unless (defined $configuration_file) {
    sendmessage($error, "No configuration file was specified",
                "Usage: $0 run-time-parameters-file-spec");
    exit;
}

print STDOUT (" configuration file is $configuration_file\n",
              " Date of run: ", $date, "\n\n");

my @required = ( "input_object", "fill_in_field_names" );
read_configuration_file($configuration_file);

my $okay = "yes";
for my $name (@required) {
    if (exists $config_param{$name}) {
        my $shown = defined $config_param{$name} ? $config_param{$name} : '';
        print STDOUT ("\t$name=$shown\n");
    }
    else {
        $okay = "no";
        sendmessage($error, "$name is missing from configuration ",
                    "file=$configuration_file");
    }
}
if ($okay eq "no") {
    sendmessage($error,
        "One or more parameters are missing from the configuration file",
        "Cannot continue.");
    exit;
}

my $object = $config_param{'input_object'};
my $status = open_input_object($object);
if ($status =~ m/failed/i) {
    sendmessage($error, "Could not open input object $object",
                "Status=$status");
    exit;
}

my $output_file;
if (exists $config_param{'output_file_name'}
    and defined $config_param{'output_file_name'}) {
    $output_file = $config_param{'output_file_name'};
}
else {
    # Default: everything after the last "/" of the object name.
    $output_file = $object;
    $output_file =~ s/.*\/(.*)/$1/;
}
print STDOUT (" Output file name will be $output_file\n");

$status = open_output_file($output_file);
if ($status =~ m/failed/i) {
    sendmessage($error, "Could not open output file $output_file",
                "Error=$status");
    exit;
}

process_file_in($config_param{'fill_in_field_names'});

print STDOUT ("\nProgram done.\n\n");
exit;

#---------------------------------------------------------------------
sub read_configuration_file {
# Open and read the configuration file named by the first parameter,
# $_[0], and store its contents in the hash %config_param.
# All white space is removed from each line before it is examined.
# Lines beginning with "#" are treated as comments and blank lines are
# ignored.  Every remaining line is expected to look like
#       parameter = value
# and is stored as
#       $config_param{"parameter"} = value
# Only the first "=" separates parameter from value, so a value may
# itself contain "=".  A parameter with nothing after the "=" is stored
# with an undefined value.  A line without "=" produces a warning and
# is skipped.
# Exits the program if the file cannot be opened.

    my $filename = $_[0];

    unless (open CONFIG_FILE, '<', $filename) {
        sendmessage($error, "Could not open configuration file=$filename",
                    "Error code=$!. Cannot continue.");
        exit;
    }

    while (my $line = <CONFIG_FILE>) {
        chomp $line;
        $line =~ s/\s//g;
        next if $line =~ m/^#/;
        next unless $line =~ m/\S+/;
        unless ($line =~ m/=/) {
            sendmessage($warning,
                "No equal sign in line of configuration file=$filename",
                "Line is=$line");
            next;
        }

        my ($parameter, $value) = split /=/, $line, 2;
        # "parameter=" means "no value given"; keep that as undef so the
        # callers' defined() tests continue to work.
        undef $value unless defined $value and length $value;
        $config_param{$parameter} = $value;
    }
    close CONFIG_FILE;
}

#---------------------------------------------------------------------
sub sendmessage {
# Send a message to the user.
#   $_[0]  prefix printed in front of every output line
#   $_[1]  first line of the message
#   $_[2]  second line of the message
# The message is written to a scratch file under /tmp (kept for the
# mail command below, which is currently disabled) and then printed to
# STDOUT.  Always returns 0.

    my ($prefix, $message0, $message1) = @_;

    # localtime() returns the year as an offset from 1900 and the month
    # as 0..11.
    my ($sec, $min, $hour, $mday, $mon, $year, $yday)
        = (localtime(time))[0 .. 5, 7];
    $year += 1900;
    $mon++;
    ($mon, $mday, $hour, $min, $sec)
        = map { sprintf "%02d", $_ } ($mon, $mday, $hour, $min, $sec);

    # Keep the path free of any open-mode prefix so that unlink() below
    # removes the file that was actually created.
    my $mailfile = "/tmp/sendmess" . $year . $yday . $hour . $min . $sec
                 . ".tmp";

    if (open TEMPFILE, '>', $mailfile) {
        my $who;
        if    (exists $ENV{'REMOTE_HOST'}) { $who = $ENV{'REMOTE_HOST'}; }
        elsif (exists $ENV{'REMOTE_ADDR'}) { $who = $ENV{'REMOTE_ADDR'}; }
        else                               { $who = "not available"; }

        print TEMPFILE ("Message from $0\n");
        print TEMPFILE (" Date of message: $year/$mon/$mday $hour:$min\n");
        print TEMPFILE (" From: $who\n");
        print TEMPFILE (" $message0\n");
        print TEMPFILE (" $message1\n");
        close TEMPFILE;
####    `/bin/mail -s "Problem with $0" dmo\@globec.whoi.edu <$mailfile`;
        unlink $mailfile;
    }

    print STDOUT ($prefix, " $message0\n");
    print STDOUT ($prefix, " $message1\n");
    print STDOUT ($prefix, " Above message from $0\n");
    print STDOUT ($prefix, " Date of message: $year/$mon/$mday $hour:$min\n");
    return 0;
}

#-------------------------------------------------------------------
sub open_input_object {
# Open the input object specified as $_[0].
# Starts the list program on the object and leaves its output readable
# through the file handle DATAIN, then runs listvar on the same object
# and stores the field names (white space removed) in
# @list_of_field_names.
# Returns "Okay" on success, or a string starting with "Failed" if the
# pipe could not be opened or listvar returned nothing.
# NOTE(review): the object name is interpolated into shell command
# lines; it is assumed to come from a trusted configuration file.

    my $object_name = $_[0];

    # The list options are assumed to request header and comment lines,
    # per the original author's note -- TODO confirm against list's usage.
    my $listprogram = "/data5/globec/bin/list -n -t -f ";
    my $listvar     = "/data5/globec/bin/listvar ";
    my $command     = $listprogram . '"' . $object_name . '"';

    unless (open DATAIN, '-|', $command) {
        my $err = $!;
        return "Failed, error=$err";
    }

    @list_of_field_names = `$listvar "$object_name"`;
    unless (@list_of_field_names) {
        my $err = $! . "and" . $?;
        sendmessage($warning, "Could not obtain list of field names",
                    "Error=$err");
        return "Failed, error=$err";
    }

    # Removing all white space also removes the trailing newline.
    s/\s//g for @list_of_field_names;

    return "Okay";
}

#-------------------------------------------------------------------
sub open_output_file {
# Open (create or truncate) the output file specified in $_[0] on the
# file handle DATAOUT.
# Returns "Okay" on success or "Failed, error=..." otherwise.

    my $output_file = $_[0];

    unless (open DATAOUT, '>', $output_file) {
        my $err = $!;
        return "Failed, error=$err";
    }
    return "Okay";
}

#-------------------------------------------------------------------
sub process_file_in {
# Process input data from DATAIN and output to DATAOUT.  The
# passed parameter $_[0] contains a comma separated list of field names
# that should be filled in from the last valid value; if it is not
# supplied, the fill_in_field_names configuration parameter is used.
# Assumptions.
#   1. Input object includes comments and header as output by the list
#      program
#   2. Currently assumes data comes in as a single level flat
#      file via list
# Lines starting with "#" are copied through unchanged.  The first
# other line is taken as the tab separated header and is also copied
# through unchanged.  Every later line is split on tabs, white space is
# removed from each value, and for each fill-in field a value of "nd"
# is replaced by the previous non-"nd" value of that field ("nd" if
# there has been none yet).
# Reports the record counts, closes both handles and returns "Okay",
# or "Failed, error=..." if the output file could not be closed cleanly.

    my $fill_in_list = defined $_[0] ? $_[0]
                                     : $config_param{'fill_in_field_names'};
    $fill_in_list = '' unless defined $fill_in_list;

    my @fill_in_names = split /,/, $fill_in_list;
    s/\s//g for @fill_in_names;

    # Check that field names are within data.  Issue warning if not.
    my %is_known_field = map { $_ => 1 } @list_of_field_names;
    for my $name (@fill_in_names) {
        next if $is_known_field{$name};
        sendmessage($warning, "$name not in input data", "Continuing");
    }

    my %fill_wanted = map { $_ => 1 } @fill_in_names;
    my (@field_names_from_data, @old_values);
    my $header_seen = 0;
    my $records_in  = 0;
    my $records_out = 0;

    while (my $line = <DATAIN>) {
        chomp $line;
        $records_in++;

        if ($line =~ m/^#/) {
            print DATAOUT ($line, "\n");
            $records_out++;
            next;
        }

        my @new_values = split /\t/, $line;
        s/\s//g for @new_values;

        unless ($header_seen) {
            # First non-comment line: the field names.
            $header_seen           = 1;
            @field_names_from_data = @new_values;
            print DATAOUT ($line, "\n");
            $records_out++;

            # Nothing valid has been seen yet for any field.
            @old_values = ("nd") x @field_names_from_data;
            next;
        }

        my @out_values;
        for my $i (0 .. $#field_names_from_data) {
            # A row shorter than the header yields empty trailing fields.
            my $value = defined $new_values[$i] ? $new_values[$i] : '';
            if ($fill_wanted{ $field_names_from_data[$i] }) {
                if ($value eq "nd") { $value = $old_values[$i]; }
                else                { $old_values[$i] = $value; }
            }
            push @out_values, $value;
        }

        # Exactly one output column per header field; nothing is
        # written when the header line had no fields.
        if (@out_values) {
            print DATAOUT (join("\t", @out_values), "\n");
            $records_out++;
        }
    }

    sendmessage($info, "$records_in records in", "$records_out records out");

    close DATAIN;
    # Buffered write errors (for example a full disk) only show up here.
    unless (close DATAOUT) {
        my $err = $!;
        sendmessage($warning, "Could not close output file cleanly",
                    "Error=$err");
        return "Failed, error=$err";
    }
    return "Okay";
}
#-----------------------------------------------------------------------