#!/usr/bin/perl -w
#
# mocpro_level2_conf.pl (uses conf file)
# May 20, 1999.  Check for tow number with the word Tow in it and try
#     to isolate the real tow number, e.g. cruise EN263.  rcg
# May 29, 1999.  Need to work on tow vs cast/comment test.  There
#     are problems with non-numeric values.  Not repaired yet.
#     Need to repair comparison in numalpha to test correctly for
#     non-numeric characters.  rcg
# June 4, 1999.  Use list rather than listgb program.  rcg
# June 8, 1999.  Add additional code to handle tow & cast numbers with
#     non-numeric characters.  rcg
# October 3, 2001.  V2.12so - Set up for Southern Ocean.  Based on GB version
#     Naming convention for tow and
# December 24, 2001.  V2.13ZG.  Modified for ZooGene data.  rcg
# July 10, 2006.  V2.15CM  Modify for CMarZ data
#
# Jan. 30, 2012.  V3.0BC  Modified for BCO-DMO data.  Scripts
#     on dmoserv2 at /data/Scripts/ctd_mocness  mda + rcg
# February 13, 2012.  V3.01BC  Divide comment so it fits on two lines.  rcg
#
# November 7, 2012.  V3.1  Modified to account for those instances when
#     cruiseid column is missing from eventlog.  mda + rcg
# February 21, 2014.  V3.1DM3  Copied over to dmoserv3 to serve ctd_mocness data
#     that lives there.  mda
# April 11, 2014.  V3.2.  Change where the script finds the make_log_entry routine
#     so it finds it from the "library" area.  rcg
# April 11, 2014.  V3.21.  Add code to handle no event log url's being used
#     since there is no station information.  Improve subroutine looking at
#     the eventlogs to declare all variable names.  Check that there are data
#     for all the field names specified for this level, level 2.  rcg
#
# Passed parameter
#     $ARGV[0] = configuration file
#     $ARGV[1] = data directory for these data
#     $ARGV[2] = MOC1, or MOC10 or MOC.25 specifying which MOCNESS
#                data type to view
#     $ARGV[3] = file type to process, e.g. pro, tab, raw (note:
#                the code will have to change to accommodate
#                other than pro format.)
#     $ARGV[4] = cruiseid
#     $ARGV[5] = local four digit year
#
# Output (STDOUT): a header, the field-name lines for this level and the
# levels below it, then one tab-separated record per matching data file.
# Each record holds the tow number, the day/month/station columns named in
# fieldnames_level_2, and a "(level_3_script ...)" string built from the
# configuration file, data type, file type, data file and cruise id.

use strict;
use warnings;

use lib '/data1/www/bco-dmo/perlmodule';
#use lib '/home/rgroman/perllib';

# The variables declared with "our" were package globals in earlier versions
# and are kept that way in case the required library files refer to them.
# NOTE(review): confirm against the libraries, then tighten to "my".
our $version = "V3.21/April 11, 2014";

# Message prefixes.  They must be set before the first error path below,
# which interpolates $error into its output.
our $error   = "&x";
our $warning = "#";

$| = 1;    # set to flush output

unless (defined $ARGV[0]) {
    print STDOUT (
        "$error Displayed by $0 \n# $version\n",
        "$error Required configuration file not specified.\n"
    );
    exit;
}

require('sendmessage.pl');
require('get_config_file_hash.pl');
require('make_log_entry.pl');

our $configuration_file = $ARGV[0];
my $i_am_level = 2;
my @required   = (
    'fieldnames_level_0',     'fieldnames_level_1',
    'fieldnames_level_2',     'fieldnames_level_3',
    'level_3_script',         'data_directories',
    'moc10patterns',          'moc1patterns',
    'mocpoint25patterns',     'instrument_column_name',
    'deployment_column_name',
);

my $config_param_ref = get_config_file_hash($configuration_file);
unless (defined $config_param_ref) {
    print STDOUT ("#**error, could not get configuration file contents\n");
    exit;
}
our %config_param = %{$config_param_ref};

# Report every required parameter that is missing or undefined, then stop.
my @missing =
    grep { !(exists $config_param{$_} and defined $config_param{$_}) }
    @required;
if (@missing) {
    my $status = ' ' . join('', map { ' ' . $_ } @missing);
    make_log_entry("Required configuration parameters not defined=$status");
    sendmessage($error, "Required configuration parameters $status",
        "not defined.");
    exit;
}

# This is a spot where we might specify a data object from the config file
#$inventory_object = "/globec/gb/inventory";
#$inventory_object = "/globec/unaffiliated/inventory_lmg0602";

my $moc10patterns      = $config_param{'moc10patterns'};
my $moc1patterns       = $config_param{'moc1patterns'};
my $mocpoint25patterns = $config_param{'mocpoint25patterns'};
my $level_3_script     = $config_param{'level_3_script'};

our ($moc_dir, $datatype, $filetype, $cruiseid, $year);
($configuration_file, $moc_dir, $datatype, $filetype, $cruiseid, $year) = @ARGV;
my $filetypeuc = uc $filetype;

# Set up fieldname lists: fieldnames_level_0 .. fieldnames_level_9, stopping
# at the first level that is not configured.
my @fieldnames_level;
for my $level_number (0 .. 9) {
    my $level = 'fieldnames_level_' . $level_number;
    last unless exists $config_param{$level} and defined $config_param{$level};
    $fieldnames_level[$level_number] = $config_param{$level};
}

print STDOUT (
    "#Displayed by $0 \n# Version: $version\n",
    "# For $moc_dir, \n# $datatype, $filetype, $cruiseid, $year\n"
);

# Pick the file-name patterns file that belongs to the requested data type.
$datatype = uc $datatype;
my %patterns_file_for = (
    'MOC10'  => $moc10patterns,
    'MOC1'   => $moc1patterns,
    'MOC.25' => $mocpoint25patterns,
);
unless (exists $patterns_file_for{$datatype}) {
    print STDOUT "# $0 cannot recognize datatype=$datatype";
    exit 0;
}
my $patterns_file = $patterns_file_for{$datatype};
open(my $pattern_fh, '<', $patterns_file)
    or die "# $0 could not read $patterns_file patterns file, $!";
my @pattern = <$pattern_fh>;
chomp @pattern;
close $pattern_fh;

# Only the "pro" format is handled.  Debug output is switched on by a
# configuration value of "debug" that starts with y/Y.
my $debug = 'no';
if ($filetype eq "pro") {
    if (    exists $config_param{'debug'}
        and defined $config_param{'debug'}
        and $config_param{'debug'} =~ m/^y/i)
    {
        $debug = 'yes';
    }
}
else {
    print STDOUT "# $0 cannot handle filetype=$filetype";
    exit 0;
}

# Collect eventlog_url_1 .. eventlog_url_N, stopping at the first gap.
my $max_number_of_urls = 10;
my @url                = ();
for my $url_index (1 .. $max_number_of_urls) {
    my $key = "eventlog_url_" . $url_index;
    last unless exists $config_param{$key} and defined $config_param{$key};
    push @url, $config_param{$key};
}

my @instrument_column_name =
    split /,/, $config_param{'instrument_column_name'};

# "does not exist" is the marker get_some_data_from_eventlog() looks for
# when the eventlog has no deployment (cruiseid) column.
my @deployment_column_name;
if (    exists $config_param{'deployment_column_name'}
    and defined $config_param{'deployment_column_name'}
    and $config_param{'deployment_column_name'} =~ m/\w{1,}/)
{
    @deployment_column_name =
        split /,/, $config_param{'deployment_column_name'};
}
else {
    @deployment_column_name = ("does not exist");
}
print STDOUT ("#**debug,deployment_column_name=@deployment_column_name\n")
    if $debug eq 'yes';

my @column_names = split /,/, $fieldnames_level[$i_am_level];
print STDOUT ("# **debug, column_names=@column_names\n") if $debug eq 'yes';

my $index    = -1;    # counts the data files actually processed
my @data     = ();
my @datafile = glob("$moc_dir/*.$filetype");    # get the data files
push @datafile, glob("$moc_dir/*.$filetypeuc");

FILE:
foreach my $checkfile (@datafile) {

    # Skip files whose name matches none of the configured patterns.
    my $matched = 0;
    foreach my $pattern (@pattern) {
        if ($checkfile =~ m/$pattern/) {
            $matched = 1;
            last;
        }
    }
    next FILE unless $matched;

    my $datafile = $checkfile;
    open(my $profile_fh, '<', $datafile)
        or die "# Could not open data file $datafile, $!";

    $index++;
    my $tow         = "nd";
    my $month_local = 0;
    my $day_local   = 0;

    # Read header records until the date record (or anything unexpected).
    READDATA:
    while (my $record = <$profile_fh>) {
        $record =~ s/^\% /%/;
        chomp $record;
        print STDOUT ("#***debug, record=$record\n") if $debug eq 'yes';

        if ($record =~ m/Tow/) {
            # The tow identifier is the second whitespace-separated field.
            my @towline = split ' ', $record;
            $tow = $towline[1] if defined $towline[1];
        }
        elsif ($record =~ m/Date:/) {
            ($month_local, $day_local) =
                parse_date_record($record, $datafile, $index);
            last READDATA;
        }
        elsif ($record =~ m/^\d\d.*/) {
            # Old-format file that starts directly with data.  Taking the
            # date from the eventlog was never completed; the unreachable
            # attempt that used to follow this message has been removed.
            print STDOUT ("#**warning in $0\n#\toption not available\n");
            last READDATA;
        }
        else {
            my $message0 = "Unexpected data format found" . " in file " . $datafile;
            my $message1 = "Record read=" . $record;
            sendmessage($warning, $message0, $message1);
            last READDATA;
        }
    }
    close($profile_fh);

    my $string = "("
        . join(" ",
            $level_3_script, $configuration_file, $datatype,
            $filetype,       $datafile,           $cruiseid)
        . ")";

    # Reduce identifiers such as "EN263-Tow12" or "M_003" to the trailing
    # tow number (digits with an optional letter suffix).
    my $townumb = $tow;
    $townumb =~ s/_/-/g;
    if ($townumb =~ m/[-Mm]/) {
        $townumb =~ s/.*-(\d+[a-zA-Z]*)$/$1/;
    }
    if ($townumb =~ m/Tow/) {
        $townumb =~ s/.*-Tow(\d+[a-zA-Z]*)$/$1/;
    }

    my $station = get_some_data_from_eventlog(
        $cruiseid, $datatype, $townumb,
        \@url, \@instrument_column_name, \@deployment_column_name
    );
    $station = 'nd' unless defined $station;

    print STDOUT ("#**debug, townum=$townumb\n") if $debug eq 'yes';
    print STDOUT ("#**debug, day_local[$index]=$day_local\n")
        if $debug eq 'yes';
    print STDOUT ("#**debug, month_local[$index]=$month_local\n")
        if $debug eq 'yes';
    print STDOUT ("#**debug, station=$station\n") if $debug eq 'yes';

    # Need to check that the number of data columns match the number of values
    # and somehow try to match up the field name to the value.
    # The first column is always the tow number, whatever its name is.
    my $dataline = '';
    for my $k (0 .. $#column_names) {
        if ($k == 0) {
            $dataline = $townumb . "\t";
        }
        elsif ($column_names[$k] =~ m/day/) {
            $dataline = $dataline . $day_local . "\t";
            print STDOUT ("# **debug for day, column_names[$k]=$column_names[$k]")
                if $debug eq 'yes';
        }
        elsif ($column_names[$k] =~ m/month/) {
            print STDOUT ("# **debug for month, column_names[$k]=$column_names[$k]")
                if $debug eq 'yes';
            $dataline = $dataline . $month_local . "\t";
        }
        elsif ($column_names[$k] =~ m/station/ or $column_names[$k] =~ m/sta/) {
            print STDOUT ("# **debug for station, column_names[$k]=$column_names[$k]")
                if $debug eq 'yes';
            $dataline = $dataline . $station . "\t";
        }
        elsif ($column_names[$k] =~ m/\>/) {
            # A name containing ">" gets no value of its own.
        }
        else {
            my $mess1 = "Cannot deal with output data column name $column_names[$k]";
            my $mess2 = "It will be ignored.";
            sendmessage($warning, $mess1, $mess2);
        }
    }
    $dataline = $dataline . $string . "\n";
    push @data, $dataline;
}

# Field names for this level and every level below it, then the records.
for my $i ($i_am_level .. $#fieldnames_level) {
    print STDOUT ($fieldnames_level[$i], "\n");
}

foreach my $line (sort numalpha @data) {
    print STDOUT $line;
}

exit 0;

#---------------------------------------------------------------------------
sub numalpha {
    # Sort comparator for output records: compares the first tab-separated
    # field (the tow) of $a and $b, case-insensitively.  Numeric comparison
    # is used only when neither key contains a non-digit character;
    # otherwise the keys are compared as strings.
    # NOTE(review): mixing the two comparisons is not a consistent ordering
    # when numeric and non-numeric tows occur together.
    my ($a1) = split /\t/, $a;
    my ($b1) = split /\t/, $b;
    $a1 = lc $a1;
    $b1 = lc $b1;

    if ($a1 =~ m/\D{1,}/ or $b1 =~ m/\D{1,}/) {
        return $a1 cmp $b1;
    }
    return $a1 <=> $b1;
}

#---------------------------------------------------------------------------
sub monthtonum {
    # Given the text string for the month, $_[0], return the month number,
    # 1 - 12, or "nd" when the string is not a recognized month name.
    my ($name) = @_;
    my %month = (
        jan => 1,  january   => 1,
        feb => 2,  february  => 2,
        mar => 3,  march     => 3,
        apr => 4,  april     => 4,
        may => 5,
        jun => 6,  june      => 6,
        jul => 7,  july      => 7,
        aug => 8,  august    => 8,
        sep => 9,  sept      => 9,  september => 9,
        "oct" => 10, october => 10,
        nov => 11, november  => 11,
        dec => 12, december  => 12,
    );

    return "nd" unless defined $name;
    my $key = lc $name;
    return defined $month{$key} ? $month{$key} : "nd";
}

#---------------------------------------------------------------------------
sub parse_date_record {
    # Extract the local month and day from a "Date:" header record.
    #   $_[0] = the record (already chomped)
    #   $_[1] = data file name, used only in warning messages
    #   $_[2] = index of the data file, used only in debug output
    # Returns ($month, $day).  A value that cannot be determined comes back
    # as 0, or as "nd" when a month name is not recognized; out-of-range
    # values are reported through sendmessage() but still returned.
    my ($record, $datafile, $index) = @_;
    my ($month, $day) = (0, 0);

    if (index($record, "/") > 0) {
        # Slash-separated date in the second field: first part is taken
        # as the month, second part as the day.
        my @dateline   = split ' ', $record;
        my @cruisedate = split(/\//, defined $dateline[1] ? $dateline[1] : '');
        $month = $cruisedate[0];
        $day   = $cruisedate[1];
    }
    elsif ($record =~ m/.*Date.*\D{3,}.*/) {
        # Date written with a month name and a trailing four-character year.
        print STDOUT ("#***debug, in new date format section\n")
            if $debug eq 'yes';
        my @cruisedate = split /\s/, $record;
        for my $t (0 .. $#cruisedate) {
            print STDOUT ("#***debug, cruisedate[$t]=$cruisedate[$t]\n")
                if $debug eq 'yes';
        }
        if (length($cruisedate[$#cruisedate]) == 4) {
            print STDOUT ("#***debug, year? cruisedate[$#cruisedate]=",
                "$cruisedate[$#cruisedate]\n") if $debug eq 'yes';
            my $test = $cruisedate[$#cruisedate-1];
            if ($test =~ m/\D+/) {
                # "... day month year"
                $month = monthtonum($test);
                $day   = $cruisedate[$#cruisedate-2];
                print STDOUT ("#***debug, month? cruisedate[$#cruisedate-1]=",
                    "$cruisedate[$#cruisedate-1]\n") if $debug eq 'yes';
            }
            else {
                $test = $cruisedate[$#cruisedate-2];
                if ($test =~ m/\D+/) {
                    # "... month day year"
                    $month = monthtonum($test);
                    $day   = $cruisedate[$#cruisedate-1];
                    # Keep the whole leading number.  The previous greedy
                    # ".*(\d+)" kept only the last digit (15 became 5).
                    $day =~ s/^\D*(\d+).*$/$1/;
                    print STDOUT ("#***debug, month? cruisedate[$#cruisedate-2]=",
                        "$cruisedate[$#cruisedate-2]\n") if $debug eq 'yes';
                }
                else {
                    sendmessage($warning, "unexpected Date format",
                        "record=$record");
                    $month = 0;
                    $day   = 0;
                }
            }
        }
    }
    else {
        # Fallback: take the third-from-last and second-from-last fields,
        # swapping them when the second one is a month name.
        my @cruisedate = split /\s/, $record;
        $month = $cruisedate[$#cruisedate-2];
        if ($debug eq 'yes') {
            no warnings 'uninitialized';
            print STDOUT ("#***debug, month_local[$index]=$month\n");
        }
        $day = $cruisedate[$#cruisedate-1];
        my $test = monthtonum($day);
        if ($test ne 'nd') {
            $day   = $month;
            $month = $test;
        }
    }

    $month = 0 unless defined $month;
    $day   = 0 unless defined $day;

    {
        # $month may be "nd" or another non-numeric string here; it then
        # compares as 0 and is reported as a bad value.
        no warnings 'numeric';
        if ($month < 1 or $month > 12) {
            my $message0 = "Bad month value of " . $month;
            my $message1 = "Data file=$datafile";
            sendmessage($warning, $message0, $message1);
        }
        if ($day < 1 or $day > 31) {
            my $message0 = "Bad day value of $day";
            my $message1 = "Data file=$datafile";
            sendmessage($warning, $message0, $message1);
        }
    }

    print STDOUT ("#***debug, month_local[$index]=$month\n")
        if $debug eq 'yes';
    print STDOUT ("#***debug, day_local[$index]=$day\n")
        if $debug eq 'yes';

    return ($month, $day);
}

#---------------------------------------------------------------------------
sub get_some_data_from_eventlog {
    # Use the event log object(s) to find the station for a tow.
    # Arguments passed are:
    #   $_[0] = cruiseid
    #   $_[1] = datatype
    #   $_[2] = tow
    #   $_[3] = @url as a reference
    #   $_[4] = @instrument_column_name as a reference
    #   $_[5] = @deployment_column_name as a reference
    # Value returned is:
    #   $station, or "nd" when no eventlog yields one or a command cannot
    #   be started.
    my ($cruiseid, $datatype, $tow, $url_ref,
        $instrument_column_name_ref, $deployment_column_name_ref) = @_;

    print STDOUT ("#**debug in subroutine=$debug\n") if $debug eq 'yes';

    $datatype = uc $datatype;
    my @url                    = @$url_ref;
    my @instrument_column_name = @$instrument_column_name_ref;
    my @deployment_column_name = @$deployment_column_name_ref;

    print STDOUT ("#**debug, cruiseid=$cruiseid, datatype=$datatype, tow=$tow\n")
        if $debug eq 'yes';

    my $listgb         = "/data302/data_server/dmoserv3/bin/list ";
    my $listgb_options = "-c -n -f -t ";
    my $listvar        = "/data302/data_server/dmoserv3/bin/listvar ";

    # NOTE(review): both commands below are run through the shell with
    # configuration and command-line values interpolated into them.

    URL:
    for my $url (@url) {
        next URL unless defined $url and $url =~ m/\w{2,}/;

        # The field-name list depends only on the URL, so fetch it once per
        # URL rather than once per deployment column name.
        my $fieldnames_command = $listvar . ' ' . $url . "|";
        print STDOUT ("#**debug, fieldnames_command=$fieldnames_command\n")
            if $debug eq 'yes';
        my $listvar_fh;
        unless (open($listvar_fh, '-|', $listvar . ' ' . $url)) {
            my $message0 = "Could not get fieldnames";
            my $message1 = "fieldnames_command=$fieldnames_command";
            sendmessage($warning, $message0, $message1);
            return "nd";
        }
        my @fieldnames = <$listvar_fh>;
        close $listvar_fh;
        for my $fieldname (@fieldnames) {
            $fieldname =~ s/^\s+//;
            $fieldname =~ s/\s+$//;
        }
        my %is_fieldname = map { ($_ => 1) } @fieldnames;

        DEPLOYMENT:
        for my $deployment_column_name (@deployment_column_name) {
            print STDOUT ("#**debug, deployment_column_name=$deployment_column_name\n")
                if $debug eq 'yes';

            # The deployment column must be present in this eventlog unless
            # the configuration says there is no such column at all.
            my $has_deployment_column =
                $deployment_column_name ne "does not exist";
            next DEPLOYMENT
                if $has_deployment_column
                and not $is_fieldname{$deployment_column_name};

            # Use the first configured instrument column the eventlog has.
            my ($instrument_column_name) =
                grep { $is_fieldname{$_} } @instrument_column_name;
            next DEPLOYMENT unless defined $instrument_column_name;

            my $object_spec = '"' . $url . '(';
            if ($has_deployment_column) {
                $object_spec .= $deployment_column_name . '=' . $cruiseid . ',';
            }
            $object_spec .=
                $instrument_column_name . '=' . $datatype . ',station,cast)"';

            my $input = $listgb . $listgb_options . $object_spec . "|";
            print STDOUT ("\n#***debug, input=$input\n") if $debug eq 'yes';

            my $eventlog_fh;
            unless (open($eventlog_fh, '-|',
                         $listgb . $listgb_options . $object_spec)) {
                my $message0 = "Could not open eventlog to get day and month";
                my $message1 = "open=$input";
                sendmessage($warning, $message0, $message1);
                return "nd";
            }
            my @event_data = <$eventlog_fh>;
            close $eventlog_fh;    # closed on every path, including "next URL"

            # Fewer than two records: try the next URL.
            next URL if $#event_data < 1;

            my $station = get_station($tow, @event_data);
            return defined $station ? $station : 'nd';
        }
    }

    return "nd";
}

#---------------------------------------------------------------------------
sub get_station {
    # Search the eventlog records for the cast matching a tow.
    #   $_[0]  = tow
    #   $_[1..] = eventlog records; the first one holds the two column
    #             names, the rest hold tab-separated station and cast values
    # Returns the station of the first matching record, or "nd" when no
    # record matches (earlier versions returned the station of the last
    # record read in that case).
    my ($tow, @event_data) = @_;

    # Without a tow there is nothing to look up.
    return "nd" unless defined $tow and length $tow;

    print STDOUT ("\n#***debug, looking for tow=$tow\n") if $debug eq 'yes';

    # First get the fieldnames from the eventlog, first record, to learn
    # which of the two columns is the station.
    my ($column_1, $column_2) =
        split /\t/, (defined $event_data[0] ? $event_data[0] : '');
    if ($debug eq 'yes') {
        no warnings 'uninitialized';
        print STDOUT ("\n#***debug, column_1=$column_1, column_2=$column_2\n");
    }
    my $station_is_first = (defined $column_1 and $column_1 =~ m/station/);

    # A value is compared numerically only when it has a digit and no letters.
    my $tow_is_numeric = ($tow =~ m/\d+/ and $tow !~ m/[a-zA-Z]+/);

    my ($found_cast, $found_station);

    RECORD:
    for my $i (1 .. $#event_data) {
        my ($first, $second) = split /\t/, $event_data[$i];
        my ($station, $cast) =
            $station_is_first ? ($first, $second) : ($second, $first);

        $station = "nd" unless defined $station;
        $station =~ s/^\s+//;
        $station =~ s/\s+$//;
        print STDOUT ("\n#***debug, station=$station\n") if $debug eq 'yes';

        next RECORD unless defined $cast;
        $cast =~ s/^\s+//;
        $cast =~ s/\s+$//;

        my $cast_is_numeric = ($cast =~ m/\d+/ and $cast !~ m/[a-zA-Z]+/);
        my $is_match;
        if ($cast_is_numeric and $tow_is_numeric) {
            $is_match = ($cast == $tow);
        }
        else {
            # Literal substring test; the tow is not treated as a regex.
            $is_match = (index($cast, $tow) >= 0);
        }

        if ($is_match) {
            ($found_cast, $found_station) = ($cast, $station);
            last RECORD;
        }
    }

    my $result = defined $found_station ? $found_station : "nd";
    if ($debug eq 'yes') {
        no warnings 'uninitialized';
        print STDOUT ("\n#***debug, cast=$found_cast, tow=$tow, station=$result\n");
    }
    return $result;
}