#!/usr/bin/perl -w
#
# mocpro_level2_conf.pl  (uses conf file)

# May 20, 1999.  Check for tow number with the word Tow in it and try
#	to isolate the real tow number, e.g. cruise EN263.  rcg
# May 29, 1999.  Need to work on tow vs cast/comment test.  There
#	are problems with non-numeric values.  Not repaired yet.  
#	Need to repair comparison in numalpha to test correctly for
#	non-numeric characters.  rcg
# June 4, 1999.  Use list rather than listgb program.  rcg
# June 8, 1999.  Add additional code to handle tow & cast numbers with
#	non-numeric characters. rcg
# October 3, 2001.  V2.12so - Set up for Southern Ocean.  Based on GB version
#	Naming convention for tow and 
# December 24, 2001.  V2.13ZG. Modified for ZooGene data.  rcg
# July 10, 2006.  V2.15CM  Modify for CMarZ data
#
# Jan. 30, 2012. V3.0BC Modified for BCO-DMO data. Scripts 
#       on dmoserv2 at /data/Scripts/ctd_mocness  mda + rcg
# February 13, 2012. V3.01BC Divide comment so it fits on two lines. rcg
# 
# November 7, 2012. V3.1  Modified to account for those instances when the
# 	cruiseid column is missing from eventlog.  mda + rcg 
# February 21, 2014.  V3.1DM3  Copied over to dmoserv3 to serve ctd_mocness data
#       that lives there.  mda
# April 11, 2014. V3.2. Change where the script finds the make_log_entry routine
#	so it finds it from the "library" area. rcg
# April 11, 2014. V3.21. Add code to handle no event log URLs being used
#	since there is no station information. Improve subroutine looking at
#	the eventlogs to declare all variable names. Check that there are data
#	for all the field names specified for this level, level 2. rcg
#
# Passed parameter
#	$ARGV[0] = configuration file
#	$ARGV[1] = data directory for these data
#	$ARGV[2] = MOC1, or MOC10 or MOC.25 specifying which MOCNESS
#			data type to view
#	$ARGV[3] = file type to process, e.g. pro, tab, raw  (note:
#			the code will have to change to accommodate
#			other than pro format.)
#	$ARGV[4] = cruiseid
#	$ARGV[5] = local four digit year

$version="V3.21/April 11, 2014";

# Prefix that marks an error line in the output.  It must be assigned before
# the first error message is written: both the usage message just below and
# the sendmessage() call at the end of this section use it.
$error="&x";

# The configuration file is mandatory; without it nothing else can be done.
unless ( defined $ARGV[0] ) {
	print STDOUT ("$error Displayed by $0 \n#  $version\n",
		"$error Required configuration file not specified.\n");
	exit;
}

use lib '/data1/www/bco-dmo/perlmodule';
#use lib '/home/rgroman/perllib';
	require ('sendmessage.pl');
	require ('get_config_file_hash.pl');
	require ('make_log_entry.pl');

$configuration_file = $ARGV[0];

# This script serves level 2 of the data hierarchy; the value indexes the
# fieldnames_level_N lists read from the configuration file.
$i_am_level = 2;

# Configuration keys that must be present and defined for the script to run.
@required = (
	'fieldnames_level_0', 'fieldnames_level_1', 'fieldnames_level_2',
        'fieldnames_level_3', 'level_3_script', 'data_directories',
	'moc10patterns', 'moc1patterns', 'mocpoint25patterns', 
	'instrument_column_name','deployment_column_name'
);

# get_config_file_hash() hands back a reference to a hash of the
# configuration parameters, or an undefined value on failure.
$config_param_ref=&get_config_file_hash($configuration_file);
unless(defined $config_param_ref){
	print STDOUT ("#**error, could not get configuration file contents\n");
	exit;
}

%config_param=%{$config_param_ref};

# Build a space separated list of the required keys that are missing.
# $status keeps its initial single blank when nothing is missing.
$status = ' ';
foreach (@required) {
	unless (exists $config_param{$_} and defined $config_param{$_} ) {
		$status = $status . ' ' . $_;
	}
}

# Log and report any missing keys, then stop.
unless ($status eq ' ') {
	&make_log_entry(
		"Required configuration parameters not defined=$status");
	&sendmessage($error, "Required configuration parameters $status",
		"not defined.");
	exit;
}


# This is a spot where we might specify a data object from the config file
#$inventory_object = "/globec/gb/inventory";
#$inventory_object = "/globec/unaffiliated/inventory_lmg0602";

# Prefixes that mark error and warning lines in the output.
($error, $warning) = ("&x", "#");

$| = 1;		#set to flush output

# Pattern file names and the level 3 script come straight from the
# configuration file.
($moc10patterns, $moc1patterns, $mocpoint25patterns, $level_3_script) =
	@config_param{'moc10patterns', 'moc1patterns',
		'mocpoint25patterns', 'level_3_script'};

# Positional command line arguments (see the header of this file).
($configuration_file, $moc_dir, $datatype, $filetype, $cruiseid, $year) = @ARGV;

$filetypeuc=uc $filetype;

# Set up fieldname lists: copy fieldnames_level_0, _1, ... from the
# configuration until the first level that is missing (at most levels 0-9).
$j = 0;
while ($j <= 9) {
	$level = 'fieldnames_level_' . $j;
	last unless exists ($config_param{$level}) and
		defined ($config_param{$level});
	$fieldnames_level[$j] = $config_param{$level};
#	print STDOUT (" **fieldnames_level[$j]=$fieldnames_level[$j] \n");
	$j++;
}


print STDOUT ("#Displayed by $0 \n#  Version: $version\n",
	"#  For $moc_dir, \n#  $datatype, $filetype, $cruiseid, $year\n");

$datatype = uc $datatype;

# Each supported MOCNESS datatype has its own file of filename patterns
# (one regular expression per line), named in the configuration file.
my %patterns_file_for = (
	'MOC10'  => $moc10patterns,
	'MOC1'   => $moc1patterns,
	'MOC.25' => $mocpoint25patterns,
);

unless (exists $patterns_file_for{$datatype}) {
	print STDOUT "# $0 cannot recognize datatype=$datatype";
	exit 0;
}

{
	my $patterns_file = $patterns_file_for{$datatype};

	# Three-argument open treats the name literally, so characters such
	# as '>' or '|' in the configured value cannot change the open mode.
	# Surrounding blanks are removed first because the two-argument form
	# used previously ignored them.
	(my $patterns_path = $patterns_file) =~ s/^\s+|\s+$//g;
	open(my $pattern_fh, '<', $patterns_path) or
		die "# $0 could not read $patterns_file patterns file, $!";
	@pattern = <$pattern_fh>;
	close $pattern_fh;
	chomp @pattern;
#	print STDOUT "#**debug, numb patterns=$#pattern, patterns=@pattern\n";

	# Drop blank lines: an empty pattern interpolated into m/$pattern/
	# does not mean "match nothing", it re-uses the last regular
	# expression that matched successfully.
	@pattern = grep { length } @pattern;
}

# Debug switch shared with the subroutines below: 'yes' or 'no'.
my $debug;

# Only the "pro" file format is understood; anything else ends the run.
unless ($filetype eq "pro" ) {
	print STDOUT "# $0 cannot handle filetype=$filetype";
	exit 0;
}

#	yes, we can handle this format
# Debugging is off unless the configuration has a debug value that
# starts with "y" (upper or lower case).
$debug='no';
$debug = 'yes'
	if exists $config_param{'debug'}
		and defined $config_param{'debug'}
		and $config_param{'debug'} =~ m/^y/i;

$max_number_of_urls=10;
my @url=();

# Gather eventlog_url_1, eventlog_url_2, ... from the configuration, stopping
# at the first number that is not configured.
foreach my $url_number (1 .. $max_number_of_urls) {
	$key="eventlog_url_" . $url_number;
	last unless exists ($config_param{$key}) and defined ($config_param{$key});
	push @url, $config_param{$key};
}

# Candidate names for the event log's instrument column, comma separated.
@instrument_column_name=split /,/, $config_param{'instrument_column_name'};


# Candidate names for the event log's deployment (cruise) column.  When the
# configured value holds no word characters, the single placeholder entry
# "does not exist" is stored instead.
if (exists $config_param{'deployment_column_name'}
		and defined $config_param{'deployment_column_name'}
		and $config_param{'deployment_column_name'} =~ m/\w{1,}/) {
	@deployment_column_name=split /,/, $config_param{'deployment_column_name'};
}
else {
	$deployment_column_name[0] = "does not exist";
}



print STDOUT ("#**debug,deployment_column_name=@deployment_column_name\n")
	if $debug eq 'yes';

# Names of the output columns for this level, in output order.
my @column_names = split /,/, $fieldnames_level[$i_am_level];
print STDOUT ("# **debug, column_names=@column_names\n") if $debug eq 'yes';
	
$index=-1;	# index of the most recently accepted data file
@data=();	# finished output rows, one per accepted data file
@datafile = <$moc_dir/*.$filetype>;    # get the data files
push @datafile, <$moc_dir/*.$filetypeuc>;

# Main loop: for every data file that matches one of the patterns for the
# requested datatype, read the tow number and local date from the file
# header, look up the station in the event log and build one output row.
foreach $checkfile (@datafile) {
#	print STDOUT "\n#**debug, checkfile: $checkfile\n";

	# Accept the file as soon as any pattern matches its name.
	my $is_wanted = 0;
	foreach $pattern (@pattern) {
		if ( $checkfile =~ m/$pattern/ ) {
			$is_wanted = 1;
			last;
		}
	}
	next unless $is_wanted;	#skip this file, not of interest
	$datafile=$checkfile;

	my $dataline;
	my $string;

	# Three-argument open: the name came from a glob and is used literally.
	open(my $profile_fh, '<', $datafile) or 
		die "# Could not open data file $datafile, $!";
#	print STDOUT "\n**debug, opening file=$datafile\n";
	$index++;
	$tow[$index]="nd";
	$month_local[$index]=0;
	$day_local[$index]=0;
	# $index never runs ahead of the position of $checkfile in @datafile,
	# so this only overwrites entries the loop has already visited.
	$datafile[$index]=$datafile;

	# Read header records until the date record (or an unusable record)
	# has been seen; the rest of the file is not needed at this level.
	READDATA: while(<$profile_fh>) {
		s/^\% /%/;
		chomp;
		$record=$_;
		
		print STDOUT ("#***debug, record=$record\n")
			if $debug eq 'yes';
		if( m/Tow/ )  {
#			print STDOUT ("#***debug, tow record=$_\n");
			# The tow label is the second blank separated token.
			# Keep the "nd" default if the record has no such token.
			@towline = split;
			$tow[$index] = $towline[1] if defined $towline[1];
#			print STDOUT ("#***debug, tow[$index]=$tow[$index]\n");
		}
		elsif( m/Date:/ ) {
			if ((index $record, "/" ) > 0) {
				# Numeric form: second token is month/day/...
				@dateline = split;
				$dateline[1]=~s/ //g;
				@cruisedate = 
					split(/\//,$dateline[1]);
#				print STDOUT ("#**debug, file=$datafile ",
#					"dateline[1]=$dateline[1] ",
#					"cruisedate[0]=$cruisedate[0] ",
#					"cruisedate[1]=$cruisedate[1]\n"); 
				$month_local[$index] = $cruisedate[0];
				$day_local[$index]=$cruisedate[1];
			}
			elsif ($record =~ m/.*Date.*\D{3,}.*/) {
				# Form with a month name; only handled when the last
				# token is four characters long (taken to be the year).
				print STDOUT ("#***debug, in new date format section\n")
					if $debug eq 'yes';
				@cruisedate = split /\s/, $record;
				unless ($record =~ m/Date/i) {
					&sendmessage ($warning, "# no Date string found",
						"# record=$record");
				}
				for (my $t=0; $t<=$#cruisedate; $t++){
					print STDOUT ("#***debug, cruisedate[$t]=$cruisedate[$t]\n")
						if $debug eq 'yes';
				}
				if (length($cruisedate[$#cruisedate]) == 4){
					print STDOUT ("#***debug, year? cruisedate[$#cruisedate]=",
						"$cruisedate[$#cruisedate]\n") if $debug eq 'yes';
					$test= $cruisedate[$#cruisedate-1];
					if ($test=~ m/\D+/){
						# "... day month year"
						$test=&monthtonum ($test);
						$month_local[$index] = $test;
						$day_local[$index] = $cruisedate[$#cruisedate-2];
						print STDOUT ("#***debug, month? cruisedate[$#cruisedate-1]=",
							"$cruisedate[$#cruisedate-1]\n") if $debug eq 'yes';
					}
					else {
						$test=$cruisedate[$#cruisedate-2];
						unless ($test=~ m/\D+/){
							&sendmessage ($warning, 
								"unexpected Date format",
								"record=$record");
							$month_local[$index] = 0;
							$day_local[$index] = 0;
						}
						else {
							# "... month day year"
							$test=&monthtonum ($test);
							$month_local[$index] = $test;
							$day_local[$index] = $cruisedate[$#cruisedate-1];
							# Keep the first run of digits.  The leading
							# part must be non-greedy, otherwise it
							# swallows all but the last digit and day
							# "15" would become "5".
							$day_local[$index] =~ s/.*?(\d+).*/$1/;
						   print STDOUT ("#***debug, month? cruisedate[$#cruisedate-2]=",
							   "$cruisedate[$#cruisedate-2]\n") if $debug eq 'yes';
						}
					}
				}
			}
			else {
				# Remaining forms: take the two tokens before the last
				# one as month and day, and swap them when the second
				# turns out to be a month name.
				@cruisedate = split /\s/, $record;
				$month_local[$index]=$cruisedate[$#cruisedate-2];
				print STDOUT ("#***debug, month_local[$index]=$month_local[$index]\n")
					if $debug eq 'yes';
				$day_local[$index]=$cruisedate[$#cruisedate-1];
				$test=&monthtonum ($day_local[$index]);
				if (  $test <= 12 and $test >= 1) {
					$day_local[$index]=$month_local[$index];
					$month_local[$index]=$test;
				}
			}
			if (! defined $month_local[$index]) {
				$month_local[$index]=0;
			}
			if (! defined $day_local[$index]) {
				$day_local[$index]=0;
			}
			# Report, but do not reject, implausible values.
			if ($month_local[$index] < 1 or 
					$month_local[$index] > 12 ) {
				$message0="Bad month value of " . 
					$month_local[$index];
				$message1="Data file=$datafile";
				&sendmessage ($warning, $message0, $message1);
			}
			if ($day_local[$index] < 1 or 
					$day_local[$index] > 31 ) {
				$message0="Bad day value of $day_local[$index]";
				$message1="Data file=$datafile";
				&sendmessage ($warning, $message0, $message1);
			}
			print STDOUT ("#***debug, month_local[$index]=$month_local[$index]\n")
				if $debug eq 'yes';
			print STDOUT ("#***debug, day_local[$index]=$day_local[$index]\n")
				if $debug eq 'yes';
			last READDATA;
		}
		elsif ( m/^\d\d.*/ )  {
			# File starts directly with a record beginning with two
			# digits.  Getting the day and month for such files from
			# the event log is not implemented, so just say so and stop
			# reading; tow, day and month keep their defaults.
			print STDOUT ("#**warning in $0\n#\toption not available\n");
			last READDATA;
		}
		else {
			$message0= "Unexpected data format found" .
				" in file " . $datafile[$index];
			$message1= "Record read=" . $_;
			&sendmessage ($warning, $message0, $message1);
			last READDATA;
		}
	}
	close($profile_fh);
	my $i=$index;		

	# Last column of the row: the call that serves the next level down.
	$string= "(" . $level_3_script . " " . $configuration_file . " " . 
		$datatype . " " . $filetype . " " . $datafile[$i] . 
		" " . $cruiseid . ")";

	# Reduce labels such as "xxx-12", "xxx_12a" or "xxx-Tow12" to the
	# trailing tow number (digits plus optional letters).
	$townumb = $tow[$i];
	$townumb =~ s/_/-/g;

#	print STDOUT ("#***debug, townumb=$townumb\n");

	if ( $townumb =~ m/[-Mm]/ ) { $townumb =~ s/.*-(\d+[a-zA-Z]*)$/$1/; }
	if ( $townumb =~ m/Tow/ ) { $townumb =~ s/.*-Tow(\d+[a-zA-Z]*)$/$1/; }
	my $station=&get_some_data_from_eventlog ($cruiseid, $datatype, $townumb, 
		\@url, \@instrument_column_name, \@deployment_column_name );
	unless (defined $station) {$station='nd'}
	 
#	print STDERR (" $0 : station=$station\n");
#	$station=~ s/.*(\w+).*/$1/;
	print STDOUT ("#**debug, townum=$townumb\n")
		if $debug eq 'yes';
	print STDOUT ("#**debug, day_local[$i]=$day_local[$i]\n")
		if $debug eq 'yes';	
	print STDOUT ("#**debug, month_local[$i]=$month_local[$i]\n")
		if $debug eq 'yes';
	print STDOUT ("#**debug, station=$station\n")
		if $debug eq 'yes';

# Need to check that the number of data columns match the number of values
# and somehow try to match up the field name to the value.

	# Assemble the tab separated row.  The first column is always the tow
	# number; later columns are filled by name (day, month, station).
	# Names containing ">" are skipped silently, any other name is
	# reported and ignored.
	for (my $k=0; $k<=$#column_names; $k=$k+1) {
		if ($k == 0) {
			print STDOUT ("# **debug, k=$k\n")
				if $debug eq 'yes';
			$dataline = $townumb . "\t" ;
			next;
		}
		elsif ($column_names[$k] =~ m/day/) {
			$dataline = $dataline . $day_local[$i] . "\t";
			print STDOUT ("# **debug for day, column_names[$k]=$column_names[$k]")
				 if $debug eq 'yes';
			next;
		}
		elsif ($column_names[$k] =~ m/month/) {
			print STDOUT ("# **debug for month, column_names[$k]=$column_names[$k]")
				 if $debug eq 'yes';
			$dataline = $dataline . $month_local[$i] . "\t";
			next;
		}
		elsif ($column_names[$k] =~ m/station/ or $column_names[$k] =~ m/sta/) {
			print STDOUT ("# **debug for station, column_names[$k]=$column_names[$k]")
				 if $debug eq 'yes';
			$dataline = $dataline . $station . "\t";
			next;
		}
		elsif ($column_names[$k] =~ m/\>/) {
			next;
		}
		my $mess1 = "Cannot deal with output data column name $column_names[$k]";
		my $mess2 = "It will be ignored.";
		&sendmessage ($warning, $mess1, $mess2);
	}
	$dataline = $dataline . $string . "\n";
	push @data, $dataline;       
}
#print STDOUT "#**debug, length of fieldnames_level=$#fieldnames_level\n";

# Header: the field name list of this level followed by the lists of all
# deeper levels, one per line.
foreach my $level_number ($i_am_level .. $#fieldnames_level) {
	print STDOUT ($fieldnames_level[$level_number],"\n");
}

#print STDOUT ("#**debug, last index=$index\n#\t",
#	"Lengths of tow, day_local, month_local=$#tow, ",
#	"$#day_local, $#month_local\n");
	
# Data rows, ordered by their first (tow) column.
my @ordered_rows = sort numalpha @data;
print STDOUT @ordered_rows;

undef $error;
undef $warning;
exit 0;

#---------------------------------------------------------------------------

sub numalpha {

# Sort comparator (used as "sort numalpha LIST") that orders tab separated
# rows by their first column, case-insensitively, in natural order:
#
#   * keys that start with digits come first, ordered by the numeric value
#     of those leading digits and then by whatever text follows them
#     (so 2 < 10 and 12 < 12a < 12b < 13);
#   * keys that do not start with a digit follow, ordered as strings.
#
# Comparing a pair numerically when both keys are all digits and as strings
# otherwise does not give a consistent ordering once plain and lettered
# tow numbers are mixed (2 < 10 numerically, "10" lt "1a" and "1a" lt "2"
# as strings), so the same rule is applied to every pair here.
#
# Reads the package variables $a and $b set by sort; returns -1, 0 or 1.

my ($key_a) = split /\t/, $a;
my ($key_b) = split /\t/, $b;
$key_a = '' unless defined $key_a;	# row was an empty string
$key_b = '' unless defined $key_b;
$key_a = lc $key_a;
$key_b = lc $key_b;
#print STDOUT "#**debug, key_a=$key_a, key_b=$key_b\n";

# Split each key into its leading number (if any) and the remainder.
my ($number_a, $rest_a) = $key_a =~ m/^(\d+)(.*)$/s;
my ($number_b, $rest_b) = $key_b =~ m/^(\d+)(.*)$/s;

if (defined $number_a and defined $number_b) {
	return ($number_a <=> $number_b) || ($rest_a cmp $rest_b);
}
return -1 if defined $number_a;		# numbered keys before text keys
return 1 if defined $number_b;
return $key_a cmp $key_b;
}

#---------------------------------------------------------------------------

sub monthtonum {

# Translate an English month name or abbreviation, $_[0], into its month
# number 1 - 12.  The lookup ignores case.  Anything that is not one of the
# names listed below yields the string "nd".

my ($month_text) = @_;

# Accepted spellings, one group per month in calendar order.
my @spellings = (
	[ 'jan', 'january' ],
	[ 'feb', 'february' ],
	[ 'mar', 'march' ],
	[ 'apr', 'april' ],
	[ 'may' ],
	[ 'jun', 'june' ],
	[ 'jul', 'july' ],
	[ 'aug', 'august' ],
	[ 'sep', 'sept', 'september' ],
	[ 'oct', 'october' ],
	[ 'nov', 'november' ],
	[ 'dec', 'december' ],
);

my %number_of;
foreach my $position (0 .. $#spellings) {
	foreach my $spelling (@{ $spellings[$position] }) {
		$number_of{$spelling} = $position + 1;
	}
}

my $wanted = lc $month_text;
return exists $number_of{$wanted} ? $number_of{$wanted} : "nd";

}



#---------------------------------------------------------------------------

sub get_some_data_from_eventlog {

# Look up the station for one tow in the event log(s).
#
# Each event log URL is tried in turn.  The field names of the log are read
# with the listvar program; when the log has one of the wanted deployment
# columns (or no deployment column is configured) and one of the wanted
# instrument columns, the list program is run to fetch the station and cast
# columns for this instrument (and cruise), and get_station() picks the
# station that belongs to the tow.
#
# Arguments passed are:
#	$_[0] = cruiseid
#	$_[1] = datatype
#	$_[2] = tow
#	$_[3] = @url as a reference
#	$_[4] = @instrument_column_name as a reference
#	$_[5] = @deployment_column_name as a reference

# Value returned is:
#	$station, or the string "nd" when no station could be determined
#	(no usable URL, a command could not be started, or no matching
#	columns were found)

print STDOUT ("#**debug in subroutine=$debug\n")
	if $debug eq 'yes';


# NOTE(review): $cast, $day and $month are declared here but not used below.
my ($cast, $cruiseid, $datatype, $day, @fieldnames, $input, 
	$listgb, $listgb_options, 
	$message0, $message1, $month, $object_spec, 
	$station, $tow );

$cruiseid = $_[0];
$datatype = uc $_[1];
$tow = $_[2];
my $url_ref=$_[3];
my $instrument_column_name_ref=$_[4];
my $deployment_column_name_ref=$_[5];
my @url=@$url_ref;
my @instrument_column_name=@$instrument_column_name_ref;
my @deployment_column_name=@$deployment_column_name_ref;

print STDOUT ("#**debug, cruiseid=$cruiseid, datatype=$datatype, tow=$tow\n")
	if $debug eq 'yes';
# External programs: "list" is run to fetch data records, "listvar" to fetch
# the field names of a data object.
$listgb = "/data302/data_server/dmoserv3/bin/list ";
$listgb_options = "-c -n -f -t ";

# NOTE(review): $listvar and the loop counters $j, $m, $k, $ii, $l, as well
# as $instrument_column_name and @event_data below, are not declared with
# "my" and are therefore package globals.
$listvar = "/data302/data_server/dmoserv3/bin/listvar ";


URL:
for ($j=0; $j<=$#url; $j++) {
	# Skip entries that are undefined or lack two consecutive word characters.
	unless (defined ($url[$j]) and $url[$j] =~ m/\w{2,}/) {next;}
	my $url=$url[$j];
	
	for (my $i=0; $i<=$#deployment_column_name; $i++)  {
		
		my $deployment_column_name=$deployment_column_name[$i];
		print STDOUT ("#**debug, deployment_column_name=$deployment_column_name\n")
			if $debug eq 'yes';
		# Read the field names of this event log through a pipe from
		# listvar (the trailing "|" makes open run the command).
		my $fieldnames_command = $listvar . ' ' . $url . "|";
		print STDOUT ("#**debug, fieldnames_command=$fieldnames_command\n")
			if $debug eq 'yes';
		
		unless ( open LISTVAR, $fieldnames_command) {
			$message0="Could not get fieldnames";
			$message1="fieldnames_command=$fieldnames_command";
			&sendmessage ($warning, $message0, $message1);
			return "nd";
		}
		@fieldnames = <LISTVAR>;
		close LISTVAR;
		# Strip leading and trailing white space (including the newline).
		for ($m=0; $m<=$#fieldnames; $m++) {
			$fieldnames[$m]=~ s/^\s+//;
			$fieldnames[$m]=~ s/\s+$//;
		}
		
		for ($k=0; $k<=$#fieldnames; $k++) {
			# Proceed when this log has the deployment column, or when the
			# placeholder says that no deployment column is configured.
			if ($deployment_column_name eq $fieldnames[$k] or
					$deployment_column_name eq "does not exist") {
				print STDOUT ("#**debug, deployment_column_name=",
					"$deployment_column_name\n") if $debug eq 'yes';
				for ($ii=0; $ii<=$#instrument_column_name; $ii++)  {
					$instrument_column_name=$instrument_column_name[$ii];
					for ($l=0; $l<=$#fieldnames; $l++) {
						if ($instrument_column_name eq 
							$fieldnames[$l]) {
						   # Build the quoted object specification
						   # "url(deployment=cruiseid,instrument=datatype,station,cast)";
						   # the deployment selection is left out when no
						   # deployment column is configured.
						   $object_spec = '"' . $url . '(' ;
						   unless ($deployment_column_name eq "does not exist"){
							$object_spec = $object_spec . $deployment_column_name . '=' .
								$cruiseid . ',' ;
						   }
						   $object_spec = $object_spec . 
							$instrument_column_name . '=' .
							$datatype . ',station,cast)"';
						   # NOTE(review): the command line is built by string
						   # concatenation and run through a pipe open; confirm
						   # that cruiseid, datatype and the URLs are trusted.
						   $input = $listgb . $listgb_options . 
							$object_spec . "|";

						   print STDOUT ("\n#***debug, input=$input\n") 
							if $debug eq 'yes';
						   unless ( open EVENTLOG, $input) {
							$message0="Could not open eventlog to get day and month";
							$message1="open=$input";
							&sendmessage ($warning, $message0, $message1);
							return "nd";
						   }
						   @event_data=<EVENTLOG>;
						   # Fewer than two lines means there is no data record
						   # after the first line: try the next URL.
						   # NOTE(review): EVENTLOG is not closed on this path.
						   if ($#event_data < 1) { next URL}
						   $station=&get_station($tow, @event_data);
						   unless (defined $station) {$station='nd' }
						   close EVENTLOG;
						   # The first event log that yields data records decides
						   # the result, whether or not the tow was found in it.
						   return $station;
						}
					}
				}
			}
		}
  	}
}

#print STDERR ("\n#***warning, bad logic in $0, cruiseid=$cruiseid, datatype=$datatype, tow=$tow\n");
#print STDOUT ("\n#***warning, bad logic in $0, cruiseid=$cruiseid, datatype=$datatype, tow=$tow\n");
# No event log supplied usable station/cast data.
return "nd";
}
#--------------------------------------------------------------------------------------------
sub get_station {

# Find the station that the event log records for a tow.
#
# $_[0] = tow
# $_[1] = array of eventlog data: the first line holds the two column names,
#	every following line one tab separated station/cast pair.  The
#	columns may come in either order; the station is taken to be first
#	when the first column name contains "station".
#
# Value returned is the station of the first record whose cast matches the
# tow ("nd" if that record has no station value), or an undefined value
# when no record matches.  The caller turns an undefined value into "nd".
#
# A cast matches when both cast and tow contain digits and no letters and
# are numerically equal; otherwise when the cast contains the tow as a
# literal substring.
# NOTE(review): the substring rule lets tow "3" match a cast such as
# "MOC13"; confirm whether an exact comparison is wanted.

my ($tow, @event_data) = @_;
print STDOUT ("\n#***debug, looking for tow=$tow\n") if $debug eq 'yes';

my $found;	# stays undefined unless a matching cast is seen

# An empty tow label cannot identify a cast, and an empty pattern would
# make the match below re-use some earlier regular expression.
return $found unless defined $tow and length $tow and @event_data;

# First get the fieldnames from the eventlog, first record.
my ($column_1, $column_2) = split /\t/, $event_data[0];
print STDOUT ("\n#***debug, column_1=$column_1, column_2=$column_2\n")
	if $debug eq 'yes';
my $station_is_first = (defined $column_1 and $column_1 =~ m/station/);

my $tow_is_number = ($tow =~ m/\d+/ and not $tow =~ m/[a-zA-Z]+/);

my ($cast, $station);

# Search for correct tow number
foreach my $record_number (1 .. $#event_data) {
	my @field = split /\t/, $event_data[$record_number];
	($station, $cast) = $station_is_first ? @field[0, 1] : @field[1, 0];

	unless (defined $station) {$station="nd";}
	$station=~ s/^\s+//;
	$station=~ s/\s+$//;

	print STDOUT ("\n#***debug, station=$station\n") if $debug eq 'yes';

	# A record without a cast value cannot match any tow.
	next unless defined $cast;
	$cast=~ s/^\s+//;
	$cast=~ s/\s+$//;

	if ($tow_is_number and $cast =~ m/\d+/ and not $cast =~ m/[a-zA-Z]+/) {
		if ( $cast == $tow ) {
			$found = $station;
			last;
		}
	}
	elsif ( $cast =~ m/\Q$tow\E/ ) {
		# \Q...\E: the tow label is data, not a regular expression.
		$found = $station;
		last;
	}
}

if ($debug eq 'yes') {
	my $shown = defined $found ? $found : 'not found';
	print STDOUT ("\n#***debug, tow=$tow, station=$shown\n");
}
return $found;
}


