#define STAT_VERSION "statisticker version 3.1a  30 Aug 2004"

/*	Input & output doc appears after revision info			*/

/*   30 Aug 04.  WJS							*/
/*	jdbfuncdefns.h							*/
/*   28 Apr 04.  WJS							*/
/*	errn now in utils						*/
/*   23 Apr 04.  WJS							*/
/*	Mods to "version-returning function".				*/
/*	  a) change from local to global to ensure it can't be "opti-	*/
/*	     mized out"							*/
/*	  b) don't want "version" in any function name since such       */
/* 	     names appear when grep'ping for "version"		        */
/*   18 Feb 04.  WJS							*/
/*	Use add_id_to_err from utils					*/
/*	Include .h file versions					*/
/*	Switch from defgb.h to utils.h					*/
/*		[Needs utils.c 1.8 (in jgofs.a)]			*/
/*		[Needs utils.h 1.0]					*/
/*		[Begin v 3.1a]						*/
/*   22 Aug 03.  WJS							*/
/*	Take environment variables from argument list, too.  This	*/
/*	  allows the propagation of this info via QUERY_STRING while	*/
/*	  web mode.  Otherwise, web must invoke different object per	*/
/*	  set of env vars						*/
/*      Try to control distinct-count stuff at runtime.  Accept control	*/
/*	  from both environment and argument list.  (Runtime control	*/
/*	  predicated on feature being enabled at compile time)		*/
/*	Get some routines from defgb_utils & some defns from defgb.h	*/
/*		[Needs defgb_utils 1.5a]				*/
/*		[Needs defgb.h 4.7]					*/
/*		[Begin v 3.1]						*/

/*   29 Jul 03.  WJS							*/
/*	Let's not give "Bad Object:" style error messages		*/
/*		[Begin v 3.0c]						*/
/*   10 Dec 02.  WJS							*/
/*	Bug fix: 28 Feb 01 use of DBL_MIN was incorrect			*/
/*	Bug fix: "Level splits" incorrectly computed			*/
/*		[Begin v 3.0b]						*/
/*   19 Mar 01.  WJS							*/
/*	Comment fix							*/
/*   28 Feb 01.  WJS							*/
/*	Use DBL_MIN instead of -DBL_MAX					*/
/*   26 Feb 01.  WJS							*/
/*	Bug fix: some "distinct-count" stuff not controlled by DO_	*/
/*	  DISTINCT							*/
/*	Bug fix: May need to be more careful with getmem.  Changed	*/
/*	  input to size_t type; changed internal pointer to type double	*/
/*	  to try avoid alignment problems later				*/
/*   25 Feb 01.  WJS							*/
/*	Add #include <limits.h> needed on Solaris & VMS			*/
/*		[Begin v 3.0a]						*/
/*   13 Apr 00.  WJS							*/
/*	Add a "Standard station-Arabian Sea" comparison type		*/
/*    4 Apr 00.  WJS							*/
/*	Add ability to define comparison type for max, min, etc		*/
/*		[Begin v 3.0]						*/

/*   28 Jan 00.  WJS							*/
/*	Add "distinct" counts as a select-at-compile-time option	*/
/*	  (by default, do them)						*/
/*	Minimum precision to 9 chars					*/
/*	Bug fix: accessed variable "N+1" instead of stopping at "N"	*/
/*	  when computing all stats					*/
/*		[Begin v 2.0]						*/

/*   17 Jan 00.  WJS							*/
/*	More output precision for sums					*/
/*	Propagate width= attrib (others aren't propagated)		*/
/*		[Begin v 1.1]						*/
/*    3 Jan 00.  WJS							*/
/*		[Begin v 1.0]						*/

/*  Input: 1 required parameter and a number of optional parameters	*/
/*	   Some optional parameters can get default values from the	*/
/*		environment						*/
/*									*/
/*	The required parameter is parameter 1.  It is the object	*/
/*  specifier (in "jdb format"; no /jg/serv; parameters parenthesized)	*/
/*  pointing to the input data for the statisticker.			*/

/*	The optional parameters, if any, may appear anywhere in the	*/
/*  parameter list after the object specifiers.  There are 2 types of	*/
/*  optional parameters - program control parameters and comparison	*/
/*  type parameters.							*/
/*	Program control parameters can also be specified in the en-	*/
/*  vironment.  If specified there as well as in the parameter list,	*/
/*  the parameter list value takes precedence.  See "Output": for info	*/
/*  about the rows_are_stats parameter.  See descrip of comparison type	*/
/*  parameters for info about the vars_must_be_in_object parameter.	*/
/*  The "distinct" parameter controls whether the statisticker will	*/
/*  compute the distinct count statistic (there is a performance hit	*/
/*  computing this if some data are logically real rather than integer)	*/
/*	Comparison type parameters define the				*/
/*  type of comparison to be used when testing for max, min, etc.	*/
/*  The default test type is determined at compilation time.  As	*/
/*  released (v 3.0, WJS), the default test type is numeric.		*/
/*  As of v 3.0, the other types of test are alpha and "JGOFS standard	*/
/*  station-Arabian Sea".						*/

/*	The syntax of each parameter is					*/
/*  		parameter_specifier=string				*/
/*  Multiple parameters can be concatenated with semicolons or slashes	*/
/*  if desired (Purpose of choice between single list and multiple	*/
/*  parameters is to ease syntax when using program from command line	*/
/*  vs http QUERY_STRING, etc)						*/
/*	where								*/
/*	    parameter_specifier is of the form XkeywordX		*/
/*		X represents a character illegal to a JGOFS variable	*/
/*		  name.  We accept any of !@#$%&?			*/
/*		keywords for program control parameters are		*/
/*			rows_are_stats					*/
/*			vars_must_be_in_object				*/
/*			distinct					*/
/*		keywords for comparison type parameters are		*/
/*			numeric						*/
/*			alpha						*/
/*			std_sta_AS					*/
/*		keywords are case insensitive (except that program	*/
/*		  control parameters in the environment must be in	*/
/*		  upper case)						*/
/*	    string for program control parameters is a logical value	*/
/*		1,Y,y,T,t,YES,yes,TRUE,true represent TRUE		*/
/*		0,N,n,F,f,NO,no,FALSE,false represent FALSE		*/
/*	    string for comparison type parameters is a list of JGOFS	*/
/*		variable names or a quantifier.				*/
/*		If a list, the character used to separate list elements	*/
/*		  can be any one of ,+-^*				*/
/*		If a list, all variables must be in object unless 	*/
/*		  program control parameter vars_must_be_in_object is	*/
/*		  set to FALSE (or program is compiled that way)	*/
/*		A quantifier is of the form YkeywordY, where Y comes	*/
/*		  from the set of special characters defined in		*/
/*		  parameter specifier.  The keyword is either "all" or	*/
/*		  "rest" (case insensitive)				*/
/*	Comparison types may appear multiple times.  Variable		*/
/*  names and quantifiers can appear only once				*/
/*  Example:								*/
/*	#numeric#=temp,sal,press;#alpha#=#rest#				*/
/*    This says "do numeric comparisons for the variables temp, sal and	*/
/*    press, and do alpha comparisons for all other variables"		*/
/*									*/
/*  Output: Statistics, presented as a JGOFS object.			*/
/*	    A perl function (calcstat.pl) exists which will take this	*/
/*		output and present it to the perl calling program as a	*/
/*		perl list which can be interpreted as a hash whose keys	*/
/*		are the statistic names and whose values are the	*/
/*		statistics for each variable				*/
/*									*/
/*	The "variables" (columns) of the output object can be either	*/
/*  the variables (columns) of the input object, with the rows being	*/
/*  the statistics, or vice versa.  The environment variable		*/
/*  ROWS_ARE_STATS controls this.  If unspecified, the compiled value	*/
/*  of this switch is used.  As released (v 3.0, WJS), the compiled	*/
/*  value is TRUE.							*/
/*	The statistics computed and their definitions are presented to	*/
/*  the program user as "JGOFS object comments".  To see them now, look	*/
/*  at the definition of the C variable "comments" in program text.	*/

/***********************/

#include "utils.h"
#include "jdbfuncdefns.h"

#include INNEROPTIONS

Logical get_logical_from_string();	/*  from utils			*/

  /*  Process compile-time defaults here.  Can be overridden @ runtime	*/
  /*  from either environment or parameter list				*/
#ifndef ROWS_ARE_STATS
#define ROWS_ARE_STATS TRUE
#endif
#if ! ((ROWS_ARE_STATS == TRUE) || (ROWS_ARE_STATS == FALSE))
#error "Illegal ROWS_ARE_STATS"
#endif
#define ENV_VAR_ROWS_ARE_STATS "ROWS_ARE_STATS"
Logical rows_are_stats = ROWS_ARE_STATS;

#ifndef CHECK_VARS
#define CHECK_VARS TRUE
#endif
#if ! ((CHECK_VARS == TRUE) || (CHECK_VARS == FALSE))
#error "Illegal CHECK_VARS"
#endif
#define ENV_VAR_CHECK_VARS "VARS_MUST_BE_IN_OBJECT"
Logical check_vars = CHECK_VARS;

#ifndef DO_DISTINCT
#define DO_DISTINCT TRUE
#endif
#if ! ((DO_DISTINCT == TRUE) || (DO_DISTINCT == FALSE))
#error "Illegal DO_DISTINCT"
#endif
#define ENV_VAR_DO_DISTINCT "DISTINCT"
Logical do_distinct=DO_DISTINCT;  
  /*  Try to get value of compilation switch into executable so we can	*/
  /*  do "strings" and find out which option was compiled in		*/
#if DO_DISTINCT
  char *distinct = "computing distinct counts enabled at compile time";
#else
  char *distinct = "computing distinct counts disabled at compile time";
#endif

/****/

#if DO_DISTINCT

char *comments = "\
Statistics returned include minima, maxima, counts, distinct counts,\n\
sums, and sums of squares\n\
	The numeric suffixes indicate how irregular data are treated\n\
Irregular data are of 2 types.  The first type is data explicitly marked\n\
as 'missing' in the input object (input object data whose values are the\n\
2-character string 'nd').  The second type is data whose characters do not\n\
decode into legal values.  This can be because of error, or because the data\n\
do not match the type expected (eg, a station 'J1' when station is numeric)\n\
	0	All data are included in the statistic.  Except for counts,\n\
		any irregular datum causes the corresponding statistic to\n\
		be given the value 'nd'\n\
	1	Data explictly marked as irregular are skipped.  Except for\n\
		counts, non-decodable values cause the corresponding\n\
		statistic to be given the value 'nd'.  'nd' is also assigned\n\
		if all data are irregular\n\
	2	All irregular data are skipped.  'nd' is assigned to non-\n\
		count statistics if all data are irregular\n\
As far as distinct counts are concerned, 1 is added to the appropriate\n\
counts if any explicitly irregular data are found (and explicitly irregular\n\
data are included in the count), and another 1 is added if any non-decodable\n\
data are found (and non-decodable data are included in the count).\n\
";
#define COUNT2 0
#define DISTINCT2 1
#define MIN2 2
#define MAX2 3
#define SUM2 4
#define SUM_SQUARES2 5
#define COUNT1 6
#define DISTINCT1 7
#define MIN1 8
#define MAX1 9
#define SUM1 10
#define SUM_SQUARES1 11
#define COUNT0 12
#define DISTINCT0 13
#define MIN0 14
#define MAX0 15
#define SUM0 16
#define SUM_SQUARES0 17
#define NSTATS 18		/*  Maintain = length of list above	*/

#else

char *comments = "\
Statistics returned include minima, maxima, counts, sums, and sums of squares\n\
	The numeric suffixes indicate how irregular data are treated\n\
Irregular data are of 2 types.  The first type is data explicitly marked\n\
as 'missing' in the input object (input object data whose values are the\n\
2-character string 'nd').  The second type is data whose characters do not\n\
decode into legal values.  This can be because of error, or because the data\n\
do not match the type expected (eg, a station 'J1' when station is numeric)\n\
	0	All data are included in the statistic.  Except for counts,\n\
		any irregular datum causes the corresponding statistic to\n\
		be given the value 'nd'\n\
	1	Data explictly marked as irregular are skipped.  Except for\n\
		counts, non-decodable values cause the corresponding\n\
		statistic to be given the value 'nd'.  'nd' is also assigned\n\
		if all data are irregular\n\
	2	All irregular data are skipped.  'nd' is assigned to non-\n\
		count statistics if all data are irregular\n\
";
#define COUNT2 0
#define MIN2 1
#define MAX2 2
#define SUM2 3
#define SUM_SQUARES2 4
#define COUNT1 5
#define MIN1 6
#define MAX1 7
#define SUM1 8
#define SUM_SQUARES1 9
#define COUNT0 10
#define MIN0 11
#define MAX0 12
#define SUM0 13
#define SUM_SQUARES0 14
#define NSTATS 15		/*  Maintain = length of list above	*/

#endif

/****/

int vnamesize=VARNAMESIZE+1;
int valuesize=DATUMSIZE+1+1;	/* "Extra" +1 for ALPH_SEP		*/

void error_();			/*  ... entry in outer			*/
Logical add_id_to_err();	/* in utils.c				*/

int handle = -1;		/*  Not sure why this needs = -1	*/

  /*  Things related to input object					*/
int nvars,maxlev;
char names[NVAR * (VARNAMESIZE+1)];
char *values;			/* values[nvars][DATUMSIZE+1]		*/
int *firstvarlevel;		/* firstvarlevel[maxlev]		*/
signed char *comparison_type;	/* comparison_type[nvars]		*/
#
#if DO_DISTINCT
struct value_list {
    /*  Use 1 of data_list or adata_list based on comparison_type	*/
    /*  Could have union-ed, but seems more trouble than worth		*/
  double *data_list;		/* data_list[n_distinct_vals]		*/
  char *adata_list;		/* ptr to string holding all 		*/
				/*  distinct alpha vals concatenated	*/
  int list_next;
  int list_size;
} *val_list;			/* val_list[nvars]			*/
#ifndef NUM_DATALIST_PER_MALLOC 
#define NUM_DATALIST_PER_MALLOC 200
#endif
#ifndef AV_SIZE_ADATUM 
#define AV_SIZE_ADATUM 15
#endif
#ifndef NUM_ADATALIST_PER_MALLOC 
#define NUM_ADATALIST_PER_MALLOC NUM_DATALIST_PER_MALLOC*AV_SIZE_ADATUM
#endif
#endif

  /*  Things related to stats						*/
Logical valid_data[NVAR][NSTATS];
double stats[NVAR][NSTATS];
char *astats[NVAR][NSTATS];	/*  Only need # alph stats and a subset	*/
				/*  of NSTATS, but making it NSTATS easier */
#define STATNAMESIZE 20
int snamesize = STATNAMESIZE+1;
char stat_names[NSTATS * (STATNAMESIZE+1)];
int stat_widths[NSTATS];
int stat_precisions[NSTATS];

#define VARNAME_FOR_STAT_ID "stat_name"
#define VARNAME_FOR_VARIABLE_ID "variable"

int stat_being_worked_on = -1;
int var_being_worked_on = -1;

int *out_widths,*out_attr_widths;

  /*  Stuff relating to parsing comparison selection list 		*/
  /*  Use some ndefs so user can override at compile time		*/
#define COMP_TYPE_UNINIT -1
#define COMP_TYPE_NUMERIC 0
#define COMP_TYPE_ALPHA 1
#define COMP_TYPE_STD_STA_AS 2

#ifndef DEFAULT_COMP_TYPE 
#define DEFAULT_COMP_TYPE COMP_TYPE_NUMERIC
#endif
#if ! ( (DEFAULT_COMP_TYPE == COMP_TYPE_NUMERIC) ||		\
	(DEFAULT_COMP_TYPE == COMP_TYPE_ALPHA)   ||		\
	(DEFAULT_COMP_TYPE == COMP_TYPE_STD_STA_AS) )
#error "Illegal DEFAULT_COMP_TYPE"
#endif

#define COMP_TYPE_NUMERIC_KEYWORD "numeric"
#define COMP_TYPE_ALPHA_KEYWORD "alpha"
#define COMP_TYPE_STD_STA_AS_KEYWORD "std_sta_AS"

#define COMP_QUANTIFIER_1 "all"
#define COMP_QUANTIFIER_2 "rest"

#define KEYWORD_LIST_SEPARATOR '='
#ifndef ILLEGAL_JGOF_CHARS 
#define ILLEGAL_JGOF_CHARS "!@#$%&?"
#endif
#ifndef LIST_SEPARATORS 
#define LIST_SEPARATORS ";/"
#endif
#ifndef VARNAME_SEPARATORS 
#define VARNAME_SEPARATORS ",+-^*"
#endif
#ifndef ALPH_SEP 
#define ALPH_SEP '\1'		/*  A non-zero, non-alpha character	*/
#endif

  /*  From max to min, order is N1 -> N11, M1, S15 -> S1, A		*/
  /*	(from Chris 11 Apr 2000).  					*/
#define NSTD_STAS_AS 28
#define MAXLEN_STD_STA_AS 3	/* Could runtime strlen() the vals...	*/
char *std_stas_AS[NSTD_STAS_AS] = {
  "A",
  "S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10","S11",
        "S12","S13","S14","S15",
  "M1",
  "N11","N10","N9", "N8", "N7", "N6", "N5", "N4", "N3", "N2", "N1"
};

/************************************************************************/

char *stats_return_vers()
/*  Dummy routine.  Exists only to force .h file version string into	*/
/*  this module.  Note string must not be global or we'll have con-	*/
/*  flicts if another routine similarly includes the version string	*/
{
  static char version[] = 
	STAT_VERSION"/"FULL_UTILSH_VERSION"/"FULL_JDBFUNCDEFNSH_VERSION;
  return version;
}

void err(s,t)
char *s,*t;
{
  char *ss,*tt;
  add_id_to_err(&ss,&tt,s,t,STAT_VERSION);
  error_(ss,tt);
  return;		/*  Not that it should ever get here...		*/
}

void *getmem(bufname,nbytes)
char *bufname;
size_t nbytes;
{
#define SIZERRBUF 150
  static char errbuf[SIZERRBUF]; /*  Deliberately static and fixed size	*/
				 /*   since if we need it we're having	*/
				 /*     trouble allocating memory	*/
  static char *piece1 = "Could not get ";
  static char *piece2 = " bytes memory for '";
  static char *piece3 = "' buffer/structure(s)";

  int errmsglen;
  double *ptr;	/* Chosen for "most restrictive alignment" purposes	*/
		/* Presumably, this is system-dependent, but I don't	*/
		/* know where the appropriate defn is			*/

  ptr = (double *) malloc(nbytes);

  if (ptr == NULL) {
      /*  Length is that of 3 pieces of fixed text (above) + width of	*/
      /*  args passed here + 1 for '\0'					*/
    errmsglen = strlen(piece1) + strlen(piece2) + strlen(piece3);
    errmsglen += log10((double)nbytes) + 1;
    errmsglen += strlen(bufname) + 1;

    if (errmsglen <= SIZERRBUF) {
      sprintf(errbuf,"Could not get %d bytes memory for '%s' buffer\n",
				nbytes,  	bufname		  );
      err(errbuf,"");
    } else
	/*  Calling err not as insane as it might seem.  Might be	*/
	/*  possible to get memory for error message even if allocation	*/
	/*  for buffer failed						*/
      err("Could not get memory for buffer/structure ",bufname);
  }

  return (void *)ptr;
#undef SIZERRBUF
}

void init_stuff()
{
  int i,j;
  char *ptr;

/*  Do some sanity checks on compile-time assumptions, vals, etc	*/

  if (isalnum(ALPH_SEP))
    err ("Internal error - ALPH_SEP must not be alphanumeric","");
  if (strpbrk(ILLEGAL_JGOF_CHARS,LIST_SEPARATORS) != NULL) err (
    "Internal error - ILLEGAL_JGOF_CHARS & LIST_SEPARATORS must be disjoint",
						"");
  if (strpbrk(ILLEGAL_JGOF_CHARS,VARNAME_SEPARATORS) != NULL) err (
    "Internal error - ILLEGAL_JGOF_CHARS & VARNAME_SEPARATORS must be disjoint",
						"");
  if (strpbrk(VARNAME_SEPARATORS,LIST_SEPARATORS) != NULL) err (
    "Internal error - VARNAME_SEPARATORS & LIST_SEPARATORS must be disjoint",
						"");
  if (  (strchr(ILLEGAL_JGOF_CHARS,KEYWORD_LIST_SEPARATOR) != NULL) ||
	(strchr(LIST_SEPARATORS,KEYWORD_LIST_SEPARATOR) != NULL)    ||
	(strchr(VARNAME_SEPARATORS,KEYWORD_LIST_SEPARATOR) != NULL)	)
    err ("Internal error - KEYWORD_LIST_SEPARATOR may not appear as special\
 character on other lists","");

  if (  (ptr = getenv(ENV_VAR_ROWS_ARE_STATS)) != NULL  )
      rows_are_stats = get_logical_from_string
				(ptr,"getting env var ",ENV_VAR_ROWS_ARE_STATS);
  if (  (ptr = getenv(ENV_VAR_CHECK_VARS)) != NULL  )
      check_vars = get_logical_from_string
				(ptr,"getting env var ",ENV_VAR_CHECK_VARS);
  if (  (ptr = getenv(ENV_VAR_DO_DISTINCT)) != NULL  )
      do_distinct = get_logical_from_string
				(ptr,"getting env var ",ENV_VAR_DO_DISTINCT);


    /*  Do next stuff at run time instead of compile time to make it	*/
    /*  easier to alter order of statistics				*/

    /*  Init the values that the VARNAME_FOR_STAT_ID defn will take	*/
    /*  I give up on checking if these constants will fit in their bufs	*/
    /*    STATNAMESIZE is what they all should be no bigger than	*/
  strcpy (  stat_names + COUNT0*snamesize,  "count0" );
  strcpy (  stat_names + COUNT1*snamesize,  "count1" );
  strcpy (  stat_names + COUNT2*snamesize,  "count2" );
  strcpy (  stat_names + MIN0*snamesize,  "min0" );
  strcpy (  stat_names + MIN1*snamesize,  "min1" );
  strcpy (  stat_names + MIN2*snamesize,  "min2" );
  strcpy (  stat_names + MAX0*snamesize,  "max0" );
  strcpy (  stat_names + MAX1*snamesize,  "max1" );
  strcpy (  stat_names + MAX2*snamesize,  "max2" );
  strcpy (  stat_names + SUM0*snamesize,  "sum0" );
  strcpy (  stat_names + SUM1*snamesize,  "sum1" );
  strcpy (  stat_names + SUM2*snamesize,  "sum2" );
  strcpy (  stat_names + SUM_SQUARES0*snamesize,  "sum_squares0" );
  strcpy (  stat_names + SUM_SQUARES1*snamesize,  "sum_squares1" );
  strcpy (  stat_names + SUM_SQUARES2*snamesize,  "sum_squares2" );
#if DO_DISTINCT
  strcpy (  stat_names + DISTINCT0*snamesize,  "distinct0" );
  strcpy (  stat_names + DISTINCT1*snamesize,  "distinct1" );
  strcpy (  stat_names + DISTINCT2*snamesize,  "distinct2" );
#endif

  return;
}

int get_std_sta_AS(sta)
char *sta;
  /*  Return index into std_stas_AS list.  If not in list, return -1	*/
{
  int i;

  for (i = 0; i < NSTD_STAS_AS; i++)
    if (strcmp(sta,std_stas_AS[i]) == 0) break;

  return (i == NSTD_STAS_AS) ? -1 : i;
}

#if DO_DISTINCT
Logical new_distinct_val(val,list_struct)
double val;
struct value_list *list_struct;
  /*  See if val is in list found within list_struct.  If so, return	*/
  /*  FALSE.  If not, add val to that list, then return TRUE		*/
{
  int i;
  double *dptr;			/*  Just for convenience		*/

  dptr = list_struct->data_list;

  for (i=0;  i < list_struct->list_next;  i++)
    if (val == dptr[i]) return FALSE;

  if (list_struct->list_next == list_struct->list_size) {
    list_struct->list_size += NUM_DATALIST_PER_MALLOC;
    dptr = (double *) realloc (dptr ,  list_struct->list_size * sizeof(double));
    if (dptr == NULL)
      err("Could not get memory for '.data_list' buffer","");
    list_struct->data_list = dptr;
  }

  dptr[(list_struct->list_next)++] = val;

  return TRUE;
}

Logical alph_new_distinct_val(val,list_struct)
char *val;
struct value_list *list_struct;
  /*  alpha version of new_distinct_val					*/
{
  int len;

  len = strlen(val);

/*  Use ALPH_SEP so whole string can be searched w/1 strstr call.	*/
/*  Hypothesis is that this more efficient than loop of many strcmp's	*/
/*  strstr tests more chars; loop does more function calls.  strstr	*/
/*  also requires single string to contain all alph data (65767 char	*/
/*  limit on VMS strings?).  Recode if necessary...			*/
  val[len++] = ALPH_SEP;
  val[len] = '\0';

  if (strstr(list_struct->adata_list,val) == NULL) {

/*    Not on list, so new unique value.					*/
    if (  (list_struct->list_next) + len > list_struct->list_size  ) {
/*	Need more memory						*/
      list_struct->list_size += NUM_ADATALIST_PER_MALLOC;
      list_struct->adata_list = (char *) realloc (
					list_struct->adata_list,
					list_struct->list_size);
      if (list_struct->adata_list == NULL)
        err("Could not get memory for '.adata_list' buffer","");
    }

/*      Add new val to list						*/
    strcpy ( (list_struct->adata_list) + (list_struct->list_next),  val );
    list_struct->list_next += len;

    val[--len] = '\0';
    return TRUE;


  } else {


/*    On list, so not a unique value.					*/
    val[--len] = '\0';
    return FALSE;

  }
}
#endif

void set_widths (nrows,ncols,
		 row_headers,row_header_maxsize,
		 data_col_headers,data_col_header_maxsize,
		 row_headers_col_header,
		 data_col_widths_array,data_col_widths_array_size)
  /*  Set up out_widths array (and a copy, out_attr_widths)		*/
  /*    2 parts.  1st, set up the width of the first column, which	*/
  /*    is the max of the name of that column and all the values in	*/
  /*    that column.  2nd, the rest of the widths are either constant	*/
  /*    or a copy of an input array, maximized w/the name of the column	*/
int nrows;		/*  Not counting title row			*/
int ncols;		/*  Not counting id column			*/
char *row_headers,*data_col_headers,*row_headers_col_header;
int row_header_maxsize,data_col_header_maxsize;
int data_col_widths_array[];
int data_col_widths_array_size;
{
  int i,j,k;
  int ptr;

  if ( ! (
	  (data_col_widths_array_size == 1) ||
	  (data_col_widths_array_size == ncols)
         )
     )
    errn("Internal error: bad data_col_widths_array_size to set_widths. Size=",
					data_col_widths_array_size);

  out_widths = (int *) getmem (  "out_widths",
			         (size_t)((ncols + 1) * sizeof(int))  );
  out_attr_widths = (int *) getmem (  "out_attr_widths",
			         (size_t)((ncols + 1) * sizeof(int))  );

    /*  1st col - take biggest row header unless col header for the row	*/
    /*  headers is bigger						*/
  k = strlen(row_headers_col_header);
  for (i = 0; i < nrows; i++) {
    j = strlen(row_headers + i*row_header_maxsize);
    if (j > k)  k = j;
  }
  out_attr_widths[0] = out_widths[0] = k;

    /*  Other cols - take max of data width w/ col header width		*/
  for (i = 1; i <= ncols; i++) {
    j = strlen(data_col_headers + i*data_col_header_maxsize);
    k = (data_col_widths_array_size == 1) ?
			data_col_widths_array[0] : data_col_widths_array[i-1];
    out_attr_widths[i] = out_widths[i] = (j > k) ? j : k;
  }
      
  return;
}

void set_stat_widths()
/*  Set the output width of each statistic.  Used to set the JGOFS	*/
/*  object width= attribute; not so simple because that object can	*/
/*  be presented w/stats as rows or columns...				*/
{
  int i,j,k,max;
  int non_precision_width_chars;
  int numeric_single_width,numeric_single_digits_of_precision;

    /*  Want enough precision in output so that if people use sums of	*/
    /*  squares, etc, stats will be valid.  Decided to use single float	*/
    /*  precision for max & min; double for sums and sums_of_squares	*/
    /*  Counts get single kind of by default.  Constants come from 	*/
    /*  float.h								*/
    /*    Temporarily (?) increase precision so that 8 digit event	*/
    /*  numbers don't print in E format.  Using 9 because I'm confused	*/
    /*  about FLT_DIG vs printf precision and I don't want to edit	*/
    /*  this twice.  Check vs DBL_DIG is sanity...  assume DBL_DIG >	*/
    /*  FLT_DIG!!!							*/
  numeric_single_digits_of_precision = (9 > FLT_DIG) ? 9 : FLT_DIG;
  if (numeric_single_digits_of_precision  >= DBL_DIG)
    			numeric_single_digits_of_precision  = DBL_DIG - 1;

    /* "+N.", "E+", max width of exponent				*/
  non_precision_width_chars = 3 + 2 + (1 + log10(log10(DBL_MAX)));
  numeric_single_width =
		numeric_single_digits_of_precision  + non_precision_width_chars;

  for (i=0; i<NSTATS; i++)
    switch (i) {
      case SUM0:          case SUM1:          case SUM2:
      case SUM_SQUARES0:  case SUM_SQUARES1:  case SUM_SQUARES2:
	  /*  Not sure why, but seemed to get lots of ".NNN9999"s w/o	*/
	  /*  -1 in following line.  Perhaps it's because when using	*/
	  /*  precision in printf, there is a significant digit before	*/
	  /*  the .							*/
	stat_precisions[i] = DBL_DIG - 1;
	stat_widths[i] = DBL_DIG - 1 + non_precision_width_chars;
	break;
      case MIN0:  case MIN1:  case MIN2:
      case MAX0:  case MAX1:  case MAX2:
	stat_precisions[i] = numeric_single_digits_of_precision;
	max = 0;
	for (j = 0; j < nvars; j++)
	  if (comparison_type[j] == COMP_TYPE_ALPHA) {
	    k = strlen(astats[j][i]);
	    if (max < k) max = k;
	  } else if (comparison_type[j] == COMP_TYPE_STD_STA_AS) {
	      /*  Could strlen(std_sta_AS((int)stats[j][i]) but		*/
	      /*  usefulness of this is already moot since MAXLEN..=3	*/
	    if (max < MAXLEN_STD_STA_AS) max = MAXLEN_STD_STA_AS;
	  } else if (comparison_type[j] == COMP_TYPE_NUMERIC) {
	    if (max < numeric_single_width) max = numeric_single_width;
	  } else
	    errn
	("Internal error - set_stat_widths: no code to handle comparison type",
						comparison_type[j]);
	stat_widths[i] = max;
	break;
      default:
	stat_precisions[i] = numeric_single_digits_of_precision;
	stat_widths[i] = numeric_single_width;
	break;
    }

  return;
}

Logical stats_param(source)
char *source;
/*  Return truth of the hypothesis "source is a 'stats parameter'".	*/
/*  Algorithm: stats param if source begins XstringY[=], Xstring[=], or	*/
/*    stringY[=], where X & Y are members of "our" character set.  Only	*/
/*    XstringX= is legal, but assuming others were supposed to be ours,	*/
/*    will get better error message if we assume they ARE ours.		*/
{
  char *ptr;

    /*  TRUE if first char is "one of ours"				*/
  if (strchr(ILLEGAL_JGOF_CHARS,*source) != NULL) return TRUE;

    /*  Get one past end of keyword string (or whole string)		*/
  if (  (ptr = strchr(source,'=')) == NULL  )  ptr = source + strlen(source);

    /*  TRUE if last char is "one of ours"				*/
  return (strchr(ILLEGAL_JGOF_CHARS,*(--ptr)) != NULL);
}

void p_stats_err(msg,param)
char *msg,*param;
{
  char *errbuf;
  char *piece1 = "Problem parsing parameter ->";
  char *piece2 = "<-\nExpected 'Keyword=list' format.  Problem: ";

  errbuf = (char *) getmem ( "p_stats_err",
    (size_t)(strlen(piece1) + strlen(piece2) + strlen(msg)+ strlen(param) + 1)
			   );
  sprintf (errbuf,"%s%s%s%s\n",piece1,param,piece2,msg);
  err (errbuf,"");

    /*  Not that we're coming back here, but...				*/
  free (errbuf);
  return;
}

void process_stats_param(default_type,type_list,source)
char *default_type,*source;
signed char *type_list;
/*  Source contains a list of variables to be assigned a certain comp-	*/
/*  arison type, or the instruction that all unspecified variables	*/
/*  shall have a certain type.  Extract that info.			*/
{
  char *ptr1,*ptr2,*startptr,*endptr,*strtok_tmp;
  signed char type;
  int i,j;

  strtok_tmp = startptr = (char *) getmem ("Copy of parameter",strlen(source));
  strcpy(startptr,source);
  while (  (ptr1 = strtok(strtok_tmp,LIST_SEPARATORS)) != NULL  ) {
    strtok_tmp = NULL;
    if (  (ptr2 = strchr(ptr1,KEYWORD_LIST_SEPARATOR)) == NULL  )
      p_stats_err("missing the =",source);
    if ((ptr2 - ptr1) < 3) p_stats_err
	("0 length keyword (after accounting for special chars, if any)",
					source);
      /*  Char before = needs to be "one of ours"			*/
    if (strchr(ILLEGAL_JGOF_CHARS,*(--ptr2)) == NULL)
      p_stats_err("last char of keyword must be 'special' to statisticker",
									source);
      /*   ... and must match the first char				*/
      /*  While at it, advance ptr1 beyond special char			*/
    if (  *ptr1++ != *ptr2  )
      p_stats_err("1st char of keyword not same special char as last",source);
      /*  Terminate keyword before special char while advancing ptr2	*/
      /*  to point to =							*/
    *ptr2++ = '\0';
      /*  Advance ptr 2 to first char of list, which must be non-empty	*/
    if ( ++ptr2 == '\0' ) p_stats_err ("0 length list",source);

      /*  Decode keyword string						*/
      /*    1st 3 cases are T/F keywords, so do the T/F work and then	*/
      /*    skip to end of loop (lazy coding...)			*/
    if (strcasecmp(ptr1,ENV_VAR_ROWS_ARE_STATS) == 0)  {
      rows_are_stats = get_logical_from_string(ptr2,"getting param ",ptr1);
      continue;
    }
    if (strcasecmp(ptr1,ENV_VAR_CHECK_VARS) == 0) {
      check_vars = get_logical_from_string(ptr2,"getting param ",ptr1);
      continue;
    }
    if (strcasecmp(ptr1,ENV_VAR_DO_DISTINCT) == 0) {
      do_distinct = get_logical_from_string(ptr2,"getting param ",ptr1);
      continue;
    }

    if (strcasecmp(ptr1,COMP_TYPE_NUMERIC_KEYWORD) == 0)
      type = COMP_TYPE_NUMERIC;
    else if (strcasecmp(ptr1,COMP_TYPE_ALPHA_KEYWORD) == 0)
      type = COMP_TYPE_ALPHA;
    else if (strcasecmp(ptr1,COMP_TYPE_STD_STA_AS_KEYWORD) == 0)
      type = COMP_TYPE_STD_STA_AS;
    else
      p_stats_err("Illegal keyword",source);

      /*  Logic splits if variable list is a quantifier.		*/
    i = strlen(ptr2);
    if (strchr(ILLEGAL_JGOF_CHARS,*ptr2) == NULL) {
	/*  List - do our own strtok since we're already "in" one	*/
      endptr = ptr2 + i;
      while (TRUE) {
        i = strcspn(ptr2,VARNAME_SEPARATORS);
        *(ptr2 + i) = '\0';
	for (j = 0; j < nvars; j++)    
	  if (strcmp( ptr2,  names + j*vnamesize  ) == 0) break;
	if (j == nvars) {
	  if (check_vars) p_stats_err("Var name in list not in object",source);
        } else {
	    /*  Can't spec 2 different vals for 1 var.  The "else if"	*/
	    /*   contradicts the doc but doesn't cost anything		*/
	  if (type_list[j] == COMP_TYPE_UNINIT) type_list[j] = type;
	  else if (type_list[j] != type) p_stats_err
	    ("Duplicate definition for at least 1 var name in list",source);
	}
        if ((ptr2 + i) == endptr) break;
	ptr2 += i+1;
      }
    } else {
	/*  Quantifier							*/
      if (--i == 0) p_stats_err
	("0 length 'quantifier' list item after initial special char",source);
      if (  *(ptr2 + i) != *ptr2  ) p_stats_err
	("last char of 'quantifier' list item must match first",source);
      *(ptr2 + i) = '\0';
      *ptr2++;
	/*  "all" and "rest" are really the same;  "all" = "rest, but	*/
	/*  you didn't enumerate any".  I suppose the latter is an	*/
	/*  an error, but we don't treat it that way			*/
      if (  (strcasecmp(COMP_QUANTIFIER_1,ptr2) == 0) &&
	    (strcasecmp(COMP_QUANTIFIER_2,ptr2) == 0)     )
	p_stats_err("Illegal 'quantifier' list item",source);
	/*  Can't spec 2 different defaults				*/
	/*  The "else if" contradicts the doc but doesn't cost anything	*/
      if (*default_type == COMP_TYPE_UNINIT) *default_type = type;
      else if (*default_type != type) p_stats_err
 	("Duplicate definition for 'quantifier' list item ",source);
    }
  }

  free (startptr);
  return;
}

int ioopen_(params,nparams,ntotal)
char *params[];
int *nparams,*ntotal;
/*
    s[0..nparams-1]: parameter strings. Inner sets s[j][0]=0 for any 
    strings which it processes; others will be processed by outer. Thus
    selection/projections would normally be ignored by inner.
    nparams: number of parameter strings
    ntotal (returned): total number of variable names */
{
  double df;
  int i,j,k,lev;
  signed char default_comp_type;
  char *ptr,*end_ptr;
  Logical non_decodable,explicit_invalid;
  Logical is_a_count,is_a_distinct_count;

  init_stuff();

  if (params[0] == NULL) err("Missing input object name","");
  if (strlen(params[0]) == 0)
		err ("No characters in input object name","");

  nvars = -NVAR;
  maxlev = jdbopen_(&handle,params[0],names,&vnamesize,&nvars);
  if (maxlev < 0) err("jdbopen problem trying to open object ",params[0]);
  *params[0] = '\0';

    /*  Get comparison type for each variable.  Check each parameter to	*/
    /*  see if it's of a stats format.  If so, extract its info (either	*/
    /*  info about a specific variable or info about the default for	*/
    /*  all unspecified variables) and mark the parameter "used" so	*/
    /*  outer doesn't think it's a selection/projection.  Finally,	*/
    /*  initialize type for each variable not specifically mentioned	*/
    /*  to be the default (either specified, or a "default default"	*/
    /*  compiled into the statisticker).				*/
  comparison_type = (signed char *) getmem ("comparison_type",(size_t)nvars);
  default_comp_type = COMP_TYPE_UNINIT;
  for (i = 0; i < nvars; i++)
    comparison_type[i] = COMP_TYPE_UNINIT;
  for (i = 1; i < *nparams; i++) {
    if (stats_param(params[i])) {
      process_stats_param(&default_comp_type,comparison_type,params[i]);
      *params[i] = '\0';
    }
  }
  if (do_distinct && ! DO_DISTINCT) 
    err("Distinct count statistic desired but this program compiled ",
	 "without that capability");
  if (default_comp_type == COMP_TYPE_UNINIT)
				default_comp_type = DEFAULT_COMP_TYPE;
  for (i = 0; i < nvars; i++)
    if (comparison_type[i] == COMP_TYPE_UNINIT)
				comparison_type[i] = default_comp_type;

    /*  Generate "level splits" for this object				*/
  firstvarlevel = (int *)  getmem("firstvarlevel",  
				  (size_t)((maxlev+1) * sizeof(int))  );
    /*    Init								*/
  for (i = 0;  i <= maxlev;  i++)
    firstvarlevel[i] = 0;
    /*    Compute per-level totals					*/
  for (i = 0;  i < nvars;  i++)
    ++ firstvarlevel [ 1 + jdblevel_(&handle,&i) ];
    /*    Generate splits						*/
  for (i = 1;  i <= maxlev;  i++)
    firstvarlevel[i] += firstvarlevel[i-1] ;

#if DO_DISTINCT
  if (do_distinct) {
      /*  Create one value_list per variable				*/
    val_list =
     (struct value_list *) getmem ("val_list", 
				 (size_t)(nvars * sizeof(struct value_list)));
    for (i = 0; i < nvars; i++) {
      val_list[i].data_list = NULL;
      val_list[i].adata_list = (char *) getmem("initial .adata_list",1);
      *val_list[i].adata_list = '\0';
      val_list[i].list_next = 0;
      val_list[i].list_size = 0;
    }
  }
#endif

    /*  Size data buffer big enough to hold a "line" of input data	*/
  values = (char *) getmem ("values",(size_t)(nvars*valuesize));

  for (i = 0; i < nvars; i++) {
    for (j = 0; j < NSTATS; j++) {
      switch (j) {
	case COUNT0:  case COUNT1:  case COUNT2:
	  valid_data[i][j] = TRUE;
	  stats[i][j] = 0;
	  break;
	case MIN0:  case MIN1:  case MIN2:
	  valid_data[i][j] = TRUE;
	  if (comparison_type[i] == COMP_TYPE_ALPHA) {
	    astats[i][j] = (char *) getmem ("astats min",(size_t)valuesize);
	      /*  CHAR_MAX takes care of difference between machines	*/
	      /*  where char is signed and machines where it's unsigned	*/
	    for (k = 0; k < DATUMSIZE; k++)
	      astats[i][j][k] = CHAR_MAX;
	    astats[i][j][DATUMSIZE] = '\0';
	  } else
	    stats[i][j] = DBL_MAX;
	  break;
	case MAX0:  case MAX1:  case MAX2:
	  valid_data[i][j] = TRUE;
	  if (comparison_type[i] == COMP_TYPE_ALPHA) {
	    astats[i][j] = (char *) getmem ("astats max",(size_t)valuesize);
	      /*  Do not need CHAR_MIN here.  The C standard guarantees	*/
	      /*  that "any character in the machine's standard char-	*/
	      /*  acter set will never be negative" (K&R 1978).  There-	*/
	      /*  fore, empty string is fine as a minimum.  (Of course,	*/
	      /*  this assumes that the empty string has value 0!)	*/
	    astats[i][j][0] = '\0';
	  } else
	      /*  Do not use DBL_MIN here! RTFM!  More interesting is	*/
	      /*  whether -DBL_MAX is  a) guaranteed to exist & b) the	*/
	      /*  smallest double float.  At least 1 book implies that	*/
	      /*  C expects floating point representation to have a	*/
	      /*  sign bit, in which case a & b are true		*/
	    stats[i][j] = -DBL_MAX;
	  break;
	case SUM0:  case SUM1:  case SUM2:
	  valid_data[i][j] = (comparison_type[i] == COMP_TYPE_NUMERIC);
	  stats[i][j] = 0;    /*  Not needed if not numeric comp type	*/
	  break;
	case SUM_SQUARES0:  case SUM_SQUARES1:  case SUM_SQUARES2:
	  valid_data[i][j] = (comparison_type[i] == COMP_TYPE_NUMERIC);
	  stats[i][j] = 0;    /*  Not needed if not numeric comp type	*/
	  break;
#if DO_DISTINCT
	case DISTINCT0:  case DISTINCT1:  case DISTINCT2:
	  if (do_distinct) {
	    valid_data[i][j] = TRUE;
	    stats[i][j] = 0;
	  }
	  break;
#endif
	default:
	  errn("Internal error: unaccounted-for stat type ",j);
	  break;
      }
    }
  }

  while (   (lev = jdbreada_(&handle,values,&valuesize))  >=  0   ) {
    for (i = firstvarlevel[lev]; i < nvars; i++) {
      ptr = values + i*valuesize;
      explicit_invalid = (strcmp("nd",ptr) == 0);
      if (explicit_invalid)
	  /*  non_decodable not used if explicit_invalid, but be complete */
	non_decodable = TRUE;
      else {
	switch (comparison_type[i]) {
	  case COMP_TYPE_NUMERIC:
	    df = strtod(ptr,&end_ptr);
	    non_decodable = (*end_ptr != '\0');
	    break;
	  case COMP_TYPE_ALPHA:
	    non_decodable = FALSE;
	    break;
	  case COMP_TYPE_STD_STA_AS:
	    df = (double) get_std_sta_AS(ptr);
	    non_decodable = (df < 0);
	    break;
	  default:
	    errn("Internal error: no code to handle comparison type",
							comparison_type[i]);
	    break;
	}
      }
      stats[i][COUNT0]++;
      if (explicit_invalid) {
#if DO_DISTINCT
	  /*  Use valid_data flag for DISTINCT0 to indicate		*/
	  /*  "at least one 'nd' occurred"				*/
	if (do_distinct) valid_data[i][DISTINCT0] = FALSE;
#endif
	valid_data[i][MIN0] = FALSE;
	valid_data[i][MAX0] = FALSE;
	valid_data[i][SUM0] = FALSE;
	valid_data[i][SUM_SQUARES0] = FALSE;
      } else {
	stats[i][COUNT1]++;
	if (non_decodable) {
#if DO_DISTINCT
	    /*  Use valid_data flag for DISTINCT1 to indicate		*/
	    /*  "at least one non-decodable occurred"			*/
	  if (do_distinct) valid_data[i][DISTINCT0] = FALSE;
#endif
	  valid_data[i][MIN0] = FALSE;
	  valid_data[i][MAX0] = FALSE;
	  valid_data[i][SUM0] = FALSE;
	  valid_data[i][SUM_SQUARES0] = FALSE;
	  valid_data[i][MIN1] = FALSE;
	  valid_data[i][MAX1] = FALSE;
	  valid_data[i][SUM1] = FALSE;
	  valid_data[i][SUM_SQUARES1] = FALSE;
	} else {
	  stats[i][COUNT2]++;
#if DO_DISTINCT
	  if (do_distinct) {
	    switch (comparison_type[i]) {
	      case COMP_TYPE_NUMERIC:
	        if (new_distinct_val(df,&val_list[i])) stats[i][DISTINCT2]++;
	        break;
	      case COMP_TYPE_ALPHA:
	      case COMP_TYPE_STD_STA_AS:
	        if (alph_new_distinct_val(ptr,&val_list[i]))
						     stats[i][DISTINCT2]++;
	        break;
	      default:
	        errn("Internal error: no code to handle comparison type ",
							comparison_type[i]) ;
	        break;
	    }
	  }
#endif
	  switch (comparison_type[i]) {
	    case COMP_TYPE_STD_STA_AS:
	    case COMP_TYPE_NUMERIC:
	      if (valid_data[i][MIN0])
	        if (stats[i][MIN0] > df) stats[i][MIN0] = df;
	      if (valid_data[i][MIN1])
	        if (stats[i][MIN1] > df) stats[i][MIN1] = df;
	      if (valid_data[i][MIN2])
	        if (stats[i][MIN2] > df) stats[i][MIN2] = df;
	      if (valid_data[i][MAX0])
	        if (stats[i][MAX0] < df) stats[i][MAX0] = df;
	      if (valid_data[i][MAX1])
	        if (stats[i][MAX1] < df) stats[i][MAX1] = df;
	      if (valid_data[i][MAX2])
	        if (stats[i][MAX2] < df) stats[i][MAX2] = df;
	      if (valid_data[i][SUM0]) stats[i][SUM0] += df;
	      if (valid_data[i][SUM1]) stats[i][SUM1] += df;
	      if (valid_data[i][SUM2]) stats[i][SUM2] += df;
	      df *= df;
	      if (valid_data[i][SUM_SQUARES0]) stats[i][SUM_SQUARES0] += df;
	      if (valid_data[i][SUM_SQUARES1]) stats[i][SUM_SQUARES1] += df;
	      if (valid_data[i][SUM_SQUARES2]) stats[i][SUM_SQUARES2] += df;
	      break;
	    case COMP_TYPE_ALPHA:
	      if (valid_data[i][MIN0])
	        if (strcmp(astats[i][MIN0],ptr) >0) strcpy(astats[i][MIN0],ptr);
	      if (valid_data[i][MIN1])
	        if (strcmp(astats[i][MIN1],ptr) >0) strcpy(astats[i][MIN1],ptr);
	      if (valid_data[i][MIN2])
	        if (strcmp(astats[i][MIN2],ptr) >0) strcpy(astats[i][MIN2],ptr);
	      if (valid_data[i][MAX0])
	        if (strcmp(astats[i][MAX0],ptr) <0) strcpy(astats[i][MAX0],ptr);
	      if (valid_data[i][MAX1])
	        if (strcmp(astats[i][MAX1],ptr) <0) strcpy(astats[i][MAX1],ptr);
	      if (valid_data[i][MAX2])
	        if (strcmp(astats[i][MAX2],ptr) <0) strcpy(astats[i][MAX2],ptr);
	      break;
	    default:
	      errn("Internal error: no code to handle comparison type",
							comparison_type[i]);
	      break;
	  }
	}
      }
    }
  }
#if DO_DISTINCT
  if (do_distinct) {
      /*  Incorporate irregular data into DISTINCT vals			*/
      /*  There are at least as many distinct vals as "real" distinct	*/
      /*  vals.  If we're disregarding nd's but we found non-decodables	*/
      /*  add 1 to that count.  If we're counting both nd's AND non-	*/
      /*  decodables, add 1 for each that we found			*/
    for (i = 0; i < nvars; i++) {
      stats[i][DISTINCT0] = stats[i][DISTINCT2];
      stats[i][DISTINCT1] = stats[i][DISTINCT2];
      if ( ! valid_data[i][DISTINCT0])
        stats[i][DISTINCT0]++;  
      if ( ! valid_data[i][DISTINCT1]) {
        stats[i][DISTINCT0]++;  
        stats[i][DISTINCT1]++;  
      }
        /*  Restore valid_data flags to their real purpose.		*/
      valid_data[i][DISTINCT0] = TRUE;
      valid_data[i][DISTINCT1] = TRUE;
    }
  }
#endif
  for (i = 0; i < nvars; i++)
      /*  COUNT2=0 means nothing decodable was ever found		*/
    if (stats[i][COUNT2] == 0)
      for (j = 0; j < NSTATS; j++) {
	is_a_count = ((j == COUNT0) || (j == COUNT1) || (j == COUNT2));
#if DO_DISTINCT
	if (do_distinct) {
	  is_a_distinct_count =
		((j == DISTINCT0) || (j == DISTINCT1) || (j == DISTINCT2));
	  is_a_count = (is_a_count || is_a_distinct_count);
	}
#endif
	if ( ! is_a_count ) valid_data[i][j] = FALSE;
      }

    /*  Set widths for display						*/
  set_stat_widths();
  if (rows_are_stats) {
    j = stat_widths[0];
    for (i=1; i<NSTATS; i++)
      if (j < stat_widths[i]) j = stat_widths[i];
    set_widths (NSTATS,nvars,
		stat_names,snamesize,
		names,vnamesize,
		VARNAME_FOR_STAT_ID,
		&j,1);
  } else
    set_widths (nvars,NSTATS,
		names,vnamesize,
		stat_names,snamesize,
		VARNAME_FOR_VARIABLE_ID,
		stat_widths,NSTATS);

    /*  +1 for "id" variable						*/
  *ntotal = (rows_are_stats) ? nvars+1 : NSTATS+1;

  return 0;  /* stat object has only 1 level.  This is NOT input obj level */
}

int ioreadrec_(level)
int *level;
/*
    Read record at appropriate level. Return 0 if end at that level. Return
    1 if ok. 
*/
{
  if (rows_are_stats)  return  (++stat_being_worked_on == NSTATS) ? 0 : 1;
  else  return  (++var_being_worked_on == nvars) ? 0 : 1;
}

void ioclose_()
/* Close files */
{
  jdbclose_(&handle);
}

void iovalstr_(vn,tmp)
int *vn;
char *tmp;
/*
    Return string value (tmp) for variable indexed by vn. 
*/
{
  int i,var,stat;
  char *row_id;

  if (*vn == 0) {
    row_id = (rows_are_stats) ?
	stat_names + stat_being_worked_on*snamesize :
	names + var_being_worked_on*vnamesize;
    strcpy (tmp, row_id);
  } else {
    stat = (rows_are_stats) ? stat_being_worked_on : *vn - 1;
    var = (rows_are_stats) ? *vn - 1 : var_being_worked_on;
    if (valid_data[var][stat])
	/*  Need to select stats for which there might be non-numeric	*/
	/*  vals.  Then, within that, check which vars are non-numeric	*/
	/*  Would be nice if we could nest switch statements...		*/
      if (  (stat == MAX0) || (stat == MAX1) || (stat == MAX2) ||
	    (stat == MIN0) || (stat == MIN1) || (stat == MIN2)    )
	  switch (comparison_type[var]) {
	    case COMP_TYPE_NUMERIC:
	      sprintf(tmp, "%.*g", stat_precisions[stat], stats[var][stat]);
	      break;
	    case COMP_TYPE_ALPHA:
	      strcpy (tmp,astats[var][stat]);
	      break;
	    case COMP_TYPE_STD_STA_AS:
		/*  Hopefully conversion from int to double & back,	*/
		/*  along w/ the .1 slop, will be OK 			*/
	      i = (int) (stats[var][stat] + .1);
	      strcpy (tmp,std_stas_AS[i]);
	      break;
	    default:
	      errn
	        ("Internal error - iovalstr: no code to handle comparison type",
							comparison_type[var]); 
	      break;
	  }
	else
	  sprintf(tmp, "%.*g", stat_precisions[stat], stats[var][stat]);
    else
      strcpy (tmp,"nd");
  }

  return;
}

void iovaldouble_(vn,f)
int *vn;
double *f;
{
  if (*vn == 0)
    *f = -9999.0;
  else
    if (rows_are_stats)
      if (  (comparison_type[*vn-1] == COMP_TYPE_NUMERIC) &&
             valid_data[*vn-1][stat_being_worked_on]		)
				*f = stats[*vn-1][stat_being_worked_on];
      else *f = -9999.0;
    else
      if (  (comparison_type[var_being_worked_on] == COMP_TYPE_NUMERIC) &&
	     valid_data[var_being_worked_on][*vn-1]			    )
				*f = stats[var_being_worked_on][*vn-1];
      else *f = -9999.0;

  return;
}

void iovalreal_(vn,f)
int *vn;
float *f;
/*
    Return real value (f) for variable indexed by vn. -9999 for strings */
{
  double df;

  iovaldouble_(vn,&df);
  *f = df;

  return;
}

int iovarlevel_(vn)
int *vn;
/*	"Return level corresponding to variable indexed by vn"		*/
/*	(returns maxlev for indices illegally big; 0 for indices 	*/
/*	illegally small)						*/
{
  return 0;	/*  All vars at level 0					*/
}

int ioattrout_(vn,str)
int *vn;
char *str;
/*
    Output attributes for variable indexed by vn. */
{
  if (out_attr_widths[*vn] == 0) return 0;

  sprintf (str, "width=%d", out_attr_widths[*vn]);
  out_attr_widths[*vn] = 0;
  return 1;
}

void ioname_(vn,s)
int *vn;
char *s;
/*
    Return name (s) corresponding to variable number vn. */
{

  if (rows_are_stats)
    if (*vn == 0) strcpy (s,VARNAME_FOR_STAT_ID);
    else strcpy ( s,  names + (*vn-1)*vnamesize  );
  else
    if (*vn == 0) strcpy (s,VARNAME_FOR_VARIABLE_ID);
    else strcpy ( s,  stat_names + (*vn-1)*snamesize  );

  return;
}

int iocommout_(s)
char *s;
{
  static char *comment_ptr = NULL;
  char *newline_ptr;
  int len;

  if (comment_ptr == NULL) comment_ptr = comments;

  newline_ptr = strchr(comment_ptr,'\n');
  if (newline_ptr == NULL) return 0;

  len = newline_ptr - comment_ptr;	/*  Don't send back trailing \n	*/
  strncpy (s,comment_ptr,len);
  s[len] = '\0';

  comment_ptr = newline_ptr + 1;
  return 1;
}

int iowidth_(vn)
int *vn;
/*  "Return length of variable field indexed by vn" [from outer, JGOFS 1.5]  */
{
  return out_widths[*vn];
}
