char *stat_version="statisticker version 3.0 14 Apr 2000"; /* statisticker: reads a JGOFS object and reports per-variable statistics, themselves presented as a JGOFS object */ /* Input & output doc appears after revision info */ /* 13 Apr 00. WJS */ /* Add a "Standard station-Arabian Sea" comparison type */ /* 4 Apr 00. WJS */ /* Add ability to define comparison type for max, min, etc */ /* [Begin v 3.0] */ /* 28 Jan 00. WJS */ /* Add "distinct" counts as a select-at-compile-time option */ /* (by default, do them) */ /* Minimum precision to 9 chars */ /* Bug fix: accessed variable "N+1" instead of stopping at "N" */ /* when computing all stats */ /* [Begin v 2.0] */ /* 17 Jan 00. WJS */ /* More output precision for sums */ /* Propagate width= attrib (others aren't propagated) */ /* [Begin v 1.1] */ /* 3 Jan 00. WJS */ /* [Begin v 1.0] */ /* Input: 1 required parameter and a number of optional parameters */ /* Also, an environment variable controlling output - see */ /* "Output:", below */ /* */ /* The required parameter is parameter 1. It is the object */ /* specifier (in "jdb format"; no /jg/serv; parameters parenthesized) */ /* pointing to the input data for the statisticker. */ /* The optional parameters, if any, may appear anywhere in the */ /* parameter list after the object specifiers. They define the */ /* type of comparison to be used when testing for max, min, etc. */ /* The default test type is determined at compilation time. As */ /* released (v 3.0, WJS), the default test type is numeric. */ /* As of v 3.0, the other types of test are alpha and "JGOFS standard */ /* station-Arabian Sea". */ /* The syntax of each parameter is */ /* comparison_type=list;comparison_type=list;... */ /* where */ /* comparison_type is of the form XkeywordX */ /* X represents a character illegal to a JGOFS variable */ /* name. We accept one of !@#$%&? */ /* keyword is either "numeric", "alpha", or "std_sta_AS" */ /* keyword is case insensitive */ /* list is a list of JGOFS variable names or a quantifier. 
*/ /* If a list, the character used to separate list elements */ /* can be any one of ,+-^* */ /* If a list, all variables must be in object unless env */ /* var VARS_MUST_BE_IN_OBJECT is set to FALSE (or program */ /* is compiled that way) */ /* A quantifier is of the form YkeywordY, where Y comes */ /* from the set of special characters defined in compari- */ /* son type. The keyword is either "all" or "rest" (case */ /* insensitive) */ /* A slash can be used in place of the semicolon that separates */ /* the "type=list" pairs */ /* Specification of more than one parameter is the equivalent of */ /* specifying all the parameters as a single list. (Purpose of choice */ /* between single list and multiple parameters is to ease syntax when */ /* using program from command line vs http QUERY_STRING, etc) */ /* Comparison types may appear multiple times. Variable */ /* names and quantifiers can appear only once */ /* Example: */ /* #numeric#=temp,sal,press;#alpha#=#rest# */ /* This says "do numeric comparisons for the variables temp, sal and */ /* press, and do alpha comparisons for all other variables" */ /* */ /* Output: Statistics, presented as a JGOFS object. */ /* A perl function (calcstat.pl) exists which will take this */ /* output and present it to the perl calling program as a */ /* perl list which can be interpreted as a hash whose keys */ /* are the statistic names and whose values are the */ /* statistics for each variable */ /* */ /* The "variables" (columns) of the output object can be either */ /* the variables (columns) of the input object, with the rows being */ /* the statistics, or vice versa. The environment variable */ /* ROWS_ARE_STATS controls this. If unspecified, the compiled value */ /* of this switch is used. As released (v 3.0, WJS), the compiled */ /* value is TRUE. */ /* The statistics computed and their definitions are presented to */ /* the program user as "JGOFS object comments". 
To see them now, look */ /* at the definition of the C variable "comments" in program text. */ #include #include #include #include #include #include #ifdef VMS #include #define strcasecmp strcmp #define CHAR_MAX '\177' #else #include #endif #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE !FALSE #endif typedef signed char Logical; /* NB: occasionally gets -1, */ /* hence need signed */ /***********************/ /* Next statement should really be "if exactly one of HP or IBM is */ /* defined and also TRUE" but I'm tired... */ /* On HP/IBM builds the jdb* entry points are aliased to the same */ /* names without the trailing underscore */ #if defined(HP) || defined(IBM) #define jdbopen_ jdbopen #define jdbreada_ jdbreada #define jdbclose_ jdbclose #define jdblevel_ jdblevel #define jdbattributes_ jdbattributes #define jdbcomments_ jdbcomments #endif /* jdb* routines are only declared here, K&R style (no prototypes) */ int jdbopen_(); int jdbreada_(); int jdbclose_(); int jdblevel_(); int jdbattributes_(); int jdbcomments_(); #include OPTIONS #include INNEROPTIONS #ifndef DO_DISTINCT #define DO_DISTINCT TRUE #endif /* Try to get value of compilation switch into executable so we can */ /* do "strings" and find out which option was compiled in */ #if DO_DISTINCT char *distinct = "computing distinct counts enabled"; #else char *distinct = "computing distinct counts disabled"; #endif /****/ /* Text describing the statistics; per the header notes it is shown */ /* to the user as JGOFS object comments. Two variants: with and */ /* without the distinct-count paragraph */ #if DO_DISTINCT char *comments = "\ Statistics returned include minima, maxima, counts, distinct counts,\n\ sums, and sums of squares\n\ The numeric suffixes indicate how irregular data are treated\n\ Irregular data are of 2 types. The first type is data explicitly marked\n\ as 'missing' in the input object (input object data whose values are the\n\ 2-character string 'nd'). 
The second type is data whose characters do not\n\ decode into legal values. This can be because of error, or because the data\n\ do not match the type expected (eg, a station 'J1' when station is numeric)\n\ 0 All data are included in the statistic. 
Except for counts,\n\ any irregular datum causes the corresponding statistic to\n\ be given the value 'nd'\n\ 1 Data explictly marked as irregular are skipped. Except for\n\ counts, non-decodable values cause the corresponding\n\ statistic to be given the value 'nd'. 'nd' is also assigned\n\ if all data are irregular\n\ 2 All irregular data are skipped. 'nd' is assigned to non-\n\ count statistics if all data are irregular\n\ As far as distinct counts are concerned, 1 is added to the appropriate\n\ counts if any explicitly irregular data are found (and explicitly irregular\n\ data are included in the count), and another 1 is added if any non-decodable\n\ data are found (and non-decodable data are included in the count).\n\ "; /* Statistic indices: second subscript of stats[][], astats[][] and */ /* valid_data[][], and position within stat_names/stat_widths */ #define COUNT2 0 #define DISTINCT2 1 #define MIN2 2 #define MAX2 3 #define SUM2 4 #define SUM_SQUARES2 5 #define COUNT1 6 #define DISTINCT1 7 #define MIN1 8 #define MAX1 9 #define SUM1 10 #define SUM_SQUARES1 11 #define COUNT0 12 #define DISTINCT0 13 #define MIN0 14 #define MAX0 15 #define SUM0 16 #define SUM_SQUARES0 17 #define NSTATS 18 /* Maintain = length of list above */ #else char *comments = "\ Statistics returned include minima, maxima, counts, sums, and sums of squares\n\ The numeric suffixes indicate how irregular data are treated\n\ Irregular data are of 2 types. The first type is data explicitly marked\n\ as 'missing' in the input object (input object data whose values are the\n\ 2-character string 'nd'). The second type is data whose characters do not\n\ decode into legal values. This can be because of error, or because the data\n\ do not match the type expected (eg, a station 'J1' when station is numeric)\n\ 0 All data are included in the statistic. 
Except for counts,\n\ any irregular datum causes the corresponding statistic to\n\ be given the value 'nd'\n\ 1 Data explictly marked as irregular are skipped. Except for\n\ counts, non-decodable values cause the corresponding\n\ statistic to be given the value 'nd'. 
'nd' is also assigned\n\ if all data are irregular\n\ 2 All irregular data are skipped. 'nd' is assigned to non-\n\ count statistics if all data are irregular\n\ "; /* Same statistic indices as above, minus the DISTINCT entries */ #define COUNT2 0 #define MIN2 1 #define MAX2 2 #define SUM2 3 #define SUM_SQUARES2 4 #define COUNT1 5 #define MIN1 6 #define MAX1 7 #define SUM1 8 #define SUM_SQUARES1 9 #define COUNT0 10 #define MIN0 11 #define MAX0 12 #define SUM0 13 #define SUM_SQUARES0 14 #define NSTATS 15 /* Maintain = length of list above */ #endif /****/ int vnamesize=VARNAMESIZE+1; int valuesize=DATUMSIZE+1+1; /* "Extra" +1 for ALPH_SEP */ void error_(); /* ... entry in outer */ int handle = -1; /* Not sure why this needs = -1 */ /* Things related to input object */ int nvars,maxlev; char names[NVAR * (VARNAMESIZE+1)]; char *values; /* values[nvars][valuesize]; element i is at values + i*valuesize */ int *firstvarlevel; /* firstvarlevel[maxlev] */ signed char *comparison_type; /* comparison_type[nvars] */ # #if DO_DISTINCT struct value_list { /* Use 1 of data_list or adata_list based on comparison_type */ /* Could have union-ed, but seems more trouble than worth */ double *data_list; /* data_list[n_distinct_vals] */ char *adata_list; /* ptr to string holding all */ /* distinct alpha vals concatenated */ int list_next; /* next free element (data_list) or next free */ /* char offset (adata_list) */ int list_size; /* allocated capacity, in the same units */ } *val_list; /* val_list[nvars] */ #ifndef NUM_DATALIST_PER_MALLOC #define NUM_DATALIST_PER_MALLOC 200 #endif #ifndef AV_SIZE_ADATUM #define AV_SIZE_ADATUM 15 #endif #ifndef NUM_ADATALIST_PER_MALLOC #define NUM_ADATALIST_PER_MALLOC NUM_DATALIST_PER_MALLOC*AV_SIZE_ADATUM #endif #endif /* Things related to stats */ /* valid_data[var][stat] is cleared to FALSE once stat can no longer */ /* be reported for var (irregular data seen, or nothing decodable) */ Logical valid_data[NVAR][NSTATS]; double stats[NVAR][NSTATS]; char *astats[NVAR][NSTATS]; /* Only need # alph stats and a subset */ /* of NSTATS, but making it NSTATS easier */ #define STATNAMESIZE 20 int snamesize = STATNAMESIZE+1; char stat_names[NSTATS * (STATNAMESIZE+1)]; int stat_widths[NSTATS]; int stat_precisions[NSTATS]; #define VARNAME_FOR_STAT_ID "stat_name" #define VARNAME_FOR_VARIABLE_ID "variable" int 
stat_being_worked_on = -1; int var_being_worked_on = -1; int *out_widths,*out_attr_widths; /* Both allocated by set_widths(): ncols+1 entries, */ /* entry 0 is the row-header (id) column */ /* Process compile-time defaults here. Can be overridden @ runtime */ /* (init_stuff reads env vars ROWS_ARE_STATS and VARS_MUST_BE_IN_OBJECT) */ #ifndef ROWS_ARE_STATS #define ROWS_ARE_STATS TRUE #endif Logical rows_are_stats = ROWS_ARE_STATS; #ifndef CHECK_VARS #define CHECK_VARS TRUE #endif Logical check_vars = CHECK_VARS; /* Stuff relating to parsing comparison selection list */ /* Use some #ifndefs so user can override at compile time */ #define COMP_TYPE_UNINIT -1 #define COMP_TYPE_NUMERIC 0 #define COMP_TYPE_ALPHA 1 #define COMP_TYPE_STD_STA_AS 2 #ifndef DEFAULT_COMP_TYPE #define DEFAULT_COMP_TYPE COMP_TYPE_NUMERIC #endif #if ! ( (DEFAULT_COMP_TYPE == COMP_TYPE_NUMERIC) || \ (DEFAULT_COMP_TYPE == COMP_TYPE_ALPHA) || \ (DEFAULT_COMP_TYPE == COMP_TYPE_STD_STA_AS) ) #error "Illegal DEFAULT_COMP_TYPE" #endif #define COMP_TYPE_NUMERIC_KEYWORD "numeric" #define COMP_TYPE_ALPHA_KEYWORD "alpha" #define COMP_TYPE_STD_STA_AS_KEYWORD "std_sta_AS" #define COMP_QUANTIFIER_1 "all" #define COMP_QUANTIFIER_2 "rest" #define KEYWORD_LIST_SEPARATOR '=' #ifndef ILLEGAL_JGOF_CHARS #define ILLEGAL_JGOF_CHARS "!@#$%&?" #endif #ifndef LIST_SEPARATORS #define LIST_SEPARATORS ";/" #endif #ifndef VARNAME_SEPARATORS #define VARNAME_SEPARATORS ",+-^*" #endif #ifndef ALPH_SEP #define ALPH_SEP '\1' /* A non-zero, non-alpha character */ #endif /* From max to min, order is N1 -> N11, M1, S15 -> S1, A */ /* (from Chris 11 Apr 2000). */ #define NSTD_STAS_AS 28 #define MAXLEN_STD_STA_AS 3 /* Could runtime strlen() the vals... 
*/ char *std_stas_AS[NSTD_STAS_AS] = { "A", "S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10","S11", "S12","S13","S14","S15", "M1", "N11","N10","N9", "N8", "N7", "N6", "N5", "N4", "N3", "N2", "N1" }; /************************************************************************/ void *getmem(bufname,nbytes) char *bufname; int nbytes; { #define SIZERRBUF 150 static char errbuf[SIZERRBUF]; /* Deliberately static and fixed size */ /* since if we need it we're having */ /* trouble allocating memory */ static char *piece1 = "Could not get "; static char *piece2 = " bytes memory for '"; static char *piece3 = "' buffer/structure(s)"; int errmsglen; char *ptr; ptr = (void *) malloc(nbytes); if (ptr == NULL) { /* Length is that of 3 pieces of fixed text (above) + width of */ /* args passed here + 1 for '\0' */ errmsglen = strlen(piece1) + strlen(piece2) + strlen(piece3); errmsglen += log10((double)nbytes) + 1; errmsglen += strlen(bufname) + 1; if (errmsglen <= SIZERRBUF) { sprintf(errbuf,"Could not get %d bytes memory for '%s' buffer\n", nbytes, bufname ); error_(errbuf,stat_version); } else error_("Could not get memory for buffer/structure ",bufname); } return ptr; #undef SIZERRBUF } Logical get_logical_value (string) char *string; /* Return 1 if string is synonym for TRUE, 0 for FALSE, -1 if not a synonym */ /* (From outer. 
Sigh, sigh (2 because outer's is from defgb_utils)) */ { #define GET_LOGICAL_ERR_VAL -1 #if (GET_LOGICAL_ERR_VAL == TRUE) || (GET_LOGICAL_ERR_VAL == FALSE) #error "Non-TRUE/FALSE value is either TRUE or FALSE" #endif switch (*string) { case 't': case 'T': case 'y': case 'Y': case '1': if (*(string+1) == '\0') return TRUE; if ( (strcmp("YES",string) == 0) || (strcmp("yes",string) == 0) || (strcmp("TRUE",string) == 0) || (strcmp("true",string) == 0) ) return TRUE; break; case 'f': case 'F': case 'n': case 'N': case '0': if (*(string+1) == '\0') return FALSE; if ( (strcmp("NO",string) == 0) || (strcmp("no",string) == 0) || (strcmp("FALSE",string) == 0) || (strcmp("false",string) == 0) ) return FALSE; break; default: /* For completeness */ ; break; } return GET_LOGICAL_ERR_VAL; } void init_stuff() { int i,j; char *ptr; /* Do some sanity checks on compile-time assumptions, vals, etc */ if (isalnum(ALPH_SEP)) error_ ("Internal error - ALPH_SEP must not be alphanumeric",stat_version); if (strpbrk(ILLEGAL_JGOF_CHARS,LIST_SEPARATORS) != NULL) error_ ( "Internal error - ILLEGAL_JGOF_CHARS & LIST_SEPARATORS must be disjoint", stat_version); if (strpbrk(ILLEGAL_JGOF_CHARS,VARNAME_SEPARATORS) != NULL) error_ ( "Internal error - ILLEGAL_JGOF_CHARS & VARNAME_SEPARATORS must be disjoint", stat_version); if (strpbrk(VARNAME_SEPARATORS,LIST_SEPARATORS) != NULL) error_ ( "Internal error - VARNAME_SEPARATORS & LIST_SEPARATORS must be disjoint", stat_version); if ( (strchr(ILLEGAL_JGOF_CHARS,KEYWORD_LIST_SEPARATOR) != NULL) || (strchr(LIST_SEPARATORS,KEYWORD_LIST_SEPARATOR) != NULL) || (strchr(VARNAME_SEPARATORS,KEYWORD_LIST_SEPARATOR) != NULL) ) error_ ("Internal error - KEYWORD_LIST_SEPARATOR may not appear as special\ character on other lists",stat_version); if ( (ptr = getenv("ROWS_ARE_STATS")) != NULL ) if ( (rows_are_stats = get_logical_value(ptr)) == GET_LOGICAL_ERR_VAL ) error_("Illegal value for TRUE/FALSE env var ROWS_ARE_STATS",ptr); if ( (ptr = 
getenv("VARS_MUST_BE_IN_OBJECT")) != NULL ) if ( (check_vars = get_logical_value(ptr)) == GET_LOGICAL_ERR_VAL ) error_("Illegal value for TRUE/FALSE env var VARS_MUST_BE_IN_OBJECT",ptr); /* Do next stuff at run time instead of compile time to make it */ /* easier to alter order of statistics */ /* Init the values that the VARNAME_FOR_STAT_ID defn will take */ /* I give up on checking if these constants will fit in their bufs */ /* STATNAMESIZE is what they all should be no bigger than */ strcpy ( stat_names + COUNT0*snamesize, "count0" ); strcpy ( stat_names + COUNT1*snamesize, "count1" ); strcpy ( stat_names + COUNT2*snamesize, "count2" ); strcpy ( stat_names + MIN0*snamesize, "min0" ); strcpy ( stat_names + MIN1*snamesize, "min1" ); strcpy ( stat_names + MIN2*snamesize, "min2" ); strcpy ( stat_names + MAX0*snamesize, "max0" ); strcpy ( stat_names + MAX1*snamesize, "max1" ); strcpy ( stat_names + MAX2*snamesize, "max2" ); strcpy ( stat_names + SUM0*snamesize, "sum0" ); strcpy ( stat_names + SUM1*snamesize, "sum1" ); strcpy ( stat_names + SUM2*snamesize, "sum2" ); strcpy ( stat_names + SUM_SQUARES0*snamesize, "sum_squares0" ); strcpy ( stat_names + SUM_SQUARES1*snamesize, "sum_squares1" ); strcpy ( stat_names + SUM_SQUARES2*snamesize, "sum_squares2" ); #if DO_DISTINCT strcpy ( stat_names + DISTINCT0*snamesize, "distinct0" ); strcpy ( stat_names + DISTINCT1*snamesize, "distinct1" ); strcpy ( stat_names + DISTINCT2*snamesize, "distinct2" ); #endif return; } int get_std_sta_AS(sta) char *sta; /* Return index into std_stas_AS list. If not in list, return -1 */ { int i; for (i = 0; i < NSTD_STAS_AS; i++) if (strcmp(sta,std_stas_AS[i]) == 0) break; return (i == NSTD_STAS_AS) ? -1 : i; } #if DO_DISTINCT Logical new_distinct_val(val,list_struct) double val; struct value_list *list_struct; /* See if val is in list found within list_struct. If so, return */ /* FALSE. 
If not, add val to that list, then return TRUE */ { int i; double *dptr; /* Just for convenience */ dptr = list_struct->data_list; for (i=0; i < list_struct->list_next; i++) if (val == dptr[i]) return FALSE; if (list_struct->list_next == list_struct->list_size) { list_struct->list_size += NUM_DATALIST_PER_MALLOC; dptr = (double *) realloc (dptr , list_struct->list_size * sizeof(double)); if (dptr == NULL) error_("Could not get memory for '.data_list' buffer",stat_version); list_struct->data_list = dptr; } dptr[(list_struct->list_next)++] = val; return TRUE; } Logical alph_new_distinct_val(val,list_struct) char *val; struct value_list *list_struct; /* alpha version of new_distinct_val */ { int len; len = strlen(val); /* Use ALPH_SEP so whole string can be searched w/1 strstr call. */ /* Hypothesis is that this more efficient than loop of many strcmp's */ /* strstr tests more chars; loop does more function calls. strstr */ /* also requires single string to contain all alph data (65767 char */ /* limit on VMS strings?). Recode if necessary... */ val[len++] = ALPH_SEP; val[len] = '\0'; if (strstr(list_struct->adata_list,val) == NULL) { /* Not on list, so new unique value. */ if ( (list_struct->list_next) + len > list_struct->list_size ) { /* Need more memory */ list_struct->list_size += NUM_ADATALIST_PER_MALLOC; list_struct->adata_list = (char *) realloc ( list_struct->adata_list, list_struct->list_size); if (list_struct->adata_list == NULL) error_("Could not get memory for '.adata_list' buffer",stat_version); } /* Add new val to list */ strcpy ( (list_struct->adata_list) + (list_struct->list_next), val ); list_struct->list_next += len; val[--len] = '\0'; return TRUE; } else { /* On list, so not a unique value. 
*/ val[--len] = '\0'; return FALSE; } } #endif void set_widths (nrows,ncols, row_headers,row_header_maxsize, data_col_headers,data_col_header_maxsize, row_headers_col_header, data_col_widths_array,data_col_widths_array_size) /* Set up out_widths array (and a copy, out_attr_widths) */ /* 2 parts. 1st, set up the width of the first column, which */ /* is the max of the name of that column and all the values in */ /* that column. 2nd, the rest of the widths are either constant */ /* or a copy of an input array, maximized w/the name of the column */ int nrows; /* Not counting title row */ int ncols; /* Not counting id column */ char *row_headers,*data_col_headers,*row_headers_col_header; int row_header_maxsize,data_col_header_maxsize; int data_col_widths_array[]; int data_col_widths_array_size; { int i,j,k; int ptr; if ( ! ( (data_col_widths_array_size == 1) || (data_col_widths_array_size == ncols) ) ) error_("Internal error: bad data_col_widths_array_size to set_widths", stat_version); out_widths = (int *) getmem ( "out_widths", (ncols + 1) * sizeof(int) ); out_attr_widths = (int *) getmem ( "out_attr_widths", (ncols + 1) * sizeof(int) ); /* 1st col - take biggest row header unless col header for the row */ /* headers is bigger */ k = strlen(row_headers_col_header); for (i = 0; i < nrows; i++) { j = strlen(row_headers + i*row_header_maxsize); if (j > k) k = j; } out_attr_widths[0] = out_widths[0] = k; /* Other cols - take max of data width w/ col header width */ for (i = 1; i <= ncols; i++) { j = strlen(data_col_headers + i*data_col_header_maxsize); k = (data_col_widths_array_size == 1) ? data_col_widths_array[0] : data_col_widths_array[i-1]; out_attr_widths[i] = out_widths[i] = (j > k) ? j : k; } return; } void set_stat_widths() /* Set the output width of each statistic. Used to set the JGOFS */ /* object width= attribute; not so simple because that object can */ /* be presented w/stats as rows or columns... 
*/ { int i,j,k,max; int non_precision_width_chars; int numeric_single_width,numeric_single_digits_of_precision; /* Want enough precision in output so that if people use sums of */ /* squares, etc, stats will be valid. Decided to use single float */ /* precision for max & min; double for sums and sums_of_squares */ /* Counts get single kind of by default. Constants come from */ /* float.h */ /* Temporarily (?) increase precision so that 8 digit event */ /* numbers don't print in E format. Using 9 because I'm confused */ /* about FLT_DIG vs printf precision and I don't want to edit */ /* this twice. Check vs DBL_DIG is sanity... assume DBL_DIG > */ /* FLT_DIG!!! */ numeric_single_digits_of_precision = (9 > FLT_DIG) ? 9 : FLT_DIG; if (numeric_single_digits_of_precision >= DBL_DIG) numeric_single_digits_of_precision = DBL_DIG - 1; /* "+N.", "E+", max width of exponent */ non_precision_width_chars = 3 + 2 + (1 + log10(log10(DBL_MAX))); numeric_single_width = numeric_single_digits_of_precision + non_precision_width_chars; for (i=0; i= 0 ) { for (i = firstvarlevel[lev]; i < nvars; i++) { ptr = values + i*valuesize; explicit_invalid = (strcmp("nd",ptr) == 0); if (explicit_invalid) /* non_decodable not used if explicit_invalid, but be complete */ non_decodable = TRUE; else { switch (comparison_type[i]) { case COMP_TYPE_NUMERIC: df = strtod(ptr,&end_ptr); non_decodable = (*end_ptr != '\0'); break; case COMP_TYPE_ALPHA: non_decodable = FALSE; break; case COMP_TYPE_STD_STA_AS: df = (double) get_std_sta_AS(ptr); non_decodable = (df < 0); break; default: error_("Internal error: no code to handle comparison type", stat_version); break; } } stats[i][COUNT0]++; if (explicit_invalid) { #if DO_DISTINCT /* Use valid_data flag for DISTINCT0 to indicate */ /* "at least one 'nd' occurred" */ valid_data[i][DISTINCT0] = FALSE; #endif valid_data[i][MIN0] = FALSE; valid_data[i][MAX0] = FALSE; valid_data[i][SUM0] = FALSE; valid_data[i][SUM_SQUARES0] = FALSE; } else { stats[i][COUNT1]++; if 
(non_decodable) { #if DO_DISTINCT /* Use valid_data flag for DISTINCT1 to indicate */ /* "at least one non-decodable occurred" */ valid_data[i][DISTINCT0] = FALSE; #endif valid_data[i][MIN0] = FALSE; valid_data[i][MAX0] = FALSE; valid_data[i][SUM0] = FALSE; valid_data[i][SUM_SQUARES0] = FALSE; valid_data[i][MIN1] = FALSE; valid_data[i][MAX1] = FALSE; valid_data[i][SUM1] = FALSE; valid_data[i][SUM_SQUARES1] = FALSE; } else { stats[i][COUNT2]++; #if DO_DISTINCT switch (comparison_type[i]) { case COMP_TYPE_NUMERIC: if (new_distinct_val(df,&val_list[i])) stats[i][DISTINCT2]++; break; case COMP_TYPE_ALPHA: case COMP_TYPE_STD_STA_AS: if (alph_new_distinct_val(ptr,&val_list[i])) stats[i][DISTINCT2]++; break; default: error_("Internal error: no code to handle comparison type", stat_version); break; } #endif switch (comparison_type[i]) { case COMP_TYPE_STD_STA_AS: case COMP_TYPE_NUMERIC: if (valid_data[i][MIN0]) if (stats[i][MIN0] > df) stats[i][MIN0] = df; if (valid_data[i][MIN1]) if (stats[i][MIN1] > df) stats[i][MIN1] = df; if (valid_data[i][MIN2]) if (stats[i][MIN2] > df) stats[i][MIN2] = df; if (valid_data[i][MAX0]) if (stats[i][MAX0] < df) stats[i][MAX0] = df; if (valid_data[i][MAX1]) if (stats[i][MAX1] < df) stats[i][MAX1] = df; if (valid_data[i][MAX2]) if (stats[i][MAX2] < df) stats[i][MAX2] = df; if (valid_data[i][SUM0]) stats[i][SUM0] += df; if (valid_data[i][SUM1]) stats[i][SUM1] += df; if (valid_data[i][SUM2]) stats[i][SUM2] += df; df *= df; if (valid_data[i][SUM_SQUARES0]) stats[i][SUM_SQUARES0] += df; if (valid_data[i][SUM_SQUARES1]) stats[i][SUM_SQUARES1] += df; if (valid_data[i][SUM_SQUARES2]) stats[i][SUM_SQUARES2] += df; break; case COMP_TYPE_ALPHA: if (valid_data[i][MIN0]) if (strcmp(astats[i][MIN0],ptr) >0) strcpy(astats[i][MIN0],ptr); if (valid_data[i][MIN1]) if (strcmp(astats[i][MIN1],ptr) >0) strcpy(astats[i][MIN1],ptr); if (valid_data[i][MIN2]) if (strcmp(astats[i][MIN2],ptr) >0) strcpy(astats[i][MIN2],ptr); if (valid_data[i][MAX0]) if 
(strcmp(astats[i][MAX0],ptr) <0) strcpy(astats[i][MAX0],ptr); if (valid_data[i][MAX1]) if (strcmp(astats[i][MAX1],ptr) <0) strcpy(astats[i][MAX1],ptr); if (valid_data[i][MAX2]) if (strcmp(astats[i][MAX2],ptr) <0) strcpy(astats[i][MAX2],ptr); break; default: error_("Internal error: no code to handle comparison type", stat_version); break; } } } } } #if DO_DISTINCT /* Incorporate irregular data into DISTINCT vals */ /* There are at least as many distinct vals as "real" distinct */ /* vals. If we're disregarding nd's but we found non-decodables, */ /* add 1 to that count. If we're counting both nd's AND non- */ /* decodables, add 1 for each that we found */ for (i = 0; i < nvars; i++) { stats[i][DISTINCT0] = stats[i][DISTINCT2]; stats[i][DISTINCT1] = stats[i][DISTINCT2]; if ( ! valid_data[i][DISTINCT0]) stats[i][DISTINCT0]++; if ( ! valid_data[i][DISTINCT1]) { stats[i][DISTINCT0]++; stats[i][DISTINCT1]++; } /* Restore valid_data flags to their real purpose. */ valid_data[i][DISTINCT0] = TRUE; valid_data[i][DISTINCT1] = TRUE; } #endif for (i = 0; i < nvars; i++) /* COUNT2=0 means nothing decodable was ever found */ if (stats[i][COUNT2] == 0) for (j = 0; j < NSTATS; j++) { is_a_count = ((j == COUNT0) || (j == COUNT1) || (j == COUNT2)); is_a_distinct_count = ((j == DISTINCT0) || (j == DISTINCT1) || (j == DISTINCT2)); is_a_count = (is_a_count || is_a_distinct_count); if ( ! is_a_count ) valid_data[i][j] = FALSE; } /* Set widths for display */ set_stat_widths(); if (rows_are_stats) { j = stat_widths[0]; for (i=1; i