#define STAT_VERSION "statisticker version 3.1e 4 Aug 2008" /* Input & output doc appears after revision info */ /* 4 Aug 08. WJS */ /* Accept trailing blanks in numeric data */ /* [Begin v 3.1e] */ /* 11 Jan 07. WJS */ /* Begin process of making argument analysis/parse code general */ /* Scheme has been moved to utils, along w/a little code. Job */ /* of modifying stats to use utils code remains to be done. */ /* 6 Aug 06. WJS */ /* So why didn't 3.1c use MISSING_VALUE_REAL instead of -9999?! */ /* Do it now */ /* [Needs utils.c 2.2 (in jgofs.a)] */ /* [Begin v 3.1d] */ /* 5 Aug 05. WJS */ /* Fix display width problem for minN statistics occurring when an */ /* alpha-comparison variable has all of its values missing */ /* (maybe having the first value missing is sufficient to cause */ /* bug for some values of N) */ /* Get level_splits from library */ /* Get MISSING_VALUE_STRING from core.h instead of using "nd" */ /* [Needs utils.c 1.9a (in jgofs.a)] */ /* [Begin v 3.1c] */ /* 3 Jun 05. WJS */ /* 28 Apr 04 mod means we need a declaration for errn */ /* [Begin v 3.1b] */ /* 30 Aug 04. WJS */ /* jdbfuncdefns.h */ /* 28 Apr 04. WJS */ /* errn now in utils */ /* 23 Apr 04. WJS */ /* Mods to "version-returning function". */ /* a) change from local to global to ensure it can't be "opti- */ /* mized out" */ /* b) don't want "version" in any function name since such */ /* names appear when grep'ping for "version" */ /* 18 Feb 04. WJS */ /* Use add_id_to_err from utils */ /* Include .h file versions */ /* Switch from defgb.h to utils.h */ /* [Needs utils.c 1.8 (in jgofs.a)] */ /* [Needs utils.h 1.0] */ /* [Begin v 3.1a] */ /* 22 Aug 03. WJS */ /* Take environment variables from argument list, too. This */ /* allows the propagation of this info via QUERY_STRING while */ /* web mode. Otherwise, web must invoke different object per */ /* set of env vars */ /* Try to control distinct-count stuff at runtime. Accept control */ /* from both environment and argument list. (Runtime control */ /* predicated on feature being enabled at compile time) */ /* Get some routines from defgb_utils & some defns from defgb.h */ /* [Needs defgb_utils 1.5a] */ /* [Needs defgb.h 4.7] */ /* [Begin v 3.1] */ /* 29 Jul 03. WJS */ /* Let's not give "Bad Object:" style error messages */ /* [Begin v 3.0c] */ /* 10 Dec 02. WJS */ /* Bug fix: 28 Feb 01 use of DBL_MIN was incorrect */ /* Bug fix: "Level splits" incorrectly computed */ /* [Begin v 3.0b] */ /* 19 Mar 01. WJS */ /* Comment fix */ /* 28 Feb 01. WJS */ /* Use DBL_MIN instead of -DBL_MAX */ /* 26 Feb 01. WJS */ /* Bug fix: some "distinct-count" stuff not controlled by DO_ */ /* DISTINCT */ /* Bug fix: May need to be more careful with getmem. Changed */ /* input to size_t type; changed internal pointer to type double */ /* to try avoid alignment problems later */ /* 25 Feb 01. WJS */ /* Add #include needed on Solaris & VMS */ /* [Begin v 3.0a] */ /* 13 Apr 00. WJS */ /* Add a "Standard station-Arabian Sea" comparison type */ /* 4 Apr 00. WJS */ /* Add ability to define comparison type for max, min, etc */ /* [Begin v 3.0] */ /* 28 Jan 00. WJS */ /* Add "distinct" counts as a select-at-compile-time option */ /* (by default, do them) */ /* Minimum precision to 9 chars */ /* Bug fix: accessed variable "N+1" instead of stopping at "N" */ /* when computing all stats */ /* [Begin v 2.0] */ /* 17 Jan 00. WJS */ /* More output precision for sums */ /* Propagate width= attrib (others aren't propagated) */ /* [Begin v 1.1] */ /* 3 Jan 00. WJS */ /* [Begin v 1.0] */ /* Input: 1 required parameter and a number of optional parameters */ /* Some optional parameters can get default values from the */ /* environment */ /* */ /* The required parameter is parameter 1. It is the object */ /* specifier (in "jdb format"; no /jg/serv; parameters parenthesized) */ /* pointing to the input data for the statisticker. */ /* The optional parameters, if any, may appear anywhere in the */ /* parameter list after the object specifiers. There are 2 types of */ /* optional parameters - program control parameters and comparison */ /* type parameters. */ /* Program control parameters can also be specified in the en- */ /* vironment. If specified there as well as in the parameter list, */ /* the parameter list value takes precedence. See "Output": for info */ /* about the rows_are_stats parameter. See descrip of comparison type */ /* parameters for info about the vars_must_be_in_object parameter. */ /* The "distinct" parameter controls whether the statisticker will */ /* compute the distinct count statistic (there is a performance hit */ /* computing this if some data are logically real rather than integer) */ /* Comparison type parameters define the */ /* type of comparison to be used when testing for max, min, etc. */ /* The default test type is determined at compilation time. As */ /* released (v 3.0, WJS), the default test type is numeric. */ /* As of v 3.0, the other types of test are alpha and "JGOFS standard */ /* station-Arabian Sea". */ /* The syntax of each parameter is */ /* parameter_specifier=string */ /* Multiple parameters can be concatenated with semicolons or slashes */ /* if desired (Purpose of choice between single list and multiple */ /* parameters is to ease syntax when using program from command line */ /* vs http QUERY_STRING, etc) */ /* where */ /* parameter_specifier is of the form XkeywordX */ /* X represents a character illegal to a JGOFS variable */ /* name. We accept any of !@#$%&? */ /* keywords for program control parameters are */ /* rows_are_stats */ /* vars_must_be_in_object */ /* distinct */ /* keywords for comparison type parameters are */ /* numeric */ /* alpha */ /* std_sta_AS */ /* keywords are case insensitive (except that program */ /* control parameters in the environment must be in */ /* upper case) */ /* string for program control parameters is a logical value */ /* 1,Y,y,T,t,YES,yes,TRUE,true represent TRUE */ /* 0,N,n,F,f,NO,no,FALSE,false represent FALSE */ /* string for comparison type parameters is a list of JGOFS */ /* variable names or a quantifier. */ /* If a list, the character used to separate list elements */ /* can be any one of ,+-^* */ /* If a list, all variables must be in object unless */ /* program control parameter vars_must_be_in_object is */ /* set to FALSE (or program is compiled that way) */ /* A quantifier is of the form YkeywordY, where Y comes */ /* from the set of special characters defined in */ /* parameter specifier. The keyword is either "all" or */ /* "rest" (case insensitive) */ /* Comparison types may appear multiple times. Variable */ /* names and quantifiers can appear only once */ /* Example: */ /* #numeric#=temp,sal,press;#alpha#=#rest# */ /* This says "do numeric comparisons for the variables temp, sal and */ /* press, and do alpha comparisons for all other variables" */ /* */ /* Output: Statistics, presented as a JGOFS object. */ /* A perl function (calcstat.pl) exists which will take this */ /* output and present it to the perl calling program as a */ /* perl list which can be interpreted as a hash whose keys */ /* are the statistic names and whose values are the */ /* statistics for each variable */ /* */ /* The "variables" (columns) of the output object can be either */ /* the variables (columns) of the input object, with the rows being */ /* the statistics, or vice versa. The environment variable */ /* ROWS_ARE_STATS controls this. If unspecified, the compiled value */ /* of this switch is used. As released (v 3.0, WJS), the compiled */ /* value is TRUE. */ /* The statistics computed and their definitions are presented to */ /* the program user as "JGOFS object comments". To see them now, look */ /* at the definition of the C variable "comments" in program text. */ /***********************/ #include "utils.h" #include "jdbfuncdefns.h" #include INNEROPTIONS int *level_splits(); /* from utils */ Logical get_logical_from_string(); /* from utils */ /* Process compile-time defaults here. Can be overridden @ runtime */ /* from either environment or parameter list */ #ifndef ROWS_ARE_STATS #define ROWS_ARE_STATS TRUE #endif #if ! ((ROWS_ARE_STATS == TRUE) || (ROWS_ARE_STATS == FALSE)) #error "Illegal ROWS_ARE_STATS" #endif #define ENV_VAR_ROWS_ARE_STATS "ROWS_ARE_STATS" Logical rows_are_stats = ROWS_ARE_STATS; #ifndef CHECK_VARS #define CHECK_VARS TRUE #endif #if ! ((CHECK_VARS == TRUE) || (CHECK_VARS == FALSE)) #error "Illegal CHECK_VARS" #endif #define ENV_VAR_CHECK_VARS "VARS_MUST_BE_IN_OBJECT" Logical check_vars = CHECK_VARS; #ifndef DO_DISTINCT #define DO_DISTINCT TRUE #endif #if ! ((DO_DISTINCT == TRUE) || (DO_DISTINCT == FALSE)) #error "Illegal DO_DISTINCT" #endif #define ENV_VAR_DO_DISTINCT "DISTINCT" Logical do_distinct=DO_DISTINCT; /* Try to get value of compilation switch into executable so we can */ /* do "strings" and find out which option was compiled in */ #if DO_DISTINCT char *distinct = "computing distinct counts enabled at compile time"; #else char *distinct = "computing distinct counts disabled at compile time"; #endif /****/ #if DO_DISTINCT char *comments = "\ Statistics returned include minima, maxima, counts, distinct counts,\n\ sums, and sums of squares\n\ The numeric suffixes indicate how irregular data are treated\n\ Irregular data are of 2 types. The first type is data explicitly marked\n\ as 'missing' in the input object (input object data whose values are the\n\ 2-character string 'nd'). The second type is data whose characters do not\n\ decode into legal values. This can be because of error, or because the data\n\ do not match the type expected (eg, a station 'J1' when station is numeric)\n\ 0 All data are included in the statistic. Except for counts,\n\ any irregular datum causes the corresponding statistic to\n\ be given the value 'nd'\n\ 1 Data explictly marked as irregular are skipped. Except for\n\ counts, non-decodable values cause the corresponding\n\ statistic to be given the value 'nd'. 'nd' is also assigned\n\ if all data are irregular\n\ 2 All irregular data are skipped. 'nd' is assigned to non-\n\ count statistics if all data are irregular\n\ As far as distinct counts are concerned, 1 is added to the appropriate\n\ counts if any explicitly irregular data are found (and explicitly irregular\n\ data are included in the count), and another 1 is added if any non-decodable\n\ data are found (and non-decodable data are included in the count).\n\ "; #define COUNT2 0 #define DISTINCT2 1 #define MIN2 2 #define MAX2 3 #define SUM2 4 #define SUM_SQUARES2 5 #define COUNT1 6 #define DISTINCT1 7 #define MIN1 8 #define MAX1 9 #define SUM1 10 #define SUM_SQUARES1 11 #define COUNT0 12 #define DISTINCT0 13 #define MIN0 14 #define MAX0 15 #define SUM0 16 #define SUM_SQUARES0 17 #define NSTATS 18 /* Maintain = length of list above */ #else char *comments = "\ Statistics returned include minima, maxima, counts, sums, and sums of squares\n\ The numeric suffixes indicate how irregular data are treated\n\ Irregular data are of 2 types. The first type is data explicitly marked\n\ as 'missing' in the input object (input object data whose values are the\n\ 2-character string 'nd'). The second type is data whose characters do not\n\ decode into legal values. This can be because of error, or because the data\n\ do not match the type expected (eg, a station 'J1' when station is numeric)\n\ 0 All data are included in the statistic. Except for counts,\n\ any irregular datum causes the corresponding statistic to\n\ be given the value 'nd'\n\ 1 Data explictly marked as irregular are skipped. Except for\n\ counts, non-decodable values cause the corresponding\n\ statistic to be given the value 'nd'. 'nd' is also assigned\n\ if all data are irregular\n\ 2 All irregular data are skipped. 'nd' is assigned to non-\n\ count statistics if all data are irregular\n\ "; #define COUNT2 0 #define MIN2 1 #define MAX2 2 #define SUM2 3 #define SUM_SQUARES2 4 #define COUNT1 5 #define MIN1 6 #define MAX1 7 #define SUM1 8 #define SUM_SQUARES1 9 #define COUNT0 10 #define MIN0 11 #define MAX0 12 #define SUM0 13 #define SUM_SQUARES0 14 #define NSTATS 15 /* Maintain = length of list above */ #endif /****/ int vnamesize=VARNAMESIZE+1; int valuesize=DATUMSIZE+1+1; /* "Extra" +1 for ALPH_SEP */ void error_(); /* ... entry in outer */ Logical add_id_to_err(); /* in utils.c */ void errn(); /* in utils.c */ int handle = -1; /* Not sure why this needs = -1 */ /* Things related to input object */ int nvars,maxlev; char names[NVAR * (VARNAMESIZE+1)]; char *values; /* values[nvars][DATUMSIZE+1] */ int *firstvarlevel; /* firstvarlevel[maxlev] */ signed char *comparison_type; /* comparison_type[nvars] */ # #if DO_DISTINCT struct value_list { /* Use 1 of data_list or adata_list based on comparison_type */ /* Could have union-ed, but seems more trouble than worth */ double *data_list; /* data_list[n_distinct_vals] */ char *adata_list; /* ptr to string holding all */ /* distinct alpha vals concatenated */ int list_next; int list_size; } *val_list; /* val_list[nvars] */ #ifndef NUM_DATALIST_PER_MALLOC #define NUM_DATALIST_PER_MALLOC 200 #endif #ifndef AV_SIZE_ADATUM #define AV_SIZE_ADATUM 15 #endif #ifndef NUM_ADATALIST_PER_MALLOC #define NUM_ADATALIST_PER_MALLOC NUM_DATALIST_PER_MALLOC*AV_SIZE_ADATUM #endif #endif /* Things related to stats */ Logical valid_data[NVAR][NSTATS]; double stats[NVAR][NSTATS]; char *astats[NVAR][NSTATS]; /* Only need # alph stats and a subset */ /* of NSTATS, but making it NSTATS easier */ #define STATNAMESIZE 20 int snamesize = STATNAMESIZE+1; char stat_names[NSTATS * (STATNAMESIZE+1)]; int stat_widths[NSTATS]; int stat_precisions[NSTATS]; #define VARNAME_FOR_STAT_ID "stat_name" #define VARNAME_FOR_VARIABLE_ID "variable" int stat_being_worked_on = -1; int var_being_worked_on = -1; int *out_widths,*out_attr_widths; /* Stuff relating to parsing comparison selection list */ /* Use some ndefs so user can override at compile time */ #define COMP_TYPE_UNINIT -1 #define COMP_TYPE_NUMERIC 0 #define COMP_TYPE_ALPHA 1 #define COMP_TYPE_STD_STA_AS 2 #ifndef DEFAULT_COMP_TYPE #define DEFAULT_COMP_TYPE COMP_TYPE_NUMERIC #endif #if ! ( (DEFAULT_COMP_TYPE == COMP_TYPE_NUMERIC) || \ (DEFAULT_COMP_TYPE == COMP_TYPE_ALPHA) || \ (DEFAULT_COMP_TYPE == COMP_TYPE_STD_STA_AS) ) #error "Illegal DEFAULT_COMP_TYPE" #endif #define COMP_TYPE_NUMERIC_KEYWORD "numeric" #define COMP_TYPE_ALPHA_KEYWORD "alpha" #define COMP_TYPE_STD_STA_AS_KEYWORD "std_sta_AS" #define COMP_QUANTIFIER_1 "all" #define COMP_QUANTIFIER_2 "rest" #define KEYWORD_LIST_SEPARATOR '=' #ifndef LIST_SEPARATORS #define LIST_SEPARATORS ";/" #endif #ifndef VARNAME_SEPARATORS #define VARNAME_SEPARATORS ",+-^*" #endif #ifndef ALPH_SEP #define ALPH_SEP '\1' /* A non-zero, non-alpha character */ #endif /* From max to min, order is N1 -> N11, M1, S15 -> S1, A */ /* (from Chris 11 Apr 2000). */ #define NSTD_STAS_AS 28 #define MAXLEN_STD_STA_AS 3 /* Could runtime strlen() the vals... */ char *std_stas_AS[NSTD_STAS_AS] = { "A", "S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10","S11", "S12","S13","S14","S15", "M1", "N11","N10","N9", "N8", "N7", "N6", "N5", "N4", "N3", "N2", "N1" }; /************************************************************************/ char *stats_return_vers() /* Dummy routine. Exists only to force .h file version string into */ /* this module. Note string must not be global or we'll have con- */ /* flicts if another routine similarly includes the version string */ { static char version[] = STAT_VERSION"/"FULL_UTILSH_VERSION"/"FULL_JDBFUNCDEFNSH_VERSION; return version; } void err(s,t) char *s,*t; { char *ss,*tt; add_id_to_err(&ss,&tt,s,t,STAT_VERSION); error_(ss,tt); return; /* Not that it should ever get here... */ } void *getmem(bufname,nbytes) char *bufname; size_t nbytes; { #define SIZERRBUF 150 static char errbuf[SIZERRBUF]; /* Deliberately static and fixed size */ /* since if we need it we're having */ /* trouble allocating memory */ static char *piece1 = "Could not get "; static char *piece2 = " bytes memory for '"; static char *piece3 = "' buffer/structure(s)"; int errmsglen; double *ptr; /* Chosen for "most restrictive alignment" purposes */ /* Presumably, this is system-dependent, but I don't */ /* know where the appropriate defn is */ ptr = (double *) malloc(nbytes); if (ptr == NULL) { /* Length is that of 3 pieces of fixed text (above) + width of */ /* args passed here + 1 for '\0' */ errmsglen = strlen(piece1) + strlen(piece2) + strlen(piece3); errmsglen += log10((double)nbytes) + 1; errmsglen += strlen(bufname) + 1; if (errmsglen <= SIZERRBUF) { sprintf(errbuf,"Could not get %d bytes memory for '%s' buffer\n", nbytes, bufname ); err(errbuf,""); } else /* Calling err not as insane as it might seem. Might be */ /* possible to get memory for error message even if allocation */ /* for buffer failed */ err("Could not get memory for buffer/structure ",bufname); } return (void *)ptr; #undef SIZERRBUF } void init_stuff() { int i,j; char *ptr; /* Do some sanity checks on compile-time assumptions, vals, etc */ if (isalnum(ALPH_SEP)) err ("Internal error - ALPH_SEP must not be alphanumeric",""); if (strpbrk(ILLEGAL_VARNAME_CHARS,LIST_SEPARATORS) != NULL) err ( "Internal error - ILLEGAL_VARNAME_CHARS & LIST_SEPARATORS must be disjoint", ""); if (strpbrk(ILLEGAL_VARNAME_CHARS,VARNAME_SEPARATORS) != NULL) err ( "Internal error - ILLEGAL_VARNAME_CHARS & VARNAME_SEPARATORS must be disjoint", ""); if (strpbrk(VARNAME_SEPARATORS,LIST_SEPARATORS) != NULL) err ( "Internal error - VARNAME_SEPARATORS & LIST_SEPARATORS must be disjoint", ""); if ( (strchr(ILLEGAL_VARNAME_CHARS,KEYWORD_LIST_SEPARATOR) != NULL) || (strchr(LIST_SEPARATORS,KEYWORD_LIST_SEPARATOR) != NULL) || (strchr(VARNAME_SEPARATORS,KEYWORD_LIST_SEPARATOR) != NULL) ) err ("Internal error - KEYWORD_LIST_SEPARATOR may not appear as special\ character on other lists",""); if ( (ptr = getenv(ENV_VAR_ROWS_ARE_STATS)) != NULL ) rows_are_stats = get_logical_from_string (ptr,"getting env var ",ENV_VAR_ROWS_ARE_STATS); if ( (ptr = getenv(ENV_VAR_CHECK_VARS)) != NULL ) check_vars = get_logical_from_string (ptr,"getting env var ",ENV_VAR_CHECK_VARS); if ( (ptr = getenv(ENV_VAR_DO_DISTINCT)) != NULL ) do_distinct = get_logical_from_string (ptr,"getting env var ",ENV_VAR_DO_DISTINCT); /* Do next stuff at run time instead of compile time to make it */ /* easier to alter order of statistics */ /* Init the values that the VARNAME_FOR_STAT_ID defn will take */ /* I give up on checking if these constants will fit in their bufs */ /* STATNAMESIZE is what they all should be no bigger than */ strcpy ( stat_names + COUNT0*snamesize, "count0" ); strcpy ( stat_names + COUNT1*snamesize, "count1" ); strcpy ( stat_names + COUNT2*snamesize, "count2" ); strcpy ( stat_names + MIN0*snamesize, "min0" ); strcpy ( stat_names + MIN1*snamesize, "min1" ); strcpy ( stat_names + MIN2*snamesize, "min2" ); strcpy ( stat_names + MAX0*snamesize, "max0" ); strcpy ( stat_names + MAX1*snamesize, "max1" ); strcpy ( stat_names + MAX2*snamesize, "max2" ); strcpy ( stat_names + SUM0*snamesize, "sum0" ); strcpy ( stat_names + SUM1*snamesize, "sum1" ); strcpy ( stat_names + SUM2*snamesize, "sum2" ); strcpy ( stat_names + SUM_SQUARES0*snamesize, "sum_squares0" ); strcpy ( stat_names + SUM_SQUARES1*snamesize, "sum_squares1" ); strcpy ( stat_names + SUM_SQUARES2*snamesize, "sum_squares2" ); #if DO_DISTINCT strcpy ( stat_names + DISTINCT0*snamesize, "distinct0" ); strcpy ( stat_names + DISTINCT1*snamesize, "distinct1" ); strcpy ( stat_names + DISTINCT2*snamesize, "distinct2" ); #endif return; } int get_std_sta_AS(sta) char *sta; /* Return index into std_stas_AS list. If not in list, return -1 */ { int i; for (i = 0; i < NSTD_STAS_AS; i++) if (strcmp(sta,std_stas_AS[i]) == 0) break; return (i == NSTD_STAS_AS) ? -1 : i; } #if DO_DISTINCT Logical new_distinct_val(val,list_struct) double val; struct value_list *list_struct; /* See if val is in list found within list_struct. If so, return */ /* FALSE. If not, add val to that list, then return TRUE */ { int i; double *dptr; /* Just for convenience */ dptr = list_struct->data_list; for (i=0; i < list_struct->list_next; i++) if (val == dptr[i]) return FALSE; if (list_struct->list_next == list_struct->list_size) { list_struct->list_size += NUM_DATALIST_PER_MALLOC; dptr = (double *) realloc (dptr , list_struct->list_size * sizeof(double)); if (dptr == NULL) err("Could not get memory for '.data_list' buffer",""); list_struct->data_list = dptr; } dptr[(list_struct->list_next)++] = val; return TRUE; } Logical alph_new_distinct_val(val,list_struct) char *val; struct value_list *list_struct; /* alpha version of new_distinct_val */ { int len; len = strlen(val); /* Use ALPH_SEP so whole string can be searched w/1 strstr call. */ /* Hypothesis is that this more efficient than loop of many strcmp's */ /* strstr tests more chars; loop does more function calls. strstr */ /* also requires single string to contain all alph data (65767 char */ /* limit on VMS strings?). Recode if necessary... */ val[len++] = ALPH_SEP; val[len] = '\0'; if (strstr(list_struct->adata_list,val) == NULL) { /* Not on list, so new unique value. */ if ( (list_struct->list_next) + len > list_struct->list_size ) { /* Need more memory */ list_struct->list_size += NUM_ADATALIST_PER_MALLOC; list_struct->adata_list = (char *) realloc ( list_struct->adata_list, list_struct->list_size); if (list_struct->adata_list == NULL) err("Could not get memory for '.adata_list' buffer",""); } /* Add new val to list */ strcpy ( (list_struct->adata_list) + (list_struct->list_next), val ); list_struct->list_next += len; val[--len] = '\0'; return TRUE; } else { /* On list, so not a unique value. */ val[--len] = '\0'; return FALSE; } } #endif void set_widths (nrows,ncols, row_headers,row_header_maxsize, data_col_headers,data_col_header_maxsize, row_headers_col_header, data_col_widths_array,data_col_widths_array_size) /* Set up out_widths array (and a copy, out_attr_widths) */ /* 2 parts. 1st, set up the width of the first column, which */ /* is the max of the name of that column and all the values in */ /* that column. 2nd, the rest of the widths are either constant */ /* or a copy of an input array, maximized w/the name of the column */ int nrows; /* Not counting title row */ int ncols; /* Not counting id column */ char *row_headers,*data_col_headers,*row_headers_col_header; int row_header_maxsize,data_col_header_maxsize; int data_col_widths_array[]; int data_col_widths_array_size; { int i,j,k; int ptr; if ( ! ( (data_col_widths_array_size == 1) || (data_col_widths_array_size == ncols) ) ) errn("Internal error: bad data_col_widths_array_size to set_widths. Size=", data_col_widths_array_size); out_widths = (int *) getmem ( "out_widths", (size_t)((ncols + 1) * sizeof(int)) ); out_attr_widths = (int *) getmem ( "out_attr_widths", (size_t)((ncols + 1) * sizeof(int)) ); /* 1st col - take biggest row header unless col header for the row */ /* headers is bigger */ k = strlen(row_headers_col_header); for (i = 0; i < nrows; i++) { j = strlen(row_headers + i*row_header_maxsize); if (j > k) k = j; } out_attr_widths[0] = out_widths[0] = k; /* Other cols - take max of data width w/ col header width */ for (i = 1; i <= ncols; i++) { j = strlen(data_col_headers + i*data_col_header_maxsize); k = (data_col_widths_array_size == 1) ? data_col_widths_array[0] : data_col_widths_array[i-1]; out_attr_widths[i] = out_widths[i] = (j > k) ? j : k; } return; } void set_stat_widths() /* Set the output width of each statistic. Used to set the JGOFS */ /* object width= attribute; not so simple because that object can */ /* be presented w/stats as rows or columns... */ { int i,j,k,max; int non_precision_width_chars; int numeric_single_width,numeric_single_digits_of_precision; /* Want enough precision in output so that if people use sums of */ /* squares, etc, stats will be valid. Decided to use single float */ /* precision for max & min; double for sums and sums_of_squares */ /* Counts get single kind of by default. Constants come from */ /* float.h */ /* Temporarily (?) increase precision so that 8 digit event */ /* numbers don't print in E format. Using 9 because I'm confused */ /* about FLT_DIG vs printf precision and I don't want to edit */ /* this twice. Check vs DBL_DIG is sanity... assume DBL_DIG > */ /* FLT_DIG!!! */ numeric_single_digits_of_precision = (9 > FLT_DIG) ? 9 : FLT_DIG; if (numeric_single_digits_of_precision >= DBL_DIG) numeric_single_digits_of_precision = DBL_DIG - 1; /* "+N.", "E+", max width of exponent */ non_precision_width_chars = 3 + 2 + (1 + log10(log10(DBL_MAX))); numeric_single_width = numeric_single_digits_of_precision + non_precision_width_chars; for (i=0; i= 0 ) { for (i = firstvarlevel[lev]; i < nvars; i++) { ptr = values + i*valuesize; explicit_invalid = (strcmp(MISSING_VALUE_STRING,ptr) == 0); if (explicit_invalid) /* non_decodable not used if explicit_invalid, but be complete */ non_decodable = TRUE; else { switch (comparison_type[i]) { case COMP_TYPE_NUMERIC: GET_NUMBER_FROM_STRING(df,ptr,decodable) non_decodable = ! decodable; break; case COMP_TYPE_ALPHA: non_decodable = FALSE; break; case COMP_TYPE_STD_STA_AS: df = (double) get_std_sta_AS(ptr); non_decodable = (df < 0); break; default: errn("Internal error: no code to handle comparison type", comparison_type[i]); break; } } stats[i][COUNT0]++; if (explicit_invalid) { #if DO_DISTINCT /* Use valid_data flag for DISTINCT0 to indicate */ /* "at least one 'nd' occurred" */ if (do_distinct) valid_data[i][DISTINCT0] = FALSE; #endif valid_data[i][MIN0] = FALSE; valid_data[i][MAX0] = FALSE; valid_data[i][SUM0] = FALSE; valid_data[i][SUM_SQUARES0] = FALSE; } else { stats[i][COUNT1]++; if (non_decodable) { #if DO_DISTINCT /* Use valid_data flag for DISTINCT1 to indicate */ /* "at least one non-decodable occurred" */ if (do_distinct) valid_data[i][DISTINCT0] = FALSE; #endif valid_data[i][MIN0] = FALSE; valid_data[i][MAX0] = FALSE; valid_data[i][SUM0] = FALSE; valid_data[i][SUM_SQUARES0] = FALSE; valid_data[i][MIN1] = FALSE; valid_data[i][MAX1] = FALSE; valid_data[i][SUM1] = FALSE; valid_data[i][SUM_SQUARES1] = FALSE; } else { stats[i][COUNT2]++; #if DO_DISTINCT if (do_distinct) { switch (comparison_type[i]) { case COMP_TYPE_NUMERIC: if (new_distinct_val(df,&val_list[i])) stats[i][DISTINCT2]++; break; case COMP_TYPE_ALPHA: case COMP_TYPE_STD_STA_AS: if (alph_new_distinct_val(ptr,&val_list[i])) stats[i][DISTINCT2]++; break; default: errn("Internal error: no code to handle comparison type ", comparison_type[i]) ; break; } } #endif switch (comparison_type[i]) { case COMP_TYPE_STD_STA_AS: case COMP_TYPE_NUMERIC: if (valid_data[i][MIN0]) if (stats[i][MIN0] > df) stats[i][MIN0] = df; if (valid_data[i][MIN1]) if (stats[i][MIN1] > df) stats[i][MIN1] = df; if (valid_data[i][MIN2]) if (stats[i][MIN2] > df) stats[i][MIN2] = df; if (valid_data[i][MAX0]) if (stats[i][MAX0] < df) stats[i][MAX0] = df; if (valid_data[i][MAX1]) if (stats[i][MAX1] < df) stats[i][MAX1] = df; if (valid_data[i][MAX2]) if (stats[i][MAX2] < df) stats[i][MAX2] = df; if (valid_data[i][SUM0]) stats[i][SUM0] += df; if (valid_data[i][SUM1]) stats[i][SUM1] += df; if (valid_data[i][SUM2]) stats[i][SUM2] += df; df *= df; if (valid_data[i][SUM_SQUARES0]) stats[i][SUM_SQUARES0] += df; if (valid_data[i][SUM_SQUARES1]) stats[i][SUM_SQUARES1] += df; if (valid_data[i][SUM_SQUARES2]) stats[i][SUM_SQUARES2] += df; break; case COMP_TYPE_ALPHA: if (valid_data[i][MIN0]) if (strcmp(astats[i][MIN0],ptr) >0) strcpy(astats[i][MIN0],ptr); if (valid_data[i][MIN1]) if (strcmp(astats[i][MIN1],ptr) >0) strcpy(astats[i][MIN1],ptr); if (valid_data[i][MIN2]) if (strcmp(astats[i][MIN2],ptr) >0) strcpy(astats[i][MIN2],ptr); if (valid_data[i][MAX0]) if (strcmp(astats[i][MAX0],ptr) <0) strcpy(astats[i][MAX0],ptr); if (valid_data[i][MAX1]) if (strcmp(astats[i][MAX1],ptr) <0) strcpy(astats[i][MAX1],ptr); if (valid_data[i][MAX2]) if (strcmp(astats[i][MAX2],ptr) <0) strcpy(astats[i][MAX2],ptr); break; default: errn("Internal error: no code to handle comparison type", comparison_type[i]); break; } } } } } #if DO_DISTINCT if (do_distinct) { /* Incorporate irregular data into DISTINCT vals */ /* There are at least as many distinct vals as "real" distinct */ /* vals. If we're disregarding nd's but we found non-decodables */ /* add 1 to that count. If we're counting both nd's AND non- */ /* decodables, add 1 for each that we found */ for (i = 0; i < nvars; i++) { stats[i][DISTINCT0] = stats[i][DISTINCT2]; stats[i][DISTINCT1] = stats[i][DISTINCT2]; if ( ! valid_data[i][DISTINCT0]) stats[i][DISTINCT0]++; if ( ! valid_data[i][DISTINCT1]) { stats[i][DISTINCT0]++; stats[i][DISTINCT1]++; } /* Restore valid_data flags to their real purpose. */ valid_data[i][DISTINCT0] = TRUE; valid_data[i][DISTINCT1] = TRUE; } } #endif for (i = 0; i < nvars; i++) /* COUNT2=0 means nothing decodable was ever found */ if (stats[i][COUNT2] == 0) for (j = 0; j < NSTATS; j++) { is_a_count = ((j == COUNT0) || (j == COUNT1) || (j == COUNT2)); #if DO_DISTINCT if (do_distinct) { is_a_distinct_count = ((j == DISTINCT0) || (j == DISTINCT1) || (j == DISTINCT2)); is_a_count = (is_a_count || is_a_distinct_count); } #endif if ( ! is_a_count ) valid_data[i][j] = FALSE; } /* Set widths for display */ set_stat_widths(); if (rows_are_stats) { j = stat_widths[0]; for (i=1; i