#define DECIMATOR_VERSION "decimator version 1.0 5 Feb 2007" /* 5 Feb 07. WJS Try to rework 1st point returned when in average mode 31 Jan 07. WJS HIGHER_PRECISION 27 Jan 07. WJS AVERAGE 10 Jan 07. WJS KEY_VARIABLE, INITIAL_SKIP, INTEGRAL 16 Nov 06. wjs Accept decimation factor from command line and env var Be sure PATH_INFO .html, .flat, etc doesn't throw us off Check input parameters a bit more [needs utils 2.2] [needs outer_utils 1.0a] [needs path_info_routines 1.3] [begin v 1.0] 16 Nov 06. wjs Change iocommout_ to use nxttok Simplify width handling Add err entry 15 Nov 06. wjs Bug fix: Single level objects did not return 1st rec Get rid of some unused code. Use some utils routines. Dynamically allocate comments buffer. */ #include INNEROPTIONS #include "path_info_routines.h" /* utils.h also declares jdb function defns */ #include "utils.h" /* outer functions */ void error_(); /* utils functions */ Logical add_id_to_err(); char *buildstring(); void errn(); Logical extract_wjstbl(); void free_lengthened_str(); int get_integer_attribute(); Logical get_logical_from_string(); int *level_splits(); int keyword_pairs_to_wjstbl(); char *lengthen_str(); char *nxttok(); char *set_option_string(); char *strip_space_n(); /* outer_utils functions */ char *un_trigram(); /* path_info_routines functions */ void new_and_old_path_infos(); int fldwidths[NVAR]; int name_array_size=VARNAMESIZE+1; char names[NVAR][VARNAMESIZE+1]; int value_array_size=DATUMSIZE+1; char values[NVAR][DATUMSIZE+1]; char save_values[NVAR][DATUMSIZE+1]; int handle; int max_object_level; int nvars_read; int initial_skip; Logical integral,average; char *key_variable_name; int key_variable; Logical ok_init_decimation; int n_bottom_level_records_qualified,n_bottom_level_records,records_cutoff; double key_variable_last_val,key_variable_val,key_variable_cutoff; int *level_split; double *sum; int *count; Logical eof_pending_for_averages; Logical reset_values_from_save_values = FALSE; int decimation_factor; double f_decimation_factor; #define DEFAULT_DECIMATION_FACTOR 100 #ifndef DECIMATION_FACTOR #define DECIMATION_FACTOR DEFAULT_DECIMATION_FACTOR #endif Logical higher_precision; int max_precision, max_precision_fldwidth; char *comments,*comments_ptr; /************************************************************************/ char *decimator_return_vers() /* Routine exists mostly to force .h file version string into this */ /* module, but we could call it if we want. Note string must not be */ /* global or we'll have conflicts if another routine similarly */ /* includes the version string */ { static char version[] = \ DECIMATOR_VERSION"/"FULL_PATH_INFO_ROUTINESH_VERSION"/"FULL_UTILSH_VERSION; return version; } void err(s,t) char *s,*t; { char *ss,*tt; add_id_to_err(&ss,&tt,s,t,DECIMATOR_VERSION); error_(ss,tt); return; /* Not that it should ever get here... */ } void average_initialize() { int i; for (i = level_split[max_object_level]; i < nvars_read; i++) { sum[i] = 0.; count[i] = 0; } return; } void average_accumulate(values) char *values; { int i, n; double f; char *ptr,*sp; for (i = level_split[max_object_level]; i < nvars_read; i++) { ptr = values + i * value_array_size; ptr = strip_space_n(ptr,&n); if (ptr != NULL) { /* Rely on "nd" failing the strtod test. Treat missing */ /* and invalid numeric data the same */ f = strtod(ptr,&sp); if (*sp == *(ptr+n)) { count[i]++; sum[i] += f; } } } return; } void replace_vals_w_averages() { int i,j; /* level_split[max_object_level] is index of 1st var on bottom lev */ for (i = level_split[max_object_level]; i < nvars_read; i++) { /* save values first */ strcpy(save_values[i],values[i]); if (count[i] == 0) { strcpy(values[i],MISSING_VALUE_STRING); } else { /* Note that fldwidths has been modified if necessary to */ /* reflect the values of the AVERAGE and HIGHER_PRECISION */ /* input parameters. See ioopen_ for a discussion of */ /* max_precision_fld_width. -1 is to try to guarantee a */ /* space between fields (not sure if other JGOFS software */ /* puts one in when printing consecutive fields). */ /* Further note that sprintf uses j as a precision indicator */ /* not a width indicator */ j = (fldwidths[i] - 1 < max_precision) ? fldwidths[i] - 1 : max_precision; sprintf(values[i], "%.*g", j , sum[i]/count[i]); } } /* Set flag so that averages are NOT used in place of the values */ /* we overwrote above */ reset_values_from_save_values = TRUE; return; } Logical init_decimation() { double key_variable_val; char *ptr; int i; n_bottom_level_records_qualified = n_bottom_level_records = 0; if (key_variable >= 0) { if ( strcmp(values[key_variable],MISSING_VALUE_STRING) == 0 ) { return FALSE; } else { key_variable_val = strtod(values[key_variable],&ptr); if (*ptr != '\0') { ptr = buildstring("Non-numeric value for key variable ", names[key_variable], " Value = ", "non-numeric err msg init_decimation"); err(ptr,values[key_variable]); } /* set up so that this record itself passes the return_record */ /* comparison with _last_val */ if (decimation_factor > 0.) key_variable_last_val = key_variable_val - 1.; else key_variable_last_val = key_variable_val + 1.; } } else { /* +1 in next line reflects fact that by default we print rec 1 */ records_cutoff = decimation_factor+1; /* If you want recs 10, 20, etc instead of 11, 21, etc... */ if (integral) records_cutoff = decimation_factor; } return TRUE; } Logical return_record() /* Returns TRUE if decimator should select this record; FALSE if */ /* decimator should skip this record */ { double key_variable_val; char *ptr; if ( ! ok_init_decimation) ok_init_decimation=init_decimation(); if ( ! ok_init_decimation) return FALSE; if (key_variable >= 0) { if ( strcmp(values[key_variable],MISSING_VALUE_STRING) == 0 ) { return FALSE; } else { n_bottom_level_records++; key_variable_val = strtod(values[key_variable],&ptr); if (*ptr != '\0') { ptr = buildstring("Non-numeric value for key variable ", names[key_variable], " Value = ", "non-numeric err msg ioreadrec_"); err(ptr,values[key_variable]); } if (f_decimation_factor > 0) { if (key_variable_val <= key_variable_last_val) { ptr = buildstring("Non-increasing value for key variable ", names[key_variable], " Value = ", "non-increasing err msg ioreadrec_"); err(ptr,values[key_variable]); } /* Record 1 always qualifies for return (barring initial skip) */ if (n_bottom_level_records != 1) if (key_variable_val < key_variable_cutoff) return FALSE; } else { /* Same if block as above except sense of inequalities */ /* reversed for decreasing data (signified by a negative */ /* decimation factor) */ if (key_variable_val >= key_variable_last_val) { ptr = buildstring("Non-decreasing value for key variable ", names[key_variable], " Value = ", "non-decreasing err msg ioreadrec_"); err(ptr,values[key_variable]); } /* Record 1 always qualifies for return (barring initial skip) */ if (n_bottom_level_records != 1) if (key_variable_val > key_variable_cutoff) return FALSE; } key_variable_last_val = key_variable_val; } /* Always add to cutoff because decimation factor is signed */ if (n_bottom_level_records == 1) key_variable_cutoff = key_variable_val + f_decimation_factor; else key_variable_cutoff += f_decimation_factor; } else { n_bottom_level_records++; /* Record 1 always qualifies for return (barring initial skip) */ if (n_bottom_level_records != 1) { if (n_bottom_level_records < records_cutoff) return FALSE; records_cutoff += decimation_factor; } } /* Record 1 always qualifies for return (barring initial skip) */ /* ... except for average (bet you thought record 1 always qualifies) */ if ((n_bottom_level_records == 1) && average) return FALSE; return (++n_bottom_level_records_qualified > initial_skip); } /* Next 2 routines make up the ioreadrec stuff for the decimator */ /* Return of 0 means EOF; return of 1 means "normal" */ /* Note that outer tests ioreadrec's return as a Boolean... */ #define IOREADREC_EOF 0 #define IOREADREC_OK 1 int get_decimated_record(level) /* This is "really" ioreadrec. It returns ioreadrec return values */ int *level; { int minlevelread,i; static int next_unprocessed_level = MAXLEVELS; if (*level > MAXLEVELS) errn ("ioreadrec requested to read level beyond JGOFS system max level "\ "Requested level = ",*level); /* We altered values array to contain averages. Restore it */ /* This may well not be necessary, since we may well be reading */ /* new data into values via jdbreada_ in all cases when we had */ /* previously wiped the old values. However, too lazy to think */ /* this through. (If intending to remove this code, be sure to */ /* test on single-level object, which I think will give most */ /* problems) */ if (reset_values_from_save_values) { for (i = level_split[max_object_level]; i < nvars_read; i++) { strcpy(values[i],save_values[i]); } reset_values_from_save_values = FALSE; } /* Every call to jdbread returns a full set of data; ie, data from */ /* all levels. Accordingly, there is not a 1-1 match of ioreadrec */ /* calls and jdbread calls. If, say, ioreadrec is called to read */ /* level 0, jdbread will be called many times to skip all lower */ /* level data. Conversely, once jdbread returns "level 0 read" */ /* ioreadrec can be called once per object level to process each */ /* level's data. */ /* ioreadrec has no provision for returning an error status */ /* Historically, EOF has been returned in error cases. This */ /* software issues a diagnostic and dies */ if (*level < next_unprocessed_level) { /* EOF code & error codes are negative & therefore < *level */ minlevelread = jdbreada_(&handle,values,&value_array_size); while (*level < minlevelread) minlevelread = jdbreada_(&handle,values,&value_array_size); if (minlevelread == JDB_CONDITION_EOF) return IOREADREC_EOF; if (minlevelread < 0) errn("ioreadrec reports jdbread error # ",minlevelread); /* Test to see if level "went back up". Only occurs when */ /* reading lowest level, when there are no more lowest level */ /* records w/same upper level. Hence, this represents EOD */ /* at lowest level */ if (*level > minlevelread) { next_unprocessed_level = minlevelread; ok_init_decimation = FALSE; return IOREADREC_EOF; } } /* We have a record at the level in which we're interested. */ /* Except for the average and init_decimation stuff above, all */ /* ioreadrec's will arrive here */ /* The decimator is only interested in lowest level stuff */ /* Accordingly, all its code is in the next if block */ if (*level == max_object_level) { if ( average && ! ok_init_decimation ) average_initialize(); while ( ! return_record() ) { if (average) average_accumulate(values); minlevelread = jdbreada_(&handle,values,&value_array_size); if (minlevelread == JDB_CONDITION_EOF) return IOREADREC_EOF; if (minlevelread < 0) errn("ioreadrec reports jdbread error # ",minlevelread); if (max_object_level > minlevelread) { next_unprocessed_level = minlevelread; ok_init_decimation = FALSE; return IOREADREC_EOF; } } /* A "decimation interval" has passed. We need to return a */ /* representative value. We got here upon encountering the */ /* first record of the "next" decimation interval. If we are */ /* in sampling mode, we can just return that record. However, */ /* if we are in average mode, we want to return the average for */ /* the interval we just left. This causes the buffering and */ /* related confusion of the next block */ if (average) { /* At this point, the accepted record is NOT in the */ /* accumulation. Since this is the record above the cutoff, */ /* it is logically in the "next" interval, so report */ /* average of accumulated stuff without it. The exception */ /* to this is the first record. If that is selected (the */ /* default) there will be no previous. Hence the code will */ /* have a wart someplace... (seems to done in the code that */ /* sinks this stuff 2 calls below ioreadrec) */ replace_vals_w_averages(); average_initialize(); average_accumulate(save_values); /* save_values has "this" rec */ } } next_unprocessed_level = *level+1; return IOREADREC_OK; } int ioreadrec_(level) int *level; { int ret_val; if (eof_pending_for_averages) { eof_pending_for_averages = FALSE; return IOREADREC_EOF; } ret_val = get_decimated_record(level); if ((ret_val == IOREADREC_EOF) && average && (*level == max_object_level)) { replace_vals_w_averages(); eof_pending_for_averages = TRUE; ret_val = IOREADREC_OK; } return ret_val; } void ioclose_() { jdbclose_(&handle); return; } void get_inner_options (wjstbl,nargs) char *wjstbl; int *nargs; /* Values set are all globals */ { char *ptr,*sp; int n; key_variable_name = set_option_string ("KEY_VARIABLE",NULL,wjstbl,nargs); ptr = set_option_string("INTEGRAL","FALSE",wjstbl,nargs); integral = get_logical_from_string (ptr,"processing keyword INTEGRAL",NULL); ptr = set_option_string("AVERAGE","FALSE",wjstbl,nargs); average = get_logical_from_string (ptr,"processing keyword AVERAGE",NULL); ptr = set_option_string("HIGHER_PRECISION","FALSE",wjstbl,nargs); higher_precision = get_logical_from_string (ptr,"processing keyword HIGHER_PRECISION",NULL); ptr = set_option_string("INITIAL_SKIP","0",wjstbl,nargs); ptr = strip_space_n(ptr,&n); initial_skip = strtol(ptr,&sp,10); if (*sp != *(ptr+n)) err("Non-numeric character in value of INITIAL_SKIP. Value = ", ptr); if (initial_skip < 0) errn ("initial skip must be non-negative. Value (includes defaults) = ", initial_skip); ptr = set_option_string ("DECIMATION_FACTOR",NULL,wjstbl,nargs); if (ptr == NULL) { if (key_variable_name == NULL) decimation_factor = DECIMATION_FACTOR; else err ("decimation factor must be specified if key variable name ", "specified"); } else { ptr = strip_space_n(ptr,&n); if (key_variable_name == NULL) { decimation_factor = strtol(ptr,&sp,10); if (*sp != *(ptr+n)) err("Non-numeric character in value of DECIMATION_FACTOR. Value = ", ptr); if (decimation_factor < 1) errn ("decimation count must be positive. Value (includes defaults)= ", decimation_factor); } else { f_decimation_factor = strtod(ptr,&sp); if (*sp != *(ptr+n)) err("Non-numeric character in value of DECIMATION_FACTOR. Value = ", ptr); if (f_decimation_factor == 0.) err ("decimation factor must be non-zero.",""); } } } int ioopen_(s,nparams,ntotal) char *s[]; int *nparams; int *ntotal; { char tmp[INBUFSIZE],*sp,*ptr,*per_arg_wjstbl,*wjstbl; int i,n,n_inner_args; /* PATH_INFO we got. Must be static since after we reset it, */ /* it "survives" in process table after ioopen_ exits) */ static char *PATH_INFO_orig_putenv; /* PATH_INFO modified to not have .flat, .html, etc */ char *PATH_INFO_jgof_putenv; /* If user wants more precision than display widths set in object */ /* use either the max double precision width or the buffer size */ /* To be a bit more precise (no pun intended), regardless of */ /* object field widths, we should never print out more than */ /* DBL_DIG digits or we are printing out falsely precise #s. */ /* (There's a different although equally compelling set of reasons */ /* why we should never generate more digits than the buffer will */ /* hold!) */ max_precision = (DBL_DIG < DATUMSIZE) ? DBL_DIG : DATUMSIZE; /* Allow for a sign and a decimal point. Note that this does NOT */ /* allow for E format characters */ max_precision_fldwidth = max_precision + 2; /* Make sure outers "down the line" don't produce flat, html, etc */ new_and_old_path_infos(&PATH_INFO_jgof_putenv,&PATH_INFO_orig_putenv); if (PATH_INFO_jgof_putenv != NULL) if ( (i = putenv(PATH_INFO_jgof_putenv)) != 0 ) err ("putenv failure for PATH_INFO",strerror(i)); if (s[0] == NULL) err("No input object provided",""); if (s[0][0] == '\0') err("No input object provided",""); COPY_INTO_FIXED_LEN_BUFFER(tmp,s[0],"copying object spec"); s[0][0] = '\0'; /* Make wjstbl from optional args to inner. These args are */ /* distinguishable from outer args because they begin with a */ /* character not legal in JGOFS varnames */ /* After processing, make these args empty so outer doesn't */ /* try to process them */ n_inner_args = 0; wjstbl = NULL; for ( i = 1; i < *nparams; i++ ) { if (strchr(ILLEGAL_VARNAME_CHARS,s[i][0]) != NULL) { ptr = un_trigram(s[i],DEFAULT_TRIGRAM_CHAR,NULL); n = keyword_pairs_to_wjstbl (&per_arg_wjstbl, ptr, STANDARD_PARSE_SEPARATORS, "processing input args"); free(ptr); if (n < 0) err ("outer and inner args mixed in parameter ",s[i]); n_inner_args += n; wjstbl = lengthen_str(wjstbl,per_arg_wjstbl,NULL,0,"adding to wjstbl"); free(per_arg_wjstbl); s[i][0] = '\0'; } } get_inner_options(wjstbl,&n_inner_args); free_lengthened_str(wjstbl); if (n_inner_args != 0) errn ("Not all args to inner processed (misspellings?). Num unprocessed=", n_inner_args); nvars_read = -NVAR; max_object_level=jdbopen_(&handle,tmp,names,&name_array_size,&nvars_read); /* Restore original PATH_INFO if we changed it */ if (PATH_INFO_jgof_putenv != NULL) { if ( (i = putenv(PATH_INFO_orig_putenv)) != 0 ) err ("putenv failure restoring PATH_INFO",strerror(i)); free(PATH_INFO_jgof_putenv); } if (max_object_level<0) { ptr = buildstring("Error returned from jdbopen processing object spec ", tmp, "error code = ", "building jdbopen error string"); errn(ptr,max_object_level); } if (average) { level_split = level_splits(handle,max_object_level,nvars_read); if (level_split == NULL) errn ("Could not get memory for level split array. n ints wanted was ", max_object_level+2); /* sum and count don't need to be nvars_read in size. We only */ /* average final level variables. This is easier, though */ sum = (double *) malloc (nvars_read*sizeof(double)); if (sum == NULL) errn ("Could not get memory for sum array. n doubles wanted was ", nvars_read); count = (int *) malloc (nvars_read*sizeof(int)); if (count == NULL) errn ("Could not get memory for count array. n ints wanted was ", nvars_read); eof_pending_for_averages = FALSE; } if (key_variable_name == NULL) key_variable = -1; else { for (key_variable=0; key_variable= level_split[max_object_level]) if (fldwidths[*vn] < max_precision_fldwidth) { fldwidths[*vn] = max_precision_fldwidth; /* No way of avoiding potential string overflow here (or */ /* in the original jdbattributes_ call) */ sprintf(str,"width=%d",max_precision_fldwidth); } } return 1; } void iovaldouble_(vn,f) int *vn; double *f; { int i; char *end_char_ptr; i=*vn; if (i < 0) { *f = MISSING_VALUE_REAL; return; } *f = strtod(values[i],&end_char_ptr); if (*end_char_ptr != '\0') *f= MISSING_VALUE_REAL; return; } void iovalreal_(vn,f) int *vn; float *f; { double df; iovaldouble_(vn,&df); *f = df; return; } void iovalstr_(vn,tmp) int *vn; char *tmp; { char *s; s = values[*vn]; s = s+strspn(s," "); strcpy(tmp,s); return; } void ioname_(vn,s) int *vn; char *s; { strcpy(s,names[*vn]); return; } int iocommout_(str) char *str; { char *tok; tok = nxttok(comments_ptr,"\n",&comments_ptr,NULL,FALSE,FALSE); if (tok == NULL) return 0; strcpy(str,tok); return 1; } int iowidth_(vn) int *vn; { return fldwidths[*vn]; }