/* ****************************************************************** * * * File : rs.c * * * * Purpose : * * * * Version Number : 2.0c * * * * Revision History : * * * * Date Developer * * ---- --------- * * used to establish levels in a flat file * */ #define RS_VERSION "rs version 2.0c 31 Dec 2009" /* * 31 Dec 09 wjs * * The 12 Nov work also fixed a problem due to calling * * lengthen_str_and_free instead of lengthen_str * * 12 Nov 09 wjs * * Retain trigrams when passing selections to input object. * * There might be a significant wildcard in it. * * Grab more memory when building "pass-through" obj spec * * Get rid of unused buildstring reference * * [Begin 2.0c] * * 7 Oct 09 wjs * * Bug fix: Levelization argument parsing was being done * * against the optimized, reduced varlist. Result was re- * * jection of args referencing vars that wouldn't be needed * * Bug fix: Off-by-1 error counting # elements in list (!) * * Bug fix: Off-by-1 error failing to account for trailing NULL * * Reformat some error msgs * * [Begin 2.0b] * * 2 Jul 09 wjs * * Bug fix: when restructuring orig, no-bracket if statements, * * blew a biggie. Guess brackets are a good thing! * * [Begin 2.0a] * * * * 8 Jun 09 wjs * * Remove direct access to parse in favor of varnames_in_sel * * [needs varnames_in_sel 1.0] * * [does NOT need parse.c - no longer exists w/that name] * * [does NOT need parse.h] * * THIS should have been the beginning of 2.0a * * 2.0 version below was released w/JGOFS 2.1 * * 14 Feb 09 wjs * * Reference a couple of utils functions * * Change entry point to parse. * * Note that correct rs behavior requires sufficiently new * * versions of the parse package * * 23 Oct 08 wjs * * ILLEGAL_LEVEL now defined in core.h. 
Pull from here * * 12 Oct 07 wjs * * Rework selection/projection optimization * * 5 Jul 07 wjs * * Change name of switch that controls optimization * * 14 Jun 07 wjs * * New operator to signify "levelize but do not compress" * * Needed "forever" since otherwise, 2 consecutive records * * with identical, say, temperature values might be com- * * pressed to 1 record. * * Similarly, need to deal with consecutive "nd"s per above * * Rework input arg processing to diagnose and reject multiple * * specs of an input variable * * Honor level part of input arg even if input args are not * * spec'ed in level order * * Some common code moved to libraries - use it from there * * Remove array that was filled in but never used. Checked * * that it hadn't been used in rs 1.5 ("before our time") * * [needs parse 2.1] * * [needs parse.h 2.0] * * [needs parse_utils 1.0] * * [needs path_info_routines 1.3a] * * [Needs utils 2.3] * * [Begin 2.0] * * * * 9 Nov 06 wjs * * Add switch for 1.9 optimization feature (along w/option to * * establish default state of switch) * * Change max # levels to depend on MAXLEVELS (core.h) rather * * than on char - '0' (and a hardcoded 5!) * * Parametrize the sizes of a couple of arrays * * 1 Nov 06 wjs * * Pass outer args to rs' input object * * [Begin 1.9] * * * * 6 Oct 06 wjs * * Parametrize MISSING_VALUE_REAL * * [Begin 1.8b] * * 5 Jul 05 wjs * * Get int get_integer_attribute from library * * [Needs utils 1.9a] * * 30 Jun 05 wjs * * Bug fix: v 1.7 code did not handle situation where object * * had no comments. * * 4 Jun 05 wjs * * Needs jdb function defns * * [Needs jdbfuncdefns.h] * * [Begin 1.8a] * * 9 Jul 04 wjs * * Typo fix * * Add iovaldouble_ entry * * 23 Apr 04 wjs * * Mods to "version-returning function". 
* * a) change from local to global to ensure it can't be * * "optimized out" * * b) don't want "version" in any function name since such * * names appear when grep'ping for "version" * * 18 Feb 04 wjs * * Error if variable in levelizing list is not in object being * * levelized * * Check some potential buffer overflows * * #include core.h. Allows us to throw out stuff, and also * * gets rid of some compiler warnings * * "Full" error messages * * [Needs utils; path_info_routines] * * [Begin 1.8] * * 25 Jul 03 wjs * * Dynamically allocate comment buffer size * * [Needs path_info_routines 1.1 or later (.c & .h)] * * [Begin 1.7] * * 28 Jan 99 wjs * * Use path_info_routines to control PATH_INFO; change * * PATH_INFO just before needed; change it back when done * * Add version string; reorder comments * * [Needs path_info_routines 1.1 or later (.c & .h)] * * [Begin 1.6] * * June 1997 grf * * changed sub names to in_xxxx, fixes for v 1.5 * * May 1997 Glenn Flierl * * changed error calls to use error_ in outer.c * * May 21, 1996 clh * * iovalreal change to allow alpha vars to begin with digits * * March 1996 clh * * use minbufsize.h to determine size of buffers * * January 1996 clh * * fixed rsw to respect the re-ordering of variables * * July 1995 clh * * add subroutine to return column widths - iowidth_() * ********************************************************************* */ #include INNEROPTIONS #include "jdbfuncdefns.h" #include "path_info_routines.h" double strtod(); /* #define DEBUG */ /* Next 2 are the operators rs works with. LEVELIZE means "put */ /* named variable at the level following the operator". CONDENSE, */ /* the traditional operator, means "LEVELIZE, and, in addition, if */ /* consecutive records at this level are identical, get rid of 2nd" */ /* Be careful w/choice of characters, since they are the keys for */ /* rs to determine if a method argument is for inner or outer. In */ /* particular, watch out for outer's selection operators. 
See utils */
/* keyword_pairs_to_wjstbl/set_option_string (used by decimator, for */
/* one) for an alternate parsing approach if push comes to shove */
#define CONDENSE ':'
#define LEVELIZE '#'

/* Character that means "all un-named variables" in levelization */
/* parameters */
#define UNNAMED_VARS '*'

/* See SELPROJ_OPTIMIZE doc released in outer's distribution */
/* Compile-time default for do_optimize; ioopen_ uses it when the      */
/* SELPROJ_OPTIMIZE environment variable is not set.                   */
#ifndef DEFAULT_SELPROJ_OPTIMIZE
#define DEFAULT_SELPROJ_OPTIMIZE TRUE
#endif

/* Records the compiled-in default as a string.                        */
/* NOTE(review): not referenced in the visible code; presumably kept   */
/* so the setting can be found in the binary - confirm.                */
#if DEFAULT_SELPROJ_OPTIMIZE
static char *optimize_state = "Compiled with DEFAULT_SELPROJ_OPTIMIZE on";
#else
static char *optimize_state = "Compiled with DEFAULT_SELPROJ_OPTIMIZE off";
#endif

/* Name of the environment variable ioopen_ passes to getenv; the      */
/* result is parsed with get_logical_from_string into do_optimize.     */
char *SELPROJ_OPTIMIZE_env_var_name = "SELPROJ_OPTIMIZE";
Logical do_optimize;

/* Same scheme for the RS_OPTIMIZE switch.  A comment in ioopen_ says  */
/* that, as of rs 2.0, rs_optimize is set but not otherwise used.      */
#ifndef DEFAULT_RS_OPTIMIZE
#define DEFAULT_RS_OPTIMIZE TRUE
#endif

#if DEFAULT_RS_OPTIMIZE
static char *rs_optimize_state = "Compiled with DEFAULT_RS_OPTIMIZE on";
#else
static char *rs_optimize_state = "Compiled with DEFAULT_RS_OPTIMIZE off";
#endif

char *RS_OPTIMIZE_env_var_name = "RS_OPTIMIZE";
Logical rs_optimize;

/* NOTE(review): presumably the number of input levels and of variable */
/* names; their uses are not visible in this part of the file -        */
/* confirm.                                                            */
int nlevels,nnames;
/* Pre v 1.9, dimension in next line was 6. 
Taking guess at new size */
/* Indexed by level: ioreadrec_ starts its per-variable loop at        */
/* firstvar[clevel], where clevel is the level jdbreada_ returned.     */
int firstvar[MAXLEVELS+1];
/* Variable names; ioname_ copies names[newpntr[*vn]] to its caller.   */
char names[NVAR][VARNAMESIZE];
/* Field widths; set by ioattrout_ from a "width=" attribute and       */
/* returned by iowidth_.                                               */
int fldwidths[NVAR];
int namesize=VARNAMESIZE;
/* Current record's values as strings.  Passed to jdbreada_ in         */
/* ioreadrec_; read by iovaldouble_ and iovalstr_.                     */
char values[NVAR][DATUMSIZE];
/* NOTE(review): presumably the previous record's values, kept for the */
/* CONDENSE comparison; its use is not visible here - confirm.         */
char comp[NVAR][DATUMSIZE];
/* Passed by address to jdbreada_ together with values.                */
int valuesize=DATUMSIZE;
/* Handle passed by address to jdbattributes_ and jdbreada_.           */
int handle;
/* Level bookkeeping for ioreadrec_: compared against the requested    */
/* level, and set to ILLEGAL_LEVEL once jdbreada_ returns a negative   */
/* level.                                                              */
int minlevelread;
/* Newline-separated comment text, handed out one line at a time by    */
/* iocommout_; NULL means there are no comments.                       */
char *comments;

/* levelization info; derived from level:varname args to program */
/* ss is malloc'ed in ioopen_ with room for *nparams entries; num_ss   */
/* starts at 0 there.                                                  */
struct varlevel {
  int level;
  char *var;
} *ss;
int num_ss;
/* newpntr translates the variable number callers pass (*vn) into the  */
/* index used for names/values/fldwidths/newlev.  iovaldouble_ treats  */
/* a negative entry as "no value".                                     */
int newlev[NVAR];
int newpntr[NVAR];
char comp_type[NVAR];
/* Largest level seen so far in the levelization arguments (ioopen_).  */
int maxoutlev;
/* removed newlpntr[MAXLEVELS+1]; - see Jun 07 comments */

void error_(); /* in outer */
char *un_trigram(); /* in outer_utils.c(in jgofs.a library) */

/* Functions below in utils.c (in jgofs.a library) */
Logical add_id_to_err();
void errn();
void free_lengthened_str();
int get_integer_attribute();
Logical get_logical_from_string();
Logical is_a_varname();
char *lengthen_str();
char *strdupl();
Logical string_in_list();
char **string_sets_intersection();
char **string_sets_union();
int varnames_in_sel();

/************************************************************************/

/* Returns a pointer to a function-local static string made of the rs, */
/* path_info_routines.h and jdbfuncdefns.h version strings joined by   */
/* "/".                                                                */
char *rs_return_vers()
/* Dummy routine. Exists only to force .h file version string into */
/* this module. Note string must not be global or we'll have con- */
/* flicts if another routine similarly includes the version string */
{
  /* NOTE(review): the backslash below looks like a line continuation  */
  /* whose newline was lost in this copy of the file - confirm against */
  /* a pristine source.                                                */
  static char version[] = \ RS_VERSION"/"FULL_PATH_INFO_ROUTINESH_VERSION"/"FULL_JDBFUNCDEFNSH_VERSION;
  return version;
}

/* Report an error through outer's error_.  s and t are the two message */
/* strings; add_id_to_err is given them plus RS_VERSION and fills in    */
/* the strings actually passed to error_.  The local ss declared here   */
/* shadows the file-scope ss (the varlevel array) inside this function. */
void err(s,t)
char *s,*t;
{
  char *ss,*tt;
  add_id_to_err(&ss,&tt,s,t,RS_VERSION);
  error_(ss,tt);
  return; /* Not that it should ever get here... 
*/ } int iovarlevel_(vn) int *vn; { return newlev[newpntr[*vn]]; } int ioattrout_(vn,str) int *vn; char *str; { int j; j = newpntr[*vn]; if ((jdbattributes_(&handle,&j,str)) == 0) return 0; else { if (strncmp(str,"width=",6) == 0) fldwidths[j] = get_integer_attribute(str); return 1; } } void iovaldouble_(vn,df) int *vn; double *df; /* See iovalreal_ */ { int i; char *end_char_ptr; i=newpntr[*vn]; if(i<0) { *df=MISSING_VALUE_REAL; return; } *df=strtod(values[i],&end_char_ptr); if (*end_char_ptr != '\0') *df= MISSING_VALUE_REAL; return; } void iovalreal_(vn,f) int *vn; float *f; /* "Return real value (f) for variable indexed by vn. -9999" */ /* "for strings" */ { double df; iovaldouble_(*vn,&df); *f = df; return; } void iovalstr_(vn,tmp) int *vn; char *tmp; { char *s; s=values[newpntr[*vn]]; s=s+strspn(s," "); strcpy(tmp,s); } void ioname_(vn,s) int *vn; char *s; { strcpy(s,names[newpntr[*vn]]); } int iocommout_(str) char *str; { char *at; if (comments == NULL) return 0; if (comments[0]){ at=strchr(comments,'\n'); if(at){ *at = '\0'; strcpy(str,comments); strcpy(comments,at+1); } else { strcpy(str,comments); comments[0]='\0'; }; return 1; } else return 0; } int iowidth_(vn) int *vn; { return fldwidths[newpntr[*vn]]; } int ioreadrec_(level) int *level; { int clevel,i; if (*level == minlevelread) { minlevelread = *level+1; return 1; } else if (*level > minlevelread) { return 0; } while (TRUE) { clevel = jdbreada_(&handle,values,&valuesize); /* printf("clevel %d\n",clevel); */ if (clevel < 0) { minlevelread= ILLEGAL_LEVEL; return 0; } for (i=firstvar[clevel]; i cgi? */ /* Note that as of 21c, sels have trigramming in them on arrival */ /* here, rendering arg moot (and casting "empirically deter- */ /* mined" into doubt, as far as "truth" goes). 
To be fair, */ /* 21c mod was due to wildcard character being same as trigram */ /* character */ objspec = lengthen_str(objspec,sel_list[i],",",objspec_size_guess, "adding selection to objspec"); } i = strlen(objspec); objspec[i-1] = ')'; return objspec; } int ioopen_(s,nparams,ntotal) char *s[]; int *nparams; int *ntotal; { char tmp[INBUFSIZE],*sp,*objspec,*sel_proj_list_start; char *varname_ptr; char default_comp_type; int i,j,k,m,maxclev; int position_in_comments,size_comments; int default_level,html_max_level; Logical default_specified,any_levelization_args; Logical var_at_level[MAXLEVELS+1]; /* u_proj = user projections; specified as args to program */ /* l_proj = implied projections due to level restrictions; eg, */ /* .html0 implies only level 0 vars. Populated from varlevel */ /* struct. Only used for .html output. */ /* sel_list = user selections; specified as args to program */ /* vars_in_sels = list of variables used in selections */ char **u_proj,**l_proj,**sel_list,**vars_in_sels; int num_u_proj,num_l_proj,num_sels,num_vars_in_sels; /* PATH_INFO function and strings. */ int get_level(); char *get_protocol(); int new_and_old_path_infos(); /* "PATH_INFO=" + getenv("PATH_INFO"). Must be static */ /* since it "survives" in process table after ioopen_ exits) */ static char *PATH_INFO_orig_putenv; /* "PATH_INFO=" + getenv("PATH_INFO") w/ protocol = "jgof" */ char *PATH_INFO_jgof_putenv; if ( (sp = getenv(SELPROJ_OPTIMIZE_env_var_name)) == NULL ) { do_optimize = DEFAULT_SELPROJ_OPTIMIZE; } else { do_optimize = get_logical_from_string (sp,"SELPROJ_OPTIMIZE env var",SELPROJ_OPTIMIZE_env_var_name); } /* As of rs 2.0, NOT using rs_optimize. rs_optimize is a form of */ /* sel/proj optimization that may be OK even if full sel/proj */ /* optimization is not. It allows pass-through projection */ /* optimization for LEVELIZE vars while not allowing it for */ /* CONDENSE vars. 
Not implemented because it requires pre-jdbopen */ /* processing of the levelization parameters, which is not hap- */ /* pening in 2.0. Let's see how many times we need to turn off */ /* full optimization first... Once to workaround 20b issue, */ /* pending its fix (Oct 09) ... */ if ( (sp = getenv(RS_OPTIMIZE_env_var_name)) == NULL ) { rs_optimize = DEFAULT_RS_OPTIMIZE; } else { rs_optimize = get_logical_from_string (sp,"RS_OPTIMIZE env var",RS_OPTIMIZE_env_var_name); } /* If in html mode, save maximum level for optimization purposes */ /* If problems w/PATH_INFO, just bag optimization and let whatever */ /* used to deal w/this problem continue to deal w/it */ html_max_level = ILLEGAL_LEVEL; if (do_optimize) { sp = get_protocol(NULL); if (sp != NULL) { if ( (strcmp(sp,"html") == 0) || (strcmp(sp,"brev") == 0) ) { i = get_level(NULL); if ( (i != LEVEL_NOT_SPECIFIED) && (i != LEVEL_ERROR) ) html_max_level = i; } free (sp); } } /* Must be sure that methods down the line produce JGOFS */ /* protocol (or whatever jdbopen requires), and not html, */ /* flat, or whatever. Do this by replacing proto string */ /* with "jgof" (WJS decision at this point... jgof has no */ /* meaning to system). */ /* Make 2 strings for putenv. 1 sets up existing */ /* PATH_INFO; the other sets up PATH_INFO with "jgof" */ /* protocol */ /* Make sure outers "down the line" don't produce flat, html, etc */ if (new_and_old_path_infos(&PATH_INFO_jgof_putenv,&PATH_INFO_orig_putenv) != PATH_INFO_NEW_AND_OLD_OK) err ("problem processing PATH_INFO env var. Memory issues or bad fmt. env var=", getenv(PATH_INFO_ENV_VAR) ); if (PATH_INFO_jgof_putenv != NULL) if ( (i = putenv(PATH_INFO_jgof_putenv)) != 0 ) err ("putenv failure for PATH_INFO",strerror(i)); /* Process command line args. */ /* "Rules" are that inner copies its args and replaces them w/ */ /* null strings so outer can process the rest */ /* 1st arg to rs is object specifier. 
Other args to rs are of the */ /* form level:varlist (or level#varlist), and can (hopefully!) be */ /* intermixed w/outer args */ /* In addition to processing the inner args, we process the */ /* outer args, anticipating optimization is in effect. The pro- */ /* cessing consists of making a list of variables known to be */ /* needed. These variables are projections + the variables used */ /* in selections */ if (s[0] == NULL) err ("No input object spec",""); if (s[0][0] == '\0') err ("Empty input object spec",""); objspec = NULL; i = (*nparams) * sizeof(struct varlevel); ss = (struct varlevel *)malloc(i); if (ss == NULL) errn ("Could not get memory for varlevel structs. nbytes=",i); num_ss = 0; any_levelization_args = FALSE; default_level = ILLEGAL_LEVEL; maxoutlev = ILLEGAL_LEVEL; default_comp_type = '\0'; /* Since u_proj is NULL-terminated, need possible # proj +1. */ /* Since argv[0] is obj spec, nparams IS 1 bigger */ i = (*nparams) * sizeof(char *); u_proj = (char **)malloc(i); if (u_proj == NULL) errn ("Could not get memory for u_proj list. nbytes=",i); num_u_proj = 0; sel_list = (char **)malloc(i); if (sel_list == NULL) errn ("Could not get memory for selection list. 
nbytes=",i); num_sels = 0; num_vars_in_sels = 0; vars_in_sels = NULL; for (i=1; i<*nparams; i++) { if (is_rs_levelization_arg(s[i])) { j = (int) strtol(s[i],&sp,10); if (j >= MAXLEVELS) err ("level too big in levelization parameter ",s[i]); if (j < 0) err("level is negative in levelization parameter ",s[i]); if ((j > maxoutlev) || (maxoutlev == ILLEGAL_LEVEL)) maxoutlev = j; varname_ptr = sp+1; if (*varname_ptr == UNNAMED_VARS) { if (default_level == ILLEGAL_LEVEL) { default_level = j; default_comp_type = *sp; } else { err ("Default (*) set more than once",""); } } else { for (k=0; k= default_level) html_max_level = ILLEGAL_LEVEL; num_l_proj = 0; i = (num_ss + 1) * sizeof (char *); /* +1 for trailing NULL */ l_proj = (char **)malloc(i); if (l_proj == NULL) errn ("Could not get memory for l_proj list. nbytes=",i); if (html_max_level != ILLEGAL_LEVEL) { for (i=0; i COMMENTSIZE) ? i+2 : COMMENTSIZE; comments = (char *)realloc(comments,size_comments); if (comments == NULL) err("Could not allocate space for comments buffer",""); } strncpy (comments + position_in_comments, tmp, i); position_in_comments += i; *(comments + position_in_comments++) = '\n'; *(comments + position_in_comments) = '\0'; } j = 0; firstvar[j] = 0; for (i=0; i j) firstvar[++j] = i; } m = 0; for (k=0; k maxoutlev) maxoutlev = default_level; } i = 0; for (j=0; j