/* ******************************************************************* * * * Copyright (c) L-DGO/MIT/JGOFS * * * * * * File : defgb.c * * * * Purpose : Enhanced def method for globec purposes. * * * * Incompatibilities with def 1.7 (JGOFS 1.5 release) * * 1) Ignore embedded comments anywhere * * 2) Apostrophe is special character (unless changed w/ * * datafieldopts optional file) * * 3) Tab & newline characters must be data separators * * 4) Data lines cannot begin with &x (unless recompiled w/ * * different ERR_PREFIX string) * * 5) Variables in data files don't have to appear at level * * indicated in master variable list - can appear soon- * * er and are copied later when needed * * 6) Variable lists in subfiles need not be "master" lists * * for later subfiles. First line of list is list for * * subfile itself. Then lines are skipped until 1 * * does not end in > * * 7) Subfiles need not supply sub-subfiles even if master * * level indicates that sub-subfiles exist. "nd"s will * * be automatically inserted in such cases. 
* * 8) Arguments that begin w/a % are subject to decoding be- * * fore further processing (also implies that no arg * * to outer can begin w/%) * * * * Version Number : 4.3 * * * * Revision History : * * * * Date Developer * * ---- --------- */ #define INNER_VERSION "defgb version 4.3 23 Aug 2005" /* 23 Aug 05 4.3 WJS * * Mod to reflect new level_splits calling sequence * * Comments changes, some reflecting new cumulative Julian days * * capability * * [Needs ioreadrec_routines 1.7] * * [Needs ioopen_routines 2.8] * * 5 Jul 05 4.3 WJS * * replace getwidth_from_attr with library routine * * get_integer_attribute * * [Needs utils 1.9a] * * 3 Jun 05 4.3 WJS * * errn needs function declaration * * get level splits from library * * [Needs utils 1.9] * * 30 Mar 05 4.3 WJS * * Untrigram args beginning w/trigram key * * [Needs ioopen_routines 2.7c] * * [Needs ioreadrec_routines 1.6d] * * [Needs outer_utils 1.0] * * [Needs defgb.h 5.0 (which needs path_info_routines.h * * which in turn needs core.h)] * * [Needs path_info_routines.c 1.2] * * [Needs startchild 1.2a] * * [Needs utils 1.8] * * [Begin 4.3] * * * * 30 Aug 04 4.2 WJS * * jdb defns now in .h file. Remove them here * * 20 Jul 04 4.2 WJS * * Move detect-same-sink-destination code to open_sink (from * * configure_output in ioopen_routines). If sink destina- * * tions are new files, can only get valid comparison after * * file has been created * * 21 May 04 4.2 WJS * * Bug fix: get_comment_sources needs source_type of varlist * * and level 0 data file as well as file name * * 28 Apr 04 4.2 WJS * * Bug fix: forgot to steal NO_USERNAME defn from utils.c * * Fool w/4 Feb function yet again. Don't want "version" in * * its name (or any function's) because those names would * * show up when grep'ping for "version" in image. 
* * 26 Mar 04 4.2 WJS * * Various ioreadrec_* date fixes * * ioopen* enhancement re: spec of common sink destinations * * Make dummy local function mentioned on 4 Feb into global * * function (which should DEFINITELY not get optimized out) * * [Needs ioreadrec_routines 1.6c] * * 24 Feb 04 4.2 WJS * * Bug fix: reopen_object was producing an incorrectly-for- * * matted object+projection string in case of no vars being * * projected (itself an abnormal situation) * * 11 Feb 04 4.2 WJS * * Declare get_comment_sources * * 4 Feb 04 4.2 WJS * * Switch to FULL_DEFGBH_VERSION, appended to defgb version * * in a phony local function that might not get optimized out * * 30 Jan 04 4.2 WJS * * add username to error messages * * errn now in utils. Remove it from here * * Code changes might suggest this should be 4.1c, but there * * is a re-org of defgb.h, among other things, that suggests * * a new "full" . release should happen * * STDxxx_DEVICE->STDxxx_NAME per core.h (+ idea that these * * strings need not be present on a particular unix box) * * [Needs ioreadrec_routines 1.6a] * * [Needs ioopen_routines 2.7c] * * [Needs defgb.h 5.0 (which needs path_info_routines.h 1.4 * * which in turn needs core.h)] * * [Needs path_info_routines.c 1.2] * * [Needs startchild 1.2a] * * [Needs utils 1.8] * * [Begin 4.2] * * [Comments about versions 4.0->4.1b in defgb_revision.doc as of ] * * [ 30 Jul 05 WJS ] * * [Comments about versions 1.1->3.8c in defgb_revision.doc as of ] * * [ 11 Feb 04 WJS ] * * From defw.c vers 1.2; itself from def.c date unknown * * Comments in quotation marks in the body of defgb come from * * outer-url 2.5's descriptions of inner routines. 
outer-url * * 2.5 was released as the outer of JGOFS release 1.5, May 1997 * ******************************************************************* */ #include "defgb.h" /* outer routine */ void error_(); /* outer_utils.c routine */ char *un_trigram(); /* utils.c routines */ void analy_source(); char *buildstring(); char *copy_into_fixed_len_buffer(); void errn(); int extract_wjstbl(); void free_lengthened_str(); void free_lengthened_str_special(); int get_integer_attribute(); char *lengthen_str(); char *lengthen_str_nl(); char *lengthen_str_and_free(); int *level_splits(); char *lookup_wjstbl(); char *nxttok(); char *rem_delims(); char *strdupl(); /* PATH_INFO routines */ int get_level(); char *get_protocol(); char *make_PATH_INFO_putenv_string(); #if READ_COMMANDS char *startchild(); #endif #if READ_OBJECTS #include "jdbfuncdefns.h" /* core.h #include'd here,too */ int jdbunit = -1; /* Index (> 0) internal to jdb package */ /* Logically a part of each datafile */ /* (analogous to file stream), but since we */ /* never have > 1 open... */ #endif #ifdef VMS int putenv(); /* ? in stdlib.h if ansi and vms version > 7 ? */ pid_t waitpid(); /* ? in wait.h if vms version > 7 ? */ #endif /* Pointers array is not parallel to the rest. pointers[N] contains */ /* the position within the master variables array corresponding to */ /* variable N within the particular permutation of the master */ /* variable list we're working on. This is the reverse of the */ /* original def pointers array */ /* Example: master variable list is a b > c d > e f */ /* Files we are now reading have structure c b a > d > f */ /* pointers array is 2 1 0 3 5 */ int pointers[NVAR]; int inpwidths[NVAR]; /* Input field width for each fixed-format variable */ int inpstarts[NVAR]; /* Input field starts for each fixed-format variable */ int tabwidths[NVAR]; char tabnames[NVAR][MAXVARNAMESIZE+1]; char *tabattributes[NVAR]; char tabvalues[NVAR][DATUMSIZE+1]; /* Spare set of buffers for object input. 
Idea is to get its values */ /* into these buffers, then process them into "our" buffers. When */ /* possible, we go straight to "our" buffers, but convenient to have */ /* these around... Sizes are too large since apparently jdb routines */ /* truncate w/o error when getting data off net. We want to know if */ /* there's an error. */ #define MAXOBJVARNAMESIZE MAXVARNAMESIZE+1 #define MAXOBJTOKEN TOKEN+1 char objnames[NVAR][MAXOBJVARNAMESIZE+1]; char objvalues[NVAR][MAXOBJTOKEN+1]; /* Add one more level of indirection to allow removal of variables */ /* post_removal_varlist is list of pointers w/entries beyond */ /* removed variables "shoved up" one. That is, if we read in 3 */ /* variables and want to remove #2, post_removal_varlist[0]=0, */ /* post_removal_varlist[1]=2, and post_removal_varlist[2] doesn't */ /* exist */ /* Because of the indirection, a set of io*__ routines replaced */ /* the io*_ routines. The io*_ routines now call the io*__ */ /* routines with post_removal_varlist[*variable] as an arg instead */ /* of *variable. The io*__ are used directly by defgb, since all */ /* variables "in here" "exist" */ int post_removal_varlist[NVAR]; /* Size of the comment buffer is tricky. */ /* First, the comment buffer is used to hold comments from the */ /* indirect file and the diagnostic optional file. After the lat- */ /* ter is read, the various comment sinks are known. At this */ /* point, the comments in the comment buffer are sent to the */ /* appropriate sink. */ /* By default, all comments go to stdout. After the various */ /* sink defns, redirections, etc, any comments that still go to */ /* stdout will use the comment buffer (note that it is possible to */ /* redirect all comments, so that it is possible that the comment */ /* buffer is not used further). This buffer is emptied by */ /* outer every so often, so its size depends on how often outer */ /* empties it. 
At present (outerw 13 Sep 1995), the buffer is */ /* emptied once after the level 0 file is read, then once per */ /* level if outer processes any data at that level. */ /* Therefore, the comment buffer must be large enough to contain */ /* the level 0 comments. If outer processes data from each level */ /* (as it usually does), the comment buffer must be large enough */ /* to contain any individual level's comments. However, because */ /* of data selection, the whole dataset may fail to produce any */ /* data that matches the selection/projection criteria that the */ /* user gave to outer. In such a case, the buffer must be large */ /* enough to contain all the non-level0 comments in the dataset. */ /* Thus, a buffer that seems perfectly OK can overflow if a par- */ /* ticular selection is sufficiently inopportune. */ /* Gordian knot solution (defgb 3.8a) - dynamically allocate */ /* space and keep allocating... */ char *tabcomments; long tabcomments_size; char *tabcomments_ptr; /* Points to logical end of comments buffer */ int maxlev; /* Number of the lowest level in this dataset */ int ncnt; /* Number of the last variable in dataset */ int objmaxlev,objncnt; /* Above; temps for object input */ int objmaxlev_opt; /* Above; temp for optimized object input */ int jdblev; /* Need to maintain state while processing obj */ /* input. This var is the lowest as-yet */ /* unprocessed level from the most recent */ /* jdbread. See jdb.c */ Logical objeof; /* Temp until indirect file->eof logic */ char *last_objname; /* Name of object "now" being processed */ /* Index into master variable list of first variable at each level */ /* in the master structure. For any particular permutation, the same */ /* information is held in datalev[lev].firstvar, where lev goes from */ /* 0 to MAXLEVELS. 
*/ /* + 1 entry is 1 beyond end of list (number of variables in the */ /* object)-used as upper bound for loops */ /* */ /* Example: master variable list is a b > c d > e f */ /* Files we are now reading have structure c b a > d > f */ /* firstvarlevel is 0 2 4 6 */ /* datalev[0].firstvar = 0; [1] = 3; [2] = 4 */ /* */ /* pointers array (above) finishes the mapping of the permutation */ /* into the master list */ /* */ /* We have different structures for the master list and for the per- */ /* mutations for a couple of reasons. The main one is that we want */ /* this information to be passed with the structure for the */ /* permutation level so anything "looking" at the level "knows all". */ /* Another is to easily distinguish between master list processing and */ /* permutation list processing. */ int firstvarlevel[MAXLEVELS+1]; int *objfirstvarlevel; /* Same for object input */ /* Historical convenience-could be replaced w/[lev].firstvar */ /* File information */ char *dirstring; struct fileinfo indirect_file; struct fileinfo files[NFILETYPES]; struct fileinfo datalev[MAXLEVELS+1]; /* +1 because we init "next */ /* level" w/o knowing if it's needed */ struct fileinfo comment_sources[MAX_DATACOMMENTS_SOURCES]; /* data field options */ /* data_field_trim cannot be changed from TRUE as of v 2.4. */ /* Issues about leading separators throughout system need to be */ /* addressed as well as issues about "would-be" separators (eg, */ /* leading blanks in a method where tabs are separators but blanks */ /* are not). Code is in ioreadrec_ to implement data_field_trim = */ /* FALSE for fixed fields. 
iovalstr__ strips off leading blanks */ /* anyway */ Logical significant_consec_separators = SIGNIFICANT_CONSECUTIVE_SEPARATORS; Logical significant_embedded_separators = SIGNIFICANT_EMBEDDED_SEPARATORS; Logical data_field_trim = DATA_FIELD_TRIM; /* latitude & longitude info */ struct latlonformat inlatformat; struct latlonformat outlatformat; struct latlonformat inlonformat; struct latlonformat outlonformat; /* varlistopts info */ int varlists_end; char *varname_for_lev1s; char *upcase_flag_for_coll; char *coll_from_objobj_obj; /* time/date info */ int num_outtimes; /* A time/date "fragment" is one of a list of possible fragments */ /* that can be input. The 5 output time variables (year, month, */ /* day, time, and displacement from prime meridian) are built */ /* from the input fragments provided. See comments in ioopen_ */ /* routines and defgb.h, and the "parameter 1" documentation. */ struct outtime out_timedate[NUM_OUTTIMES][NUM_TIMEDATE_FRAGS]; struct intime in_timedate[NUM_OUTTIMES][NUM_TIMEDATE_FRAGS]; /* Extra length of next list is because we present a window of */ /* length NUM_TIMEDATE_FRAGS+1 to get_timedate_frag. See it for */ /* reason. 
*/
char *timedatefragbuflist[NUM_OUTTIMES][2*NUM_TIMEDATE_FRAGS];

   /* Next 2 are, essentially, sets of scratch buffers into which */
   /* we accumulate and/or keep track of "how time is doing" as we */
   /* progress through input levels */
struct cumtime cumtime[NUM_OUTTIMES];
struct timefrac fractime[NUM_OUTTIMES];

/* PATH_INFO pieces and strings */
char *PATH_INFO_orig_putenv;   /* "PATH_INFO=" + getenv("PATH_INFO") */
int defgb_reqlevel;            /* level from getenv("PATH_INFO") */
char *PATH_INFO_jgof_putenv;   /* "PATH_INFO=" + getenv("PATH_INFO") */
                               /* w/ protocol = "jgof" */
                               /* Used w/ object input */
char *PATH_INFO_none_putenv;   /* Same as PATH_INFO_jgof_putenv but */
                               /* with protocol = "none" */

/* Miscellaneous info about what to output & where */
struct outinfo output_opts;
   /* Next defns for convenience, since stuff on right so long */
struct outfile *primary_diagout = &output_opts.file[PRIMARY_DIAG_SINK];
struct outfile *diagout = &output_opts.file[DIAG_SINK];
   /* Next var convenient if unwanted trace message would have to */
   /* be built. */
int max_trace_level;
char trace_msg[100];   /* Trace message buf for messages that need */
                       /* formatting. Too lazy to count better, etc */

   /* For info about format of next 3 strings, see ioopen_routines */
   /* (they are "wjstbl"s) */
char disp_data_widths[DISP_DATA_WIDTHS+1];
char var_data_widths[VAR_DATA_WIDTHS+1];
char trans_list[TRANS_LIST+1];

   /* Need single char -> string buffer. Also need an empty string - */
   /* use pointer to terminating null of this string */
   /* (one_char_buf + 1 is also the "first time through" marker */
   /* that add_to_errbuf initialises its static buffer pointer to) */
char one_char_buf[2] = {'\0','\0'};

   /* Set by ioexpress_outer_interest_: TRUE when outer has said it */
   /* does not want the next record */
Logical no_interest_by_outer;
/* */
/************************************************************************/
/* */
char *defgb_return_vers()
/* Routine exists mostly to force .h file version string(s) into */
/* this module, but it could be called, I suppose. Note string must
Note string must */ /* not be global or we'll have conflicts if another routine similarly */ /* includes the version string */ { #ifdef READCOMMANDS static char version[] = INNER_VERSION"/"FULL_DEFGBH_VERSION"/"FULL_JDBFUNCDEFNSH_VERSION; #else static char version[] = INNER_VERSION"/"FULL_DEFGBH_VERSION; #endif return version; } int ioexpress_outer_interest_(flag) /* flag to allow outer to tell inner whether or not it wants more */ /* data. Return flag that tells outer that we support this */ /* At present (v 4.0), this means "any more data at this */ /* level for this combo of upper levels"; ie, if NO, outer wants */ /* inner to advance to the EOF for this level if possible, what- */ /* ever that may mean. outer is responsible for continuing to */ /* read until EOF (in case inner cannot/does not implement the */ /* advance). outer is responsible for setting the flag to YES */ /* when it wants more data. */ /* At present (v 4.0), flag is TRUE/FALSE. Made it int and */ /* tested it to allow for expansion. OUTER_WANTS_NEXT_REC should */ /* be in a .h file shared between inner and outer, of course */ int flag; #define OUTER_WANTS_NEXT_REC TRUE { no_interest_by_outer = (flag != OUTER_WANTS_NEXT_REC); return TRUE; } void ioclose_() /* "Close files" */ { int i; for (i=0; i<=maxlev; i++) if (datalev[i].open) { #if READ_OBJECTS /* Note that .open is set at each level of the object, so */ /* we are potentially doing jdbclose more than once per object */ /* jdbclose handles this. */ if (datalev[i].source_type == JGOFS_OBJECT) jdbclose_(&jdbunit); else #endif fclose(datalev[i].stream); datalev[i].open=FALSE; } for (i=0; i=0; i--) if (n >= firstvarlevel[i]) return i; /* Really an error, but see comment below & stay compatible ... 
*/ return 0; } int iovarlevel_(vn) int *vn; /* "Return level corresponding to variable indexed by vn" */ /* (returns maxlev for indices illegally big; 0 for indices */ /* illegally small) */ { return iovarlevel__(post_removal_varlist[*vn]); } int ioattrout__(n,str) int n; char *str; { char *at; /* if there IS an attribute, find one and move next one to */ /* front of string */ if (*tabattributes[n] == '\0') { free (tabattributes[n]); return 0; } else { at=strchr(tabattributes[n],ATTRIB_SEP); if(at == NULL){ /* this is the only attribute for variable *vn */ strcpy(str,tabattributes[n]); *tabattributes[n]='\0'; } else { /* there is more than 1 attribute */ *at = '\0'; strcpy(str,tabattributes[n]); strcpy(tabattributes[n],at+1); } return 1; } } int ioattrout_(vn,str) int *vn; char *str; /* "Output next attribute for variable indexed by vn. 0=none left"*/ { return ioattrout__(post_removal_varlist[*vn],str); } void iovaldouble__(n,df) int n; double *df; /* NB: defgb uses different "pointers" logic from def, hence */ /* different-looking missing value logic */ { char *end_char_ptr; *df=strtod(tabvalues[n],&end_char_ptr); /* Next line ignores possibility that tabvalues[i] is the empty */ /* string. If it is, df will end up w/value 0 instead of "missing" */ if (*end_char_ptr != '\0') *df= MISSING_VALUE_REAL; if (output_opts.iovalreal != 0) { output_opts.n_iovalreals++; if (output_opts.n_iovalreals == 1) fprintf(*(diagout->stream_ptr),"iovaldouble__ %d %f\n",n,*df); if (output_opts.n_iovalreals == output_opts.iovalreal) output_opts.n_iovalreals = 0; } return; } void iovaldouble_(vn,df) int *vn; double *df; /* See iovalreal_ */ { iovaldouble__(post_removal_varlist[*vn],df); return; } void iovalreal_(vn,f) int *vn; float *f; /* "Return real value (f) for variable indexed by vn. 
-9999" */
/* "for strings" */
/* The value is obtained as a double from iovaldouble__ and then */
/* narrowed to float. */
{
  double df;

  iovaldouble__(post_removal_varlist[*vn],&df);
  *f = df;
  return;
}

void iovalstr__(n,tmp)
int n;
char *tmp;
/* Copy the current value string of variable n into tmp with leading */
/* blanks removed.  tmp must be large enough; no length is checked. */
/* When output_opts.iovalstr is non-zero, the first call of every */
/* group of that many calls is traced to the diag sink. */
/* NB: defgb uses different "pointers" logic from def, hence */
/* different-looking missing value logic */
{
  static char *s;

  s = tabvalues[n];
  s += strspn(s," ");
  strcpy(tmp,s);
  if (output_opts.iovalstr != 0)
    {
      output_opts.n_iovalstrs++;
      if (output_opts.n_iovalstrs == 1)
        fprintf(*(diagout->stream_ptr),"iovalstr %d %s\n",n,tmp);
      if (output_opts.n_iovalstrs == output_opts.iovalstr)
        output_opts.n_iovalstrs = 0;
    }
  return;
}

void iovalstr_(vn,tmp)
int *vn;
char *tmp;
/* "Return string value (tmp) for variable indexed by vn." */
{
  iovalstr__(post_removal_varlist[*vn],tmp);
  return;
}

int iowidth__(n)
int n;
/* Return the stored field width (tabwidths) of variable n */
{
  return tabwidths[n];
}

int iowidth_(vn)
int *vn;
/* "Return length of variable field indexed by vn" */
{
  return iowidth__(post_removal_varlist[*vn]);
}
/* */
/********* End functions called with index of variable. ***************/
/********* Error handling *********************************************/
/* */
Logical same_file (out1,out2)
struct stat *out1,*out2;
/* TRUE when the two stat results have the same inode and device */
/* (on VMS st_ino is compared as a 3-element array) */
{
#if VMS
  return ((out1->st_ino[0] == out2->st_ino[0]) &&
          (out1->st_ino[1] == out2->st_ino[1]) &&
          (out1->st_ino[2] == out2->st_ino[2]) &&
          (out1->st_dev == out2->st_dev) );
#else
  return ((out1->st_ino == out2->st_ino) &&
          (out1->st_dev == out2->st_dev) );
#endif
}

Logical open_sink(sink)
struct outfile *sink;
/* Open diagnostic/error sinks. */
/* Special-case /dev/stdout & /dev/stderr since they don't exist as */
/* "devices" on all systems. */
/* Don't open them, either. Not sure of consequences of */
/* this, but latest thoughts (May 97) say this is correct */
/* Do NOT call err from this routine, since it can be called by err */
/* Returns the sink's open flag (TRUE if the sink is usable). */
{
  int j;

  /* Don't worry about mode for stdout/stderr.
Presumably this is */
  /* has been set by shell */
  if (strcmp(sink->sink,STDOUT_NAME) == 0)
    {
      *(sink->stream_ptr) = stdout;
      *(sink->open_ptr) = TRUE;
      return TRUE;
    }
  if (strcmp(sink->sink,STDERR_NAME) == 0)
    {
      *(sink->stream_ptr) = stderr;
      *(sink->open_ptr) = TRUE;
      return TRUE;
    }
  /* Sink could be open from before configure_output, with compile- */
  /* time file names, etc. See start of ioopen_ */
  /* Next if may be redundant, but makes things easier to understand */
  if ( ! *(sink->open_ptr) )
    {
      /* See if this sink matches any others. If so, make its stream */
      /* and open pointers point to those of that earlier one. This */
      /* lets us send multiple streams to same sink w/o worrying about */
      /* file sharing (multiple opens of same file, etc) */
      /* Don't care about stat failure. Most often it will be a new */
      /* file. Otherwise, sinks will be declared "different", and any */
      /* real issue will presumably be repeated (and handled) at open */
      /* time */
      if (sink->fileinfo.st_dev == NO_SUCH_DEVICE)
        stat(sink->sink,&(sink->fileinfo));
      /* NOTE(review): the loop header below looks truncated in this */
      /* copy ("jfileinfo" is not an identifier used anywhere else and */
      /* the braces that follow do not balance).  Presumably it walks */
      /* output_opts.file[] and calls same_file on the two fileinfo */
      /* members; restore it from a pristine copy before building. */
      if (sink->fileinfo.st_dev != NO_SUCH_DEVICE)
        for (j=0; jfileinfo),&output_opts.file[j].fileinfo) )
          {
            sink->stream_ptr = output_opts.file[j].stream_ptr;
            sink->open_ptr = output_opts.file[j].open_ptr;
            sink->mode_ptr = output_opts.file[j].mode_ptr;
            break;
          }
    }
  }
  /* Still not open (no earlier sink shares the file): open it now, */
  /* in append mode unless a mode was already supplied */
  if ( ! *(sink->open_ptr) )
    {
      if ( *(sink->mode_ptr) == NULL )
        *(sink->mode_ptr) = "a";
      *(sink->stream_ptr) = fopen(sink->sink,*(sink->mode_ptr));
      *(sink->open_ptr) = (*(sink->stream_ptr) != NULL);
    }
  return *(sink->open_ptr);
}

char *add_to_errbuf (errbuf_ptr,s,t,addend)
char **errbuf_ptr,*s,*t;
char addend;
/* errbuf_ptr = NULL asks only for status */
/* s must be non-null; don't care about t */
/* If previous call failed, return NULL. Otherwise, */
/* call lengthen_str(_nl) to add to error buffer. Addend is */
/* really just a flag ('\n' means call lengthen_str_nl; otherwise */
/* don't) but calling code is easier to see w/ \n in it. */
/* Buffer pack either succeeds or not.
If it succeeds, alter */ /* input pointer and return it. If it does NOT succeed, leave input */ /* pointer alone and return NULL. Save last return. */ /* Main point of this routine is error handling in case allocation */ /* fails-cannot call err!! */ { #define ERRBUF_EXTEND_SIZE 800 /* Approx 10 lines. */ static char *buf = one_char_buf + 1; /* Anything non-NULL (but note */ /* this value used in code below to */ /* to test for "first time through") */ if ( (errbuf_ptr == NULL) || (buf == NULL) ) return buf; /* Since we're using the "special" buffer over and over, we need to */ /* free it every time but the first (Normally, the null *errbuf_ptr */ /* would cause lengthen_str to allocate a new set of buffers) */ if ( (*errbuf_ptr == NULL) && (buf != one_char_buf+1) ) free_lengthened_str_special(); buf = *errbuf_ptr; /* Statement below presumably replaces commented-out if-else */ /* further below. If trouble, act accordingly! */ buf = ( (addend == '\n') ? lengthen_str_nl : lengthen_str ) (buf,s,t,ERRBUF_EXTEND_SIZE,USE_SPECIALS); /* if (addend == '\n') */ /* buf = lengthen_str_nl(buf,s,t,ERRBUF_EXTEND_SIZE,USE_SPECIALS); */ /* else */ /* buf = lengthen_str(buf,s,t,ERRBUF_EXTEND_SIZE,USE_SPECIALS); */ if (buf != NULL) *errbuf_ptr = buf; return buf; } void read_beyond_error(f,errbuf_ptr,max) int max; char **errbuf_ptr; struct fileinfo *f; /* There might be diagnostic info after the &x. Copy it (but */ /* have sanity limit) */ { int ndiags; char *ptr; char end_of_string; ptr = f->buf; ndiags = 0; while ( (fgets(ptr,MAXREC,f->stream) != NULL) && (ndiags++ < max) ) { if (ndiags == 1) add_to_errbuf(errbuf_ptr, " More diagnostic info from command(?) follows:\n", NULL,'\0'); /* Make sure message ends w/exactly 1 newline */ end_of_string = ( *(ptr + strlen(ptr) - 1) == '\n' ) ? 
'\0' : '\n';
      add_to_errbuf(errbuf_ptr," ",ptr,end_of_string);
    }
  /* If the line count stands exactly at max, read one more line and, */
  /* if there is one, say so in the error buffer */
  if (ndiags == max)
    if (fgets(ptr,MAXREC,f->stream) != NULL)
      add_to_errbuf(errbuf_ptr," *** too many diags\n",NULL,'\0');
  return;
}

void get_replacement_char(buf,u)
char *buf;
struct unprint_repl *u;
/* Find a printable character to replace the unprintable ones. We */
/* have a list of candidates. If any of them is NOT presently in msg */
/* use it. Otherwise, the fun begins. If any of them is used only */
/* once, use that. If all are used at least twice, try to find one */
/* that is unused before the bad character. Failing that, pick the */
/* one that occurs least. Filling in the appropriate structure fields */
/* will help issue an accurate summary diagnostic (see */
/* issue_unprintable_diag) */
/* On entry u->first_repl_char_ptr must already point at the first */
/* bad character in buf.  On return u->repl_char holds the choice. */
{
  char *ptr,*ptr2;
  char *repl_char_in_msg;
  char repl_char_appearing_just_once = '\0';
  char bad_appears_before_any_of_these = '\0';
  int count;

  /* Try for unique char, while keeping "only once" & "bad first" */
  /* info */
  ptr = UNPRINT_REPL_CHARS; /* Defined in defgb.h */
  while (*ptr != '\0')
    {
      repl_char_in_msg = strchr(buf,*ptr);
      if (repl_char_in_msg == NULL)
        {
          /* Candidate not in message at all - best case */
          u->repl_char = *ptr;
          return ;
        }
      /* See if this char appears just once */
      if (strchr(repl_char_in_msg+1,*ptr) == NULL)
        /* If repl char appears just once, prefer that it appear */
        /* after first bad char, but want "just appearing once" in any */
        /* case */
        /* NOTE(review): the test below is true when the candidate */
        /* sits BEFORE the first bad char, which looks like the */
        /* opposite of the stated preference - confirm the intent */
        if ( (u->first_repl_char_ptr > repl_char_in_msg) ||
             (repl_char_appearing_just_once == '\0') )
          repl_char_appearing_just_once = *ptr;
      /* See if this char appears after first bad char */
      if (u->first_repl_char_ptr < repl_char_in_msg)
        bad_appears_before_any_of_these = *ptr;
      ptr++;
    }
  /* Do we have a candidate that appeared just once?
If so, use it */ if (repl_char_appearing_just_once != '\0') { u->first_repl_char_already_in_msg = strchr(buf,repl_char_appearing_just_once); u->repl_char_already_in_msg = 1; u->repl_char = repl_char_appearing_just_once; return; } /* Do we have a candidate that first appears after first bad char? */ /* If so, use it */ if (bad_appears_before_any_of_these != '\0') { u->first_repl_char_already_in_msg = strchr(buf,bad_appears_before_any_of_these); u->repl_char_already_in_msg = 1; u->repl_char = bad_appears_before_any_of_these; return; } /* Sigh. Count them. */ ptr = UNPRINT_REPL_CHARS; /* Defined in defgb.h */ u->repl_char_already_in_msg = INT_MAX; while (*ptr != '\0') { count = 0; ptr2 = repl_char_in_msg = strchr(buf,*ptr); while (ptr2 != NULL) { count++; ptr2 = strchr(++ptr2,*ptr); } if (count < u->repl_char_already_in_msg) { u->repl_char_already_in_msg = count; u->first_repl_char_already_in_msg = repl_char_in_msg; u->repl_char = *repl_char_in_msg; } } return; } void replace_unprintables(buf,u) char *buf; struct unprint_repl *u; /* Check buf for unprintable chars. If any found, select a printable */ /* character not used in message and replace unprints with that char. */ /* Return info is placed in structure. If we can get away with what */ /* we just described, only interesting return info is what character */ /* we used for replacement, how many we replaced, and, for kicks, what */ /* character we replaced. If we can't find a replacement character */ /* otherwise unused in message, we do some contortions and return in- */ /* fo to allow the resulting message about what we did to be accurate */ { char *ptr; char repl_char; Logical ok; u->repl_char = '\0'; u->first_bad_char = '\0'; u->chars_replaced = 0; u->repl_char_already_in_msg = 0; u->first_repl_char_ptr = NULL; u->first_repl_char_already_in_msg = NULL; /* Find first unprintable character in buf. Obvious thing to use */ /* is isprint(). 
This assumes a partition of the 7 bit char set */
  /* into printable chars and control chars. First problem is that */
  /* control chars tab and newline are expected in our buffers due */
  /* to our own formatting work. Second problem is that I'm not */
  /* sure all the characters in the printable range actually print */
  /* something. Accordingly, decided to use isalnum (upper & lower */
  /* case and digits), and an empirically selected set of punctua- */
  /* tion chars. */
  /* There is a further problem with what the ctype.h functions */
  /* do with 8 bit characters. They are supposed to work w/un- */
  /* signed chars (an interesting problem if char on a particular */
  /* machine is signed), but empirical testing showed 'df'x consid- */
  /* ered printable even w/unsigned taken into account. */
  /* Eventually, I decided to "hardcode" the 7 bit ascii print- */
  /* ables as defined in K & R. Everything else seemed to exhibit */
  /* holes during testing. Feel free to recode! */
  ptr = buf;
  while (*ptr != '\0')
    {
      /* Acceptable: 0x20..0x7e, plus tab and newline */
      ok = ((*ptr >= '\x20') && (*ptr <= '\x7e'));
      if ( ! ok )
        ok = ((*ptr == '\t') || (*ptr == '\n'));
      if ( ! ok)
        {
          /* The replacement character is chosen once, at the first */
          /* bad character, and reused for all later ones */
          if (u->chars_replaced == 0)
            {
              u->first_repl_char_ptr = ptr;
              u->first_bad_char = (unsigned char) *ptr;
              /* get_replacement_char needs u->first_repl_char_ptr, so */
              /* call after setting that... */
              get_replacement_char(buf,u);
            }
          *ptr = u->repl_char;
          (u->chars_replaced)++;
        }
      ptr++;
    }
  return;
}

void issue_unprintable_diag(errbuf,u)
char **errbuf;
struct unprint_repl *u;
/* If unprintable characters were replaced, issue appropriate message */
/* In normal case, where the replacement character is unique in the */
/* error buffer, the message is simple. Otherwise... */
{
  /* Try to protect against memory allocation problems at runtime */
  /* by allocating this at compile time */
  /* #defined character is where we will place actual repl char */
#define EMERGENCY_MSG_PLACEHOLDER '?'
static char *emergency_msg = "*** ?(s) above replace strange characters\n"; Logical first_is_repl; char *prefix = "***"; char *trail,*clause,*which_is_repl,*line_break; char *ptr1,*ptr2,*ptr3,*ptr4,*ptr5,*ptr6; char *buf; if (u->chars_replaced == 0) return; first_is_repl = (u->first_repl_char_ptr < u->first_repl_char_already_in_msg); if (u->repl_char_already_in_msg == 0) { /* Normal case - all replacement chars came from us */ trail = clause = line_break = ""; if (u->chars_replaced != 1) { trail = "s"; clause = "first of which is "; line_break = "\n *** "; } ptr1 = "strange character"; ptr2 = "hex)"; ptr3 = "replaced with"; ptr4 = "above\n"; /* +20 is slop. Must represent blanks which are in format in- */ /* stead of in strings for format readability. */ /* +5 allows for 99999 strange characters; +2 is for " ("; */ /* +2 is for hex rep of bad char; +2 is 2*strlen(trail); */ /* +1 is for repl char; +1 is for trailing '\0' */ buf = (char *) malloc(strlen(prefix) + strlen(ptr1) + strlen(ptr2) + strlen(ptr3) + strlen(ptr4) + strlen(clause) + strlen(line_break) + 20 + 5 + 2 + 2 + 2 + 1 + 1); if (buf != NULL) sprintf(buf," %s %d %s%s (%s%02X %s %s%s %c%s %s", prefix, u->chars_replaced, ptr1, trail, clause, u->first_bad_char, ptr2, line_break, ptr3, u->repl_char, trail, ptr4); } else if ((u->repl_char_already_in_msg == 1) && (u->chars_replaced == 1)) { /* Special case based on idea that we most likely have only 1 */ /* bad char, and, if we couldn't find a unique char to use for */ /* replacement, it's likely that we could find a char that */ /* appeared only once. Accordingly, we have 2 replacement */ /* chars in the buffer. One came from us representing the bad */ /* char; the other was there before; and we can issue a defin- */ /* itive message about which is which */ which_is_repl = (first_is_repl) ? "first" : "second"; ptr1 = "in the message above replaces a strange character ("; ptr2 = "hex)\n"; /* +20 is slop. 
Must represent blanks which are in format in- */ /* stead of in strings for format readability. */ /* +3 is for "The"; +2 is for hex rep of bad char; */ /* +1 is for repl char; +1 is for trailing '\0' */ buf = (char *) malloc(strlen(prefix) + strlen(which_is_repl) + strlen(ptr1) + strlen(ptr2) + 20 + 3 + 2 + 1 + 1); if (buf != NULL) { sprintf (buf, " %s The %s %c %s%02X %s", prefix, which_is_repl, u->repl_char, ptr1, u->first_bad_char, ptr2); } } else { /* Oh well. We don't know which replacement chars in message */ /* came from us and which replaced bad chars. We might be able */ /* to speak definitively about the first character; if so, do */ /* do it. If not, do the best we can... */ if (first_is_repl) { ptr1 = "s above replace strange characters.\n"; ptr2 = "The first"; ptr3 = "replaces a"; ptr4 = "hex.\n"; /* +20 is slop. Must represent blanks which are in format in- */ /* stead of in strings for format readability. */ /* Each +5 allows for 99999 strange characters; */ /* +2 is for "of"; +2 is for hex rep of bad char; */ /* +2 is for repl char twice; +1 is for trailing '\0' */ buf = (char *) malloc( 2*strlen(prefix) + strlen(ptr1) + strlen(ptr2) + strlen(ptr3) + strlen(ptr4) + 20 + 5 + 5 + 2 + 2 + 2 + 1 ); if (buf != NULL) { sprintf (buf, " %s %d of %d %c%s %s %s %c %s %02X %s", prefix, u->chars_replaced, u->chars_replaced + u->repl_char_already_in_msg, u->repl_char, ptr1, prefix, ptr2, u->repl_char, ptr3, u->first_bad_char, ptr4); } } else { ptr1 = "s above replace strange characters.\n"; ptr2 = "The first"; ptr3 = "represents itself. The first strange character is"; ptr4 = "hex.\n"; ptr5 = "We cannot further identify which"; ptr6 = "s represent strange characters\n"; /* +20 is slop. Must represent blanks which are in format in- */ /* stead of in strings for format readability. 
*/ /* Each +5 allows for 99999 strange characters; */ /* +2 is for "of"; +2 is for hex rep of bad char; */ /* +3 is for repl char thrice; +1 is for trailing '\0' */ buf = (char *) malloc( 3*strlen(prefix) + strlen(ptr1) + strlen(ptr2) + strlen(ptr3) + strlen(ptr4) + strlen(ptr5) + strlen(ptr6) + 20 + 5 + 5 + 2 + 2 + 3 + 1 ); if (buf != NULL) sprintf (buf, " %s %d of %d %c%s %s %s %c %s %02X %s %s %s %c %s", prefix, u->chars_replaced, u->chars_replaced + u->repl_char_already_in_msg, u->repl_char, ptr1, prefix, ptr2, u->repl_char, ptr3, u->first_bad_char, ptr4, prefix, ptr5, u->repl_char, ptr6); } } if (buf == NULL) { /* We couldn't make fancy message because of memory allocation */ /* failure. Try to sneak in emergency message - there may be */ /* space in errbuf w/o needing further allocation */ if ( (ptr1 = strchr(emergency_msg,EMERGENCY_MSG_PLACEHOLDER)) != NULL ) /* If it DOES = NULL, I give up!! */ *ptr1 = u->repl_char; add_to_errbuf(errbuf,emergency_msg,NULL,'\0'); } else add_to_errbuf(errbuf,buf, " Strange characters may or may not cause problems",'\n'); return; } void analyze_whitespace(errbuf_ptr,s) char **errbuf_ptr; char *s; /* See if s has leading and/or trailing whitespace and if it has */ /* embedded tabs. We only need look at blanks, tabs and newlines */ /* because replace_unprintables has already declared other whitespace */ /* illegal and marked it somehow */ { size_t i,end_input,first,end_first,last,end_last; /* Much ado about nothing, but decided to try to order things in */ /* case we want to add another character to search (highly unlikely */ /* - see comment above re replace_unprintables). */ /* NWHITE must equal last_INDEX+1. Whole table is list of prefix */ /* & suffix chars of interest. Embedded chars of interest must */ /* follow TAB_INDEX. Additional embedded char requires mod to error */ /* message. 
*/ /* table must end with null to make it legit char string */ #define SPACE_INDEX 0 #define TAB_INDEX 1 #define NEWLINE_INDEX 2 #define NWHITE 3 char white[NWHITE+1] = {' ','\t','\n','\0'}; char *embedded_string = white + TAB_INDEX; Logical embedded_non_blank_whitespace; Logical leading_whitespace,trailing_whitespace; char *prefix = "***"; char *ptr1,*ptr2; char *buf1; /* buf2 needs to hold something like */ /* is preceded and followed by whitespace, and contains embedded whitespace */ /* If potential content increases, size may need to change, since we */ /* don't bother to check sizes as we fill this buffer */ char buf2[120]; #define MAXLEN_STRING_TERMINI 4 #define ELLIPSIS "..." int terminus_first,init_last,len_ellipsis,maxlen_errsubstring; char *ellipsis1,*ellipsis2; char repl_first, repl_last; char buf3[MAXLEN_STRING_TERMINI + 3]; /* 2 for 's; 1 for \0 */ if (s == NULL) return; if (*s == '\0') return; len_ellipsis = strlen(ELLIPSIS); /* get first word = first non-white to first white after that */ /* get last word = last non-white to first white preceding that */ /* If whitespace precedes first word, analyze mix. Ditto for */ /* after last word. Find out if there are embedded tabs. */ /* If diag needed make copies of first & last word and replace_ */ /* printables. diag of the form: */ /* "Note: the string beginning with X and ending with Y is preceded */ /* and trailed by whitespace, and contains embedded non_blank */ /* whitespace" */ leading_whitespace = trailing_whitespace = FALSE; /* end_input is last char of input, not length. 
Also, be sure a */ /* single trailing newline is not counted as trailing whitespace */ end_input = strlen(s) - 1; if (s[end_input] == '\n') end_input--; first = strspn(s,white); if (first > end_input) { add_to_errbuf (errbuf_ptr, prefix, " Note: the string causing difficulty may consist entirely of white space", '\n'); return; } leading_whitespace = (first != 0); end_first = first + strcspn(s+first,white) - 1; for (end_last = end_input; end_last > end_first; end_last--) { if (strchr(white,s[end_last]) == NULL) break; trailing_whitespace = TRUE; } embedded_non_blank_whitespace = FALSE; if (end_last == end_first) last = first; else { for (last = end_last-1; last > end_first; last--) if (strchr(white,s[last]) != NULL) break; /* If next is true, we have some kind of logic error in this */ /* routine. Can't err so just get out w/o whitespace analysis */ if (last++ == end_first) return; /* If first embedded whitespace we find is a newline, don't */ /* report anything. Newline itself will be obvious in err */ /* msg, and following whitespace is logically "leading", not */ /* "embedded" anyway. Preceding is justification to allow us */ /* to generate multiline err msgs w/leading tabs - too bad */ /* it's too late to ID the string we REALLY want to analyze */ /* for unprintables/embedded whitespace */ if (s[end_first+1] != '\n') { for (i = end_first+1; i < last; i++) if (strchr(embedded_string,s[i]) != NULL) break; embedded_non_blank_whitespace = (i < last); } } if ( ! (leading_whitespace || trailing_whitespace || embedded_non_blank_whitespace) ) return; /* Make strings out of first and last words */ /* We assume unprintables have been removed (so much for being */ /* self-contained). 
Abbreviate first and/or last words if needed */ if (first == last) { /* abbreviation from 'xxxxxyyyyyy' to 'xxx'...'yyy' */ /* +2 in next line for extra set of apostrophes */ maxlen_errsubstring = 2*MAXLEN_STRING_TERMINI + len_ellipsis + 2; i = end_first - first + 1; if (i > maxlen_errsubstring) { i = maxlen_errsubstring; terminus_first = first + MAXLEN_STRING_TERMINI; ellipsis1 = ELLIPSIS; strcpy(buf3+1,s+end_last-MAXLEN_STRING_TERMINI+1); buf3[0] = buf3[MAXLEN_STRING_TERMINI+1] = '\''; buf3[MAXLEN_STRING_TERMINI+2] = '\0'; } else { terminus_first = end_first + 1; ellipsis1 = ""; buf3[0] = '\0'; } repl_first = s[terminus_first]; s[terminus_first] = '\0'; ptr1 = "Note: the string"; /* The 20 is slop for blanks in the format string. The 2 is */ /* for the apostrophes and the 1 for the newline */ buf1 = (char *) malloc (strlen(prefix) + 20 + 2 + 1 + strlen(ptr1) + i); if (buf1 != NULL) sprintf(buf1, "%s %s '%s'%s%s\n", prefix, ptr1, s+first, ellipsis1, buf3); } else { /* Abbreviate first word from 'xxxxxxxxx' to 'xxx...' */ /* Abbreviate last word from 'yyyyyyyyy' to '...yyy' */ maxlen_errsubstring = MAXLEN_STRING_TERMINI + len_ellipsis; if (end_first - first + 1 > maxlen_errsubstring) { terminus_first = first + MAXLEN_STRING_TERMINI; ellipsis1 = ELLIPSIS; } else { terminus_first = end_first + 1; ellipsis1 = ""; } if (end_last - last + 1 > maxlen_errsubstring) { init_last = end_last - MAXLEN_STRING_TERMINI + 1; ellipsis2 = ELLIPSIS; } else { init_last = last; ellipsis2 = ""; } repl_first = s[terminus_first]; s[terminus_first] = '\0'; repl_last = s[end_last+1]; s[end_last+1] = '\0'; ptr1 = "Note: the string beginning with"; ptr2 = "and ending with"; /* The 20 is slop for blanks in the format string. 
The 4 is */ /* for the apostrophes and the 1 for the newline */ buf1 = (char *) malloc (strlen(prefix) + 20 + 4 + 1 + strlen(ptr1) + strlen(ptr2) + 2 * maxlen_errsubstring ); if (buf1 != NULL) sprintf(buf1, "%s %s '%s'%s %s %s'%s'\n", prefix, ptr1, s+first, ellipsis1, ptr2, ellipsis2, s+init_last); s[end_last+1] = repl_last; } s[terminus_first] = repl_first; /* Consult buf2 sizing above if content of buf2 changes */ buf2[0] = '\0'; strcat (buf2,prefix); if (leading_whitespace || trailing_whitespace) { strcat (buf2," is "); if (leading_whitespace) strcat (buf2,"preceded "); if (trailing_whitespace) { if (leading_whitespace) strcat (buf2,"and "); strcat (buf2,"followed "); } strcat (buf2,"by whitespace"); } if (embedded_non_blank_whitespace) { if (leading_whitespace && trailing_whitespace) strcat (buf2,","); if (leading_whitespace || trailing_whitespace) strcat (buf2," and"); strcat (buf2," contains embedded non_blank whitespace"); } strcat (buf2,"\n"); if ((buf1 == NULL) || (buf2 == NULL)) /* We couldn't make fancy message because of memory allocation */ /* failure. Try to sneak in emergency message - there may be */ /* space in errbuf w/o needing further allocation */ add_to_errbuf(errbuf_ptr, "*** Problem string has leading, trailing and/or embedded whitespace", NULL,'\0'); else add_to_errbuf(errbuf_ptr,buf1,buf2,'\0'); return; } char *build_err(errbuf,ss,tt,error_level,maxscriptdiags) char *ss,*tt; char **errbuf; int error_level,maxscriptdiags; /* Generate desired level of diagnostic output into a buffer & return */ /* pointer to that buffer. Return is both via arg list and function */ /* return. Difference is that function return can also be NULL, in- */ /* dicating failure to build complete error message. 
If this occurs       */
/*  arg list pointer points to some valid string (possibly empty)      */
/*  See defgb.h and/or opt file doc for descrip of various levels      */
{
  int ndiags,i,nopen_files,j;
  /*  Shouldn't have null strings as input but be ready to repl with   */
  /*  empty strings                                                    */
  char *s = one_char_buf+1, *t = one_char_buf+1;
  char *ptr,*uname;
  /*  intbuf receives "%d\n".  Sized for any int (sign, digits,        */
  /*  newline, '\0'); the former 10 bytes overflowed once a count      */
  /*  reached 9 digits                                                 */
  char intbuf[24],end_of_string;
  Logical got_remote_addr,got_remote_host;
  time_t timbuf;
  struct passwd *passwd;
  struct unprint_repl unprint_repl_struct;

  if (ss != NULL) s = ss;
  if (tt != NULL) t = tt;

  if (error_level >= ERROR_MESSAGE) {
    /*  Check last char for newline to ensure we don't add blank line  */
    ptr = (*t == '\0') ? s : t;
    end_of_string = ( *(ptr + strlen(ptr) - 1) == '\n' ) ? '\0' : '\n';
    /*  Keep track of where we are in errbuf before adding s & t       */
    /*  (As of 3.7, we're at beginning, but we might move this code)   */
    i = (*errbuf == NULL) ? 0 : strlen(*errbuf);
    add_to_errbuf(errbuf,s,t,end_of_string);
    /*  Analyze "user" strings s & t for unprintable characters.       */
    /*  Replace unprintables with a (hopefully) unique printable       */
    /*  If any replacements done, issue appropriate message            */
    replace_unprintables(*errbuf+i,&unprint_repl_struct);
    issue_unprintable_diag(errbuf,&unprint_repl_struct);
    /*  Attempt some kind of diagnostic about troubling white space    */
    /*  Problem is that we potentially have generated whitespace in    */
    /*  the s & t strings because we've wrapped the "original"         */
    /*  problem strings with descriptive info and formatting.          */
    /*  As a guess, look at the t string.  Most of the time, things    */
    /*  look like s="xyz type of problem with string ", t=problem      */
    /*  string                                                         */
    analyze_whitespace(errbuf,t);
  }

  if (error_level >= ID_PROBLEM_FILE) {
    /*  Find the deepest data level that has a file open               */
    for (i=MAXLEVELS-1; i>=0; i--)
      if (datalev[i].open) break;
    if (i < 0) {
      /*  Problem file not a data file.  The way we wrote the error    */
      /*  stuff, there's no way to find out what the "problem file"    */
      /*  is, if any.  However, the way defgb works, only one file is  */
      /*  open at a time, so check for all open files.  This is a      */
      /*  kludge, and depends on this code occurring before open file  */
      /*  code                                                         */
      add_to_errbuf(errbuf," No data files open\n",NULL,'\0');
      error_level = (ID_PROBLEM_FILE > ID_ALL_OPEN_FILES)
                      ? ID_PROBLEM_FILE : ID_ALL_OPEN_FILES;
    } else {
      sprintf (intbuf,"%d\n",i);
      add_to_errbuf(errbuf," Lowest level file open is at level ",intbuf,0);
      if (datalev[i].source_type == COMMAND_FILE) {
        add_to_errbuf(errbuf,
                      " File is output of command\n Command & params = ",
                      datalev[i].source,
                      '\n');
        if (error_level >= GET_MORE_FROM_SCRIPT)
          read_beyond_error(&datalev[i],errbuf,maxscriptdiags);
      } else if (datalev[i].source_type == JGOFS_OBJECT)
        add_to_errbuf(errbuf," File is JGOFS object. Object name = ",
                      datalev[i].source,'\n');
      else
        add_to_errbuf(errbuf," Name = ",datalev[i].source,'\n');
      sprintf (intbuf,"%d\n",datalev[i].nrecs);
      if (datalev[i].source_type == JGOFS_OBJECT)
        add_to_errbuf(errbuf,
                      " Number of calls to jdbreada executed so far: ",intbuf,'\0');
      else {
        add_to_errbuf(errbuf," Last record read was # ",intbuf,'\n');
        add_to_errbuf(errbuf," (Count includes comments, blanks, etc; ",
                      "excludes addl diag info, if any)\n",'\0');
      }
    }
  }

  if (error_level >= ID_ALL_OPEN_FILES) {
    /*  Report every data file open above the one already described    */
    for (j=i-1; j>=0; j--)
      if (datalev[j].open) {
        sprintf (intbuf,"%d\n",j);
        add_to_errbuf(errbuf," Data file open at level ",intbuf,'\0');
        switch (datalev[j].source_type) {
          case COMMAND_FILE:
            ptr = " File is output of command\n Command & params = ";
            break;
          case JGOFS_OBJECT:
            ptr = " File is JGOFS object\n Object = ";
            break;
          default:
            ptr = " Name = ";
            break;
        }
        add_to_errbuf(errbuf,ptr,datalev[j].source,'\n');
      }
    if (indirect_file.open) {
      add_to_errbuf(errbuf," Indirect file ",indirect_file.source,'\0');
      add_to_errbuf(errbuf," open\n",NULL,'\0');
    }
    nopen_files=0;
    /*  NOTE(review): source text has been lost in the next statement  */
    /*  (everything between the loop's "j" and ">= ID_SESSION" is      */
    /*  missing, apparently stripped along with a "<" ... ">" span).   */
    /*  It is reproduced exactly as found and must be restored from    */
    /*  the original file                                              */
    for (j=0; j= ID_SESSION) {
    got_remote_host = ( (ptr=getenv("REMOTE_HOST")) != NULL );
    if (got_remote_host) got_remote_host = (*ptr != '\0');
    if
(got_remote_host) { add_to_errbuf(errbuf," REMOTE_HOST = ",ptr,'\0'); got_remote_addr = FALSE; } else { got_remote_addr = ( (ptr=getenv("REMOTE_ADDR")) != NULL ); if (got_remote_addr) got_remote_addr = (*ptr != '\0'); if (got_remote_addr) add_to_errbuf(errbuf," REMOTE_ADDR = ",ptr,'\0'); } if (got_remote_host || got_remote_addr) { if ( (ptr=getenv("REMOTE_USER")) != NULL ) if (*ptr != '\0') add_to_errbuf(errbuf,"; REMOTE_USER = ",ptr,'\0'); } else add_to_errbuf(errbuf," No remote ID info",NULL,'\0'); add_to_errbuf(errbuf,"\n",NULL,'\0'); time(&timbuf); /* ctime includes a newline */ add_to_errbuf(errbuf," This msg issued ",ctime(&timbuf),'\0'); #if VMS uname = cuserid(NULL); #else if ( (passwd = getpwuid(geteuid())) == NULL ) uname = "cannot be determined"; else uname = passwd->pw_name; #endif add_to_errbuf(errbuf," Effective username of process: ",uname,'\n'); } if (error_level >= ID_INNER) { add_to_errbuf(errbuf," This msg from method ",METHOD_NAME,'\0'); add_to_errbuf(errbuf,". Source: ",INNER_VERSION,'\n'); } return add_to_errbuf(NULL,NULL,NULL,'\0'); } Logical write_err(errout,s,t) char *s,*t; struct outfile *errout; { FILE **stream; stream = errout->stream_ptr; if (errout->sink != NULL) if (strcmp(errout->sink,JGOFS_ERROR_FILE) == 0) { /* error_ will call ioclose_ but will not return to inner */ /* However, stick in return just in case... */ error_(s,t); return FALSE; } else { if ( ! *(errout->open_ptr) ) if (! open_sink(errout)) return FALSE; fprintf (*stream,"%s %s\n",s,t); } return TRUE; } char *build_and_write_err(errsink,s,t,errlev,maxs,err_doing_errmsg) struct outfile *errsink; char *s,*t; char *err_doing_errmsg; int errlev,maxs; { char *errmsg = NULL; char **errmsg_ptr = &errmsg; /* If did not build whole error message, check to see if what was */ /* built at least includes s & t. If so, write incomplete msg */ /* along w/msg that it is incomplete. 
If not, write s & t */ if ( (errmsg = build_err(errmsg_ptr,s,t,errlev,maxs)) == NULL) { errmsg = *errmsg_ptr; if (errmsg == NULL) { errmsg = s; err_doing_errmsg = t; } else if ( (strstr(errmsg,s) == NULL) || (strstr(errmsg,t) == NULL) ) { errmsg = s; err_doing_errmsg = t; } else err_doing_errmsg = " (Preceding message is truncated due to memory allocation failure)\n"; } /* Write to sink. In general, that will be outer's sink, which */ /* will cause a call to outer's error_, which will not return here */ /* However, if we had a 2nd sink (or 1st sink not outer's), we */ /* will come back here, and we might find that there was an error */ /* writing to the first sink. In that case, try writing someplace */ /* else, as well as adding that info for 2nd sink, if any. If */ /* err_doing_errmsg was already set, drop that info. Don't want */ /* to try building strings at this point, since presumably that's */ /* already gotten us into trouble. Of course, presumably fprintf */ /* needs to do dynamic allocation... */ if ( ! write_err(errsink,errmsg,err_doing_errmsg)) { err_doing_errmsg = " (Could not write preceding message to error sink)\n"; fprintf(ULTIMATE_ERROR_STREAM,"%s%s Error sink = %s\n", errmsg,err_doing_errmsg,errsink->sink); } if (err_doing_errmsg == t) err_doing_errmsg = " (Preceding message truncated when written to previous sink)\n"; return err_doing_errmsg; } void err(s,t) char *s,*t; /* Put out error message to the correct sink(s), if any */ { /* one_char_buf+1 is an empty string */ char *errmsg = one_char_buf+1, *err_doing_errmsg = one_char_buf+1; struct outfile *sink1, *sink2 = NULL; int errlev1, errlev2, maxs1, maxs2 = 0; /* Generate sink/level/scriptdiags triplets for each error sink */ /* User spec's 1, and may spec "addl-". 
If former, trio is */ /* ERROR_SINK/.error_level/.maxscriptdiags */ /* If latter, primary trio is */ /* PRIMARY_ERROR_SINK/.error_level/.maxscriptdiags */ /* and secondary trio is */ /* ERROR_SINK/.dup_error_level/0 */ sink1 = &output_opts.file[ERROR_SINK]; errlev1 = output_opts.error_level; maxs1 = output_opts.maxscriptdiags; /* Because using outer's error sink does not come back to us, be */ /* sure that if there is more than one sink, and if either is */ /* outer's, that one is last (PRIMARY_ERROR_FILE is where 2nd */ /* output stream goes - as of v 3.0, defined as JGOFS_ERROR_FILE) */ if ( sink1->dup && (strcmp(sink1->sink,PRIMARY_ERROR_FILE) != 0) ) { if (strcmp(PRIMARY_ERROR_FILE,JGOFS_ERROR_FILE) == 0) { sink2 = &output_opts.file[PRIMARY_ERROR_SINK]; maxs2 = maxs1; /* Script diagnostics can only go to 1 sink, & */ maxs1 = 0; /* if 2, we chose PRIMARY. See param1 doc */ errlev2 = errlev1; errlev1 = output_opts.dup_error_level; } else { sink2 = sink1; errlev2 = output_opts.dup_error_level; sink1 = &output_opts.file[PRIMARY_ERROR_SINK]; } } if (errlev1 > NOERROR_OUTPUT) err_doing_errmsg = build_and_write_err(sink1,s,t,errlev1,maxs1,err_doing_errmsg); if ( (sink2 != NULL) && (errlev2 > NOERROR_OUTPUT) ) build_and_write_err(sink2,s,t,errlev2,maxs2,err_doing_errmsg); ioclose_(); exit(1); } /* */ /********* End error handling *****************************************/ void mark_missing_vars(lev) int lev; /* Put missing data indicators into appropriate values of a */ /* master structure level */ { int i,j; /* i represents variables in the master list. They must be in */ /* the permutation by now, or they are missing. 
Don't have to      */
  /*  check master vars at lower levels, but DO have to check per-     */
  /*  mutation at lower levels                                         */
  for (i = firstvarlevel[lev]; i < firstvarlevel[lev+1]; i++) {
    /*  Look for master variable i among the first lastvar entries of  */
    /*  pointers                                                       */
    for (j = 0; j < datalev[lev].lastvar; j++)
      if (i == pointers[j]) break;
    /*  Search ran off the end: i was not found, so flag its value as  */
    /*  missing                                                        */
    if (j == datalev[lev].lastvar)
      strcpy(tabvalues[i],MISSING_VALUE_STRING);
  }
  return;
}


void print_data_trace(stream,f,bufs,size_one_buf,n_bufs)
struct fileinfo *f;
FILE *stream;
char *bufs;
int size_one_buf,n_bufs;
/*  Print a trace line for the data in bufs on stream, labelled with   */
/*  the file's identification and its current record number.           */
/*  A -1 for n_bufs is a special case meaning that the string in buf   */
/*  was generated by defgb and is not in the actual data stream at     */
/*  all (as of 3.5, this string is 1 or more "nd"s)                    */
/*  A 0 for n_bufs is a pre-4.1 indicator of 1 buf to be printed       */
/*  Since we don't use size in a single-buffer case, its value in      */
/*  such a case is irrelevant                                          */
/*  If we're not tracing data from a JGOFS object, there's usually     */
/*  (always?) just 1 string                                            */
/*  If datafile, ID with file spec; else ID with opt file description  */
{
  int i;
  char *ptr,*ptr2,*ptr3;

  /*  "(*)" marks a record that defgb generated itself                 */
  ptr2 = (n_bufs == -1) ? "(*)" : "";
  if (strcmp((ptr3 = f->descrip),"datafile") == 0) ptr3 = f->source;
  /*  Both special values of n_bufs (-1 and 0) mean one buffer         */
  if (n_bufs <= 0) n_bufs = 1;
  ptr = bufs;
  fprintf (stream, "%s rec %-4d%s : %s", ptr3, f->nrecs, ptr2, ptr);
  /*  NOTE(review): source text has been lost in the next statement    */
  /*  (apparently stripped along with a "<" ... ">" span).  What is    */
  /*  missing is the rest of this loop, the end of this routine, and   */
  /*  the start of the routine that the code below belongs to          */
  /*  (presumably the one deciding which sink(s) get the data trace -  */
  /*  confirm against the original file).  Reproduced exactly as       */
  /*  found                                                            */
  for (i=1; idup) {    /* sink is "addl-" */
    if (print_it(f->nrecs,f->dup_print_lines))    /* "addl-" output */
      print_data_trace(*(diagout->stream_ptr),
                       f,bufs,size_one_buf,n_bufs);
    if (print_it(f->nrecs,f->print_lines))    /* "non addl-" output */
      print_data_trace(*(primary_diagout->stream_ptr),
                       f,bufs,size_one_buf,n_bufs);
  } else    /* sink is "non addl-" */
    if (print_it (f->nrecs,f->print_lines))    /* "non addl-" output */
      print_data_trace(*(diagout->stream_ptr),
                       f,bufs,size_one_buf,n_bufs);
  return;
}


void print_diag_trace (stream,msg,cumu_time,last_time)
FILE *stream;
char *msg;
double *cumu_time;
time_t *last_time;
/*  Time stamp and print msg on stream.
Update time buffers */ /* last_time = NULL means just print msg - don't fool w/time */ { double diff; time_t timbuf; char *time_string; /* User requested "addl-" output but not "addl-" sink... at least */ /* I hope that's why we'd return here */ if (stream == NULL) return; if (last_time == NULL) fprintf (stream,"%s\n",msg); else { time(&timbuf); time_string = ctime(&timbuf); /* Includes trailing \n */ diff = (*last_time == 0) ? 0 : difftime(timbuf,*last_time); *cumu_time += diff; *last_time = timbuf; fprintf (stream, "%s\t(%.0f secs diff; %.0f secs cumu)\t%s", msg,diff,*cumu_time,time_string); } return; } void do_diag_trace_work (limit,msg,timestamp) int limit; char *msg; Logical timestamp; /* If trace_level >= limit passed here, call print_diag_trace to time */ /* stamp msg and print it. Keep track of times here */ /* Where DOES the output go? */ /* diagout contains user-specified info; primary_diagout is fixed */ /* User can spec "addl-" for sink and/or for output requests */ /* If user spec'd "addl-" sink, "addl-" output goes to sink and */ /* "non addl-" goes to primary. Otherwise, the only output is */ /* "non addl-" and goes to sink */ /* See where we open these things in ioopen_ for more discussion */ { static time_t last_timbuf = 0; static time_t last_timbuf_primary = 0; static double cumu_time = 0; static double cumu_time_primary = 0; time_t *timbuf_ptr; if (diagout->dup) { /* sink is "addl-" */ if (output_opts.dup_trace_level >= limit) { /* "addl-" output */ timbuf_ptr = (timestamp) ? &last_timbuf : NULL; print_diag_trace (*(diagout->stream_ptr),msg,&cumu_time,timbuf_ptr); } if (output_opts.trace_level >= limit) { /* "non addl-" output */ timbuf_ptr = (timestamp) ? &last_timbuf_primary : NULL; print_diag_trace (*(primary_diagout->stream_ptr), msg,&cumu_time_primary,timbuf_ptr); } } else /* sink is "non addl-" */ if (output_opts.trace_level >= limit) { /* "non addl-" output */ timbuf_ptr = (timestamp) ? 
&last_timbuf : NULL; print_diag_trace (*(diagout->stream_ptr),msg,&cumu_time,timbuf_ptr); } return; } void do_diag_trace(limit,msg) int limit; char *msg; /* Old do_diag_trace entry from before timestamp arg added (3.7) */ { do_diag_trace_work (limit,msg,TRUE); return; } void dump_environment() /* Note that by the time we get here, outer could have fiddled */ /* with the environment, and could fiddle again at any point */ /* environ declared in defgb.h */ { char **ptr; ptr = environ-1; while (*(++ptr) != NULL) do_diag_trace_work(DUMP_ENVIRONMENT,*ptr,FALSE); return; } Logical ok_to_calc (var,this_level) int var,this_level; /* Returns true if it's OK to calculate variable var. */ /* It's OK if variable exists and is on this level (hence we need */ /* present level as an input argument) */ { Logical ok; ok = (var >= 0); if (ok) ok = (iovarlevel__(var) == this_level); return ok; } Logical missing (var) int var; /* Check datum corresponding to legal (specified in level 0 */ /* variable list) variable. */ { return (strcmp(MISSING_VALUE_STRING,tabvalues[var]) == 0); } Logical ok_to_use (var,this_level) int var,this_level; /* Returns true if it's OK to use variable var. */ /* It's OK if variable exists and is on or above this level (hence */ /* we need present level as an input argument), and is not missing */ { Logical ok; ok = (var >= 0); if (ok) ok = (iovarlevel__(var) <= this_level); if (ok) ok = ( ! 
missing(var)); return ok; } int lookup_lev0varlist (name) char *name; /* See if argument name is in master variable list for this dataset */ /* Return position if so; -1 if not */ /* tabnames and its size, ncnt, are implicit input to this routine */ { int i; for (i=0; i= ncnt) i= -1; return i; } int transvar(synonym,max_len_synonym_buf,variable) char *variable,*synonym; int max_len_synonym_buf; /* Call extract_wjstbl with the variable translation list */ /* Same returns as extract_wjstbl */ /* Note: when used to look up variables during variable name transla- */ /* tion, transvar may generally be called whether or not a translation */ /* file was provided. However, there is a difference if the same */ /* name appears on both the pre & post translation lists; eg, start */ /* with long and long1; translate long to lon and long1 to long */ { /* Max len of synonym is max len buffer -1. Eventually, extract_ */ /* wjstbl should be changed to take buffer size instead of string */ /* length (WJS Jul 03 - defgb 4.1a) */ return extract_wjstbl(synonym,max_len_synonym_buf-1,variable,trans_list); } char *pack_comment_into_buffer(buf_start,buf_end,comment,postfix,max_to_add) char *buf_start,*buf_end,*comment,*postfix; int max_to_add; /* Specialized routine for add_to_comment_stream since this stuff */ /* might need to be done twice. Basic idea is to see if comment, */ /* which is known to fit in MAXCOMMENTLINE, will fit between buf_start */ /* & buf_end. 
Various alterations are attempted (we'd like to have      */
/*  postfix, we insist on putting in truncation string if we can't fit */
/*  anything else, etc)                                                */
/*  Returns pointer to logical end of buffer it worked on (the '\0'    */
/*  written after the newline that ends the packed line)               */
{
  int lencomment,len_to_add,lenpostfix,lentruncind;
  char *end_ptr,*buf_ptr,*trunc_ptr;

  lentruncind = strlen(COMM_TRUNC_IND);
  /*  Give up if we can't add at least 1 character of the comment,     */
  /*  along with the indicator that we're truncating the comment       */
  /*  Since add_to_comment_stream now (> 3.8a) dynamically allocates   */
  /*  comment buffer space, if we change stuff we want to add here,    */
  /*  make corresponding change to add_ so that it will allocate       */
  /*  enough.  We don't really want this test to fail (but leave it    */
  /*  here as protection)                                              */
  if (buf_start + lentruncind + 1 > buf_end)
    err ("Comment buffer overflow attempting to add comment\n Comment = ",
         comment);

  lencomment=strlen(comment);
  lenpostfix=strlen(postfix);

  /*  First force comment by itself onto a single line                 */
  /*  Neither len* nor MAXCOMMENTLINE includes newline char            */
  len_to_add = lencomment;
  if (lencomment > MAXCOMMENTLINE) len_to_add = MAXCOMMENTLINE;

  /*  See if postfix will fit.  If so, add full desired line width     */
  /*  since postfix will be left-justified.  max_to_add may            */
  /*  differ from MAXCOMMENTLINE because of comment display issues     */
  /*  The postfix is displayed only if it looks nice; a full           */
  /*  comment's worth of text is completely displayed, looks or not.   */
  /*  This is an issue for the calling routine, however.               */
  /*  (The else below pairs with the inner if: a postfix that does     */
  /*  not fit is dropped by zeroing lenpostfix)                        */
  if (lenpostfix > 0)
    if (lencomment + lenpostfix <= max_to_add) len_to_add = max_to_add;
    else lenpostfix = 0;

  /*  See if desired comment will fit in what's left of buffer         */
  /*  If not, throw away postfix and see if that will fit              */
  /*  If still not enough room, copy what will fit, truncating         */
  end_ptr = buf_start+len_to_add;
  /*  + 1 in next line is for newline char  */
  if (end_ptr+1 > buf_end) {
    lenpostfix = 0;
    len_to_add = lencomment;
    end_ptr = buf_start+len_to_add;
    if (end_ptr+1 > buf_end) {
      len_to_add = buf_end - buf_start;
      end_ptr = buf_end;
    }
  }

  if (lenpostfix == 0) {    /* Comment only-might need truncation */
    /*  strncpy leaves no '\0' when it copies fewer chars than the     */
    /*  comment holds; the line is terminated further down             */
    strncpy(buf_start,comment,len_to_add);
    if (len_to_add < lencomment) {
      /*  Overwrite the tail of what was copied with the truncation    */
      /*  indicator                                                    */
      strcpy (end_ptr-lentruncind,COMM_TRUNC_IND);
      /*  Comment itself might be formatted w/ {}s if we got tricky    */
      /*  Toss-up whether to treat contents as a postfix (& there-     */
      /*  fore discard above) or as a comment (& truncate it).  De-    */
      /*  cided on latter, but we can't truncate the closing } or      */
      /*  things that look at {}s will be unhappy                      */
      /*  Anybody who starts nesting {}s had better start writing code */
      trunc_ptr = comment + len_to_add - lentruncind;
      /*  NOTE(review): strchr returns NULL when comment has no        */
      /*  opening delimiter, and NULL is then compared with <= here -  */
      /*  confirm that case is intended to fall through to the test    */
      /*  for a closing delimiter                                      */
      if (strchr(comment,*TAG_DELIM) <= trunc_ptr)
        if (strchr(trunc_ptr,*(TAG_DELIM+1)) != 0)
          /*  NOTE(review): the delimiter stored at *end_ptr here      */
          /*  appears to be overwritten by the '\n' stored at the      */
          /*  same address at the bottom of this routine - confirm     */
          /*  intent                                                   */
          *end_ptr = *(TAG_DELIM+1);
    }
  } else {    /* Comment & right justified postfix both fit-we checked */
    strcpy(buf_start,comment);
    /*  Blank-fill the gap between the comment and the postfix         */
    for (buf_ptr = buf_start + lencomment;
         buf_ptr < end_ptr - lenpostfix;
         buf_ptr++)
      *buf_ptr=' ';
    strcpy(end_ptr-lenpostfix,postfix);
  }

  /*  End the line with a newline and terminate the string; the        */
  /*  pointer returned addresses the terminating '\0'                  */
  *(end_ptr++) = '\n';
  *end_ptr = '\0';
  return end_ptr;
}


void add_to_comment_stream (desired_postfix,comment)
char *desired_postfix,*comment;
/*  Adds comment and postfix strings to end of comments stream,        */
/*  checking for buffer overflow.  NULL postfix means throw comment out */
/*  (just return)                                                      */
/*  If the output stream is going to stdout, we don't actually do      */
/*  output.  Instead, we add the comment to tabcomments, to be         */
/*  processed by outer.
We make other decisions about what to output */ /* based on parameters described in diagnostics optional file doc */ /* If individual comment is too long, comment is truncated, with */ /* last 4 legal characters replaced with " ..." (unless comment is */ /* html tag). */ /* Individual comment size refers to size between newlines, */ /* not necessarily size of comment string. However, since we read */ /* with fgets, we are guaranteed that "normal" comments (eg, those */ /* not generated by the program) contain exactly one newline as the */ /* last comment character. Subr will add newline to comment if not */ /* present. */ /* comment normally does not begin with #. Outer routines */ /* supply a leading "# " before each comment line in html mode and */ /* supply no prefix in flat mode. (Comments in a non-display mode are */ /* ignored). postfix is used to allow differentiation between */ /* comments, if desired. Postfix is positioned at end of comment */ /* line. It is omitted if it will not fit in its entirety */ /* tabcomments_ptr is implicit input; points to present end of */ /* comments string; initialized in ioopen_; altered in iocommout_ */ /* output_opts is also implicit input. From it we get the output */ /* destination as well as info about optional comment output. Note */ /* that comments about optional files come here before output_opts is */ /* set up according to the diagnostics optional file. These comments */ /* go to the compile-time sink. 
If this sink causes the comments */ /* to be buffered instead of output, they will eventually be output */ /* to the run-time sink */ { char *ptr,*postfix; static char *line = NULL; static int len_line = 0; int lencomment,lenpostfix,len_to_add,comment_sink,position_in_buffer; int pack_comment_overhead; int len; Logical inserted_line,add_to_buffer,write_to_file; struct outfile *commout; if ( (postfix=desired_postfix) == NULL ) return; /* Empty postfix distinguishes comments from data files from other */ /* comments */ lenpostfix = strlen(postfix); if (lenpostfix == 0) comment_sink = DATA_COMMENT_SINK; else { /* See if we want the comment at all. */ /* Inserted lines have postfix of {method name}. */ ptr=strrchr(postfix,*TAG_DELIM); if (ptr != NULL) { ptr=rem_delims(ptr,TAG_DELIM); inserted_line = (strcmp (ptr,METHOD_NAME) == 0); /* rem_delims destroys closing delimiter. Restore it */ *(ptr+strlen(ptr)) = *(TAG_DELIM+1); if ( inserted_line && (! output_opts.inserted_comments) ) return; } comment_sink = NON_DATA_COMMENT_SINK; /* See if we want postfix. Non-inserted lines that are non- */ /* data comments are (I hope!) comments from opt files */ if ( ( inserted_line && (! output_opts.inserted_comment_id)) || ( ! inserted_line && (! output_opts.opt_file_comment_id)) ) postfix = ""; } commout = &output_opts.file[comment_sink]; /* Find out if we're adding to buffer and/or writing to file */ write_to_file = (strcmp(commout->sink,ADD_TO_BUFFER_FILE) != 0); add_to_buffer = commout->dup || (! write_to_file); lencomment=strlen(comment); ptr=strchr(comment,'\n'); if (ptr != NULL) { if (ptr != comment + lencomment - 1) err ("Comment contains embedded newline char\n Bad comment = ",comment); /* Remove newline. Will add after postfix */ *ptr = '\0'; lencomment--; } /* Add to the comment buffer */ /* Value of next line depends on logic in pack_comment_into_ */ /* buffer. 
Don't see an easy way to coordinate w/that routine, */ /* so try to do it manually */ pack_comment_overhead = strlen(COMM_TRUNC_IND) + 1; if (add_to_buffer) { /* If line too long, should abort if an html tag because trun- */ /* cation could truncate tag, producing bad html effect. */ /* However, as of JGOFS release 1.5, tag can appear anywhere in */ /* line (Feb 96 agreement was that it could appear only at start)*/ /* which is too tough to parse for here */ /* Be sure we have enough space for comment, its # and */ /* \n, and pack_comment's overhead (which I believe deals with */ /* the \0). This is conservative; I think we only need enough */ /* for EFFECTIVE_DISPLAY_WIDTH. */ len = lencomment + 2 + pack_comment_overhead; if (tabcomments + tabcomments_size < tabcomments_ptr + len) { position_in_buffer = tabcomments_ptr - tabcomments; /* COMMENTSIZE, below, could be "anything" */ tabcomments_size += (len > COMMENTSIZE) ? len : COMMENTSIZE; tabcomments = (char *)realloc(tabcomments,tabcomments_size); if (tabcomments == NULL) errn ("Could not allocate/extend comment buffer to size", tabcomments_size); tabcomments_ptr = tabcomments + position_in_buffer; } /* Restrict postfix display by display width. Set pointer to */ /* logical end of buffer */ tabcomments_ptr = pack_comment_into_buffer (tabcomments_ptr,tabcomments+tabcomments_size, comment,postfix,EFFECTIVE_DISPLAY_WIDTH); } /* Write to comment sink */ if (write_to_file) { if (line == NULL) { /* Since we only intend to allocate this once, size for max */ /* pack_comment will truncate if comment > MAXCOMMENTLINE */ len_line = MAXCOMMENTLINE + 2 + pack_comment_overhead; line = (char *)malloc(len_line); if (line == NULL) err ("Could not allocate space for buffer for comments headed for ", commout->sink); } pack_comment_into_buffer (line, line + len_line, comment,postfix, MAXCOMMENTLINE); if ( ! *(commout->open_ptr) ) if ( ! 
open_sink(commout)) err( buildstring("Error opening comment sink ", commout->sink, " : \n ", " open commentsink msg"), strerror(errno) ); fprintf (*(commout->stream_ptr),"%s",line); } return; } int null_data(file) struct fileinfo *file; /* Idea of this routine is that we have reached EOF w/o data. Should */ /* something be done? The idea is implemented only for data files, */ /* I'm not sure how good it is even for that, so beware new uses */ { int i,len; char *ptr; file->eod = TRUE; if (strcmp(file->descrip,"datafile") == 0) { /* Build data line of "nd"s if we have vars but no data. */ /* Otherwise, build line w/no vars and no next level (known as */ /* an empty line). Note as of 3.2, file->firstvar = file-> */ /* lastvar, so we aren't putting any nd's in here (well, */ /* sometimes firstvar != lastvar; better check code. WJS as */ /* of v 3.5) */ *(ptr = file->buf) = '\0'; len = strlen(MISSING_VALUE_STRING); /* +1 after len for per-copy separator. Final -1 because last */ /* copy does not get separator. Apparently no trailing \0s in- */ /* volved. ->*var better start from 0! */ if ( ((len + 1) * (file->lastvar - file->firstvar) - 1) > file->buf_size ) err ("Cannot fit line of missing value strings into data buffer\n" "Single missing value string is ",MISSING_VALUE_STRING); for (i = file->firstvar; i < file->lastvar; i++) { strcpy(ptr,MISSING_VALUE_STRING); ptr += len; if (i != file->lastvar - 1) *(ptr++) = *(file->item_separators); } (file->nrecs)++; do_data_trace(file,file->buf,0,-1); return -1; } else return 0; } int getrec_proccomment (file,postfix) char *postfix; struct fileinfo *file; /* Does all reading for defgb */ /* In cases where multiple "files" are coming from a single data */ /* stream (eg; input is JGOFS object, or script that wants to provide */ /* all data), this routine must be called in the "correct" order. */ /* This is ioreadrec's problem. 
It, in turn, depends on outer to do */ /* the honors */ /* */ /* Called in 2 modes-null or non-null postfix string */ /* Non-null postfix corresponds to expected comments. Called at */ /* start of a level, with nothing in buf. Read all comments into */ /* tabcomments. Result is single string with embedded newline */ /* characters. Add postfix argument to each comment line */ /* Null postfix corresponds to unexpected comments. buf contains */ /* potential comment. Skip all comments */ /* File's record counter is incremented after every successful */ /* read. do_data_trace is called to copy the input line if set up to */ /* do so via the diagnostics optional file. */ /* */ /* Return values: -1, 0, 1 */ /* 1 means buffer contains data. Any comments have been */ /* processed as above. */ /* 0 means that there was an error or an EOD has been encountered */ /* If the latter, EOD bit is set. If EOD was caused by */ /* EOF, EOF bit is set */ /* -1 is a special return. It means that there */ /* were no more data records in the file, but we are re- */ /* turning 1 or more "nd"s. Any comments have been */ /* processed as above. EOD bit is set. If EOD was caused */ /* by EOF, EOF bit is set. At present, this return can */ /* only come from data files, but suggest considering this */ /* as well as 0 to mean EOD for all reads */ /* Implicit returns: EOD & EOF bits; count of all records read and */ /* data records read. -1 returns increment both counters for */ /* data files (as if a line of "missing" indicators was in file). */ /* For object input, count reflects number of jdbreada_ calls */ /* */ /* "Files" whose data does not correspond to an external source never */ /* get their EOF bit set. Therefore it is safe to close files */ /* based on their EOF bits */ /* */ /* Uses object global variables, others? 
*/ { int len; char *buf; buf = file->buf; switch (file->source_type) { /* Handle case where "file" doesn't exist or when we are after */ /* EOD/EOF; eg, allow getrec_ callers to ignore EOD/EOF. Their */ /* end logic better be correct! */ case CREATE_NULL_DATA: return null_data(file); /* Handle case where "file" is just 1 saved record */ case INDIRECT_FILE_LINE: if (file->eod) return 0; (file->nrecs)++; len=strlen(file->source); if (file->source[len-1] == '\n') file->source[--len] = '\0'; copy_into_fixed_len_buffer (buf,file->buf_size,file->source,"Input record exceeds buffer size\n"); do_data_trace(file,buf,0,0); file->eod = TRUE; if (buf[0] == COMMENT_CHAR) { add_to_comment_stream(postfix,buf+1); /* If only line is comment, we have a file w/no data in it */ return null_data(file); } else return 1; #if READ_OBJECTS /* Handle case where file is object */ /* Note that since we are emulating "real data" mode, we get a */ /* new file structure per level even though it's all one */ /* object. Thus we have new nrecs, objlevel, etc */ case JGOFS_OBJECT: if (file->eod) return 0; if (objeof) { file->eod = TRUE; return 0; } /* jdb routines' internal comment buffer is filled as part */ /* of jdbopen and cleared after the first jdbread. Therefore, */ /* jdbcomments will return nothing except before the first */ /* jdbread, but no harm will be done by "processing comments" */ /* as part of every call to getrec_proccomments, just like all */ /* the other modes. */ /* The catch is where the comments end up. Usually, we want */ /* to save comments "at the beginning" and discard others. */ /* Thus, scanheader calls getrec with "save", and all other */ /* data calls say "discard". However, scanheader_obj does not */ /* call getrec. The data call to getrec is the same for all */ /* modes, hence it will be "discard" always. We "fix" this */ /* by jimmying postfix to always say "save". Sigh. 
*/ while (jdbcomments_(&jdbunit,buf) != 0) add_to_comment_stream("",buf); /* Each read returns data from the "next" level to the max */ /* level so read each time level you want isn't in that range */ /* jdbreada returns -1 for EOF, -999 for err, others?? Pos- */ /* itive vals are (I think) lowest "new" level filled in */ /* See jdb.c for more. */ while (file->objlevel < jdblev) { len = sizeof(objvalues[0]); jdblev = jdbreada_(&jdbunit,objvalues,&len); /* -999 represents logical error from remote object; ie, a */ /* record that starts with &x. As far as I can tell, real */ /* I/O errors are treated just like EOFs. jdbread sends the */ /* &x record to stdout and shuts down the I/O stream, which */ /* means that err couldn't read beyond for more info (the */ /* way it does with script input) even if it were coded */ /* that way. -999 not parametrized since it would need to */ /* be coordinated w/jdb */ if (jdblev == -999) err("Error reading JGOFS object ",file->source); if ( ! (objeof = (jdblev < 0)) ) { (file->nrecs)++; do_data_trace(file,objvalues[0],len,ncnt); } } /* If level "goes back up" = no more data at this level = */ /* data at some other level, we have EOD. */ /* Note if file->objlevel != jdblev+1, there is an error */ /* in the structure as I understand it. Also note I'm too */ /* lazy/unsure to test... */ if ( objeof || (file->objlevel > jdblev) ) { file->eod = TRUE; return 0; } jdblev++; /* Mark this level as processed */ return 1; #endif /* Reading from file or child process... */ /* Read until there's a record without a leading # */ /* If appropriate, save contents of records */ /* If string starts with error token, treat as error. Requires */ /* coordination of tokens, which (v 2.3) is not yet */ /* automatically done */ /* If immediate EOF, supply a record of missing values */ /* Read extra character, which, if present, indicates error */ /* (since all the other buffers are 1 char smaller...) 
*/ /* Also, count records and, if asked, print every so often */ default: if (file->eod) return 0; while (TRUE) { if (fgets(buf,MAXREC+1,file->stream) == NULL) { file->eof = TRUE; file->eod = TRUE; if (ferror(file->stream) != 0) err (buildstring("File read error ", file->source, "\nError: ", " errmsg getrec" ), strerror(errno) ); /* If no data in a data file, supply a "missing value" */ /* record and convert file source to CREATE_NULL_DATA so */ /* that any followon levels will be handled correctly. */ if (strcmp(file->descrip,"datafile") == 0) { file->source_type = CREATE_NULL_DATA; return null_data(file); } else return 0; } (file->nrecs)++; do_data_trace(file,buf,0,0); len=strlen(buf); /* Skip "blank" lines & keep reading */ if (strspn(buf,file->item_separators) != len) { if (strncmp(buf,ERR_PREFIX,strlen(ERR_PREFIX)) == 0) err (buf,""); if (buf[len-1] == '\n') buf[--len] = '\0'; if (len >= MAXREC) err ("Input record exceeds buffer size\n Rec begins with ",buf); /* Next line is how we get out of here!! */ if (buf[0] != COMMENT_CHAR) break; /* +1 in line below is really +strlen(COMMENT_CHAR) */ add_to_comment_stream(postfix,buf+1); } } return 1; } /* Should never get here-each section of switch logically ends w/ a */ /* return (I hope) */ } void scanheader_obj(lev) int lev; /* */ /* scanheader_obj processes the variable list of a JGOFS */ /* object used as a data source against the master variable list */ /* of the object to which it belongs. */ /* It notes expected variables missing as well */ /* as noting where each variable in this subfile belongs */ /* (variables need not occur in the same order as in the */ /* master list nor at the same level). */ /* "Count" variable within level ([lev].lastvar) assumed */ /* properly initialized. 
*/
/*   Uses global variable datalev                                  */
{
  int i,j,k;
  int objlevel;
  char varname[MAXVARNAMESIZE+1];
  char *tok;

  do_diag_trace (TRACE_PERFILE_ROUTINES,"scanheader_obj");
  objlevel = datalev[lev].objlevel;
/* Loop for all variables in the input object at its level.  Note  */
/* that this is not necessarily the same as the # of variables in  */
/* the master level 0 list of variables at this level.             */
/* NOTE(review): the statement below is not valid C as it stands.  */
/* Text appears to be missing between the loop header of           */
/* scanheader_obj and the first nxttok call of the routine that    */
/* returns nskip (the rest of scanheader_obj and the header of     */
/* that routine are absent).  It is kept exactly as found;         */
/* recover the missing text from a clean copy of defgb.c.          */
  for (i=objfirstvarlevel[objlevel]; ibuf,fptr->item_separators,&nxt,DEF_QUOTES,FALSE,FALSE);
    while (tok != NULL) {
      last_tok = tok;
      tok = nxttok(nxt,fptr->item_separators,&nxt,DEF_QUOTES,FALSE,FALSE);
    }
  }
  return nskip;
}

/*****************  Begin scanheader  ********************************/
void scanheader(lev)
int lev;
/*                                                                   */
/*       scanheader reads the portion of a JGOFS-formatted data      */
/*   file that occurs before the actual data.  This portion includes */
/*   any comment lines, and the variable list for this data file     */
/*   and any that occur below it.                                    */
/*       Data sets can consist of many data files.  The top-most     */
/*   data file usually contains the master variable list and, if so, */
/*   is processed by routine scanheader0 - see doc for optional file */
/*   varlist.  If not, it is treated as described herein.            */
/*       Scanheader verifies the variable list of the file against   */
/*   the master variable list.  It notes expected variables missing  */
/*   as well as noting where each existing variable in this subfile  */
/*   is located in the master list (variables need not occur in the  */
/*   same order as in the master list, nor do they need to occur on  */
/*   the same level).  It counts how many variables                  */
/*   are present.  ("Count" variable within level ([lev].lastvar)    */
/*   assumed properly initialized).  It creates a list of where      */
/*   each fixed field variable begins if this level contains fixed   */
/*   field data.                                                     */
/*       It then skips over the lists of variables for levels below  */
/*   the one being processed, if any.  (It skips all lines ending in */
/*   >, and then one more)                                           */
/*                                                                   */
/*   lev indexes the global datalev array; the routine reads through */
/*   datalev[lev] and stores the result in datalev[lev].maxlenrec.   */
/*   It returns early, leaving maxlenrec at 0, when                  */
/*   getrec_proccomment reports no data record (return <= 0).        */
{
  int inp_index;
  char *tok,*last_tok,*nxt;   /* last_tok is not used in the visible body */
  char *separators;
  char *tmp;

  do_diag_trace (TRACE_PERFILE_ROUTINES,"scanheader_");

/* Haven't determined fixed/free status of this level yet            */
/* (But if no data, assume free field - don't know if this needed)   */
  inp_index = -1;    /* For fixed-field; position within data record */
  datalev[lev].maxlenrec = 0;

/* On return, buffer contains first non-comment record, which is THE */
/* record containing variables at this level                         */
/* The empty-string postfix sends any leading comments to the data   */
/* comment sink (see add_to_comment_stream)                          */
  if (getrec_proccomment(&datalev[lev],"") <= 0)
    return;
/* Copy so we can use whole in err msg                               */
/* NOTE(review): tmp has no initial value when its address is handed */
/* to strdupl.  If strdupl looks at or frees the previous contents   */
/* of *tmp this is undefined behaviour - confirm strdupl's contract. */
  strdupl(&tmp,datalev[lev].buf,"scanheader, duplicating data buffer");

/* Token-ize input, removing surrounding "s if present (def behav-   */
/* ior - not sure why).  Stop at >, if any.  Otherwise, process each */
/* as a variable name, possibly with attrib list                     */
  separators = datalev[lev].item_separators;
  tok = nxttok(tmp,separators,&nxt,DEF_QUOTES,FALSE,FALSE);
  while (tok != NULL) {
    if (*tok == LEVEL_CONTINUATION_CHAR)
      break;
    process_varname(tok,lev,&inp_index);
    tok = nxttok(nxt,separators,&nxt,DEF_QUOTES,FALSE,FALSE);
  }
/* If we are in collection-from-objobj case, add variable name that  */
/* will be used for level 1 specs.  At first thought, it's weird to  */
/* see this code in scanheader, since coll-from-objobj is a single   */
/* level situation, and scanheader operates on levels 1->N (and not  */
/* level 0).  However, in most coll-from-objobj cases, we have a     */
/* varlist for level 0, and scanheader processes the level 0 file as */
/* a subfile.  Therefore, it needs to be here...                     */
  if (varname_for_lev1s != NULL)
    process_varname(varname_for_lev1s,lev,&inp_index);
/* tok is NULL here when the token loop ran off the end of the       */
/* record, and points at the continuation token when it stopped      */
/* early                                                             */
  skip_rest_of_varlist(tok,nxt,&datalev[lev]);

/* Save largest possible length for records on this level.           */
/* Individual records can be variable length, in that fixed          */
/* fields beyond end are considered missing, but this is length if all */
/* variables in this header show up.
*/ /* 0 means this is a free-field level */ datalev[lev].maxlenrec = inp_index; do_diag_trace (TRACE_PERFILE_ROUTINES,"end scanheader_"); if (tmp != NULL) free (tmp); return; } /* */ /***************** End scanheader ********************************/ int get_int_from_string (string,min,max,delim,errmsg) char *string,*errmsg,delim; int min,max; { int value; char *end_ptr; value = strtol (string,&end_ptr,10); /* Check that scan was terminated by delimiter or end-of-string */ if ( ! ((*end_ptr == '\0') || (*end_ptr == delim)) ) err ("Non-numeric input ",errmsg); if ( (value < min) || (value > max) ) err ("Integer value too big or too small ",errmsg); return value; } char *get_int_from_keyword(value,keyword,wjstbl,min,max,wjstbl_id) char *keyword,*wjstbl,*wjstbl_id; int *value,min,max; /* wjstbl_id is for error messages */ /* Returns start of where it found integer string; NULL if not found */ /* Integer value goes into *value; 0 if key not found */ { char *keyval; keyval=lookup_wjstbl(keyword,wjstbl); if (keyval == NULL) *value = 0; else *value = get_int_from_string(keyval,min,max,wjstbl[strlen(wjstbl)-1], buildstring(wjstbl_id," keyword ",keyword, " building get_int_from_keyword msg")); return keyval; } int make_and_save_width(ptr, attr, fixed_width, opt_width, pre_trans_width) int fixed_width, opt_width, pre_trans_width; char *ptr, attr[]; /* Routine to determine output field width of a variable and save */ /* it as a data attribute and an integer. There are 4 ways the */ /* width can be determined */ /* 1) Use value from dispwidths opt file, if any */ /* 2) Failing that, the user can have explicitly specified */ /* a "width=" data attribute. Use that width. */ /* 3) Failing that, the user could have padded the */ /* variable name to the desired width with spacer */ /* characters (defined by constant W_EXTEND). Use */ /* that width. This extension can be on pre- or post- */ /* translation name. 
Use pre-translation width if it */ /* exceeds post-translation width */ /* 4) Failing that, we have no clues from the user */ /* Calculate the width of the post-translation variable */ /* name itself and use the max of that width and the */ /* input field width if input is fixed field. */ /* Routine receives the title string (pointed to by 'ptr'), its */ /* attribute list, the fixed width of the field (0 for free */ /* field input), the width for the field from any dispwidths */ /* file (0 if not in the dispwidths file) and the width of the */ /* field as padded by W_EXTEND characters(pre-translation) (0 if */ /* no padding done to pre-translation name) */ /* An attribute beginning with 'width=' is constructed (or an */ /* existing one modified) to be saved in the calling routine. */ { int len,extended_width,explicit_width = 0; char attr_sep_string[1+1] = { ATTRIB_SEP, '\0' }; char *end_ptr,*attr_ptr; /* Get len of (possibly) extended label (last char is W_EXTEND) */ /* then strip off extenders. Must do strip whether we use this */ /* as the width source or not */ extended_width = strlen(ptr); if (ptr[extended_width-1] == W_EXTEND) striptoken(ptr); /* Use pre-translation extended width if it's bigger... */ if (pre_trans_width > extended_width) extended_width = pre_trans_width; /* Copy attribute string without width= attrib. If width= found */ /* get its value. Could ALMOST use ioattrout_ for this, but, */ /* among other things, it uses strtok, and this routine is */ /* called within a loop already using strtok... */ attr_ptr = attr - 1; while ( attr_ptr++ != NULL) { if (strncmp(attr_ptr,"width=",6) == 0) { /* Get the width */ explicit_width = get_integer_attribute(attr_ptr); /* Copy stuff after concluding ;, if anything, overwriting */ /* width=. If nothing, terminate string before width=. 
*/ if ( (end_ptr = strchr(attr_ptr,ATTRIB_SEP)) == NULL ) *attr_ptr = '\0'; else strcpy (attr_ptr,++end_ptr); /* get beyond ; before copying */ break; } attr_ptr = strchr(attr_ptr,ATTRIB_SEP); } /* Make sure non-empty attr string ends w/ ; */ if (*attr != '\0') if ( attr[strlen(attr)-1] != ATTRIB_SEP ) strcat (attr,attr_sep_string); len = strlen(ptr); /* Var name len */ if (opt_width > 0) len = opt_width; /* From opt file */ else if (explicit_width > 0) len = explicit_width; /* width= */ else if (extended_width > len) len = extended_width; /* Padded with _s */ else if (fixed_width > len) len = fixed_width; /* Fixed field len */ /* Add width to end of (possibly null) input attr string */ sprintf( &attr[strlen(attr)], "width=%d", len); return len; } void process_varname0(tok,ncnt,fptr) char *tok; int ncnt; struct fileinfo *fptr; /* Processes one variable name for scanheader0. */ /* Translates variable name if requested */ /* Checks for uniqueness */ /* Calculates width */ /* Processes attribute list, adding width if necessary */ /* Calculates start position if fixed field */ /* Many global variables used */ { static char attrsav[TOKEN+1]; int i,pre_trans_extended_width; pre_trans_extended_width = strip_varname_addends(tok,ATTR_DELIM,attrsav,sizeof(attrsav)); if (*tok == '\0') err ("Zero length variable name in record ",fptr->buf); /* What's left is variable name. 
Store it permanently, trans- */ /* lating if necessary */ /* Didn't do it sooner because variable + attribute might be */ /* too long while neither is too long by itself */ if (fptr->pre_trans) { if (TRANSVAR_CALL(tabnames[ncnt],tok) == -1) err ("Translated variable name too long\n Name (pre-trans) = ",tok); } else { COPY_INTO_FIXED_LEN_BUFFER(tabnames[ncnt],tok,"Variable name too long"); } tok = tabnames[ncnt]; if (lookup_lev0varlist(tok) >= 0) err ("Duplicate variable name (after translation, if any)\n Name = ", tok); /* Create list of widths of fixed format variables (if any) in */ /* the order they appear */ get_int_from_keyword (&inpwidths[ncnt],tok,var_data_widths,0,MAXREC,"inpwidths"); /* Create "width=" attribute if necessary, and save name, */ /* output width, and attributes of variable in tabxxx arrays */ /* Get width for this variable, if any, from dispwidths file */ get_int_from_keyword (&i,tok,disp_data_widths,0,INT_MAX,"dispwidths"); /* Have fun getting "actual" width! */ tabwidths[ncnt] = make_and_save_width(tok,attrsav,inpwidths[ncnt],i,pre_trans_extended_width); strdupl(&tabattributes[ncnt],attrsav," saving attribute"); return; } /***************** Begin scanheader0 *******************************/ int scanheader0(fptr,total_number_variables,comment_postfix) struct fileinfo *fptr; int *total_number_variables; char *comment_postfix; /* For stuff we usually write to /dev/null, we */ /* do a lot of logic!!!! */ /* */ /* scanheader0 reads the master variable list for this object */ /* This list defines what variable names can appear in the object, */ /* as well as defining the subfile (level) structure of the object */ /* The master variable list usually appears in the "level 0" data */ /* file, but can also appear in a file of its own (see varlist */ /* optional file doc). */ /* scanheader0 creates several lists. It saves the list of */ /* variables (translated if necessary-see transvar optional file */ /* doc). 
It saves the variables' attribute lists. Special */ /* attention is paid to the width attribute. If not present, one */ /* is calculated (see code in here, or dispwidths optional file */ /* doc). If it is present, it is a parameter in the calculation. */ /* It also creates a list of data field widths (see inpwidths opt */ /* file doc). */ /* It creates a level split list; that is, it records the */ /* variable number of the first variable on each level. */ /* Finally, assuming that the master list is coming from a */ /* data file, it does the scanheader chores for level 0 (simpli- */ /* fied by the fact that there are no missing variables). If in */ /* fact the master list is coming from a different file, scan- */ /* header will redo those chores when the level 0 file is read. */ /* scanheader0 returns the total number of variables in the */ /* data set through its argument, and the number of levels in the */ /* data set as its function value */ { int i,j; int pre_trans_extended_width; char *tok,*end_ptr,*nxt; char *separators; char *tmp; do_diag_trace (TRACE_PERFILE_ROUTINES,"scanheader0"); /* On return, buffer in structure contains first non-comment record */ if (getrec_proccomment(fptr,comment_postfix) <= 0) err ("EOF/error at or before variable list",""); ncnt=0; /* "Live" (global) count of variables processed so far */ maxlev=0; separators = fptr->item_separators; /* Loop, reading records, until we find one that does not end with > */ while (TRUE) { /* Copy so we can use whole buf in err msg */ strdupl (&tmp,fptr->buf,"scanheader0, duplicating data buffer"); tok = nxttok(tmp,separators,&nxt,DEF_QUOTES,FALSE,FALSE); while (tok != NULL) { if (*tok == LEVEL_CONTINUATION_CHAR) break; if (ncnt == NVAR) errn ("Too many variables found during scan of level 0 list. Max = ",NVAR); process_varname0(tok,ncnt,fptr); ncnt++; tok = nxttok(nxt,separators,&nxt,DEF_QUOTES,FALSE,FALSE); } /* If in collection_from_objobj case, add variable name for level */ /* 1 specs. 
Later, skip rest of variable list and end processing */ if (varname_for_lev1s != NULL) { if (ncnt == NVAR) errn ("Adding varname_for_lev1s to level 0 list exceeds max # vars. Max = ", NVAR); process_varname0(varname_for_lev1s,ncnt,fptr); /* Take care of fixed field case. Error if inpwidths 1->i-1 are */ /* not consistent, but this gets diagnosed later. TOKEN is max */ /* hopefully ioreadrec allows last fixed field token to be short */ /* If not, eventually we may find out (expect fixed field */ /* case w/level1_specs-as-data to be rare) */ inpwidths[ncnt++] = (inpwidths[0] == 0) ? 0 : TOKEN; } /* All tokens processed, which means this level processed */ /* Save the number of variables read in */ firstvarlevel[maxlev+1]= ncnt; /* If last variable was the new level character, advance the level */ /* counter and read next record */ /* If it wasn't, we're done. */ if ( (tok == NULL) || (varname_for_lev1s != NULL) ) { skip_rest_of_varlist(tok,nxt,fptr); break; } else if (*tok == LEVEL_CONTINUATION_CHAR) { if (*(tok+1) != '\0') err ("Level continuation includes extraneous characters\nRecord = ", fptr->buf); if (nxttok(nxt,fptr->item_separators,&nxt,DEF_QUOTES,FALSE,FALSE) != NULL) err ("Token found after level continuation\nRecord = ",fptr->buf); if (++maxlev >= MAXLEVELS) errn ("Too many data levels. Allowed maximum = ",MAXLEVELS); if (getrec_proccomment(fptr,NULL) <= 0) err ("EOF/error trying to read a variable list record",""); } else err ("Internal consistency problem in scanheader0\nBad varlist terminating token ", tok); } /* At this point, we've finished processing all header lines */ if (ncnt == 0) err ("No variables found after scan of level 0 list",""); *total_number_variables = ncnt; /* scanheader work for level 0 */ /* Logically, scanheader0 is called for the "master list file", */ /* which is closed after use. ioreadrec would then open the level */ /* 0 file, and end up doing the work below. 
That would avoid this */ /* wart, as well as the close file/don't close file wart in ioopen */ /* However, an extra open could have performance impact if what's */ /* being opened is not a file (assumes file opens have no impact!) */ datalev[0].firstvar = firstvarlevel[0]; /* = 0 */ datalev[0].lastvar = firstvarlevel[1]; for (j=0; j is correct */ /* since buffer size is actually 1 bigger) */ if ( (ptr += strlen(ptr)) > end_attrsav ) err ("Attribute too long\n Attribute = ",attrsav); *(ptr++) = ATTRIB_SEP; } if (ptr != attrsav) ptr--; *ptr = '\0'; /* Translate/copy variable & check for dups */ if (TRANSVAR_CALL(tabnames[i],objnames[i]) == -1) err ("Translated variable name too long\n Name (pre-trans) = ", objnames[i]); if (lookup_lev0varlist(tabnames[i]) >= 0) err ("Duplicate variable name (after translation, if any)\n Name = ", tabnames[i]); /* Create "width=" attribute if necessary, and save */ /* output width and attributes of variable in tabxxx arrays */ /* Get width for this variable, if any, from dispwidths file */ get_int_from_keyword (&j,tabnames[i],disp_data_widths,0,INT_MAX,"dispwidths"); tabwidths[i] = make_and_save_width(tabnames[i],attrsav,0,j,0); strdupl(&tabattributes[ncnt],attrsav," saving attribute from obj"); ncnt++; } *total_number_variables = ncnt; /* scanheader work for level 0 */ datalev[0].firstvar = firstvarlevel[0]; datalev[0].lastvar = firstvarlevel[1]; for (j=0; j TRANS_LIST ) err ("transvar list overflow adding translation to ",new); ptr = trans_list + strlen(trans_list); *(ptr++) = WJSTBL_SEPARATOR; in_ptr = old; while (*in_ptr != '\0') *(ptr++) = *(in_ptr++); *(ptr++) = WJSTBL_SEPARATOR; in_ptr = new; while (*in_ptr != '\0') *(ptr++) = *(in_ptr++); *(ptr++) = WJSTBL_WHITE_SPACE; *ptr = '\0'; return; } void propagate_flag_for_coll_objobj(objobj,added_varname) char **objobj,*added_varname; /* If object is local, put flag into environment so method that */ /* pipes to us will see it. 
If object is remote, arrange to read */ /* remote collection-from-object-of-objects object, giving that */ /* thing the original object as an argument. Note that "remote" */ /* here means connect across socket interface instead of pipe to a */ /* child process. It has nothing to do with whether the actual node */ /* being referenced is this one or another. */ /* Rewrite input argument if necessary, freeing old one. */ /* coll_from_objobj_obj is implicit input */ { char *ptr,*in_ptr; char *host,*obj; char temp_string[3+1]; int i; /* Test below is what dctsearch does... */ if (strncmp(*objobj,REMOTE_OBJECT_PREFIX,strlen(REMOTE_OBJECT_PREFIX)) == 0) { host = *objobj; /* Seemed to be a problem on globec.whoi.edu when I didn't use */ /* ptr as a temp variable... */ ptr = host+strlen(REMOTE_OBJECT_PREFIX); if ( (obj = strchr(ptr,DIRSEP)) == NULL ) err ("Cannot find object spec after remote host spec in string ",ptr); else *(obj++) = '\0'; /* Redirect to special remote object */ /* Much fooling trying to use characters out of parametrized */ /* strings */ temp_string[0] = *OBJECT_PARAM_STRING_DELIM; /* ( */ temp_string[1] = *OBJECT_DELIM; /* ({ */ temp_string[2] = DIRSEP; /* ({/ */ temp_string[3] = '\0'; /* ({/\0 */ ptr = buildstring(host,coll_from_objobj_obj,temp_string, " building coll_from_objobj spec"); temp_string[0] = *(OBJECT_DELIM+1); /* } */ temp_string[1] = *(OBJECT_PARAM_STRING_DELIM+1); /* }) */ temp_string[2] = '\0'; /* })\0 */ *objobj = buildstring(ptr,obj,temp_string, " building coll_from_objobj spec"); free (ptr); free (host); } else { /* Object OK. Set env var so local method (if defgb) will */ /* give desired output */ i = putenv (buildstring(upcase_flag_for_coll,"=",added_varname, " building putenv string for coll_from_objobj") ); if (i != 0) err ("putenv failure for flag_for_coll: ",strerror(i)); } return; } void open_datafile(file) struct fileinfo *file; /* open_datafile opens file as a datafile. 
This structure was init'ed */ /* when we processed the "next level file" item from the level above */ /* Should only return if open succeeded */ { char *tmp; int i; int varnamesize; if (max_trace_level >= TRACE_PERFILE_ROUTINES) { tmp = buildstring("open ",file->source,NULL, " building trace msg"); do_diag_trace (TRACE_PERFILE_ROUTINES,tmp); free (tmp); } errno = 0; /* Be sure that errno errors are ours! */ switch (file->source_type) { case COMMAND_FILE: #if READ_COMMANDS /* If a method is run by the command file, it will look */ /* at the PATH_INFO string. Ideally, the command file it- */ /* self would set PATH_INFO appropriately. However, */ /* we sometimes have a command itself, not a command file. */ /* Tough choice, since presumably command file might want */ /* to know PATH_INFO, but replace proto string with "none" */ /* (and drop options - see ioopen_ where none_putenv string */ /* is made). My decision at this point... none has no */ /* meaning to system. */ if (PATH_INFO_none_putenv != NULL) if ( (i = putenv(PATH_INFO_none_putenv)) != 0 ) err ("putenv failure for PATH_INFO: ",strerror(i)); tmp = startchild(file->source, NULL, &file->stream); /* Not sure if bad startchild result would always give */ /* bad return status */ if ( (file->stream == NULL) && (tmp == NULL) ) tmp = "Reason unknown"; if (tmp != NULL) err ( buildstring("Error opening data script", file->source, " : \n ", " open_datafile-script msg"), tmp ); file->open=TRUE; /* Restore original PATH_INFO if we changed it */ if (PATH_INFO_none_putenv != NULL) if ( (i = putenv(PATH_INFO_orig_putenv)) != 0 ) err ("putenv failure restoring PATH_INFO: ",strerror(i)); #else err("Method not compiled with command-reading capability,\n\ needed to read data from script ",file->source); #endif break; case DATA_FILE: if ( (file->stream = fopen(file->source,"r")) == NULL ) err ( buildstring("Error opening data file ", file->source, " : \n ", " open datafile msg"), strerror(errno) ); file->open=TRUE; break; case 
CREATE_NULL_DATA: break; case JGOFS_OBJECT: #if READ_OBJECTS /* Must be sure that methods down the line produce JGOFS */ /* protocol (or whatever jdbopen requires), and not html, */ /* flat, or whatever. Do this by replacing proto string */ /* with "jgof" (my decision at this point... jgof has no */ /* meaning to system). */ if (PATH_INFO_jgof_putenv != NULL) if ( (i = putenv(PATH_INFO_jgof_putenv)) != 0 ) err ("putenv failure for PATH_INFO: ",strerror(i)); /* If we are in a collection_from_objobj situation, we have to */ /* fiddle before opening the object. If object is remote, we */ /* want to open the special "coll_from_objobj" object on the */ /* remote node with our original object as an argument. If */ /* object is local, we need to set a flag. */ if (varname_for_lev1s != NULL) propagate_flag_for_coll_objobj (&(file->source),varname_for_lev1s); /* For values of next 2, see jdbopen_ */ objncnt = -NVAR; varnamesize = MAXOBJVARNAMESIZE + 1; /* If we're switching objects, close old one. */ /* jdbclose doesn't care if you close a closed, but jdbunit */ /* must be a legal index (and, logically, should have been */ /* returned from jdbopen) */ if (jdbunit >= 0) if (strcmp(last_objname,file->source) != 0) { jdbclose_(&jdbunit); /* See comments at waitpid in dataeod */ waitpid((pid_t)-1,NULL,WNOHANG); errno = 0; } objmaxlev = jdbopen_ (&jdbunit, file->source, objnames, &varnamesize, &objncnt); if (objmaxlev < 0) err("Failure to access JGOFS data object ",file->source); /* Next > NVAR test too late - objnames has already overflowed */ if ( (objncnt > NVAR) || (objncnt <= 0) ) err("0, negative or too many variables in JGOFS object ", file->source); last_objname = file->source; /* If user's variable name doesn't match name we know is */ /* coming from coll_from_objobj_obj, arrange a transvar. 
*/ if (varname_for_lev1s != NULL) if (strcmp(varname_for_lev1s,objnames[objncnt-1]) != 0) add_translation(objnames[objncnt-1],varname_for_lev1s); /* Build table of level splits */ objfirstvarlevel = level_splits(jdbunit,objmaxlev,objncnt); if (objfirstvarlevel == NULL) errn ("level_splits memory problem. # ints in attempted allocation = ", objmaxlev+1); /* Set up "present level" to force first read */ jdblev = objmaxlev + 1; objeof = FALSE; /* Temp until indirect file->eof */ /* datalev.objlevel set to 0 in init routines... this level's */ /* structure corresponds to object level 0. All level's */ /* .objlevels so set since we copy lev N-1's structure to */ /* lev N in ioreadrec. Therefore we need the level BEFORE */ /* this one's set to 0 as well as this one's. Logically, */ /* though, the init belongs here */ file->open=TRUE; /* Restore original PATH_INFO if we changed it */ if (PATH_INFO_jgof_putenv != NULL) if ( (i = putenv(PATH_INFO_orig_putenv)) != 0 ) err ("putenv failure restoring PATH_INFO: ",strerror(i)); /* More kludge related to "transparent" reopen_object */ /* optimization and outer's desire for vars beyond the max */ /* level it's been asked for [see v 3.5 bug fix, but good luck */ /* tracing the exact history of things). After optimization */ /* reopen, we cannot provide info after the max asked-for */ /* level. ioreadrec_ needs to know this. Previously we */ /* confined the following defn to reopen. However, all logic */ /* paths don't end up calling reopen. Rather than think */ /* properly about when reopen gets called, put defn here and */ /* see what happens */ /* This setting means "object not reopened - use results */ /* from jdbopen in open_datafile" */ objmaxlev_opt = objmaxlev; #else err("Method not compiled with object-reading capability,\n\ needed to read data from object ",file->source); #endif break; case INDIRECT_FILE_LINE: /* Highly unlikely a real data file would have just a single */ /* line. 
However, a) could (esp if "noheader") b) we use */ /* open_datafile to open datacomments files, too, and they */ /* could well be immediate */ file->open = FALSE; break; default: tmp = buildstring( "Attempt to open illegal file type as data file\n File type = ", file->descrip, "\n File source type = ", " building open_datafile err string"); *one_char_buf = file->source_type; err (tmp, one_char_buf); free (tmp); break; } file->eod=FALSE; file->eof=FALSE; file->nrecs=0; /* A bit deceptive. Counts between EODs. If */ /* >1 dataset comes from a single file, this is */ /* NOT the count on the whole file */ return; } void reopen_object(file) struct fileinfo *file; /* reopen_object is an optimization for transfer of data from a jgofs */ /* object. Idea is that if we know we don't want all levels, specify */ /* projections for the variables we want. This avoids transfer of */ /* lower level data. */ /* Returns # levels in optimized object; -1 if object not reopened */ /* Does not return if problem reopening object */ /* [Above return values spurious. function is void, and has been */ /* since it was written. WJS, as of defgb 4.2] */ /* See open_datafile for details about path_info stuff, jdbopen_ */ /* params, returns, etc */ { char *tmp; int i; int varnamesize; if (max_trace_level >= TRACE_PERFILE_ROUTINES) { tmp = buildstring("reopen ",file->source,NULL, " building trace msg"); do_diag_trace (TRACE_PERFILE_ROUTINES,tmp); free (tmp); } #if OPTIMIZE_OBJECT_TRANSFER /* Check desired last level in this call to the method. */ /* If we want all levels, just return */ if (defgb_reqlevel < 0) return; /* defgb_reqlevel - file->level is the last level we need */ /* from this JGOFS object. If JGOFS object has more */ /* levels, reopen object, selecting only variables at */ /* levels we need. Otherwise just return. */ if (objmaxlev <= defgb_reqlevel - file->level) return; /* Add projection string to object spec. 
*/ /* If obj spec doesn't have string, add one; else */ /* prep spec to append to existing string */ if ( *(tmp = file->source + strlen(file->source) -1) == OBJECT_PARAM_STRING_DELIM[1] ) /* Replace close paren with comma */ *tmp = OBJECT_PARAM_STRING_SEP; else { /* Add comma to end of source string */ *one_char_buf = OBJECT_PARAM_STRING_DELIM[0]; file->source = lengthen_str_and_free( file->source, one_char_buf, NULL, 100, " building object projection string 1"); } i = 0; *one_char_buf = OBJECT_PARAM_STRING_SEP; while (jdblevel_(&jdbunit,&i) <= defgb_reqlevel - file->level) file->source = lengthen_str_and_free( file->source, objnames[i++], one_char_buf, 100, " building object projection string 2"); /* Replace last comma with terminating close paren (unless there */ /* were no variables in list, in which case return. No vars in */ /* list = no vars at some level = some kind of trouble, but it's */ /* unclear what system is supposed to do about this, much less */ /* what this function should do) */ tmp = file->source + strlen(file->source) - 1; if (*tmp == OBJECT_PARAM_STRING_DELIM[0]) return; *tmp = OBJECT_PARAM_STRING_DELIM[1]; /* Reopen object with projection in place */ /* See this call in open_datafile for comments */ if (PATH_INFO_jgof_putenv != NULL) if ( (i = putenv(PATH_INFO_jgof_putenv)) != 0 ) err ("putenv failure for PATH_INFO: ",strerror(i)); jdbclose_(&jdbunit); /* See comments at waitpid in dataeod */ waitpid((pid_t)-1,NULL,WNOHANG); errno = 0; i = -NVAR; varnamesize = MAXOBJVARNAMESIZE + 1; /* Not supposed to use anything returned from this jdbopen_ */ /* except the unit (presumably attached to an "optimized" */ /* data link). 
maxlev_opt is a safety thing */ tmp = (char *) malloc ( NVAR * varnamesize ); if (tmp == NULL) err ("Could not get memory for tmp objnamelist",""); objmaxlev_opt = jdbopen_ (&jdbunit, file->source, tmp, &varnamesize, &i); free (tmp); if (objmaxlev_opt < 0) err("Failure to access JGOFS data object ",file->source); /* Should be impossible to fail next tests since they were */ /* already satisfied at larger objmaxlev & objncnt, but... */ if (objmaxlev_opt >= MAXLEVELS) errn ("Too many data levels in JGOFS object. Allowed maximum = ", MAXLEVELS); if ( (i > NVAR) || (i <= 0) ) err("0, negative or too many variables in JGOFS object ",file->source); /* Restore original PATH_INFO if we changed it */ if (PATH_INFO_jgof_putenv != NULL) if ( (i = putenv(PATH_INFO_orig_putenv)) != 0 ) err ("putenv failure restoring PATH_INFO: ",strerror(i)); #endif /* Save copy of lengthened string so "lengthener" routine can have its */ /* buffer position back... */ tmp = file->source; strdupl(&file->source,tmp," copying reopened object spec"); free_lengthened_str(tmp); return; } /****************** Begin ioopen_ *********************************/ /* */ /* "s[0..nparams-1]: parameter strings. Inner sets s[j][0]=0" */ /* "for any strings which it processes; others will be processed" */ /* "by outer. Thus selection/projections would normally be" */ /* "ignored by inner." 
*/ /* "nparams: number of parameter strings" */ /* "ntotal (returned): total number of variable names" */ /* ioopen_ also handles the indirect input file, if any, and sets */ /* up each option, which may do things now (like remove */ /* variables), or prepare for later work by scanheader* & ioreadrec_ */ int ioopen_(s,nparams,ntotal) char *s[]; int *nparams,*ntotal; { Logical configure_output(); /* diagnostics opt file */ int create_wjstbl(); /* make tbl from opt file */ Logical get_datafield_options(); /* data file opts opt file */ void get_files(); /* Indirect file */ Logical get_comment_sources(); /* datacomments opt file */ Logical get_latlonparams(); /* latlonparams opt file */ Logical get_timedateparams(); /* timedateparams opt file */ Logical get_translation_table(); /* transvar opt file */ Logical get_varlist_options(); /* varlist opts opt file */ void init_outinfo (); void init_time_structs (); void init_file_structs (); Logical open_and_log_opt(); /* general opt file opener */ Logical remove_vars (); /* removals opt file handler */ char postfix_buf[MAXCOMMENTLINE+1]; char *tmp_commentbuf; char *tok,*postfix,*ptr; char *separators,*alt_separators; char *c_buf,*c_source; FILE *c_stream; int start_lev0comments,end_lev0comments,len_lev0comments; int i,j,len; Logical requested_diag_output,open_diagout; struct fileinfo *fptr; /* Note that this cannot easily/logically be parametrized on a */ /* wide scale. The trigram routines accept any key. The */ /* necessary coordination is between routines that trigram and */ /* routines that un_trigram, and the agreement need only be */ /* bi-lateral (not necessary to be system-wide). Further, since */ /* said routines may be in different languages, etc, there are */ /* problems. 
This is one reason we use a variable here instead of */ /* a compile-time constant - coordination at runtime may turn out */ /* to be easier than coordination at compile time in the unlikely */ /* event we ever get coordinated */ char trigram_char = DEFAULT_TRIGRAM_CHAR; /* Defines debug switch for ioopen_ itself, so must be early */ init_outinfo (&output_opts); /* Open diag stream(s). Level test is only to try to avoid */ /* opening file if nothing is going to be written... */ /* If, at runtime, diag output is redirected, these files will */ /* be closed and their structures re-init'ed (configure_output) */ if (output_opts.trace_level >= TRACE_IOOPEN) if ( ! *(diagout->open_ptr) ) if ( ! open_sink(diagout)) err( buildstring("Error opening diagnostic sink ", diagout->sink, " : \n ", " open diagsink msg"), strerror(errno) ); if (output_opts.dup_trace_level >= TRACE_IOOPEN) if ( ! *(primary_diagout->open_ptr) ) if ( ! open_sink(primary_diagout)) err( buildstring("Error opening diagnostic sink ", primary_diagout->sink, " : \n ", " open primarydiagsink msg"), strerror(errno) ); do_diag_trace(TRACE_IOOPEN,"ioopen_"); /* Can't figure out how to do following at compile time */ if (COMMENT_CHAR == *ERR_PREFIX) err (" Special strings may not begin with comment character",""); if (strlen(MISSING_VALUE_STRING) >= sizeof(tabvalues[0])) err (" Datum buffer not big enough for missing value string ", MISSING_VALUE_STRING); tabcomments = (char *) malloc (1); if (tabcomments == NULL) err ("Could not malloc 1 byte for tabcomments",""); tabcomments_ptr = tabcomments; tabcomments_size = 1; *tabcomments = '\0'; /* Init this, since when processing level 0 list, we'll be */ /* figuring the first var of level 1 */ firstvarlevel[0] = 0; init_file_structs (files,&indirect_file,comment_sources); init_time_structs (in_timedate,out_timedate,timedatefragbuflist, cumtime,fractime); if (*nparams <= 0) errn ("Inner got no args from outer. 
# args = ",*nparams); for (i = 0; i < *nparams; i++) if (s[i][0] == trigram_char) { ptr = un_trigram(s[i],trigram_char,&j); if (ptr == NULL) { errn (buildstring("Bad untrigram of argument ",s[i]," at position ", "untrigram problem"), j); } else { /* Mark arg as "processed" to outer, then switch pointer to */ /* untrigrammed string (which will later get unnecessarily */ /* marked as "processed") */ s[i][0] = '\0'; s[i] = ptr; } } /* Get names of level 0 data file and optional files, if any */ if (s[0][0] == INDIRECT_FILE_CHAR) { get_files(files,s[0]+1,&indirect_file,s,*nparams); if (files[DATAFILE].source == NULL) err ("Level 0 input specifier not found in indirect file ",s[0]+1); } else { strdupl(&files[DATAFILE].source,s[0]," saving level 0 source info"); /* Use indirect file's separators because too lazy to set up */ /* .object file's separators... for now, anyway */ analy_source(&files[DATAFILE],indirect_file.item_separators,NULL); } /* Inner must indicate to outer that this parameter was processed */ s[0][0] = '\0'; configure_output (&files[DIAGNOSTICS],&output_opts,files); /* Open diagnostics streams if not already open and if any diags */ /* are requested. This might still result in an empty file (if */ /* no diags are produced) but much easier than opening before */ /* first output */ /* User can spec diagout, but not primary_diagout. Therefore, */ /* there is at most 1 user-spec'ed file spec, and that's in */ /* diagout. By specifying "addl-", user says "there are to be */ /* 2 streams, and I'm specifying the file for the "addl-" stream. */ /* You provide the file spec (stdout or stderr) for other stream" */ /* Therefore, existence of diagout spec determines whether or not */ /* to open primary_diagout (which is the "other" stream). There */ /* is probably a noxious interaction if user spec's a primary_ */ /* diagout file spec at compile time, but does not specify "addl-" */ /* at run time. Output may go to wrong sink. 
However, handle */ /* that (potential) bug if/when it happens - this has been tough */ /* enough (v 3.7)! */ /* See if user asked for "non addl-" output */ requested_diag_output = (output_opts.trace_level > NOTRACE) || (output_opts.iovalstr > 0) || (output_opts.iovalreal > 0) ; if ( ! requested_diag_output ) { for (i=0; i 0) break; requested_diag_output = (i < NFILETYPES); } if ( ! *(primary_diagout->open_ptr) ) /* Need primary_diagout if asked for "addl-" file and */ /* "non addl-" output */ if ( diagout->dup && requested_diag_output ) if ( ! open_sink(primary_diagout)) err( buildstring("Error opening diagnostic sink ", primary_diagout->sink, " : \n ", " open primarydiagsink msg 2"), strerror(errno) ); if ( ! *(diagout->open_ptr) ) { /* Need diagout if asked for "non addl-" file and "non addl-" */ /* output */ open_diagout = ( ( ! diagout->dup ) && requested_diag_output ); /* Failing above condition, need diagout if asked for "addl-" */ /* file and "addl-" output */ if ( ( ! open_diagout ) && diagout->dup ) { open_diagout = (output_opts.dup_trace_level > NOTRACE) || (output_opts.dup_iovalstr > 0) || (output_opts.dup_iovalreal > 0) ; if ( ! open_diagout ) { for (i=0; i 0) break; open_diagout = (i < NFILETYPES); } } if (open_diagout) if ( ! open_sink(diagout)) err( buildstring("Error opening diagnostic sink ", diagout->sink, " : \n ", " open diagsink msg 2"), strerror(errno) ); } max_trace_level = (output_opts.trace_level > output_opts.dup_trace_level) ? output_opts.trace_level : output_opts.dup_trace_level ; if (max_trace_level >= DUMP_ENVIRONMENT) dump_environment(); /* Get level out of PATH_INFO. */ /* Make 3 strings for putenv. 1 sets up existing PATH_INFO; the */ /* others set up PATH_INFO with different protocols. 
See */ /* open_datafile */ if ( (ptr = getenv(PATH_INFO_ENV_VAR)) == NULL ) { defgb_reqlevel = LEVEL_NOT_SPECIFIED; PATH_INFO_orig_putenv = NULL; PATH_INFO_jgof_putenv = NULL; PATH_INFO_none_putenv = NULL; } else { *one_char_buf = ENV_VAR_DEFN_CHAR; PATH_INFO_orig_putenv = buildstring(PATH_INFO_ENV_VAR,one_char_buf,ptr," saving PATH_INFO"); defgb_reqlevel = get_level(ptr); PATH_INFO_jgof_putenv = make_PATH_INFO_putenv_string(NULL,ptr,NULL,"jgof",defgb_reqlevel,NULL); if (PATH_INFO_jgof_putenv == NULL) errn ("Cannot make PATH_INFO value from jgof & ",defgb_reqlevel); PATH_INFO_none_putenv = make_PATH_INFO_putenv_string(NULL,ptr,NULL,"none",defgb_reqlevel,NULL); if (PATH_INFO_none_putenv == NULL) errn ("Cannot make PATH_INFO value from none & ",defgb_reqlevel); } /* Reprocess comments we already have stashed in tabcomments */ strdupl(&tmp_commentbuf,tabcomments," copying comment buffer"); tabcomments_ptr = tabcomments; *tabcomments = '\0'; tok = strtok(tmp_commentbuf,"\n"); /* Loop for each saved comment */ while (tok != NULL) { /* Must split postfix off any saved comment for correct */ /* processing. Also, length of comment with postfix was */ /* determined based on default sink-might now be different. */ /* Therefore, must strip off blanks before postfix since they */ /* could have been artificially added... */ if ( (postfix=strrchr(tok,*TAG_DELIM)) == NULL ) postfix = ""; else /* Take care of comment consisting entirely of postfix */ if (postfix == tok) tok = ""; else { /* Point to first blank after comment. 
Use of blank here */ /* assumes add_to_comment_stream pads w/blanks */ for (i=postfix-tok-1; i>=0; i--) if (tok[i] != ' ') break; i++; /* If comment runs right up to postfix, copy postfix so we */ /* can terminate comment "on top of" original postfix */ if (tok+i == postfix) postfix = strcpy(postfix_buf,postfix); tok[i] = '\0'; } add_to_comment_stream(postfix,tok); tok = strtok(NULL,"\n"); } free (tmp_commentbuf); /* Level 0 data file includes info from opt files, so must be */ /* initialized after get_files call. It also includes diag info, */ /* so must be init'd after configure_output. */ /* Note some info applies only to level 0, while some applies */ /* to all data files. Latter requires work at other data levels */ datalev[0]=files[DATAFILE]; datalev[0].level = 0; /* Get directory string from full file spec of level 0 file */ switch (datalev[0].source_type) { case DATA_FILE: case COMMAND_FILE: strdupl(&dirstring,datalev[0].source, " saving directory string"); /* If command w/parameters, truncate to its file spec portion */ /* to protect against a / in a parameter. Assumes no */ /* SCRIPT_PARAM_SEP chars in a plain file spec */ if ( (ptr = strchr(dirstring,SCRIPT_PARAM_SEP)) != NULL ) *ptr = '\0'; /* If there is a directory, split it from file spec */ /* after its trailing / */ if ( (ptr = strrchr(dirstring,DIRSEP)) == NULL ) { free (dirstring); dirstring = NULL; } else *(++ptr) = '\0'; /* Truncate after found / */ break; default: dirstring = NULL; break; } /* Must precede anything that might need variable translation */ create_wjstbl(&files[TRANSVAR],trans_list,TRANS_LIST); create_wjstbl(&files[INPWIDTHS],var_data_widths,VAR_DATA_WIDTHS); create_wjstbl(&files[DISPWIDTHS],disp_data_widths,DISP_DATA_WIDTHS); get_varlist_options (&files[VARLISTOPTS], &varlists_end,&varname_for_lev1s,&coll_from_objobj_obj); if (varlists_end == NO_VARLISTS_END_SPECIFIED) varlists_end = VARLISTS_END; if (varlists_end < -1) varlists_end = MAXLEVELS; /* Convenient. 
Note */ /* -1 doesn't need parametrization... */ /* See if varname_for_lev1s is in environment. If so, and if it */ /* wasn't in optional file, use env value as if it came from opt */ /* file. This is how list_level1_specs logically adds varname */ /* _for_lev1s to the .ind file of the object we're dealing with. */ /* In any case, clear the environment since defgb can be called */ /* from here out, and, since we did this in lieu of adding an opt */ /* file, it should only have the scope of that opt file; eg, 1 */ /* object. */ strdupl(&upcase_flag_for_coll,FLAG_FOR_COLL_FROM_OBJOBJ, " copying FLAG_FOR_COLL"); ptr = upcase_flag_for_coll; /* The more compact *(ptr++)=toupper(*ptr); caused a warning under */ /* OSF unix (became DEC unix; became TRU64 unix or some such thing */ while (*ptr != '\0') { *ptr = toupper(*ptr); ptr++; } if ( (ptr = getenv(upcase_flag_for_coll)) != NULL) { if ((varname_for_lev1s == NULL) && (*ptr != '\0')) varname_for_lev1s = ptr; /* Don't know how to unsetenv from program... 
*/ i = putenv (buildstring(upcase_flag_for_coll,"=",NULL, " clearing putenv string for coll_from_objobj") ); if (i != 0) err ("putenv failure clearing flag_for_coll: ",strerror(i)); } get_datafield_options ( &files[DATAFIELDOPTS], &data_field_trim, &significant_consec_separators, &significant_embedded_separators, &datalev[0].item_separators, datalev[0].item_alt_separators ); if (strchr(datalev[0].item_separators,ATTRIB_SEP) != 0) error_("Attribute separator character cannot be data separator character", ""); if (get_comment_sources(&files[DATACOMMENTS], comment_sources, &files[VARLIST], &datalev[0]) ) { for (i=0; i= MAXREC) err ("Input record exceeds buffer size\n" " Rec begins with ",c_buf); add_to_comment_stream("",c_buf); } comment_sources[i].eof = TRUE; comment_sources[i].eod = TRUE; fclose(c_stream); comment_sources[i].open=FALSE; } } break; } } } } /* Get & analyze variable list (see scanheader0) */ /* Use list in opt file if opt file provided; else use lev 0 file */ /* Logically, this is independent of opening level 0, */ /* so could almost use same code. In particular, closing lev 0 */ /* does no logical harm, nor does accepting VARLIST from object */ if (open_and_log_opt(&files[VARLIST])) { maxlev = scanheader0(&files[VARLIST],ntotal,files[VARLIST].comment_postfix); /* Save start (here) and end (below) of level 0 file comments */ /* so they can be moved to precede any following opt file com- */ /* ments. Done here because open_and_log_opt puts in a comment. 
*/ /* So might scanheader0 of VARLIST */ start_lev0comments = tabcomments_ptr - tabcomments; /* Decision to close VARLIST does not depend on its eod or eof */ /* bits, since we are willing to close in the middle of a file */ /* We do test the open bit in case info does not come from file */ if (files[VARLIST].open) { fclose(files[VARLIST].stream); files[VARLIST].open=FALSE; } } else { if (varlists_end < 0) err ("varlist option not spec'ed and no varlist in level 0 file",""); /* Save start (here) and end (below) of level 0 file comments */ /* so they can be moved to precede any following opt file com- */ /* ments. Note that because of side effects of open_and_log_opt */ /* the apparently repeated next line cannot be placed outside if */ start_lev0comments = tabcomments_ptr - tabcomments; open_datafile(&datalev[0]); #if READ_OBJECTS if (datalev[0].source_type == JGOFS_OBJECT) { maxlev = scanheader0_obj(&datalev[0],ntotal); /* Do a trick so that we don't need level 1, etc data if only */ /* processing level 0. If trouble, can always turn off switch */ /* Done AFTER scanheader0_obj so that object's full var list */ /* is processed. After reopen, "nobody is supposed to know" */ /* the restricted # levels available. If asked for more */ /* levels, ioreadrec will be generating nds */ reopen_object(&datalev[0]); } else { #endif /* Note: if datalev[0] is closed and reread by scanheader */ /* need to do something about dup comments. Easiest is */ /* probably to put in NULL, below, & let scanheader print */ /* them */ /* Similarly, if comments have already been processed */ /* during datacomments processing, repress dups via NULL */ postfix = ""; for (i=0; i start_lev0comments) { if (start_lev0comments != 0) { /* There are comments before the lev0comments */ /* Since we are just swapping portions of the string, no */ /* length checking should be needed. Also, tabcomments_ptr, */ /* before and after swap, points to null at end of comments */ /* buffer. 
Said null should not be overwritten by anything, */ /* either */ /* Chars overwritten w/nulls are newlines, so need not save */ tabcomments[start_lev0comments-1] = '\0'; tabcomments[end_lev0comments] = '\0'; len_lev0comments = end_lev0comments - start_lev0comments; strdupl(&tmp_commentbuf,tabcomments," copying comment buffer2"); strcpy ( tabcomments , tabcomments + start_lev0comments ); tabcomments[len_lev0comments]='\n'; strcpy ( tabcomments + len_lev0comments + 1 , tmp_commentbuf ); tabcomments[end_lev0comments]='\n'; free (tmp_commentbuf); } } return maxlev; } /* */ /****************** End ioopen_ *********************************/ char *getfreedata (varlist,fptr,more_string) int varlist[]; struct fileinfo *fptr; char **more_string; /* Gets free field data from buffer into tabvalues. */ /* Variables filled in are the string of variables in varlist starting */ /* at fptr->firstvar & ending before fptr->lastvar */ /* Returns pointer to string that spec's next level's file */ /* Also returns flag as to whether buffer was completely processed */ /* Uses global variables tabvalues, significant_consec_separators */ { char *tok,*next,*buf; int i,j; /* Make copy of buffer for possible diagnostic purposes */ strdupl(&buf,fptr->buf,"getfreedata"); /* Work w/real buffer since we return pointers to pieces of it */ /* which must exist after we exit this routine */ next = fptr->buf; /* Read no more than the number of variables expected */ for (i = fptr->firstvar; i < fptr->lastvar; i++) { tok = nxttok (next, fptr->item_separators, &next, fptr->item_alt_separators, significant_consec_separators, FALSE); if (tok == NULL) break; if (*tok == '\0') strcpy(tabvalues[varlist[i]],MISSING_VALUE_STRING); else { /* Tabs or returns almost guaranteed to cause trouble down */ /* the line in JGOFS processing, so kill it now */ if (strpbrk(tok,REQUIRED_SEPARATORS) != NULL) err ("May not have tabs or returns in delimited field. 
Field = ",tok); COPY_INTO_FIXED_LEN_BUFFER(tabvalues[varlist[i]],tok,"datum too long"); } } /* What's left should be only a subfile spec, pointer to which we */ /* are supposed to return. */ /* Specs are parsed w/insignificant consecutive blanks and */ /* nested apostrophes (so apostrophe can be in command data) */ /* Before returning subfile, fill in missing data in cases where */ /* short records are allowed */ if (i == fptr->lastvar) tok = nxttok (next, fptr->item_separators, &next, fptr->item_alt_separators, FALSE, TRUE); else if (significant_consec_separators) { for (j = i; j < fptr->lastvar; j++) strcpy(tabvalues[varlist[j]],MISSING_VALUE_STRING); tok = NULL; } else err("Too few variables in data record.\n Record = ",buf); *more_string = next; free (buf); return tok; } void getobjectdata (varlist,fptr) int varlist[]; struct fileinfo *fptr; /* Copies data from an object into tabvalues. */ /* Variables filled in are the string of variables in varlist starting */ /* at fptr->firstvar & ending before fptr->lastvar. In the input */ /* object, these variables are stored sequentially beginning at */ /* objfirstvarlevel[this data level's object level]. They are */ /* scatterwritten into tabvalues at the appropriate spots (said */ /* spots are those in varlist) */ /* Uses global variables tabvalues, objfirstvarlevel & objvalues */ { int i,objvar; char *tok; objvar = objfirstvarlevel[fptr->objlevel]; for (i = fptr->firstvar; i < fptr->lastvar; i++) { tok = objvalues[objvar++]; COPY_INTO_FIXED_LEN_BUFFER(tabvalues[varlist[i]],tok,"datum too long"); } return; } char *getfixeddata (varlist,fptr,more_string) int varlist[]; struct fileinfo *fptr; char **more_string; /* Gets fixed field data from buffer into tabvalues. 
*/ /* Variables filled in are the string of variables in varlist starting */ /* at fptr->firstvar & ending before fptr->lastvar */ /* Returns pointer to string that spec's next level's file */ /* Also returns pointer to unprocessed buffer (if any) */ /* Uses global variables tabvalues, inpstarts, inpwidths, */ /* significant_embedded_separators, data_field_trim, */ { int field_term_pos; int i,len_tmp,var; char save_char,*tok,*endtok; char *tmp,*separators,*alt_separators; char *ptr = NULL; tmp = fptr->buf; separators = fptr->item_separators; alt_separators = fptr->item_alt_separators; len_tmp=strlen(tmp); for (i = fptr->firstvar; i < fptr->lastvar; i++) { var = varlist[i]; /* The interaction between fixed field and separators found */ /* in that fixed field is a bit complex. There can be */ /* strings of separators at the start of the string, the */ /* end of the string, and possibly embedded in the string */ /* The data_field_trim switch controls whether or not we */ /* strip off the separators at string start and string end. */ /* Regardless of this option, we find string start and */ /* string end and see if there is a separator in between. */ /* If so, the significant_embedded_separators switch */ /* determines whether this is an error condition (if TRUE, */ /* embedded separators are significant; ie, they define 2 fields; */ /* ie, there are 2 fields where there should be 1; ie, error. */ /* A field consisting entirely of separators is considered */ /* missing data provided we are trimming the field. If we */ /* are not trimming, then we give a field of separators! 
*/ if (inpstarts[var] < len_tmp) { if ( (field_term_pos=inpstarts[var]+inpwidths[var]) > len_tmp ) field_term_pos = len_tmp; endtok = tmp + field_term_pos; save_char = *endtok; *endtok = '\0'; tok = tmp + inpstarts[var]; tok = tok + strspn(tok,separators); if (*tok == '\0') { /* Field consists entirely of separators */ if (data_field_trim) tok = MISSING_VALUE_STRING; } else if ( (ptr = strpbrk(tok,separators)) != NULL ) { /* There is a separator between string start and field */ /* end. Find out if separator is embedded or trailing */ /* by finding last non-separator and comparing */ /* position to separator that put us here */ endtok = tmp + field_term_pos; while (strchr(separators,*(--endtok)) != NULL) ; if ( (++endtok > ptr) && significant_embedded_separators ) err("More than one datum in single fixed field.\n Field = ", tok); } /* Either trim end, or return to untrimmed start */ if (data_field_trim) *endtok = '\0'; else tok = tmp + inpstarts[var]; /* Tabs or returns almost guaranteed to cause trouble down */ /* the line in JGOFS processing, so kill it now */ if (strpbrk(tok,REQUIRED_SEPARATORS) != NULL) err ("May not have tabs or returns in fixed field. Field = ",tok); COPY_INTO_FIXED_LEN_BUFFER(tabvalues[var],tok,"datum too long"); tmp[field_term_pos] = save_char; } else strcpy(tabvalues[var],MISSING_VALUE_STRING); } /* Get next token as free format. This should ONLY be a subfile */ /* specification. 
Must begin after full-width record; that is, */ /* there cannot be "missing fields at end", followed by subfile */ /* spec (else we'd consider the subfile spec to be a datum) */ /* Subfile specs are parsed w/insignificant consecutive blanks */ /* and nested apostrophes (so apostrophe can be in command data) */ if (fptr->maxlenrec < len_tmp) endtok = nxttok(tmp+field_term_pos, separators, &ptr, alt_separators, FALSE, TRUE); else endtok=NULL; *more_string = ptr; return endtok; } void provide_null_data_from_now_on(file) struct fileinfo *file; { strdupl(&file->source,"no_more_data"," init of null data source"); file->source_type = CREATE_NULL_DATA; file->maxlenrec = 0; /* "nd"'s are free-field input */ return; } void set_to_stop_reading(lev) int lev; /* Handle termination situation where ioreadrec source has more info */ /* but we don't want it. If source is a file or a pipe supplying one */ /* level's worth of info, declare EOF. If source is an object (or */ /* some future (v 4.0) single-file-multi-level source), flush this */ /* level's data. If source is one of our manufactured oddities, */ /* nothing need be done. The actual closing of the file and other */ /* cleanup activities are done in the routine that handles real EOFs */ { int i; if (max_trace_level >= TRACE_PERFILE_ROUTINES) { sprintf(trace_msg,"set_to_stop_reading at level %d",lev); do_diag_trace(TRACE_PERFILE_ROUTINES,trace_msg); } switch (datalev[lev].source_type) { case CREATE_NULL_DATA: case INDIRECT_FILE_LINE: break; #if READ_OBJECTS case JGOFS_OBJECT: if (objeof) break; /* Each read returns data from the "next" level to the max */ /* level so read each time level you want isn't in that range */ /* jdbreada returns -1 for EOF, -999 for err, others?? Pos- */ /* itive vals are (I think) lowest "new" level filled in */ /* See jdb.c for more. 
*/
      /* Be interesting if somehow outer says "no more interest in */
      /* level N, but now have interest in level N+1" (or N again */
      /* w/o an intervening N-M) */
      /* "extra" i=sizeof, below, because some compilers don't like */
      /* &sizeof directly */
      /* Flush loop: every pass discards one read from the object.   */
      /* It ends once jdbreada_ hands back a level lower than        */
      /* datalev[lev].objlevel. A negative return also ends it as    */
      /* long as objlevel is non-negative - NOTE(review): confirm.   */
      /* objeof records whether the latest read returned a negative  */
      /* value. NOTE(review): err presumably does not return on the  */
      /* -999 path - confirm.                                        */
      while (datalev[lev].objlevel <= jdblev) {
        i = sizeof(objvalues[0]);
        jdblev = jdbreada_(&jdbunit,objvalues,&i);
        if (jdblev == -999)
          err("Read error while flushing JGOFS object ",datalev[lev].source);
        objeof = (jdblev < 0);
      }
      break;
#endif
    case DATA_FILE:
    case COMMAND_FILE:
      /* Only act if this level has not already reached end of data. */
      /* Setting eof is what later makes dataeod close the stream.   */
      if ( ! datalev[lev].eod) {
        datalev[lev].eof = TRUE;
        /* Next line should be unnecessary since presumably we are */
        /* not reading any more. Still not real clear to me what */
        /* happens if outer somehow re-expresses interest w/o */
        /* changing the level */
        datalev[lev].source_type = CREATE_NULL_DATA;
      }
      break;
    default:
      /* Put the raw source_type value into one_char_buf so it shows */
      /* up in the error text. NOTE(review): assumes one_char_buf is */
      /* a NUL-terminated buffer declared elsewhere - confirm.       */
      one_char_buf[0] = datalev[lev].source_type;
      err("defgb coding error: no skip logic for file source type ",one_char_buf);
  }
  /* Reached from every case that breaks out of the switch */
  datalev[lev].eod = TRUE;
  return;
}

void dataeod (lev)
int lev;
/* Handle eof action for ioreadrec. Amazingly enough, common to all */
/* input types (maybe!) [v 3.5a - fclose on pipes is failing] */
/* [v 4.0 - not sure fclose on pipes is failing. Maybe problem was */
/* the one addressed by v 4.0 mod] */
/* Uses global variable datalev */
/*                                                                  */
/* lev - index into datalev of the level that reached end of data. */
/* When datalev[lev].eof is set: closes datalev[lev].stream (a     */
/* failed close is reported through err with the errno text),      */
/* clears the open flag, polls once without blocking for a         */
/* finished child process, and resets errno to 0.                  */
/* When eof is not set: nothing is done beyond the optional trace. */
{
  if (max_trace_level >= TRACE_PERFILE_ROUTINES) {
    sprintf(trace_msg,"eod at level %d",lev);
    do_diag_trace(TRACE_PERFILE_ROUTINES,trace_msg);
  }
  /* NOTE(review): eof is not cleared in this routine, so a second  */
  /* call for the same level before it is re-opened would pass an   */
  /* already closed stream to fclose. Confirm that callers or       */
  /* open_datafile reset eof before that can happen.                */
  if (datalev[lev].eof) {
    if (fclose(datalev[lev].stream) != 0)
      err ("Failure to close level. errno msg is ",strerror(errno));
    datalev[lev].open=FALSE;
    /* Lazy process cleanup. If there happens to be a child of ours */
    /* that's done, test its status so it leaves zombie state */
    /* If no such process, we don't care. We may miss a few, */
    /* which will die at defgb exit - point is to avoid huge */
    /* number of children hanging around. waitpid is documented */
    /* as a POSIX standard function. 
If not available, */
    /* waitid can be used (if THAT'S available). If neither, */
    /* using wait is trickier because of possibility that */
    /* child is hung. WNOHANG is presumably defined in */
    /* sys/wait.h. Reset errno in case it had been set */
    /* by waitpid. In particular, can expect ECHILDs if */
    /* nothing is out there to be waited for. */
    /* pid -1 means any child; a NULL status pointer discards the  */
    /* exit status; WNOHANG makes the call return at once when no  */
    /* child has finished. At most one child is reaped per call.   */
    waitpid((pid_t)-1,NULL,WNOHANG);
    errno = 0;
  }
  return;
}

/****************** Begin ioreadrec_ *********************************/
/* */
/* ioreadrec is the routine most responsible for reading */
/* and manipulating the data corresponding to the variables set */
/* up in scanheader. */
/* ioreadrec also opens subfiles and calls scanheader to check */
/* them and prepare the variable list for this particular level */
/* (Subfiles need not contain all the variables specified at the */
/* top level) */
/* "Read record at appropriate level. Return 0 if end at that" */
/* "level. Return 1 if ok." */
/*                                                                */
/* level - pointer to the level number; it is dereferenced once  */
/*         into k, which then indexes datalev throughout.        */
int ioreadrec_(level)
int *level;
{
  void calc_timedate(),calc_latlon();
  char *save_comments,*tok,*endtok,*ptr;
  char *nextfile;
  char *unprocessed_input_ptr;
  int *nparams,*ntotal;
  int i,j,k;
  int nitem;
  /* Set TRUE further down only on the first call for a level, */
  /* that is, while datalev[k].nrecs is still 0 */
  Logical first_data_rec = FALSE;
  if (max_trace_level >= TRACE_IOREADREC) {
    sprintf (trace_msg,"ioreadrec %d",*level);
    do_diag_trace(TRACE_IOREADREC,trace_msg);
  }
  k = *level;
  /* By default, ignore any comments found by ioreadrec */
  save_comments=NULL;
  /* Initialization for this level if first time through */
  if (datalev[k].nrecs == 0) {
    if (! datalev[k].open) open_datafile(&datalev[k]);
    /* Variables for this level start where the previous level */
    /* ended (0 for level 0); lastvar starts out equal to firstvar */
    datalev[k].firstvar = (k == 0) ? 0 : datalev[k-1].lastvar;
    datalev[k].lastvar = datalev[k].firstvar;
    /* Unbraced nest of three ifs: the two else branches that */
    /* follow bind to the two inner ifs, not to this outer one */
    if (datalev[k].source_type != CREATE_NULL_DATA)
      /* Call scanheader if data files have variable lists. */
      /* If not, do relevant initialization here */
      if (k <= varlists_end)
        if (datalev[k].source_type == JGOFS_OBJECT) {
          scanheader_obj(k);
          /* Optimize if appropriate. Don't do level 0 since it was */
          /* done in ioopen_. Feh! 
Recode to move reopen_ back into */ /* open_datafile from whence it came. Solve varlist problem */ /* some other way (problem is to give complete obj varlist */ /* when reopen_ only returns projected part...) */ if ((datalev[k].objlevel == 0) && (k != 0)) reopen_object(&datalev[k]); } else scanheader(k); else { save_comments=""; /* set up to save comments found at file top */ /* Can't have variable permutation w/o a sublevel var list */ for (i = firstvarlevel[k]; i < firstvarlevel[k+1]; i++) pointers[i]=i; datalev[k].lastvar = firstvarlevel[k+1]; } mark_missing_vars(k); /* Not needed in the noheader case, but no harm */ first_data_rec = TRUE; /* .nrecs gets incremented for all recs... */ } if (no_interest_by_outer) { set_to_stop_reading(k); i = 0; } else { i = getrec_proccomment(&datalev[k],save_comments); } /* Fancy part of test below says "declare EOD after 1 line of */ /* defgb-generated 'nd's" */ if ( (i == 0) || ( (i < 0) && ( ! first_data_rec) ) ) { dataeod(k); return 0; } /* Process data record */ if (datalev[k].source_type == JGOFS_OBJECT) { getobjectdata (pointers,&datalev[k]); nextfile = NULL; /* Set for last-processed token test */ } else if (datalev[k].maxlenrec == 0) nextfile = getfreedata (pointers,&datalev[k],&unprocessed_input_ptr); else if (datalev[k].maxlenrec > 0) nextfile = getfixeddata (pointers,&datalev[k],&unprocessed_input_ptr); else errn ("Fixed-field/free-field status not determined for level ",k); /* "Data line" processed. See if we need to process another level */ if (k objmaxlev_opt ) provide_null_data_from_now_on(&datalev[k+1]); else /* Arrange that we won't reopen the object */ datalev[k+1].open = TRUE; break; default: if (nextfile == NULL) provide_null_data_from_now_on(&datalev[k+1]); else { /* Need source string and source type */ /* Set source to string at end of data record, then */ /* process it, getting source type. (Note that we need to */ /* do analysis here, while we know the relevant separator */ /* characters. 
Right now, all levels' are the same, but */ /* who knows?) */ strdupl(&datalev[k+1].source,nextfile," init of next data source"); analy_source(&datalev[k+1],datalev[k].item_separators,dirstring); /* What's next on line? (shouldn't be anything...tested */ /* below) */ /* unprocessed_input_ptr tells us if there's something after */ /* the next level's file, but it could just be blanks */ if ( (nextfile = unprocessed_input_ptr) != NULL ) { nextfile += strspn(nextfile,datalev[k].item_separators); if (*nextfile == '\0') nextfile=NULL; } } break; } } /* Should be no more data */ if (nextfile != NULL) err("Too many variables in data record\n Extra var = ",nextfile); /* Data processing for "special" variables */ calc_latlon(tabvalues,sizeof(tabvalues[0]),k, &outlonformat,&inlonformat,&outlatformat,&inlatformat); for (i=0; i .... */ /* so don't need to return -1 for those... */ /* */ /* There is a substantial temptation to alter this code so that we */ /* don't repeatedly recopy the saved comments after sending a comment */ /* back. This may be tricker than it looks, esp since we went to */ /* dynamically allocated tabcomments. First, we shuffle comments */ /* around in defgb. More importantly, though, we may add comments in */ /* between sending them back. Should be possible to recode, but use */ /* care. */ { char *at; if (*tabcomments == '\0') { tabcomments_ptr=tabcomments; return 0; } at=strchr(tabcomments,'\n'); if (at == NULL) { /* Should not be possible. WJS Mar 96 */ /* Therefore, reset pointer above! Duhh! WJS 10 Oct 96 */ strcpy(str,tabcomments); *tabcomments='\0'; tabcomments_ptr=tabcomments; } else { *at = '\0'; strcpy(str,tabcomments); strcpy(tabcomments,at+1); tabcomments_ptr=tabcomments+strlen(tabcomments); } return 1; } /* */ /************************ End defgb **********************************/