/*joinu from C. Hammond, from puddle.mit.edu 31 October 1996 r groman "It needs to be linked with outer.o (some variation - whatever you are calling yours) and jgofs.a which is usually in ../lib." */ /* Scheme/"interesting" details: */ /* 1) Read 2nd object into memory. */ /* 2) The variable list of the constructed object is the union */ /* of the variable lists of the 2 input objects. The */ /* "match set" of variables is the intersection of the 2 lists */ /* 3) Records of the 2 objects match when the values for all */ /* variables in the match set have the same value. */ /* Test is numeric if both vals are numeric; string else. */ /* Matching has nothing to do with levels - records that */ /* match are output w/the appropriate levelizing */ /* 4) Number of records in constructed object is number of */ /* records in the 1st object that have a match with some */ /* record in 2nd object. */ /* 5) It is possible that many records from each object have */ /* the same set of values for the match set of variables. */ /* In such cases, every record from the 1st object is */ /* matched with the first record from the 2nd object. Other */ /* records from the 2nd object do not appear in the con- */ /* structed object. */ /* 6) Except for the string test mentioned in 3, above, the */ /* functionality described in 1-5 was in the joinu code as */ /* of 30 Sep 98. */ /* WJS 10 Nov 98 */ char join_version[]="joinu version 2.1a 27 Sep 1999"; /* 27 Sep 99. WJS */ /* Bug fix: counting error. Again, amazed it even seemed to work! */ /* [Begin v 2.1a] */ /* 4 May 99. WJS */ /* Fix massive bug caused by assuming jdbread_ gives a "line" */ /* instead of a level */ /* Add version info */ /* Add iovaldouble_ entry */ /* Define required functions */ /* Change #includes to OPTIONS & INNEROPTIONS per other methods */ /* (this mod in official OO version but don't know how it got */ /* there) */ /* Incorporate CLH fix of 13 Nov 98 made to official OO version */ /* [Begin v 2.1] */ /* 17 Oct 97. WJS */ /* Dynamically allocate space for saved object */ /* Correct bad parametrization */ /* 2 Oct 97. WJS */ /* Dynamically shape values2 */ /* Add alpha join (assumes alpha vals are illegal numerically) */ /* Provide non-zero widths */ /* Allow tokens of size DATUMSIZE (instead of DATUMSIZE-1) */ /* JGOFS 1.5 mods: */ /* Call error_ instead of doing printf */ /* Change iovalreal_ alpha value test */ /* Add in_ to internal function names */ /* #include *default.h files & reparametrize accordingly */ /* 30 Sep 97. WJS */ /* Process any input width attributes, and add iowidths_ entry */ /* (per JGOFS 1.5) to return this information. */ /* Type the void io*_ entries */ /* Notes: */ /* 1) requires PATH_INFO control if used on local objects */ /* Otherwise, if the environment variable is set to */ /* .html or .flat, program breaks */ /* 2) does not propagate widths (or any other attributes) */ #include #include #include #ifdef VMS #include #include #else #include #endif /* Next statement should really be "if exactly one of HP or IBM is */ /* defined and also TRUE" but I'm tired... */ #if defined(HP) || defined(IBM) #define jdbopen_ jdbopen #define jdbreada_ jdbreada #define jdbclose_ jdbclose #define jdblevel_ jdblevel #define jdbattributes_ jdbattributes #define jdbcomments_ jdbcomments #endif int jdbopen_(); int jdbreada_(); int jdbclose_(); int jdblevel_(); int jdbattributes_(); int jdbcomments_(); #include OPTIONS #include INNEROPTIONS void error_(); /* ... entry in outer */ /* #define DEBUG */ #ifndef MEMORY_ALLOCATION_CHUNK #define MEMORY_ALLOCATION_CHUNK 41960 #endif #define POINTERS_CHUNK 100 struct { int pntr,index,level; } v[NVAR],tempv; double strtod(); int nlevels[2]; char names[2][NVAR][VARNAMESIZE+1]; int fldwidths[NVAR]; int namesize=VARNAMESIZE+1; int valuesize=DATUMSIZE+1; char *values; /* values[NVAR][DATUMSIZE+1]; ptr for convenience */ /* Dynamic memory scheme: */ /* Logically, we want values2 [nlines2+1] [nv2] [valuesize] */ /* We could allocate, say, NLINES worth, and, after filling it, */ /* allocate 2*NLINES, copy the original space into the new, free */ /* the original, etc. This is a lot of bytes to move, however. */ /* Instead, we make an array of pointers to chunks of size NLINES */ /* The pointers start out NULL. We allocate NLINES worth to the */ /* first pointer. When we exceed LINES, we allocate NLINES */ /* worth to the second pointer, and start using that. Logically */ /* this breaks up the [nlines2+1] dimension into 2 dimensions. */ /* One is "which chunk_size/NLINES chunk?". The other runs */ /* from 0 to NLINES-1 within each chunk. */ /* If we run out of pointers, we dynamically allocate more space, */ /* copy over, free, etc, but we do this to the pointers array, */ /* which is lots easier to move. */ /* There is a question of how much memory to allocate at once. */ /* I chose 41960 bytes, but it can be specified as a */ /* compilation constant (MEMORY_ALLOCATION_CHUNK). It */ /* should be tuned to the efficiency of the system's memory */ /* allocation scheme (actually, the system's malloc routine's */ /* allocation scheme). We allocate that much, then fit an */ /* integral number of lines into each chunk, and do not use the */ /* end of the chunk. Clearly it is possible to have some */ /* values that interact horribly with a particular object and/ */ /* or system, but we don't do any work on this (beyond being */ /* sure that the chunk will hold at least one line). */ char **values2; char *val2lineptr; int nlines2,nv2,linesize2; int nlines2perchunk; int handle[2]={ -1, -1}; int nt[2]; int match[NVAR]; int minlevelread; char *comments; /************************************************************************/ void in_get_width(width,object_selector,variable_number) int *width; int object_selector,variable_number; /* If width attribute for variable variable_number in object */ /* object_selector specifies a width > &width, set &width to that */ /* attribute value */ { int j; char str[ATTRSIZE]; while ( jdbattributes_(handle+object_selector, &variable_number, str) ) { if (strncmp(str,"width=",6) == 0) { if ( (j = atoi(str+6)) > *width ) *width = j; break; } } return; } int in_getrec(object_selector,buf) int object_selector; char *buf; { return jdbreada_(handle+object_selector,buf,&valuesize); } int in_get_chunk (object_selector,chunk,linesize,nlines,level_splits) int object_selector,linesize,nlines; int *level_splits; char *chunk; /* Reads, into chunk, up to nlines of size linesize from object */ /* associated with object_selector. */ /* Returns number of lines read (!= nlines equivalent to EOF) */ { int linecnt,j,lev; char *start_this_line; static char *start_previous_line; start_this_line = chunk - linesize; for (linecnt=0; linecnt infinity in steps of */ /* POINTERS_CHUNK */ int ichunk; /* Goes from 0 -> POINTERS_CHUNK. */ /* Actual chunk pointer is */ /* chunk_pointer_base + ichunk */ char *ptr; char **temp; char **object_chunk_ptrs; int allocate; object_chunk_ptrs = *data_array; allocate = (MEMORY_ALLOCATION_CHUNK > linesize) ? MEMORY_ALLOCATION_CHUNK : linesize; *nlinesperchunk = allocate/linesize; linecnt = 0; chunk_pointer_base = 0; ichunk = 0; while (1) { if (ichunk == 0) { /* Out of pointers to chunks. Allocate one POINTERS_CHUNK */ /* more than we have, move what we have, & free old space */ temp = object_chunk_ptrs; object_chunk_ptrs = (char **) malloc( (chunk_pointer_base + POINTERS_CHUNK) * sizeof(char *) ); if (object_chunk_ptrs == NULL) error_("Could not get memory for 'object_chunk_ptrs'",""); if (chunk_pointer_base != 0) { memcpy(object_chunk_ptrs,temp,chunk_pointer_base*sizeof(char *)); free (temp); } } /* Allocate a chunk of memory and fill it with a chunk's worth */ /* of lines from the object. */ /* Save pointer to chunk in object_chunk_ptrs */ ptr = (char *) malloc(allocate); if (ptr == NULL) error_("Could not get memory for 'object_chunk_ptrs[i]'",""); i = in_get_chunk(object_selector,ptr,linesize,*nlinesperchunk,splits); linecnt += i; object_chunk_ptrs[chunk_pointer_base+ichunk] = ptr; if (i != *nlinesperchunk) break; if (++ichunk == POINTERS_CHUNK) { chunk_pointer_base += POINTERS_CHUNK; ichunk = 0; } } *data_array = object_chunk_ptrs; jdbclose_(handle+object_selector); /* Original code made an extra line of "nd"s for some reason. If it */ /* supposed to "not match anything", "nd"s aren't really good - I */ /* suggest "**NoTrEaLdAtUm**" or some such. In any case, though, */ /* the returned line count was never incremented, so that line */ /* wasn't used. Just in case, commented out code follows... */ /* If needed, it must be modified for dynamic memory scheme */ /* for (i=0; i= 0) { val2ptr = val2lineptr + match[i]*valuesize; v0 = strtod(values + i*valuesize, &endptr); if (*endptr == '\0') v1 = strtod(val2ptr,&endptr); if (*endptr == '\0') { if (v0 != v1) break; } else { if (strcmp(values + i*valuesize, val2ptr) != 0) break; } } if (i == nt[0]) break; } return (testline==nlines2); } int ioopen_(params,nparams,ntotal) char *params[]; int *nparams,*ntotal; /* s[0..nparams-1]: parameter strings. Inner sets s[j][0]=0 for any strings which it processes; others will be processed by outer. Thus selection/projections would normally be ignored by inner. nparams: number of parameter strings ntotal (returned): total number of variable names */ { char *tmp; int i,j,j1,k,maxlev; int ih; /* Temp cumulative variable/level array for "second" object */ int *nvar2; /* Don't know why QUERY_STRING is squashed... */ putenv("QUERY_STRING="); /* Size data buffer big enough to hold a "line" of data */ values = (char *) malloc (NVAR*valuesize); if (values == NULL) error_("Could not get memory for 'values' buffer",""); /* Size a couple of buffers that hold object spec(s) */ /* We haven't parametrized that in defaults.h or innerdefaults.h */ /* (could use PATHSIZE) but our idea is to go dynamic anyway (& */ /* dynamic is better!) */ /* comments never used, but leave it in for now ... length is */ /* length of "join ", a blank, a terminating null, and params */ i = strlen(params[0]); j = strlen(params[1]); comments = (char *) malloc (5 + i + 1 + j + 1); if (comments == NULL) error_("Could not get memory for 'comments' buffer",""); strcpy(comments,"join "); strcat(comments,params[0]); strcat(comments," "); strcat(comments,params[1]); /* tmp needs to hold each spec consecutively, so size to max */ tmp = (i > j) ? (char *)malloc(i+1) : (char *)malloc(j+1); if (tmp == NULL) error_("Could not get memory for 'tmp' buffer",""); for (ih = 0; ih <= 1; ih++) { strcpy(tmp,params[ih]); nt[ih] = -NVAR; nlevels[ih] = jdbopen_(handle+ih,tmp,names[ih],&namesize,nt+ih); if (nlevels[ih]<0) error_("Bad Object",params[ih]); params[ih][0] = 0; } free (tmp); for (i = 0; i < nt[0]; i++) { for (j = 0; j < nt[1]; j++) if (!strcmp(names[0][i],names[1][j])) break; if (j == nt[1]) match[i]= -1; else { match[i] = j; names[1][j][0] = '\0'; } } k = 0; maxlev = 0; for (i = 0; i < nt[0]; i++) if (match[i] == -1) { v[k].level = jdblevel_(handle,&i); if (v[k].level > maxlev) maxlev=v[k].level; v[k].index = 0; v[k++].pntr = i; } else { j = jdblevel_(handle,&i); j1 = jdblevel_(handle+1,match+i); v[k].level = (j >= j1) ? j : j1; if (v[k].level > maxlev) maxlev=v[k].level; v[k].index = 0; v[k++].pntr = i; } for (i = 0; i < nt[1]; i++) if (names[1][i][0]) { v[k].level = jdblevel_(handle+1,&i); if (v[k].level > maxlev) maxlev=v[k].level; v[k].index = 1; v[k++].pntr = i; } for (i = 0; i < k-1; i++) for (j = i+1; j < k; j++) if (v[i].level > v[j].level) { tempv = v[i]; v[i] = v[j]; v[j] = tempv; } /* See if there are any width attributes in either input object */ /* for each variable. Use the max of the length of the variable */ /* name, the attribute in object 1 (if any) and the attribute in */ /* object 2 (if any). */ for (i = 0; i < k; i++) { fldwidths[i] = strlen ( names [v[i].index] [v[i].pntr] ); in_get_width ( &fldwidths[i], v[i].index, v[i].pntr ); if (match[i] >= 0) in_get_width ( &fldwidths[i], 1, match[i] ); } ih = 0; minlevelread = maxlev+1; nv2 = nt[1]; linesize2 = nv2*valuesize; /* For the in-memory object, want cumulative variable/level array */ /* 0th value is 0; = # vars before the 0th level; etc */ nvar2 = (int *) malloc ( (nlevels[1]+1)*sizeof(int) ); if (nvar2 == NULL) error_("Could not get memory for 'nvar2' buffer",""); /* Generate "level splits" for this object */ for (i = 0; i <= nlevels[1] + 1; i++) nvar2[i] = 0; for (i = 0; i < nv2; i++) ++ nvar2 [ jdblevel_(handle+1,&i)+1 ]; nlines2 = in_get_entire_object(1,&values2,linesize2,&nlines2perchunk,nvar2); free (nvar2); *ntotal=k; return maxlev; } int ioreadrec_(level) int *level; /* Read record at appropriate level. Return 0 if end at that level. Return 1 if ok. */ { static int eofflag = 0; int level0,itest; if (eofflag) return 0; if (*level == minlevelread) { minlevelread++; return 1; } else if (*level > minlevelread) return 0; itest=1; while (itest) { level0 = in_getrec(0,values); if (level0 < 0) { eofflag=1; return 0; } if (level0 <= *level) { minlevelread = level0; itest = in_iotest(); } } if (minlevelread < *level) return 0; minlevelread++; return 1; } void ioclose_() /* Close files */ { jdbclose_(handle); } void iovalstr_(vn,tmp) int *vn; char *tmp; /* Return string value (tmp) for variable indexed by vn. */ /* val2lineptr is implicit input from ioreadrec_. It points to the */ /* line from the entirely-read-in object that iovalstr_ is getting */ /* values from. A pointer to the line of the "normal" object is */ /* implicit in that its values array is filled by ioreadrec_ from its */ /* current line */ { char *s; s = (v[*vn].index == 0) ? values : val2lineptr; s += v[*vn].pntr*valuesize; strcpy ( tmp, s+strspn(s," ") ); return; } void iovaldouble_(vn,f) int *vn; double *f; { char tmp[DATUMSIZE+1]; char *endptr; iovalstr_(vn,tmp); *f = strtod(tmp,&endptr); if (*endptr != '\0') *f = -9999.0; return; } void iovalreal_(vn,f) int *vn; float *f; /* Return real value (f) for variable indexed by vn. -9999 for strings */ { double df; iovaldouble_(vn,&df); *f = df; return; } int iovarlevel_(vn) int *vn; /* Return level corresponding to variable indexed by vn. */ { return v[*vn].level; } int ioattrout_(vn,str) int *vn; char *str; /* Output attributes for variable indexed by vn. */ { return 0; } void ioname_(vn,s) int *vn; char *s; /* Return name (s) corresponding to variable number vn. */ { strcpy ( s, names [ v[*vn].index] [v[*vn].pntr] ); return; } int iocommout_(s) char *s; { return 0; } int iowidth_(vn) int *vn; /* "Return length of variable field indexed by vn" [from outer, JGOFS 1.5] */ { return fldwidths[*vn]; }