/* makemat */ /* JGOFS data -> matlab .mat file capability */ /* Original version "knew" matlab format. This version uses */ /* matlab library. */ /* Arg 1 - object spec */ /* Arg 2 - (optional) comma-separated list of variable names to be */ /* treated as alphas, or keyword #NONE#. Elements of this */ /* list are NOT checked for being legit varnames for the */ /* object. */ /* In absence of this arg, value of var that occurs in */ /* 1st rec is examined. If the value is missing, data */ /* are assumed numeric (compatibility w/orig makemat.c) */ /* Note that problems can still occur if an alpha */ /* variable's first value happens to be numeric (eg, */ /* "standard station" variable) */ /* Switch -maxwidth[=width] - (optional; any position in arg list) */ /* Use specified value for width of each alpha variable */ /* If switch value is missing, the "JGOFS system maximum" */ /* is assumed. If the switch value is > "maximum", */ /* "maximum" is used - see note below. */ /* In absence of switch (or switch value <= 0), max of all */ /* "width" attributes is used. If any width attribute is */ /* missing or > "maximum", "maximum" is used. */ /* At issue is size of in-memory copy of the object */ /* and the size of the matlab output file */ /* Note: if the "maximum" for makemat's installation is */ /* < "maximum" for installation serving object, problems */ /* can still occur which cannot be overcome by use of this */ /* switch */ /* Output goes to /dev/stdout */ /* A missing numeric datum gets the matlab "mxGetNaN value". */ /* A missing alpha datum is copied, so it gets the JGOFS missing */ /* value (currently (Aug 05) "nd") */ /************************************************************************/ #define MAKEMAT_VERSION "makemat v 1.7b 21 Jul 2016" /* 21 Jul 16. WJS */ /* Comment change/clarification */ /* 3 Mar 10. WJS */ /* Bug fix: improper return of jdbopen status if error */ /* [Begin v 1.7b] */ /* 29 Oct 09. WJS */ /* Bug fix: widths stuff not working thanks to 'for (i==0;'. Sigh */ /* [Begin v 1.7a] */ /* 10 Sep 09. WJS */ /* Change some diagnostic text per N Copley suggestion (and */ /* then my reaction to how that looked) */ /* 5 Sep 09. WJS */ /* 1.6 not particularly well done, esp w/regard to what happens */ /* if a var is too wide. makenetcdf did this better - steal. */ /* Includes (and enhances) -maxwidth switch */ /* Bug fix: err msgs did not include ID info */ /* [Needs core.h 2.0c] */ /* [Begin v 1.7] */ /* 19 Feb 09. WJS */ /* Try a memory optimization which assumes that each var in an */ /* input line is <= max (widths of all vars). Only an assump- */ /* tion since width is display width and has nothing necessarily */ /* to do w/actual data width (eg, if comments are last var, */ /* width=1 will serve just fine as a display width) */ /* However, should be a pretty good guess. Note that a better */ /* job can be done if we use each col's width for each col. */ /* However, that requires redoing the "matrix" logic, & this is */ /* easy. */ /* [Begin v 1.6] */ /* 5 Aug 08. WJS */ /* Allow strings w/trailing blanks to be considered numeric */ /* [Needs utils 2.4] */ /* [Needs core.h 2.0] */ /* 30 May 08. WJS */ /* matlab lib interface changed w/matlab 6.5. Try to make this */ /* work for all based on a compile-time switch */ /* [Begin v 1.5] */ /* 20 Aug 05. WJS */ /* Documentation addition; err msg mod */ /* 18 Aug 05. WJS */ /* Add optional 2nd arg, which is a list of variables to be */ /* downloaded as alphas (or keyword #NONE#). This allows users */ /* to manually deal w/fact that JGOFS doesn't know about data */ /* types and that default algorithm (still applied in absence */ /* of arg 2) can fail. Added at request of Bob (after */ /* mentioning possibility) */ /* [Needs utils 2.0] */ /* [Begin v 1.2] */ /* 6 Aug 05. WJS */ /* Need some more function defn/declaration stuff */ /* Get level splitting routine from library */ /* [Needs utils 1.9] */ /* [Begin v 1.1a] */ /* 5 Mar 05. WJS */ /* Get some stuff from common locations */ /* Put in version function (and include makemat there) */ /* Try to get rid of a compiler warning */ /* [Needs utils 1.8; utils.h] */ /* [Begin v 1.1] */ /* 25 Sep 03. WJS */ /* Bug fix: error message didn't print object name */ /* Bug fix: bad formatting of error message */ /* [Begin v 1.0a] */ /* 16 Sep 03. WJS */ /* [Needs defgb_utils 1.6] */ /* [Needs defgb.h 4.8] */ /* [Begin v 1.0] */ /* Code to read whole object into memory comes from join 2.6 */ /* This became a template for "working on JGOFS stuff by column" */ /* which was called "transpose", because of most direct application */ /* Use, however, is driven by matlab application. Accordingly, we */ /* abandon the attempt to generalize... (Aug 05) */ #include "utils.h" #include "jdbfuncdefns.h" #include INNEROPTIONS #include "mat.h" #include "matrix.h" /* for mxGetNaN... might not be needed */ /* utils routines */ Logical add_id_to_err(); void errn(); int *level_splits(); char *buildstring(); char *lengthen_str(); char *lengthen_str_nl(); Logical string_in_list(); Logical string_is_numeric(); /* #define DEBUG */ #ifndef MEMORY_ALLOCATION_CHUNK #define MEMORY_ALLOCATION_CHUNK 41960 #endif #define POINTERS_CHUNK 100 int nlevels; char names[NVAR][VARNAMESIZE+1]; int namesize=VARNAMESIZE+1; char *fullsize_linebuf; int fullsize_datum = DATUMSIZE + 1; int valuesize; /* Dynamic memory scheme: */ /* Logically, we want values [nlines+1] [nvar] [valuesize] */ /* We could allocate, say, NLINES worth, and, after filling it, */ /* allocate 2*NLINES, copy the original space into the new, free */ /* the original, etc. This is a lot of bytes to move, however. */ /* Instead, we make an array of pointers to chunks of size NLINES */ /* The pointers start out NULL. We allocate NLINES worth to the */ /* first pointer. When we exceed LINES, we allocate NLINES */ /* worth to the second pointer, and start using that. Logically */ /* this breaks up the [nlines+1] dimension into 2 dimensions. */ /* One is "which chunk_size/NLINES chunk?". The other runs */ /* from 0 to NLINES-1 within each chunk. */ /* If we run out of pointers, we dynamically allocate more space, */ /* copy over, free, etc, but we do this to the pointers array, */ /* which is lots easier to move. */ /* There is a question of how much memory to allocate at once. */ /* I chose 41960 bytes, but it can be specified as a */ /* compilation constant (MEMORY_ALLOCATION_CHUNK). It */ /* should be tuned to the efficiency of the system's memory */ /* allocation scheme (actually, the system's malloc routine's */ /* allocation scheme). The actual allocation quantity is mod- */ /* fied up if necessary to hold at least one line, then further */ /* modified down to exactly hold an integral number of lines */ char **values = NULL; int nlines,linesize; int nlinesperchunk; int handle= -1; int nvar; int *level_this_line = NULL; /* Which level closest to 0 changed */ /* for each flat "line" */ /************************************************************************/ char *makemat_return_vers() /* Dummy routine. Exists only to force .h file version string into */ /* this module. Note string must not be global or we'll have con- */ /* flicts if another routine similarly includes the version string */ { static char version[] = MAKEMAT_VERSION"/"FULL_UTILSH_VERSION"/"FULL_JDBFUNCDEFNSH_VERSION; return version; } void err(s,t) char *s,*t; { char *ss,*tt; add_id_to_err(&ss,&tt,s,t,MAKEMAT_VERSION); printf ("%s\n%s\n",ss,tt); exit(ERROR_EXIT_STATUS); } void testsize (size,max,name,datum) /* Slightly different from makenetcdf's from which it came */ int size,max; char *name,*datum; { if (size <= max) return; char *err_ptr_1,*err_ptr_2; char *limit_exceeded; char *suggest_using_flag; /* In a situation on globec, err_ptr_2 was NOT init'ed to NULL on */ /* entry into testsize. ? */ err_ptr_1 = err_ptr_2 = NULL; suggest_using_flag = (max == fullsize_datum) ? NULL : "using -maxwidth switch or "; limit_exceeded = (max == fullsize_datum) ? "JGOFS system max" : "max of object's variables' width attributes values"; err_ptr_1 = lengthen_str ( err_ptr_1, "Length of datum for variable ", name, 500, "Too long error, part 1_1"); err_ptr_1 = lengthen_str_nl ( err_ptr_1, " exceeds ", limit_exceeded, 0, "Too long error, part 1_2"); err_ptr_2 = lengthen_str_nl ( err_ptr_2, "Datum = ", datum, 500, "Too long error, part 2_1"); err_ptr_2 = lengthen_str ( err_ptr_2, "Consider ", suggest_using_flag, 0, "Too long error, part 2_2"); err_ptr_2 = lengthen_str ( err_ptr_2, "not including this variable", NULL, 0, "Too long error, part 3_2"); err(err_ptr_1,err_ptr_2); exit(1); /* Not that it should get here... */ } int in_get_width(object_selector,variable_number) int object_selector,variable_number; /* Return as function value the width attribute found for variable */ /* "variable_number" in object whose jdbxxx_ "handle" is */ /* "object_selector". If no width attribute, return 0. If > 1 */ /* attribute (! - shouldn't be), return value of first one */ { int j; Logical ok; char str[ATTRSIZE]; j = 0; while ( jdbattributes_(&object_selector, &variable_number, str) ) if (strncmp(str,"width=",6) == 0) { GET_INTEGER_FROM_STRING(j,str+6,ok); if ( ! ok) err ("Illegal width attribute ",str); break; } return j; } int in_get_chunk (object_selector,chunk,linesize,nlines,level_splits, count,level_per_line) int object_selector,linesize,nlines; int *level_splits,*count,*level_per_line; char *chunk; /* Reads, into chunk, up to nlines of size linesize from object */ /* associated with object_selector. */ /* Returns number of lines read (!= nlines equivalent to EOF) */ { int linecnt,j,lev; char *start_this_line,*fullsize_linebuf_ptr,*chunk_buffer_ptr; static char *start_previous_line; start_this_line = chunk - linesize; for (linecnt=0; linecnt infinity in steps of */ /* POINTERS_CHUNK */ int ichunk; /* Goes from 0 -> POINTERS_CHUNK. */ /* Actual chunk pointer is */ /* chunk_pointer_base + ichunk */ char *ptr; char **temp; char **object_chunk_ptrs; int allocate; object_chunk_ptrs = *data_array; if (MEMORY_ALLOCATION_CHUNK <= linesize) { allocate = linesize; *nlinesperchunk = 1; } else { *nlinesperchunk = MEMORY_ALLOCATION_CHUNK /linesize; allocate = *nlinesperchunk * linesize; } linecnt = 0; chunk_pointer_base = 0; ichunk = 0; while (1) { if (ichunk == 0) { /* Out of pointers to chunks. Allocate one POINTERS_CHUNK */ /* more than we have */ object_chunk_ptrs = (char **) realloc( object_chunk_ptrs, (chunk_pointer_base + POINTERS_CHUNK) * sizeof(char *) ); if (object_chunk_ptrs == NULL) errn("Could not get memory for 'object_chunk_ptrs'. nbytes=", (chunk_pointer_base + POINTERS_CHUNK) * sizeof(char *)); } /* Allocate a chunk of memory and fill it with a chunk's worth */ /* of lines from the object. */ /* Save pointer to chunk in object_chunk_ptrs */ ptr = (char *) malloc(allocate); if (ptr == NULL) errn("Could not get memory for 'object_chunk_ptrs[i]'. nbytes=", allocate); /* Need memory for each line in this chunk, too */ level_this_line = (int *) realloc ( level_this_line, (linecnt + *nlinesperchunk) * sizeof (int *) ); if (level_this_line == NULL) errn("Could not get memory for 'level_this_line'. nbytes=", (linecnt + *nlinesperchunk) * sizeof (int *)); i = in_get_chunk(object_selector,ptr,linesize,*nlinesperchunk,splits, &linecnt,level_this_line); object_chunk_ptrs[chunk_pointer_base+ichunk] = ptr; if (i != *nlinesperchunk) break; if (++ichunk == POINTERS_CHUNK) { chunk_pointer_base += POINTERS_CHUNK; ichunk = 0; } } *data_array = object_chunk_ptrs; jdbclose_(&object_selector); return linecnt; } /* In matlab 6.5, the mxSetName and matPutArray functions (macros?) */ /* became undefined, apparently replaced with the matPutVariable */ /* function (or macro). Try to write code that will work under */ /* either. Default to newer stuff. */ /* Prefer NOT to use a MATLAB65_OR_LATER macro - who knows what */ /* else that will be needed for. If we end up w/such a thing, we */ /* can use it to set USE_MXSETNAME below */ /* Note that we still need to compile the Matlab routines w/diff- */ /* erent libraries; ie, mxSetName needs to be compiled w/either */ /* matlab 5 or matlab 6.1 libs; matPutVariable needs compilation w/ */ /* matlab 6.5 or matlab 7.3, etc. For that reason we can't have a */ /* single separately compiled object for name_and_fill_vector */ void name_and_fill_vector(matfile,name,mxarray) char *name; MATFile *matfile; mxArray *mxarray; { #ifndef USE_MXSETNAME #define USE_MXSETNAME FALSE #endif #if USE_MXSETNAME mxSetName(mxarray,name); matPutArray(matfile,mxarray); #else matPutVariable(matfile,name,mxarray); #endif return; } int main(argc,argv) char **argv; int argc; { int iline,ivar; int chunk,line_in_chunk; int i,j,maxlevel; int *splits; char *ptr,*object_name; Logical any_bad_numeric_vals; int *n_bad_numeric_vals; char *width_switch; int lenswitch; MATFile *pmat; mxArray *pa,*pn; /* const in next line does not make sense to me. It's there to match */ /* the arg defn of the appropriate matlab routine. It would seem to */ /* mean that the characters ultimately pointed to cannot be changed, */ /* whereas we want to get them from our object. */ /* It's the concept in C that I don't understand. Consider the ex- */ /* ample from Harbison & Steele, which reflects what's going on here: */ /* const int *pc; */ /* int *p; */ /* *pc = 5; Illegal */ /* *p = 5; Legal */ /* pc = p; Legal */ /* That which was pointed to by pc is clearly changeable. Guess it's */ /* just some kind of programmer helper to make it harder to mod things */ /* if you take the trouble to declare them const */ const char **column_as_alphas; double *column_as_doubles; char *matfile = "/dev/stdout"; Logical column_is_numeric; Logical all_numeric; Logical status,switch_no_value,switch_with_value; char *alpha_var_list; #define ALPHA_VAR_LIST_SEP ',' /* Next special string needs to differ from any legit var name. Also, */ /* must not include ALPHA_VAR_LIST_SEP */ #define ALPHA_VAR_LIST_NONE "#NONE#" /* Don't know why QUERY_STRING is squashed... */ /* Presumably we don't want QUERY_STRING getting to the child */ /* processes accessing local input objects. However, that should */ /* only be a problem if the children think they're in html mode, */ /* and that's already the PATH_INFO control problem alluded to */ /* in the 30 Sep 97 comment in join.c ... */ putenv("QUERY_STRING="); width_switch = "-maxwidth"; lenswitch = strlen(width_switch); object_name = NULL; valuesize = -1; /* "undetermined" flag */ /* Below a bit fancier than we need for 2 args and a switch, but */ /* makes it easier to add args and makes it look like makenetcdf */ /* (from which it is stolen and enhanced) */ for (i = 1; i < argc; i++) { if (argv[i][0] == '-') { switch_with_value = switch_no_value = FALSE; if (strncmp(argv[i],width_switch,lenswitch) == 0) { switch_no_value = (*(argv[i]+lenswitch) == '\0'); switch_with_value = (*(argv[i]+lenswitch) == '='); } if (switch_with_value) { if (valuesize >= 0) err (width_switch," specified more than once"); if (*(argv[i]+lenswitch+1) == '\0') err ("No value after = in switch ",argv[i]); GET_INTEGER_FROM_STRING(valuesize,argv[i]+lenswitch+1,status); if ( ! status) err ("Illegal number after = in switch ",argv[i]); } else if (switch_no_value) { if (valuesize >= 0) err (width_switch," specified more than once"); valuesize = DATUMSIZE; } else { err ("Illegal switch ",argv[i]); } } else { if (object_name == NULL) { object_name = argv[i]; } else if (alpha_var_list == NULL) { alpha_var_list = argv[i]; } else { err ("Too many non-switch arguments. First bad arg = ",argv[i]); } } } if (object_name == NULL) err("Missing input object name",""); if (alpha_var_list != NULL) { all_numeric = (strcmp(alpha_var_list,ALPHA_VAR_LIST_NONE) == 0); if (string_in_list(alpha_var_list,ALPHA_VAR_LIST_NONE) && ! all_numeric) err("Cannot supply an alpha variable along with the \ 'no alpha variables' choice. Selections were ",alpha_var_list); } nvar = -NVAR; maxlevel = jdbopen_(&handle,object_name,names,&namesize,&nvar); if (maxlevel < 0) errn( buildstring("jdbopen failure on object ", object_name, "\njdbopen return code="), maxlevel ); nlevels = maxlevel + 1; n_bad_numeric_vals = (int *)malloc(nvar * sizeof(int)); if (n_bad_numeric_vals == NULL) errn ("Could not get memory for bad numeric vals buffer. nbytes=", nvar * sizeof(int)); if (valuesize <= 0) { for (i=0; i valuesize) valuesize = j; } } if (valuesize > DATUMSIZE) valuesize = DATUMSIZE; if (valuesize < DATUMSIZE) { fullsize_linebuf = (char *) malloc (nvar * fullsize_datum); if (fullsize_linebuf == NULL) errn ("Could not get memory for full size line buffer. nbytes=", nvar * fullsize_datum); } /* allow for trailing \0. Will make a DATUMSIZE into fullsize_datum */ valuesize++; linesize = nvar*valuesize; /* Need cumulative variable/level array */ splits = level_splits(handle,maxlevel,nvar); if (splits == NULL) errn("Could not get memory for 'splits' buffer. nbytes=", (nlevels + 1)*sizeof(int)); nlines = in_get_entire_object (handle,&values,linesize,&nlinesperchunk,splits); free (splits); column_as_alphas = (const char **) malloc (nlines * sizeof (char *)); if (column_as_alphas == NULL) errn ("Could not get memory for column of char ptrs. nbytes=", nlines * sizeof(char *) ); pn = mxCreateDoubleMatrix(1,nlines,mxREAL); if (pn == NULL) errn("Could not mxCreateDoubleMatrix. ndoubles=",nlines); column_as_doubles = (double *)mxGetPr(pn); errno = 0; pmat = matOpen(matfile,"w"); if (pmat == NULL) err(buildstring("Could not open ", matfile, "for write. errno=", "open_matfile_write_failure"), strerror(errno)); any_bad_numeric_vals = FALSE; for (ivar = 0; ivar < nvar; ivar++) { if (alpha_var_list == NULL) { column_is_numeric = string_is_numeric(values[0]+ivar*valuesize,TRUE); } else { column_is_numeric = (all_numeric) ? TRUE : ( ! string_in_list(alpha_var_list,names[ivar],ALPHA_VAR_LIST_SEP)); } n_bad_numeric_vals[ivar] = 0; chunk = 0; iline = 0; while (iline < nlines) { ptr = values[chunk]+ivar*valuesize; for (line_in_chunk = 0; line_in_chunk < nlinesperchunk; line_in_chunk++) { if (column_is_numeric) { GET_NUMBER_FROM_STRING(column_as_doubles[iline],ptr,status); if ( ! status) { column_as_doubles[iline] = mxGetNaN(); if (strcmp(ptr,MISSING_VALUE_STRING) != 0) { any_bad_numeric_vals = TRUE; n_bad_numeric_vals[ivar]++; } } } else { /* Note that "nd" just goes in as data. Oh well... */ column_as_alphas[iline] = ptr; } if (++iline == nlines) break; ptr += linesize; } chunk++; } /* Looks to me like I can get away with reusing the numeric */ /* matlab array because it's always the same size. The string */ /* thing looks like it creates a 2 dimensional char array of */ /* size nlines x maxlen_any_string_of_data. Since that varies */ /* in each column, array is different for each use... */ if (column_is_numeric) { name_and_fill_vector(pmat,&names[ivar],pn); } else { pa = mxCreateCharMatrixFromStrings(nlines, column_as_alphas); name_and_fill_vector(pmat,&names[ivar],pa); mxDestroyArray(pa); } } mxDestroyArray(pn); matClose(pmat); if (any_bad_numeric_vals != 0) { char *msg1; msg1 = (alpha_var_list == NULL) ? "guessed to be" : "specified as"; fprintf (stderr, "%s %s %s %s %s", "\nFollowing variables", msg1, "numeric but had some non-missing, non-numeric values\n\n", " variable name :", "number_of_abnormal_instances_of_this_variable\n" ); for (ivar = 0; ivar < nvar; ivar++) { if (n_bad_numeric_vals[ivar] != 0) fprintf (stderr,"%s : %d\n",names[ivar],n_bad_numeric_vals[ivar]); } fprintf (stderr, "%s%s", "\nYou may wish to do another JGOFS->matlab conversion\n", "explicitly specifying the above variables to be non-numeric\n" ); } exit(EXIT_SUCCESS); }