/* jgofs_selection_parser.c */ /* from parse v 2.2 */ /* Separated test code from parse code to avoid precision issues */ /* Turns out it was better to REALLY avoid the precision issues, and */ /* things could have stayed as one module, but by that time other */ /* changes had been made /* Rename source modules to avoid having a "test.c" */ /* Start new versions at 2.5 for both modules */ /* parse.c */ /* parse and test sections from outer v 3.0a Jun 07 */ /* Separated out so that inners can parse selections. Of interest */ /* for optimization purposes to be sure that vars in selections are */ /* not projected out prematurely. Turns out this premature stuff */ /* would help outer, too, and is much more critical there! (Apr 09) */ #define JG_SEL_PARSER_VERSION "jg_sel_parse v 2.5 15 May 2009" /* 15 May 2009. wjs */ /* Check sizes as we add to a couple of arrays */ /* Split tester from parser (see above) */ /* No longer need per-precision variants */ /* [Needs parse.h v 2.2] */ /* [Begin v 2.5] */ /* 19 Apr 2009. wjs */ /* Bug fix: Type of var being returned needs to match func type */ /* 3 Apr 2009. wjs */ /* Modify jgofs_selection_test to return "Unknown" when it's */ /* unknown if record matches selection criteria. This happens */ /* when criteria are at a lower level than the record being */ /* tested. Previous behavior was to try to return */ /* "accept record" in such cases. The attempt did not succeed */ /* in cases where the "not" logical operator was used */ /* Increase size of stack in anticipation of more parenthesized */ /* stuff from the OOserver subselection page */ /* Change jgofs_selection_test function type to Logical from int */ /* Move more functions to parse_function_struct to allow parse */ /* to be used in a more standalone fashion */ /* Accept commented out old parse code formerly in outer */ /* [Needs parse.h v 2.1] */ /* [Begin v 2.2] */ /* 11 Feb 2009. wjs */ /* Need single and double precision versions */ /* As a consequence, move precision-independent stuff out */ /* As a consequence of the implementation lexically redefining */ /* entry names, change said names so "test" is not globally replaced */ /* in some innocent source code */ /* [Begin v 2.1] */ /* 2 May 2008. wjs */ /* Bug fix: uppercase copy of string operator was 1 char buf ovf */ /* [Begin v 2.0a] */ /* 25 Oct 2007. wjs */ /* Declare some functions */ /* 1 Aug 2007. wjs */ /* V 2.0 is kind of a punt. Code is certainly the "original", but */ /* much has been added (string ops, for one big thing). */ /* [Begin v 2.0] */ /* P-code form: 128..255 exec test(n-128) [negative #'s on Sun] [negativity not used in this code-fortunately. WJS] 1=or,2=and,3=not 0: end */ #include "parse.h" /* Function in parse_utils */ char *analyze_wilds(); /* To allow parse to be used standalone, we don't want inner or */ /* outer functions (like err) to be used by parse. Such functions */ /* are in the parse_functions data structure */ char *jgofs_selection_parser_return_vers() /* Dummy routine. Exists only to force .h file version string into */ /* this module. Note string must not be global or we'll have con- */ /* flicts if another routine similarly includes the version string */ { static char version[] = JG_SEL_PARSER_VERSION"/"FULL_PARSEH_VERSION; return version; } void jgofs_selection_parser(s,parse_data_struct,parse_function_struct) char *s; struct parse_data *parse_data_struct; struct parse_functions *parse_function_struct; /* Description is approximate. Seems to me it can do */ /* weird things with not... */ /* Parser has 4 states: variable or open paren */ /* comparison operator */ /* thing being compared */ /* logical operator or close paren or end */ /* Idea is to get next token, determine its type, the process it */ /* depending on the state the parser is in (possibly changing state) */ /* Odors observed or speculated about emanating from code */ /* 1) No escape mechanism for special characters. Therefore */ /* string ops cannot look for parens, etc. Don't confuse */ /* this w/escape mechanism in token that represents a */ /* "like" expression... */ /* 2) Suspect that we can't test for strings that conflict w/ */ /* our comparison operators (eg can't say op eq like) */ { #define OPSTACKSIZE 100 /* Depth of operation stack; eg (a+b)*(c+d) */ /* needs a depth of 3 */ /* Within each of next 4 arrays, "equivalent" strings (eg, != & <>) */ /* are grouped together. "Group size" is constant (eg, see empty */ /* strings in testop where there are no equivalent strings). Groups */ /* are ordered to drive "switch" statements in code, so order alter- */ /* ation means code alteration. Infix array is ordered by operation */ /* priority as well. testop is parallel to teststrop as far as it */ /* goes. testop defines numeric comparisons; teststrop string */ /* comparisons. */ /* Each array ends with NULL */ /* Recoded not to use prefix array, since "not" prefix (and any */ /* future one like it) must be tested for in variable section, */ /* while ! is tested for in "normal" special character section */ static char *prefix[]={"!","not",NULL}; static char *infix[]={"||","or","|","&&","and","&",NULL}; static char *testop[]={"<","","=","==",">","","<=","","<>","!=",">=","",NULL}; static char *teststrop[]={"lt","eq","gt","le","ne","ge","contains", "is_contained_in","begins_with","ends_with","like",NULL}; #define SIZE_PREFIX_GROUP 2 #define SIZE_INFIX_GROUP 3 #define SIZE_TESTOP_GROUP 2 /* Next is a non-null character we add to a selection string to */ /* indicate end-of-string. Accordingly, choose a character that */ /* will not normally be found in selection string (formerly used a */ /* $... either we got away w/it for a long time or my analysis is */ /* wrong) */ /* Note: we choose character at compile time so we can have switch */ /* statements use it */ #define SELECTION_STRING_END 1 /* Leading X is space to receive SELECTION_STRING_END. Gave up */ /* trying to get string generated at compile time */ char selection_token_terminators[] = "X ()=<>|&!"; int opstkp; double f; int opstack[OPSTACKSIZE]; int opprior[OPSTACKSIZE]; char *s1,*t,*t1; char tok[TOKEN+1],tmp[TOKEN+1]; int i,state,typ,finaland,type_string_op,paren_depth; Logical case_sensitive; /* Stuff from parse interface structures */ /* Data */ int tstcnt; int tstproccnt; struct parse_test_struct *tst; int *tstproc; tstcnt = parse_data_struct->tstcnt; tstproccnt = parse_data_struct->tstproccnt; tst = parse_data_struct->tst; tstproc = parse_data_struct->tstproc; selection_token_terminators[0] = SELECTION_STRING_END; paren_depth = 0; opstkp = 0; opstack[0] = 0; opprior[0] = -1; state = 0; finaland = (tstproccnt != 0); /* Append SELECTION_STRING_END to input string as "end of string" */ /* indicator */ if (strchr(s,SELECTION_STRING_END) != NULL) parse_function_struct->errn ("Selection string contains character whose decimal value follows", SELECTION_STRING_END); i = strlen(s); if ( ( s1 = (char *) malloc(i+2) ) == NULL ) parse_function_struct->malloc_err("s1",i+2); strcpy(s1,s); s1[i] = SELECTION_STRING_END; s1[i+1] = '\0'; t = s1; while (t != NULL) { /* Scheme: If first char not one of the special ones, save the */ /* token. Resulting alpha token can be "not" operator or one of */ /* the string operators. */ /* In original outer code, a token beginning with something like @ */ /* would show up as "bad prefix operator". In this code, I think */ /* it will show up as "error on conditions" */ /* The original outer code for this section is commented at the */ /* end of this file */ /* Skip leading blanks. Whole string cannot be blank because of */ /* appended SELECTION_STRING_END, above */ t += strspn(t," "); switch (*t) { case '(': paren_depth++; typ = 2; t++; break; case ')': if ((--paren_depth) < 0) parse_function_struct->err("Illegally nested parens",s); typ = 3; t++; break; case SELECTION_STRING_END: typ = 4; t = NULL; break; case '=': case '<': case '>': case '|': case '&': case '!': tok[0] = *t; /* Check for 2nd char of 2-char ops like !=. Validity */ /* of particular 2-char combo checked later */ switch (*(t+1)) { case '&': case '|': case '=': case '>': tok[1] = *(++t); tok[2] = '\0'; break; case SELECTION_STRING_END: parse_function_struct->err ("No argument after last logical operator",s); break; default: tok[1] = '\0'; break; } typ = 5; t++; break; default: /* alpha token. Could still be string operator... */ if ( (i = strcspn(t,selection_token_terminators)) > TOKEN ) parse_function_struct->err("Token too long",tok); strncpy(tok,t,i); tok[i] = '\0'; t += i; type_string_op = -1; while (teststrop[++type_string_op] != NULL) { /* All upper case = "abnormal spelling" = "abnormal */ /* processing" = not case sensitive */ case_sensitive = TRUE; if (strcmp(teststrop[type_string_op],tok) == 0) break; else { i = 0; while (tok[i] == toupper(teststrop[type_string_op][i])) { if (tok[i] == '\0') { case_sensitive = FALSE; break; } i++; } /* If not case_sensitive, we found all upper case, so we */ /* know it IS a string operator and we should leave string */ /* loop searching string operator list */ if ( ! case_sensitive) break; } } if (teststrop[type_string_op] == NULL) { /* Token is not string operator. See if it's numeric */ f=strtod(tok,&t1); typ = (*t1 == '\0') ? 1 : 0; } else { type_string_op++; /* Begins-w-0 vs begins-w-1 mod */ typ = 6; } break; } switch (state*10+typ) { case 0: /* variable or the string "not" */ case 6: /* string operator could be a variable name, too */ /* NB: "syntactic" varname_lookup routines are, */ /* essentially, 'return legal unless tok="not"' routines */ /* They may require change if something here changes */ /* (syntactic routines are called in an environment where a */ /* varlist is not available, such as optimizing inner stuff */ i = parse_function_struct->varname_lookup(tok); if (i == ILLEGAL_VARNUM) { if (strcmp(tok,"not") != 0) parse_function_struct->err("Error on conditions",tok); opstack[++opstkp]=3; opprior[opstkp]=10; break; } tst[tstcnt].testvar = i; tst[tstcnt].testlev = parse_function_struct->iovarlevel(&i); state=1; if (tstproccnt == N_JG_SEL_OPERATIONS) parse_function_struct->errn ("Too many ops in selection. Max =",N_JG_SEL_OPERATIONS); tstproc[tstproccnt++] = tstcnt+128; break; case 2: /* ( */ opstack[++opstkp] = 4; opprior[opstkp] = 1; break; case 5: /* ! */ if (tok[0] != '!') parse_function_struct->err("Bad prefix operator",tok); opstack[++opstkp] = 3; opprior[opstkp] = 10; break; case 15: /* ==, etc. */ i = -1; while (testop[++i] != NULL) if (strcmp(tok,testop[i]) ==0 ) break; if (testop[i] == NULL) parse_function_struct->err("Bad condition",tok); tst[tstcnt].testcode = (i/SIZE_TESTOP_GROUP)+1; state=2; break; case 16: /* eq, ls, etc */ tst[tstcnt].testcode = - type_string_op; tst[tstcnt].case_sensitive = case_sensitive; state=2; break; case 20: /* token being compared to is not numeric */ case 26: if (tst[tstcnt].testcode > 0) /* Requested test is numeric */ tst[tstcnt].testcode = - tst[tstcnt].testcode; /* Convert it */ /* Deliberate fall-through here */ case 21: /* token being compared to IS numeric */ if (tst[tstcnt].testcode < 0) { /* Requested test is string */ tst[tstcnt].lenteststr = strlen(tok); i = tst[tstcnt].lenteststr + 1; if ( (tst[tstcnt].teststr = (char *) malloc(i)) == NULL ) parse_function_struct->malloc_err("teststr",i); strcpy(tst[tstcnt].teststr,tok); if ( (tst[tstcnt].upteststr = (char *) malloc(i)) == NULL ) parse_function_struct->malloc_err("upteststr",i); i--; while (i >= 0) { tst[tstcnt].upteststr[i] = toupper(tok[i]); i--; } f = 0.; /* Convenient for trick used later in test */ } if (tstcnt == N_JG_SEL_TESTS) parse_function_struct->errn ("Too many relational ops in selection. Max =",N_JG_SEL_TESTS); tst[tstcnt++].testval=f; state=3; break; case 30: case 35: /* and or */ i = -1; while (infix[++i] != NULL) if (strcmp(tok,infix[i]) ==0 ) break; if (infix[i] == NULL) parse_function_struct->err("Bad infix operator",tok); i = (i/SIZE_INFIX_GROUP)+1; while (i+1 <= opprior[opstkp]) { if (tstproccnt == N_JG_SEL_OPERATIONS) parse_function_struct->errn ("Too many ops in selection. Max =",N_JG_SEL_OPERATIONS); tstproc[tstproccnt++] = opstack[opstkp--]; } opstack[++opstkp] = i; opprior[opstkp] = i+1; state=0; break; case 33: /* ) */ while (1 < opprior[opstkp]) { if (tstproccnt == N_JG_SEL_OPERATIONS) parse_function_struct->errn ("Too many ops in selection. Max =",N_JG_SEL_OPERATIONS); tstproc[tstproccnt++] = opstack[opstkp--]; } if ((--opstkp) < 0) parse_function_struct->err("Op stack underflow",""); break; case 34: /* SELECTION_STRING_END */ while (0 <= opprior[opstkp]) { if (tstproccnt == N_JG_SEL_OPERATIONS) parse_function_struct->errn ("Too many ops in selection. Max =",N_JG_SEL_OPERATIONS); tstproc[tstproccnt++] = opstack[opstkp--]; } if (finaland) { if (tstproccnt == N_JG_SEL_OPERATIONS) parse_function_struct->errn ("Too many ops in selection. Max =",N_JG_SEL_OPERATIONS); tstproc[tstproccnt++] = 2; } #ifdef DEBUG printf("finaland = %d, tstproccnt = %d\n",finaland,tstproccnt); for (i=0; i=128) printf("%d %d %f %s\n",tst[tstproc[i]-128].testvar, tst[tstproc[i]-128].testcode,tst[tstproc[i]-128].testval, tst[tstproc[i]-128].teststr); else printf("\n"); }; #endif free (s1); parse_data_struct->tstcnt = tstcnt; parse_data_struct->tstproccnt = tstproccnt; return; default: parse_function_struct->err("Parsing error",s); } } if (paren_depth != 0) parse_function_struct->err("Unpaired parentheses",s); parse_function_struct->err("Internal problem: ", "End-of-string while parsing selection, but normal return not taken"); } /******************** ********************** */ /* Replaced token processing code from parse in original outer if(isdigit(*t) || *t=='.' || *t=='+' || *t=='-'){ f=strtod(t,&t1); if(strchr(" ()$=<>|&!",*t1)){ typ=1; t=t1; } else { i=strcspn(t," ()$=<>|&!"); strncpy(tok,t,i); tok[i]='\0'; t += i; typ=0; }; } else if(isalpha(*t) || *t=='#'){ i=strcspn(t," ()$=<>|&!"); strncpy(tok,t,i); tok[i]='\0'; t += i; typ=0; } else if(*t == '('){ typ=2; t++; } else if(*t == ')'){ typ=3; t++; } else if(*t == '$'){ typ=4; t=NULL; } else { tok[0]= *t; if((int)strspn(t+1,"&|=>")>0){ tok[1]= *(++t); tok[2]='\0'; } else tok[1]='\0'; t++; typ=5; }; */