/* parse.c */ /* parse and test sections from outer v 3.0a Jun 07 */ /* Separated out so that inners can parse selections. Of interest */ /* for optimization purposes to be sure that vars in selections are */ /* not projected out prematurely */ #define PARSE_VERSION "parse v 2.0a 2 May 2008" /* 2 May 2008. wjs */ /* Bug fix: uppercase copy of string operator was 1 char buf ovf */ /* [Begin v 2.0a] */ /* 25 Oct 2007. wjs */ /* Declare some functions */ /* 1 Aug 2007. wjs */ /* V 2.0 is kind of a punt. Code is certainly the "original", but */ /* much has been added (string ops, for one big thing). */ /* [Begin v 2.0] */ /* P-code form: 128..255 exec test(n-128) [negative #'s on Sun] [negativity not used in this code-fortunately. WJS] 1=or,2=and,3=not 0: end */ #include "parse.h" /* Function in outer */ void malloc_err(); /* Functions in inner */ void iovalstr_(); void iovalreal_(); int iovarlevel_(); char *parse_return_vers() /* Dummy routine. Exists only to force .h file version string into */ /* this module. Note string must not be global or we'll have con- */ /* flicts if another routine similarly includes the version string */ { static char version[] = PARSE_VERSION"/"FULL_PARSEH_VERSION; return version; } #define LIKE_MATCH_ANY '%' #define LIKE_MATCH_1 '_' #define LIKE_ESC '\\' char *analyze_wilds(s) char *s; /* Given a string that starts with a "like" wildcard character */ /* Remove all but one any-chars wild and put that after */ /* any single-char wilds. (Essentially, a string of wildcards */ /* becomes "match N or more chars", where N is the number of */ /* single-char wilds). */ /* Return a pointer to the next non-wild char */ { char *strdupl(); /* from utils.c */ char *exp,*in_ptr,*out_ptr; Logical in_match_any_state,in_escape_state; in_match_any_state = FALSE; in_escape_state = FALSE; out_ptr = in_ptr = strdupl(&exp,s,"duplicating like expression"); while (*in_ptr != '\0') { if (in_escape_state) { /* This section copies the character after the \, including, */ /* particular, an escaped \ or match char. If the string */ /* improperly ended with \, it won't be detected here, but */ /* hopefully WILL be detected in strlike */ *out_ptr++ = *in_ptr++; in_escape_state = FALSE; } else if (*in_ptr == LIKE_MATCH_ANY) { in_ptr++; in_match_any_state = TRUE; } else if (*in_ptr == LIKE_MATCH_1) { *out_ptr++ = *in_ptr++; } else { /* This section copies an escaping \ (along w/non-match chars) */ in_escape_state = (*in_ptr == LIKE_ESC); if (in_match_any_state) { *out_ptr++ = LIKE_MATCH_ANY; in_match_any_state = FALSE; } *out_ptr++ = *in_ptr++; } } if (in_match_any_state) *out_ptr++ = LIKE_MATCH_ANY; *out_ptr = '\0'; return exp; } int strlike(s,like_exp,err) char *s,*like_exp; void (*err)(); /* Implementation of SQL "like". */ /* s is the candidate string. like_exp is a "like expression" */ /* consisting of normal characters and wildcard characters. */ /* wildcard characters are of 2 kinds; _ which represents */ /* exactly 1 character, and @ which represents 0 or more */ /* characters. (The actual characters used are compiled into */ /* this program. @ and _ are what were documented in the */ /* Oracle description). Characters can be escaped with a */ /* preceding \ (or other char compiled into program) */ /* Returns 0 if match; positive integer indicating point of */ /* non-match */ { char *s_ptr; char *exp,*exp_ptr; int ret_val; Logical prev_char_matched; ret_val = 0; prev_char_matched = TRUE; s_ptr = s; exp_ptr = exp = analyze_wilds(like_exp); while ((*exp_ptr != '\0') && (*s_ptr != '\0') && prev_char_matched) { switch (*exp_ptr) { case LIKE_MATCH_1: exp_ptr++; s_ptr++; break; case LIKE_MATCH_ANY: /* When s_ptr matches char after wildcard, reset to do */ /* test again through default: case, putting us back on */ /* "normal" track. */ if (*s_ptr++ == *(exp_ptr+1)) { s_ptr--; exp_ptr++; } break; case LIKE_ESC: if (*(++exp_ptr) == '\0') err ("like expression may not end w/escape. expression: ",exp); /* Deliberately fall through */ default: prev_char_matched = (*s_ptr++ == *exp_ptr++); break; } } /* Take care of case where everything matched up to a final */ /* match-any char */ if (*exp_ptr == LIKE_MATCH_ANY) exp_ptr++; ret_val = ((*exp_ptr == '\0') && (*s_ptr == '\0') && prev_char_matched) ? 0 : s_ptr - s; free (exp); return ret_val; } void parse(s,parse_data_struct,parse_function_struct) char *s; struct parse_data *parse_data_struct; struct parse_functions *parse_function_struct; /* Description is approximate. Seems to me it can do */ /* weird things with not... */ /* Parser has 4 states: variable or open paren */ /* comparison operator */ /* thing being compared */ /* logical operator or close paren or end */ /* Idea is to get next token, determine its type, the process it */ /* depending on the state the parser is in (possibly changing state) */ /* Odors observed or speculated about emanating from code */ /* 1) No escape mechanism for special characters. Therefore */ /* string ops cannot look for parens, etc. Don't confuse */ /* this w/escape mechanism in token that represents a */ /* "like" expression... */ /* 2) Suspect that we can't test for strings that conflict w/ */ /* our comparison operators (eg can't say op eq like) */ { #define OPSTACKSIZE 20 /* Depth of operation stack; eg (a+b)*(c+d) */ /* needs a depth of 3 */ /* Within each of next 4 arrays, "equivalent" strings (eg, != & <>) */ /* are grouped together. "Group size" is constant (eg, see empty */ /* strings in testop where there are no equivalent strings). Groups */ /* are ordered to drive "switch" statements in code, so order alter- */ /* ation means code alteration. Infix array is ordered by operation */ /* priority as well. testop is parallel to teststrop as far as it */ /* goes. testop defines numeric comparisons; teststrop string */ /* comparisons. */ /* Each array ends with NULL */ /* Recoded not to use prefix array, since "not" prefix (and any */ /* future one like it) must be tested for in variable section, */ /* while ! is tested for in "normal" special character section */ static char *prefix[]={"!","not",NULL}; static char *infix[]={"||","or","|","&&","and","&",NULL}; static char *testop[]={"<","","=","==",">","","<=","","<>","!=",">=","",NULL}; static char *teststrop[]={"lt","eq","gt","le","ne","ge","contains", "is_contained_in","begins_with","ends_with","like",NULL}; #define SIZE_PREFIX_GROUP 2 #define SIZE_INFIX_GROUP 3 #define SIZE_TESTOP_GROUP 2 /* Next is a non-null character we add to a selection string to */ /* indicate end-of-string. Accordingly, choose a character that */ /* will not normally be found in selection string (formerly used a */ /* $... either we got away w/it for a long time or my analysis is */ /* wrong) */ /* Note: we choose character at compile time so we can have switch */ /* statements use it */ #define SELECTION_STRING_END 1 /* Leading X is space to receive SELECTION_STRING_END. Gave up */ /* trying to get string generated at compile time */ char selection_token_terminators[] = "X ()=<>|&!"; int opstkp; Comp_precision f; int opstack[OPSTACKSIZE]; int opprior[OPSTACKSIZE]; char *s1,*t,*t1; char tok[TOKEN+1],tmp[TOKEN+1]; int i,state,typ,finaland,type_string_op,paren_depth; Logical case_sensitive; /* Stuff from parse interface structures */ /* Data */ int tstcnt; int tstproccnt; struct parse_test_struct *tst; int *tstproc; tstcnt = parse_data_struct->tstcnt; tstproccnt = parse_data_struct->tstproccnt; tst = parse_data_struct->tst; tstproc = parse_data_struct->tstproc; selection_token_terminators[0] = SELECTION_STRING_END; paren_depth = 0; opstkp = 0; opstack[0] = 0; opprior[0] = -1; state = 0; finaland = (tstproccnt != 0); /* Append SELECTION_STRING_END to input string as "end of string" */ /* indicator */ if (strchr(s,SELECTION_STRING_END) != NULL) parse_function_struct->errn ("Selection string contains character whose decimal value follows", SELECTION_STRING_END); i = strlen(s); if ( ( s1 = (char *) malloc(i+2) ) == NULL ) malloc_err("s1",i+2); strcpy(s1,s); s1[i] = SELECTION_STRING_END; s1[i+1] = '\0'; t = s1; while (t != NULL) { /* Scheme: If first char not one of the special ones, save the */ /* token. Resulting alpha token can be "not" operator or one of */ /* the string operators. */ /* In original outer code, a token beginning with something like @ */ /* would show up as "bad prefix operator". In this code, I think */ /* it will show up as "error on conditions" */ /* The original outer code for this section is commented at the */ /* end of this file */ /* Skip leading blanks. Whole string cannot be blank because of */ /* appended SELECTION_STRING_END, above */ t += strspn(t," "); switch (*t) { case '(': paren_depth++; typ = 2; t++; break; case ')': if ((--paren_depth) < 0) parse_function_struct->err("Illegally nested parens",s); typ = 3; t++; break; case SELECTION_STRING_END: typ = 4; t = NULL; break; case '=': case '<': case '>': case '|': case '&': case '!': tok[0] = *t; /* Check for 2nd char of 2-char ops like !=. Validity */ /* of particular 2-char combo checked later */ switch (*(t+1)) { case '&': case '|': case '=': case '>': tok[1] = *(++t); tok[2] = '\0'; break; case SELECTION_STRING_END: parse_function_struct->err ("No argument after last logical operator",s); break; default: tok[1] = '\0'; break; } typ = 5; t++; break; default: /* alpha token. Could still be string operator... */ if ( (i = strcspn(t,selection_token_terminators)) > TOKEN ) parse_function_struct->err("Token too long",tok); strncpy(tok,t,i); tok[i] = '\0'; t += i; type_string_op = -1; while (teststrop[++type_string_op] != NULL) { /* All upper case = "abnormal spelling" = "abnormal */ /* processing" = not case sensitive */ case_sensitive = TRUE; if (strcmp(teststrop[type_string_op],tok) == 0) break; else { i = 0; while (tok[i] == toupper(teststrop[type_string_op][i])) { if (tok[i] == '\0') { case_sensitive = FALSE; break; } i++; } /* If not case_sensitive, we found all upper case, so we */ /* know it IS a string operator and we should leave string */ /* loop searching string operator list */ if ( ! case_sensitive) break; } } if (teststrop[type_string_op] == NULL) { /* Token is not string operator. See if it's numeric */ f=strtod(tok,&t1); typ = (*t1 == '\0') ? 1 : 0; } else { type_string_op++; /* Begins-w-0 vs begins-w-1 mod */ typ = 6; } break; } switch (state*10+typ) { case 0: /* variable or the string "not" */ case 6: /* string operator could be a variable name, too */ /* NB: "syntactic" varname_lookup routines are, */ /* essentially, 'return legal unless tok="not"' routines */ /* They may require change if something here changes */ /* (syntactic routines are called in an environment where a */ /* varlist is not available, such as optimizing inner stuff */ i = parse_function_struct->varname_lookup(tok); if (i == ILLEGAL_VARNUM) { if (strcmp(tok,"not") != 0) parse_function_struct->err("Error on conditions",tok); opstack[++opstkp]=3; opprior[opstkp]=10; break; } tst[tstcnt].testvar = i; tst[tstcnt].testlev = iovarlevel_(&i); state=1; tstproc[tstproccnt++] = tstcnt+128; break; case 2: /* ( */ opstack[++opstkp] = 4; opprior[opstkp] = 1; break; case 5: /* ! */ if (tok[0] != '!') parse_function_struct->err("Bad prefix operator",tok); opstack[++opstkp] = 3; opprior[opstkp] = 10; break; case 15: /* ==, etc. */ i = -1; while (testop[++i] != NULL) if (strcmp(tok,testop[i]) ==0 ) break; if (testop[i] == NULL) parse_function_struct->err("Bad condition",tok); tst[tstcnt].testcode = (i/SIZE_TESTOP_GROUP)+1; state=2; break; case 16: /* eq, ls, etc */ tst[tstcnt].testcode = - type_string_op; tst[tstcnt].case_sensitive = case_sensitive; state=2; break; case 20: /* token being compared to is not numeric */ case 26: if (tst[tstcnt].testcode > 0) /* Requested test is numeric */ tst[tstcnt].testcode = - tst[tstcnt].testcode; /* Convert it */ /* Deliberate fall-through here */ case 21: /* token being compared to IS numeric */ if (tst[tstcnt].testcode < 0) { /* Requested test is string */ tst[tstcnt].lenteststr = strlen(tok); i = tst[tstcnt].lenteststr + 1; if ( (tst[tstcnt].teststr = (char *) malloc(i)) == NULL ) malloc_err("teststr",i); strcpy(tst[tstcnt].teststr,tok); if ( (tst[tstcnt].upteststr = (char *) malloc(i)) == NULL ) malloc_err("upteststr",i); i--; while (i >= 0) { tst[tstcnt].upteststr[i] = toupper(tok[i]); i--; } f = 0.; /* Convenient for trick used later in test */ } tst[tstcnt++].testval=f; state=3; break; case 30: case 35: /* and or */ i = -1; while (infix[++i] != NULL) if (strcmp(tok,infix[i]) ==0 ) break; if (infix[i] == NULL) parse_function_struct->err("Bad infix operator",tok); i = (i/SIZE_INFIX_GROUP)+1; while (i+1 <= opprior[opstkp]) tstproc[tstproccnt++] = opstack[opstkp--]; opstack[++opstkp] = i; opprior[opstkp] = i+1; state=0; break; case 33: /* ) */ while (1 < opprior[opstkp]) tstproc[tstproccnt++] = opstack[opstkp--]; if ((--opstkp) < 0) parse_function_struct->err("Op stack underflow",""); break; case 34: /* SELECTION_STRING_END */ while (0 <= opprior[opstkp]) tstproc[tstproccnt++] = opstack[opstkp--]; if (finaland) tstproc[tstproccnt++] = 2; #ifdef DEBUG printf("finaland = %d, tstproccnt = %d\n",finaland,tstproccnt); for (i=0; i=128) printf("%d %d %f %s\n",tst[tstproc[i]-128].testvar, tst[tstproc[i]-128].testcode,tst[tstproc[i]-128].testval, tst[tstproc[i]-128].teststr); else printf("\n"); }; #endif free (s1); parse_data_struct->tstcnt = tstcnt; parse_data_struct->tstproccnt = tstproccnt; return; default: parse_function_struct->err("Parsing error",s); } } if (paren_depth != 0) parse_function_struct->err("Unpaired parentheses",s); } int test(cl,parse_data_struct,parse_function_struct) int cl; struct parse_data *parse_data_struct; struct parse_functions *parse_function_struct; { Comp_precision f; static char tmp[DATUMSIZE+1],uptmp[DATUMSIZE+1]; int i,j,m; char *datum_ptr,*test_condition_ptr; /* Size below comes from conclusion that in tstproccnt loop below, */ /* the stack pointer could be incremented once each interaction, */ /* even though logically, I think the pointer is supposed to end up 0 */ int stk[NTESTS],stkp; /* Stuff from parse interface structures */ /* Data */ int tstcnt; int tstproccnt; struct parse_test_struct *tst; int *tstproc; tstcnt = parse_data_struct->tstcnt; tstproccnt = parse_data_struct->tstproccnt; tst = parse_data_struct->tst; tstproc = parse_data_struct->tstproc; #ifdef DEBUG printf("entered test at level %d\n",cl); #endif stkp = -1; stk[0] = TRUE; for (i=0; i= 128){ j = tstproc[i]-128; if (tst[j].testlev == cl) { #ifdef DEBUG printf("-- %f %d %f -- ",f,tst[j].testcode,tst[j].testval); #endif if (tst[j].testcode < 0) { iovalstr_(&(tst[j].testvar),tmp); if (tst[j].case_sensitive) { datum_ptr = tmp; test_condition_ptr = tst[j].teststr; } else { m = 0; while (tmp[m] != '\0') { uptmp[m] = toupper(tmp[m]); m++; } uptmp[m] = '\0'; datum_ptr = uptmp; test_condition_ptr = tst[j].upteststr; } if (tst[j].testcode >= -6) f = (double) strcmp(datum_ptr,test_condition_ptr); } else ioval(&(tst[j].testvar),&f); switch (abs(tst[j].testcode)){ case 1: m = (f < tst[j].testval); break; case 2: m = (f == tst[j].testval); break; case 3: m = (f > tst[j].testval); break; case 4: m = (f <= tst[j].testval); break; case 5: m = (f != tst[j].testval); break; case 6: m = (f >= tst[j].testval); break; case 7: /* contains */ m = (strstr(datum_ptr,test_condition_ptr) != NULL); break; case 8: /* is_contained_in */ m = (strstr(test_condition_ptr,datum_ptr) != NULL); break; case 9: /* begins_with */ m = (strncmp(datum_ptr,test_condition_ptr,tst[j].lenteststr) == 0); break; case 10: /* ends_with */ m = strlen(datum_ptr) - tst[j].lenteststr; m = (m >= 0) ? (strcmp(datum_ptr+m,test_condition_ptr) == 0) : FALSE; break; case 11: /* like */ m = (strlike(datum_ptr, test_condition_ptr, parse_function_struct->err) == 0); break; default: parse_function_struct->errn ("Internal error-illegal testcode",tst[j].testcode,"%d"); } tst[j].testres = m; stk[++stkp] = m; } /* Use previously saved test result-no need to recalc it */ else if (tst[j].testlev < cl) stk[++stkp] = tst[j].testres; /* Test is at level we haven't gotten to yet. Return true even */ /* if data at next level might not pass test. Therefore, all */ /* level N data shows up when displaying level N, but might not */ /* when displaying same object at level N+1. Only a problem */ /* when displaying by level... */ else stk[++stkp] = TRUE; } else { switch (tstproc[i]) { case 1: stk[stkp-1] = (stk[stkp-1] || stk[stkp]); stkp--; break; case 2: stk[stkp-1] = (stk[stkp-1] && stk[stkp]); stkp--; break; case 3: stk[stkp] = ( ! stk[stkp]); break; default: parse_function_struct->errn ("Internal error-illegal tstproc",tstproc[i],"%d"); } } } #ifdef DEBUG printf("test result = %d\n",stk[0]); #endif /* Don't know if test changes the next 2 - too lazy to check */ parse_data_struct->tstcnt = tstcnt; parse_data_struct->tstproccnt = tstproccnt; return stk[0]; }