/*	parse.c								*/
/*    parse and test sections from outer v 3.0a	 Jun 07			*/
/*    Separated out so that inners can parse selections.  Of interest	*/
/*  for optimization purposes to be sure that vars in selections are	*/
/*  not projected out prematurely					*/

#define PARSE_VERSION "parse v 2.0a  2 May 2008"
/*   2 May 2008.  wjs							*/
/*	Bug fix: uppercase copy of string operator was 1 char buf ovf	*/
/*	[Begin v 2.0a]							*/
/*  25 Oct 2007.  wjs							*/
/*	Declare some functions						*/
/*   1 Aug 2007.  wjs							*/
/*	V 2.0 is kind of a punt.  Code is certainly the "original", but	*/
/*    much has been added (string ops, for one big thing).		*/
/*	[Begin v 2.0]							*/

/*
P-code form:
128..255 exec test(n-128) [negative #'s on Sun]
			  [negativity not used in this code-fortunately.  WJS]
1=or,2=and,3=not
0: end
*/

#include "parse.h"

/*  Function in outer							*/
/*    Called in this file as malloc_err(name,size) when malloc has	*/
/*    returned NULL							*/
void malloc_err();
/*  Functions in inner							*/
/*    iovalstr_ (fetch a variable's value as a string) and		*/
/*    iovarlevel_ (level of a variable) are called from this file.	*/
/*    iovalreal_ is declared but not called by name here; test() calls	*/
/*    ioval, which is not declared in this file - presumably parse.h	*/
/*    maps one to the other.  TODO confirm				*/
void iovalstr_();
void iovalreal_();
int iovarlevel_();

char *parse_return_vers()
/*  Returns the combined version string of this module and of the	*/
/*  parse.h it was compiled against, as "<module vers>/<header vers>".	*/
/*  The routine exists so that the header's version string is linked	*/
/*  into this object.  The string is kept function-local (static) so	*/
/*  that it cannot collide with another module that embeds the header	*/
/*  version the same way.						*/
{
  static char vers_string[] = PARSE_VERSION "/" FULL_PARSEH_VERSION;

  return &vers_string[0];
}

/*  Special characters of a "like" expression (see analyze_wilds and	*/
/*  strlike).  They are compile-time constants so that they can be	*/
/*  used as case labels.						*/
#define LIKE_MATCH_ANY '%'	/* stands for zero or more characters	*/
#define LIKE_MATCH_1 '_'	/* stands for exactly one character	*/
#define LIKE_ESC '\\'		/* next character is taken literally	*/

char *analyze_wilds(s)
char *s;
  /*  Normalize the wildcards of "like" expression s.			*/
  /*  Works on a duplicate of s obtained from strdupl; s itself is	*/
  /*  not modified.  In every run of unescaped wildcards, all match-	*/
  /*  any characters are collapsed into a single one, which is placed	*/
  /*  after the single-char wildcards of that run.  (So a run of	*/
  /*  wildcards becomes "match N or more chars", N being the number	*/
  /*  of single-char wilds in it.)  A character preceded by the escape	*/
  /*  character is copied untouched, escape included.			*/
  /*  Returns the pointer strdupl stored for the duplicate (strlike	*/
  /*  releases it with free).  The copy loop starts at strdupl's	*/
  /*  return value - presumably the same buffer; see utils.c		*/
{
  char *strdupl();	/* from utils.c					*/

  char *copy,*src,*dst;
  Logical any_pending,escaped;

  any_pending = FALSE;
  escaped = FALSE;

    /*  Compact the duplicate in place: dst never runs ahead of src	*/
  for (src = dst = strdupl(&copy,s,"duplicating like expression");
       *src != '\0';
       src++) {

    if (escaped) {
	/*  Character following the escape is copied as is, even when	*/
	/*  it is a wildcard or another escape.  An expression that	*/
	/*  ends right after an escape is not diagnosed here; that is	*/
	/*  left to strlike						*/
      *dst++ = *src;
      escaped = FALSE;
      continue;
    }

    switch (*src) {
      case LIKE_MATCH_ANY:
	  /*  Swallow it for now; one is emitted when the run ends	*/
	any_pending = TRUE;
	break;
      case LIKE_MATCH_1:
	*dst++ = *src;
	break;
      default:
	  /*  Ordinary character or the escape character itself	*/
	escaped = (*src == LIKE_ESC);
	if (any_pending) {
	  *dst++ = LIKE_MATCH_ANY;
	  any_pending = FALSE;
	}
	*dst++ = *src;
	break;
    }
  }

    /*  Expression ended inside a run containing a match-any		*/
  if (any_pending) *dst++ = LIKE_MATCH_ANY;
  *dst = '\0';

  return copy;
}

int strlike(s,like_exp,err)
char *s,*like_exp;
void (*err)();
  /*  Implementation of SQL "like".					*/
  /*    s is the candidate string.  like_exp is a "like expression"	*/
  /*    consisting of normal characters and wildcard characters.	*/
  /*    wildcard characters are of 2 kinds; _ which represents		*/
  /*    exactly 1 character, and @ which represents 0 or more 		*/
  /*    characters.  (The actual characters used are compiled into	*/
  /*    this program.  @ and _ are what were documented in the		*/
  /*    Oracle description).  Characters can be escaped with a		*/
  /*    preceding \ (or other char compiled into program)		*/
  /*  Returns 0 if match; positive integer indicating point of		*/
  /*    non-match							*/
{
  char *s_ptr;
  char *exp,*exp_ptr;
  int ret_val;
  Logical prev_char_matched;

  ret_val = 0;
  prev_char_matched = TRUE;

  s_ptr = s;
  exp_ptr = exp = analyze_wilds(like_exp);

  while ((*exp_ptr != '\0') && (*s_ptr != '\0') && prev_char_matched) {
    switch (*exp_ptr) {
      case LIKE_MATCH_1:
	exp_ptr++;
	s_ptr++;
	break;
      case LIKE_MATCH_ANY:
	  /*  When s_ptr matches char after wildcard, reset to do	*/
	  /*  test again through default: case, putting us back on	*/
	  /*  "normal" track.						*/
	if (*s_ptr++ == *(exp_ptr+1)) {
	  s_ptr--;
	  exp_ptr++;
	}
	break;
      case LIKE_ESC:
	if (*(++exp_ptr) == '\0') 
	  err ("like expression may not end w/escape.  expression: ",exp);
				/*  Deliberately fall through		*/
      default:
	prev_char_matched = (*s_ptr++ == *exp_ptr++);
	break;
    }
  }
    /*  Take care of case where everything matched up to a final	*/
    /*  match-any char							*/
  if (*exp_ptr == LIKE_MATCH_ANY) exp_ptr++;

  ret_val = ((*exp_ptr == '\0') && (*s_ptr == '\0') && prev_char_matched) ? 
								0 : s_ptr - s;
  free (exp);
  return ret_val;
}

void parse(s,parse_data_struct,parse_function_struct)
char *s;
struct parse_data *parse_data_struct;
struct parse_functions *parse_function_struct;
  /*  Compile selection string s into tests and p-code.			*/
  /*    parse_data_struct supplies the test table (tst), the p-code	*/
  /*    array (tstproc) and their current counts (tstcnt, tstproccnt).	*/
  /*    New tests and p-code are appended; the counts are written	*/
  /*    back when the end of s is reached in a legal state.  If	*/
  /*    tstproccnt is non-zero on entry, an "and" is appended at the	*/
  /*    end so the new selection is and-ed with what was there.	*/
  /*    parse_function_struct supplies err(msg,string), 		*/
  /*    errn(msg,number) and varname_lookup(name).			*/
  /*		Description is approximate.  Seems to me it can do	*/
  /*		weird things with not...				*/
  /*  Parser has 4 states: 0 variable or open paren (or not/!)		*/
  /*			   1 comparison operator			*/
  /*			   2 thing being compared			*/
  /*			   3 logical operator or close paren or end	*/
  /*  Token types (typ):   0 non-numeric token    1 numeric token	*/
  /*			   2 (    3 )    4 end of string		*/
  /*			   5 symbol operator      6 string operator	*/
  /*  Idea is to get next token, determine its type, then process it	*/
  /*  depending on the state the parser is in (possibly changing	*/
  /*  state).  The two are combined as state*10+typ.			*/
  /*  Operator stack entries: 1=or 2=and 3=not 4=open paren, with	*/
  /*  priorities 2, 3, 10 and 1; the bottom entry has priority -1.	*/
  /*	Odors observed or speculated about emanating from code		*/
  /*	  1) No escape mechanism for special characters.  Therefore	*/
  /*	     string ops cannot look for parens, etc.  Don't confuse	*/
  /*	     this w/escape mechanism in token that represents a 	*/
  /*	     "like" expression...					*/
  /*	  2) Suspect that we can't test for strings that conflict w/	*/
  /*	     our comparison operators (eg can't say op eq like)		*/
  /*	  NOTE(review): nothing here bounds tstcnt or tstproccnt; the	*/
  /*	     capacity of tst and tstproc is not visible in this file	*/
{

#define OPSTACKSIZE 20	/* Depth of operation stack; eg (a+b)*(c+d)	*/
			/*  needs a depth of 3				*/
  /*  True when another push would run off the end of the op stack	*/
#define OPSTACK_FULL (opstkp >= OPSTACKSIZE-1)
  /*  Within each of next 4 arrays, "equivalent" strings (eg, != & <>)	*/
  /*  are grouped together.  "Group size" is constant (eg, see empty	*/
  /*  strings in testop where there are no equivalent strings).  Groups	*/
  /*  are ordered to drive "switch" statements in code, so order alter-	*/
  /*  ation means code alteration.  Infix array is ordered by operation	*/
  /*  priority as well.  testop is parallel to teststrop as far as it	*/
  /*  goes.  testop defines numeric comparisons; teststrop string	*/
  /*  comparisons.							*/
  /*  Each array ends with NULL						*/
  /*    Recoded not to use prefix array, since "not" prefix (and any	*/
  /*    future one like it) must be tested for in variable section,	*/
  /*    while ! is tested for in "normal" special character section	*/
static char *prefix[]={"!","not",NULL};
static char *infix[]={"||","or","|","&&","and","&",NULL};
static char *testop[]={"<","","=","==",">","","<=","","<>","!=",">=","",NULL};
static char *teststrop[]={"lt","eq","gt","le","ne","ge","contains",
		  "is_contained_in","begins_with","ends_with","like",NULL};
#define SIZE_PREFIX_GROUP 2
#define SIZE_INFIX_GROUP 3
#define SIZE_TESTOP_GROUP 2

  /*  Next is a non-null character we add to a selection string to	*/
  /*  indicate end-of-string.  Accordingly, choose a character that 	*/
  /*  will not normally be found in selection string (formerly used a	*/
  /*  $... either we got away w/it for a long time or my analysis is	*/
  /*  wrong)								*/
  /*    Note: we choose character at compile time so we can have switch	*/
  /*  statements use it							*/
#define SELECTION_STRING_END 1
  /*  Leading X is space to receive SELECTION_STRING_END.  Gave up	*/
  /*  trying to get string generated at compile time			*/
char selection_token_terminators[] = "X ()=<>|&!";

int opstkp;
Comp_precision f;
int opstack[OPSTACKSIZE];
int opprior[OPSTACKSIZE];
char *s1,*t,*t1;
char tok[TOKEN+1];
int i,state,typ,finaland,type_string_op,paren_depth;
Logical case_sensitive;

  /*  Stuff from parse interface structures				*/
    /*  Data								*/
int tstcnt;
int tstproccnt;
struct parse_test_struct *tst;
int *tstproc;

tstcnt = parse_data_struct->tstcnt;
tstproccnt = parse_data_struct->tstproccnt;
tst = parse_data_struct->tst;
tstproc = parse_data_struct->tstproc;

selection_token_terminators[0] = SELECTION_STRING_END;

paren_depth = 0;
opstkp = 0;
opstack[0] = 0;
opprior[0] = -1;	/*  Sentinel: stops every "pop while" loop	*/
state = 0;

finaland = (tstproccnt != 0);

  /*  Append SELECTION_STRING_END to input string as "end of string"	*/
  /*  indicator								*/
if (strchr(s,SELECTION_STRING_END) != NULL) 
    parse_function_struct->errn
	("Selection string contains character whose decimal value follows",
	 SELECTION_STRING_END);
i = strlen(s);
if (   ( s1 = (char *) malloc(i+2) ) == NULL   ) malloc_err("s1",i+2);
strcpy(s1,s);
s1[i] = SELECTION_STRING_END;
s1[i+1] = '\0';

t = s1;
while (t != NULL) {

    /*  Scheme: If first char not one of the special ones, save the	*/
    /*  token.  Resulting alpha token can be "not" operator or one of	*/
    /*  the string operators.						*/
    /*  In original outer code, a token beginning with something like @	*/
    /*  would show up as "bad prefix operator".  In this code, I think	*/
    /*  it will show up as "error on conditions"			*/
    /*  Skip leading blanks.  Whole string cannot be blank because of	*/
    /*  appended SELECTION_STRING_END, above				*/
  t += strspn(t," ");
  switch (*t) {
    case '(':
      paren_depth++;
      typ = 2;
      t++;
      break;
    case ')':
      if ((--paren_depth) < 0) 
		parse_function_struct->err("Illegally nested parens",s);
      typ = 3;
      t++;
      break;
    case SELECTION_STRING_END:
      typ = 4;
      t = NULL;
      break;
    case '=':  case '<':  case '>':  case '|':  case '&':  case '!':
      tok[0] = *t;
	/*  Check for 2nd char of 2-char ops like !=.  Validity		*/
	/*  of particular 2-char combo checked later			*/
      switch (*(t+1)) {
	case '&':  case '|':  case '=':  case '>':	
	  tok[1] = *(++t);
	  tok[2] = '\0';
	  break;
	case SELECTION_STRING_END:
	  parse_function_struct->err
				("No argument after last logical operator",s);
	    /*  Keep tok a valid string in case err returns		*/
	  tok[1] = '\0';
	  break;
	default:
	  tok[1] = '\0';
	  break;
      }
      typ = 5;
      t++;
      break;
    default:	/*  alpha token.  Could still be string operator...	*/
      i = strcspn(t,selection_token_terminators);
      t1 = t + i;		/*  First char past the token		*/
      if (i > TOKEN) {
	  /*  Report the part of the token that fits, and never copy	*/
	  /*  more than tok can hold even if err returns		*/
	strncpy(tok,t,TOKEN);
	tok[TOKEN] = '\0';
	parse_function_struct->err("Token too long",tok);
	i = TOKEN;
      }
      strncpy(tok,t,i);
      tok[i] = '\0';
      t = t1;

      type_string_op = -1;
      while (teststrop[++type_string_op] != NULL) {
	  /*  All upper case = "abnormal spelling" = "abnormal		*/
	  /*  processing" = not case sensitive				*/
	case_sensitive = TRUE;
	if (strcmp(teststrop[type_string_op],tok) == 0) break;
	else {
	  i = 0;
	  while (tok[i] == 
		 toupper((unsigned char) teststrop[type_string_op][i])) {
	    if (tok[i] == '\0') {
	      case_sensitive = FALSE;
	      break;
	    }
	    i++;
	  }
	    /*  If not case_sensitive, we found all upper case, so we	*/
	    /*  know it IS a string operator and we should leave string	*/
	    /*  loop searching string operator list			*/
	  if ( ! case_sensitive) break;
	}
      }
      if (teststrop[type_string_op] == NULL) {
	  /*  Token is not string operator.  See if it's numeric	*/
        f=strtod(tok,&t1);
        typ = (*t1 == '\0') ? 1 : 0;
      } else {
	type_string_op++;	/* Begins-w-0 vs begins-w-1 mod		*/
        typ = 6;
      }

      break;
  }

  switch (state*10+typ) {
    case 0: /* variable or the string "not" */
    case 6: /* string operator could be a variable name, too		*/
	    /*    NB: "syntactic" varname_lookup routines are,		*/
	    /* essentially, 'return legal unless tok="not"' routines	*/
	    /* They may require change if something here changes	*/
	    /* (syntactic routines are called in an environment where a	*/
	    /* varlist is not available, such as optimizing inner stuff	*/   
      i = parse_function_struct->varname_lookup(tok);
      if (i == ILLEGAL_VARNUM) {
        if (strcmp(tok,"not") != 0) 
			parse_function_struct->err("Error on conditions",tok);
	if (OPSTACK_FULL) {
	  parse_function_struct->err("Expression nested too deeply",s);
	  break;
	}
        opstack[++opstkp]=3;
        opprior[opstkp]=10;
        break;
      }
      tst[tstcnt].testvar = i;
      tst[tstcnt].testlev = iovarlevel_(&i);
      state=1;
      tstproc[tstproccnt++] = tstcnt+128;
      break;
    case 2: /* ( */
      if (OPSTACK_FULL) {
	parse_function_struct->err("Expression nested too deeply",s);
	break;
      }
      opstack[++opstkp] = 4;
      opprior[opstkp] = 1;
      break;
    case 5: /* ! */
      if (tok[0] != '!') parse_function_struct->err("Bad prefix operator",tok);
      if (OPSTACK_FULL) {
	parse_function_struct->err("Expression nested too deeply",s);
	break;
      }
      opstack[++opstkp] = 3;
      opprior[opstkp] = 10;
      break;
    case 15: /* ==, etc. */
      i = -1;
      while (testop[++i] != NULL)
	if (strcmp(tok,testop[i]) ==0 ) break;
      if (testop[i] == NULL) parse_function_struct->err("Bad condition",tok);
      tst[tstcnt].testcode = (i/SIZE_TESTOP_GROUP)+1;
      state=2;
      break;
    case 16: /* eq, ls, etc  */
	/*  String tests carry a negative testcode			*/
      tst[tstcnt].testcode = - type_string_op;
      tst[tstcnt].case_sensitive = case_sensitive;
      state=2;
      break;
    case 20: 		/* token being compared to is not numeric 	*/
    case 26:
	/*  NOTE(review): when a symbol operator is converted to a	*/
	/*  string test here, tst[].case_sensitive is not assigned	*/
	/*  anywhere in this routine (only case 16 sets it), yet	*/
	/*  test() reads it.  Confirm caller initializes tst		*/
      if (tst[tstcnt].testcode > 0)	/*  Requested test is numeric	*/
	tst[tstcnt].testcode = - tst[tstcnt].testcode; /* Convert it	*/
	  /*  Deliberate fall-through here				*/
    case 21: 		/* token being compared to IS numeric 		*/
      if (tst[tstcnt].testcode < 0) {	/*  Requested test is string	*/
	tst[tstcnt].lenteststr = strlen(tok);
	i = tst[tstcnt].lenteststr + 1;
	if (  (tst[tstcnt].teststr = (char *) malloc(i)) == NULL  )
						    malloc_err("teststr",i);
	strcpy(tst[tstcnt].teststr,tok);
	if (  (tst[tstcnt].upteststr = (char *) malloc(i)) == NULL  )
						    malloc_err("upteststr",i);
	  /*  Upper case copy, terminating '\0' included		*/
	i--;
	while (i >= 0) {
	  tst[tstcnt].upteststr[i] = toupper((unsigned char) tok[i]);
	  i--;
	}
	f = 0.;		/*  Convenient for trick used later in test	*/
      }
      tst[tstcnt++].testval=f;
      state=3;
      break;
    case 30:
    case 35: /* and or */
      i = -1;
      while (infix[++i] != NULL)
	if (strcmp(tok,infix[i]) ==0 ) break;
      if (infix[i] == NULL) 
			parse_function_struct->err("Bad infix operator",tok);
      i = (i/SIZE_INFIX_GROUP)+1;
	/*  Emit stacked operators of equal or higher priority first	*/
      while (i+1 <= opprior[opstkp])
	tstproc[tstproccnt++] = opstack[opstkp--];
      if (OPSTACK_FULL) {
	parse_function_struct->err("Expression nested too deeply",s);
	break;
      }
      opstack[++opstkp] = i;
      opprior[opstkp] = i+1;
      state=0;
      break;
    case 33: /* ) */
	/*  Emit everything stacked above the matching open paren,	*/
	/*  then drop the paren marker itself				*/
      while (1 < opprior[opstkp])
        tstproc[tstproccnt++] = opstack[opstkp--];
      if ((--opstkp) < 0) parse_function_struct->err("Op stack underflow","");
      break;
    case 34: /* SELECTION_STRING_END */
	/*  This is the normal way out, so the paren balance must be	*/
	/*  checked here; otherwise the marker of an unclosed ( would	*/
	/*  be emitted below as if it were p-code			*/
      if (paren_depth != 0) 
		parse_function_struct->err("Unpaired parentheses",s);
      while (0 <= opprior[opstkp])
        tstproc[tstproccnt++] = opstack[opstkp--];
      if (finaland) tstproc[tstproccnt++] = 2;
#ifdef DEBUG
      printf("finaland = %d, tstproccnt = %d\n",finaland,tstproccnt);
      for (i=0; i<tstproccnt; i++)  {
	printf("tstproc %d ",tstproc[i]);
	if(tstproc[i]>=128)
	  printf("%d %d %f %s\n",tst[tstproc[i]-128].testvar,
		 tst[tstproc[i]-128].testcode,tst[tstproc[i]-128].testval,
		 tst[tstproc[i]-128].teststr);
	else printf("\n");
      };
#endif
      free (s1);
      parse_data_struct->tstcnt = tstcnt;
      parse_data_struct->tstproccnt = tstproccnt;
      return;
    default:
      parse_function_struct->err("Parsing error",s);
  }
}

  /*  Only reached if err returned after end of string was seen in an	*/
  /*  illegal state							*/
if (paren_depth != 0) parse_function_struct->err("Unpaired parentheses",s);
free (s1);

}

int test(cl,parse_data_struct,parse_function_struct)
int cl;
struct parse_data *parse_data_struct;
struct parse_functions *parse_function_struct;
{
Comp_precision f;
static char tmp[DATUMSIZE+1],uptmp[DATUMSIZE+1];
int i,j,m;
char *datum_ptr,*test_condition_ptr;
  /*  Size below comes from conclusion that in tstproccnt loop below,	*/
  /*  the stack pointer could be incremented once each interaction,	*/
  /*  even though logically, I think the pointer is supposed to end up 0 */
int stk[NTESTS],stkp;


  /*  Stuff from parse interface structures				*/
    /*  Data								*/
int tstcnt;
int tstproccnt;
struct parse_test_struct *tst;
int *tstproc;

tstcnt = parse_data_struct->tstcnt;
tstproccnt = parse_data_struct->tstproccnt;
tst = parse_data_struct->tst;
tstproc = parse_data_struct->tstproc;

#ifdef DEBUG
  printf("entered test at level %d\n",cl);
#endif
stkp = -1;
stk[0] = TRUE;
for (i=0; i<tstproccnt; i++) {
  if(tstproc[i] >= 128){
    j = tstproc[i]-128;
    if (tst[j].testlev == cl) {
#ifdef DEBUG
      printf("-- %f %d %f -- ",f,tst[j].testcode,tst[j].testval);
#endif
      if (tst[j].testcode < 0) { 
	iovalstr_(&(tst[j].testvar),tmp);
	if (tst[j].case_sensitive) {
	  datum_ptr = tmp;
	  test_condition_ptr = tst[j].teststr;
	} else {
	  m = 0;	
	  while (tmp[m] != '\0') {
	    uptmp[m] = toupper(tmp[m]);
	    m++;
	  }
	  uptmp[m] = '\0';
	  datum_ptr = uptmp;
	  test_condition_ptr = tst[j].upteststr;
	}
	if (tst[j].testcode >= -6) 
	  f = (double) strcmp(datum_ptr,test_condition_ptr);
      } else
	ioval(&(tst[j].testvar),&f);

      switch (abs(tst[j].testcode)){
	case 1:
	  m = (f < tst[j].testval);
	  break;
	case 2:
	  m = (f == tst[j].testval);
	  break;
	case 3:
	  m = (f > tst[j].testval);
	  break;
	case 4:
	  m = (f <= tst[j].testval);
	  break;
	case 5:
	  m = (f != tst[j].testval);
	  break;
	case 6:
	  m = (f >= tst[j].testval);
	  break;
	case 7:					/*  contains		*/
	  m = (strstr(datum_ptr,test_condition_ptr) != NULL);
	  break;
	case 8:					/*  is_contained_in	*/
	  m = (strstr(test_condition_ptr,datum_ptr) != NULL);
	  break;
	case 9:					/*  begins_with		*/
	  m = (strncmp(datum_ptr,test_condition_ptr,tst[j].lenteststr) == 0);
	  break;
	case 10:				/*  ends_with		*/
	  m = strlen(datum_ptr) - tst[j].lenteststr;
	  m = (m >= 0) ? (strcmp(datum_ptr+m,test_condition_ptr) == 0) : FALSE;
	  break;
	case 11:				/*  like		*/
	  m = (strlike(datum_ptr,
		       test_condition_ptr,
		       parse_function_struct->err) 
		== 0);
	  break;
	default:
	  parse_function_struct->errn
	  	("Internal error-illegal testcode",tst[j].testcode,"%d");
      }
      tst[j].testres = m;
      stk[++stkp] = m;    
    }

      /*  Use previously saved test result-no need to recalc it		*/
    else if (tst[j].testlev < cl) stk[++stkp] = tst[j].testres;

      /*  Test is at level we haven't gotten to yet.  Return true even	*/
      /*  if data at next level might not pass test.  Therefore, all	*/
      /*  level N data shows up when displaying level N, but might not	*/
      /*  when displaying same object at level N+1.  Only a problem	*/
      /*  when displaying by level...					*/
    else stk[++stkp] = TRUE;

 
 } else {
    switch (tstproc[i]) {
      case 1:
	stk[stkp-1] = (stk[stkp-1] || stk[stkp]);
	stkp--;
	break;
      case 2:
	stk[stkp-1] = (stk[stkp-1] && stk[stkp]);
	stkp--;
	break;
      case 3:
	stk[stkp] = ( ! stk[stkp]);
	break;
      default:
	parse_function_struct->errn
		("Internal error-illegal tstproc",tstproc[i],"%d");
    }
  }
}
#ifdef DEBUG
  printf("test result = %d\n",stk[0]);
#endif

  /*  Don't know if test changes the next 2 - too lazy to check		*/
parse_data_struct->tstcnt = tstcnt;
parse_data_struct->tstproccnt = tstproccnt;

return stk[0];
}