/* PATH_INFO_routines. WJS Jul 97 */ /* Purpose of these routines is to extract portions of information */ /* from the "PATH_INFO" environment variable as formatted by the JGOFS */ /* system, and to create a properly formatted string that can be used */ /* to created that environment variable. */ /* */ /* To get information, use the functions */ /* int get_level() */ /* char *get_object() */ /* char *get_object_old_format() */ /* char *get_options() */ /* char *get_protocol() */ /* char *get_jgofs_env_datum() */ /* Each of the first 5 takes an argument, which is usually NULL */ /* get_jgofs_env_datum takes 2 args, the first of which is a string */ /* describing the field of path_info that you want. */ /* (details in the per-function comments, below) */ /* Note that get_object_old_format is not really a get. The */ /* real get_s return unaltered values from the input string. Thus */ /* get_object can return either old or new format object spec. */ /* get_object_old_format actually calls make_PATH_INFO_putenv_string */ /* */ /* To create a string that can be used by putenv, use the function */ /* char *make_PATH_INFO_putenv_string() */ /* See its documentation, below (and the "Options package" discussion */ /* at bottom, which discusses possible enhancements) */ /* */ /* */ /* This implementation derived from code in outer from JGOFS */ /* release 1.5, but does not match it exactly. It will correctly */ /* deal with strings that JFOFS 1.5 will read, and hopefully will */ /* be flexible enough to deal with future strings. The get_protocol */ /* and get_level routines will validate things to more closely match */ /* the outer code (& therefore will probably need more modification */ /* in the future) */ /* */ /* At present, the environment variable PATH_INFO contains various */ /* information. Approximately, its layout is */ /* object_location,object_name,protocol,level,options */ /* options, if any, are contained in curly brackets. The rest of the */ /* string is formatted as a unix file spec, with the object name in */ /* the position of the file name, and the protocol/level string in */ /* the position of the file type. */ /* (Note that the URL precedes this string with the cgi-bin program */ /* that will process the string. Generally, it is not possible to */ /* tell where the object_location begins, but because the program is */ /* usually of the form "/jg/name" (eg, /jg/serv, /jg/dir), one can */ /* guess.) */ /* */ /* If PATH_INFO contains the strings .html or .flat (anywhere), */ /* those strings are considered "protocol" strings by outer. They */ /* must be followed by at least a 1 digit level. Other parts of the */ /* JGOFS system limit the level to 1 digit, but outer does not. In */ /* the absence of a protocol string, outer produces a default protocol */ /* */ #define PATH_INFO_ROUTINES_VERSION \ "path_info_routines version 1.4a 23 Oct 2009" /* 23 Oct 09. WJS */ /* Bug fix: be nice if # dummies matched # args. Good testing! */ /* [Begin version 1.4a] */ /* 10 Oct 08. WJS */ /* Bug fix: forgot to declare is_a_remote_object_new_way */ /* 7 Mar 08. WJS */ /* Be sure that make_PATH_INFO_putenv_string creates object names */ /* with initial double slashes */ /* Add get_object_old_format (which is not really a get_ but a */ /* make_ since reformatting may happen) */ /* [Begin version 1.4] */ /* 22 Mar 07. WJS */ /* Original new_and_old_path_infos needed err(); only routine here */ /* to do so. Seems inferior - change to make calling routine */ /* handle the err */ /* [Begin version 1.3a] */ /* 11 Jan 07. WJS */ /* new_and_old_path_infos */ /* [Begin version 1.3] */ /* 26 Aug 04. WJS */ /* Only look for protocol after all slashes in object spec */ /* [Begin version 1.2b] */ /* 23 Apr 04. WJS */ /* Mods to "version-returning function". */ /* a) change from local to global to ensure it can't be "opti- */ /* mized out" */ /* b) don't want "version" in any function name since such */ /* names appear when grep'ping for "version" */ /* 20 Feb 04. WJS */ /* Get .h file ID into binary modules */ /* Remove unnecessary system #includes (in core.h; itself in */ /* path_info_routines.h) */ /* [Begin version 1.2a] */ /* 12 Aug 97. WJS */ /* get_jgofs_env_datum entry */ /* [Begin version 1.2] */ /* 9 Aug 97. WJS */ /* get_object & get_options entries */ /* [Begin version 1.1] */ /* 25 Jul 97. WJS */ /* [Begin version 1.0] */ /************************************************************************/ #include "path_info_routines.h" /* All these functions are local; defined here so that they can */ /* be arbitrarily ordered in the source file */ char *parse_PATH_INFO_string(); char *make_PATH_INFO_putenv_string(); char *get_object(); char *get_object_old_format(); char *get_protocol(); char *get_options(); char *get_jgofs_env_datum(); int get_level(); char *copy_substring(); int new_and_old_path_infos(); int is_a_remote_object_new_way(); /* From utils.c */ /************************************************************************/ char *path_info_routines_return_vers() /* Dummy routine. Exists only to force .h file version string into */ /* this module. Note string must not be global or we'll have con- */ /* flicts if another routine similarly includes the version string */ { static char version[] = PATH_INFO_ROUTINES_VERSION"/"FULL_PATH_INFO_ROUTINESH_VERSION; return version; } int new_and_old_path_infos(new,old) char **new; char **old; /* *old is modified to point to a string of the form */ /* "PATH_INFO=" + getenv("PATH_INFO") */ /* *new is modified to point to the same string as old, except that it */ /* has JGOFS protocol "jgof" (instead of html, flat, etc) */ /* See rs.c for details about this PATH_INFO_xxx code */ /* Returns 0 if OK; 1 if problem copying old string; 2 if problem w/new */ { if (getenv(PATH_INFO_ENV_VAR) == NULL) { *old = NULL; *new = NULL; } else { *old = make_PATH_INFO_putenv_string (NULL,NULL,NULL,NULL,USE_EXISTING_LEVEL,NULL); if (*old == NULL) return PATH_INFO_NEW_AND_OLD_PROBLEM_WITH_OLD; *new = make_PATH_INFO_putenv_string (NULL,NULL,NULL,"jgof",USE_EXISTING_LEVEL,NULL); if (*new == NULL) return PATH_INFO_NEW_AND_OLD_PROBLEM_WITH_NEW; } return PATH_INFO_NEW_AND_OLD_OK; } char *get_jgofs_env_datum(datum_key,info_source) char *datum_key; char *info_source; /* See get_info routines doc for description of return value and */ /* info_source argument. */ /* The datum_key argument is a pointer to the name of the datum that */ /* is to be returned. 3 names are special: object, protocol, and */ /* options. Each of the special names is turned into a call to the */ /* corresponding get_special-name routine. Any names that are not */ /* special are searched for in the options section of PATH_INFO, whose */ /* format is assumed to be */ /* datum_key=datum,... */ { char *ptr,*key_ptr,*datum_ptr,*opt_ptr; if (strcmp(datum_key,"object") == 0) return get_object(info_source); if (strcmp(datum_key,"protocol") == 0) return get_protocol(info_source); opt_ptr = get_options(info_source); if ( (opt_ptr == NULL) || (strcmp(datum_key,"options") == 0) ) return opt_ptr; if (*opt_ptr == '\0') return opt_ptr; /* Parse a=b, ... string yet another way. Assumes most re- */ /* strictive rules (no white space, no quoting, no dups, etc.) */ /* If this gets ugly, see discussion of "Options package" at */ /* bottom. */ datum_ptr = NULL; ptr = opt_ptr; while ( ptr < opt_ptr + strlen(opt_ptr) ) { if ( (key_ptr = strstr(ptr,datum_key)) == NULL ) break; else { /* If char after datum_key is = and char before datum_key is */ /* , (or beginning of string), key found. Otherwise, it's */ /* embedded in something else, & we need to retry. (If found, */ /* move ptr past = sign) */ datum_ptr = key_ptr + strlen(datum_key); if (*(datum_ptr++) == PATH_INFO_OPTIONS_DEFN_CHAR) { /* Test this way to avoid testing opt_ptr-1, which is */ /* theoretically unknown, even though we know it's a { */ if (key_ptr == opt_ptr) break; if ( *(key_ptr-1) == PATH_INFO_OPTIONS_SEPARATOR_CHAR ) break; } datum_ptr = NULL; ptr = key_ptr+1; } } ptr = (datum_ptr == NULL) ? NULL : strchr(datum_ptr,PATH_INFO_OPTIONS_SEPARATOR_CHAR); datum_ptr = copy_substring(datum_ptr,ptr); free (opt_ptr); return datum_ptr; } /************************************************************************/ /* get_info routines. */ /************************************************************************/ /* All depend on order of fields in PATH_INFO */ /* All take 1 input argument, an "info_source" string */ /* If info_source is NULL, default source of info is used. If */ /* non-NULL, it is a pointer to a PATH_INFO string. Note that */ /* logically, the info we want has nothing to do with PATH_INFO, */ /* allowing us to change the mechanism of specifying the various */ /* pieces of information */ /* Routines that return a string pointer return a pointer to a */ /* dynamically allocated, null-terminated string. They return */ /* a pointer to an empty string if no info is found. They */ /* returns a NULL pointer if the string is bad somehow (see each */ /* routine) or if there's a memory allocation failure */ char *copy_substring(start_ptr,end_ptr) char *start_ptr,*end_ptr; /* Copy substring defined by start_ptr & end_ptr into dynamically */ /* allocated string. */ /* end_ptr = NULL means "copy to end of string" */ /* Return pointer to dynamically allocated string (empty if no */ /* characters in substring; NULL if allocation failed. */ { char *ret; char save1; if ( (start_ptr == NULL) || (start_ptr == end_ptr) ) { ret = (char *) malloc(1); *ret = '\0'; } else { if (end_ptr != NULL) { save1 = *end_ptr; *end_ptr = '\0'; } /* Emulate strdup function. +1 is for terminating '\0' */ ret = (char *) malloc(strlen(start_ptr)+1); if (ret != NULL) strcpy(ret,start_ptr); /* Restore original string, if necessary */ if (end_ptr != NULL) *end_ptr = save1; } return ret; } int get_level(info_source) char *info_source; /* See parse_PATH_INFO_string & info at top. */ /* See get_info argument doc, above */ /* This routine returns the level of output method has been re- */ /* quested to produce. If no level was provided, returns */ /* LEVEL_NOT_SPECIFIED, representing "all levels". Returns */ /* LEVEL_ERROR if format was bad (right now, this means first non- */ /* numeric character after level is not start of options string ({) */ /* or end-of-string (if no options). */ { char *proto_ptr,*lev_ptr,*options_ptr; char *term_char; int lev; parse_PATH_INFO_string(info_source,&proto_ptr,&lev_ptr,&options_ptr); if (lev_ptr == NULL) /* Could also return error if proto_ptr non-NULL if "rules" */ /* say protocol must have level */ lev = LEVEL_NOT_SPECIFIED; else { lev = strtol(lev_ptr,&term_char,10); /* Assume that level is terminated by options or end of string */ /* Further assumes no white space between level and options */ if ( (*term_char == '\0') || (*term_char == PATH_INFO_OPTIONS_DELIM[0]) ) /* Could validate level here (0-9, for example) */ ; else lev = LEVEL_ERROR; } return lev; } char *get_protocol(info_source) char *info_source; /* See parse_PATH_INFO_string & info at top. */ /* See get_info argument doc, above */ /* Returns a NULL pointer if the string is bad somehow (right now */ /* only happens if string is empty, which might not be considered an */ /* error. Oh, well) */ { char *proto_ptr,*lev_ptr,*options_ptr; char *end_proto_ptr,*orig_proto_ptr; parse_PATH_INFO_string(info_source,&proto_ptr,&lev_ptr,&options_ptr); orig_proto_ptr = proto_ptr; if (proto_ptr != NULL) { /* Advance beyond . */ proto_ptr++; /* Assume options follow level (which follows protocol) */ end_proto_ptr = (lev_ptr == NULL) ? options_ptr : lev_ptr; } /* copy_substring should handle NULL end_proto_ptr ... */ proto_ptr = copy_substring(proto_ptr,end_proto_ptr); if (proto_ptr != NULL) { /* Validate protocol here. If bad, free proto_ptr then set to */ /* NULL */ /* Can't have . without protocol */ if ( (*proto_ptr == '\0') && (orig_proto_ptr != NULL) ) { free (proto_ptr); proto_ptr = NULL; } } return proto_ptr; } char *get_object(info_source) char *info_source; /* See parse_PATH_INFO_string & info at top. */ /* See get_info argument doc, above */ /* Note that this routine returns either old or new format object */ /* names. This is fine if the intention is to use the object name */ /* without writing it for further use. Use get_object_old_format */ /* to ensure old format (which can be used by old software) */ { char *object_ptr,*proto_ptr,*lev_ptr,*options_ptr; char *end_object_ptr; object_ptr = parse_PATH_INFO_string(info_source,&proto_ptr,&lev_ptr,&options_ptr); if (object_ptr != NULL) { /* Find end of object string */ /* Assume order of things is object, protocol, level, options */ if ( (end_object_ptr = proto_ptr) == NULL ) if ( (end_object_ptr = lev_ptr) == NULL ) end_object_ptr = options_ptr; } /* copy_substring should handle NULL end_object_ptr ... */ object_ptr = copy_substring(object_ptr,end_object_ptr); if (object_ptr != NULL) { /* Validate object here. If bad, free object_ptr then set to */ /* NULL */ ; } return object_ptr; } char *get_options(info_source) char *info_source; /* See parse_PATH_INFO_string & info at top. */ /* See get_info argument doc, above */ /* Returns NULL if no terminating } is found */ { char *proto_ptr,*lev_ptr,*options_ptr; char *end_options_ptr; parse_PATH_INFO_string(info_source,&proto_ptr,&lev_ptr,&options_ptr); if (options_ptr != NULL) { /* Advance beyond { */ options_ptr++; /* Find {. Must be there if options exist, or we have an */ /* improperly formatted PATH_INFO */ end_options_ptr = strrchr(options_ptr,PATH_INFO_OPTIONS_DELIM[1]); if (end_options_ptr == NULL) return NULL; } options_ptr = copy_substring(options_ptr,end_options_ptr); if (options_ptr != NULL) { /* Validate options here. If bad, free options_ptr then set to */ /* NULL */ ; } return options_ptr; } /************************************************************************/ /* PATH_INFO analysis & formatting routines. */ /************************************************************************/ char *parse_PATH_INFO_string(PATH_INFO_string,proto_ptr,level_ptr,options_ptr) char *PATH_INFO_string; char **proto_ptr; char **level_ptr; char **options_ptr; /* This routine assumes that the final { in PATH_INFO marks the */ /* beginning of the options string. The final ., if any, before the */ /* options, if any, and after all slashes, if any, marks the beginning */ /* of a protocol/level string. The protocol consists entirely of */ /* non-numeric characters and is immediately followed by an all- */ /* numeric level, the options, or the end of the string */ /* */ /* This routine accepts PATH_INFO_string as input. If the input is */ /* NULL, it uses the value of environment variable PATH_INFO. */ /* It returns a pointer to the beginning of the string or NULL if */ /* the environment variable is undefined. It returns a pointer to */ /* the appropriate . in *proto_ptr (NULL if none), and a pointer to */ /* the first non-alphanumeric character after the . in *level_ptr */ /* (NULL if none). It returns a pointer to the appropriate { in */ /* *options_ptr (NULL if none). Note that since we do not return */ /* string lengths, routines that call this routine are sensitive */ /* to the order of fields in PATH_INFO and will probably break if */ /* the order is changed */ { char *env_ptr; char *ptr,*lptr; /* Set up for no protocol, level, or options info */ *proto_ptr = NULL; *options_ptr = NULL; lptr = NULL; env_ptr = (PATH_INFO_string == NULL) ? getenv(PATH_INFO_ENV_VAR) : PATH_INFO_string; if (env_ptr != NULL) { /* Temporarily lop off options, if any */ *options_ptr = strrchr(env_ptr,PATH_INFO_OPTIONS_DELIM[0]); if (*options_ptr != NULL) **options_ptr = '\0'; /* Protocol string, if any, is at end of option-less PATH_INFO */ /* and after any /s in the object spec */ ptr = strrchr(env_ptr,PATH_INFO_DIRECTORY_SEP); if (ptr == NULL) ptr = env_ptr; *proto_ptr = strrchr(ptr,PATH_INFO_PROTOCOL_SEP); if (*proto_ptr != NULL) { lptr = *proto_ptr; /* Find first non-alpha character after . */ while (isalpha(*(++lptr))) ; if (*lptr == '\0') lptr = NULL; } } *level_ptr = lptr; /* Restore options if necessary */ if (*options_ptr != NULL) **options_ptr = PATH_INFO_OPTIONS_DELIM[0]; return env_ptr; } char *get_object_old_format(s) char *s; { return make_PATH_INFO_putenv_string ("",s,NULL,"",LEVEL_NOT_SPECIFIED,""); } char *make_PATH_INFO_putenv_string (env_var,PATH_INFO_default,object,protocol,level,options) char *env_var,*object,*protocol,*options,*PATH_INFO_default; int level; /* See parse_PATH_INFO_string & info at top. */ /* Note this routine always returns "old" remote object specs (begin */ /* w/ double slashes) */ /* env_var is environment variable to set up. If *env_var = '\0', */ /* the string is constructed without the leading ENV_VAR=. NULL */ /* means set up the default environment variable (PATH_INFO) */ /* PATH_INFO_default is the "base" PATH_INFO string whose mod */ /* will be put into the env_var string. NULL means value of present */ /* PATH_INFO */ /* object, protocol, level, and options are pieces of the string to */ /* be inserted into the PATH_INFO string */ /* *object='\0' means put no object string in output */ /* object = NULL means use the existing object string */ /* *protocol='\0' means put no protocol string in output */ /* protocol = NULL means use the existing protocol */ /* level = LEVEL_NOT_SPECIFIED means put in no level information */ /* level = USE_EXISTING_LEVEL means use the existing level string */ /* *options='\0' means put no options string in output */ /* options = NULL means use the existing options string */ /* */ /* This routine returns a pointer to a dynamically allocated string */ /* suitable for putenv. (Do not free this string after calling */ /* putenv until you've called another putenv for the same environment */ /* variable.) Its value is a string of the form */ /* ENV_VAR=good-PATH_INFO-string */ /* (or just good-PATH_INFO-string - see env_var input argument) */ /* Returns NULL in case of trouble. Cause 1: inability to get */ /* some dynamic memory or other. Cause 2: finished string, when sent */ /* through the get_info routines, returned error. Note that we do */ /* NOT diagnose a properly formatted final string that is too long */ /* (for now, anyway) */ { char *object_ptr,*proto_ptr,*lev_ptr,*options_ptr; char *buf,*env; char *inptr,*outptr,*start_env_val; Logical new_remote_object_format; char levbuf[10]; char empty = '\0'; /* Get pieces from user arguments or PATH_INFO string */ /* (Note that valid string pieces can be empty). */ /* Failure if can't */ /* Do level first so we don't have to worry about free'ing if */ /* bad level */ if (level == USE_EXISTING_LEVEL) level = get_level(PATH_INFO_default); if (level == LEVEL_NOT_SPECIFIED) lev_ptr = ∅ else if (level < 0) return NULL; else sprintf ( (lev_ptr=levbuf), "%d", level ); object_ptr = (object == NULL) ? get_object(PATH_INFO_default) : object; if (object_ptr == NULL) return NULL; new_remote_object_format = (is_a_remote_object_new_way(object_ptr) != 0); proto_ptr = (protocol == NULL) ? get_protocol(PATH_INFO_default) : protocol; if (proto_ptr == NULL) return NULL; options_ptr = (options == NULL) ? get_options(PATH_INFO_default) : options; if (options_ptr == NULL) return NULL; env = (env_var == NULL) ? PATH_INFO_ENV_VAR : env_var; /* +5 is 1 for =, 1 for ., 2 for {}, 1 for terminating '\0' */ /* and 1 in case we need to prepend a / */ buf = (char *) malloc ( strlen(env) + strlen(object_ptr) + strlen(proto_ptr) + strlen(levbuf) + strlen(options_ptr) + 6 ); if (buf == NULL) { if (object_ptr != object) free (object_ptr); if (proto_ptr != protocol) free (proto_ptr); if (options_ptr != options) free (options_ptr); return NULL; } /* Build string from component pieces */ outptr = buf; if (*env != '\0') { /* strcpy (buf,env) & add = */ inptr = env; while (*inptr != '\0') *(outptr++) = *(inptr++); *(outptr++) = ENV_VAR_DEFN_CHAR; } /* Save this location, which is start of new PATH_INFO */ start_env_val = outptr; /* strcat (buf,object_ptr) */ /* (& make sure remote objects lead off w double slash) */ inptr = object_ptr; if (new_remote_object_format) *(outptr++) = '/'; while (*inptr != '\0') *(outptr++) = *(inptr++); if (object_ptr != object) free (object_ptr); if ( (*proto_ptr != '\0') || (*lev_ptr != '\0') ) { /* Add ., then strcat protocol & level info */ *(outptr++) = PATH_INFO_PROTOCOL_SEP; inptr = proto_ptr; while (*inptr != '\0') *(outptr++) = *(inptr++); if (proto_ptr != protocol) free (proto_ptr); inptr = lev_ptr; while (*inptr != '\0') *(outptr++) = *(inptr++); } if (*options_ptr != '\0') { /* strcat options; prepend { and add } */ *(outptr++) = PATH_INFO_OPTIONS_DELIM[0]; inptr = options_ptr; while (*inptr != '\0') *(outptr++) = *(inptr++); *(outptr++) = PATH_INFO_OPTIONS_DELIM[1]; if (options_ptr != options) free (options_ptr); } *outptr = '\0'; /* Validate input args by passing them through get_info routines */ /* That way any checks need only be coded in those routines */ if ( (proto_ptr = get_protocol(start_env_val)) == NULL ) return NULL; else free(proto_ptr); if ( (object_ptr = get_object(start_env_val)) == NULL ) return NULL; else free(object_ptr); if ( (options_ptr = get_options(start_env_val)) == NULL ) return NULL; else free(options_ptr); if (get_level(start_env_val) == LEVEL_ERROR) return NULL; return buf; } /************************************************************************/ /* Options package. Need something that verifies structure, */ /* something better that picks out value of desired "left sides"; */ /* eg, get_option("dir") returns xxx if PATH_INFO has dir=xxx; */ /* and something that builds a valid options string */ /* Much of this code already exists. def's attribute */ /* handling is quite similar to this. Building the attribute */ /* string after adding width=nn is the same as building */ /* the string here (which is actually adding an option, which is */ /* another function we'd like here). Looking for an existing */ /* width= is the same as looking up dir=, and has the same */ /* problems (what if there's an iowidth= attribute?). */ /* The defgb wjstbl package will parse & pick the */ /* a=b; c=d;... format, with some generality (eg, allowing white */ /* space). It will also verify structure (eg, detect duplicate */ /* keys). Structurally, there is a problem in that you only */ /* want to parse the string once, and then do all your picks. */ /* However, that would get away from allowing many PATH_INFO */ /* strings to be parsed by this package. Another problem is that */ /* wjstbl routines use others which might not be modular. In */ /* particular, uses defgb err() function. */