/* TVUTIL.C

   This file contains general utilities for the PDS Table Verifier tool.

   16 March 2001, A.C.Raugh.

   17 May 2001, acr: Modifications to checkdatefld and checktimefld for
                     YYYY-DDD format dates and truncated times.
   11 Jun 2001, acr: Modified 'breakline' to not signal the
                     underscore-within-quotes warning for pointers or
                     FILE_NAME keywords.
   28 Jun 2001, acr: Output cleanup: removed item list for max/min, since
                     only overall max/min is recorded anyway; adjusted
                     spacing.
   14 Jan 2002, acr: Modifications to add batch mode processing.
   26 Feb 2002, acr: Increased precision of max/min found in file.
   07 Mar 2002, acr: Added check for valid max/min before displaying.  A
                     null marker is now displayed if no valid values were
                     encountered in the file.
   08 Mar 2002, acr: Cleaned up table attributes listing for containers.
   16 Jan 2003, acr: Upgraded to ignore blank padding in MISSING_CONSTANT
                     and INVALID_CONSTANT field checks.
   01 Feb 2003, acr: Added handling for NOT_APPLICABLE_CONSTANT,
                     NULL_CONSTANT and UNKNOWN_CONSTANT; adjusted output
                     summary format.
   06 Dec 2006, acr: Reworked the time field checks to remove annoying and
                     spurious error messages.
   11 Dec 2006, acr: Modified to allow TIME field checking to accept field
                     values truncated at reasonable (i.e., valid) points.
   11 Jan 2007, acr: Fixed stupid typos in final TIME field check (of
                     full-length time fields).
   21 Feb 2007, acr: Fixed yet another stupid bug in TIME field checking.
   03 May 2007, acr: Increased size of time field holding variables for
                     larger time fields.
   19 Feb 2008, acr: Fixed memory leak caused by an undocumented and
                     unsuccessful attempt to fix a faulty assumption in
                     "breakline" that was messing up offsets to tables.
                     Also fixed that faulty assumption.
   19 Aug 2013, acr: Changed "getline" to "getlblline" to avoid conflicts
                     in new compiler.
*/ #include #include #include #include #include #include #include "pdstv.h" #include "tverr.h" #include "tvutil.h" /*--------------------------------------------------------------------------- Global Variables */ extern FILE *label, *data; /* input files */ extern FILE *report; /* output file */ extern char barline[100],blanks[100]; /* report file dingbats */ extern char dblbar[100]; extern int flag_blank_fields; extern char *field_type[]; extern int batch_mode; extern table *table_top; /* Table object list */ /*--------------------------------------------------------------------------- Local Functions */ void printcnt(container *cnt, int indent); void printcol(column *col, int indent); void set_string_maxmin(char *value, column*col); void print_container_attributes(container *cnt, FILE *report); /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ int getlblline(char *line, FILE *ifp, int *linecount) /* Routine to read the next ODL line in the label. It ignores comment lines and clips leading blanks from the input line. It returns zero if end of file is encountered. Parameters: char *line; input line buffer FILE *ifp; input file pointer int *linecount; input line number 03 Nov 1994, acr: It also checks for pairs of double-quotes and will continue reading (and dumping) lines until a matching end-quote is found. It does NOT, however, perform any sanity-checking on the position of the quotes. 
*/
{
  char  *ptr;                       /* roving pointer into the buffer just read */
  int    i;                         /* subscript of the last non-blank character */
  int    done;                      /* TRUE once a usable line is in 'line'     */
  size_t lblanks;                   /* number of leading blanks                 */
  size_t length;                    /* length of input string                   */
  char   inptline[MAXRECORDLENGTH]; /* scratch buffer for continuation lines    */
  static char whitespace[] = { ' ', '\r', '\n', '\0'};

  /* Read until a line that is neither blank nor a comment turns up.  'line'
     is filled by fgets with a limit of MAXRECORDLENGTH, so every write
     below is kept inside that same limit. */

  done = FALSE;
  while (!done)
    {
      if ((ptr = fgets(line,MAXRECORDLENGTH,ifp)) == NULL)
        return 0;                                  /* end of file */
      (*linecount)++;

      lblanks = strspn(ptr,whitespace);
      length  = strlen(ptr);
      if (length == 0 || lblanks == length)
        continue;                                  /* blank/null line */

      ptr += lblanks;
      if (strncmp(ptr,"/*",2) == 0)
        continue;                                  /* comment line */

      /* Slide the text to the front of the buffer.  Source and destination
         overlap, which strcpy does not permit, so memmove is used. */

      memmove(line,ptr,strlen(ptr)+1);

      /* Find the last non-blank character.  The scan is bounded below: a
         line holding only tabs passes the blank check above (tab is not in
         'whitespace') but is all white space to isspace. */

      i = (int)strlen(line) - 1;
      while (i >= 0 && isspace((unsigned char)line[i]))
        i--;

      /* If this line ends with an '=' append the next line (and hope for
         the best).  Nothing is appended when the read fails or when there
         is no room left for the separating blank: */

      if (i >= 0 && line[i] == '=' && i+2 < MAXRECORDLENGTH)
        {
          line[i+1] = ' ';
          line[i+2] = '\0';
          if (fgets(inptline,MAXRECORDLENGTH,ifp) != NULL)
            {
              (*linecount)++;
              strncat(line,inptline,
                      (size_t)MAXRECORDLENGTH - strlen(line) - 1);
            }
        }

      done = TRUE;
    }

  /* Before returning, check to see if there is one or two double quotes in
     this line.  If there are two, return normally; but if there is only
     one, read in lines until a matching double-quote is found.  These lines
     are discarded. */

  ptr = strchr(line,'"');
  if (ptr != NULL)
    {
      ptr++;                                       /* look past the first quote */
      while (strchr(ptr,'"') == NULL)
        {
          if ((ptr = fgets(inptline,MAXRECORDLENGTH,ifp)) == NULL)
            return 0;                              /* unterminated quote at EOF */
          (*linecount)++;
        }
    }

  return 1;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

int breakline(char *line, char *keyword, char *value, int linenum)

/* Routine to break a label line up into keyword and value.  If no "=" is
   found a status of 0 is returned; successful status is 1.  The keyword is
   forced to upper case; the value is checked for underscores within double
   quotes.  Blanks are trimmed from the end of the value and keyword.

   09 Nov 1994, acr: discard comments on end of line
   08 Mar 2000, acr: Move check for underscores inside quotes to here; fix
                     null-value part handling.
   06 Jan 2003, acr: Removed blank-trimming from value part, to preserve
                     significant blanks in character strings.
*/ { int i,j,k; int length; char *ptr; /* substring pointer */ int status; /* return status value */ int inquotes; /* TRUE if value is in quotes */ status = 1; inquotes = FALSE; /* transfer letters until a blank or "=" is found: */ length = strlen(line); i = 0; while (i < length && line[i] != ' ' && line[i] != '=') { keyword[i] = toupper(line[i]); ++i; } /* delete blanks on the end, if any: */ while (isspace(keyword[--i])); keyword[i+1] = '\0'; /* add the string terminator */ /* If there is no value field, we're done. Pass back an empty string and the "no value" flag: */ if (!(strchr(line,'='))) { value[0] = '\0'; return 0; } /* Now, find the '=' and pass it and following blanks: */ while (line[i] != '=') ++i; ++i; while (line[i] == ' ') ++i; /*=========================================================================== This check is being disabled until PDS DEs can sort out what the heck to do about this. */ /* Check the value field for both underscores and double quotes - this is an error according to the way the PDS verifiers work. */ /* if (strpbrk(line+i,"_") && strpbrk(line+i,"\"")) { if (strstr(keyword,"DESC") == NULL && strstr(keyword,"NOTE") == NULL && strstr(keyword,"FILE_NAME") == NULL && keyword[0] != '^') { tverr(QUOTED_UNDERSCORES,linenum); } } */ /*=========================================================================*/ /* Copy characters over for the value string. The value will be enclosed by one of four characters: double quotes, parentheses, braces or blanks. NOTE: For the moment we are ignoring the possibility of nested braces and parentheses. This is extrememly rare to begin with and doesn't occur in SBN data sets to date - but BEWARE! 
*/ j=0; if (line[i] == '"') { ++i; while (i 0) { if (!getlblline(line,label,linecount)) { tverr(UNEXP_EOF,linecount,"label"); return ENDOFFILE; } value_found = breakline(line,keyword,value,*linecount); if (strcmp(keyword,"END") == 0) { tverr(UNEXP_END,linecount); return ENDOFLABEL; } /* Check for beginning or end of object and adjust object_level: */ if (strcmp(keyword,"END_OBJECT")==0) { object_level--; } else if (strcmp(keyword,"OBJECT")==0) { object_level++; } } /* If the END_OBJECT included a label, it should match the input label: */ if (value_found && strcmp(value,objlabel) != 0) { tverr(OBJLBL_MISMATCH,linecount,value,objlabel); } /* Done. */ return READY; } /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ int next_object(char *objlabel, int *linecount) /* This routine reads past comments, keywords and blank lines to the next "OBJECT =" statement. It stops at end of label (an "END" statement), end of file, or END_OBJECT and returns an appropriate code in those cases. the label (i.e., the value part) of the OBJECT statement is returned via the 'objlabel' parameter. */ { char keyword[80]; char line[MAXRECORDLENGTH]; int READY = 0; /* Normal return status */ /* This loop repeats until we encounter one of the terminal conditions: */ while (TRUE) { if (!getlblline(line,label,linecount)) { tverr(UNEXP_EOF,linecount,"label"); return ENDOFFILE; } breakline(line,keyword,objlabel,*linecount); /* Look for an "OBJECT =" line: */ if (strcmp(keyword,"OBJECT") == 0) { return READY; } /* Failing that, check for end of label: */ else if (strcmp(keyword,"END") == 0) { return ENDOFLABEL; } /* Check for END_OBJECT: */ else if (strcmp(keyword,"END_OBJECT") == 0) { return ENDOFOBJECT; } /* Continue until we're kicked out of the loop: */ } } /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ void print_structure (table *top) /* Routine to display the table structures read in. 
*/ { table *tab; field *fld; column *col; container *cnt; int indent; int tabcount; /* Loop through tables: */ tabcount = 0; tab = top; while (tab) { tabcount++; printf ("Table %d",tabcount); if (tab->label) printf (" [%s]",tab->label); printf (":\n\n"); printf ("Data File: %s\n",tab->datafile); printf (" Offset: %d\n",tab->offset); if (tab->ascii) printf (" Format: ASCII\n"); else printf (" Format: Binary\n"); printf (" Rows: %4d\n",tab->rows); printf (" Row Byes: %4d\n",tab->row_bytes); printf (" Columns: %4d\n",tab->columns); /* Loop through fields (stop if none): */ printf ("\n ........Fields...........\n\n"); fld = tab->fldlist; indent = 4; if (fld == NULL) { printf (" *** NO FIELDS FOUND ***\n\n"); return; } while (fld) { if (fld->type == COLUMN) { col = fld->ptr.col; printcol(col,indent); } else { cnt = fld->ptr.cnt; printcnt(cnt,indent); } fld = fld->next; } /* Next table: */ printf ("\n\n"); tab = tab->next; } /* Done. */ return; } /*---------------------------------------------------------------------------*/ void printcnt (container *cnt, int sp) { field *fld; column *cl; container *cn; int count; printf ("\n"); printf ("%*sCOLUMN %s [CONTAINER]:\n",sp," ",cnt->colnum); printf ("%*s %-15s %s\n",sp," ","Name",cnt->name); printf ("%*s %-15s %d\n",sp," ","Start Bytes",cnt->start_byte); printf ("%*s %-15s %d\n",sp," ","Byte",cnt->bytes); printf ("%*s %-15s %d\n",sp," ","Repetitions",cnt->repetitions); /* Loop through subobjects: */ printf ("\n%*s .......Fields.......\n\n",sp," "); fld = cnt->fldlist; if (fld == NULL) { printf ("\n%*s *** No columns found ***\n\n",sp," "); return; } while (fld) { if (fld->type == COLUMN) { cl = fld->ptr.col; printcol(cl,sp+4); } else { cn = fld->ptr.cnt; printcnt(cn,sp+4); } fld = fld->next; } /* Done. 
*/ printf ("\n\n"); return; } /*---------------------------------------------------------------------------*/ void printcol (column *col, int sp) { printf ("\n"); /************** printf ("%*s COLUMN %2d_____________\n",sp," ",colcount); printf ("%*s Name: %s\n",sp," ",col->name); printf ("%*s Start Byte: %d\n",sp," ",col->start_byte); printf ("%*s Bytes: %d\n",sp," ",col->bytes); printf ("%*s Data Type: %s\n",sp," ",field_type[col->type]); printf ("%*s Items: %d\n",sp," ",col->items); ***************/ printf ("%*sCOLUMN %s:\n",sp," ",col->colnum); printf ("%*s %-15s %s\n",sp," ","Name",col->name); printf ("%*s %-15s %d\n",sp," ","Start Byte",col->start_byte); printf ("%*s %-15s %d\n",sp," ","Bytes",col->bytes); printf ("%*s %-15s %s\n",sp," ","Data Type",field_type[col->type]); printf ("%*s %-15s %d\n",sp," ","Items",col->items); return; } /*---------------------------------------------------------------------------*/ void printerror(int type, int field_error, int error_count[], int record, column *col, FILE *report) /* Routine to track and display error messages. Parameters: type error type field_error TRUE if the error applies only to the field record record number column column structure report output file */ { /* Increment the error count: */ error_count[type]++; /* If this does not exceed the maximum count, then display the message: */ if (error_count[type] <= MAXERRORS) { if (field_error) tverr(type,record,col->colnum,col->name); else tverr(type,record); } if (error_count[type] == MAXERRORS) { tverr(MAX_ERRORS,record,type,error_count[type]); } return; } /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ int count_column_types(field *flist, int *numeric, int *character, int *date, int *time, int *spare, int *unknown) /* Checks each field and subfield and returns total count of defined fields of each type for the field list beginning at "fld". Returns total number of fields (and subfields). 
*/
{
  field *fld;                      /* field being examined */
  int    num,chr,dte,tme,spr,unk;  /* counts returned for one CONTAINER */

  /* This is a shortcut.  There are currently 8 types defined (in
     'pdstv.h') by numbers which are convenient to use as subscripts.
     All eight counters start at zero. */

  int typecount[8] = {0};

  for (fld = flist; fld != NULL; fld = fld->next)
    {
      if (fld->type == COLUMN)
        {
          typecount[fld->ptr.col->type]++;
        }
      else if (fld->type == CONTAINER)
        {
          /* A container contributes the counts of the fields inside it.
             Its numeric total is carried in the REAL counter, which is
             harmless because the numeric types are summed below. */

          count_column_types(fld->ptr.cnt->fldlist,
                             &num,&chr,&dte,&tme,&spr,&unk);
          typecount[REAL]         += num;
          typecount[CHARACTER]    += chr;
          typecount[DATE]         += dte;
          typecount[TIME]         += tme;
          typecount[UNRECOGNIZED] += unk;
          typecount[SPARE]        += spr;
        }
    }

  /* Now add together the numeric field types and generate the overall
     total: */

  *numeric   = typecount[UNSIGNED_INTEGER] + typecount[SIGNED_INTEGER]
             + typecount[REAL];
  *character = typecount[CHARACTER];
  *date      = typecount[DATE];
  *time      = typecount[TIME];
  *spare     = typecount[SPARE];
  *unknown   = typecount[UNRECOGNIZED];

  return *numeric + *character + *date + *time + *spare + *unknown;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void writenumsum(field *fldlist_top, FILE *report)

/* Routine to write out the summary statistics for numeric fields.
*/
{
  int     useFmax,useFmin;         /* printing format flags                     */
  double  max,min;                 /* holding places                            */
  int     maxd = 0,mind = 0;       /* number of decimal places in output format */
  double  absmax,absmin;           /* absolute values of max and min            */
  int     type;                    /* field type                                */
  int     totalmissing;            /* total MISSING, UNKNOWN and NULL fields    */
  int     col_note;                /* note bits for one column: 1 = UNKNOWN
                                      fields counted, 2 = NULL fields counted   */
  int     need_note;               /* col_note bits combined over all columns   */
  field  *fld;
  column *col;

  /* Loop through the fields, calling this routine recursively to deal with
     CONTAINERs.  A container's own footnote, if any, is written by the
     recursive call. */

  need_note = 0;
  for (fld = fldlist_top; fld != NULL; fld = fld->next)
    {
      if (fld->type == CONTAINER)
        {
          writenumsum(fld->ptr.cnt->fldlist,report);
          continue;
        }

      /* Non-numeric and illegal objects are ignored: */

      if (fld->type != COLUMN || !NUMERIC_FIELD(fld->ptr.col->type))
        continue;

      col = fld->ptr.col;

      /* This mess is just to accommodate some pretty printing: */

      type   = col->type;
      max    = col->maxfound.dbl;
      min    = col->minfound.dbl;
      absmax = (max > 0)? max : -max;
      absmin = (min > 0)? min : -min;

      useFmax = (1.e-4 < absmax && absmax < 1.e7) || (absmax == 0.0);
      useFmin = (1.e-4 < absmin && absmin < 1.e7) || (absmin == 0.0);
      if (useFmax) maxd = findprec(max);
      if (useFmin) mind = findprec(min);

      /* Max/min is collected across all items, so we have only a single
         value to report.  We check to make sure that somewhere along the
         line we encountered at least one valid value: */

      fprintf (report,"%3.3s %-15.15s ", col->colnum,col->name);
      fprintf (report," %3d ",col->items);

      if (col->maxfound.dbl == -MAXDOUBLE)
        {
          fprintf (report," - ");   /* Null minimum */
          fprintf (report," - ");   /* Null maximum */
        }
      else
        {
          if (type == REAL && useFmin)
            fprintf (report,"%9.*f ",mind,min);
          else if (type == REAL)
            fprintf (report,"%9.2e ",min);
          else if (type == SIGNED_INTEGER || type == UNSIGNED_INTEGER)
            fprintf (report,"%9d ",(int)min);
          else
            fprintf (report," n/a ");

          if (type == REAL && useFmax)
            fprintf (report,"%9.*f ",maxd,max);
          else if (type == REAL)
            fprintf (report,"%9.2e ",max);
          else if (type == SIGNED_INTEGER || type == UNSIGNED_INTEGER)
            fprintf (report,"%9d ",(int)max);
          else
            fprintf (report," n/a ");
        }

      if (col->invflag)
        fprintf (report," %5d ",col->invalidcount);
      else
        fprintf (report," - ");

      /* The MISSING column also carries the UNKNOWN and NULL counts.  The
         note bits are OR-ed rather than added, so that several columns
         with UNKNOWN (or NULL) values still select the right footnote,
         and the '*' marks only the columns it applies to. */

      totalmissing = col->missingcount + col->unknowncount + col->nullcount;
      col_note = 0;
      if (col->unknowncount) col_note |= 1;
      if (col->nullcount)    col_note |= 2;
      need_note |= col_note;

      if (totalmissing == 0)
        fprintf (report," - ");
      else if (col_note)
        fprintf (report," %5d* ",totalmissing);
      else
        fprintf (report," %5d ",totalmissing);

      if (col->naflag)
        fprintf (report," %5d ",col->nacount);
      else
        fprintf (report," - ");

      fprintf (report,"%5d\n", col->badcount);
    }

  /* That finishes the table.  If we need a footnote, we add it: */

  if (need_note == 3)
    {
      fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"UNKNOWN and NULL fields.\n");
    }
  else if (need_note == 2)
    {
      fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"NULL fields.\n");
    }
  else if (need_note == 1)
    {
      fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"UNKNOWN fields.\n");
    }

  /* Done. */

  return;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void writecharsum(field *fldlist_top, FILE *report)

/* Routine to write out the summary statistics for non-numeric fields.
   SPARE fields are not listed.

   Parameters:
     fldlist_top   first field of the list to summarize; CONTAINERs are
                   handled by a recursive call on their own field list
     report        output file
*/

{
  field  *fld;
  column *col;
  int     totalmissing;            /* total MISSING, UNKNOWN and NULL fields  */
  int     col_note;                /* note bits for one column: 1 = UNKNOWN
                                      fields counted, 2 = NULL fields counted */
  int     need_note;               /* col_note bits combined over all columns */

  /* Loop through the fields in the list, looking for non-numeric types: */

  need_note = 0;
  for (fld = fldlist_top; fld != NULL; fld = fld->next)
    {
      if (fld->type == CONTAINER)
        {
          writecharsum(fld->ptr.cnt->fldlist,report);
          continue;
        }

      /* Everything else is ignored: */

      if (fld->type != COLUMN || fld->ptr.col->type == SPARE ||
          NUMERIC_FIELD(fld->ptr.col->type))
        continue;

      col = fld->ptr.col;

      fprintf (report,"%3.3s %-15.15s ", col->colnum,col->name);
      fprintf (report," %3d ",col->items);
      fprintf (report," %3d %3d ", col->minfound.bytes,col->maxfound.bytes);

      if (col->invflag)
        fprintf (report," %5d ",col->invalidcount);
      else
        fprintf (report," - ");

      /* Same bookkeeping as for the numeric summary: the note bits are
         OR-ed, and the '*' is decided column by column. */

      totalmissing = col->missingcount + col->unknowncount + col->nullcount;
      col_note = 0;
      if (col->unknowncount) col_note |= 1;
      if (col->nullcount)    col_note |= 2;
      need_note |= col_note;

      if (totalmissing == 0)
        fprintf (report," - ");
      else if (col_note)
        fprintf (report," %5d* ",totalmissing);
      else
        fprintf (report," %5d ",totalmissing);

      if (col->naflag)
        fprintf (report," %5d ",col->nacount);
      else
        fprintf (report," - ");

      fprintf (report,"%5d\n", col->badcount);

      /* Maximum and minimum values are printed on separate lines, unless
         no valid values were encountered: */

      if (col->maxstr)
        {
          fprintf (report," Minimum: '%s'\n",col->minstr);
          fprintf (report," Maximum: '%s'\n",col->maxstr);
        }
    }

  /* That finishes the table.  If we need a footnote, we add it: */

  if (need_note == 3)
    {
      fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"UNKNOWN and NULL fields.\n");
    }
  else if (need_note == 2)
    {
      fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"NULL fields.\n");
    }
  else if (need_note == 1)
    {
      fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"UNKNOWN fields.\n");
    }

  /* Done. */

  return;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

int findprec(double value)

/* Routine to return the number of places of precision after the decimal
   for use in an F output format: 0 places for magnitudes of 100000 and
   up, one more place for each power of ten below that, and 6 places for
   magnitudes under 1. */

{
  static const double limit[] = { 100000., 10000., 1000., 100., 10., 1. };
  double val;                      /* holding place for absolute value */
  int    places;

  val = (value > 0.0)? value : -value;

  for (places = 0; places < 6; places++)
    {
      if (val >= limit[places]) return places;
    }
  return 6;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void checkcharfld(char *val, int bytes, int record_count, column *col,
                  int error_count[], FILE *report)

/* Routine to read and check a character field against the label
   attributes.
Parameters: val value buffer bytes length of value record_count data record count col column structure error_count error count accumulator array report output file */ { char *fmt; /* format string buffer */ int bc; /* FORMAT bytes count */ int blank; /* 1 if the field is blank */ int i,not_done; /* loop/subscript */ int FIELD_ERROR = TRUE; int bad_value; /* error tracking flag */ char *trimval; /* Blank-trimmed copy of the input value */ /* If this is a string field we'll check for blanks, *_CONSTANTs, and the extrema. Note that blank fields are treated as regular string fields unless there is a specific request from the user (by command line option) to flag blank fields. */ if (NUMERIC_FIELD(col->type)) return; /* A number */ blank = (strspn(val," ")==bytes); /* check for blank field */ if (flag_blank_fields && blank) /* flag blank field if requested */ { printerror(BLANK_FIELD,FIELD_ERROR,error_count,record_count,col, report); return; } /* From now on we'll work with a blank-trimmed copy of the input string: */ trimval = trim_copyof(val); /* Check for any one of the various *_CONSTANT values. 
If found, we return, as there's nothing else to check: */ if (col->invflag && (strcmp(col->invalid.str,trimval) == 0)) { col->invalidcount++; free(trimval); return; } if (col->missflag && (strcmp(col->missing.str,trimval) == 0)) { col->missingcount++; free(trimval); return; } if (col->naflag && (strcmp(col->not_applicable.str,trimval) == 0)) { col->nacount++; free(trimval); return; } if (col->unkflag && (strcmp(col->unknown.str,trimval) == 0)) { col->unknowncount++; free(trimval); return; } if (col->nullflag && (strcmp(col->null.str,trimval) == 0)) { col->nullcount++; free(trimval); return; } /* If there were bounds in the label, make sure we haven't exceeded them: */ bad_value = FALSE; if (col->mmflag%2) /* Max present */ { if (strcmp(col->max.str,trimval)<0) { printerror(GREATER_THAN_MAX,FIELD_ERROR,error_count, record_count,col,report); bad_value = TRUE; } } if (col->mmflag > 1) /* Min present */ { if (strcmp(trimval,col->min.str)<0) { printerror(LESS_THAN_MIN,FIELD_ERROR,error_count, record_count,col,report); bad_value = TRUE; } } if (col->vmflag%2) { if (strcmp(col->vmax.str,trimval)<0) { printerror(GREATER_THAN_VMAX,FIELD_ERROR,error_count, record_count,col,report); bad_value = TRUE; } } if (col->vmflag>1) { if (strcmp(col->vmax.str,trimval)<0) { printerror(LESS_THAN_MIN,FIELD_ERROR,error_count, record_count,col,report); bad_value = TRUE; } } /* Done with the trimmed copy: */ free(trimval); /* And finally, loop through the characters in the field to make sure they include only printing characters: */ i = 0; not_done = TRUE; while (ibytes && not_done) { if (!isprint(val[i])) { printerror(NONPRINT_CHAR,FIELD_ERROR,error_count, record_count,col,report); bad_value = TRUE; not_done = FALSE; } i++; } if (bad_value) col->badcount++; /* Now set extrema and we're done: */ set_string_maxmin(val,col); return; } /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ void checkdatefld(char *val, int bytes, int record_count, column *col, int 
error_count[], FILE *report)

/* Routine to read and check a DATE field.

   Parameters
     val           string value to be checked
     bytes         length of value
     record_count  data record count
     col           column structure
     error_count   error count accumulator array
     report        output file

   Columns of any other type are ignored.  A value equal to one of the
   column's *_CONSTANT strings is counted under that constant and checked
   no further.  Otherwise the value is checked character by character
   against the YYYY, YYYY-MM, YYYY-DDD and YYYY-MM-DD layouts, compared
   with the label bounds that are present, and folded into the extrema
   recorded for the column.
*/

{
  int  blank;                      /* 1 if the field is blank                   */
  int  bad_value;                  /* flag indicating a format problem was found */
  int  FIELD_ERROR = TRUE;
  char date[50];                   /* holds blank-trimmed value                 */
  int  datelen;                    /* length of the blank-trimmed value         */
  int  i;

  /* Make sure this is a date field: */

  if (col->type != DATE) return;                   /* Not a DATE */

  /* Check for a blank field.  This is only an error if we've not been
     directed to ignore blank fields: */

  blank = (strspn(val," ")==bytes);                /* check for blank field */
  if (flag_blank_fields && blank)                  /* flag it if requested  */
    {
      printerror(BLANK_FIELD,FIELD_ERROR,error_count,record_count,col,report);
      return;
    }
  if (blank) return;               /* blank fields are otherwise ignored */

  /* Check to see if we've got a *_CONSTANT flag value rather than a date.
     If so, we return immediately as no further checking is required: */

  if (col->invflag && trimcmp(val,col->invalid.str)==0)
    {
      col->invalidcount++;
      return;
    }
  if (col->missflag && trimcmp(val,col->missing.str)==0)
    {
      col->missingcount++;
      return;
    }
  if (col->naflag && trimcmp(val,col->not_applicable.str)==0)
    {
      col->nacount++;
      return;
    }
  if (col->unkflag && trimcmp(val,col->unknown.str)==0)
    {
      col->unknowncount++;
      return;
    }
  if (col->nullflag && trimcmp(val,col->null.str)==0)
    {
      col->nullcount++;
      return;
    }

  /* Find the length of the value without its trailing blanks.  The length
     is measured on the input itself, with the subscript tested before it
     is used, so that nothing is copied until the size is known to fit. */

  datelen = (int)strlen(val);
  while (datelen > 0 && val[datelen-1] == ' ')
    {
      datelen--;
    }

  /* If the field is not the size of a valid DATE format (or would not fit
     in the holding buffer), we don't try verifying it, but we will signal
     the error, set extrema and return: */

  if (datelen < 4 || datelen > MAXDATEWIDTH || datelen >= (int)sizeof date)
    {
      printerror(INVALID_DATE,FIELD_ERROR,error_count,record_count,
                 col,report);
      col->badcount++;
      set_string_maxmin(val,col);
      return;
    }

  memcpy(date,val,(size_t)datelen);
  date[datelen] = '\0';

  /* DATE fields MUST contain only date information in the format
     YYYY-MM-DD or YYYY-DDD (i.e., the ISO standard DATE formats).
     Anything else should be described either as individual integers or a
     string.  All digits must be present (zero-padded if necessary) and the
     century may not be negative.  We accomplish this checking through
     straight brute-force testing of each character: */

  bad_value = 0;
  for (i = 0; i < 4; i++)
    {
      bad_value += (!isdigit((unsigned char)date[i]));
    }

  /* We know we had at least a year, but all bets are off after that, so
     we'll need to check for the presence of each field before checking its
     contents.  If we haven't got just a naked year, then we must have at
     least three characters following it: a separator and two digits. */

  if (datelen > 4 && datelen < 7)
    {
      bad_value++;
    }
  else if (datelen >= 7)
    {
      bad_value += (date[4] != '-');
      bad_value += (!isdigit((unsigned char)date[5]));
      bad_value += (!isdigit((unsigned char)date[6]));
    }

  /* [Note it's also OK to stop at month (length 7)] */

  if (datelen == 8)                /* YYYY-DDD: one more digit */
    {
      bad_value += (!isdigit((unsigned char)date[7]));
    }
  else if (datelen == 9)           /* no valid layout is 9 long */
    {
      bad_value++;
    }
  else if (datelen == 10)          /* YYYY-MM-DD: check the DD field */
    {
      bad_value += (date[7] != '-');
      bad_value += (!isdigit((unsigned char)date[8]));
      bad_value += (!isdigit((unsigned char)date[9]));
    }

  if (bad_value)
    {
      printerror(INVALID_DATE,FIELD_ERROR,error_count,record_count,
                 col,report);
    }

  /* If there were bounds in the label, make sure we haven't exceeded
     them.  Each test is skipped once the value is known to be bad, so at
     most one of these is reported: */

  if (col->mmflag%2 && !bad_value)                 /* MAXIMUM present */
    {
      if (strcmp(col->max.str,val)<0)
        {
          printerror(GREATER_THAN_MAX,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->mmflag > 1 && !bad_value)               /* MINIMUM present */
    {
      if (strcmp(val,col->min.str)<0)
        {
          printerror(LESS_THAN_MIN,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->vmflag%2 && !bad_value)                 /* VALID_MAXIMUM */
    {
      if (strcmp(col->vmax.str,val)<0)
        {
          printerror(GREATER_THAN_VMAX,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->vmflag > 1 && !bad_value)               /* VALID_MINIMUM */
    {
      if (strcmp(val,col->vmin.str)<0)
        {
          printerror(LESS_THAN_VMIN,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  /* Update the bad value counter as needed: */

  if (bad_value) col->badcount++;

  /* Check the extrema and we're done: */

  set_string_maxmin(val,col);

  return;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void checktimefld(char *val, int bytes, int record_count, column *col,
                  int error_count[], FILE *report)

/* Routine to read and check a TIME field.
Parameters val string vlaue to be checked bytes length of value record_count data record count col column structure error_count error_count accumulator array report output file Note that TIME fields may be truncated on the right as far as needed. The minimum size is technically a bare year, but if a field is defined as being of type TIME, we expect at least hours of accuracy. */ { int blank; /* 1 if the field is blank */ int bad_value; /* flag indicating a format problem was found */ int last; /* last character marker for verification */ int FIELD_ERROR = TRUE; int doy; /* TRUE if date format is YYYY-DDD */ int ts; /* Subscript of start of time (HH:MM:SS) field */ char tmp[50]; /* Holding place for time part. */ int len; /* string length */ char time[100]; /* Blank-trimmed value */ int timelen; /* length of 'time' */ int i; /* Make sure this is a time field: */ if (col->type != TIME) return; /* Not a TIME */ /* Check for a blank field. This is only an error if we've not been directed to ignore blank fields: */ blank = (strspn(val," ")==bytes); /* check for blank numeric field */ if (flag_blank_fields && blank) /* flag blank field if requested */ { printerror(BLANK_FIELD,FIELD_ERROR,error_count,record_count,col,report); return; } if (blank) return; /* blank fields are otherwise ignored */ /* Check to see if we've got a MISSING or INVALID flag value rather than a time. If so, we return immediately as no further checking is required: */ if (col->invflag && trimcmp(val,col->invalid.str)==0) { col->invalidcount++; return; } if (col->missflag && trimcmp(val,col->missing.str)==0) { col->missingcount++; return; } /* Get a blank-trimmed version of the value and its length: */ strcpy(time,val); timelen = strlen(time); while (time[timelen-1] == ' ' && timelen > 0) { timelen--; } time[timelen] = '\0'; /* If the field size does not fall in the valid range for TIME formats, we won't try verifying it - we'll just check the extrema and exit. 
We are making an assumption of logical labelling, here - specifically, that if the DATA_TYPE is TIME, there there are at least hours given. A time field without at least hour accuracy should have had a type of DATE. */ if (timelen < 4 || timelen > MAXTIMEWIDTH) { printerror(INVALID_TIME,FIELD_ERROR,error_count,record_count, col,report); col->badcount++; set_string_maxmin(val,col); return; } /* TIME fields contain both date and time information, but they can be truncated on the right as appropriate to the precision, which may mean in extreme cases that only a date, and possibly only a year, is present. */ /* Date information must be in the same format as for DATE-only fields (i.e., YYYY-MM-DD or YYYY-DDD), so we'll check that first using the same method as in the "checkdatefld" subroutine. */ bad_value = 0; bad_value += (!isdigit(time[0])); bad_value += (!isdigit(time[1])); bad_value += (!isdigit(time[2])); bad_value += (!isdigit(time[3])); if (timelen > 4 && timelen < 7) { bad_value++; } else if (timelen >= 7) { bad_value += (time[4] != '-'); bad_value += (!isdigit(time[5])); bad_value += (!isdigit(time[6])); } /* In this case, if there's another character we need to determine if it's a '-', indicating YYYY-MM-DD format, or a digit, indicating YYyY-DDD format. Then we'll need to save that information for locating the time substring later: */ /* Two possible options here, DOY or MM-DD format. 
So we look for a separator to distinguish: */ if (timelen >= 8 && time[7] == '-') /* MM-DD */ { bad_value += (!isdigit(time[8])); bad_value += (!isdigit(time[9])); ts = 10; } else if (timelen >= 8) { bad_value += (!isdigit(time[5])); bad_value += (!isdigit(time[6])); bad_value += (!isdigit(time[7])); ts = 8; } /* If there is no time part, we're done, so check first: */ if (ts < timelen) { /* The next character after the date part has to be 'T': */ bad_value += (time[ts] != 'T'); ts++; /* We'll copy the time part into a separate string to make the checking a little easier: */ if (ts != strlen(time)) { strcpy(tmp,time+ts); len = strlen(tmp); /* Now we'll check in segments, being careful not to look past the end of the string: */ if (len < 2) { bad_value++; } else { bad_value += (!isdigit(tmp[0])); bad_value += (!isdigit(tmp[1])); } if (len > 2 && len < 5) { bad_value++; } else if (len >= 5) { bad_value += (tmp[2] != ':'); bad_value += (!isdigit(tmp[3])); bad_value += (!isdigit(tmp[4])); } if (len > 5 && len < 8) { bad_value++; } else if (len >= 8) { bad_value += (tmp[5] != ':'); bad_value += (!isdigit(tmp[6])); bad_value += (!isdigit(tmp[7])); } bad_value += (len > 8 && tmp[8] != '.'); for (i=9; immflag%2) /* MAXIMUM */ { if (strcmp(col->max.str,val)<0) { printerror(GREATER_THAN_MAX,FIELD_ERROR,error_count,record_count, col,report); bad_value = TRUE; } } if (col->mmflag > 1) /* MINIMUM */ { if (strcmp(val,col->min.str)<0) { printerror(LESS_THAN_MIN,FIELD_ERROR,error_count,record_count, col,report); bad_value = TRUE; } } if (col->vmflag%2) /* VALID_MAXIMUM */ { if (strcmp(col->vmax.str,val)<0) { printerror(GREATER_THAN_VMAX,FIELD_ERROR,error_count,record_count, col,report); bad_value = TRUE; } } if (col->vmflag > 1) /* VALID_MINIMUM */ { if (strcmp(val,col->vmin.str)<0) { printerror(LESS_THAN_VMIN,FIELD_ERROR,error_count,record_count, col,report); bad_value = TRUE; } } /* Update the bad value counter as needed: */ if (bad_value) col->badcount++; /* Check the extrema 
and we're done: */ set_string_maxmin(val,col); return; } /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ void set_string_maxmin(char *value, column*col) /* Routine to check for and set extrema of a string field. Parameters value string to be compared col column parameters */ { int len; /* string length */ /* First, check for Maximum and minimum string values: */ if (col->maxstr == NULL) { col->maxstr = (char *)malloc(strlen(value)+1); strcpy(col->maxstr,value); } else if (strcmp(col->maxstr,value)<0) { free(col->maxstr); col->maxstr = (char *)malloc(strlen(value)+1); strcpy(col->maxstr,value); } if (col->minstr == NULL) { col->minstr = (char *)malloc(strlen(value)+1); strcpy(col->minstr,value); } else if (strcmp(value,col->minstr)<0) { free(col->minstr); col->minstr = (char *)malloc(strlen(value)+1); strcpy(col->minstr,value); } /* Next, check for maximum and minimum string lengths: */ len = strlen(value); while (len>0 && value[len-1]==' ') { len--; } col->maxfound.bytes = (col->maxfound.bytes < len)? len : col->maxfound.bytes; col->minfound.bytes = (col->minfound.bytes > len)? len : col->minfound.bytes; /* And we're done: */ return; } /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ void print_table_attributes (table *tab, FILE *report) /* Print the summary attributes for the given table and its fields. Returns the total number of numeric fields encountered. */ { field *fld; column *col; /* Start by writing the general TABLE attributes: */ fprintf (report," Table attributes\n"); fprintf (report," ----------------\n"); fprintf (report," Table object %d: %s\n",tab->tblnum,tab->label); fprintf (report," ROWS: %6d\n",tab->rows); fprintf (report," COLUMNS: %6d\n",tab->columns); fprintf (report," ROW_BYTES: %6d\n",tab->row_bytes); /* Now the column attributes. A recursive routine is called to deal with sub-columns of CONTAINERS. 
*/ fprintf (report, "\n Col Name Start Bytes Items "); fprintf (report," Type\n"); fld = tab->fldlist; while (fld) { if (fld->type == COLUMN) { col = fld->ptr.col; fprintf (report," %4.4s %-30.30s %5d %4d %4d %-10s\n", col->colnum,col->name,col->start_byte, col->bytes,col->items,field_type[col->type]); } else if (fld->type == CONTAINER) { print_container_attributes(fld->ptr.cnt,report); } fld = fld->next; } /* Done: */ return; } /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ void print_container_attributes(container *cnt, FILE *report) /* Routine to print each field within a container, calling itself if needed. Returns the number of numeric fields encountered. */ { field *fld; column *col; fprintf (report, " %4.4s %-30.30s %5d %4d x%4d %-10s\n", cnt->colnum, cnt->name, cnt->start_byte, cnt->bytes, cnt->repetitions,"CONTAINER"); fld = cnt->fldlist; while (fld) { if (fld->type == COLUMN) { col = fld->ptr.col; fprintf (report," %6s %-30.30s %5d %4d %4d %-10s\n", col->colnum,col->name,col->start_byte, col->bytes,col->items,field_type[col->type]); } else if (fld->type == CONTAINER) { print_container_attributes(fld->ptr.cnt,report); } fld = fld->next; } /* Done */ return; } /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ char *trim_copyof(char *s) /* Routine to return a copy of the input string 's' from which the leading and trailing blanks have been trimmed. 
*/ { char *copy; /* Return pointer */ int first,last; /* end points */ int length; int i,j; /* First, intercept blank and empty strings: */ length = strlen(s); if (length < 1 || length == strspn(s," ")) { copy = (char *)malloc(2); copy[0] = '\0'; return copy; } /* For non-blank strings, find the end points in the original string: */ first = 0; while (s[first] == ' ') first++; last = strlen(s) - 1; while (s[last] == ' ') last--; length = last - first + 1; /* Allocate space and copy the non-blank part: */ copy = (char *)malloc(length+1); j = 0; for (i=first; i<=last; i++) copy[i-first] = s[i]; copy[length] = '\0'; /* Done: */ return copy; } /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ int trimcmp(char *s1, char *s2) /* Routine to compare blank-trimmed versions of the input strings, returning the 'strcmp' result. */ { char *c1, *c2; /* Copies of the input strings */ int result; /* strcmp result */ c1 = trim_copyof(s1); c2 = trim_copyof(s2); result = strcmp(c1,c2); free(c1); free(c2); return result; } /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ int spare_column(column *col) /* Routine to check if a column is designated as SPARE. Returns TRUE or FALSE accordingly. */ { int i; /* string subscript */ /* Spare columns are identified by the NAME value. If the NAME is "SPARE" or ends with "_SPARE" (or " SPARE"), we return a TRUE result. 
*/ if (strcmp(col->name,"SPARE") == 0) { return TRUE; } /* To check for "_SPARE", we'll use the brute-force method: */ i = strlen(col->name); if ((col->name[i-6] == '_' || col->name[i-6] == ' ') && col->name[i-5] == 'S' && col->name[i-4] == 'P' && col->name[i-3] == 'A' && col->name[i-2] == 'R' && col->name[i-1] == 'E') { return TRUE; } /* If we're here, this is not a SPARE column: */ return FALSE; } /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/