/* BINTV.C This file contains the routine used to read and verify the contents of binary table objects. 15 March 2001, acr: Extracted from the 'btv' routine. 12 Jun 2001, acr: Fixed stupid problem in using RECORD_BYTES rather then ROW_BYTES for reading the table. Errors reported relative to table row count. 28 Jun 2001, acr: Adjusted output spacing. 14 Jan 2002, acr: Modifications to add batch mode processing 26 Feb 2002, acr: Increased precision of max/min found in file 01 Feb 2003, acr: Added NOT_APPLICABLE_CONSTANT, NULL_CONSTANT and UNKNOWN_CONSTANT handling; adjusted output summary format */ #include #include #include #include #include #include #include "pdstv.h" #include "bintv.h" #include "tverr.h" #include "tvutil.h" #define SIGDIFF(A,B) (fabs(A-B)/A)>1.e-06 /*--------------------------------------------------------------------------- Global Variables */ extern FILE *label, *data; /* input files */ extern FILE *report; /* output file */ extern char barline[100],blanks[100]; /* report file dingbats */ extern char dblbar[100]; extern int flag_blank_fields; extern char *field_type[]; extern char *error_msg[]; extern int batch_mode; extern table *table_top; /* Table object list */ /*=========================================================================== Static Arrays ===========================================================================*/ /*--------------------------------------------------------------------------- Local Functions, invisible to outside routines. */ static void check_field (char *line, int row_count, field *fld, int offset, int error_count[], FILE *report); static void check_column(char *line, int row_count, column *col, int offset, int error_count[], FILE *report); static void checknumfld (double value, int row_count, column *col, int error_count[], FILE *report); static double convert_to_double(union typeconv *value, column *col); /*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/ long int read_binary_table_data(table *tab, int column_count, int record_bytes) /* Main driver routine for reading the binary data of the table object pointed to by 'tab'. Conflicts between the label descriptions and the actual contents of the data file are flagged as errors. */ { FILE *data; /* Data file pointer */ int i,j,k; /* Loop/subscript */ long int table_records; /* records in the current table */ void *line; /* input buffer */ int length; /* string length */ int max_length = 0; /* maximum record length */ int min_length = MAXINT; /* minimum record length */ int offset; /* Total offset to start of field */ int numcount, chrcount; /* Field type counts */ int datecount,timecount; int sparecount,unkcount; int error_count[ERRORLISTCOUNT]; /* error counters */ field *fld; column *col; container *cnt; /* Allocate an input line buffer: */ line = (void *)malloc(tab->row_bytes + 1); /* Open the data file and position the file pointer to the indicated offset: */ if ((data=fopen(tab->datafile,"rb")) == NULL) { for (i=0; i<(int)strlen(tab->datafile); ++i) tab->datafile[i] = tolower(tab->datafile[i]); if ((data=fopen(tab->datafile,"rb")) == NULL) { for (i=0; i<(int)strlen(tab->datafile); ++i) tab->datafile[i] = toupper(tab->datafile[i]); if ((data=fopen(tab->datafile,"rb")) == NULL) { tverr(DATA_FILE_NOT_FOUND,0,tab->datafile,strerror(errno)); if (batch_mode) fprintf (report,"OK\n"); fclose(report); exit(20); } } } /* The data file is now open. If an offset was indicated, advance the file pointer to the correct location: */ if (tab->offset > 0) { if (fseek(data,tab->offset,SEEK_SET) != 0) { tverr(FILE_SEEK_FAILED,0); if (batch_mode) fprintf (report,"OK\n"); fclose(report); exit(21); } } /* Report file banner: */ if (!batch_mode) { fprintf (report,"\n%79.79s\n\n",dblbar); fprintf (report,"DATA ERRORS\n------------\n"); fprintf (report,"Data file: %s\n\n",tab->datafile); } /* Initialize the error count array: */ memset((char *)error_count, '\0', sizeof error_count); /* Loop through the records: */ table_records = 0; while ( (fread(line,tab->row_bytes,1,data) == 1) && (table_records < tab->rows)) { ++table_records; /* Loop through the fields, checking each individually: */ offset = tab->row_prefix_bytes; fld = tab->fldlist; while (fld) { check_field(line, table_records, fld, offset, error_count, report); fld = fld->next; } } fclose(data); /* Make sure we read as many rows as we expected (this will not report extra rows at the end): */ if (table_records != tab->rows) { tverr(TOO_FEW_TABLE_ROWS,table_records,table_records,tab->rows); } /* Detailed reports are only produced in non-batch mode: */ if (!batch_mode) { /* Report file banner: */ fprintf (report,"\n%79.79s\n\n",dblbar); fprintf (report, "SUMMARY\n-------\n\n"); /**---------------------------**/ /* Display label parameters */ print_table_attributes(tab,report); /* Get counts of column types: */ column_count = count_column_types(tab->fldlist,&numcount,&chrcount, &datecount,&timecount,&sparecount, &unkcount); /* Write summary values for numeric fields: */ if (numcount > 0) { fprintf (report,"\n\n"); fprintf (report,"Data Values Summary for Numeric Fields. "); fprintf (report," Note that extrema below are the actual\n"); fprintf (report,"data values from the file, "); fprintf (report,"without scaling or offset:\n\n"); fprintf (report," Min "); fprintf (report," Max "); fprintf (report," INVALID MISSING N/A Bad\n"); fprintf (report,"Col Name Items Value"); fprintf (report," Value "); fprintf (report," Fields Fields Fields Data\n\n"); writenumsum(tab->fldlist,report); } /* Write summary value for character fields: */ if ((numcount + sparecount) < column_count) { fprintf (report,"\n\n"); fprintf (report,"Data Values Summary for Non-numeric Fields:\n\n"); fprintf (report,"%29.29sMin Max "," "); fprintf (report," INVALID MISSING N/A Bad\n"," "); fprintf (report,"Col Name Items Bytes Bytes"); fprintf (report," Fields Fields Fields Data\n\n"," "); writecharsum(tab->fldlist,report); } /* Show error count totals: */ fprintf (report,"\n\nTotal Error Counts by Type:\n\n"); for (i=0; itype == COLUMN) { col = fld->ptr.col; check_column(line, row_count, col, offset, error_count, report); } /* For CONTAINERs, we call this routine for each sub-field: */ else if (fld->type == CONTAINER) { cnt = fld->ptr.cnt; f = cnt->fldlist; while (f) { check_field(line,row_count,f,offset+cnt->start_byte-1, error_count,report); f = f->next; } } /* Non-standard objects are ignored. */ return; } /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ void check_column(char *line, int row_count, column *col, int offset, int error_count[], FILE *report) /* Routine to check the contents of a single COLUMN object. Parameters; line input line buffer row_count table row counter col pointer to column structure offset total byte offset to start of parent object error_count error count accumulator array report output device */ { union typeconv value; /* Type conversion structure */ int i; int rec_offset; /* byte offset from start of record */ double real; /* real test space */ char *hold; /* temporary holding place for conversion strings */ int type,bytes; /* shorthand values */ /* SPARE columns are ignored: */ if (col->type == SPARE) return; /* A COLUMN may have one or more items. Each COLUMN is initialized to have 1 item, both at creation and as the attributes are being read. So, we can treat all COLUMNs the same and just loop through the items defined in the structure (usually just one). For each item, we determine the actual byte offset from the start of the record, transfer the data to the appropriate holding area, convert numeric values as needed, and call a routine to check the specific type of data indicated. */ for (i=0; iitems; i++) { rec_offset = offset + (col->start_byte - 1) + (col->item_offset * i); bytes = col->item_bytes; type = col->type; /* Character types get copied into a holding array. Numeric types are copied to the 'value' structure and converted to double precision. */ if (NUMERIC_FIELD(type)) { memcpy(&value,line+rec_offset,bytes); real = convert_to_double(&value,col); } else { hold = (char *)malloc(bytes+1); memcpy(hold,line+rec_offset,bytes); hold[bytes] = '\0'; } /* Now we call a data check routine based on type: */ if (NUMERIC_FIELD(type)) { checknumfld(real,row_count,col,error_count,report); } else if (type == DATE) { checkdatefld(hold,bytes,row_count,col,error_count,report); } else if (type == TIME) { checktimefld(hold,bytes,row_count,col,error_count,report); } else { checkcharfld(hold,bytes,row_count,col,error_count,report); } /* Free any temporary space allocated: */ if (!NUMERIC_FIELD(type)) free(hold); } /* All done. */ return; } /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ double convert_to_double(union typeconv *value, column *col) /* Routine to input the numeric value passed in to double precision real. The value parameters are contained in the 'col' structure. */ { double real; /* holding place for return value */ char buff; /* byte-swap buffer */ int type,bytes; /* convenience variables */ type = col->type; bytes = col->item_bytes; /* If we're dealing with LSB integers, we need to reverse the order of the bytes before converting: */ if ((type == SIGNED_INTEGER || type == UNSIGNED_INTEGER) && col->msbflag == LSB) { if (bytes == 2) { buff = value->byte[0]; value->byte[0] = value->byte[1]; value->byte[1] = buff; } else if (bytes == 4) { buff = value->byte[0]; value->byte[0] = value->byte[3]; value->byte[3] = buff; buff = value->byte[1]; value->byte[1] = value->byte[2]; value->byte[2] = buff; } } /* Now that we know the bytes are in the right order, we can convert: */ if (type == UNSIGNED_INTEGER) { if (bytes == 1) { real = (double)value->uc; } else if (bytes == 2) { real = (double)value->usi; } else if (bytes == 4) { real = (double)value->uli; } else { tverr(OOPS_NUMBER_SIZE,1,"An unsigned integer",col->colnum,bytes); col->type = CHARACTER; real = 0.0; } } else if (type == SIGNED_INTEGER) { if (bytes == 1) { real = (double)value->sc; } else if (bytes == 2) { real = (double)value->ssi; } else if (bytes == 4) { real = (double)value->sli; } else { tverr(OOPS_NUMBER_SIZE,1,"A signed integer",col->colnum,bytes); col->type = CHARACTER; real = 0.0; } } else if (type == REAL) { if (bytes == 4) { real = (double)value->r4; } else if (bytes == 8) { real = value->r8; } else { tverr(OOPS_NUMBER_SIZE,1,"A real numer",col->colnum,bytes); col->type = CHARACTER; real = 0.0; } } /* Return the converted value: */ return real; } /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ void checknumfld(double value, int row_count, column *col, int error_count[], FILE *report) /* Routine to read and check a numeric field against the its label attributes. Parameters: value double-precision version of the data value row_count table row count col column structure error_count error count accumulator array report output unit */ { double diff; /* difference between given value and INVALID flag */ double adjusted; /* value with offset and scaling applied */ double dabs; /* double-precision absolute value holder */ int bad_value; /* error tracking flag */ int FIELD_ERROR = TRUE; /* First check for potential special numeric values. If we find them, we can count them and return, since no further checking or comparison is needed: */ /* ...INVALID_CONSTANT... */ if (col->invflag) { dabs = value - col->invalid.dbl; dabs = (dabs > 0.0) ? dabs : -dabs; if (dabs < 1.e-6) { col->invalidcount++; return; } } /* ...MISSING_CONSTANT... */ if (col->missflag) { dabs = value - col->missing.dbl; dabs = (dabs > 0.0) ? dabs : -dabs; if (dabs < 1.e-6) { col->missingcount++; return; } } /* ...NOT_APPLICABLE_CONSTANT... */ if (col->naflag) { dabs = value - col->not_applicable.dbl; dabs = (dabs > 0.0) ? dabs : -dabs; if (dabs < 1.e-6) { col->nacount++; return; } } /* ...UNKNOWN_CONSTANT... */ if (col->unkflag) { dabs = value - col->unknown.dbl; dabs = (dabs > 0.0) ? dabs : -dabs; if (dabs < 1.e-6) { col->unknowncount++; return; } } /* ...NULL_CONSTANT... */ if (col->nullflag) { dabs = value - col->null.dbl; dabs = (dabs > 0.0) ? dabs : -dabs; if (dabs < 1.e-6) { col->nullcount++; return; } } /* If this contains an actual value, check it against the extrema in the label and save the max/min found in the data: */ col->maxfound.dbl = (value > col->maxfound.dbl)? value : col->maxfound.dbl; col->minfound.dbl = (value < col->minfound.dbl)? value : col->minfound.dbl; adjusted = value * col->scaling_factor + col->offset; bad_value = FALSE; /* ...MAXIMUM... */ if (col->mmflag%2 && value>col->max.dbl && SIGDIFF(value,col->max.dbl)) { printerror(GREATER_THAN_MAX,FIELD_ERROR,error_count,row_count, col,report); bad_value = TRUE; } /* ...MINIMUM... */ if (col->mmflag>1 && valuemin.dbl && SIGDIFF(value,col->min.dbl)) { printerror(LESS_THAN_MIN,FIELD_ERROR,error_count,row_count, col,report); bad_value = TRUE; } /* ...DERIVED_MAXIMUM... */ if (col->dmflag%2 && adjusted>col->dmax && SIGDIFF(adjusted,col->dmax)) { printerror(GREATER_THAN_DMAX,FIELD_ERROR,error_count,row_count, col,report); bad_value = TRUE; } /* ...DERIVED_MINIMUM... */ if (col->dmflag>1 && adjusteddmin && SIGDIFF(adjusted,col->dmin)) { printerror(LESS_THAN_DMIN,FIELD_ERROR,error_count,row_count, col,report); bad_value = TRUE; } /* ...VALID_MAXIMUM... */ if (col->vmflag%2 && value>col->vmax.dbl && SIGDIFF(value,col->vmax.dbl)) { printerror(GREATER_THAN_VMAX,FIELD_ERROR,error_count,row_count, col,report); bad_value = TRUE; } /* ...VALID_MAXIMUM... */ if (col->vmflag>1 && valuevmin.dbl && SIGDIFF(value,col->vmin.dbl)) { printerror(LESS_THAN_VMIN,FIELD_ERROR,error_count,row_count, col,report); bad_value = TRUE; } /* Update the bad data count and return: */ if (bad_value) col->badcount++; return; } /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/