/*

  TVUTIL.C

  This file contains general utilities for the PDS Table Verifier tool.

  16 March 2001, A.C.Raugh.

  17 May 2001, acr: Modifications to checkdatefld and checktimefld for
                    YYYY-DDD format dates and truncated times.
  11 Jun 2001, acr: Modified 'breakline' to not signal underscore-within-quotes
                    warning for pointers or FILE_NAME keywords.
  28 Jun 2001, acr: Output cleanup: removed item list for max/min, since only
                    overall max/min is recorded anyway; adjusted spacing.

  14 Jan 2002, acr: Modifications to add batch mode processing.

  26 Feb 2002, acr: Increased precision of max/min found in file

  07 Mar 2002, acr: Added check for valid max/min before displaying. A null
                    marker is now displayed if no valid values were encountered
                    in the file.
  08 Mar 2002, acr: Cleaned up table attributes listing for containers

  16 Jan 2003, acr: Upgraded to ignore blank padding in MISSING_CONSTANT and
                    INVALID_CONSTANT field checks.

  01 Feb 2003, acr: Added handling for NOT_APPLICABLE_CONSTANT, NULL_CONSTANT
                    and UNKNOWN_CONSTANT; adjusted output summary format

  06 Dec 2006, acr: Reworking on time field checks to remove annoying and 
                    spurious error messages

  11 Dec 2006, acr: Modified to allow TIME field checking to accept field 
                    values truncated at reasonable (i.e., valid) points.

  11 Jan 2007, acr: Fixed stupid typos in final TIME field check (of full-
                    length time fields)

  21 Feb 2007, acr: Fixed yet another stupid bug in TIME field checking

  03 May 2007, acr: Increased size of time field holding variables for
                    larger time fields

  19 Feb 2008, acr: Fixed memory leak error caused by undocumented and
                    unsuccessful attempt to fix a faulty assumption in 
                    "breakline" that was messing up offsets to tables.  
                    Also fixed that faulty assumption.

  19 Aug 2013, acr: Changed "getline" to "getlblline" to avoid conflicts
                    in new compiler.
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <values.h>

#include "pdstv.h"
#include "tverr.h"
#include "tvutil.h"


/*---------------------------------------------------------------------------
  Global Variables 
*/

/* 'label' is the stream that skip_object() and next_object() hand to
   getlblline() when reading label lines:
*/
extern FILE    *label, *data;              /* input files                  */
extern FILE    *report;                    /* output file                  */
extern char     barline[100],blanks[100];  /* report file dingbats         */
extern char     dblbar[100];
/* When nonzero, checkcharfld() and checkdatefld() report an all-blank field
   as a BLANK_FIELD error and stop checking that field:
*/
extern int      flag_blank_fields; 
/* Printable names of the field types; printcol() indexes this by col->type: */
extern char    *field_type[];
/* NOTE(review): not referenced in the part of this file reviewed here;
   presumably set for batch mode processing (change log, 14 Jan 2002) -
   confirm against its definition.
*/
extern int      batch_mode;

extern table   *table_top;                 /* Table object list            */


/*---------------------------------------------------------------------------
  Local Functions
*/

/* Display helpers called by print_structure(); both print to standard output
   and take the indentation width as their second argument:
*/
void printcnt(container *cnt, int indent);
void printcol(column *col, int indent);
/* Called at the end of checkcharfld() to record string extrema for a column.
   Its definition is outside the part of the file reviewed here:
*/
void set_string_maxmin(char *value, column*col);
/* NOTE(review): neither defined nor called in the part of the file reviewed
   here - confirm it is still in use:
*/
void print_container_attributes(container *cnt, FILE *report);

/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/

int getlblline(char *line, FILE *ifp, int *linecount)

/* Routine to read the next ODL line in the label.  It skips blank lines and
   lines that begin with a comment, and clips leading blanks and line
   delimiters from the line it hands back.

   Parameters:

     char *line;       output line buffer (at least MAXRECORDLENGTH bytes)
     FILE *ifp;        input file pointer
     int  *linecount;  line counter; incremented once for every physical
                       line actually read from ifp

   Returns 1 if a line was read; 0 if end of file is encountered, either
   before a usable line is found or while looking for a closing quote.

   If the line ends with an '=', the next physical line is appended to it
   after a single blank.  The result is truncated, if necessary, to fit in
   MAXRECORDLENGTH bytes.  If there is no next line, the line is returned
   as it stands.

   03 Nov 1994, acr:  It also checks for pairs of double-quotes and will
                      continue reading (and dumping) lines until a matching
                      end-quote is found.  It does NOT, however, perform
                      any sanity-checking on the position of the quotes.
*/

{ char  *ptr;
  int    done;
  int    last;           /* index of last non-blank character in line */
  size_t length;         /* length of input string */
  size_t lblanks;        /* number of leading blanks */
  char   inptline[MAXRECORDLENGTH];
  static const char whitespace[] = { ' ', '\r', '\n', '\0'};


  done = FALSE;

  while (!done)
    { if ((ptr=fgets(line,MAXRECORDLENGTH,ifp)) == NULL)
          return 0;
      (*linecount)++;

      lblanks = strspn(ptr,whitespace);
      length  = strlen(ptr);
      if (length == 0  ||  lblanks == length)   /* blank/null line check */
          continue;

      ptr += lblanks;
      if (strncmp(ptr,"/*",2) == 0)             /* comment check */
          continue;

      /* Source and destination overlap, so strcpy is not an option: */

      memmove(line,ptr,length-lblanks+1);

      /* If this line ends with an '=' append the next line (and
         hope for the best).  The scan is bounded because a line made up
         of tabs passes the blank-line check above but is all white space
         as far as isspace() is concerned:
      */

      last = (int)(length-lblanks) - 1;
      while (last >= 0  &&  isspace((unsigned char)line[last])) last--;

      if (last >= 0  &&  line[last] == '='  &&  last+2 < MAXRECORDLENGTH)
        { line[last+1] = ' ';
          line[last+2] = '\0';
          if (fgets(inptline,MAXRECORDLENGTH,ifp) != NULL)
            { (*linecount)++;

              /* Append only as much as the caller's buffer can hold: */

              strncat(line,inptline,(size_t)(MAXRECORDLENGTH-1-(last+2)));
            }
        }

      done = TRUE;
    }

  /* Before returning, check to see if there is one or two double quotes 
     in this line.  If there are two, return normally; but if there is
     only one, read in lines until a matching double-quote is found.
     These lines are discarded.
  */

  ptr = strchr(line,'"');
  if (ptr != NULL)

    { /* Found one quote.  Look for a second: */

      ptr++;

      while (strchr(ptr,'"') == NULL)
        { if ((ptr=fgets(inptline,MAXRECORDLENGTH,ifp)) == NULL) 
              return 0;
          (*linecount)++;
        }
    }
     
  return 1;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

int breakline(char *line, char *keyword, char *value, int linenum)

/* Routine to break a label line up into keyword and value.  If no "=" is
   found a status of 0 is returned and the value is set to an empty string;
   successful status is 1.  The keyword is forced to upper case and trailing
   white space (including line delimiters) is trimmed from it.  Blanks inside
   the value are preserved; only trailing line delimiters are removed.

   Parameters:

     char *line;       input label line (not modified)
     char *keyword;    receives the keyword; may be empty
     char *value;      receives the value part; may be empty
     int   linenum;    label line number (used only by the disabled
                       underscore check below)

   No buffer sizes are passed in, so the caller must supply keyword and
   value buffers at least as large as the line itself.

   A value that begins with '(' or '{' is copied up to, but not including,
   the matching close character; a value in double quotes is copied without
   the quotes.

   09 Nov 1994, acr: discard comments on end of line
   08 Mar 2000, acr: Move check for underscores inside quotes to here; fix
                     null-value part handling.
   06 Jan 2003, acr: Removed blank-trimming from value part, to preserve
                     significant blanks in character strings. 

*/

{ int    i,j;
  int    length;
  char   closer;   /* character that ends the value; 0 for a bare value */

  /* transfer letters until a blank or "=" is found: */

  length = (int)strlen(line);
  i = 0;
  while (i < length  &&  line[i] != ' '  &&  line[i] != '=')
    { keyword[i] = (char)toupper((unsigned char)line[i]);
      ++i;
    }

  /* delete white space on the end, if any.  The keyword may be empty (for
     example when the line starts with "="), so the scan must not step
     in front of the buffer:
  */

  while (i > 0  &&  isspace((unsigned char)keyword[i-1])) --i;
  keyword[i] = '\0';   /* add the string terminator */

  /* If there is no value field, we're done. Pass back an empty string
     and the "no value" flag:
  */

  if (strchr(line,'=') == NULL)
    { value[0] = '\0';
      return 0;
    }


  /* Now, find the '=' and pass it and following blanks: */

  while (line[i] != '=') ++i;
  ++i;
  while (line[i] == ' ') ++i;

/*===========================================================================
   This check is being disabled until PDS DEs can sort out what the heck to
   do about this.
*/
  /* Check the value field for both underscores and double quotes - this
     is an error according to the way the PDS verifiers work.
  */
/*
  if (strpbrk(line+i,"_") && strpbrk(line+i,"\"")) 
    { if (strstr(keyword,"DESC") == NULL  &&  strstr(keyword,"NOTE") == NULL
          &&  strstr(keyword,"FILE_NAME") == NULL  &&  keyword[0] != '^')
        { tverr(QUOTED_UNDERSCORES,linenum); }
    }
*/
/*=========================================================================*/

  /* Copy characters over for the value string.  The value will be enclosed
     by one of four characters: double quotes, parentheses, braces or blanks.

     NOTE: For the moment we are ignoring the possibility of nested braces
     and parentheses.  This is extremely rare to begin with and doesn't
     occur in SBN data sets to date - but BEWARE!
  */

  closer = 0;
  if (line[i] == '"')
    { closer = '"';
      ++i;                     /* the opening quote is not part of the value */
    }
  else if (line[i] == '(')
    { closer = ')'; }
  else if (line[i] == '{')
    { closer = '}'; }

  j = 0;
  if (closer)
    { while (i < length  &&  line[i] != closer)
        { value[j++] = line[i++]; }
    }
  else
    { /* Bare value: stop at white space OR at the end of the line, which
         need not end in a line delimiter:
      */
      while (i < length  &&  !isspace((unsigned char)line[i]))
        { value[j++] = line[i++]; }
    }

  /* Blank space is left intact here, so we delete any line delimiters and
     add the string terminator:
  */

  while (j > 0  &&  (value[j-1] == '\r'  ||  value[j-1] == '\n'))
    { j--; }
  value[j]='\0';


  /* Done */

  return 1;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

char *copy_of (char *value)

  /* Routine to return a pointer to a new copy of the input string.  The copy
     is allocated with malloc() and belongs to the caller, who must free() it.

     Returns NULL if 'value' is NULL or if the memory cannot be allocated.
  */

{ char   *p;    /* New space */
  size_t  n;    /* bytes to copy, terminator included */

  if (value == NULL) return NULL;

  n = strlen(value) + 1;
  p = malloc(n);
  if (p != NULL)
    { memcpy(p,value,n); }

  return p;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

int skip_object(char *objlabel, int *linecount)

  /* Routine to skip over the definition of the current object. This routine
     is called when the current input line is the "OBJECT=" line at the start
     of the definition. The label file pointer is a global variable. On
     return, the last line read is the matching "END_OBJECT" statement.

     Parameters:
        objlabel          PDS object being read
        linecount         Position in label file (updated as lines are read)

     Returns:
        0                 the matching END_OBJECT was found
        ENDOFFILE         the label file ended first (UNEXP_EOF is reported)
        ENDOFLABEL        an END statement was found first (UNEXP_END is
                          reported)

     If the END_OBJECT statement carries a label that differs from
     'objlabel', OBJLBL_MISMATCH is reported but the return value is still 0.
  */

{ int   object_level;
  int   value_found;
  char  line[MAXRECORDLENGTH];
  char  keyword[MAXRECORDLENGTH];   /* breakline() copies without a size  */
  char  value[MAXRECORDLENGTH];     /* limit, so these must be able to    */
                                    /* hold a whole label line            */
  int   READY = 0;             /* Normal return status */

  /* We read through the lines, keeping track of subobject definitions, 
     looking for the matching "END_OBJECT" line. We check for unexpected
     end of file or end of label, and signal appropriately when those
     error conditions arise.
  */

  value_found  = 0;
  object_level = 1;
  while (object_level > 0)
    { if (!getlblline(line,label,linecount))
        { /* tverr takes the line number itself, not the pointer to it: */
          tverr(UNEXP_EOF,*linecount,"label");
          return ENDOFFILE;
        }
      value_found = breakline(line,keyword,value,*linecount);
  
      if (strcmp(keyword,"END") == 0)
        { tverr(UNEXP_END,*linecount);
          return ENDOFLABEL;
        }

      /* Check for beginning or end of object and adjust object_level: */

      if (strcmp(keyword,"END_OBJECT")==0)
        { object_level--; }

      else if (strcmp(keyword,"OBJECT")==0)
        { object_level++; }
    }

  /* If the END_OBJECT included a label, it should match the input 
     label:
  */

  if (value_found  &&  strcmp(value,objlabel) != 0)
    { tverr(OBJLBL_MISMATCH,*linecount,value,objlabel); }

  /* Done. */

  return READY;

}


/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

int next_object(char *objlabel, int *linecount)

  /* This routine reads past comments, keywords and blank lines to the next
     "OBJECT =" statement. It stops at end of label (an "END" statement),
     end of file, or END_OBJECT and returns an appropriate code in those cases.

     Parameters:
        objlabel          receives the value part of every line read; on a
                          normal return it holds the label of the OBJECT
                          statement.  breakline() copies into it without a
                          size limit, so it must be able to hold a whole
                          label line.
        linecount         Position in label file (updated as lines are read)

     Returns:
        0                 an OBJECT statement was found
        ENDOFLABEL        an END statement was found
        ENDOFOBJECT       an END_OBJECT statement was found
        ENDOFFILE         the label file ended (UNEXP_EOF is reported)
  */
{ char    keyword[MAXRECORDLENGTH];  /* sized to hold a whole label line */
  char    line[MAXRECORDLENGTH];
  int     READY = 0;              /* Normal return status */

  /* This loop repeats until we encounter one of the terminal conditions: */

  while (TRUE)
    { if (!getlblline(line,label,linecount))
        { /* tverr takes the line number itself, not the pointer to it: */
          tverr(UNEXP_EOF,*linecount,"label");
          return ENDOFFILE;
        }
      breakline(line,keyword,objlabel,*linecount);
  

      /* Look for an "OBJECT =" line: */

      if (strcmp(keyword,"OBJECT") == 0)
        { return READY; }

      /* Failing that, check for end of label: */

      else if (strcmp(keyword,"END") == 0)
        { return ENDOFLABEL; }

      /* Check for END_OBJECT: */

      else if (strcmp(keyword,"END_OBJECT") == 0)
        { return ENDOFOBJECT; }

      /* Continue until we're kicked out of the loop: */
    }
}


/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void print_structure (table *top)

  /* Routine to display the table structures read in.  Every table in the
     list starting at 'top' is written to standard output: first its
     attributes, then each of its fields (via printcol and printcnt).

     A table without fields gets a "NO FIELDS FOUND" marker; the listing
     then carries on with the next table.
  */

{ table     *tab;
  field     *fld;

  int    indent;
  int    tabcount;


  /* Loop through tables: */

  tabcount = 0;
  indent   = 4;
  for (tab = top; tab; tab = tab->next)
    { tabcount++;
      printf ("Table %d",tabcount);
      if (tab->label) printf (" [%s]",tab->label);
      printf (":\n\n");

      printf ("Data File: %s\n",tab->datafile);
      printf ("   Offset: %d\n",tab->offset);
      if (tab->ascii)
          printf ("   Format: ASCII\n");
      else
          printf ("   Format: Binary\n");
      printf ("     Rows: %4d\n",tab->rows);
      printf ("Row Bytes: %4d\n",tab->row_bytes);
      printf ("  Columns: %4d\n",tab->columns);

      /* Loop through fields.  An empty list is noted, but must not end
         the listing: there may be more tables to show.
      */

      printf ("\n    ........Fields...........\n\n");
      if (tab->fldlist == NULL)
        { printf ("    *** NO FIELDS FOUND ***\n\n"); }

      for (fld = tab->fldlist; fld; fld = fld->next)
        { if (fld->type == COLUMN)
            { printcol(fld->ptr.col,indent); }
          else
            { printcnt(fld->ptr.cnt,indent); }
        }


      /* Next table: */

      printf ("\n\n");
    }

  /* Done. */

  return;
}

/*---------------------------------------------------------------------------*/

void printcnt (container *cnt, int sp)

  /* Routine to display a CONTAINER definition on standard output: its own
     attributes first, then each of its fields.  Nested containers are
     handled by recursion.

     Parameters:
        cnt      container to display
        sp       indentation width; subfields are indented four columns
                 further

     The attribute labels match those written by printcol.
  */

{ field      *fld;

  printf ("\n");
  printf ("%*sCOLUMN %s [CONTAINER]:\n",sp," ",cnt->colnum);
  printf ("%*s  %-15s %s\n",sp," ","Name",cnt->name);
  printf ("%*s  %-15s %d\n",sp," ","Start Byte",cnt->start_byte);
  printf ("%*s  %-15s %d\n",sp," ","Bytes",cnt->bytes);
  printf ("%*s  %-15s %d\n",sp," ","Repetitions",cnt->repetitions);

  /* Loop through subobjects: */

  printf ("\n%*s    .......Fields.......\n\n",sp," ");

  fld = cnt->fldlist;
  if (fld == NULL)
    { printf ("\n%*s    *** No columns found ***\n\n",sp," "); 
      return;
    }

  while (fld)
    { if (fld->type == COLUMN)
        { printcol(fld->ptr.col,sp+4); }
      else
        { printcnt(fld->ptr.cnt,sp+4); }
      fld = fld->next;
    }


  /* Done. */

  printf ("\n\n");
  return;
}

/*---------------------------------------------------------------------------*/

void printcol (column *col, int sp)

  /* Routine to display one COLUMN definition on standard output: a heading
     with the column number, then name, start byte, byte count, data type
     name and item count, one per line.

     Parameters:
        col      column to display
        sp       indentation width
  */

{ static const char text_line[] = "%*s  %-15s %s\n";   /* string attribute  */
  static const char count_line[] = "%*s  %-15s %d\n";  /* integer attribute */
  const char *pad = " ";
  const char *type_name;

  /* The data type is shown by name rather than by number: */

  type_name = field_type[col->type];

  printf ("\n");
  printf ("%*sCOLUMN %s:\n",sp,pad,col->colnum);

  printf (text_line, sp,pad,"Name",      col->name);
  printf (count_line,sp,pad,"Start Byte",col->start_byte);
  printf (count_line,sp,pad,"Bytes",     col->bytes);
  printf (text_line, sp,pad,"Data Type", type_name);
  printf (count_line,sp,pad,"Items",     col->items);
}

/*---------------------------------------------------------------------------*/


void printerror(int type, int field_error, int error_count[], int record,
                column *col, FILE *report)

/* Routine to count an error and, until the limit is reached, report it.

   Parameters:
     type          error type; also the subscript into error_count
     field_error   TRUE if the error applies only to the field, in which
                   case the column number and name are reported as well
     error_count   per-type error counters; the entry for 'type' is
                   incremented on every call
     record        record number
     col           column structure (read only when field_error is set)
     report        output file (not used by this routine)

   The first MAXERRORS errors of a type are passed to tverr.  Right after
   the MAXERRORS-th one, a MAX_ERRORS message is issued as well; later
   errors of that type are counted but not displayed.
*/

{ int seen;    /* errors of this type so far, this one included */

  seen = ++error_count[type];

  /* Past the limit we only keep counting: */

  if (seen > MAXERRORS) return;

  if (!field_error)
    { tverr(type,record); }
  else
    { tverr(type,record,col->colnum,col->name); }

  /* Say so when this was the last message of its type: */

  if (seen == MAXERRORS)
    { tverr(MAX_ERRORS,record,type,seen); }
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
 
int count_column_types(field *flist, int *numeric, int *character,
                       int *date, int *time, int *spare, int *unknown)

  /* Checks each field and subfield and returns total count of defined fields
     of each type for the field list beginning at "flist".  Returns total
     number of fields (and subfields).

     Parameters:
        flist       first field of the list to count
        numeric     receives the number of UNSIGNED_INTEGER, SIGNED_INTEGER
                    and REAL columns
        character   receives the number of CHARACTER columns
        date        receives the number of DATE columns
        time        receives the number of TIME columns
        spare       receives the number of SPARE columns
        unknown     receives the number of UNRECOGNIZED columns

     CONTAINERs are counted recursively; the container itself is not counted,
     only the columns inside it.
  */

{ enum { NTYPES = 8 };        /* This is a shortcut. There are currently 8
                                 types defined (in 'pdstv.h') by numbers
                                 which are convenient to use as subscripts.
                              */
  field     *fld;
  container *cnt;
  int        typecount[NTYPES] = {0};   /* every slot starts at zero */
  int        num,chr,dte,tme,spr,unk;
  int        total;          /* Return value */

  for (fld = flist; fld; fld = fld->next)
    { if (fld->type == COLUMN)
        { typecount[fld->ptr.col->type]++; }

      else if (fld->type == CONTAINER)
        { cnt = fld->ptr.cnt;
          count_column_types(cnt->fldlist,&num,&chr,&dte,&tme,&spr,&unk);

          /* The nested call reports a single numeric total.  It is booked
             under REAL, which is fine because only the sum of the three
             numeric slots is ever reported:
          */

          typecount[REAL]         += num;
          typecount[CHARACTER]    += chr;
          typecount[DATE]         += dte;
          typecount[TIME]         += tme;
          typecount[UNRECOGNIZED] += unk;
          typecount[SPARE]        += spr;
        }
    }

  /* Now add together the numeric field types and generate the overall total: */

  *numeric   = typecount[UNSIGNED_INTEGER] + typecount[SIGNED_INTEGER] +
               typecount[REAL];
  *character = typecount[CHARACTER];
  *date      = typecount[DATE];
  *time      = typecount[TIME];
  *spare     = typecount[SPARE];
  *unknown   = typecount[UNRECOGNIZED];
  total      = *numeric + *character + *date + *time + *spare + *unknown;

  return total;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

static int numsum_rows(field *fldlist_top, FILE *report)

  /* Writes one summary line for every numeric column in the list, calling
     itself for CONTAINERs.  Returns the footnote flags collected on the way:
     1 is set if some MISSING count written included UNKNOWN fields, 2 if
     some included NULL fields.
  */

{ int     useFmax,useFmin;  /* printing format flags */
  double  max,min;          /* holding places */
  int     maxd,mind;        /* number of decimal places in output format */
  double  absmax,absmin;    /* absolute values of max and min */
  int     type;             /* field type */
  int     totalmissing;     /* total MISSING, UNKNOWN and NULL fields */
  int     colnote;          /* footnote flags for the current column */
  int     notes;            /* footnote flags for the whole list */

  field  *fld;
  column *col;

  notes = 0;
  for (fld = fldlist_top; fld; fld = fld->next)
    { if (fld->type == CONTAINER)
        { notes |= numsum_rows(fld->ptr.cnt->fldlist,report);
          continue;
        }

      /* Non-numeric and illegal objects are ignored: */

      if (fld->type != COLUMN  ||  !NUMERIC_FIELD(fld->ptr.col->type))
          continue;

      col = fld->ptr.col;

      /* This mess is just to accommodate some pretty printing: */

      type = col->type;
      max  = col->maxfound.dbl;
      min  = col->minfound.dbl;
      absmax = (max > 0)? max : -max;
      absmin = (min > 0)? min : -min;
      useFmax = (1.e-4 < absmax  &&  absmax < 1.e7) || (absmax == 0.0);
      useFmin = (1.e-4 < absmin  &&  absmin < 1.e7) || (absmin == 0.0);
      maxd = (useFmax)? findprec(max) : 0;
      mind = (useFmin)? findprec(min) : 0;

      /* Max/min is collected across all items, so we have only a single
         value to report. We check to make sure that somewhere along the
         line we encountered at least one valid value:
      */

      fprintf (report,"%3.3s  %-15.15s ", col->colnum,col->name);
      fprintf (report," %3d ",col->items);

      if (col->maxfound.dbl == -MAXDOUBLE)
        { fprintf (report,"    -    ");  /* Null minimum */ 
          fprintf (report,"    -    ");  /* Null maximum */
        }
      else
        { /* NOTE(review): integer extrema are printed through an (int)
             cast, so values outside the range of int will not display
             correctly.
          */

          if (type == REAL  &&  useFmin)
            { fprintf (report,"%9.*f ",mind,min); }
          else if (type == REAL)
            { fprintf (report,"%9.2e ",min); }
          else if (type == SIGNED_INTEGER  ||  type == UNSIGNED_INTEGER)
            { fprintf (report,"%9d ",(int)min); }
          else
            { fprintf (report,"   n/a   "); }

          if (type == REAL  &&  useFmax)
            { fprintf (report,"%9.*f ",maxd,max); }
          else if (type == REAL)
            { fprintf (report,"%9.2e ",max); }
          else if (type == SIGNED_INTEGER  ||  type == UNSIGNED_INTEGER)
            { fprintf (report,"%9d ",(int)max); }
          else
            { fprintf (report,"   n/a   "); }
        }

      if (col->invflag)
          fprintf (report," %5d  ",col->invalidcount);
      else
          fprintf (report,"    -   ");

      /* The MISSING figure lumps in UNKNOWN and NULL fields.  The flags
         are kept per column, so that only the lines that really include
         such fields get the asterisk, and OR-ed into the list total, so
         that several flagged columns cannot add up to a wrong footnote:
      */

      totalmissing = col->missingcount;
      colnote = 0;
      if (col->unknowncount)
        { totalmissing += col->unknowncount; 
          colnote |= 1;
        }
      if (col->nullcount)
        { totalmissing += col->nullcount;
          colnote |= 2;
        } 
      notes |= colnote;

      if (totalmissing)
        { if (colnote)
            { fprintf (report," %5d* ",totalmissing); }
          else
            { fprintf (report," %5d  ",totalmissing); }
        }
      else
        { fprintf (report,"    -   "); }

      if (col->naflag)
          fprintf (report," %5d  ",col->nacount);
      else
          fprintf (report,"    -   ");

      fprintf (report,"%5d\n", col->badcount);
    }

  return notes;
}

void writenumsum(field *fldlist_top, FILE *report)

  /* Routine to write out the summary statistics for numeric fields.

     Parameters:
        fldlist_top   first field of the list to summarize
        report        output file

     One line is written per numeric column, columns inside CONTAINERs
     included.  A single footnote follows the last line if any MISSING
     count shown also includes UNKNOWN and/or NULL fields.
  */

{ int  notes;    /* 1: UNKNOWN included, 2: NULL included, 3: both */

  notes = numsum_rows(fldlist_top,report);

  /* That finishes the table. If we need a footnote, we add it: */

  if (notes == 3)
    { fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"UNKNOWN and NULL fields.\n");
    }
  else if (notes == 2)
    { fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"NULL fields.\n");
    }
  else if (notes == 1)
    { fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"UNKNOWN fields.\n");
    }

  /* Done. */

  return;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

static int charsum_rows(field *fldlist_top, FILE *report)

  /* Writes the summary lines for every non-numeric, non-SPARE column in the
     list, calling itself for CONTAINERs.  Returns the footnote flags
     collected on the way: 1 is set if some MISSING count written included
     UNKNOWN fields, 2 if some included NULL fields.
  */

{ field     *fld;
  column    *col;
  int        totalmissing;   /* total MISSING, UNKNOWN and NULL fields */
  int        colnote;        /* footnote flags for the current column */
  int        notes;          /* footnote flags for the whole list */

  notes = 0;
  for (fld = fldlist_top; fld; fld = fld->next)
    { if (fld->type == CONTAINER)
        { notes |= charsum_rows(fld->ptr.cnt->fldlist,report);
          continue;
        }

      /* Everything but non-numeric data columns is ignored: */

      if (fld->type != COLUMN  ||
          fld->ptr.col->type == SPARE  ||
          NUMERIC_FIELD(fld->ptr.col->type))
          continue;

      col = fld->ptr.col;

      fprintf (report,"%3.3s  %-15.15s ", col->colnum,col->name); 
      fprintf (report," %3d ",col->items);

      fprintf (report,"   %3d    %3d    ",
                      col->minfound.bytes,col->maxfound.bytes); 

      if (col->invflag)
          fprintf (report," %5d  ",col->invalidcount);
      else
          fprintf (report,"    -   ");

      /* The MISSING figure lumps in UNKNOWN and NULL fields.  The flags
         are kept per column, so that only the lines that really include
         such fields get the asterisk, and OR-ed into the list total, so
         that several flagged columns cannot add up to a wrong footnote:
      */

      totalmissing = col->missingcount;
      colnote = 0;
      if (col->unknowncount)
        { totalmissing += col->unknowncount; 
          colnote |= 1;
        }
      if (col->nullcount)
        { totalmissing += col->nullcount;
          colnote |= 2;
        } 
      notes |= colnote;

      if (totalmissing)
        { if (colnote)
            { fprintf (report," %5d* ",totalmissing); }
          else
            { fprintf (report," %5d  ",totalmissing); }
        }
      else
        { fprintf (report,"    -   "); }

      if (col->naflag)
          fprintf (report," %5d  ",col->nacount);
      else
          fprintf (report,"    -   ");

      fprintf (report,"%5d\n", col->badcount);

      /* Maximum and minimum values are printed on separate lines, 
         unless no valid values were encountered:
      */

      if (col->maxstr)
        { fprintf (report,"       Minimum: '%s'\n",col->minstr);
          fprintf (report,"       Maximum: '%s'\n",col->maxstr);
        }
    }

  return notes;
}

void writecharsum(field *fldlist_top, FILE *report)

  /* Routine to write out the summary statistics for non-numeric fields.

     Parameters:
        fldlist_top   first field of the list to summarize
        report        output file

     Columns inside CONTAINERs are included; SPARE columns are left out.
     A single footnote follows the last line if any MISSING count shown
     also includes UNKNOWN and/or NULL fields.
  */

{ int  notes;    /* 1: UNKNOWN included, 2: NULL included, 3: both */

  notes = charsum_rows(fldlist_top,report);

  /* That finishes the table. If we need a footnote, we add it: */

  if (notes == 3)
    { fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"UNKNOWN and NULL fields.\n");
    }
  else if (notes == 2)
    { fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"NULL fields.\n");
    }
  else if (notes == 1)
    { fprintf (report,"\n* Note: MISSING field count also includes ");
      fprintf (report,"UNKNOWN fields.\n");
    }

  /* Done. */

  return;
}


/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

int findprec(double value)

/* Routine to pick the number of digits after the decimal point to use when
   writing 'value' in an F output format.  The larger the magnitude, the
   fewer the decimals: 0 for magnitudes of 100000 and up, one more for each
   power of ten below that, and 6 for anything smaller than 1.
*/

{ static const double floor_for[] =   /* smallest magnitude that gets...  */
    { 100000.,                        /*   0 decimals */
      10000.0,                        /*   1 decimal  */
      1000.00,                        /*   2 decimals */
      100.000,                        /*   3 decimals */
      10.0000,                        /*   4 decimals */
      1.00000                         /*   5 decimals */
    };
  double magnitude;
  int    places;

  magnitude = (value > 0.0)? value : -value;

  for (places = 0; places < 6; places++)
    { if (magnitude >= floor_for[places]) return places; }

  return 6;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void checkcharfld(char   *val,  int bytes,         int record_count,
                  column *col,  int error_count[], FILE *report)

  /* Routine to check a character field against the label attributes.

     Parameters:
        val           value buffer
        bytes         length of value
        record_count  data record count
        col           column structure
        error_count   error count accumulator array
        report        output file

     Nothing is done for numeric columns.  Otherwise the checks are, in
     order:

       1. Blank field.  Reported as BLANK_FIELD only if the user asked for
          blank fields to be flagged; checking then stops.
       2. *_CONSTANT values (INVALID, MISSING, NOT_APPLICABLE, UNKNOWN,
          NULL), compared against a blank-trimmed copy of the value.  A
          match bumps the matching counter and checking stops.
       3. Label bounds (maximum, minimum, valid maximum, valid minimum),
          also against the trimmed copy.
       4. Non-printing characters in the untrimmed value.

     A value failing check 3 or 4 adds one to col->badcount.  Finally the
     string extrema of the column are updated.
  */

{ int      blank;        /* 1 if the field is blank */
  int      i;            /* loop/subscript */
  int      FIELD_ERROR = TRUE;
  int      bad_value;    /* error tracking flag */
  char    *trimval;      /* Blank-trimmed copy of the input value */


  /* If this is a string field we'll check for blanks, *_CONSTANTs,
     and the extrema.  Note that blank fields are treated as regular
     string fields unless there is a specific request from the 
     user (by command line option) to flag blank fields.
  */

  if (NUMERIC_FIELD(col->type)) return;  /* A number */


  blank = (strspn(val," ")==bytes);   /* check for blank field         */
  if (flag_blank_fields  &&  blank)    /* flag blank field if requested */
    { printerror(BLANK_FIELD,FIELD_ERROR,error_count,record_count,col,
                 report);
      return;
    }

  /* From now on we'll work with a blank-trimmed copy of the input string: */

  trimval = trim_copyof(val);

  /* Check for any one of the various *_CONSTANT values. If found, we
     return, as there's nothing else to check:
  */

  if (col->invflag  &&  (strcmp(col->invalid.str,trimval) == 0))
    { col->invalidcount++;
      free(trimval);
      return;
    }

  if (col->missflag  &&  (strcmp(col->missing.str,trimval) == 0))
    { col->missingcount++;
      free(trimval);
      return;
    }

  if (col->naflag  &&  (strcmp(col->not_applicable.str,trimval) == 0))
    { col->nacount++;
      free(trimval);
      return;
    }

  if (col->unkflag  &&  (strcmp(col->unknown.str,trimval) == 0))
    { col->unknowncount++;
      free(trimval);
      return;
    }

  if (col->nullflag  &&  (strcmp(col->null.str,trimval) == 0))
    { col->nullcount++;
      free(trimval);
      return;
    }

  /* If there were bounds in the label, make sure we haven't exceeded them: */

  bad_value = FALSE;

  if (col->mmflag%2)   /* Max present */
    { if (strcmp(col->max.str,trimval)<0)
        { printerror(GREATER_THAN_MAX,FIELD_ERROR,error_count,
                     record_count,col,report);
          bad_value = TRUE;
        }
    }
 
  if (col->mmflag > 1) /* Min present */
    { if (strcmp(trimval,col->min.str)<0)
        { printerror(LESS_THAN_MIN,FIELD_ERROR,error_count,
                     record_count,col,report);
          bad_value = TRUE;
        }
    }

  if (col->vmflag%2)   /* Valid max present */
    { if (strcmp(col->vmax.str,trimval)<0)
        { printerror(GREATER_THAN_VMAX,FIELD_ERROR,error_count,
                     record_count,col,report);
          bad_value = TRUE;
        }
    }

  /* The lower valid bound has to be tested against the valid MINIMUM, with
     the value on the left, as in the plain minimum test above.

     NOTE(review): col->vmin is assumed to exist alongside col->vmax, as
     col->min does alongside col->max - confirm in 'pdstv.h'.  If 'tverr.h'
     has an error code for this case to go with GREATER_THAN_VMAX, it
     should be used here in place of LESS_THAN_MIN.
  */

  if (col->vmflag>1)   /* Valid min present */
    { if (strcmp(trimval,col->vmin.str)<0)
        { printerror(LESS_THAN_MIN,FIELD_ERROR,error_count,
                     record_count,col,report);
          bad_value = TRUE;
        }
    }

  /* Done with the trimmed copy: */

  free(trimval);

  /* And finally, loop through the characters in the field to make sure they
     include only printing characters.  One report per field is enough, so
     we stop at the first offender.  The cast keeps isprint() well defined
     for bytes above 127:
  */

  for (i=0; i<col->bytes; i++)
    { if (!isprint((unsigned char)val[i]))
        { printerror(NONPRINT_CHAR,FIELD_ERROR,error_count,
                     record_count,col,report);
          bad_value = TRUE;
          break;
        }
    }
          
  if (bad_value) col->badcount++;


  /* Now set extrema and we're done: */

  set_string_maxmin(val,col);
  
  return;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void checkdatefld(char   *val, int bytes,         int   record_count,
                  column *col, int error_count[], FILE *report)

  /* Routine to read and check a DATE field.

     Parameters
        val             string value to be checked
        bytes           length of value
        record_count    data record count
        col             column structure
        error_count     error count accumulator array
        report          output file

     Columns that are not of type DATE are ignored.  Blank fields are
     reported only when 'flag_blank_fields' is set.  Values matching one of
     the column's *_CONSTANT strings are counted and not checked further.

     The accepted layouts are YYYY, YYYY-MM, YYYY-DDD and YYYY-MM-DD.  Only
     the character classes (digits and '-' separators) are verified, not the
     numeric ranges of the individual parts.  Label bounds (MAXIMUM, MINIMUM,
     VALID_MAXIMUM, VALID_MINIMUM) are compared as strings against the
     untrimmed 'val', and only while no earlier problem has been found.
  */

{ int      blank;        /* 1 if the field is blank */
  int      bad_value;    /* count/flag of problems found */
  int      FIELD_ERROR = TRUE;
  char     date[50];     /* holds blank-trimmed value */
  int      datelen;      /* length of the blank-trimmed value */
  int      i;

  /* Make sure this is a date field:  */

  if (col->type != DATE) return;          /* Not a DATE */

  /* Check for a blank field.  This is only an error if we've not been
     directed to ignore blank fields:
  */

  blank = (strspn(val," ") == (size_t)bytes);
  if (blank)
    { if (flag_blank_fields)
        { printerror(BLANK_FIELD,FIELD_ERROR,error_count,record_count,
                     col,report);
        }
      return;
    }

  /* Check to see if we've got a *_CONSTANT flag value rather than a date.
     If so, we return immediately as no further checking is required:
  */

  if (col->invflag  &&  trimcmp(val,col->invalid.str)==0)
    { col->invalidcount++;
      return;
    }

  if (col->missflag  &&  trimcmp(val,col->missing.str)==0)
    { col->missingcount++;
      return;
    }

  if (col->naflag  &&  trimcmp(val,col->not_applicable.str)==0)
    { col->nacount++;
      return;
    }

  if (col->unkflag  &&  trimcmp(val,col->unknown.str)==0)
    { col->unknowncount++;
      return;
    }

  if (col->nullflag  &&  trimcmp(val,col->null.str)==0)
    { col->nullcount++;
      return;
    }

  /* Find the length of the value without its trailing blanks.  The length
     is tested BEFORE the subscript is used so we never look at val[-1]:
  */

  datelen = (int)strlen(val);
  while (datelen > 0  &&  val[datelen-1] == ' ')
    { datelen--; }

  /* If the field is not the size of a valid DATE format, we don't try
     verifying it, but we will signal the error, set extrema and return.
     This test is made before anything is copied, so an oversized value can
     never overrun the local buffer:
  */

  if (datelen < 4  ||  datelen > MAXDATEWIDTH  ||
      datelen >= (int)sizeof(date))
    { printerror(INVALID_DATE,FIELD_ERROR,error_count,record_count,
                 col,report);
      col->badcount++;
      set_string_maxmin(val,col);
      return;
    }

  memcpy(date,val,(size_t)datelen);
  date[datelen] = '\0';

  /* DATE fields MUST contain only date information in the format YYYY-MM-DD
     or YYYY-DDD (i.e., the ISO standard DATE formats).  Anything else should
     be described either as individual integers or a string.  All digits must
     be present (zero-padded if necessary) and the century may not be negative.
     We accomplish this checking through straight brute-force testing of each
     character.  (The casts keep 'isdigit' defined for characters with the
     high bit set.)
  */

  bad_value = 0;
  for (i=0; i<4; i++)
    { bad_value += (!isdigit((unsigned char)date[i])); }

  /* We know we had at least a year, but all bets are off after that, so
     we'll need to check for the presence of each field before checking its
     contents.  If we haven't got just a naked year, then we must have at
     least three characters following it: a separator and two digits.
  */

  if (datelen > 4  &&  datelen < 7)
    { bad_value++; }
  else if (datelen >= 7)
    { bad_value += (date[4] != '-');
      bad_value += (!isdigit((unsigned char)date[5]));
      bad_value += (!isdigit((unsigned char)date[6]));
    }

  /* [Note it's also OK to stop at month (datelen = 7)] */

  if (datelen == 8)            /* YYYY-DDD: one more digit */
    { bad_value += (!isdigit((unsigned char)date[7])); }

  else if (datelen == 9)       /* No valid format has this length */
    { bad_value++; }

  else if (datelen == 10)      /* YYYY-MM-DD: check the DD field */
    { bad_value += (date[7] != '-');
      bad_value += (!isdigit((unsigned char)date[8]));
      bad_value += (!isdigit((unsigned char)date[9]));
    }

  if (bad_value)
    { printerror(INVALID_DATE,FIELD_ERROR,error_count,record_count,
                 col,report);
    }

  /* If there were bounds in the label, make sure we haven't exceeded them.
     Each test is skipped once a problem has been found, so at most one
     message is issued per value:
  */

  if (col->mmflag%2  &&  !bad_value)   /* MAXIMUM present */
    { if (strcmp(col->max.str,val)<0)
        { printerror(GREATER_THAN_MAX,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->mmflag > 1  &&  !bad_value) /* MINIMUM present */
    { if (strcmp(val,col->min.str)<0)
        { printerror(LESS_THAN_MIN,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->vmflag%2  &&  !bad_value)   /* VALID_MAXIMUM */
    { if (strcmp(col->vmax.str,val)<0)
        { printerror(GREATER_THAN_VMAX,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->vmflag > 1  &&  !bad_value) /* VALID_MINIMUM */
    { if (strcmp(val,col->vmin.str)<0)
        { printerror(LESS_THAN_VMIN,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  /* Update the bad value counter as needed: */

  if (bad_value) col->badcount++;


  /* Check the extrema and we're done: */

  set_string_maxmin(val,col);

  return;
}


/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void checktimefld(char   *val, int bytes,         int   record_count,
                  column *col, int error_count[], FILE *report)

  /* Routine to read and check a TIME field.

     Parameters
        val             string value to be checked
        bytes           length of value
        record_count    data record count
        col             column structure
        error_count     error_count accumulator array
        report          output file

     Note that TIME fields may be truncated on the right as far as
     needed.  The minimum size is technically a bare year, but if a field
     is defined as being of type TIME, we expect at least hours of
     accuracy.

     Columns that are not of type TIME are ignored.  Blank fields are
     reported only when 'flag_blank_fields' is set.  Values matching one of
     the column's *_CONSTANT strings are counted and not checked further;
     this covers the same five constants that "checkdatefld" recognises.

     The date part must be YYYY, YYYY-MM, YYYY-DDD or YYYY-MM-DD.  A time
     part, if present, must follow a complete YYYY-DDD or YYYY-MM-DD date,
     be introduced by 'T' and look like HH[:MM[:SS[.fff...]]].  Only the
     character classes are verified, not numeric ranges.  Label bounds are
     compared as strings against the untrimmed 'val'.
  */

{ int      blank;        /* 1 if the field is blank */
  int      bad_value;    /* count/flag of problems found */
  int      FIELD_ERROR = TRUE;
  int      ts;           /* Subscript of start of time (HH:MM:SS) field */
  char    *hms;          /* Start of the time part within 'stamp' */
  int      len;          /* Length of the time part */
  char     stamp[100];   /* Blank-trimmed value */
  int      timelen;      /* Length of 'stamp' */
  int      i;

  /* Make sure this is a time field:  */

  if (col->type != TIME) return;          /* Not a TIME */

  /* Check for a blank field.  This is only an error if we've not been
     directed to ignore blank fields:
  */

  blank = (strspn(val," ") == (size_t)bytes);
  if (blank)
    { if (flag_blank_fields)
        { printerror(BLANK_FIELD,FIELD_ERROR,error_count,record_count,
                     col,report);
        }
      return;
    }

  /* Check to see if we've got a *_CONSTANT flag value rather than a time.
     If so, we return immediately as no further checking is required:
  */

  if (col->invflag  &&  trimcmp(val,col->invalid.str)==0)
    { col->invalidcount++;
      return;
    }

  if (col->missflag  &&  trimcmp(val,col->missing.str)==0)
    { col->missingcount++;
      return;
    }

  if (col->naflag  &&  trimcmp(val,col->not_applicable.str)==0)
    { col->nacount++;
      return;
    }

  if (col->unkflag  &&  trimcmp(val,col->unknown.str)==0)
    { col->unknowncount++;
      return;
    }

  if (col->nullflag  &&  trimcmp(val,col->null.str)==0)
    { col->nullcount++;
      return;
    }

  /* Find the length of the value without its trailing blanks.  The length
     is tested BEFORE the subscript is used so we never look at val[-1]:
  */

  timelen = (int)strlen(val);
  while (timelen > 0  &&  val[timelen-1] == ' ')
    { timelen--; }

  /* If the field size does not fall in the valid range for TIME formats, we
     won't try verifying it - we'll just check the extrema and exit.  We are
     making an assumption of logical labelling, here - specifically, that if
     the DATA_TYPE is TIME, then there are at least hours given.  A time
     field without at least hour accuracy should have had a type of DATE.
     The test is made before anything is copied, so an oversized value can
     never overrun the local buffer:
  */

  if (timelen < 4  ||  timelen > MAXTIMEWIDTH  ||
      timelen >= (int)sizeof(stamp))
    { printerror(INVALID_TIME,FIELD_ERROR,error_count,record_count,
                 col,report);
      col->badcount++;
      set_string_maxmin(val,col);
      return;
    }

  memcpy(stamp,val,(size_t)timelen);
  stamp[timelen] = '\0';

  /* TIME fields contain both date and time information, but they can be
     truncated on the right as appropriate to the precision, which may mean
     in extreme cases that only a date, and possibly only a year, is present.
  */

  /* Date information must be in the same format as for DATE-only fields
    (i.e., YYYY-MM-DD or YYYY-DDD), so we'll check that first using the
    same method as in the "checkdatefld" subroutine.  (The casts keep
    'isdigit' defined for characters with the high bit set.)
  */

  bad_value = 0;
  for (i=0; i<4; i++)
    { bad_value += (!isdigit((unsigned char)stamp[i])); }

  if (timelen > 4  &&  timelen < 7)
    { bad_value++; }
  else if (timelen >= 7)
    { bad_value += (stamp[4] != '-');
      bad_value += (!isdigit((unsigned char)stamp[5]));
      bad_value += (!isdigit((unsigned char)stamp[6]));
    }

  /* If there's another character we need to determine if it's a '-',
     indicating YYYY-MM-DD format, or a digit, indicating YYYY-DDD format.
     That tells us where the time substring starts.  Until a complete date
     has been seen there can be no time part, so 'ts' starts out at the end
     of the string:
  */

  ts = timelen;

  if (timelen >= 8  &&  stamp[7] == '-')    /* MM-DD */
    { if (timelen < 10)                     /* day digits are missing */
        { bad_value++; }
      else
        { bad_value += (!isdigit((unsigned char)stamp[8]));
          bad_value += (!isdigit((unsigned char)stamp[9]));
        }

      ts = 10;
    }
  else if (timelen >= 8)                    /* DDD */
    { bad_value += (!isdigit((unsigned char)stamp[7]));

      ts =  8;
    }

  /* If there is no time part, we're done, so check first: */

  if (ts < timelen)

    { /* The next character after the date part has to be 'T': */

      bad_value += (stamp[ts] != 'T');
      ts++;

      /* A bare trailing 'T' is tolerated; otherwise check what follows it
         in segments, being careful not to look past the end of the string:
      */

      if (ts != timelen)
        { hms = stamp + ts;
          len = timelen - ts;

          if (len < 2)
            { bad_value++; }
          else
            { bad_value += (!isdigit((unsigned char)hms[0]));
              bad_value += (!isdigit((unsigned char)hms[1]));
            }

          if (len > 2  &&  len < 5)
            { bad_value++; }
          else if (len >= 5)
            { bad_value += (hms[2] != ':');
              bad_value += (!isdigit((unsigned char)hms[3]));
              bad_value += (!isdigit((unsigned char)hms[4]));
            }

          if (len > 5  &&  len < 8)
            { bad_value++; }
          else if (len >= 8)
            { bad_value += (hms[5] != ':');
              bad_value += (!isdigit((unsigned char)hms[6]));
              bad_value += (!isdigit((unsigned char)hms[7]));
            }

          /* Anything after the seconds must be '.' plus digits: */

          bad_value += (len > 8  &&  hms[8] != '.');
          for (i=9; i<len; i++)
            { bad_value += (!isdigit((unsigned char)hms[i])); }
        }

    }  /* end "if there is a time part" */

  /* Now check to see if any of the above tests failed, and signal as needed.
     Then check extrema:
  */

  if (bad_value)
    { printerror(INVALID_TIME,FIELD_ERROR,error_count,record_count,
                 col,report); }

  /* If there were bounds in the label, make sure we haven't exceeded them: */

  if (col->mmflag%2)   /* MAXIMUM */
    { if (strcmp(col->max.str,val)<0)
        { printerror(GREATER_THAN_MAX,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->mmflag > 1) /* MINIMUM */
    { if (strcmp(val,col->min.str)<0)
        { printerror(LESS_THAN_MIN,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->vmflag%2)   /* VALID_MAXIMUM */
    { if (strcmp(col->vmax.str,val)<0)
        { printerror(GREATER_THAN_VMAX,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  if (col->vmflag > 1) /* VALID_MINIMUM */
    { if (strcmp(val,col->vmin.str)<0)
        { printerror(LESS_THAN_VMIN,FIELD_ERROR,error_count,record_count,
                     col,report);
          bad_value = TRUE;
        }
    }

  /* Update the bad value counter as needed: */

  if (bad_value) col->badcount++;


  /* Check the extrema and we're done: */

  set_string_maxmin(val,col);

  return;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

static int tv_store_string_copy(char **slot, const char *value)

  /* Helper for "set_string_maxmin": replace the string held in '*slot' with
     a newly allocated copy of 'value'.  The copy is made first and the old
     string released only afterwards, so if the allocation fails the
     previous contents of '*slot' are left untouched.  Returns 1 on success
     and 0 on allocation failure.
  */

{ size_t  size = strlen(value) + 1;
  char   *copy = (char *)malloc(size);

  if (copy == NULL)
    { fprintf(stderr,"set_string_maxmin: out of memory; extremum not updated\n");
      return 0;
    }

  memcpy(copy,value,size);
  free(*slot);               /* free(NULL) is harmless for a first value */
  *slot = copy;

  return 1;
}

void set_string_maxmin(char *value, column*col)

  /* Routine to check for and set extrema of a string field.

     Parameters
        value       string to be compared
        col         column parameters

     'col->maxstr' and 'col->minstr' hold heap copies of the greatest and
     least values seen so far (by 'strcmp' ordering); either may be NULL if
     no value has been recorded yet.  'col->maxfound.bytes' and
     'col->minfound.bytes' track the longest and shortest lengths seen,
     not counting trailing blanks.
  */

{ int len;      /* string length */

  /* First, check for Maximum and minimum string values: */

  if (col->maxstr == NULL  ||  strcmp(col->maxstr,value) < 0)
    { tv_store_string_copy(&col->maxstr,value); }

  if (col->minstr == NULL  ||  strcmp(value,col->minstr) < 0)
    { tv_store_string_copy(&col->minstr,value); }

  /* Next, check for maximum and minimum string lengths: */

  len = strlen(value);
  while (len>0 && value[len-1]==' ')
    { len--; }

  if (col->maxfound.bytes < len) col->maxfound.bytes = len;
  if (col->minfound.bytes > len) col->minfound.bytes = len;

  /* And we're done: */

  return;
}


/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void print_table_attributes (table *tab, FILE *report)

  /* Write the attribute summary for one table to 'report': first the
     table-level values (object number, label, ROWS, COLUMNS, ROW_BYTES),
     then one line per field in the table's field list.  Nothing is
     returned.

     Parameters
        tab         table whose attributes are listed
        report      output file
  */

{ field     *node;     /* current entry in the table's field list */
  column    *c;        /* column attached to the current entry    */

  /* General TABLE attributes: */

  fputs (" Table attributes\n",report);
  fputs (" ----------------\n",report);
  fprintf (report," Table object %d: %s\n",tab->tblnum,tab->label);
  fprintf (report,"       ROWS: %6d\n",tab->rows);
  fprintf (report,"    COLUMNS: %6d\n",tab->columns);
  fprintf (report,"  ROW_BYTES: %6d\n",tab->row_bytes);

  /* Heading for the per-field listing: */

  fputs ("\n Col     Name                            Start   Bytes   Items ",
         report);
  fputs ("  Type\n",report);

  /* One line per COLUMN; CONTAINERs are handed to a recursive routine that
     lists the container and everything inside it.  Entries of any other
     type are passed over silently.
  */

  for (node = tab->fldlist; node; node = node->next)
    { if (node->type == COLUMN)
        { c = node->ptr.col;
          fprintf (report," %4.4s    %-30.30s  %5d   %4d    %4d    %-10s\n",
                          c->colnum,c->name,c->start_byte,
                          c->bytes,c->items,field_type[c->type]);
          continue;
        }

      if (node->type == CONTAINER)
        { print_container_attributes(node->ptr.cnt,report); }
    }

  return;
}

/*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

void print_container_attributes(container *cnt, FILE *report)

  /* Write one summary line for the container itself, then one line for
     each field it holds.  Nested containers are listed by a recursive
     call.  Nothing is returned.

     Parameters
        cnt         container to be listed
        report      output file
  */

{ field  *node;     /* current entry in the container's field list */
  column *c;        /* column attached to the current entry        */

  /* The container's own line; repetitions appear in the "Items" slot,
     marked with a leading 'x':
  */

  fprintf (report, " %4.4s    %-30.30s  %5d   %4d   x%4d    %-10s\n",
                   cnt->colnum, cnt->name, cnt->start_byte,
                   cnt->bytes, cnt->repetitions,"CONTAINER");

  /* Now the contents.  Entries that are neither COLUMN nor CONTAINER are
     passed over silently:
  */

  for (node = cnt->fldlist; node; node = node->next)
    { if (node->type == COLUMN)
        { c = node->ptr.col;
          fprintf (report," %6s  %-30.30s  %5d   %4d    %4d    %-10s\n",
                          c->colnum,c->name,c->start_byte,
                          c->bytes,c->items,field_type[c->type]);
          continue;
        }

      if (node->type == CONTAINER)
        { print_container_attributes(node->ptr.cnt,report); }
    }

  return;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

char *trim_copyof(char *s)

  /* Routine to return a copy of the input string 's' from which the leading
     and trailing blanks have been trimmed.  Only the space character is
     treated as a blank.

     The result is allocated with 'malloc' and must be released by the
     caller with 'free'.  A blank, empty or NULL input yields an empty
     string.  NULL is returned only if the allocation fails.
  */

{ char   *copy;        /* Return pointer */
  size_t  first;       /* offset of the first non-blank character */
  size_t  length;      /* number of characters to keep */

  if (s == NULL) s = "";

  /* Skip the leading blanks, then back up over the trailing ones.  For an
     all-blank string 'first' lands on the terminator and 'length' is 0:
  */

  first  = strspn(s," ");
  length = strlen(s + first);
  while (length > 0  &&  s[first+length-1] == ' ')
    { length--; }

  /* Allocate space and copy the non-blank part: */

  copy = (char *)malloc(length+1);
  if (copy == NULL) return NULL;

  memcpy(copy,s+first,length);
  copy[length] = '\0';

  /* Done: */

  return copy;
}


/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

int trimcmp(char *s1, char *s2)

  /* Routine to compare blank-trimmed versions of the input strings.  The
     result has the same meaning as that of 'strcmp': negative, zero or
     positive as the trimmed 's1' sorts before, equal to or after the
     trimmed 's2'.  Only the space character is treated as a blank.

     The comparison is made in place on the original strings, so nothing is
     allocated and the routine cannot fail.
  */

{ const char *p1 = s1;     /* start of the non-blank part of s1 */
  const char *p2 = s2;     /* start of the non-blank part of s2 */
  size_t      n1, n2;      /* lengths of the non-blank parts    */
  size_t      common;      /* the shorter of n1 and n2          */
  int         result;

  /* Step over leading blanks: */

  while (*p1 == ' ') p1++;
  while (*p2 == ' ') p2++;

  /* Measure what is left, not counting trailing blanks: */

  n1 = strlen(p1);
  while (n1 > 0  &&  p1[n1-1] == ' ') n1--;

  n2 = strlen(p2);
  while (n2 > 0  &&  p2[n2-1] == ' ') n2--;

  /* Compare the part both strings have; if that is identical, the shorter
     string sorts first, exactly as 'strcmp' would order the trimmed copies:
  */

  common = (n1 < n2)? n1 : n2;
  result = memcmp(p1,p2,common);
  if (result != 0) return result;

  if (n1 == n2) return 0;

  return (n1 < n2)? -1 : 1;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

int spare_column(column *col)

  /* Routine to check if a column is designated as SPARE.  Returns TRUE or
     FALSE accordingly.

     Spare columns are identified by the NAME value: the column is spare if
     the NAME is exactly "SPARE", or if it ends with "_SPARE" or " SPARE".
     The comparison is case-sensitive.
  */

{ size_t      len;     /* length of the column name         */
  const char *tail;    /* last six characters of the name   */

  if (strcmp(col->name,"SPARE") == 0)
    { return TRUE; }

  /* A name shorter than six characters cannot carry the suffix.  Checking
     this first keeps the subscripts below inside the string:
  */

  len = strlen(col->name);
  if (len < 6)
    { return FALSE; }

  tail = col->name + (len - 6);

  if ((tail[0] == '_'  ||  tail[0] == ' ')  &&  strcmp(tail+1,"SPARE") == 0)
    { return TRUE; }

  /* If we're here, this is not a SPARE column: */

  return FALSE;
}

/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/
/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/