#!/usr/bin/perl use Getopt::Std; # Routine to generate an index file from PDS labels by extracting keywords # from the labels as specified by the input. # # 15 Mar 1995, acr. # 21 Apr 1995, acr: shaking out the bugs # 17 May 1995, acr: made ".LBL" extension check case-insensitive # 19 Jun 1995, acr: fixed file name case bug in 'runlabels' # 13 Jul 1995, acr: REALLY made ".LBL" extension check case-insensitive; # fixed bug which kept negative widths from left-justifying # 13 Nov 1995, acr: fixed leftover bug with last keyword width being negative # 29 Apr 1996, acr: fixed bug that caused keywords inside objects to be ignored # 30 May 1996, acr: made %val array local to &process_label to fix bug in # which old values of keywords missing in subsequent labels # were repeated # 08 Aug 1996, acr: Added some comments, removed some debug flags # 04 Mar 1997, acr: Fixed processing of object numbering to restart numbering # in each label # 19 May 1997, acr: beefed-up label line processing so it wouldn't choke on # '=' not surrounded by space, among other things; tweaked # ^PATH handling a bit so that relative ("../" etc.) paths # and the trailing slash are clipped. # 02 Feb 1999, acr: Removed gratuitous blank written onto the end of the last # field in each line. # 06 Mar 2003, acr: Added FILE_SPECIFICATION_NAME handling # 01 Oct 2003, acr: Fixed bug that wouldn't allow negative widths (for left- # justifying fields) from the command line. # 06 Dec 2005, acr: Modified to handle namespace IDs (local data dictionary # support; modified default fields list to coincide with # current INDEX_TABLE requirements; added default value # handling; use the now-standard Getopts package # # Input options: # # -a Check all files for attached labels # -n Do not use default keywords # -r Descend recursively through subdirectories # -o Send output to # -f Read keywords from # -d Start at directory # -v Include "VOLUME_ID" in default list. # # # Define the list of valid special fields as an associative array for easy # searching: %SPECIAL = ('^PATH', 1, '^FILE', 1, '^LABEL', 1, '^OFFSET', 1, '^OBJECT', 1, '^FILE_SPECIFICATION_NAME', 1); # # Start by dealing with the command line arguments and collecting the various # flags, fields, and file names. Getopts collects the options and their # arguments, adjusting the ARGV array accordingly. # getopts('anro:f:d:v'); # # Formatting and keyword selection are taken care of through two arrays: # one associative, the other a straight array. The associative array is # indexed on keyword name (including path) and returns the number of bytes # for the output field width. The second contains a list of the keywords # to be indexed, in the order in which they appeared on the command line # and/or in the field list file (default fields come first). This is used # to output the fields in order. A third array is used to hold any literal # values provided for use when the indicated element is not present in a # label. # # # If the "-n" switch was used, then no default fields are defined. # Otherwise, start with the default field list: # if (!$opt_n) { if ($opt_v) # Include VOLUME_ID { $num_fields = 5; %size = ( '^FILE_SPECIFICATION_NAME', -50, 'PRODUCT_ID', -30, 'DATA_SET_ID', -40, 'VOLUME_ID', -11, 'PRODUCT_CREATION_TIME', 19 ); @name = ( '^FILE_SPECIFICATION_NAME', 'PRODUCT_ID', 'DATA_SET_ID', 'VOLUME_ID', 'PRODUCT_CREATION_TIME' ); @literal = ( "", "", "", "NULL_0001", ""); } else { $num_fields = 4; %size = ( '^FILE_SPECIFICATION_NAME', -50, 'PRODUCT_ID', -30, 'DATA_SET_ID', -40, 'PRODUCT_CREATION_TIME', 19 ); @name = ( '^FILE_SPECIFICATION_NAME', 'PRODUCT_ID', 'DATA_SET_ID', 'PRODUCT_CREATION_TIME' ); @literal = ( "", "", "", "" ); } } # # Look for an explicit output file and open it: # if ($opt_o) { open (OUTPUT,">$opt_o") || die "Could not open '$opt_o' for output.\n"; } # # Next, check to see if a file of field descriptions was included. If so, # read it: # if ($opt_f) { open (FIELDS,$opt_f) || die "Could not open '$opt_f'\n"; while ($line=) { chop $line; if ( $line =~/^\s*$/ || $line =~ /^\s*#.*$/ ) { next; } $line =~ s/#.*$//; # Delete trailing comments $line =~ s/\s//g; # Delete whitespace ($field,$width,$val) = split(/\//,$line); # Split into fields if ($width eq "") # Insert default width { $width = ($val eq "")? 10 : length($val); } if ($width !~ /^(-|)[0-9]+$/) # Not a number - assume lit. { $val = $width; $width = length($val); } $tmpsize[$num_fields] = $width; $name[$num_fields] = $field; $literal[$num_fields] = $val; $num_fields++; } close(FIELDS); } # # Finally, retrieve field names from the command line, if any: # for ($i=0; $i<=$#ARGV; $i++) # Get field descriptions { ($fname, $fwidth, $val) = split(/\//,$ARGV[$i]); if ($fwidth eq "") { $fwidth = ($val eq "")? 10 : length($val); } if ($fwidth !~ /^(-|)[0-9]+$/) { $val = $fwidth; $fwidth = length($val); } $tmpsize[$num_fields] = $fwidth; $name[$num_fields] = $fname; $literal[$num_fields] = $val; $num_fields++; } # # Format and do syntax-checking for all collected field names: # $num_fields = &checknames($num_fields,*name,*tmpsize); if ($num_fields < 1) { printf STDOUT "pdsidx: no valid fields entered.\n"; exit; } # # And store output field sizes in an associative array: # for ($i=0; $i<$num_fields; $i++) { if ($tmpsize[$i] != 0) # Special Fields may have no size in the array { $size{$name[$i]} = $tmpsize[$i]; } } ## *** print keyword list *** ##for ($i=0; $i<$num_fields; $i++) ## { printf "Keyword: %-50.50s Width: %2d\n", $name[$i], $size{$name[$i]}; ## } # # Determine the starting directory: # if ($opt_d) { if (-d $opt_d) # Make sure it's a directory { $start_dir = $opt_d; } else { printf STDOUT "pdsidx: Invalid root directory (%s).\n", $opt_d; exit; } } else { $start_dir = '.'; } # # Now call the recursive routine which will process this and all # subdirectories, if requested: # &runlabels($start_dir,$opt_r); #============================================================================= sub checknames { local ($n,*fldnm,*width) = @_; local ($name,$i,$j,$k,$count,@tmpname,@tmpsize); local ($elm,$ldd); # First, run through the names, do syntax-checking and put in # uniform format: foreach $name (@fldnm) { # Delete blanks: $name =~ s/ *//g; # Force to upper case: $name =~ tr/a-z/A-Z/; # First, check for special fields (beginning with '^'): if ($name =~ /^\^\w+/) { if (!$SPECIAL{$name}) # Is it on the Special Field list? { printf STDOUT "pdsidx: Invalid field name, \"%s\" (%s).\n", $name,"not a special field"; $name = ""; } next; # No more checks for special names } # If there's a namespace, split if off, validate it and save it: if ($name =~ /:/) { ($ldd,$elm) = split(/:/,$name); if ($ldd !~ /^[A-Z][A-Z_0-9]*$/) { printf STDOUT "pdsidx: Invalid namespace, \"%s\" (%s).\n", $ldd, "invalid character"; $name = ""; next; } } else { $elm = $name; undef $ldd; } # Make sure there are only valid characters in the element name, and # that it begins with a letter: if ($elm !~ /^[A-Z][A-Z_0-9\-\[\]\.]*[^\.]$/) { printf STDOUT "pdsidx: Invalid field name, \"%s\" (%s).\n", $name, "invalid character"; $name = ""; next; } # Insert "[1]" before unnumbered objects: $elm =~ s/([A-Z]+)\./\1\[1\]./g; $name = ($ldd)? "$ldd:$elm" : $elm; # Check for equal numbers of []: if (split(/\[/,$name) != split(/\]/,$name)) { printf STDOUT "pdsidx: Invalid field name, \"%s\" (%s).\n", $name, "unbalanced brackets"; $name = ""; next; } # If there are brackets, make sure there are only digits # between them: if ( ($name =~ /\[/) && ($name =~ /\[\d*[^0-9\]]\d*\]/) ) { printf STDOUT "pdsidx: Invalid field name, \"%s\" (%s).\n", $name, "invalid subscript"; $name = ""; next; } } # Now, step through the lists and remove null field names: $count = 0; for ($i=0; $i<$n; $i++) { if ($fldnm[$i] ne "") { $tmpname[$count] = $fldnm[$i]; $tmpsize[$count] = $width[$i]; $count++; } } @fldnm = @tmpname; @width = @tmpsize; $count; } #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sub runlabels # # Subroutine to run the loop through label files and subdirectories as needed # # Arguments: $_[0] = directory (unopened) # $_[1] = $opt_r (recursion flag) # { local ($dir=$_[0], $rflag=$_[1]); local (@label_file); # file names ending with ".lbl" local (@subdir); # subdirectory names local ($lf,$sd); # element counters for filename lists local ($file,$i); # loop/subscript variables local ($full_name); # full file name # Open the directory: opendir(DIR,$dir) || die "pdsidx: Could not open \"$dir\".\n"; # Get a sorted list of files in the directory, and parse it into lists # of label file names and subdirectories: $lf = $sd = 0; foreach $file (sort readdir(DIR)) { if ($file =~ /^\./) { next; } else { $file = $dir . '/' . $file; } $tfile = $file; $tfile =~ tr/a-z/A-Z/; if ($tfile =~ /\.LBL$/) { $label_file[$lf] = $file; $lf++; } elsif ($opt_a && is_label($tfile)) { $label_file[$lf] = $file; $lf++; } elsif (-d $file && $rflag) { $subdir[$sd] = $file; $sd++; } } # Now process the label files: foreach $file (@label_file) { open (LABEL,$file) || die "pdsidx: Could not open \"$file\".\n"; &process_label($file); } # Close the current directory and descend through subdirectories: close(DIR); foreach $file (@subdir) { &runlabels($file,$rflag); } } #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sub process_label # Routine to read through the label file and pull out the desired keywords. # # Arguments: $_[0] = the full name of the file (already opened on LABEL) # { local ($file=$_[0]); # Full name of the file on LABEL local ($line); # input buffer local ($buffer); # string buffer local ($found_first) = 0; # flag for first object pointer local (%val); # Array of found values local ($name); # Object names for initialization loop local ($i); # string subscript/index local ($prtval); # Holding place for value to be printed # Before we can begin processing object names, we must initialize the # %object_count array. For the very first label, this array will have # no elements. In subsequent labels, all existing elements should be # re-initialized to zero, since object numbering is only relative to # other objects in the same label: foreach $name (keys %object_count) { $object_count{$name} = 0; } # First, assign the special values for ^PATH, ^LABEL and # ^FILE_SPECIFICATION_NAME values: $buffer = $file; $buffer =~ s/\/[a-zA-Z0-9_.]*$//; # Remove file name $buffer =~ s/^[\.\/]+//; # Remove relative part of path name $val{'^PATH'} = $buffer; $buffer = $file; $buffer =~ s/^.*\///; $buffer =~ s/\.lbl//; $val{'^LABEL'} = $buffer; $val{'^FILE_SPECIFICATION_NAME'} = $val{'^PATH'} . "/" . $val{'^LABEL'} . ".lbl"; $object = ""; while ($line=&nextline) { # Do some format cleanup to ease processing later: $line =~ s/^\s+//; # Clip leading blanks $line =~ s/\s*=\s*/=/; # Remove blanks around first '=' # Split the line at the first '=': $i = index($line,"="); if ($i >= 0) { $keyword = substr($line,0,$i); $value = substr($line,$i+1); $value =~ s/^("|')//; $value =~ s/("|')$//; } else { $keyword = $line; undef ($value); } $keyword =~ tr/a-z/A-Z/; # Force keyword to upper case last if ($keyword eq "END"); # Check for end of label # Check for end of object and pop object name as needed. Note that # we're assuming the label is well-formatted: if ($keyword eq "END_OBJECT") { $object =~ s/[^.]*\.$//; next; } # Any remaining one-word lines should be ignored as well: next unless ($value); # # OK, now we've got the keyword from the line. Time to check for new # OBJECT definitions and adjust the $object variable accordingly; # if ($keyword eq "OBJECT") { $object = $object . $value; #printf " New object: $object\n"; # Increment the counter for this type of object, and add the # appropriate subscript: $object_count{$object}++; $object = sprintf("%s[%d].",$object,$object_count{$object}); next; } # Add the object path to the keyword value. If this string has a value # in the %size array, save its corresponding value in the %val array: $keyword = $object . $keyword; #printf " Checking $keyword\n"; if ($size{$keyword}) { $val{$keyword} = $value; } # Check for the special fields associated with the data file pointer: if ( $keyword =~/^\^/ && !$found_first) # Ignore all but first { $keyword =~ s/^\^//; # pointer field # Check to see if the value is enclosed in parentheses, If so, # this indicates a filename with an offset, either in records # or bytes (BYTES flag included): if ( $value =~ /[(]/ ) { $value =~ s/ *//g; # Collapse blanks $value =~ s/[()]//g; # Remove parentheses ($filename, $offset, $byte_flag) = split(/,/,$value); $filename =~ s/("|')//g; # Remove quotes } else { $filename = $value; } # Assign the Filename and offset values to their respective elements: if ($byte_flag) { $offset .= " " . $byte_flag; } $val{'^FILE'} = $filename; $val{'^OFFSET'} = $offset; $val{'^OBJECT'} = $keyword; $found_first = 1; } } # # End of label. Print the index line for this label. Values are printed # in the order in which they were listed in the @name array: # if ($opt_o) { $oldhandle = select(OUTPUT); } # In each case, first we write the format string, then substitute the # literal value, if needed, for missing values, then write the value: for ($i=0; $i<$num_fields-1; $i++) { $bytes = $size{$name[$i]}; if ($bytes < 0) { $format = sprintf ("%%%d.%ds ",$bytes,-$bytes); } else { $format = sprintf ("%%%d.%ds ",$bytes,$bytes); } $prtval = ($val{$name[$i]} eq "")? $literal[$i] : $val{$name[$i]}; printf $format,$prtval; } $bytes = $size{$name[$num_fields-1]}; if ($bytes < 0) { $format = sprintf ("%%%d.%ds",$bytes,-$bytes); } else { $format = sprintf ("%%%d.%ds",$bytes,$bytes); } $prtval = ($val{$name[$num_fields-1]} eq "")? $literal[$num_fields-1] : $val{$name[$num_fields-1]}; printf "$format",$prtval; printf "\n"; if ($opt_o) { select($oldhandle); } } #++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ sub nextline # Routine to return the next complete line from the input label file open # on . { local($buff); # Line buffer local($line); # Line to be returned local($not_done); # Loop flag local($val,$t); # value part of line and remainder local($qt); # Quote character $line = ""; # Get the next line, formatted for processing: return undef unless ($buff = &cleanline); # If there is no "=", return now: return $buff if ($buff !~ /=/); # We have at least the beginning of an assignment statement. Save # it into $line: $line = $buff; # Now, we have to make sure we have the entire value. The cases are: # # 1. If nothing follows the "=", we must read the next line to find # the value. # 2. If a simple value follows the "=", return # 3. If a quote follows the "=", we must read lines until we find a # matching end quote. # ($t,$val) = split(/=/,$buff); # 1. Read next line if (! $val) { return $line unless ($buff = &cleanline); $line = "$line $buff"; $val = $buff; } # 2. Simple value (and check for a matching quote on this line): if ($val !~ /^("|')/ || $val =~ /^".*"/ || $val =~ /^'.*'/) { return $line; } # 3. Find matching quote. $qt = substr($val,0,1); $not_done = 1; while ($not_done) { return $line unless ($buff = &cleanline); # Look for closing quote. If found, delete anything after it and set # loop flag: if ($buff =~ /$qt/) { $buff =~ s/$qt.*$/$qt/; $not_done = 0; } # If the line is more than 255 characters long, then assume we're in # some kind of description field and ignore it. Otherwise, concatenate # it to the end of the current buffer. The last line is always added # so that the quotes will balance. if (length($line) < 255 || ! $not_done) { $line = "$line $buff"; } } # Return the line: return $line; } #----------------------------------------------------------------------------- sub cleanline # Reads the next line from and cleans it up for processing { local($buff); # input buffer $buff = ""; while ($buff eq "") { if (!($buff = )) { return undef; } chomp $buff; # Remove all record delimiters # Remove comments. This is tricky, since the comment delimiter may # appear within an open quote, or after a closing quote: if ($buff =~ /=\s*"/) { $buff =~ s/(=\s*"[^"]*").*$/$1/; } # Drops everything after closing " elsif ($buff =~ /=\s*'/) { $buff =~ s/(=\s*'[^']*').*$/$1/; } else { $buff =~ s/\s*\/\*.*$//; } $buff =~ s/^\s+//; $buff =~ s/\s*$//; $buff =~ s/\s*=\s*/=/; } return $buff; } #----------------------------------------------------------------------------- sub is_label # Checks for PDS_VERSION_ID as first line of a file to determine if it is a # PDS label. { local ($file) = $_[0]; my ($line); open(CHK,$file) || die "Could not open $file to check for label ($!)"; $line=; while (($line =~ /^\s*#/) || ($line =~ /^\s*$/) || ($line =~/SFDU/)) { $line = ; } if ($line =~ /^\s*PDS_VERSION_ID/) { return 1; } else { return 0; } }