#!/usr/bin/env python
# Jim Tuttle 4/2007 http://www.braggtown.com
#
# Purpose: This script applies an FGDC metadata template to an orthophoto collection. It parses the
# geographic coordinates from a comma separated value file generated with the National Park Service
# extension for ESRI ArcCatalog. I wrote this to deal with ortho collections we received with no metadata.
# I expected to find an ArcScript that did this, but couldn't find one.
# How to use: You'll have to create an FGDC template. Perhaps there is an example on the projects page
# on my website at http://www.prairienet.org/~jtuttle/projects.html
# Notes: Had to use safe_substitute instead of substitute because some templates contain dollar
# signs ($) to describe hourly charges.
# Be aware of the columns in the CSV. If they're different than I've specified here, you'll need to
# fix the script.
# Predicated on structure of the comma separated value output of the dbf being structured
# in the following way: PATH,FILENAME,EXTENSION,XMIN,YMIN,XMAX,YMAX,RES_Met_NS,RES_Met_EW

import csv
import sys
from os import listdir, abort
from os.path import split, join, getsize, exists, splitext
from string import Template

#### Set these variables ####
path = "/mnt/atabeast/wake_county_ortho_2005/WC-Tiff"  # path to image collection
csvfile = "Export_Output.csv"  # comma separated value file with bounding coordinates
img_ext = "tif"  # image extension
template = "/home/jjtuttle/tif/template.xml"  # FGDC metadata template to be applied to collection of images
verbosity = True
confirmXML = True  # Set to True to verify that each image file is accompanied by a metadata file
overwriteXML = True  # Set to True to overwrite .xml with metadata generated by this script. False skips over if xml exists.

# Number of columns expected in every CSV record:
# PATH,FILENAME,EXTENSION,XMIN,YMIN,XMAX,YMAX,RES_Met_NS,RES_Met_EW
CSV_COLUMN_COUNT = 9


def parseCSV(csvfile):
    """Parse the CSV dump from ArcMap.

    Returns a 2-tuple ``(csv_list, csv_images)``, both sorted:
    ``csv_list`` holds one ``[filename, xmin, ymin, xmax, ymax]`` list per
    record and ``csv_images`` holds only the filenames, for comparison with
    the directory contents.

    The first record whose FILENAME column is literally ``FILENAME`` is
    treated as the header and skipped. Blank lines are ignored. A record with
    the wrong number of columns terminates the script via ``SystemExit`` with
    a message naming the offending line.
    """
    csv_list = []
    csv_images = []
    header = True
    with open(csvfile, "r") as f:
        # The csv module copes with quoted fields and line endings, which a
        # plain split(',') does not.
        reader = csv.reader(f)
        for row in reader:
            fields = [field.strip() for field in row]
            if not any(fields):
                continue  # blank line (e.g. trailing newline at end of file)
            if len(fields) != CSV_COLUMN_COUNT:
                sys.exit("Wrong number of elements in line %d of the CSV. Offending line: %s"
                         % (reader.line_num, ",".join(row)))
            filename = fields[1]
            if header and filename == 'FILENAME':
                header = False  # header line identified by 'FILENAME' in filename column
                continue
            xmin, ymin, xmax, ymax = fields[3:7]
            csv_list.append([filename, xmin, ymin, xmax, ymax])
            csv_images.append(filename)
    csv_list.sort()
    csv_images.sort()
    return csv_list, csv_images


def listOnDiskImages(path, img_ext):
    """Return the sorted names of the files in ``path`` whose extension is ``img_ext``.

    The comparison is case-insensitive and uses the text after the LAST dot,
    so names such as ``tile.2005.tif`` are recognised as images. A leading dot
    on ``img_ext`` is tolerated.
    """
    wanted = img_ext.lower().lstrip('.')
    disk_images = [name for name in listdir(path)
                   if splitext(name)[1][1:].lower() == wanted]
    disk_images.sort()
    return disk_images


def compareCSVtoDisk(csv_images, disk_images):
    """Compare the CSV image names to the images on disk and warn about mismatches.

    Names are compared as sets, so duplicates are ignored. Neither input list
    is modified (callers reuse ``disk_images`` afterwards).

    Returns a 2-tuple ``(csv_only, disk_only)`` of sorted lists: names that
    appear only in the CSV and names that appear only on disk.
    """
    csv_names = set(csv_images)
    disk_names = set(disk_images)
    csv_only = sorted(csv_names - disk_names)
    disk_only = sorted(disk_names - csv_names)

    if not csv_only and not disk_only:
        print("lists are equal")
        return csv_only, disk_only

    print("Initial lengths are; csv:%s and disk:%s" % (len(csv_images), len(disk_images)))
    for item in csv_only:
        print("The value %s was not found in the disk_images list" % item)
    for item in disk_only:
        print("The value %s was not found in the csv_images list" % item)
    return csv_only, disk_only


def applyXMLTemplate(csv_list, template, path, overwrite=None, verbose=None):
    """Create an XML metadata file for each image listed in ``csv_list``.

    Values from the CSV are substituted into the metadata template:
    xmin=westbc, ymin=southbc, xmax=eastbc, ymax=northbc, plus ``filename``
    and ``transize`` (image size in megabytes). ``safe_substitute`` is used so
    stray dollar signs in the template are left untouched.

    ``overwrite`` and ``verbose`` default to the module-level ``overwriteXML``
    and ``verbosity`` settings when left as None.

    Entries whose filename has no name.extension shape, or whose image cannot
    be found under ``path``, are reported and skipped.

    Returns the list of metadata file paths that were written.
    """
    if overwrite is None:
        overwrite = overwriteXML
    if verbose is None:
        verbose = verbosity

    with open(template) as template_file:
        t = Template(template_file.read())  # create template object from xml file

    written = []
    for entry in csv_list:
        filename, westbc, southbc, eastbc, northbc = entry[:5]
        basename, ext = splitext(filename)
        if not basename or not ext:
            print("%s doesn't conform to name.extension pattern. Dirty data in CSV filename column?"
                  % filename)
            continue  # never fall through with a stale basename from a previous entry

        floc = join(path, basename) + '.xml'
        if exists(floc) and not overwrite:
            if verbose:
                print("%s exists and you chose not to overwrite. Skipping." % floc)
            continue

        try:
            tsize = float(getsize(join(path, filename))) / 1048576  # bytes -> megabytes
        except OSError:
            print("%s is listed in the CSV but could not be read from %s. Skipping."
                  % (filename, path))
            continue

        if verbose:
            print("Processing file %s" % filename)
            print("Using values: %s %s %s %s %s" % (westbc, southbc, eastbc, northbc, tsize))
            print("")

        n = t.safe_substitute(filename=filename, westbc=westbc, southbc=southbc,
                              eastbc=eastbc, northbc=northbc, transize=str(tsize))
        with open(floc, 'w') as out:
            out.write(n)
        written.append(floc)
    return written


def confirmMetadataExists(path, disk_images):
    """Confirm that each image file has an associated metadata (.xml) file.

    Prints one line per image lacking metadata and returns the list of those
    image names.
    """
    print("Verifying metadata exists for images in %s. Images with missing metadata..." % path)
    missing = []
    for image in disk_images:
        metadata_file = join(path, splitext(image)[0]) + ".xml"
        if not exists(metadata_file):
            print("%s has no metadata." % image)
            missing.append(image)
    return missing


def main():
    """Run the full workflow using the settings at the top of this file."""
    # csv_list is list of per-image metadata elements, csv_images is list of image names
    csv_list, csv_images = parseCSV(csvfile)
    disk_images = listOnDiskImages(path, img_ext)
    compareCSVtoDisk(csv_images, disk_images)
    applyXMLTemplate(csv_list, template, path)
    if confirmXML:
        confirmMetadataExists(path, disk_images)
    print("Process complete")


#### Main logic ####
if __name__ == "__main__":
    main()