From 16cf916a040e344ab400bc995174c928a94b4787 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 16:39:20 -0400
Subject: [PATCH 01/24] Add script for disks that require user-supplied tsk
 options

---
 process_with_tsk_options.py | 473 ++++++++++++++++++++++++++++++++++++
 1 file changed, 473 insertions(+)
 create mode 100644 process_with_tsk_options.py

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
new file mode 100644
index 0000000..96b330a
--- /dev/null
+++ b/process_with_tsk_options.py
@@ -0,0 +1,473 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Creates a SIP from any single disk image using options for
+tsk_recover provided by the user.
+
+Will only work for disk images containing a file system
+able to be parsed by TSK.
+
+Python 3
+
+MIT License
+(c) Tim Walsh 2017
+http://bitarchivist.net
+"""
+
+import argparse
+import csv
+import datetime
+import itertools
+import math
+import os
+import shutil
+import subprocess
+import sys
+import time
+
+def convert_size(size):
+    # convert size to human-readable form
+    if (size == 0):
+        return '0 bytes'
+    size_name = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
+    i = int(math.floor(math.log(size,1024)))
+    p = math.pow(1024,i)
+    s = round(size/p)
+    s = str(s)
+    s = s.replace('.0', '')
+    return '%s %s' % (s,size_name[i])
+
+def time_to_int(str_time):
+    dt = time.mktime(datetime.datetime.strptime(str_time, 
+        "%Y-%m-%dT%H:%M:%S").timetuple())
+    return dt
+
+def create_spreadsheet(files_only, exportall, sip_dir):
+    # process each SIP
+    current = os.path.abspath(sip_dir)
+    # test if entry if directory
+    if os.path.isdir(current):
+        
+        # intialize values
+        number_files = 0
+        total_bytes = 0
+        mtimes = []
+        ctimes = []
+        crtimes = []
+
+        # parse dfxml file
+        if args.bagfiles == True:
+            dfxml_file = os.path.abspath(os.path.join(current, 
+                'data', 'metadata', 'submissionDocumentation', 'dfxml.xml'))
+        else:
+            dfxml_file = os.path.abspath(os.path.join(current, 
+                'metadata', 'submissionDocumentation', 'dfxml.xml'))
+
+        # try to read DFXML file
+        try:
+            # gather info for each FileObject
+            for (event, obj) in Objects.iterparse(dfxml_file):
+                
+                # only work on FileObjects
+                if not isinstance(obj, Objects.FileObject):
+                    continue
+
+                # skip directories and links
+                if obj.name_type:
+                    if obj.name_type != "r":
+                        continue
+
+                # skip unallocated if args.exportall is False
+                if exportall == False:
+                    if obj.unalloc:
+                        if obj.unalloc == 1:
+                            continue
+                
+                # gather info
+                number_files += 1
+
+                try:
+                    mtime = obj.mtime
+                    mtime = str(mtime)
+                    mtimes.append(mtime)
+                except:
+                    pass
+
+                try:
+                    ctime = obj.ctime
+                    ctime = str(ctime)
+                    ctimes.append(ctime)
+                except:
+                    pass
+
+                try:
+                    crtime = obj.crtime
+                    crtime = str(crtime)
+                    crtimes.append(crtime)
+                except:
+                    pass
+        
+                total_bytes += obj.filesize
+
+            # filter 'None' values from date lists
+            for date_list in mtimes, ctimes, crtimes:
+                while 'None' in date_list:
+                    date_list.remove('None')
+
+
+            # build extent statement
+            size_readable = convert_size(total_bytes)
+            if number_files == 1:
+                extent = "1 digital file (%s)" % size_readable
+            elif number_files == 0:
+                extent = "EMPTY"
+            else:
+                extent = "%d digital files (%s)" % (number_files, size_readable)
+
+            # determine earliest and latest MAC dates from lists
+            date_earliest_m = ""
+            date_latest_m = ""
+            date_earliest_c = ""
+            date_latest_c = ""
+            date_earliest_cr = ""
+            date_latest_cr = ""
+            date_statement = ""
+
+            if mtimes:
+                date_earliest_m = min(mtimes)
+                date_latest_m = max(mtimes)
+            if ctimes:
+                date_earliest_c = min(ctimes)
+                date_latest_c = max(ctimes)
+            if crtimes:
+                date_earliest_cr = min(crtimes)
+                date_latest_cr = max(crtimes)
+
+            # determine which set of dates to use (logic: use set with earliest start date)
+            use_ctimes = False
+            use_crtimes = False
+
+            if not date_earliest_m:
+                date_earliest_m = "N/A"
+                date_latest_m = "N/A"
+            date_to_use = date_earliest_m # default to date modified
+
+            if date_earliest_c:
+                if date_earliest_c < date_to_use:
+                    date_to_use = date_earliest_c
+                    use_ctimes = True
+            if date_earliest_cr:
+                if date_earliest_cr < date_to_use:
+                    date_to_use = date_earliest_cr
+                    use_ctimes = False
+                    use_crtimes = True
+
+            # store date_earliest and date_latest values based on datetype used
+            if use_ctimes == True:
+                date_earliest = date_earliest_c[:10]
+                date_latest = date_latest_c[:10]
+            elif use_crtimes == True:
+                date_earliest = date_earliest_cr[:10]
+                date_latest = date_latest_cr[:10]
+            else:
+                date_earliest = date_earliest_m[:10]
+                date_latest = date_latest_m[:10]
+
+            # write date statement
+            if date_earliest[:4] == date_latest[:4]:
+                date_statement = '%s' % date_earliest[:4]
+            else:
+                date_statement = '%s - %s' % (date_earliest[:4], date_latest[:4])
+
+            # gather file system info, discern tool used
+            if args.bagfiles == True:
+                disktype = os.path.join(current, 'data', 'metadata', 
+                    'submissionDocumentation', 'disktype.txt')
+            else:
+                disktype = os.path.join(current, 'metadata', 
+                    'submissionDocumentation', 'disktype.txt')
+            # pull filesystem info from disktype.txt
+            disk_fs = ''
+            try:
+                for line in open(disktype, 'r'):
+                    if "file system" in line:
+                        disk_fs = line.strip()
+            except: # handle non-Unicode chars
+                for line in open(disktype, 'rb'):
+                    if "file system" in line.decode('utf-8','ignore'):
+                        disk_fs = line.decode('utf-8','ignore').strip()
+
+            # save tool used to carve files
+            if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
+                tool = "carved from the disk image using the Sleuth Kit command line utility tsk_recover"
+            elif ('hfs' in disk_fs.lower()) and ('hfs+' not in disk_fs.lower()):
+                tool = "carved from disk image using the HFSExplorer command line utility"
+            elif 'udf' in disk_fs.lower():
+                tool = "copied from the mounted disk image"
+            else:
+                tool = "UNSUCCESSFULLY"
+
+            # gather info from brunnhilde & write scope and content note
+            if extent == 'EMPTY':
+                scopecontent = ''
+                formatlist = ''
+            else:
+                fileformats = []
+                formatlist = ''
+                fileformat_csv = ''
+                if args.bagfiles == True:
+                    fileformat_csv = os.path.join(current, 'data', 'metadata', 'submissionDocumentation', 
+                        'brunnhilde', 'csv_reports', 'formats.csv')
+                else:
+                    fileformat_csv = os.path.join(current, 'metadata', 'submissionDocumentation', 
+                        'brunnhilde', 'csv_reports', 'formats.csv')
+                try: 
+                    with open(fileformat_csv, 'r') as f:
+                        reader = csv.reader(f)
+                        next(reader)
+                        for row in itertools.islice(reader, 5):
+                            fileformats.append(row[0])
+                except:
+                    fileformats.append("ERROR! No formats.csv file to pull formats from.")
+                # replace empty elements with 'Unidentified
+                fileformats = [element or 'Unidentified' for element in fileformats]
+                formatlist = ', '.join(fileformats)
+                
+                
+                # create scope and content note
+                if files_only == True:
+                    scopecontent = 'File includes digital files %s. Most common file formats: %s' % (tool, formatlist)
+                else:
+                    scopecontent = 'File includes both a disk image and digital files %s. Most common file formats: %s' % (tool, formatlist)
+
+            # write csv row
+            writer.writerow(['', item, '', '', date_statement, date_earliest, date_latest, 'File', extent, 
+                scopecontent, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
+            
+            print('Described %s successfully.' % (current))
+
+        # if error reading DFXML file, report that
+        except:
+            # write error to csv
+            writer.writerow(['', item, '', '', 'Error', 'Error', 'Error', 'File', 'Error', 
+                'Error reading DFXML file.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
+
+            print('ERROR: DFXML file for %s not well-formed.' % (current))
+
+def keep_logical_files_only(objects_dir):
+    # get list of files in files dir
+    files_dir = os.path.join(objects_dir, 'files')
+    fileList = os.listdir(files_dir)
+    fileList = [files_dir + '/' + filename for filename in fileList]
+
+    # move files up one directory
+    for f in fileList:
+        shutil.move(f, objects_dir)
+
+    # delete file and diskimage dirs
+    shutil.rmtree(files_dir)
+    shutil.rmtree(os.path.join(objects_dir, 'diskimage'))
+
+# MAIN FLOW
+
+# parse arguments
+parser = argparse.ArgumentParser()
+parser.add_argument("-b", "--bagfiles", help="Bag files instead of writing checksum.md5", action="store_true")
+parser.add_argument("-e", "--exportall", help="Export all (not only allocated) with tsk_recover", action="store_true")
+parser.add_argument("-f", "--filesonly", help="Include digital files only (not disk images) in SIPs", action="store_true")
+parser.add_argument("-p", "--piiscan", help="Run bulk_extractor in Brunnhilde scan", action="store_true")
+parser.add_argument("-r", "--resforks", help="Export AppleDouble resource forks from HFS-formatted disks", action="store_true")
+parser.add_argument("--imgtype", help="Disk image type (see tsk_recover man page for values)", action="store")
+parser.add_argument("--fstype", help="File system type (see tsk_recover man page for values)", action="store")
+parser.add_argument("--sector_offset", help="Sector offset of partition to parse (see tsk-recover man page for details)", action="store")
+parser.add_argument("source", help="Source directory containing disk image (and related files)")
+parser.add_argument("destination", help="Output destination")
+args = parser.parse_args()
+
+destination = args.destination
+
+# create output directories
+if not os.path.exists(destination):
+    os.makedirs(destination)
+
+# open description spreadsheet
+try:
+    spreadsheet = open(os.path.join(destination,'description.csv'), 'w')
+    writer = csv.writer(spreadsheet, quoting=csv.QUOTE_NONNUMERIC)
+    header_list = ['Parent ID', 'Identifier', 'Title', 'Archive Creator', 'Date expression', 'Date start', 'Date end', 
+        'Level of description', 'Extent and medium', 'Scope and content', 'Arrangement (optional)', 'Accession number', 
+        'Appraisal, destruction, and scheduling information (optional)', 'Name access points (optional)', 
+        'Geographic access points (optional)', 'Conditions governing access (optional)', 'Conditions governing reproduction (optional)', 
+        'Language of material (optional)', 'Physical characteristics & technical requirements affecting use (optional)', 
+        'Finding aids (optional)', 'Related units of description (optional)', 'Archival history (optional)', 
+        'Immediate source of acquisition or transfer (optional)', "Archivists' note (optional)", 'General note (optional)', 
+        'Description status']
+    writer.writerow(header_list)
+except:
+    print('There was an error creating the processing spreadsheet.')
+    sys.exit()
+
+# iterate through files in source directory
+for file in sorted(os.listdir(args.source)):
+
+    # record filename in log
+    print('>>> NEW FILE: %s' % (file))
+
+    # determine if disk image
+    if file.endswith((".E01", ".000", ".001", ".raw", ".img", ".dd", ".iso")):
+
+        # save info about file
+        image_path = os.path.join(args.source, file)
+        image_id = os.path.splitext(file)[0]
+        image_ext = os.path.splitext(file)[1]
+
+        # create new folders
+        sip_dir = os.path.join(destination, file)
+        object_dir = os.path.join(sip_dir, 'objects')
+        diskimage_dir = os.path.join(object_dir, 'diskimage')
+        files_dir = os.path.join(object_dir, 'files')
+        metadata_dir = os.path.join(sip_dir, 'metadata')
+        subdoc_dir = os.path.join(metadata_dir, 'submissionDocumentation')
+
+        for folder in sip_dir, object_dir, diskimage_dir, files_dir, metadata_dir, subdoc_dir:
+            os.makedirs(folder)
+
+        # disk image status
+        raw_image = False
+
+        # check if disk image is ewf
+        if image_ext == ".E01":
+            # convert disk image to raw and write to /objects/diskimage
+            raw_out = os.path.join(diskimage_dir, image_id)
+            try:
+                subprocess.check_output(['ewfexport', '-t', raw_out, '-f', 'raw', '-o', '0', '-S', '0', '-u', image_path])
+                raw_image = True
+                os.rename(os.path.join(diskimage_dir, '%s.raw' % (image_id)), os.path.join(diskimage_dir, '%s.img' % image_id)) # change file extension from .raw to .img
+                os.rename(os.path.join(diskimage_dir, '%s.raw.info' % (image_id)), os.path.join(diskimage_dir, '%s.img.info' % image_id)) # rename sidecar md5 file
+                diskimage = os.path.join(diskimage_dir, '%s.img' % (image_id)) # use raw disk image in objects/diskimage moving forward
+            except subprocess.CalledProcessError:
+                print('ERROR: Disk image could not be converted to raw image format. Skipping disk.')
+
+        else:
+            raw_image = True
+            for movefile in os.listdir(args.source):
+                # if filename starts with disk image basename (this will also capture info and log files, multi-part disk images, etc.)
+                if movefile.startswith(image_id):
+                    # copy file to objects/diskimage
+                    try:
+                        shutil.copyfile(os.path.join(args.source, movefile), os.path.join(diskimage_dir, movefile))
+                    except:
+                        print('ERROR: File %s not successfully copied to %s' % (movefile, diskimage_dir))
+            diskimage = os.path.join(diskimage_dir, file) # use disk image in objects/diskimage moving forward
+
+        # if raw disk image, process
+        if raw_image == True:
+
+            # use fiwalk to make dfxml
+            fiwalk_file = os.path.join(subdoc_dir, 'dfxml.xml')
+            try:
+                subprocess.check_output(['fiwalk', '-X', fiwalk_file, diskimage])
+            except subprocess.CalledProcessError as e:
+                print('ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
+            
+            # carve images using tsk_recover with user-supplied options
+            if args.exportall == True:
+                carvefiles = ['tsk_recover', '-e', diskimage, files_dir]
+            else:
+                carvefiles = ['tsk_recover', '-a', diskimage, files_dir]
+
+            if args.fstype:
+                carvefiles.insert(2, '-f')
+                carvefiles.insert(3, args.fstype)
+            if args.imgtype:
+                carvefiles.insert(2, '-i')
+                carvefiles.insert(3, args.imgtype)
+            if args.sector_offset:
+                carvefiles.insert(2, '-o')
+                carvefiles.insert(3, args.sector_offset)
+
+            try:
+                subprocess.check_output(carvefiles)
+            except subprocess.CalledProcessError as e:
+                print('ERROR: tsk_recover could not carve files from disk. STDERR: %s' % (e.output))    
+
+            # modify file permissions
+            subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
+            subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
+
+            # rewrite last modified dates of files based on values in DFXML
+            for (event, obj) in Objects.iterparse(fiwalk_file):
+                
+                # only work on FileObjects
+                if not isinstance(obj, Objects.FileObject):
+                    continue
+
+                # skip directories and links
+                if obj.name_type:
+                    if obj.name_type != "r":
+                        continue
+
+                # record filename
+                dfxml_filename = obj.filename
+                dfxml_filedate = int(time.time()) # default to current time
+
+                # record last modified or last created date
+                try:
+                    mtime = obj.mtime
+                    mtime = str(mtime)
+                except:
+                    pass
+
+                try:
+                    crtime = obj.crtime
+                    crtime = str(crtime)
+                except:
+                    pass
+
+                # fallback to created date if last modified doesn't exist
+                if mtime and (mtime != 'None'):
+                    mtime = time_to_int(mtime[:19])
+                    dfxml_filedate = mtime
+                elif crtime and (crtime != 'None'):
+                    crtime = time_to_int(crtime[:19])
+                    dfxml_filedate = crtime
+                else:
+                    continue
+
+                # rewrite last modified date of corresponding file in objects/files
+                exported_filepath = os.path.join(files_dir, dfxml_filename)
+                if os.path.isfile(exported_filepath):
+                    os.utime(exported_filepath, (dfxml_filedate, dfxml_filedate))
+
+            # run brunnhilde and write to submissionDocumentation
+            files_abs = os.path.abspath(files_dir)
+            if args.piiscan == True: # brunnhilde with bulk_extractor
+                subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+            else: # brunnhilde without bulk_extractor
+                subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+
+            # if user selected 'filesonly', remove disk image files and repackage
+            if args.filesonly == True:
+                keep_logical_files_only(object_dir)
+
+            # write checksums
+            if args.bagfiles == True: # bag entire SIP
+                subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
+            else: # write metadata/checksum.md5
+                subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
+
+            # write description spreadsheet
+            create_spreadsheet(args.filesonly, args.exportall, sip_dir)
+
+        # no raw disk image
+        else:
+            print('NOTICE: No raw disk image. Skipping disk.')
+
+    else:
+        # write skipped file to log
+        print('NOTICE: File is not a disk image. Skipping file.')
+
+# close files
+spreadsheet.close()
+log.close()
\ No newline at end of file

From 77b76c86ea263551d21945d052d5fc2f71608317 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 16:40:22 -0400
Subject: [PATCH 02/24] import Objects

---
 process_with_tsk_options.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index 96b330a..e8545b5 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -26,6 +26,9 @@
 import sys
 import time
 
+#import Objects.py from python dfxml tools
+import Objects
+
 def convert_size(size):
     # convert size to human-readable form
     if (size == 0):

From e2d00f8e500d9752c5b68878525a842497628d45 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 16:48:13 -0400
Subject: [PATCH 03/24] Remove resforks option

---
 process_with_tsk_options.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index e8545b5..4b2eff3 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -280,7 +280,6 @@ def keep_logical_files_only(objects_dir):
 parser.add_argument("-e", "--exportall", help="Export all (not only allocated) with tsk_recover", action="store_true")
 parser.add_argument("-f", "--filesonly", help="Include digital files only (not disk images) in SIPs", action="store_true")
 parser.add_argument("-p", "--piiscan", help="Run bulk_extractor in Brunnhilde scan", action="store_true")
-parser.add_argument("-r", "--resforks", help="Export AppleDouble resource forks from HFS-formatted disks", action="store_true")
 parser.add_argument("--imgtype", help="Disk image type (see tsk_recover man page for values)", action="store")
 parser.add_argument("--fstype", help="File system type (see tsk_recover man page for values)", action="store")
 parser.add_argument("--sector_offset", help="Sector offset of partition to parse (see tsk-recover man page for details)", action="store")

From ae1f85696b12d98804e04d05455168611e5f99b1 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 16:51:22 -0400
Subject: [PATCH 04/24] Fix spreadsheet line

---
 process_with_tsk_options.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index 4b2eff3..8a6a1da 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -46,7 +46,7 @@ def time_to_int(str_time):
         "%Y-%m-%dT%H:%M:%S").timetuple())
     return dt
 
-def create_spreadsheet(files_only, exportall, sip_dir):
+def create_spreadsheet(files_only, exportall, sip_dir, filename):
     # process each SIP
     current = os.path.abspath(sip_dir)
     # test if entry if directory
@@ -245,7 +245,7 @@ def create_spreadsheet(files_only, exportall, sip_dir):
                     scopecontent = 'File includes both a disk image and digital files %s. Most common file formats: %s' % (tool, formatlist)
 
             # write csv row
-            writer.writerow(['', item, '', '', date_statement, date_earliest, date_latest, 'File', extent, 
+            writer.writerow(['', filename, '', '', date_statement, date_earliest, date_latest, 'File', extent, 
                 scopecontent, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
             
             print('Described %s successfully.' % (current))
@@ -253,7 +253,7 @@ def create_spreadsheet(files_only, exportall, sip_dir):
         # if error reading DFXML file, report that
         except:
             # write error to csv
-            writer.writerow(['', item, '', '', 'Error', 'Error', 'Error', 'File', 'Error', 
+            writer.writerow(['', filename, '', '', 'Error', 'Error', 'Error', 'File', 'Error', 
                 'Error reading DFXML file.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
 
             print('ERROR: DFXML file for %s not well-formed.' % (current))
@@ -460,7 +460,7 @@ def keep_logical_files_only(objects_dir):
                 subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
 
             # write description spreadsheet
-            create_spreadsheet(args.filesonly, args.exportall, sip_dir)
+            create_spreadsheet(args.filesonly, args.exportall, sip_dir, file)
 
         # no raw disk image
         else:

From 26c1a4e041174505fc730cdaa2cee4f2cab2fc31 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 16:53:41 -0400
Subject: [PATCH 05/24] Hardcode tool info

---
 process_with_tsk_options.py | 32 ++------------------------------
 1 file changed, 2 insertions(+), 30 deletions(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index 8a6a1da..bcd7420 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -183,34 +183,6 @@ def create_spreadsheet(files_only, exportall, sip_dir, filename):
             else:
                 date_statement = '%s - %s' % (date_earliest[:4], date_latest[:4])
 
-            # gather file system info, discern tool used
-            if args.bagfiles == True:
-                disktype = os.path.join(current, 'data', 'metadata', 
-                    'submissionDocumentation', 'disktype.txt')
-            else:
-                disktype = os.path.join(current, 'metadata', 
-                    'submissionDocumentation', 'disktype.txt')
-            # pull filesystem info from disktype.txt
-            disk_fs = ''
-            try:
-                for line in open(disktype, 'r'):
-                    if "file system" in line:
-                        disk_fs = line.strip()
-            except: # handle non-Unicode chars
-                for line in open(disktype, 'rb'):
-                    if "file system" in line.decode('utf-8','ignore'):
-                        disk_fs = line.decode('utf-8','ignore').strip()
-
-            # save tool used to carve files
-            if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
-                tool = "carved from the disk image using the Sleuth Kit command line utility tsk_recover"
-            elif ('hfs' in disk_fs.lower()) and ('hfs+' not in disk_fs.lower()):
-                tool = "carved from disk image using the HFSExplorer command line utility"
-            elif 'udf' in disk_fs.lower():
-                tool = "copied from the mounted disk image"
-            else:
-                tool = "UNSUCCESSFULLY"
-
             # gather info from brunnhilde & write scope and content note
             if extent == 'EMPTY':
                 scopecontent = ''
@@ -240,9 +212,9 @@ def create_spreadsheet(files_only, exportall, sip_dir, filename):
                 
                 # create scope and content note
                 if files_only == True:
-                    scopecontent = 'File includes digital files %s. Most common file formats: %s' % (tool, formatlist)
+                    scopecontent = 'File includes digital files carved from a disk image using tsk_recover. Most common file formats: %s' % (formatlist)
                 else:
-                    scopecontent = 'File includes both a disk image and digital files %s. Most common file formats: %s' % (tool, formatlist)
+                    scopecontent = 'File includes both a disk image and digital files carved from the disk image using tsk_recover. Most common file formats: %s' % (formatlist)
 
             # write csv row
             writer.writerow(['', filename, '', '', date_statement, date_earliest, date_latest, 'File', extent, 

From bd24417b30719ab38108dca28c60ec651f24c0a3 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 16:57:06 -0400
Subject: [PATCH 06/24] Remove log close

---
 process_with_tsk_options.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index bcd7420..ef4c31e 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -443,5 +443,4 @@ def keep_logical_files_only(objects_dir):
         print('NOTICE: File is not a disk image. Skipping file.')
 
 # close files
-spreadsheet.close()
-log.close()
\ No newline at end of file
+spreadsheet.close()
\ No newline at end of file

From 92e9b62133e441647112e79c01d337f74febaaf9 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 17:17:31 -0400
Subject: [PATCH 07/24] Add process_with_tsk_options.py

---
 install.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/install.sh b/install.sh
index 3cd04ec..74a6e65 100755
--- a/install.sh
+++ b/install.sh
@@ -24,6 +24,7 @@ fi
 # Move files into /usr/share/ccatools/diskimageprocessor
 sudo mv diskimageprocessor.py /usr/share/ccatools/diskimageprocessor
 sudo mv diskimageanalyzer.py /usr/share/ccatools/diskimageprocessor
+sudo mv process_with_tsk_options.py /usr/share/ccatools/diskimageprocessor
 sudo mv main.py /usr/share/ccatools/diskimageprocessor
 sudo mv launch /usr/share/ccatools/diskimageprocessor
 sudo mv design.py /usr/share/ccatools/diskimageprocessor

From d62eed6d2793bbd278df29826575249d68c7aacb Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 20:16:27 -0400
Subject: [PATCH 08/24] Restructure into main()

---
 process_with_tsk_options.py | 711 ++++++++++++++++++------------------
 1 file changed, 356 insertions(+), 355 deletions(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index ef4c31e..2da5ff8 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -46,189 +46,202 @@ def time_to_int(str_time):
         "%Y-%m-%dT%H:%M:%S").timetuple())
     return dt
 
-def create_spreadsheet(files_only, exportall, sip_dir, filename):
-    # process each SIP
-    current = os.path.abspath(sip_dir)
-    # test if entry if directory
-    if os.path.isdir(current):
-        
-        # intialize values
-        number_files = 0
-        total_bytes = 0
-        mtimes = []
-        ctimes = []
-        crtimes = []
-
-        # parse dfxml file
-        if args.bagfiles == True:
-            dfxml_file = os.path.abspath(os.path.join(current, 
-                'data', 'metadata', 'submissionDocumentation', 'dfxml.xml'))
-        else:
-            dfxml_file = os.path.abspath(os.path.join(current, 
-                'metadata', 'submissionDocumentation', 'dfxml.xml'))
-
-        # try to read DFXML file
-        try:
-            # gather info for each FileObject
-            for (event, obj) in Objects.iterparse(dfxml_file):
-                
-                # only work on FileObjects
-                if not isinstance(obj, Objects.FileObject):
-                    continue
+def create_spreadsheet(files_only, exportall, destination, sip_dir, filename):
+    # open description spreadsheet and write header
+    with open(os.path.join(destination,'description.csv'), 'w') as spreadsheet:
+        writer = csv.writer(spreadsheet, quoting=csv.QUOTE_NONNUMERIC)
+        header_list = ['Parent ID', 'Identifier', 'Title', 'Archive Creator', 'Date expression', 'Date start', 'Date end', 
+            'Level of description', 'Extent and medium', 'Scope and content', 'Arrangement (optional)', 'Accession number', 
+            'Appraisal, destruction, and scheduling information (optional)', 'Name access points (optional)', 
+            'Geographic access points (optional)', 'Conditions governing access (optional)', 'Conditions governing reproduction (optional)', 
+            'Language of material (optional)', 'Physical characteristics & technical requirements affecting use (optional)', 
+            'Finding aids (optional)', 'Related units of description (optional)', 'Archival history (optional)', 
+            'Immediate source of acquisition or transfer (optional)', "Archivists' note (optional)", 'General note (optional)', 
+            'Description status']
+        writer.writerow(header_list)
+
+        # add info for SIP in new line
+        current = os.path.abspath(sip_dir)
+        # test if entry if directory
+        if os.path.isdir(current):
+            
+            # intialize values
+            number_files = 0
+            total_bytes = 0
+            mtimes = []
+            ctimes = []
+            crtimes = []
+
+            # parse dfxml file
+            if args.bagfiles == True:
+                dfxml_file = os.path.abspath(os.path.join(current, 
+                    'data', 'metadata', 'submissionDocumentation', 'dfxml.xml'))
+            else:
+                dfxml_file = os.path.abspath(os.path.join(current, 
+                    'metadata', 'submissionDocumentation', 'dfxml.xml'))
 
-                # skip directories and links
-                if obj.name_type:
-                    if obj.name_type != "r":
+            # try to read DFXML file
+            try:
+                # gather info for each FileObject
+                for (event, obj) in Objects.iterparse(dfxml_file):
+                    
+                    # only work on FileObjects
+                    if not isinstance(obj, Objects.FileObject):
                         continue
 
-                # skip unallocated if args.exportall is False
-                if exportall == False:
-                    if obj.unalloc:
-                        if obj.unalloc == 1:
+                    # skip directories and links
+                    if obj.name_type:
+                        if obj.name_type != "r":
                             continue
-                
-                # gather info
-                number_files += 1
 
-                try:
-                    mtime = obj.mtime
-                    mtime = str(mtime)
-                    mtimes.append(mtime)
-                except:
-                    pass
+                    # skip unallocated if args.exportall is False
+                    if exportall == False:
+                        if obj.unalloc:
+                            if obj.unalloc == 1:
+                                continue
+                    
+                    # gather info
+                    number_files += 1
 
-                try:
-                    ctime = obj.ctime
-                    ctime = str(ctime)
-                    ctimes.append(ctime)
-                except:
-                    pass
+                    try:
+                        mtime = obj.mtime
+                        mtime = str(mtime)
+                        mtimes.append(mtime)
+                    except:
+                        pass
 
-                try:
-                    crtime = obj.crtime
-                    crtime = str(crtime)
-                    crtimes.append(crtime)
-                except:
-                    pass
-        
-                total_bytes += obj.filesize
-
-            # filter 'None' values from date lists
-            for date_list in mtimes, ctimes, crtimes:
-                while 'None' in date_list:
-                    date_list.remove('None')
-
-
-            # build extent statement
-            size_readable = convert_size(total_bytes)
-            if number_files == 1:
-                extent = "1 digital file (%s)" % size_readable
-            elif number_files == 0:
-                extent = "EMPTY"
-            else:
-                extent = "%d digital files (%s)" % (number_files, size_readable)
-
-            # determine earliest and latest MAC dates from lists
-            date_earliest_m = ""
-            date_latest_m = ""
-            date_earliest_c = ""
-            date_latest_c = ""
-            date_earliest_cr = ""
-            date_latest_cr = ""
-            date_statement = ""
-
-            if mtimes:
-                date_earliest_m = min(mtimes)
-                date_latest_m = max(mtimes)
-            if ctimes:
-                date_earliest_c = min(ctimes)
-                date_latest_c = max(ctimes)
-            if crtimes:
-                date_earliest_cr = min(crtimes)
-                date_latest_cr = max(crtimes)
-
-            # determine which set of dates to use (logic: use set with earliest start date)
-            use_ctimes = False
-            use_crtimes = False
-
-            if not date_earliest_m:
-                date_earliest_m = "N/A"
-                date_latest_m = "N/A"
-            date_to_use = date_earliest_m # default to date modified
-
-            if date_earliest_c:
-                if date_earliest_c < date_to_use:
-                    date_to_use = date_earliest_c
-                    use_ctimes = True
-            if date_earliest_cr:
-                if date_earliest_cr < date_to_use:
-                    date_to_use = date_earliest_cr
-                    use_ctimes = False
-                    use_crtimes = True
-
-            # store date_earliest and date_latest values based on datetype used
-            if use_ctimes == True:
-                date_earliest = date_earliest_c[:10]
-                date_latest = date_latest_c[:10]
-            elif use_crtimes == True:
-                date_earliest = date_earliest_cr[:10]
-                date_latest = date_latest_cr[:10]
-            else:
-                date_earliest = date_earliest_m[:10]
-                date_latest = date_latest_m[:10]
+                    try:
+                        ctime = obj.ctime
+                        ctime = str(ctime)
+                        ctimes.append(ctime)
+                    except:
+                        pass
 
-            # write date statement
-            if date_earliest[:4] == date_latest[:4]:
-                date_statement = '%s' % date_earliest[:4]
-            else:
-                date_statement = '%s - %s' % (date_earliest[:4], date_latest[:4])
+                    try:
+                        crtime = obj.crtime
+                        crtime = str(crtime)
+                        crtimes.append(crtime)
+                    except:
+                        pass
+            
+                    total_bytes += obj.filesize
 
-            # gather info from brunnhilde & write scope and content note
-            if extent == 'EMPTY':
-                scopecontent = ''
-                formatlist = ''
-            else:
-                fileformats = []
-                formatlist = ''
-                fileformat_csv = ''
-                if args.bagfiles == True:
-                    fileformat_csv = os.path.join(current, 'data', 'metadata', 'submissionDocumentation', 
-                        'brunnhilde', 'csv_reports', 'formats.csv')
+                # filter 'None' values from date lists
+                for date_list in mtimes, ctimes, crtimes:
+                    while 'None' in date_list:
+                        date_list.remove('None')
+
+
+                # build extent statement
+                size_readable = convert_size(total_bytes)
+                if number_files == 1:
+                    extent = "1 digital file (%s)" % size_readable
+                elif number_files == 0:
+                    extent = "EMPTY"
                 else:
-                    fileformat_csv = os.path.join(current, 'metadata', 'submissionDocumentation', 
-                        'brunnhilde', 'csv_reports', 'formats.csv')
-                try: 
-                    with open(fileformat_csv, 'r') as f:
-                        reader = csv.reader(f)
-                        next(reader)
-                        for row in itertools.islice(reader, 5):
-                            fileformats.append(row[0])
-                except:
-                    fileformats.append("ERROR! No formats.csv file to pull formats from.")
-                # replace empty elements with 'Unidentified
-                fileformats = [element or 'Unidentified' for element in fileformats]
-                formatlist = ', '.join(fileformats)
-                
-                
-                # create scope and content note
-                if files_only == True:
-                    scopecontent = 'File includes digital files carved from a disk image using tsk_recover. Most common file formats: %s' % (formatlist)
+                    extent = "%d digital files (%s)" % (number_files, size_readable)
+
+                # determine earliest and latest MAC dates from lists
+                date_earliest_m = ""
+                date_latest_m = ""
+                date_earliest_c = ""
+                date_latest_c = ""
+                date_earliest_cr = ""
+                date_latest_cr = ""
+                date_statement = ""
+
+                if mtimes:
+                    date_earliest_m = min(mtimes)
+                    date_latest_m = max(mtimes)
+                if ctimes:
+                    date_earliest_c = min(ctimes)
+                    date_latest_c = max(ctimes)
+                if crtimes:
+                    date_earliest_cr = min(crtimes)
+                    date_latest_cr = max(crtimes)
+
+                # determine which set of dates to use (logic: use set with earliest start date)
+                use_ctimes = False
+                use_crtimes = False
+
+                if not date_earliest_m:
+                    date_earliest_m = "N/A"
+                    date_latest_m = "N/A"
+                date_to_use = date_earliest_m # default to date modified
+
+                if date_earliest_c:
+                    if date_earliest_c < date_to_use:
+                        date_to_use = date_earliest_c
+                        use_ctimes = True
+                if date_earliest_cr:
+                    if date_earliest_cr < date_to_use:
+                        date_to_use = date_earliest_cr
+                        use_ctimes = False
+                        use_crtimes = True
+
+                # store date_earliest and date_latest values based on datetype used
+                if use_ctimes == True:
+                    date_earliest = date_earliest_c[:10]
+                    date_latest = date_latest_c[:10]
+                elif use_crtimes == True:
+                    date_earliest = date_earliest_cr[:10]
+                    date_latest = date_latest_cr[:10]
                 else:
-                    scopecontent = 'File includes both a disk image and digital files carved from the disk image using tsk_recover. Most common file formats: %s' % (formatlist)
+                    date_earliest = date_earliest_m[:10]
+                    date_latest = date_latest_m[:10]
 
-            # write csv row
-            writer.writerow(['', filename, '', '', date_statement, date_earliest, date_latest, 'File', extent, 
-                scopecontent, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
-            
-            print('Described %s successfully.' % (current))
+                # write date statement
+                if date_earliest[:4] == date_latest[:4]:
+                    date_statement = '%s' % date_earliest[:4]
+                else:
+                    date_statement = '%s - %s' % (date_earliest[:4], date_latest[:4])
+
+                # gather info from brunnhilde & write scope and content note
+                if extent == 'EMPTY':
+                    scopecontent = ''
+                    formatlist = ''
+                else:
+                    fileformats = []
+                    formatlist = ''
+                    fileformat_csv = ''
+                    if args.bagfiles == True:
+                        fileformat_csv = os.path.join(current, 'data', 'metadata', 'submissionDocumentation', 
+                            'brunnhilde', 'csv_reports', 'formats.csv')
+                    else:
+                        fileformat_csv = os.path.join(current, 'metadata', 'submissionDocumentation', 
+                            'brunnhilde', 'csv_reports', 'formats.csv')
+                    try: 
+                        with open(fileformat_csv, 'r') as f:
+                            reader = csv.reader(f)
+                            next(reader)
+                            for row in itertools.islice(reader, 5):
+                                fileformats.append(row[0])
+                    except:
+                        fileformats.append("ERROR! No formats.csv file to pull formats from.")
+                    # replace empty elements with 'Unidentified
+                    fileformats = [element or 'Unidentified' for element in fileformats]
+                    formatlist = ', '.join(fileformats)
+                    
+                    
+                    # create scope and content note
+                    if files_only == True:
+                        scopecontent = 'File includes digital files carved from a disk image using tsk_recover. Most common file formats: %s' % (formatlist)
+                    else:
+                        scopecontent = 'File includes both a disk image and digital files carved from the disk image using tsk_recover. Most common file formats: %s' % (formatlist)
+
+                # write csv row
+                writer.writerow(['', filename, '', '', date_statement, date_earliest, date_latest, 'File', extent, 
+                    scopecontent, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
+                
+                print('Described %s successfully.' % (current))
 
-        # if error reading DFXML file, report that
-        except:
-            # write error to csv
-            writer.writerow(['', filename, '', '', 'Error', 'Error', 'Error', 'File', 'Error', 
-                'Error reading DFXML file.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
+            # if error reading DFXML file, report that
+            except:
+                # write error to csv
+                writer.writerow(['', filename, '', '', 'Error', 'Error', 'Error', 'File', 'Error', 
+                    'Error reading DFXML file.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
 
-            print('ERROR: DFXML file for %s not well-formed.' % (current))
+                print('ERROR: DFXML file for %s not well-formed.' % (current))
 
 def keep_logical_files_only(objects_dir):
     # get list of files in files dir
@@ -244,203 +257,191 @@ def keep_logical_files_only(objects_dir):
     shutil.rmtree(files_dir)
     shutil.rmtree(os.path.join(objects_dir, 'diskimage'))
 
-# MAIN FLOW
-
-# parse arguments
-parser = argparse.ArgumentParser()
-parser.add_argument("-b", "--bagfiles", help="Bag files instead of writing checksum.md5", action="store_true")
-parser.add_argument("-e", "--exportall", help="Export all (not only allocated) with tsk_recover", action="store_true")
-parser.add_argument("-f", "--filesonly", help="Include digital files only (not disk images) in SIPs", action="store_true")
-parser.add_argument("-p", "--piiscan", help="Run bulk_extractor in Brunnhilde scan", action="store_true")
-parser.add_argument("--imgtype", help="Disk image type (see tsk_recover man page for values)", action="store")
-parser.add_argument("--fstype", help="File system type (see tsk_recover man page for values)", action="store")
-parser.add_argument("--sector_offset", help="Sector offset of partition to parse (see tsk-recover man page for details)", action="store")
-parser.add_argument("source", help="Source directory containing disk image (and related files)")
-parser.add_argument("destination", help="Output destination")
-args = parser.parse_args()
-
-destination = args.destination
-
-# create output directories
-if not os.path.exists(destination):
-    os.makedirs(destination)
-
-# open description spreadsheet
-try:
-    spreadsheet = open(os.path.join(destination,'description.csv'), 'w')
-    writer = csv.writer(spreadsheet, quoting=csv.QUOTE_NONNUMERIC)
-    header_list = ['Parent ID', 'Identifier', 'Title', 'Archive Creator', 'Date expression', 'Date start', 'Date end', 
-        'Level of description', 'Extent and medium', 'Scope and content', 'Arrangement (optional)', 'Accession number', 
-        'Appraisal, destruction, and scheduling information (optional)', 'Name access points (optional)', 
-        'Geographic access points (optional)', 'Conditions governing access (optional)', 'Conditions governing reproduction (optional)', 
-        'Language of material (optional)', 'Physical characteristics & technical requirements affecting use (optional)', 
-        'Finding aids (optional)', 'Related units of description (optional)', 'Archival history (optional)', 
-        'Immediate source of acquisition or transfer (optional)', "Archivists' note (optional)", 'General note (optional)', 
-        'Description status']
-    writer.writerow(header_list)
-except:
-    print('There was an error creating the processing spreadsheet.')
-    sys.exit()
-
-# iterate through files in source directory
-for file in sorted(os.listdir(args.source)):
-
-    # record filename in log
-    print('>>> NEW FILE: %s' % (file))
-
-    # determine if disk image
-    if file.endswith((".E01", ".000", ".001", ".raw", ".img", ".dd", ".iso")):
-
-        # save info about file
-        image_path = os.path.join(args.source, file)
-        image_id = os.path.splitext(file)[0]
-        image_ext = os.path.splitext(file)[1]
-
-        # create new folders
-        sip_dir = os.path.join(destination, file)
-        object_dir = os.path.join(sip_dir, 'objects')
-        diskimage_dir = os.path.join(object_dir, 'diskimage')
-        files_dir = os.path.join(object_dir, 'files')
-        metadata_dir = os.path.join(sip_dir, 'metadata')
-        subdoc_dir = os.path.join(metadata_dir, 'submissionDocumentation')
-
-        for folder in sip_dir, object_dir, diskimage_dir, files_dir, metadata_dir, subdoc_dir:
-            os.makedirs(folder)
-
-        # disk image status
-        raw_image = False
-
-        # check if disk image is ewf
-        if image_ext == ".E01":
-            # convert disk image to raw and write to /objects/diskimage
-            raw_out = os.path.join(diskimage_dir, image_id)
-            try:
-                subprocess.check_output(['ewfexport', '-t', raw_out, '-f', 'raw', '-o', '0', '-S', '0', '-u', image_path])
+def _make_parser():
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-b", "--bagfiles", help="Bag files instead of writing checksum.md5", action="store_true")
+    parser.add_argument("-e", "--exportall", help="Export all (not only allocated) with tsk_recover", action="store_true")
+    parser.add_argument("-f", "--filesonly", help="Include digital files only (not disk images) in SIPs", action="store_true")
+    parser.add_argument("-p", "--piiscan", help="Run bulk_extractor in Brunnhilde scan", action="store_true")
+    parser.add_argument("--imgtype", help="Disk image type (see tsk_recover man page for values)", action="store")
+    parser.add_argument("--fstype", help="File system type (see tsk_recover man page for values)", action="store")
+    parser.add_argument("--sector_offset", help="Sector offset of partition to parse (see tsk-recover man page for details)", action="store")
+    parser.add_argument("source", help="Source directory containing disk image (and related files)")
+    parser.add_argument("destination", help="Output destination")
+    
+    return parser
+
+def main():
+    # parse args
+    parser = _make_parser(brunnhilde_version)
+    args = parser.parse_args()
+    source = os.path.abspath(args.source)
+    destination = os.path.abspath(args.destination)
+
+    # create output directories
+    if not os.path.exists(destination):
+        os.makedirs(destination)
+
+    # iterate through files in source directory
+    for file in sorted(os.listdir(source)):
+
+        # record filename in log
+        print('>>> NEW FILE: %s' % (file))
+
+        # determine if disk image
+        if file.endswith((".E01", ".000", ".001", ".raw", ".img", ".dd", ".iso")):
+
+            # save info about file
+            image_path = os.path.join(source, file)
+            image_id = os.path.splitext(file)[0]
+            image_ext = os.path.splitext(file)[1]
+
+            # create new folders
+            sip_dir = os.path.join(args.destination, file)
+            object_dir = os.path.join(sip_dir, 'objects')
+            diskimage_dir = os.path.join(object_dir, 'diskimage')
+            files_dir = os.path.join(object_dir, 'files')
+            metadata_dir = os.path.join(sip_dir, 'metadata')
+            subdoc_dir = os.path.join(metadata_dir, 'submissionDocumentation')
+
+            for folder in sip_dir, object_dir, diskimage_dir, files_dir, metadata_dir, subdoc_dir:
+                os.makedirs(folder)
+
+            # disk image status
+            raw_image = False
+
+            # check if disk image is ewf
+            if image_ext == ".E01":
+                # convert disk image to raw and write to /objects/diskimage
+                raw_out = os.path.join(diskimage_dir, image_id)
+                try:
+                    subprocess.check_output(['ewfexport', '-t', raw_out, '-f', 'raw', '-o', '0', '-S', '0', '-u', image_path])
+                    raw_image = True
+                    os.rename(os.path.join(diskimage_dir, '%s.raw' % (image_id)), os.path.join(diskimage_dir, '%s.img' % image_id)) # change file extension from .raw to .img
+                    os.rename(os.path.join(diskimage_dir, '%s.raw.info' % (image_id)), os.path.join(diskimage_dir, '%s.img.info' % image_id)) # rename sidecar md5 file
+                    diskimage = os.path.join(diskimage_dir, '%s.img' % (image_id)) # use raw disk image in objects/diskimage moving forward
+                except subprocess.CalledProcessError:
+                    print('ERROR: Disk image could not be converted to raw image format. Skipping disk.')
+
+            else:
                 raw_image = True
-                os.rename(os.path.join(diskimage_dir, '%s.raw' % (image_id)), os.path.join(diskimage_dir, '%s.img' % image_id)) # change file extension from .raw to .img
-                os.rename(os.path.join(diskimage_dir, '%s.raw.info' % (image_id)), os.path.join(diskimage_dir, '%s.img.info' % image_id)) # rename sidecar md5 file
-                diskimage = os.path.join(diskimage_dir, '%s.img' % (image_id)) # use raw disk image in objects/diskimage moving forward
-            except subprocess.CalledProcessError:
-                print('ERROR: Disk image could not be converted to raw image format. Skipping disk.')
+                for movefile in os.listdir(source):
+                    # if filename starts with disk image basename (this will also capture info and log files, multi-part disk images, etc.)
+                    if movefile.startswith(image_id):
+                        # copy file to objects/diskimage
+                        try:
+                            shutil.copyfile(os.path.join(source, movefile), os.path.join(diskimage_dir, movefile))
+                        except:
+                            print('ERROR: File %s not successfully copied to %s' % (movefile, diskimage_dir))
+                diskimage = os.path.join(diskimage_dir, file) # use disk image in objects/diskimage moving forward
+
+            # if raw disk image, process
+            if raw_image == True:
+
+                # use fiwalk to make dfxml
+                fiwalk_file = os.path.join(subdoc_dir, 'dfxml.xml')
+                try:
+                    subprocess.check_output(['fiwalk', '-X', fiwalk_file, diskimage])
+                except subprocess.CalledProcessError as e:
+                    print('ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
+                
+                # carve images using tsk_recover with user-supplied options
+                if args.exportall == True:
+                    carvefiles = ['tsk_recover', '-e', diskimage, files_dir]
+                else:
+                    carvefiles = ['tsk_recover', '-a', diskimage, files_dir]
+
+                if args.fstype:
+                    carvefiles.insert(2, '-f')
+                    carvefiles.insert(3, args.fstype)
+                if args.imgtype:
+                    carvefiles.insert(2, '-i')
+                    carvefiles.insert(3, args.imgtype)
+                if args.sector_offset:
+                    carvefiles.insert(2, '-o')
+                    carvefiles.insert(3, args.sector_offset)
 
-        else:
-            raw_image = True
-            for movefile in os.listdir(args.source):
-                # if filename starts with disk image basename (this will also capture info and log files, multi-part disk images, etc.)
-                if movefile.startswith(image_id):
-                    # copy file to objects/diskimage
+                try:
+                    subprocess.check_output(carvefiles)
+                except subprocess.CalledProcessError as e:
+                    print('ERROR: tsk_recover could not carve files from disk. STDERR: %s' % (e.output))    
+
+                # modify file permissions
+                subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
+                subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
+
+                # rewrite last modified dates of files based on values in DFXML
+                for (event, obj) in Objects.iterparse(fiwalk_file):
+                    
+                    # only work on FileObjects
+                    if not isinstance(obj, Objects.FileObject):
+                        continue
+
+                    # skip directories and links
+                    if obj.name_type:
+                        if obj.name_type != "r":
+                            continue
+
+                    # record filename
+                    dfxml_filename = obj.filename
+                    dfxml_filedate = int(time.time()) # default to current time
+
+                    # record last modified or last created date
                     try:
-                        shutil.copyfile(os.path.join(args.source, movefile), os.path.join(diskimage_dir, movefile))
+                        mtime = obj.mtime
+                        mtime = str(mtime)
                     except:
-                        print('ERROR: File %s not successfully copied to %s' % (movefile, diskimage_dir))
-            diskimage = os.path.join(diskimage_dir, file) # use disk image in objects/diskimage moving forward
+                        pass
 
-        # if raw disk image, process
-        if raw_image == True:
+                    try:
+                        crtime = obj.crtime
+                        crtime = str(crtime)
+                    except:
+                        pass
+
+                    # fallback to created date if last modified doesn't exist
+                    if mtime and (mtime != 'None'):
+                        mtime = time_to_int(mtime[:19])
+                        dfxml_filedate = mtime
+                    elif crtime and (crtime != 'None'):
+                        crtime = time_to_int(crtime[:19])
+                        dfxml_filedate = crtime
+                    else:
+                        continue
 
-            # use fiwalk to make dfxml
-            fiwalk_file = os.path.join(subdoc_dir, 'dfxml.xml')
-            try:
-                subprocess.check_output(['fiwalk', '-X', fiwalk_file, diskimage])
-            except subprocess.CalledProcessError as e:
-                print('ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
-            
-            # carve images using tsk_recover with user-supplied options
-            if args.exportall == True:
-                carvefiles = ['tsk_recover', '-e', diskimage, files_dir]
-            else:
-                carvefiles = ['tsk_recover', '-a', diskimage, files_dir]
-
-            if args.fstype:
-                carvefiles.insert(2, '-f')
-                carvefiles.insert(3, args.fstype)
-            if args.imgtype:
-                carvefiles.insert(2, '-i')
-                carvefiles.insert(3, args.imgtype)
-            if args.sector_offset:
-                carvefiles.insert(2, '-o')
-                carvefiles.insert(3, args.sector_offset)
+                    # rewrite last modified date of corresponding file in objects/files
+                    exported_filepath = os.path.join(files_dir, dfxml_filename)
+                    if os.path.isfile(exported_filepath):
+                        os.utime(exported_filepath, (dfxml_filedate, dfxml_filedate))
 
-            try:
-                subprocess.check_output(carvefiles)
-            except subprocess.CalledProcessError as e:
-                print('ERROR: tsk_recover could not carve files from disk. STDERR: %s' % (e.output))    
+                # run brunnhilde and write to submissionDocumentation
+                files_abs = os.path.abspath(files_dir)
+                if args.piiscan == True: # brunnhilde with bulk_extractor
+                    subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+                else: # brunnhilde without bulk_extractor
+                    subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
 
-            # modify file permissions
-            subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
-            subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
+                # if user selected 'filesonly', remove disk image files and repackage
+                if args.filesonly == True:
+                    keep_logical_files_only(object_dir)
 
-            # rewrite last modified dates of files based on values in DFXML
-            for (event, obj) in Objects.iterparse(fiwalk_file):
-                
-                # only work on FileObjects
-                if not isinstance(obj, Objects.FileObject):
-                    continue
+                # write checksums
+                if args.bagfiles == True: # bag entire SIP
+                    subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
+                else: # write metadata/checksum.md5
+                    subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
 
-                # skip directories and links
-                if obj.name_type:
-                    if obj.name_type != "r":
-                        continue
-
-                # record filename
-                dfxml_filename = obj.filename
-                dfxml_filedate = int(time.time()) # default to current time
+                # write description spreadsheet
+                populate_spreadsheet(args.filesonly, args.exportall, destination sip_dir, file)
 
-                # record last modified or last created date
-                try:
-                    mtime = obj.mtime
-                    mtime = str(mtime)
-                except:
-                    pass
+            # no raw disk image
+            else:
+                print('NOTICE: No raw disk image. Skipping disk.')
 
-                try:
-                    crtime = obj.crtime
-                    crtime = str(crtime)
-                except:
-                    pass
-
-                # fallback to created date if last modified doesn't exist
-                if mtime and (mtime != 'None'):
-                    mtime = time_to_int(mtime[:19])
-                    dfxml_filedate = mtime
-                elif crtime and (crtime != 'None'):
-                    crtime = time_to_int(crtime[:19])
-                    dfxml_filedate = crtime
-                else:
-                    continue
-
-                # rewrite last modified date of corresponding file in objects/files
-                exported_filepath = os.path.join(files_dir, dfxml_filename)
-                if os.path.isfile(exported_filepath):
-                    os.utime(exported_filepath, (dfxml_filedate, dfxml_filedate))
-
-            # run brunnhilde and write to submissionDocumentation
-            files_abs = os.path.abspath(files_dir)
-            if args.piiscan == True: # brunnhilde with bulk_extractor
-                subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
-            else: # brunnhilde without bulk_extractor
-                subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
-
-            # if user selected 'filesonly', remove disk image files and repackage
-            if args.filesonly == True:
-                keep_logical_files_only(object_dir)
-
-            # write checksums
-            if args.bagfiles == True: # bag entire SIP
-                subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
-            else: # write metadata/checksum.md5
-                subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
-
-            # write description spreadsheet
-            create_spreadsheet(args.filesonly, args.exportall, sip_dir, file)
-
-        # no raw disk image
         else:
-            print('NOTICE: No raw disk image. Skipping disk.')
-
-    else:
-        # write skipped file to log
-        print('NOTICE: File is not a disk image. Skipping file.')
+            # write skipped file to log
+            print('NOTICE: File is not a disk image. Skipping file.')
 
-# close files
-spreadsheet.close()
\ No newline at end of file
+if __name__ == '__main__':
+    main()
\ No newline at end of file

From 0b818601f043b669d27624fdb02b5beed35f4e33 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 20:18:35 -0400
Subject: [PATCH 09/24] Fix typo

---
 process_with_tsk_options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index 2da5ff8..8b9f5c2 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -433,7 +433,7 @@ def main():
                     subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
 
                 # write description spreadsheet
-                populate_spreadsheet(args.filesonly, args.exportall, destination sip_dir, file)
+                populate_spreadsheet(args.filesonly, args.exportall, destination, sip_dir, file)
 
             # no raw disk image
             else:

From 6aadcc753c7c523155cb3287b9f121b2bde74261 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 20:20:10 -0400
Subject: [PATCH 10/24] Fix arg parser

---
 process_with_tsk_options.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index 8b9f5c2..86d1a43 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -274,7 +274,7 @@ def _make_parser():
 
 def main():
     # parse args
-    parser = _make_parser(brunnhilde_version)
+    parser = _make_parser()
     args = parser.parse_args()
     source = os.path.abspath(args.source)
     destination = os.path.abspath(args.destination)

From 4658f105abda820dadbaf488c9b3541e9ab7ba0a Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 20:23:00 -0400
Subject: [PATCH 11/24] Modify spreadsheet function

---
 process_with_tsk_options.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index 86d1a43..549be68 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -433,7 +433,8 @@ def main():
                     subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
 
                 # write description spreadsheet
-                populate_spreadsheet(args.filesonly, args.exportall, destination, sip_dir, file)
+                print('Generating description spreadsheet for file %s...' % (file))
+                create_spreadsheet(args.filesonly, args.exportall, destination, sip_dir, file)
 
             # no raw disk image
             else:

From 1b5456e3806132c50c954a4a642e2b0e56998528 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Thu, 12 Oct 2017 20:27:46 -0400
Subject: [PATCH 12/24] Fix arg imports to create_spreadsheet function

---
 process_with_tsk_options.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/process_with_tsk_options.py b/process_with_tsk_options.py
index 549be68..3c1cdf0 100644
--- a/process_with_tsk_options.py
+++ b/process_with_tsk_options.py
@@ -46,9 +46,10 @@ def time_to_int(str_time):
         "%Y-%m-%dT%H:%M:%S").timetuple())
     return dt
 
-def create_spreadsheet(files_only, exportall, destination, sip_dir, filename):
+def create_spreadsheet(args, destination, sip_dir, filename):
     # open description spreadsheet and write header
     with open(os.path.join(destination,'description.csv'), 'w') as spreadsheet:
+        
         writer = csv.writer(spreadsheet, quoting=csv.QUOTE_NONNUMERIC)
         header_list = ['Parent ID', 'Identifier', 'Title', 'Archive Creator', 'Date expression', 'Date start', 'Date end', 
             'Level of description', 'Extent and medium', 'Scope and content', 'Arrangement (optional)', 'Accession number', 
@@ -95,7 +96,7 @@ def create_spreadsheet(files_only, exportall, destination, sip_dir, filename):
                             continue
 
                     # skip unallocated if args.exportall is False
-                    if exportall == False:
+                    if args.exportall == False:
                         if obj.unalloc:
                             if obj.unalloc == 1:
                                 continue
@@ -224,7 +225,7 @@ def create_spreadsheet(files_only, exportall, destination, sip_dir, filename):
                     
                     
                     # create scope and content note
-                    if files_only == True:
+                    if args.filesonly == True:
                         scopecontent = 'File includes digital files carved from a disk image using tsk_recover. Most common file formats: %s' % (formatlist)
                     else:
                         scopecontent = 'File includes both a disk image and digital files carved from the disk image using tsk_recover. Most common file formats: %s' % (formatlist)
@@ -434,7 +435,7 @@ def main():
 
                 # write description spreadsheet
                 print('Generating description spreadsheet for file %s...' % (file))
-                create_spreadsheet(args.filesonly, args.exportall, destination, sip_dir, file)
+                create_spreadsheet(args, destination, sip_dir, file)
 
             # no raw disk image
             else:

From 3ead8676fadd3c13d5d30b51b332bf589b3a7b3d Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:26:08 -0400
Subject: [PATCH 13/24] Refactor

---
 diskimageprocessor.py | 988 +++++++++++++++++++++---------------------
 1 file changed, 498 insertions(+), 490 deletions(-)

diff --git a/diskimageprocessor.py b/diskimageprocessor.py
index a417781..8e0fe94 100644
--- a/diskimageprocessor.py
+++ b/diskimageprocessor.py
@@ -40,12 +40,13 @@
 import Objects
 
 def logandprint(message):
+    """ Print to log and terminal """
     log.write('\n' + (time.strftime("%H:%M:%S %b %d, %Y - ", 
         time.localtime())) + message)
     print(message)
 
 def convert_size(size):
-    # convert size to human-readable form
+    """ Convert size to human-readable form """
     if (size == 0):
         return '0 bytes'
     size_name = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
@@ -57,228 +58,247 @@ def convert_size(size):
     return '%s %s' % (s,size_name[i])
 
 def time_to_int(str_time):
+    """ Convert datetime to unix integer value """
     dt = time.mktime(datetime.datetime.strptime(str_time, 
         "%Y-%m-%dT%H:%M:%S").timetuple())
     return dt
 
-def create_spreadsheet(files_only, exportall):
-    # process each SIP
-    for item in sorted(os.listdir(sips)):
-        current = os.path.join(sips, item)
-        # test if entry if directory
-        if os.path.isdir(current):
-            
-            # intialize values
-            number_files = 0
-            total_bytes = 0
-            mtimes = []
-            ctimes = []
-            crtimes = []
-
-            # parse dfxml file
-            if args.bagfiles == True:
-                dfxml_file = os.path.abspath(os.path.join(current, 
-                    'data', 'metadata', 'submissionDocumentation', 'dfxml.xml'))
-            else:
-                dfxml_file = os.path.abspath(os.path.join(current, 
-                    'metadata', 'submissionDocumentation', 'dfxml.xml'))
-
-            # try to read DFXML file
-            try:
-                # gather info for each FileObject
-                for (event, obj) in Objects.iterparse(dfxml_file):
-                    
-                    # only work on FileObjects
-                    if not isinstance(obj, Objects.FileObject):
-                        continue
+def create_spreadsheet(args):
+    """ Create csv describing created SIPs """
+
+    # open description spreadsheet
+    csv_path = os.path.abspath(os.path.join(args.destination, 'description.csv'))
+    with open(csv_path, 'w') as description_csv:
+        writer = csv.writer(description_csv, quoting=csv.QUOTE_NONNUMERIC)
+        
+        # write header
+        header_list = ['Parent ID', 'Identifier', 'Title', 'Archive Creator', 'Date expression', 'Date start', 'Date end', 
+            'Level of description', 'Extent and medium', 'Scope and content', 'Arrangement (optional)', 'Accession number', 
+            'Appraisal, destruction, and scheduling information (optional)', 'Name access points (optional)', 
+            'Geographic access points (optional)', 'Conditions governing access (optional)', 'Conditions governing reproduction (optional)', 
+            'Language of material (optional)', 'Physical characteristics & technical requirements affecting use (optional)', 
+            'Finding aids (optional)', 'Related units of description (optional)', 'Archival history (optional)', 
+            'Immediate source of acquisition or transfer (optional)', "Archivists' note (optional)", 'General note (optional)', 
+            'Description status']
+        writer.writerow(header_list)
+
+        # process each SIP
+        for item in sorted(os.listdir(sips)):
+            current = os.path.join(sips, item)
+            # test if entry if directory
+            if os.path.isdir(current):
+                
+                # intialize values
+                number_files = 0
+                total_bytes = 0
+                mtimes = []
+                ctimes = []
+                crtimes = []
+
+                # parse dfxml file
+                if args.bagfiles == True:
+                    dfxml_file = os.path.abspath(os.path.join(current, 
+                        'data', 'metadata', 'submissionDocumentation', 'dfxml.xml'))
+                else:
+                    dfxml_file = os.path.abspath(os.path.join(current, 
+                        'metadata', 'submissionDocumentation', 'dfxml.xml'))
 
-                    # skip directories and links
-                    if obj.name_type:
-                        if obj.name_type != "r":
+                # try to read DFXML file
+                try:
+                    # gather info for each FileObject
+                    for (event, obj) in Objects.iterparse(dfxml_file):
+                        
+                        # only work on FileObjects
+                        if not isinstance(obj, Objects.FileObject):
                             continue
 
-                    # skip unallocated if args.exportall is False
-                    if exportall == False:
-                        if obj.unalloc:
-                            if obj.unalloc == 1:
+                        # skip directories and links
+                        if obj.name_type:
+                            if obj.name_type != "r":
                                 continue
-                    
-                    # gather info
-                    number_files += 1
-
-                    try:
-                        mtime = obj.mtime
-                        mtime = str(mtime)
-                        mtimes.append(mtime)
-                    except:
-                        pass
 
-                    try:
-                        ctime = obj.ctime
-                        ctime = str(ctime)
-                        ctimes.append(ctime)
-                    except:
-                        pass
+                        # skip unallocated if args.exportall is False
+                        if args.exportall == False:
+                            if obj.unalloc:
+                                if obj.unalloc == 1:
+                                    continue
+                        
+                        # gather info
+                        number_files += 1
+
+                        try:
+                            mtime = obj.mtime
+                            mtime = str(mtime)
+                            mtimes.append(mtime)
+                        except:
+                            pass
+
+                        try:
+                            ctime = obj.ctime
+                            ctime = str(ctime)
+                            ctimes.append(ctime)
+                        except:
+                            pass
+
+                        try:
+                            crtime = obj.crtime
+                            crtime = str(crtime)
+                            crtimes.append(crtime)
+                        except:
+                            pass
+                
+                        total_bytes += obj.filesize
 
-                    try:
-                        crtime = obj.crtime
-                        crtime = str(crtime)
-                        crtimes.append(crtime)
-                    except:
-                        pass
-            
-                    total_bytes += obj.filesize
-
-                # filter 'None' values from date lists
-                for date_list in mtimes, ctimes, crtimes:
-                    while 'None' in date_list:
-                        date_list.remove('None')
-
-
-                # build extent statement
-                size_readable = convert_size(total_bytes)
-                if number_files == 1:
-                    extent = "1 digital file (%s)" % size_readable
-                elif number_files == 0:
-                    extent = "EMPTY"
-                else:
-                    extent = "%d digital files (%s)" % (number_files, size_readable)
-
-                # determine earliest and latest MAC dates from lists
-                date_earliest_m = ""
-                date_latest_m = ""
-                date_earliest_c = ""
-                date_latest_c = ""
-                date_earliest_cr = ""
-                date_latest_cr = ""
-                date_statement = ""
-
-                if mtimes:
-                    date_earliest_m = min(mtimes)
-                    date_latest_m = max(mtimes)
-                if ctimes:
-                    date_earliest_c = min(ctimes)
-                    date_latest_c = max(ctimes)
-                if crtimes:
-                    date_earliest_cr = min(crtimes)
-                    date_latest_cr = max(crtimes)
-
-                # determine which set of dates to use (logic: use set with earliest start date)
-                use_ctimes = False
-                use_crtimes = False
-
-                if not date_earliest_m:
-                    date_earliest_m = "N/A"
-                    date_latest_m = "N/A"
-                date_to_use = date_earliest_m # default to date modified
-
-                if date_earliest_c:
-                    if date_earliest_c < date_to_use:
-                        date_to_use = date_earliest_c
-                        use_ctimes = True
-                if date_earliest_cr:
-                    if date_earliest_cr < date_to_use:
-                        date_to_use = date_earliest_cr
-                        use_ctimes = False
-                        use_crtimes = True
-
-                # store date_earliest and date_latest values based on datetype used
-                if use_ctimes == True:
-                    date_earliest = date_earliest_c[:10]
-                    date_latest = date_latest_c[:10]
-                elif use_crtimes == True:
-                    date_earliest = date_earliest_cr[:10]
-                    date_latest = date_latest_cr[:10]
-                else:
-                    date_earliest = date_earliest_m[:10]
-                    date_latest = date_latest_m[:10]
+                    # filter 'None' values from date lists
+                    for date_list in mtimes, ctimes, crtimes:
+                        while 'None' in date_list:
+                            date_list.remove('None')
 
-                # write date statement
-                if date_earliest[:4] == date_latest[:4]:
-                    date_statement = '%s' % date_earliest[:4]
-                else:
-                    date_statement = '%s - %s' % (date_earliest[:4], date_latest[:4])
 
-                # gather file system info, discern tool used
-                if args.bagfiles == True:
-                    disktype = os.path.join(current, 'data', 'metadata', 
-                        'submissionDocumentation', 'disktype.txt')
-                else:
-                    disktype = os.path.join(current, 'metadata', 
-                        'submissionDocumentation', 'disktype.txt')
-                # pull filesystem info from disktype.txt
-                disk_fs = ''
-                try:
-                    for line in open(disktype, 'r'):
-                        if "file system" in line:
-                            disk_fs = line.strip()
-                except: # handle non-Unicode chars
-                    for line in open(disktype, 'rb'):
-                        if "file system" in line.decode('utf-8','ignore'):
-                            disk_fs = line.decode('utf-8','ignore').strip()
+                    # build extent statement
+                    size_readable = convert_size(total_bytes)
+                    if number_files == 1:
+                        extent = "1 digital file (%s)" % size_readable
+                    elif number_files == 0:
+                        extent = "EMPTY"
+                    else:
+                        extent = "%d digital files (%s)" % (number_files, size_readable)
+
+                    # determine earliest and latest MAC dates from lists
+                    date_earliest_m = ""
+                    date_latest_m = ""
+                    date_earliest_c = ""
+                    date_latest_c = ""
+                    date_earliest_cr = ""
+                    date_latest_cr = ""
+                    date_statement = ""
+
+                    if mtimes:
+                        date_earliest_m = min(mtimes)
+                        date_latest_m = max(mtimes)
+                    if ctimes:
+                        date_earliest_c = min(ctimes)
+                        date_latest_c = max(ctimes)
+                    if crtimes:
+                        date_earliest_cr = min(crtimes)
+                        date_latest_cr = max(crtimes)
+
+                    # determine which set of dates to use (logic: use set with earliest start date)
+                    use_ctimes = False
+                    use_crtimes = False
+
+                    if not date_earliest_m:
+                        date_earliest_m = "N/A"
+                        date_latest_m = "N/A"
+                    date_to_use = date_earliest_m # default to date modified
+
+                    if date_earliest_c:
+                        if date_earliest_c < date_to_use:
+                            date_to_use = date_earliest_c
+                            use_ctimes = True
+                    if date_earliest_cr:
+                        if date_earliest_cr < date_to_use:
+                            date_to_use = date_earliest_cr
+                            use_ctimes = False
+                            use_crtimes = True
+
+                    # store date_earliest and date_latest values based on datetype used
+                    if use_ctimes == True:
+                        date_earliest = date_earliest_c[:10]
+                        date_latest = date_latest_c[:10]
+                    elif use_crtimes == True:
+                        date_earliest = date_earliest_cr[:10]
+                        date_latest = date_latest_cr[:10]
+                    else:
+                        date_earliest = date_earliest_m[:10]
+                        date_latest = date_latest_m[:10]
 
-                # save tool used to carve files
-                if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
-                    tool = "carved from the disk image using the Sleuth Kit command line utility tsk_recover"
-                elif ('hfs' in disk_fs.lower()) and ('hfs+' not in disk_fs.lower()):
-                    tool = "carved from disk image using the HFSExplorer command line utility"
-                elif 'udf' in disk_fs.lower():
-                    tool = "copied from the mounted disk image"
-                else:
-                    tool = "UNSUCCESSFULLY"
+                    # write date statement
+                    if date_earliest[:4] == date_latest[:4]:
+                        date_statement = '%s' % date_earliest[:4]
+                    else:
+                        date_statement = '%s - %s' % (date_earliest[:4], date_latest[:4])
 
-                # gather info from brunnhilde & write scope and content note
-                if extent == 'EMPTY':
-                    scopecontent = ''
-                    formatlist = ''
-                else:
-                    fileformats = []
-                    formatlist = ''
-                    fileformat_csv = ''
+                    # gather file system info, discern tool used
                     if args.bagfiles == True:
-                        fileformat_csv = os.path.join(current, 'data', 'metadata', 'submissionDocumentation', 
-                            'brunnhilde', 'csv_reports', 'formats.csv')
+                        disktype = os.path.join(current, 'data', 'metadata', 
+                            'submissionDocumentation', 'disktype.txt')
                     else:
-                        fileformat_csv = os.path.join(current, 'metadata', 'submissionDocumentation', 
-                            'brunnhilde', 'csv_reports', 'formats.csv')
-                    try: 
-                        with open(fileformat_csv, 'r') as f:
-                            reader = csv.reader(f)
-                            next(reader)
-                            for row in itertools.islice(reader, 5):
-                                fileformats.append(row[0])
-                    except:
-                        fileformats.append("ERROR! No formats.csv file to pull formats from.")
-                    # replace empty elements with 'Unidentified
-                    fileformats = [element or 'Unidentified' for element in fileformats]
-                    formatlist = ', '.join(fileformats)
-                    
-                    
-                    # create scope and content note
-                    if files_only == True:
-                        scopecontent = 'File includes digital files %s. Most common file formats: %s' % (tool, formatlist)
+                        disktype = os.path.join(current, 'metadata', 
+                            'submissionDocumentation', 'disktype.txt')
+                    # pull filesystem info from disktype.txt
+                    disk_fs = ''
+                    try:
+                        for line in open(disktype, 'r'):
+                            if "file system" in line:
+                                disk_fs = line.strip()
+                    except: # handle non-Unicode chars
+                        for line in open(disktype, 'rb'):
+                            if "file system" in line.decode('utf-8','ignore'):
+                                disk_fs = line.decode('utf-8','ignore').strip()
+
+                    # save tool used to carve files
+                    if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
+                        tool = "carved from the disk image using the Sleuth Kit command line utility tsk_recover"
+                    elif ('hfs' in disk_fs.lower()) and ('hfs+' not in disk_fs.lower()):
+                        tool = "carved from disk image using the HFSExplorer command line utility"
+                    elif 'udf' in disk_fs.lower():
+                        tool = "copied from the mounted disk image"
                     else:
-                        scopecontent = 'File includes both a disk image and digital files %s. Most common file formats: %s' % (tool, formatlist)
-
-                # write csv row
-                writer.writerow(['', item, '', '', date_statement, date_earliest, date_latest, 'File', extent, 
-                    scopecontent, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
-                
-                logandprint('Described %s successfully.' % (current))
-
-            # if error reading DFXML file, report that
-            except:
-                # write error to csv
-                writer.writerow(['', item, '', '', 'Error', 'Error', 'Error', 'File', 'Error', 
-                    'Error reading DFXML file.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
+                        tool = "UNSUCCESSFULLY"
 
-                logandprint('ERROR: DFXML file for %s not well-formed.' % (current))
+                    # gather info from brunnhilde & write scope and content note
+                    if extent == 'EMPTY':
+                        scopecontent = ''
+                        formatlist = ''
+                    else:
+                        fileformats = []
+                        formatlist = ''
+                        fileformat_csv = ''
+                        if args.bagfiles == True:
+                            fileformat_csv = os.path.join(current, 'data', 'metadata', 'submissionDocumentation', 
+                                'brunnhilde', 'csv_reports', 'formats.csv')
+                        else:
+                            fileformat_csv = os.path.join(current, 'metadata', 'submissionDocumentation', 
+                                'brunnhilde', 'csv_reports', 'formats.csv')
+                        try: 
+                            with open(fileformat_csv, 'r') as f:
+                                reader = csv.reader(f)
+                                next(reader)
+                                for row in itertools.islice(reader, 5):
+                                    fileformats.append(row[0])
+                        except:
+                            fileformats.append("ERROR! No formats.csv file to pull formats from.")
+                        # replace empty elements with 'Unidentified
+                        fileformats = [element or 'Unidentified' for element in fileformats]
+                        formatlist = ', '.join(fileformats)
+                        
+                        
+                        # create scope and content note
+                        if args.filesonly == True:
+                            scopecontent = 'File includes digital files %s. Most common file formats: %s' % (tool, formatlist)
+                        else:
+                            scopecontent = 'File includes both a disk image and digital files %s. Most common file formats: %s' % (tool, formatlist)
+
+                    # write csv row
+                    writer.writerow(['', item, '', '', date_statement, date_earliest, date_latest, 'File', extent, 
+                        scopecontent, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
+                    
+                    logandprint('Described %s successfully.' % (current))
 
+                # if error reading DFXML file, report that
+                except:
+                    # write error to csv
+                    writer.writerow(['', item, '', '', 'Error', 'Error', 'Error', 'File', 'Error', 
+                        'Error reading DFXML file.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
 
+                    logandprint('ERROR: DFXML file for %s not well-formed.' % (current))
 
-    logandprint('All SIPs described in spreadsheet. Process complete.')
+    logandprint('Description CSV created.')
 
 def keep_logical_files_only(objects_dir):
+    """ Remove disk image from SIP and repackage """
+
     # get list of files in files dir
     files_dir = os.path.join(objects_dir, 'files')
     fileList = os.listdir(files_dir)
@@ -292,326 +312,314 @@ def keep_logical_files_only(objects_dir):
     shutil.rmtree(files_dir)
     shutil.rmtree(os.path.join(objects_dir, 'diskimage'))
 
+def _make_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-b", "--bagfiles", help="Bag files instead of writing checksum.md5", action="store_true")
+    parser.add_argument("-e", "--exportall", help="Export all (not only allocated) with tsk_recover", action="store_true")
+    parser.add_argument("-f", "--filesonly", help="Include digital files only (not disk images) in SIPs", action="store_true")
+    parser.add_argument("-p", "--piiscan", help="Run bulk_extractor in Brunnhilde scan", action="store_true")
+    parser.add_argument("-r", "--resforks", help="Export AppleDouble resource forks from HFS-formatted disks", action="store_true")
+    parser.add_argument("source", help="Source directory containing disk images (and related files)")
+    parser.add_argument("destination", help="Output destination")
+
+    return parser
+
+def main():
+
+    parser = _make_parser()
+    args = parser.parse_args()
+
+    destination = os.path.abspath(args.destination)
+
+    # create output directories
+    if not os.path.exists(destination):
+        os.makedirs(destination)
+
+    sips = os.path.join(destination, 'SIPs')
+    os.makedirs(sips)
+
+    # open log file
+    log_file = os.path.join(destination, 'diskimageprocessor-log.txt')
+    try:
+        log = open(log_file, 'w')   # open the log file
+        logandprint('Source of disk images: %s' % (args.source))
+    except:
+        logandprint('There was an error creating the log file.')
+
+    # make list for unprocessed disks
+    unprocessed = []
+
+    # iterate through files in source directory
+    for file in sorted(os.listdir(args.source)):
+
+        # record filename in log
+        logandprint('>>> NEW FILE: %s' % (file))
+        
+        # determine if disk image
+        if file.lower().endswith((".e01", ".000", ".001", ".raw", ".img", ".dd", ".iso")):
+
+            # save info about file
+            image_path = os.path.join(args.source, file)
+            image_id = os.path.splitext(file)[0]
+            image_ext = os.path.splitext(file)[1]
+
+            # create new folders
+            sip_dir = os.path.join(sips, file)
+            object_dir = os.path.join(sip_dir, 'objects')
+            diskimage_dir = os.path.join(object_dir, 'diskimage')
+            files_dir = os.path.join(object_dir, 'files')
+            metadata_dir = os.path.join(sip_dir, 'metadata')
+            subdoc_dir = os.path.join(metadata_dir, 'submissionDocumentation')
+
+            for folder in sip_dir, object_dir, diskimage_dir, files_dir, metadata_dir, subdoc_dir:
+                os.makedirs(folder)
+
+            # disk image status
+            raw_image = False
+
+            # check if disk image is ewf
+            if image_ext == ".E01":
+                # convert disk image to raw and write to /objects/diskimage
+                raw_out = os.path.join(diskimage_dir, image_id)
+                try:
+                    subprocess.check_output(['ewfexport', '-t', raw_out, '-f', 'raw', '-o', '0', '-S', '0', '-u', image_path])
+                    raw_image = True
+                    os.rename(os.path.join(diskimage_dir, '%s.raw' % (image_id)), os.path.join(diskimage_dir, '%s.img' % image_id)) # change file extension from .raw to .img
+                    os.rename(os.path.join(diskimage_dir, '%s.raw.info' % (image_id)), os.path.join(diskimage_dir, '%s.img.info' % image_id)) # rename sidecar md5 file
+                    diskimage = os.path.join(diskimage_dir, '%s.img' % (image_id)) # use raw disk image in objects/diskimage moving forward
+                except subprocess.CalledProcessError:
+                    logandprint('ERROR: Disk image could not be converted to raw image format. Skipping disk.')
 
-# MAIN FLOW
-
-# parse arguments
-parser = argparse.ArgumentParser()
-parser.add_argument("-b", "--bagfiles", help="Bag files instead of writing checksum.md5", action="store_true")
-parser.add_argument("-e", "--exportall", help="Export all (not only allocated) with tsk_recover", action="store_true")
-parser.add_argument("-f", "--filesonly", help="Include digital files only (not disk images) in SIPs", action="store_true")
-parser.add_argument("-p", "--piiscan", help="Run bulk_extractor in Brunnhilde scan", action="store_true")
-parser.add_argument("-r", "--resforks", help="Export AppleDouble resource forks from HFS-formatted disks", action="store_true")
-parser.add_argument("source", help="Source directory containing disk images (and related files)")
-parser.add_argument("destination", help="Output destination")
-args = parser.parse_args()
-
-destination = args.destination
-
-# create output directories
-if not os.path.exists(destination):
-    os.makedirs(destination)
-
-sips = os.path.join(destination, 'SIPs')
-os.makedirs(sips)
-
-# open log file
-log_file = os.path.join(destination, 'diskimageprocessor-log.txt')
-try:
-    log = open(log_file, 'w')   # open the log file
-    logandprint('Source of disk images: %s' % (args.source))
-except:
-    sys.exit('There was an error creating the log file.')
-
-# open description spreadsheet
-try:
-    spreadsheet = open(os.path.join(destination,'description.csv'), 'w')
-    writer = csv.writer(spreadsheet, quoting=csv.QUOTE_NONNUMERIC)
-    header_list = ['Parent ID', 'Identifier', 'Title', 'Archive Creator', 'Date expression', 'Date start', 'Date end', 
-        'Level of description', 'Extent and medium', 'Scope and content', 'Arrangement (optional)', 'Accession number', 
-        'Appraisal, destruction, and scheduling information (optional)', 'Name access points (optional)', 
-        'Geographic access points (optional)', 'Conditions governing access (optional)', 'Conditions governing reproduction (optional)', 
-        'Language of material (optional)', 'Physical characteristics & technical requirements affecting use (optional)', 
-        'Finding aids (optional)', 'Related units of description (optional)', 'Archival history (optional)', 
-        'Immediate source of acquisition or transfer (optional)', "Archivists' note (optional)", 'General note (optional)', 
-        'Description status']
-    writer.writerow(header_list)
-except:
-    logandprint('There was an error creating the processing spreadsheet.')
-    sys.exit()
-
-# make list for unprocessed disks
-unprocessed = []
-
-# iterate through files in source directory
-for file in sorted(os.listdir(args.source)):
-
-    # record filename in log
-    logandprint('>>> NEW FILE: %s' % (file))
-    
-    # determine if disk image
-    if file.endswith((".E01", ".000", ".001", ".raw", ".img", ".dd", ".iso")):
-
-        # save info about file
-        image_path = os.path.join(args.source, file)
-        image_id = os.path.splitext(file)[0]
-        image_ext = os.path.splitext(file)[1]
-
-        # create new folders
-        sip_dir = os.path.join(sips, file)
-        object_dir = os.path.join(sip_dir, 'objects')
-        diskimage_dir = os.path.join(object_dir, 'diskimage')
-        files_dir = os.path.join(object_dir, 'files')
-        metadata_dir = os.path.join(sip_dir, 'metadata')
-        subdoc_dir = os.path.join(metadata_dir, 'submissionDocumentation')
-
-        for folder in sip_dir, object_dir, diskimage_dir, files_dir, metadata_dir, subdoc_dir:
-            os.makedirs(folder)
-
-        # disk image status
-        raw_image = False
-
-        # check if disk image is ewf
-        if image_ext == ".E01":
-            # convert disk image to raw and write to /objects/diskimage
-            raw_out = os.path.join(diskimage_dir, image_id)
-            try:
-                subprocess.check_output(['ewfexport', '-t', raw_out, '-f', 'raw', '-o', '0', '-S', '0', '-u', image_path])
+            else:
                 raw_image = True
-                os.rename(os.path.join(diskimage_dir, '%s.raw' % (image_id)), os.path.join(diskimage_dir, '%s.img' % image_id)) # change file extension from .raw to .img
-                os.rename(os.path.join(diskimage_dir, '%s.raw.info' % (image_id)), os.path.join(diskimage_dir, '%s.img.info' % image_id)) # rename sidecar md5 file
-                diskimage = os.path.join(diskimage_dir, '%s.img' % (image_id)) # use raw disk image in objects/diskimage moving forward
-            except subprocess.CalledProcessError:
-                logandprint('ERROR: Disk image could not be converted to raw image format. Skipping disk.')
+                for movefile in os.listdir(args.source):
+                    # if filename starts with disk image basename (this will also capture info and log files, multi-part disk images, etc.)
+                    if movefile.startswith(image_id):
+                        # copy file to objects/diskimage
+                        try:
+                            shutil.copyfile(os.path.join(args.source, movefile), os.path.join(diskimage_dir, movefile))
+                        except:
+                            logandprint('ERROR: File %s not successfully copied to %s' % (movefile, diskimage_dir))
+                diskimage = os.path.join(diskimage_dir, file) # use disk image in objects/diskimage moving forward
+
+            # raw disk image
+            if raw_image == True:
+
+                # run disktype on disk image, save output to submissionDocumentation
+                disktype = os.path.join(subdoc_dir, 'disktype.txt')
+                subprocess.call("disktype '%s' > '%s'" % (diskimage, disktype), shell=True)
 
-        else:
-            raw_image = True
-            for movefile in os.listdir(args.source):
-                # if filename starts with disk image basename (this will also capture info and log files, multi-part disk images, etc.)
-                if movefile.startswith(image_id):
-                    # copy file to objects/diskimage
-                    try:
-                        shutil.copyfile(os.path.join(args.source, movefile), os.path.join(diskimage_dir, movefile))
-                    except:
-                        logandprint('ERROR: File %s not successfully copied to %s' % (movefile, diskimage_dir))
-            diskimage = os.path.join(diskimage_dir, file) # use disk image in objects/diskimage moving forward
-
-        # raw disk image
-        if raw_image == True:
-
-            # run disktype on disk image, save output to submissionDocumentation
-            disktype = os.path.join(subdoc_dir, 'disktype.txt')
-            subprocess.call("disktype '%s' > '%s'" % (diskimage, disktype), shell=True)
-
-            # pull filesystem info from disktype.txt
-            disk_fs = ''
-            try:
-                for line in open(disktype, 'r'):
-                    if "file system" in line:
-                        disk_fs = line.strip()
-            except: # handle non-Unicode chars
-                for line in open(disktype, 'rb'):
-                    if "file system" in line.decode('utf-8','ignore'):
-                        disk_fs = line.decode('utf-8','ignore').strip()
-            logandprint('File system: %s' % (disk_fs))
-
-            # handle differently by file system
-            if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
-                # use fiwalk to make dfxml
-                fiwalk_file = os.path.join(subdoc_dir, 'dfxml.xml')
-                try:
-                    subprocess.check_output(['fiwalk', '-X', fiwalk_file, diskimage])
-                except subprocess.CalledProcessError as e:
-                    logandprint('ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
-                
-                # carve images using tsk_recover
-                carve_flag = '-a' # default to exporting allocated files
-                if args.exportall == True:
-                    carve_flag = '-e'
+                # pull filesystem info from disktype.txt
+                disk_fs = ''
                 try:
-                    subprocess.check_output(['tsk_recover', carve_flag, diskimage, files_dir])
-                except subprocess.CalledProcessError as e:
-                    logandprint('ERROR: tsk_recover could not carve allocated files from disk. STDERR: %s' % (e.output))    
-
-                # modify file permissions
-                subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
-                subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
+                    for line in open(disktype, 'r'):
+                        if "file system" in line:
+                            disk_fs = line.strip()
+                except: # handle non-Unicode chars
+                    for line in open(disktype, 'rb'):
+                        if "file system" in line.decode('utf-8','ignore'):
+                            disk_fs = line.decode('utf-8','ignore').strip()
+                logandprint('File system: %s' % (disk_fs))
 
-                # rewrite last modified dates of files based on values in DFXML
-                for (event, obj) in Objects.iterparse(fiwalk_file):
+                # handle differently by file system
+                if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
+                    # use fiwalk to make dfxml
+                    fiwalk_file = os.path.join(subdoc_dir, 'dfxml.xml')
+                    try:
+                        subprocess.check_output(['fiwalk', '-X', fiwalk_file, diskimage])
+                    except subprocess.CalledProcessError as e:
+                        logandprint('ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
                     
-                    # only work on FileObjects
-                    if not isinstance(obj, Objects.FileObject):
-                        continue
+                    # carve images using tsk_recover
+                    carve_flag = '-a' # default to exporting allocated files
+                    if args.exportall == True:
+                        carve_flag = '-e'
+                    try:
+                        subprocess.check_output(['tsk_recover', carve_flag, diskimage, files_dir])
+                    except subprocess.CalledProcessError as e:
+                        logandprint('ERROR: tsk_recover could not carve allocated files from disk. STDERR: %s' % (e.output))    
+
+                    # modify file permissions
+                    subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
+                    subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
 
-                    # skip directories and links
-                    if obj.name_type:
-                        if obj.name_type != "r":
+                    # rewrite last modified dates of files based on values in DFXML
+                    for (event, obj) in Objects.iterparse(fiwalk_file):
+                        
+                        # only work on FileObjects
+                        if not isinstance(obj, Objects.FileObject):
                             continue
 
-                    # record filename
-                    dfxml_filename = obj.filename
-                    dfxml_filedate = int(time.time()) # default to current time
+                        # skip directories and links
+                        if obj.name_type:
+                            if obj.name_type != "r":
+                                continue
 
-                    # record last modified or last created date
-                    try:
-                        mtime = obj.mtime
-                        mtime = str(mtime)
-                    except:
-                        pass
+                        # record filename
+                        dfxml_filename = obj.filename
+                        dfxml_filedate = int(time.time()) # default to current time
+
+                        # record last modified or last created date
+                        try:
+                            mtime = obj.mtime
+                            mtime = str(mtime)
+                        except:
+                            pass
+
+                        try:
+                            crtime = obj.crtime
+                            crtime = str(crtime)
+                        except:
+                            pass
+
+                        # fallback to created date if last modified doesn't exist
+                        if mtime and (mtime != 'None'):
+                            mtime = time_to_int(mtime[:19])
+                            dfxml_filedate = mtime
+                        elif crtime and (crtime != 'None'):
+                            crtime = time_to_int(crtime[:19])
+                            dfxml_filedate = crtime
+                        else:
+                            continue
 
-                    try:
-                        crtime = obj.crtime
-                        crtime = str(crtime)
-                    except:
-                        pass
-
-                    # fallback to created date if last modified doesn't exist
-                    if mtime and (mtime != 'None'):
-                        mtime = time_to_int(mtime[:19])
-                        dfxml_filedate = mtime
-                    elif crtime and (crtime != 'None'):
-                        crtime = time_to_int(crtime[:19])
-                        dfxml_filedate = crtime
-                    else:
-                        continue
+                        # rewrite last modified date of corresponding file in objects/files
+                        exported_filepath = os.path.join(files_dir, dfxml_filename)
+                        if os.path.isfile(exported_filepath):
+                            os.utime(exported_filepath, (dfxml_filedate, dfxml_filedate))
+
+                    # run brunnhilde and write to submissionDocumentation
+                    files_abs = os.path.abspath(files_dir)
+                    if args.piiscan == True: # brunnhilde with bulk_extractor
+                        subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+                    else: # brunnhilde without bulk_extractor
+                        subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
 
-                    # rewrite last modified date of corresponding file in objects/files
-                    exported_filepath = os.path.join(files_dir, dfxml_filename)
-                    if os.path.isfile(exported_filepath):
-                        os.utime(exported_filepath, (dfxml_filedate, dfxml_filedate))
+                    # if user selected 'filesonly', remove disk image files and repackage
+                    if args.filesonly == True:
+                        keep_logical_files_only(object_dir)
 
-                # run brunnhilde and write to submissionDocumentation
-                files_abs = os.path.abspath(files_dir)
-                if args.piiscan == True: # brunnhilde with bulk_extractor
-                    subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
-                else: # brunnhilde without bulk_extractor
-                    subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+                    # write checksums
+                    if args.bagfiles == True: # bag entire SIP
+                        subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
+                    else: # write metadata/checksum.md5
+                        subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
 
-                # if user selected 'filesonly', remove disk image files and repackage
-                if args.filesonly == True:
-                    keep_logical_files_only(object_dir)
 
-                # write checksums
-                if args.bagfiles == True: # bag entire SIP
-                    subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
-                else: # write metadata/checksum.md5
-                    subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
+                elif ('hfs' in disk_fs.lower()) and ('hfs+' not in disk_fs.lower()):
+                    # mount disk image
+                    subprocess.call("sudo mount -t hfs -o loop,ro,noexec '%s' /mnt/diskid/" % (diskimage), shell=True)
 
+                    # use walk_to_dfxml.py to make dfxml
+                    dfxml_file = os.path.abspath(os.path.join(subdoc_dir, 'dfxml.xml'))
+                    try:
+                        subprocess.call("cd /mnt/diskid/ && python3 /usr/share/ccatools/diskimageprocessor/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
+                    except:
+                        logandprint('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
 
-            elif ('hfs' in disk_fs.lower()) and ('hfs+' not in disk_fs.lower()):
-                # mount disk image
-                subprocess.call("sudo mount -t hfs -o loop,ro,noexec '%s' /mnt/diskid/" % (diskimage), shell=True)
+                    # unmount disk image
+                    subprocess.call('sudo umount /mnt/diskid', shell=True)
 
-                # use walk_to_dfxml.py to make dfxml
-                dfxml_file = os.path.abspath(os.path.join(subdoc_dir, 'dfxml.xml'))
-                try:
-                    subprocess.call("cd /mnt/diskid/ && python3 /usr/share/ccatools/diskimageprocessor/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
-                except:
-                    logandprint('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
+                    # carve files using hfsexplorer
+                    if args.resforks == True:
+                        try:
+                            subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-resforks', 'APPLEDOUBLE', '-o', files_dir, diskimage])
+                        except subprocess.CalledProcessError as e:
+                            logandprint('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output))
+                    else:
+                        try:
+                            subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-o', files_dir, diskimage])
+                        except subprocess.CalledProcessError as e:
+                            logandprint('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output)) 
+
+                    # modify file permissions
+                    subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
+                    subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
+
+                    # run brunnhilde and write to reports directory
+                    files_abs = os.path.abspath(files_dir)
+                    if args.piiscan == True: # brunnhilde with bulk_extractor
+                        subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+                    else: # brunnhilde without bulk_extractor
+                        subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+                    
+                    # if user selected 'filesonly', remove disk image files and repackage
+                    if args.filesonly == True:
+                        keep_logical_files_only(object_dir)
+
+                    # write checksums
+                    if args.bagfiles == True: # bag entire SIP
+                        subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
+                    else: # write metadata/checksum.md5
+                        subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
+                
 
-                # unmount disk image
-                subprocess.call('sudo umount /mnt/diskid', shell=True)
+                elif 'udf' in disk_fs.lower():
+                    # mount image
+                    subprocess.call("sudo mount -t udf -o loop '%s' /mnt/diskid/" % (diskimage), shell=True)
 
-                # carve files using hfsexplorer
-                if args.resforks == True:
+                    # use walk_to_dfxml.py to create dfxml
+                    dfxml_file = os.path.abspath(os.path.join(subdoc_dir, 'dfxml.xml'))
                     try:
-                        subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-resforks', 'APPLEDOUBLE', '-o', files_dir, diskimage])
-                    except subprocess.CalledProcessError as e:
-                        logandprint('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output))
-                else:
+                        subprocess.call("cd /mnt/diskid/ && python3 /usr/share/dfxml/python/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
+                    except:
+                        logandprint('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
+                    
+                    # copy files from disk image to files dir
+                    shutil.rmtree(files_dir) # delete to enable use of copytree
                     try:
-                        subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-o', files_dir, diskimage])
-                    except subprocess.CalledProcessError as e:
-                        logandprint('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output)) 
-
-                # modify file permissions
-                subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
-                subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
-
-                # run brunnhilde and write to reports directory
-                files_abs = os.path.abspath(files_dir)
-                if args.piiscan == True: # brunnhilde with bulk_extractor
-                    subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
-                else: # brunnhilde without bulk_extractor
-                    subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
-                
-                # if user selected 'filesonly', remove disk image files and repackage
-                if args.filesonly == True:
-                    keep_logical_files_only(object_dir)
-
-                # write checksums
-                if args.bagfiles == True: # bag entire SIP
-                    subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
-                else: # write metadata/checksum.md5
-                    subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
-            
-
-            elif 'udf' in disk_fs.lower():
-                # mount image
-                subprocess.call("sudo mount -t udf -o loop '%s' /mnt/diskid/" % (diskimage), shell=True)
-
-                # use walk_to_dfxml.py to create dfxml
-                dfxml_file = os.path.abspath(os.path.join(subdoc_dir, 'dfxml.xml'))
-                try:
-                    subprocess.call("cd /mnt/diskid/ && python3 /usr/share/dfxml/python/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
-                except:
-                    logandprint('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
-                
-                # copy files from disk image to files dir
-                shutil.rmtree(files_dir) # delete to enable use of copytree
-                try:
-                    shutil.copytree('/mnt/diskid/', files_dir, symlinks=False, ignore=None)
-                except:
-                    logandprint("ERROR: shutil.copytree unable to copy files from disk %s" % (diskimage))
+                        shutil.copytree('/mnt/diskid/', files_dir, symlinks=False, ignore=None)
+                    except:
+                        logandprint("ERROR: shutil.copytree unable to copy files from disk %s" % (diskimage))
 
-                # unmount disk image
-                subprocess.call('sudo umount /mnt/diskid', shell=True) # unmount
+                    # unmount disk image
+                    subprocess.call('sudo umount /mnt/diskid', shell=True) # unmount
 
-                # modify file permissions
-                subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
-                subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
+                    # modify file permissions
+                    subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
+                    subprocess.call("sudo find '%s' -type f -exec chmod 644 {} \;" % (sip_dir), shell=True)
 
-                # run brunnhilde and write to submissionDocumentation
-                files_abs = os.path.abspath(files_dir)
-                if args.piiscan == True: # brunnhilde with bulk_extractor
-                    subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
-                else: # brunnhilde without bulk_extractor
-                    subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
-                
-                # if user selected 'filesonly', remove disk image files and repackage
-                if args.filesonly == True:
-                    keep_logical_files_only(object_dir)
+                    # run brunnhilde and write to submissionDocumentation
+                    files_abs = os.path.abspath(files_dir)
+                    if args.piiscan == True: # brunnhilde with bulk_extractor
+                        subprocess.call("brunnhilde.py -zb '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+                    else: # brunnhilde without bulk_extractor
+                        subprocess.call("brunnhilde.py -z '%s' '%s' '%s'" % (files_abs, subdoc_dir, 'brunnhilde'), shell=True)
+                    
+                    # if user selected 'filesonly', remove disk image files and repackage
+                    if args.filesonly == True:
+                        keep_logical_files_only(object_dir)
+
+                    # write checksums
+                    if args.bagfiles == True: # bag entire SIP
+                        subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
+                    else: # write metadata/checksum.md5
+                        subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
 
-                # write checksums
-                if args.bagfiles == True: # bag entire SIP
-                    subprocess.call("bagit.py --processes 4 '%s'" % (sip_dir), shell=True)
-                else: # write metadata/checksum.md5
-                    subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
+                else:
+                    logandprint('NOTICE: Skipping processing of unknown disk type.')
+                    unprocessed.append(file)
 
+            # no raw disk image
             else:
-                logandprint('NOTICE: Skipping processing of unknown disk type.')
+                logandprint('NOTICE: No raw disk image. Skipping disk.')
                 unprocessed.append(file)
 
-        # no raw disk image
         else:
-            logandprint('NOTICE: No raw disk image. Skipping disk.')
-            unprocessed.append(file)
+            # write skipped file to log
+            logandprint('NOTICE: File is not a disk image. Skipping file.')
 
+    # print unprocessed list
+    if unprocessed:
+        skipped_disks = ', '.join(unprocessed)
+        logandprint('Processing complete. Skipped disks: %s' % (skipped_disks))
     else:
-        # write skipped file to log
-        logandprint('NOTICE: File is not a disk image. Skipping file.')
-
-# print unprocessed list
-if unprocessed:
-    skipped_disks = ', '.join(unprocessed)
-    logandprint('Processing complete. Skipped disks: %s' % (skipped_disks))
-else:
-    logandprint('Processing complete. All disk images processed. Results in %s.' % (destination))
-
-# write description spreadsheet
-create_spreadsheet(args.filesonly, args.exportall)
-
-# close files
-spreadsheet.close()
-log.close()
+        logandprint('Processing complete. All disk images processed. Results in %s.' % (destination))
+
+    # write description spreadsheet
+    create_spreadsheet(args)
+
+    # close log
+    log.close()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

From 5ede2b1b21f60cd984f470ae36de7ed20be50c43 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:26:20 -0400
Subject: [PATCH 14/24] Update version

---
 README.md | 6 +++++-
 main.py   | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 54fe770..401b317 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 # Disk Image Processor  
 
 Analyze disk images and/or create ready-to-ingest SIPs from a directory of disk images and related files.  
-Version: 0.6.1 (beta)
+Version: 0.7.0 (beta)
 
 ## Usage
 
@@ -63,6 +63,10 @@ The "metadata/submissionDocumentation" directory in each SIP contains:
 * Text output from "disktype"  
 * Brunnhilde reports (including logs and reports from clamAV and, optionally, bulk_extractor)  
 
+### Process a single disk image, providing options to tsk_recover (CLI only)  
+
+Also included is a Python 3 script, `process_with_tsk_options.py`, which creates a SIP and corresponding description for a single disk image (and accompanying files) while letting the user specify the file system type, image type, and sector offset as needed for `tsk_recover`. This script may be useful for disks from which `tsk_recover` is unable to extract files using its autodetection methods.
+
 ## Supported file systems
 
 * NTFS  
diff --git a/main.py b/main.py
index 0eb874e..0d574ae 100644
--- a/main.py
+++ b/main.py
@@ -24,7 +24,7 @@ def __init__(self, parent=None):
 
     def about_dialog(self):
         QMessageBox.information(self, "About", 
-            "Disk Image Processor v0.6.1\nTim Walsh, 2017\nMIT License\nhttps://github.com/timothyryanwalsh/cca-diskimageprocessor")
+            "Disk Image Processor v0.7.0\nTim Walsh, 2017\nMIT License\nhttps://github.com/timothyryanwalsh/cca-diskimageprocessor")
 
     @pyqtSlot()
     def readStdOutput(self):

From 86305df41ea3cc15a4ccafb51ca6e6b6d69945d4 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:32:32 -0400
Subject: [PATCH 15/24] Pass log to logandprint with message

---
 diskimageprocessor.py | 44 +++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/diskimageprocessor.py b/diskimageprocessor.py
index 8e0fe94..f51a9c5 100644
--- a/diskimageprocessor.py
+++ b/diskimageprocessor.py
@@ -39,7 +39,7 @@
 #import Objects.py from python dfxml tools
 import Objects
 
-def logandprint(message):
+def logandprint(log, message):
     """ Print to log and terminal """
     log.write('\n' + (time.strftime("%H:%M:%S %b %d, %Y - ", 
         time.localtime())) + message)
@@ -284,7 +284,7 @@ def create_spreadsheet(args):
                     writer.writerow(['', item, '', '', date_statement, date_earliest, date_latest, 'File', extent, 
                         scopecontent, '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
                     
-                    logandprint('Described %s successfully.' % (current))
+                    logandprint(log, 'Described %s successfully.' % (current))
 
                 # if error reading DFXML file, report that
                 except:
@@ -292,9 +292,9 @@ def create_spreadsheet(args):
                     writer.writerow(['', item, '', '', 'Error', 'Error', 'Error', 'File', 'Error', 
                         'Error reading DFXML file.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''])
 
-                    logandprint('ERROR: DFXML file for %s not well-formed.' % (current))
+                    logandprint(log, 'ERROR: DFXML file for %s not well-formed.' % (current))
 
-    logandprint('Description CSV created.')
+    logandprint(log, 'Description CSV created.')
 
 def keep_logical_files_only(objects_dir):
     """ Remove disk image from SIP and repackage """
@@ -342,9 +342,9 @@ def main():
     log_file = os.path.join(destination, 'diskimageprocessor-log.txt')
     try:
         log = open(log_file, 'w')   # open the log file
-        logandprint('Source of disk images: %s' % (args.source))
+        logandprint(log, 'Source of disk images: %s' % (args.source))
     except:
-        logandprint('There was an error creating the log file.')
+        logandprint(log, 'There was an error creating the log file.')
 
     # make list for unprocessed disks
     unprocessed = []
@@ -353,7 +353,7 @@ def main():
     for file in sorted(os.listdir(args.source)):
 
         # record filename in log
-        logandprint('>>> NEW FILE: %s' % (file))
+        logandprint(log, '>>> NEW FILE: %s' % (file))
         
         # determine if disk image
         if file.lower().endswith((".e01", ".000", ".001", ".raw", ".img", ".dd", ".iso")):
@@ -388,7 +388,7 @@ def main():
                     os.rename(os.path.join(diskimage_dir, '%s.raw.info' % (image_id)), os.path.join(diskimage_dir, '%s.img.info' % image_id)) # rename sidecar md5 file
                     diskimage = os.path.join(diskimage_dir, '%s.img' % (image_id)) # use raw disk image in objects/diskimage moving forward
                 except subprocess.CalledProcessError:
-                    logandprint('ERROR: Disk image could not be converted to raw image format. Skipping disk.')
+                    logandprint(log, 'ERROR: Disk image could not be converted to raw image format. Skipping disk.')
 
             else:
                 raw_image = True
@@ -399,7 +399,7 @@ def main():
                         try:
                             shutil.copyfile(os.path.join(args.source, movefile), os.path.join(diskimage_dir, movefile))
                         except:
-                            logandprint('ERROR: File %s not successfully copied to %s' % (movefile, diskimage_dir))
+                            logandprint(log, 'ERROR: File %s not successfully copied to %s' % (movefile, diskimage_dir))
                 diskimage = os.path.join(diskimage_dir, file) # use disk image in objects/diskimage moving forward
 
             # raw disk image
@@ -419,7 +419,7 @@ def main():
                     for line in open(disktype, 'rb'):
                         if "file system" in line.decode('utf-8','ignore'):
                             disk_fs = line.decode('utf-8','ignore').strip()
-                logandprint('File system: %s' % (disk_fs))
+                logandprint(log, 'File system: %s' % (disk_fs))
 
                 # handle differently by file system
                 if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
@@ -428,7 +428,7 @@ def main():
                     try:
                         subprocess.check_output(['fiwalk', '-X', fiwalk_file, diskimage])
                     except subprocess.CalledProcessError as e:
-                        logandprint('ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
+                        logandprint(log, 'ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
                     
                     # carve images using tsk_recover
                     carve_flag = '-a' # default to exporting allocated files
@@ -437,7 +437,7 @@ def main():
                     try:
                         subprocess.check_output(['tsk_recover', carve_flag, diskimage, files_dir])
                     except subprocess.CalledProcessError as e:
-                        logandprint('ERROR: tsk_recover could not carve allocated files from disk. STDERR: %s' % (e.output))    
+                        logandprint(log, 'ERROR: tsk_recover could not carve allocated files from disk. STDERR: %s' % (e.output))    
 
                     # modify file permissions
                     subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
@@ -514,7 +514,7 @@ def main():
                     try:
                         subprocess.call("cd /mnt/diskid/ && python3 /usr/share/ccatools/diskimageprocessor/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
                     except:
-                        logandprint('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
+                        logandprint(log, 'ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
 
                     # unmount disk image
                     subprocess.call('sudo umount /mnt/diskid', shell=True)
@@ -524,12 +524,12 @@ def main():
                         try:
                             subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-resforks', 'APPLEDOUBLE', '-o', files_dir, diskimage])
                         except subprocess.CalledProcessError as e:
-                            logandprint('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output))
+                            logandprint(log, 'ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output))
                     else:
                         try:
                             subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-o', files_dir, diskimage])
                         except subprocess.CalledProcessError as e:
-                            logandprint('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output)) 
+                            logandprint(log, 'ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output)) 
 
                     # modify file permissions
                     subprocess.call("sudo find '%s' -type d -exec chmod 755 {} \;" % (sip_dir), shell=True)
@@ -562,14 +562,14 @@ def main():
                     try:
                         subprocess.call("cd /mnt/diskid/ && python3 /usr/share/dfxml/python/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
                     except:
-                        logandprint('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
+                        logandprint(log, 'ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
                     
                     # copy files from disk image to files dir
                     shutil.rmtree(files_dir) # delete to enable use of copytree
                     try:
                         shutil.copytree('/mnt/diskid/', files_dir, symlinks=False, ignore=None)
                     except:
-                        logandprint("ERROR: shutil.copytree unable to copy files from disk %s" % (diskimage))
+                        logandprint(log, "ERROR: shutil.copytree unable to copy files from disk %s" % (diskimage))
 
                     # unmount disk image
                     subprocess.call('sudo umount /mnt/diskid', shell=True) # unmount
@@ -596,24 +596,24 @@ def main():
                         subprocess.call("cd '%s' && md5deep -rl ../objects > checksum.md5" % (metadata_dir), shell=True)
 
                 else:
-                    logandprint('NOTICE: Skipping processing of unknown disk type.')
+                    logandprint(log, 'NOTICE: Skipping processing of unknown disk type.')
                     unprocessed.append(file)
 
             # no raw disk image
             else:
-                logandprint('NOTICE: No raw disk image. Skipping disk.')
+                logandprint(log, 'NOTICE: No raw disk image. Skipping disk.')
                 unprocessed.append(file)
 
         else:
             # write skipped file to log
-            logandprint('NOTICE: File is not a disk image. Skipping file.')
+            logandprint(log, 'NOTICE: File is not a disk image. Skipping file.')
 
     # print unprocessed list
     if unprocessed:
         skipped_disks = ', '.join(unprocessed)
-        logandprint('Processing complete. Skipped disks: %s' % (skipped_disks))
+        logandprint(log, 'Processing complete. Skipped disks: %s' % (skipped_disks))
     else:
-        logandprint('Processing complete. All disk images processed. Results in %s.' % (destination))
+        logandprint(log, 'Processing complete. All disk images processed. Results in %s.' % (destination))
 
     # write description spreadsheet
     create_spreadsheet(args)

From e13c50d29a34e30417b38c19f0a148de5dbc2c50 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:39:09 -0400
Subject: [PATCH 16/24] Pass sip_dir to spreadsheet function

---
 diskimageprocessor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/diskimageprocessor.py b/diskimageprocessor.py
index f51a9c5..8a54092 100644
--- a/diskimageprocessor.py
+++ b/diskimageprocessor.py
@@ -63,7 +63,7 @@ def time_to_int(str_time):
         "%Y-%m-%dT%H:%M:%S").timetuple())
     return dt
 
-def create_spreadsheet(args):
+def create_spreadsheet(args, sips):
     """ Create csv describing created SIPs """
 
     # open description spreadsheet
@@ -616,7 +616,7 @@ def main():
         logandprint(log, 'Processing complete. All disk images processed. Results in %s.' % (destination))
 
     # write description spreadsheet
-    create_spreadsheet(args)
+    create_spreadsheet(args, sip_dir)
 
     # close log
     log.close()

From 1aa5756edb20d8ae533b48d7d074fc1628d34921 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:42:33 -0400
Subject: [PATCH 17/24] Refactor

---
 diskimageanalyzer.py | 459 ++++++++++++++++++++++---------------------
 1 file changed, 231 insertions(+), 228 deletions(-)

diff --git a/diskimageanalyzer.py b/diskimageanalyzer.py
index 6ba0d9b..dbd7369 100644
--- a/diskimageanalyzer.py
+++ b/diskimageanalyzer.py
@@ -26,7 +26,7 @@
 import Objects
 
 def convert_size(size):
-    """convert size to human-readable form"""
+    """ Convert size to human-readable form """
     if (size == 0):
         return '0 bytes'
     size_name = ("bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
@@ -38,11 +38,12 @@ def convert_size(size):
     return '%s %s' % (s,size_name[i])
 
 def time_to_int(str_time):
+    """ Convert datetime to unix integer value """
     dt = time.mktime(datetime.datetime.strptime(str_time, "%Y-%m-%dT%H:%M:%S").timetuple())
     return dt
 
 def write_to_spreadsheet(disk_result, spreadsheet_path, exportall):
-    """append info for current disk to analysis CSV"""
+    """ Append info for current disk to analysis CSV """
 
     # open description spreadsheet
     spreadsheet = open(spreadsheet_path, 'a')
@@ -228,258 +229,260 @@ def write_to_spreadsheet(disk_result, spreadsheet_path, exportall):
 
     spreadsheet.close()
 
-# MAIN FLOW
-
-# parse arguments
-parser = argparse.ArgumentParser()
-parser.add_argument("-e", "--exportall", help="Export all (not only allocated) with tsk_recover", action="store_true")
-parser.add_argument("-k", "--keepfiles", help="Retain exported logical files from each disk", action="store_true")
-parser.add_argument("-r", "--resforks", help="Export AppleDouble resource forks from HFS-formatted disks", action="store_true")
-parser.add_argument("source", help="Path to folder containing disk images")
-parser.add_argument("destination", help="Output destination")
-args = parser.parse_args()
-
-source = args.source
-destination = args.destination
-
-# make outdir disks
-if not os.path.exists(destination):
-    os.makedirs(destination)
-diskimages_dir = os.path.join(destination, 'diskimages')
-files_dir = os.path.join(destination, 'files')
-results_dir = os.path.join(destination, 'reports')
-for new_dir in diskimages_dir, files_dir, results_dir:
-    os.makedirs(new_dir)
-    
-# make list for unanalyzed disks
-unanalyzed = []
-
-# process each disk image
-for file in sorted(os.listdir(source)):
-    
-    # determine if disk image
-    if file.endswith((".E01", ".000", ".001", ".raw", ".img", ".dd", ".iso")):
-
-        # save info about file
-        image_path = os.path.join(source, file)
-        image_id = os.path.splitext(file)[0]
-        image_ext = os.path.splitext(file)[1]
-
-        # create new folders
-        disk_dir = os.path.join(results_dir, file)
-        os.makedirs(disk_dir)
-
-        # disk image status
-        raw_image = False
-
-        # check if disk image is ewf
-        if image_ext == ".E01":
-            # convert disk image to raw and write to diskimages_dir
-            raw_out = os.path.join(diskimages_dir, image_id)
-            try:
-                subprocess.check_output(['ewfexport', '-t', raw_out, '-f', 'raw', '-o', '0', '-S', '0', '-u', image_path])
-                raw_image = True
-                os.rename(os.path.join(diskimages_dir, '%s.raw' % image_id), os.path.join(diskimages_dir, '%s.img' % image_id)) # change file extension from .raw to .img
-                os.rename(os.path.join(diskimages_dir, '%s.raw.info' % image_id), os.path.join(diskimages_dir, '%s.img.info' % image_id)) # rename sidecar md5 file
-                diskimage = os.path.join(diskimages_dir, '%s.img' % image_id) # use raw disk image in diskimages_dir moving forward
-            except subprocess.CalledProcessError:
-                print('ERROR: Disk image could not be converted to raw image format. Skipping disk.')
-
-        else:
-            raw_image = True
-            for movefile in os.listdir(args.source):
-                # if filename starts with disk image basename (this will also capture info and log files, multi-part disk images, etc.)
-                if movefile.startswith(image_id):
-                    # copy file to objects/diskimage
-                    shutil.copyfile(os.path.join(args.source, movefile), os.path.join(diskimages_dir, movefile))
-            diskimage = os.path.join(diskimages_dir, file) # use disk image in diskimages_dir moving forward
-
-        # raw disk image
-        if raw_image == True:
-            
-            # run disktype on disk image, save output to disk_dir
-            disktype = os.path.join(disk_dir, 'disktype.txt')
-            subprocess.call("disktype '%s' > '%s'" % (diskimage, disktype), shell=True)
+def _make_parser():
 
-            # pull filesystem info from disktype.txt
-            disk_fs = ''
-            try:
-                for line in open(disktype, 'r'):
-                    if "file system" in line:
-                        disk_fs = line.strip()
-            except: # handle non-Unicode chars
-                for line in open(disktype, 'rb'):
-                    if "file system" in line.decode('utf-8','ignore'):
-                        disk_fs = line.decode('utf-8','ignore').strip()
-
-            # handle differently by file system
-            if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
-                # use fiwalk to make dfxml
-                fiwalk_file = os.path.abspath(os.path.join(disk_dir, 'dfxml.xml'))
-                try:
-                    subprocess.check_output(['fiwalk', '-X', fiwalk_file, diskimage])
-                except subprocess.CalledProcessError as e:
-                    print('ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
-
-                # carve files
-                disk_files_dir = os.path.join(files_dir, file)
-                if not os.path.exists(disk_files_dir):
-                    os.makedirs(disk_files_dir)
-                # carve allocated or all files depending on option selected
-                if args.exportall == True:
-                    try:
-                        subprocess.check_output(['tsk_recover', '-e', diskimage, disk_files_dir])
-                    except subprocess.CalledProcessError as e:
-                        print('ERROR: tsk_recover could not carve all files from disk. STDERR: %s' % (e.output))
-                else:
-                    try:
-                        subprocess.check_output(['tsk_recover', '-a', diskimage, disk_files_dir])
-                    except subprocess.CalledProcessError as e:
-                        print('ERROR: tsk_recover could not carve allocated files from disk. STDERR: %s' % (e.output))
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--exportall", help="Export all (not only allocated) with tsk_recover", action="store_true")
+    parser.add_argument("-k", "--keepfiles", help="Retain exported logical files from each disk", action="store_true")
+    parser.add_argument("-r", "--resforks", help="Export AppleDouble resource forks from HFS-formatted disks", action="store_true")
+    parser.add_argument("source", help="Path to folder containing disk images")
+    parser.add_argument("destination", help="Output destination")
 
-                # rewrite last modified dates of carved files based on values in DFXML
-                for (event, obj) in Objects.iterparse(fiwalk_file):
-                    
-                    # only work on FileObjects
-                    if not isinstance(obj, Objects.FileObject):
-                        continue
+def main():
 
-                    # skip directories and links
-                    if obj.name_type:
-                        if obj.name_type != "r":
-                            continue
+    parser = _make_parser()
+    args = parser.parse_args()
 
-                    # record filename
-                    dfxml_filename = obj.filename
-                    dfxml_filedate = int(time.time()) # default to current time
+    source = os.path.abspath(args.source)
+    destination = os.path.abspath(args.destination)
 
-                    # record last modified or last created date
-                    try:
-                        mtime = obj.mtime
-                        mtime = str(mtime)
-                    except:
-                        pass
-
-                    try:
-                        crtime = obj.crtime
-                        crtime = str(crtime)
-                    except:
-                        pass
-
-                    # fallback to created date if last modified doesn't exist
-                    if mtime and (mtime != 'None'):
-                        mtime = time_to_int(mtime[:19])
-                        dfxml_filedate = mtime
-                    elif crtime and (crtime != 'None'):
-                        crtime = time_to_int(crtime[:19])
-                        dfxml_filedate = crtime
-                    else:
-                        continue
+    # make outdir disks
+    if not os.path.exists(destination):
+        os.makedirs(destination)
+    diskimages_dir = os.path.join(destination, 'diskimages')
+    files_dir = os.path.join(destination, 'files')
+    results_dir = os.path.join(destination, 'reports')
+    for new_dir in diskimages_dir, files_dir, results_dir:
+        os.makedirs(new_dir)
+        
+    # make list for unanalyzed disks
+    unanalyzed = []
 
-                    # rewrite last modified date of corresponding file in objects/files
-                    exported_filepath = os.path.join(disk_files_dir, dfxml_filename)
-                    if os.path.isfile(exported_filepath):
-                        os.utime(exported_filepath, (dfxml_filedate, dfxml_filedate))
+    # process each disk image
+    for file in sorted(os.listdir(source)):
+        
+        # determine if disk image
+        if file.lower().endswith((".e01", ".000", ".001", ".raw", ".img", ".dd", ".iso")):
 
-                # run brunnhilde
-                subprocess.call("brunnhilde.py -zwb '%s' '%s' brunnhilde" % (disk_files_dir, disk_dir), shell=True)
+            # save info about file
+            image_path = os.path.join(source, file)
+            image_id = os.path.splitext(file)[0]
+            image_ext = os.path.splitext(file)[1]
 
-                # remove disk_files_dir unless keepfiles option selected
-                if args.keepfiles == False:
-                    shutil.rmtree(disk_files_dir)
+            # create new folders
+            disk_dir = os.path.join(results_dir, file)
+            os.makedirs(disk_dir)
 
-            elif ('hfs' in disk_fs.lower()) and ('hfs+' not in disk_fs.lower()):
-                # mount disk image
-                subprocess.call("sudo mount -t hfs -o loop,ro,noexec '%s' /mnt/diskid/" % (diskimage), shell=True)
+            # disk image status
+            raw_image = False
 
-                # use walk_to_dfxml.py to make dfxml
-                dfxml_file = os.path.abspath(os.path.join(disk_dir, 'dfxml.xml'))
+            # check if disk image is ewf
+            if image_ext == ".E01":
+                # convert disk image to raw and write to diskimages_dir
+                raw_out = os.path.join(diskimages_dir, image_id)
                 try:
-                    subprocess.call("cd /mnt/diskid/ && python3 /usr/share/ccatools/diskimageprocessor/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
-                except:
-                    print('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
+                    subprocess.check_output(['ewfexport', '-t', raw_out, '-f', 'raw', '-o', '0', '-S', '0', '-u', image_path])
+                    raw_image = True
+                    os.rename(os.path.join(diskimages_dir, '%s.raw' % image_id), os.path.join(diskimages_dir, '%s.img' % image_id)) # change file extension from .raw to .img
+                    os.rename(os.path.join(diskimages_dir, '%s.raw.info' % image_id), os.path.join(diskimages_dir, '%s.img.info' % image_id)) # rename sidecar md5 file
+                    diskimage = os.path.join(diskimages_dir, '%s.img' % image_id) # use raw disk image in diskimages_dir moving forward
+                except subprocess.CalledProcessError:
+                    print('ERROR: Disk image could not be converted to raw image format. Skipping disk.')
+
+            else:
+                raw_image = True
+                for movefile in os.listdir(args.source):
+                    # if filename starts with disk image basename (this will also capture info and log files, multi-part disk images, etc.)
+                    if movefile.startswith(image_id):
+                        # copy file to objects/diskimage
+                        shutil.copyfile(os.path.join(args.source, movefile), os.path.join(diskimages_dir, movefile))
+                diskimage = os.path.join(diskimages_dir, file) # use disk image in diskimages_dir moving forward
+
+            # raw disk image
+            if raw_image == True:
                 
-                # run brunnhilde
-                subprocess.call("brunnhilde.py -zwb /mnt/diskid/ '%s' brunnhilde" % (disk_dir), shell=True)
+                # run disktype on disk image, save output to disk_dir
+                disktype = os.path.join(disk_dir, 'disktype.txt')
+                subprocess.call("disktype '%s' > '%s'" % (diskimage, disktype), shell=True)
 
-                # unmount disk image
-                subprocess.call('sudo umount /mnt/diskid', shell=True)
+                # pull filesystem info from disktype.txt
+                disk_fs = ''
+                try:
+                    for line in open(disktype, 'r'):
+                        if "file system" in line:
+                            disk_fs = line.strip()
+                except: # handle non-Unicode chars
+                    for line in open(disktype, 'rb'):
+                        if "file system" in line.decode('utf-8','ignore'):
+                            disk_fs = line.decode('utf-8','ignore').strip()
+
+                # handle differently by file system
+                if any(x in disk_fs.lower() for x in ('ntfs', 'fat', 'ext', 'iso9660', 'hfs+', 'ufs', 'raw', 'swap', 'yaffs2')):
+                    # use fiwalk to make dfxml
+                    fiwalk_file = os.path.abspath(os.path.join(disk_dir, 'dfxml.xml'))
+                    try:
+                        subprocess.check_output(['fiwalk', '-X', fiwalk_file, diskimage])
+                    except subprocess.CalledProcessError as e:
+                        print('ERROR: Fiwalk could not create DFXML for disk. STDERR: %s' % (e.output))
 
-                # export files to disk_files_dir if keepfiles selected
-                if args.keepfiles == True:
+                    # carve files
                     disk_files_dir = os.path.join(files_dir, file)
                     if not os.path.exists(disk_files_dir):
                         os.makedirs(disk_files_dir)
-                    # carve with or without resource forks depending on option selected
-                    if args.resforks == True:
+                    # carve allocated or all files depending on option selected
+                    if args.exportall == True:
                         try:
-                            subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-resforks', 'APPLEDOUBLE', '-o', disk_files_dir, diskimage])
+                            subprocess.check_output(['tsk_recover', '-e', diskimage, disk_files_dir])
                         except subprocess.CalledProcessError as e:
-                            print('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output))
+                            print('ERROR: tsk_recover could not carve all files from disk. STDERR: %s' % (e.output))
                     else:
                         try:
-                            subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-o', disk_files_dir, diskimage])
+                            subprocess.check_output(['tsk_recover', '-a', diskimage, disk_files_dir])
                         except subprocess.CalledProcessError as e:
-                            print('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output))
+                            print('ERROR: tsk_recover could not carve allocated files from disk. STDERR: %s' % (e.output))
 
+                    # rewrite last modified dates of carved files based on values in DFXML
+                    for (event, obj) in Objects.iterparse(fiwalk_file):
+                        
+                        # only work on FileObjects
+                        if not isinstance(obj, Objects.FileObject):
+                            continue
 
-            elif 'udf' in disk_fs.lower():
-                # mount image
-                subprocess.call("sudo mount -t udf -o loop '%s' /mnt/diskid/" % (diskimage), shell=True)
+                        # skip directories and links
+                        if obj.name_type:
+                            if obj.name_type != "r":
+                                continue
 
-                # use walk_to_dfxml.py to create dfxml
-                dfxml_file = os.path.abspath(os.path.join(disk_dir, 'dfxml.xml'))
-                try:
-                    subprocess.call("cd /mnt/diskid/ && python3 /usr/share/ccatools/diskimageprocessor/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
-                except:
-                    print('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
-                
-                # write files to tempdir
-                disk_files_dir = os.path.join(files_dir, file)
-                shutil.copytree('/mnt/diskid/', disk_files_dir, symlinks=False, ignore=None)
+                        # record filename
+                        dfxml_filename = obj.filename
+                        dfxml_filedate = int(time.time()) # default to current time
+
+                        # record last modified or last created date
+                        try:
+                            mtime = obj.mtime
+                            mtime = str(mtime)
+                        except:
+                            pass
+
+                        try:
+                            crtime = obj.crtime
+                            crtime = str(crtime)
+                        except:
+                            pass
+
+                        # fallback to created date if last modified doesn't exist
+                        if mtime and (mtime != 'None'):
+                            mtime = time_to_int(mtime[:19])
+                            dfxml_filedate = mtime
+                        elif crtime and (crtime != 'None'):
+                            crtime = time_to_int(crtime[:19])
+                            dfxml_filedate = crtime
+                        else:
+                            continue
+
+                        # rewrite last modified date of corresponding file in objects/files
+                        exported_filepath = os.path.join(disk_files_dir, dfxml_filename)
+                        if os.path.isfile(exported_filepath):
+                            os.utime(exported_filepath, (dfxml_filedate, dfxml_filedate))
+
+                    # run brunnhilde
+                    subprocess.call("brunnhilde.py -zwb '%s' '%s' brunnhilde" % (disk_files_dir, disk_dir), shell=True)
+
+                    # remove disk_files_dir unless keepfiles option selected
+                    if args.keepfiles == False:
+                        shutil.rmtree(disk_files_dir)
+
+                elif ('hfs' in disk_fs.lower()) and ('hfs+' not in disk_fs.lower()):
+                    # mount disk image
+                    subprocess.call("sudo mount -t hfs -o loop,ro,noexec '%s' /mnt/diskid/" % (diskimage), shell=True)
+
+                    # use walk_to_dfxml.py to make dfxml
+                    dfxml_file = os.path.abspath(os.path.join(disk_dir, 'dfxml.xml'))
+                    try:
+                        subprocess.call("cd /mnt/diskid/ && python3 /usr/share/ccatools/diskimageprocessor/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
+                    except:
+                        print('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
+                    
+                    # run brunnhilde
+                    subprocess.call("brunnhilde.py -zwb /mnt/diskid/ '%s' brunnhilde" % (disk_dir), shell=True)
+
+                    # unmount disk image
+                    subprocess.call('sudo umount /mnt/diskid', shell=True)
+
+                    # export files to disk_files_dir if keepfiles selected
+                    if args.keepfiles == True:
+                        disk_files_dir = os.path.join(files_dir, file)
+                        if not os.path.exists(disk_files_dir):
+                            os.makedirs(disk_files_dir)
+                        # carve with or without resource forks depending on option selected
+                        if args.resforks == True:
+                            try:
+                                subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-resforks', 'APPLEDOUBLE', '-o', disk_files_dir, diskimage])
+                            except subprocess.CalledProcessError as e:
+                                print('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output))
+                        else:
+                            try:
+                                subprocess.check_output(['bash', '/usr/share/hfsexplorer/bin/unhfs', '-v', '-o', disk_files_dir, diskimage])
+                            except subprocess.CalledProcessError as e:
+                                print('ERROR: HFS Explorer could not carve the following files from image: %s' % (e.output))
+
+
+                elif 'udf' in disk_fs.lower():
+                    # mount image
+                    subprocess.call("sudo mount -t udf -o loop '%s' /mnt/diskid/" % (diskimage), shell=True)
+
+                    # use walk_to_dfxml.py to create dfxml
+                    dfxml_file = os.path.abspath(os.path.join(disk_dir, 'dfxml.xml'))
+                    try:
+                        subprocess.call("cd /mnt/diskid/ && python3 /usr/share/ccatools/diskimageprocessor/walk_to_dfxml.py > '%s'" % (dfxml_file), shell=True)
+                    except:
+                        print('ERROR: walk_to_dfxml.py unable to generate DFXML for disk %s' % (diskimage))
+                    
+                    # write files to tempdir
+                    disk_files_dir = os.path.join(files_dir, file)
+                    shutil.copytree('/mnt/diskid/', disk_files_dir, symlinks=False, ignore=None)
 
-                # change file permissions in disk_files_dir
-                subprocess.call("find '%s' -type d -exec chmod 755 {} \;" % (disk_files_dir), shell=True)
-                subprocess.call("find '%s' -type f -exec chmod 644 {} \;" % (disk_files_dir), shell=True)
+                    # change file permissions in disk_files_dir
+                    subprocess.call("find '%s' -type d -exec chmod 755 {} \;" % (disk_files_dir), shell=True)
+                    subprocess.call("find '%s' -type f -exec chmod 644 {} \;" % (disk_files_dir), shell=True)
 
-                # unmount disk image
-                subprocess.call('sudo umount /mnt/diskid', shell=True)
+                    # unmount disk image
+                    subprocess.call('sudo umount /mnt/diskid', shell=True)
 
-                # run brunnhilde
-                subprocess.call("brunnhilde.py -zwb '%s' '%s' brunnhilde" % (disk_files_dir, disk_dir), shell=True)
+                    # run brunnhilde
+                    subprocess.call("brunnhilde.py -zwb '%s' '%s' brunnhilde" % (disk_files_dir, disk_dir), shell=True)
+                    
+                    # remove disk_files_dir unless keepfiles option selected
+                    if args.keepfiles == False:
+                        shutil.rmtree(disk_files_dir)
                 
-                # remove disk_files_dir unless keepfiles option selected
-                if args.keepfiles == False:
-                    shutil.rmtree(disk_files_dir)
-            
-            else:
-                # add disk to unanalyzed list
-                unanalyzed.append(diskimage)          
-
-# delete temp directories
-shutil.rmtree(diskimages_dir)
-if args.keepfiles == False:
-    shutil.rmtree(files_dir)
-
-# create analysis spreadsheet
-spreadsheet_path = os.path.join(destination, 'analysis.csv')
-# open description spreadsheet
-spreadsheet = open(spreadsheet_path, 'w')
-writer = csv.writer(spreadsheet, quoting=csv.QUOTE_NONNUMERIC)
-header_list = ['Disk image', 'File system', 'Date type', 'Date statement', 'Date begin', 'Date end', 'Extent', 'Virus found', 'File formats']
-writer.writerow(header_list)
-
-# close description spreadsheet
-spreadsheet.close()
-
-# add info to description spreadsheet
-for item in sorted(os.listdir(results_dir)):
-    disk_result = os.path.join(results_dir, item)
-    write_to_spreadsheet(disk_result, spreadsheet_path, args.exportall)
-
-# write closing message
-if unanalyzed:
-    skipped_disks = ', '.join(unanalyzed)
-    print('Analysis complete. Skipped disks: %s.' % (skipped_disks))
-else:
-    print('Analysis complete. All disk images analyzed. Results in %s.' % (destination))
+                else:
+                    # add disk to unanalyzed list
+                    unanalyzed.append(diskimage)          
+
+    # delete temp directories
+    shutil.rmtree(diskimages_dir)
+    if args.keepfiles == False:
+        shutil.rmtree(files_dir)
+
+    # create analysis csv, write header, and close file
+    spreadsheet = open(os.path.join(destination, 'analysis.csv'), 'w')
+    writer = csv.writer(spreadsheet, quoting=csv.QUOTE_NONNUMERIC)
+    header_list = ['Disk image', 'File system', 'Date type', 'Date statement', 'Date begin', 'Date end', 'Extent', 'Virus found', 'File formats']
+    writer.writerow(header_list)
+    spreadsheet.close()
+
+    # add info to analysis csv for each SIP
+    for item in sorted(os.listdir(results_dir)):
+        disk_result = os.path.join(results_dir, item)
+        write_to_spreadsheet(disk_result, spreadsheet_path, args.exportall)
+
+    # write closing message
+    if unanalyzed:
+        skipped_disks = ', '.join(unanalyzed)
+        print('Analysis complete. Skipped disks: %s.' % (skipped_disks))
+    else:
+        print('Analysis complete. All disk images analyzed. Results in %s.' % (destination))
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

From 8416a637fa3ac35c699abfafd0befcfaa7023a55 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:47:17 -0400
Subject: [PATCH 18/24] Pass log path to csv function

---
 diskimageprocessor.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/diskimageprocessor.py b/diskimageprocessor.py
index 8a54092..4a86a4a 100644
--- a/diskimageprocessor.py
+++ b/diskimageprocessor.py
@@ -63,7 +63,7 @@ def time_to_int(str_time):
         "%Y-%m-%dT%H:%M:%S").timetuple())
     return dt
 
-def create_spreadsheet(args, sips):
+def create_spreadsheet(args, sips, log):
     """ Create csv describing created SIPs """
 
     # open description spreadsheet
@@ -616,7 +616,7 @@ def main():
         logandprint(log, 'Processing complete. All disk images processed. Results in %s.' % (destination))
 
     # write description spreadsheet
-    create_spreadsheet(args, sip_dir)
+    create_spreadsheet(args, sip_dir, log)
 
     # close log
     log.close()

From 020d237f4ff7cb86dae44d43ecb80446394fece3 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:52:14 -0400
Subject: [PATCH 19/24] Pass sips path to csv function, not sip_dir

---
 diskimageprocessor.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diskimageprocessor.py b/diskimageprocessor.py
index 4a86a4a..60d7619 100644
--- a/diskimageprocessor.py
+++ b/diskimageprocessor.py
@@ -616,7 +616,7 @@ def main():
         logandprint(log, 'Processing complete. All disk images processed. Results in %s.' % (destination))
 
     # write description spreadsheet
-    create_spreadsheet(args, sip_dir, log)
+    create_spreadsheet(args, sips, log)
 
     # close log
     log.close()

From f3457af42c37f542524849693537eb0f58a24c8e Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:53:52 -0400
Subject: [PATCH 20/24] Return parser from _make_parser()

---
 diskimageanalyzer.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/diskimageanalyzer.py b/diskimageanalyzer.py
index dbd7369..8e2dac4 100644
--- a/diskimageanalyzer.py
+++ b/diskimageanalyzer.py
@@ -238,6 +238,8 @@ def _make_parser():
     parser.add_argument("source", help="Path to folder containing disk images")
     parser.add_argument("destination", help="Output destination")
 
+    return parser
+
 def main():
 
     parser = _make_parser()

From 6b0239e76af120b2c3417f03eafacc247904a704 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 17:59:13 -0400
Subject: [PATCH 21/24] Update README.md

---
 README.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 401b317..c6ea727 100644
--- a/README.md
+++ b/README.md
@@ -90,9 +90,9 @@ Also included is a Python 3 script `process_with_tsk_options.py`. This script al
 
 ## Disk image extensions recognized
 
-Disk Image Processor recognizes which files are disk images by their file extensions. Currently, it looks for the following extensions:  
+Disk Image Processor recognizes which files are disk images by their file extensions. Currently, it looks for the following extensions (case-insensitive):  
 
-* .E01  
+* .e01  
 * .000  
 * .001  
 * .raw  
@@ -100,8 +100,6 @@ Disk Image Processor recognizes which files are disk images by their file extens
 * .dd  
 * .iso  
 
-*To add extensions to this list, add them as elements in the tuple inside `file.endswith((".E01", ".000", ".001", ".raw", ".img", ".dd", ".iso"))` on line 353 of `diskimageprocessor.py` and/or line 261 of `diskimageanalyzer.py`.*
-
 ## Installation and dependencies
 
 This utility is designed for easy use in BitCurator v1.8.0+. It requires Python 2.7 (to run the GUI) and Python 3.4+ (to run the scripts that analyze and process disk images), both of which are already included in BitCurator.    

From a37ffb1302d30a7be242cca8d601e1bb96638f32 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Tue, 17 Oct 2017 18:03:08 -0400
Subject: [PATCH 22/24] Update README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index c6ea727..f618f78 100644
--- a/README.md
+++ b/README.md
@@ -81,6 +81,8 @@ Also included is a Python 3 script `process_with_tsk_options.py`. This script al
 * SWAP  
 * YAFFS2  
 
+For disks with exFAT file systems, you may need to use the `process_with_tsk_options.py` script and explicitly specify the file system type, because disktype is unable to recognize exFAT file systems.
+
 ## Supported disk image types  
 
 * raw (dd, iso, img, etc.)  

From 06771941d545c51d4156b8c5d1f48ba5bc08d8ac Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Wed, 18 Oct 2017 10:19:47 -0400
Subject: [PATCH 23/24] Modify spreadsheet path to write_to_spreadsheet()

---
 diskimageanalyzer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diskimageanalyzer.py b/diskimageanalyzer.py
index 8e2dac4..9b80e25 100644
--- a/diskimageanalyzer.py
+++ b/diskimageanalyzer.py
@@ -477,7 +477,7 @@ def main():
     # add info to analysis csv for each SIP
     for item in sorted(os.listdir(results_dir)):
         disk_result = os.path.join(results_dir, item)
-        write_to_spreadsheet(disk_result, spreadsheet_path, args.exportall)
+        write_to_spreadsheet(disk_result, os.path.join(destination, 'analysis.csv'), args.exportall)
 
     # write closing message
     if unanalyzed:

From e52900c3492e0a96cb9b63e6127e6b38347488b0 Mon Sep 17 00:00:00 2001
From: Tim Walsh <timothyryanwalsh@gmail.com>
Date: Fri, 20 Oct 2017 15:55:47 -0400
Subject: [PATCH 24/24] Update dfxml

---
 deps/dfxml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deps/dfxml b/deps/dfxml
index e75ef19..a95919f 160000
--- a/deps/dfxml
+++ b/deps/dfxml
@@ -1 +1 @@
-Subproject commit e75ef197d387ca165d6fc6676273b4ce534ba0f6
+Subproject commit a95919fa67f97f92077339898e6b2de31f24b974