diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..4549cab --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,70 @@ +--- +name: "Test" +on: + pull_request: + push: + +jobs: + tox: + name: "Test ${{ matrix.toxenv }}" + runs-on: "ubuntu-18.04" + strategy: + matrix: + include: + - python-version: "3.7" + toxenv: "py37" + - python-version: "3.8" + toxenv: "py38" + - python-version: "3.9" + toxenv: "py39" + - python-version: "3.10" + toxenv: "py310" + steps: + - uses: actions/checkout@v2 + with: + submodules: "true" + - name: "Set up Python ${{ matrix.python-version }}" + uses: "actions/setup-python@v2" + with: + python-version: "${{ matrix.python-version }}" + - name: Install homebrew + run: | + /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install.sh)" + test -d ~/.linuxbrew && eval $(~/.linuxbrew/bin/brew shellenv) + test -d /home/linuxbrew/.linuxbrew && eval $(/home/linuxbrew/.linuxbrew/bin/brew shellenv) + echo "eval \$($(brew --prefix)/bin/brew shellenv)" >>~/.profile + echo "/home/linuxbrew/.linuxbrew/bin" >> "$GITHUB_PATH" + brew --version + - name: Install Siegfried + run: | + brew install richardlehane/digipres/siegfried + - name: Install md5deep + run: | + brew install md5deep + - name: Install bulk_extractor + run: | + brew install bulk_extractor + - name: Run install script + run: | + ./test-install.sh + shell: bash + - name: "Get pip cache dir" + id: "pip-cache" + run: | + echo "dir=$(pip cache dir)" >> "$GITHUB_OUTPUT" + - name: "Cache pip packages" + uses: "actions/cache@v2" + with: + path: "${{ steps.pip-cache.outputs.dir }}" + key: "${{ runner.os }}-pip-${{ hashFiles('**/base.txt','**/test.txt') }}" + restore-keys: | + ${{ runner.os }}-pip- + - name: "Install tox" + run: | + python -m pip install --upgrade pip + pip install tox + - name: "Run tox" + env: + TOXENV: ${{ matrix.toxenv }} + run: | + tox diff --git 
a/README.md b/README.md index e0fb1c2..e658d0c 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SIP Creator -Version: 1.0.0 +Version: 1.1.0 Creates an Archivematica-ready transfer (or SIP for another preservation repository) from user-selected directories and files, and generates a pre-populated description spreadsheet using data pulled from DFXML and Brunnhilde (start and end dates, extent, and a scope and content note containing information about the most common file formats present). @@ -16,12 +16,12 @@ This utility is designed for easy use in BitCurator v1.8.0+. It requires Python Install all of the CCA Tools together using the installation script in the [CCA Tools repo](https://github.com/CCA-Public/cca-tools). -### Install as a separate utlity +### Install as a separate utility + * Install [PyQt5](https://www.riverbankcomputing.com/software/pyqt/download5): `sudo pip3 install pyqt5` * Clone this repo to your local machine. -* Run the install script with sudo privileges: -`sudo chmod u+x install.sh` +* Run the install script with sudo privileges (assuming BitCurator 4; for BitCurator 2-3 run `./install-bc2-ubuntu18.sh` instead): `sudo ./install.sh` ### PyQt4 version diff --git a/install-bc2-ubuntu18.sh b/install-bc2-ubuntu18.sh new file mode 100755 index 0000000..7b2b6ea --- /dev/null +++ b/install-bc2-ubuntu18.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +### Install script for CCA SIP Creator in Bitcurator + +# Update submodules +git submodule update --init --recursive + +# Make /usr/share/ccatools if doesn't already exist +if [ ! 
-d /usr/share/ccatools ]; then + sudo mkdir /usr/share/ccatools +fi + +# Delete /usr/share directory for SIP Creator if it already exists +if [ -d /usr/share/ccatools/sipcreator ]; then + sudo rm -rf /usr/share/ccatools/sipcreator +fi + +# Make /usr/share directory for SIP Creator +sudo mkdir /usr/share/ccatools/sipcreator + +# Move files into /usr/share/ccatools/sipcreator +sudo cp main.py /usr/share/ccatools/sipcreator +sudo cp launch /usr/share/ccatools/sipcreator +sudo cp design.py /usr/share/ccatools/sipcreator +sudo cp design.ui /usr/share/ccatools/sipcreator +sudo cp icon.png /usr/share/ccatools/sipcreator +sudo cp LICENSE /usr/share/ccatools/sipcreator +sudo cp README.md /usr/share/ccatools/sipcreator +sudo cp deps/dfxml/python/dfxml.py /usr/share/ccatools/sipcreator +sudo cp deps/dfxml/python/Objects.py /usr/share/ccatools/sipcreator +sudo cp deps/dfxml/python/walk_to_dfxml.py /usr/share/ccatools/sipcreator + +# Make "CCA Tools" folder on Desktop if doesn't already exist +if [ ! 
-d "/home/bcadmin/Desktop/CCA Tools" ]; then + sudo mkdir "/home/bcadmin/Desktop/CCA Tools" +fi + +# Create launch.desktop file +sudo touch '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' +echo '[Desktop Entry]' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' +echo 'Type=Application' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' +echo 'Name=SIP Creator' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' +echo 'Exec=/usr/share/ccatools/sipcreator/launch' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' +echo 'Icon=/usr/share/ccatools/sipcreator/icon.png' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' + +# Change permissions, ownership for CCA Tools +sudo chown -R bcadmin:bcadmin '/home/bcadmin/Desktop/CCA Tools' +sudo chown -R bcadmin:bcadmin '/usr/share/ccatools/sipcreator' +sudo find '/home/bcadmin/Desktop/CCA Tools' -type d -exec chmod 755 {} \; +sudo find '/home/bcadmin/Desktop/CCA Tools' -type f -exec chmod 644 {} \; + +# Make files executable +sudo chmod u+x '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' +sudo chmod u+x /usr/share/ccatools/sipcreator/launch diff --git a/install.sh b/install.sh index 7b2b6ea..f0a6073 100755 --- a/install.sh +++ b/install.sh @@ -1,54 +1,46 @@ #!/bin/bash -### Install script for CCA SIP Creator in Bitcurator +### Install script for CCA SIP Creator in Bitcurator 4/Ubuntu 22 -# Update submodules git submodule update --init --recursive -# Make /usr/share/ccatools if doesn't already exist if [ ! 
-d /usr/share/ccatools ]; then sudo mkdir /usr/share/ccatools fi -# Delete /usr/share directory for SIP Creator if it already exists -if [ -d /usr/share/ccatools/sipcreator ]; then - sudo rm -rf /usr/share/ccatools/sipcreator -fi +sipcreator_dir="/usr/share/ccatools/sipcreator/" -# Make /usr/share directory for SIP Creator -sudo mkdir /usr/share/ccatools/sipcreator - -# Move files into /usr/share/ccatools/sipcreator -sudo cp main.py /usr/share/ccatools/sipcreator -sudo cp launch /usr/share/ccatools/sipcreator -sudo cp design.py /usr/share/ccatools/sipcreator -sudo cp design.ui /usr/share/ccatools/sipcreator -sudo cp icon.png /usr/share/ccatools/sipcreator -sudo cp LICENSE /usr/share/ccatools/sipcreator -sudo cp README.md /usr/share/ccatools/sipcreator -sudo cp deps/dfxml/python/dfxml.py /usr/share/ccatools/sipcreator -sudo cp deps/dfxml/python/Objects.py /usr/share/ccatools/sipcreator -sudo cp deps/dfxml/python/walk_to_dfxml.py /usr/share/ccatools/sipcreator - -# Make "CCA Tools" folder on Desktop if doesn't already exist -if [ ! 
-d "/home/bcadmin/Desktop/CCA Tools" ]; then - sudo mkdir "/home/bcadmin/Desktop/CCA Tools" +if [ -d $sipcreator_dir ]; then + sudo rm -rf $sipcreator_dir fi +sudo mkdir $sipcreator_dir + +sudo cp main.py $sipcreator_dir +sudo cp launch $sipcreator_dir +sudo cp design.py $sipcreator_dir +sudo cp design.ui $sipcreator_dir +sudo cp icon.png $sipcreator_dir +sudo cp LICENSE $sipcreator_dir +sudo cp README.md $sipcreator_dir +sudo cp deps/dfxml/python/dfxml.py $sipcreator_dir +sudo cp deps/dfxml/python/Objects.py $sipcreator_dir +sudo cp deps/dfxml/python/walk_to_dfxml.py $sipcreator_dir + # Create launch.desktop file -sudo touch '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' -echo '[Desktop Entry]' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' -echo 'Type=Application' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' -echo 'Name=SIP Creator' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' -echo 'Exec=/usr/share/ccatools/sipcreator/launch' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' -echo 'Icon=/usr/share/ccatools/sipcreator/icon.png' | sudo tee --append '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' - -# Change permissions, ownership for CCA Tools -sudo chown -R bcadmin:bcadmin '/home/bcadmin/Desktop/CCA Tools' -sudo chown -R bcadmin:bcadmin '/usr/share/ccatools/sipcreator' -sudo find '/home/bcadmin/Desktop/CCA Tools' -type d -exec chmod 755 {} \; -sudo find '/home/bcadmin/Desktop/CCA Tools' -type f -exec chmod 644 {} \; - -# Make files executable -sudo chmod u+x '/home/bcadmin/Desktop/CCA Tools/SIP Creator.desktop' +launch_file="/usr/share/applications/SIPCreator.desktop" + +if [ -f $launch_file ]; then + sudo rm -rf $launch_file +fi + +sudo touch $launch_file +echo '[Desktop Entry]' | sudo tee --append $launch_file +echo 'Type=Application' | sudo tee --append $launch_file +echo 'Name=SIP Creator' | sudo tee --append $launch_file +echo 
'Exec=/usr/share/ccatools/sipcreator/launch' | sudo tee --append $launch_file +echo 'Icon=/usr/share/ccatools/sipcreator/icon.png' | sudo tee --append $launch_file +echo 'Categories=Forensics and Reporting' | sudo tee --append $launch_file + +sudo chown -R bcadmin:bcadmin $sipcreator_dir sudo chmod u+x /usr/share/ccatools/sipcreator/launch diff --git a/main.py b/main.py index 132848e..387c08a 100644 --- a/main.py +++ b/main.py @@ -3,7 +3,7 @@ (c) Canadian Centre for Architecture Developed by Tessa Walsh -2017-2021 +2017-2023 MIT License """ import csv @@ -63,7 +63,7 @@ def convert_size(size): s = round(size / p) s = str(s) s = s.replace(".0", "") - return "%s %s" % (s, size_name[i]) + return "{} {}".format(s, size_name[i]) class CheckableDirModel(QDirModel): @@ -162,7 +162,10 @@ def create_sip(self, files_to_process, sip_dir, bag_files, scan_for_pii): # Bag files or write checksum manifest. if bag_files: - subprocess.call("bagit.py --processes 4 '{}'".format(sip_dir), shell=True) + # TODO: Multithread bagging via --processes when bug described at + # https://github.com/LibraryOfCongress/bagit-python/issues/130 is + # resolved. + subprocess.call("cd ~ && bagit.py '{}'".format(sip_dir), shell=True) else: md5deep_cmd = "cd '{}' && md5deep -rl ../objects > checksum.md5".format( metadata_dir @@ -226,8 +229,8 @@ def write_csv_row(writer, sip_path, bag_files): if mtimes: date_earliest = min(mtimes)[:10] date_latest = max(mtimes)[:10] - date_statement = "{} - {}".format(date_earliest[:4], date_latest[:4]) - if date_earliest == date_latest: + date_statement = "{}-{}".format(date_earliest[:4], date_latest[:4]) + if date_earliest[:4] == date_latest[:4]: date_statement = date_earliest[:4] # Write scope and content note from information in brunnhilde reports. 
@@ -259,7 +262,7 @@ def write_csv_row(writer, sip_path, bag_files): file_formats.append(row[0]) file_formats = [format_ or "Unidentified" for format_ in file_formats] formats_list = ", ".join(file_formats) - scope_content = 'Original directory name: "{}". Most common file formats: {}'.format( - os.path.basename(sip_path), formats_list + scope_content = "Most common file formats: {}".format( + formats_list ) @@ -325,7 +328,7 @@ def about_dialog(self): QMessageBox.information( self, "About", - "SIP Creator v1.0.0\nCanadian Centre for Architecture\nDeveloper: Tessa Walsh\n2018-2021\nMIT License\nhttps://github.com/CCA-Public/sipcreator", + "SIP Creator v1.1.0\nCanadian Centre for Architecture\nDeveloper: Tessa Walsh\n2018-2023\nMIT License\nhttps://github.com/CCA-Public/sipcreator", ) def browse_source(self): diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5603c37 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +-r requirements/base.txt diff --git a/requirements/base.txt b/requirements/base.txt new file mode 100644 index 0000000..4870872 --- /dev/null +++ b/requirements/base.txt @@ -0,0 +1,3 @@ +bagit +brunnhilde +pyqt5 diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000..4c19f53 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,5 @@ +-r base.txt + +pytest==6.2.4 +pytest-mock==3.6.1 +tox==3.23.1 diff --git a/test-install.sh b/test-install.sh new file mode 100755 index 0000000..24c8c0a --- /dev/null +++ b/test-install.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +### Install SIP Creator for testing + +# Update submodules +git submodule update --init --recursive + +# Make /usr/share/ccatools if doesn't already exist +if [ ! 
-d /usr/share/ccatools ]; then + sudo mkdir /usr/share/ccatools +fi + +# Delete /usr/share directory for SIP Creator if it already exists +if [ -d /usr/share/ccatools/sipcreator ]; then + sudo rm -rf /usr/share/ccatools/sipcreator +fi + +# Make /usr/share directory for SIP Creator +sudo mkdir /usr/share/ccatools/sipcreator + +# Move files into /usr/share/ccatools/sipcreator +sudo cp main.py /usr/share/ccatools/sipcreator +sudo cp launch /usr/share/ccatools/sipcreator +sudo cp design.py /usr/share/ccatools/sipcreator +sudo cp design.ui /usr/share/ccatools/sipcreator +sudo cp icon.png /usr/share/ccatools/sipcreator +sudo cp LICENSE /usr/share/ccatools/sipcreator +sudo cp README.md /usr/share/ccatools/sipcreator +sudo cp deps/dfxml/python/dfxml.py /usr/share/ccatools/sipcreator +sudo cp deps/dfxml/python/Objects.py /usr/share/ccatools/sipcreator +sudo cp deps/dfxml/python/walk_to_dfxml.py /usr/share/ccatools/sipcreator + +sudo cp deps/dfxml/python/dfxml.py . +sudo cp deps/dfxml/python/Objects.py . +sudo cp deps/dfxml/python/walk_to_dfxml.py . 
diff --git a/test.py b/test.py new file mode 100644 index 0000000..5aba6df --- /dev/null +++ b/test.py @@ -0,0 +1,116 @@ +import os + +import bagit +import pytest + +from main import SIPThread + + +def is_non_zero_file(filepath): + return os.path.isfile(filepath) and os.path.getsize(filepath) > 0 + + +def test_create_sip(tmp_path): + OUTPUT_DIR = str(tmp_path / "output") + SIP_DIR = str(tmp_path / "dest") + + for dir_ in (OUTPUT_DIR, SIP_DIR): + os.makedirs(dir_) + + sip_thread = SIPThread( + files_to_process=[ + os.path.abspath("./requirements"), + os.path.abspath("./README.md"), + os.path.abspath("./.github/workflows/test.yml") + ], + destination=OUTPUT_DIR, + sip_dir=SIP_DIR, + bag_files=False, + scan_for_pii=False + ) + sip_thread.run() + + assert is_non_zero_file(os.path.join(OUTPUT_DIR, "description.csv")) + + OBJECTS_DIR = os.path.join(SIP_DIR, "objects") + METADATA_DIR = os.path.join(SIP_DIR, "metadata") + SUBDOC_DIR = os.path.join(METADATA_DIR, "submissionDocumentation") + BRUNNHILDE_DIR = os.path.join(SUBDOC_DIR, "brunnhilde") + + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "requirements", "base.txt")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "requirements", "test.txt")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "README.md")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "test.yml")) + + assert os.path.isdir(BRUNNHILDE_DIR) + assert is_non_zero_file(os.path.join(BRUNNHILDE_DIR, "report.html")) + + assert is_non_zero_file(os.path.join(SUBDOC_DIR, "dfxml.xml")) + + assert is_non_zero_file(os.path.join(METADATA_DIR, "checksum.md5")) + + +def test_create_sip_bagged(tmp_path): + OUTPUT_DIR = str(tmp_path / "output") + SIP_DIR = str(tmp_path / "dest") + + for dir_ in (OUTPUT_DIR, SIP_DIR): + os.makedirs(dir_) + + sip_thread = SIPThread( + files_to_process=[ + os.path.abspath("./requirements"), + os.path.abspath("./README.md"), + os.path.abspath("./.github/workflows/test.yml") + ], + destination=OUTPUT_DIR, + sip_dir=SIP_DIR, + 
bag_files=True, + scan_for_pii=False + ) + sip_thread.run() + + OBJECTS_DIR = os.path.join(SIP_DIR, "data", "objects") + METADATA_DIR = os.path.join(SIP_DIR, "data", "metadata") + + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "requirements", "base.txt")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "requirements", "test.txt")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "README.md")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "test.yml")) + + bag = bagit.Bag(SIP_DIR) + assert bag.validate() + + assert not os.path.isfile(os.path.join(METADATA_DIR, "checksum.md5")) + + +def test_bulk_extractor(tmp_path): + OUTPUT_DIR = str(tmp_path / "output") + SIP_DIR = str(tmp_path / "dest") + + for dir_ in (OUTPUT_DIR, SIP_DIR): + os.makedirs(dir_) + + sip_thread = SIPThread( + files_to_process=[ + os.path.abspath("./requirements"), + os.path.abspath("./README.md"), + os.path.abspath("./.github/workflows/test.yml") + ], + destination=OUTPUT_DIR, + sip_dir=SIP_DIR, + bag_files=False, + scan_for_pii=True + ) + sip_thread.run() + + OBJECTS_DIR = os.path.join(SIP_DIR, "objects") + SUBDOC_DIR = os.path.join(SIP_DIR, "metadata", "submissionDocumentation") + BRUNNHILDE_BE_DIR = os.path.join(SUBDOC_DIR, "brunnhilde", "bulk_extractor") + + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "requirements", "base.txt")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "requirements", "test.txt")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "README.md")) + assert is_non_zero_file(os.path.join(OBJECTS_DIR, "test.yml")) + + assert is_non_zero_file(os.path.join(BRUNNHILDE_BE_DIR, "report.xml")) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..fee5c34 --- /dev/null +++ b/tox.ini @@ -0,0 +1,8 @@ +[tox] +envlist = py{37,38,39,310} +skipsdist = True + +[testenv] +deps = -r{toxinidir}/requirements/test.txt +skip_install = True +commands = pytest {toxinidir} --ignore=deps test.py