Skip to content

Commit

Permalink
Merge pull request #8 from nyudlts/feature/add-adoc-tweaks
Browse files Browse the repository at this point in the history
add modifications to support ADOC functionality
  • Loading branch information
dmnyu authored Feb 26, 2024
2 parents 67a7609 + fcc4cc2 commit 9f12b1d
Show file tree
Hide file tree
Showing 24 changed files with 3,935 additions and 21 deletions.
103 changes: 83 additions & 20 deletions bag.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@ import (
var manifestPtn = regexp.MustCompile("manifest-.*\\.txt$")
var tagmanifestPtn = regexp.MustCompile("tagmanifest-.*\\.txt$")

// getFilesOrDirsParams bundles the arguments for getFilesOrDirsInBag.
// Callers in this file build it with positional (unkeyed) literals, so the
// order of the fields must not change.
type getFilesOrDirsParams struct {
// Location is the root path that is walked.
Location string
// Matcher is tested against each candidate path; only matching paths are returned.
Matcher *regexp.Regexp
// FindFiles selects what is collected: true for non-directory entries, false for directories.
FindFiles bool
// ReturnFirst stops the walk after the first match when true.
ReturnFirst bool
}

func ValidateBag(bagLocation string, fast bool, complete bool) error {
errs := []error{}
storedOxum, err := GetOxum(bagLocation)
Expand All @@ -27,7 +34,7 @@ func ValidateBag(bagLocation string, fast bool, complete bool) error {
return err
}

if fast == true {
if fast {
log.Printf("- INFO - %s valid according to Payload Oxum", bagLocation)
return nil
}
Expand Down Expand Up @@ -273,31 +280,87 @@ func directoryExists(inputDir string) error {
}

func GetFilesInBag(bagLocation string) ([]string, error) {
bagFiles := []string{}
err := filepath.Walk(bagLocation, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() != true {
bagFiles = append(bagFiles, path)
}
return nil
})
return getFilesOrDirsInBag(getFilesOrDirsParams{bagLocation, regexp.MustCompile(`.*`), true, false})
}

// FindFileInBag returns the path of the first file under bagLocation whose
// path matches matcher.
//
// The search stops at the first match. An error is returned when the walk
// fails or when no file matches the pattern.
func FindFileInBag(bagLocation string, matcher *regexp.Regexp) (string, error) {
	results, err := getFilesOrDirsInBag(getFilesOrDirsParams{
		Location:    bagLocation,
		Matcher:     matcher,
		FindFiles:   true,
		ReturnFirst: true,
	})
	if err != nil {
		return "", err
	}
	if len(results) == 0 {
		// The message text is kept unchanged so existing callers that inspect
		// it keep working.
		return "", fmt.Errorf("Could not locate file pattern in bag")
	}
	return results[0], nil
}

func FindFileInBag(bagLocation string, matcher *regexp.Regexp) (string, error) {
bagFiles, err := GetFilesInBag(bagLocation)
func FindFilesInBag(bagLocation string, matcher *regexp.Regexp) ([]string, error) {
return getFilesOrDirsInBag(getFilesOrDirsParams{bagLocation, matcher, true, false})
}

func GetDirsInBag(bagLocation string) ([]string, error) {
return getFilesOrDirsInBag(getFilesOrDirsParams{bagLocation, regexp.MustCompile(`.*`), false, false})
}

func FindDirInBag(bagLocation string, matcher *regexp.Regexp) (string, error) {
results, err := getFilesOrDirsInBag(getFilesOrDirsParams{bagLocation, matcher, false, true})
if err != nil {
return "", err
}
for _, p := range bagFiles {
if matcher.MatchString(p) {
return p, nil
}
if len(results) == 0 {
return "", fmt.Errorf("Could not locate directory pattern in bag")
}
return results[0], nil
}

func FindDirsInBag(bagLocation string, matcher *regexp.Regexp) ([]string, error) {
return getFilesOrDirsInBag(getFilesOrDirsParams{bagLocation, matcher, false, false})
}

// getFilesOrDirsInBag walks params.Location and returns the paths that match
// params.Matcher.
//
// params.FindFiles selects which entries are considered:
//   - true:  only non-directory entries
//   - false: only directories
//
// params.ReturnFirst controls how many matches are returned:
//   - true:  the walk stops at the first match, so the result holds at most
//     one path
//   - false: every match is returned
//
// The pattern is applied to the full walked path, not just the base name, and
// params.Matcher must be non-nil. A search without matches yields an empty,
// non-nil slice; if the walk reports an error, that error is returned together
// with a nil slice.
func getFilesOrDirsInBag(params getFilesOrDirsParams) ([]string, error) {
	results := []string{}

	err := filepath.Walk(params.Location,
		func(path string, info os.FileInfo, err error) error {
			if err != nil {
				return err
			}

			// Skip entries of the wrong kind: directories when looking for
			// files, and non-directories when looking for directories.
			if info.IsDir() == params.FindFiles {
				return nil
			}

			if !params.Matcher.MatchString(path) {
				return nil
			}

			results = append(results, path)
			if params.ReturnFirst {
				// One match is enough; abort the remainder of the walk.
				return filepath.SkipAll
			}
			return nil
		})
	if err != nil {
		return nil, err
	}
	return results, nil
}
199 changes: 199 additions & 0 deletions bag_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package go_bagit

import (
"fmt"
"path/filepath"
"regexp"
"slices"
"strings"
"testing"
)
Expand Down Expand Up @@ -55,3 +58,199 @@ func TestValidateBag(t *testing.T) {
})
}
}

// TestGetFilesInBag checks that GetFilesInBag lists exactly the files of the
// valid-with-subdirs fixture.
func TestGetFilesInBag(t *testing.T) {
	t.Run("Test GetFilesInBag()", func(t *testing.T) {
		expected := []string{
			"test/valid-with-subdirs/bagit.txt",
			"test/valid-with-subdirs/bag-info.txt",
			"test/valid-with-subdirs/manifest-sha512.txt",
			"test/valid-with-subdirs/tagmanifest-sha512.txt",
			"test/valid-with-subdirs/data/test-file.txt",
			"test/valid-with-subdirs/data/logs/output2.log",
			"test/valid-with-subdirs/data/logs/output1.log",
		}

		actual, err := GetFilesInBag(filepath.Join("test", "valid-with-subdirs"))
		if err != nil {
			t.Fatal(err)
		}
		if len(expected) != len(actual) {
			t.Fatal("length of returned slice does not match expectations")
		}

		// Sort both sides so the comparison does not depend on the order of
		// either slice.
		slices.Sort(expected)
		slices.Sort(actual)

		var mismatches []string
		for i, w := range expected {
			if g := actual[i]; w != g {
				mismatches = append(mismatches, fmt.Sprintf("%v != %v", w, g))
			}
		}
		if len(mismatches) > 0 {
			t.Error("\n" + strings.Join(mismatches, "\n"))
		}
	})
}

// TestGetDirsInBag checks that GetDirsInBag lists exactly the directories of
// the valid-erecord-with-subdirs fixture, the bag root included.
func TestGetDirsInBag(t *testing.T) {
	t.Run("Test GetDirsInBag()", func(t *testing.T) {
		expected := []string{
			"test/valid-erecord-with-subdirs",
			"test/valid-erecord-with-subdirs/data",
			"test/valid-erecord-with-subdirs/data/logs",
			"test/valid-erecord-with-subdirs/data/logs/transfers",
			"test/valid-erecord-with-subdirs/data/logs/transfers/fales_mss2023_cuid39675-48b63462-0fec-4f6a-8913-1f2e2f9168e5",
			"test/valid-erecord-with-subdirs/data/logs/transfers/fales_mss2023_cuid39675-48b63462-0fec-4f6a-8913-1f2e2f9168e5/logs",
			"test/valid-erecord-with-subdirs/data/objects",
			"test/valid-erecord-with-subdirs/data/objects/cuid39675",
			"test/valid-erecord-with-subdirs/data/objects/metadata",
			"test/valid-erecord-with-subdirs/data/objects/metadata/transfers",
			"test/valid-erecord-with-subdirs/data/objects/metadata/transfers/fales_mss2023_cuid39675-48b63462-0fec-4f6a-8913-1f2e2f9168e5",
			"test/valid-erecord-with-subdirs/data/objects/submissionDocumentation",
			"test/valid-erecord-with-subdirs/data/objects/submissionDocumentation/transfer-fales_mss2023_cuid39675-48b63462-0fec-4f6a-8913-1f2e2f9168e5",
		}

		actual, err := GetDirsInBag(filepath.Join("test", "valid-erecord-with-subdirs"))
		if err != nil {
			t.Fatal(err)
		}
		if len(expected) != len(actual) {
			t.Fatal("length of returned slice does not match expectations")
		}

		// Sort both sides so the comparison does not depend on the order of
		// either slice.
		slices.Sort(expected)
		slices.Sort(actual)

		var mismatches []string
		for i, w := range expected {
			if g := actual[i]; w != g {
				mismatches = append(mismatches, fmt.Sprintf("%v != %v", w, g))
			}
		}
		if len(mismatches) > 0 {
			t.Fatal("\n" + strings.Join(mismatches, "\n"))
		}
	})
}

// TestFindFileInBag checks that FindFileInBag resolves a file name pattern to
// the single matching path in the valid-with-subdirs fixture.
func TestFindFileInBag(t *testing.T) {
	t.Run("Test FindFileInBag()", func(t *testing.T) {
		const expected = "test/valid-with-subdirs/data/logs/output2.log"

		root := filepath.Join("test", "valid-with-subdirs")
		pattern := regexp.MustCompile("output2.log$")

		actual, err := FindFileInBag(root, pattern)
		if err != nil {
			t.Error(err)
		}
		if expected != actual {
			t.Errorf("\n%v !=\n%v", expected, actual)
		}
	})
}

// TestFindFilesInBag checks that FindFilesInBag returns every file in the
// valid-erecord-with-subdirs fixture whose path matches the pattern.
func TestFindFilesInBag(t *testing.T) {
	t.Run("Test FindFilesInBag()", func(t *testing.T) {
		expected := []string{
			"test/valid-erecord-with-subdirs/fales_mss2023_cuid39675_aspace_wo.tsv",
			"test/valid-erecord-with-subdirs/data/objects/metadata/transfers/fales_mss2023_cuid39675-48b63462-0fec-4f6a-8913-1f2e2f9168e5/fales_mss2023_cuid39675_aspace_wo.tsv",
		}

		root := filepath.Join("test", "valid-erecord-with-subdirs")
		pattern := regexp.MustCompile("_aspace_wo.tsv$")

		actual, err := FindFilesInBag(root, pattern)
		if err != nil {
			t.Error(err)
		}
		if len(expected) != len(actual) {
			t.Fatal("length of returned slice does not match expectations")
		}

		// Sort both sides so the comparison does not depend on the order of
		// either slice.
		slices.Sort(expected)
		slices.Sort(actual)

		var mismatches []string
		for i, w := range expected {
			if g := actual[i]; w != g {
				mismatches = append(mismatches, fmt.Sprintf("%v != %v", w, g))
			}
		}
		if len(mismatches) > 0 {
			t.Fatal("\n" + strings.Join(mismatches, "\n"))
		}
	})
}

// TestFindDirInBag checks that FindDirInBag resolves a directory pattern to
// the single matching path in the valid-erecord-with-subdirs fixture.
func TestFindDirInBag(t *testing.T) {
	t.Run("Test FindDirInBag()", func(t *testing.T) {
		const expected = "test/valid-erecord-with-subdirs/data/objects/cuid39675"

		root := filepath.Join("test", "valid-erecord-with-subdirs")
		pattern := regexp.MustCompile("objects/cuid39675$")

		actual, err := FindDirInBag(root, pattern)
		if err != nil {
			t.Error(err)
		}
		if expected != actual {
			t.Errorf("\n%v !=\n%v", expected, actual)
		}
	})
}

// TestFindDirsInBag checks that FindDirsInBag returns every directory in the
// valid-erecord-with-subdirs fixture whose path matches the pattern.
func TestFindDirsInBag(t *testing.T) {
	t.Run("Test FindDirsInBag()", func(t *testing.T) {
		expected := []string{
			"test/valid-erecord-with-subdirs/data/logs/transfers/fales_mss2023_cuid39675-48b63462-0fec-4f6a-8913-1f2e2f9168e5",
			"test/valid-erecord-with-subdirs/data/objects/metadata/transfers/fales_mss2023_cuid39675-48b63462-0fec-4f6a-8913-1f2e2f9168e5",
		}

		root := filepath.Join("test", "valid-erecord-with-subdirs")
		pattern := regexp.MustCompile("/fales_mss2023_cuid39675-48b63462-0fec-4f6a-8913-1f2e2f9168e5$")

		actual, err := FindDirsInBag(root, pattern)
		if err != nil {
			t.Error(err)
		}
		if len(expected) != len(actual) {
			t.Fatalf("length of returned slice, (%d) does not match expectations (%d)", len(actual), len(expected))
		}

		// Sort both sides so the comparison does not depend on the order of
		// either slice.
		slices.Sort(expected)
		slices.Sort(actual)

		var mismatches []string
		for i, w := range expected {
			if g := actual[i]; w != g {
				mismatches = append(mismatches, fmt.Sprintf("%v != %v", w, g))
			}
		}
		if len(mismatches) > 0 {
			t.Fatal("\n" + strings.Join(mismatches, "\n"))
		}
	})
}
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/nyudlts/go-bagit

go 1.17
go 1.21.4

require github.com/spf13/cobra v1.3.0

Expand Down
8 changes: 8 additions & 0 deletions tags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,12 @@ func TestTags(t *testing.T) {
t.Errorf("Wanted %s got %s", want, got)
}
})

t.Run("Test Querying Non-existant Tag", func(t *testing.T) {
want := ""
got := bagit.Tags["daea5275-bac0-486e-8cac-f1a061c623f6"]
if want != got {
t.Errorf("Wanted %s got %s", want, got)
}
})
}
21 changes: 21 additions & 0 deletions test/valid-erecord-with-subdirs/bag-info.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Bag-Software-Agent: bagit.py v1.7.0 <https://github.com/LibraryOfCongress/bagit-python>
Bagging-Date: 2023-11-21
Contact-Email: [email protected]
Contact-Name: archivist
Contact-Phone: +1-212.555.5555
External-Identifier: 2255f60b-2b07-4ecc-9bfd-3421188493d7
Internal-Sender-Identifier: dlts/test
Organization-Address: 70 Washington Square South, New York, NY 10012
Payload-Oxum: 440656.15
Source-Organization: ACM
nyu-dl-content-classification: open
nyu-dl-content-type: electronic_records
nyu-dl-hostname: pco01la-1646s.cfs.its.nyu.edu
nyu-dl-package-format: 0.1.0
nyu-dl-pathname: /var/archivematica/tmp/fales_mss2023_cuid39675-2255f60b-2b07-4ecc-9bfd-3421188493d7
nyu-dl-project-name: dlts/test
nyu-dl-resource-id: MSS.2023
nyu-dl-resource-title: Weatherly Test Records
nyu-dl-resource-url: /repositories/3/resources/3472
nyu-dl-rstar-collection-id: b9612d5d-619a-4ceb-b620-d816e4b4340b
nyu-dl-use-statement: video-reading-room
2 changes: 2 additions & 0 deletions test/valid-erecord-with-subdirs/bagit.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
BagIt-Version: 0.97
Tag-File-Character-Encoding: UTF-8
Loading

0 comments on commit 9f12b1d

Please sign in to comment.