-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
jade-2023-raw-datasets: prepare logbook records
Initial version of the JADE logbook record preparation script. To be improved by MPP in the coming weeks.
- Loading branch information
1 parent
a5abd78
commit d643ff3
Showing
7 changed files
with
189 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
======================== | ||
jade-2023-raw-datasets | ||
======================== | ||
|
||
This directory contains helper scripts used to prepare the initial release of | ||
JADE data in 2023. The release includes raw datasets with accompanying logbooks and notes. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
#!/usr/bin/env python | ||
|
||
""" | ||
Create JADE logbook records. | ||
""" | ||
|
||
import json | ||
import re | ||
|
||
# Record ID given to the first logbook record; later records count up from it.
recid_start = 26100

# Value written to the "date_published" field of every record.
year_published = "2023"
|
||
|
||
def create_record(recid, path, size, checksum):
    """Create the record for the given JADE logbook.

    Args:
        recid: Record ID; stored in the record as a string.
        path: URI of the logbook PDF. The logbook number is taken from the
            digits of a ``Log<digits>.pdf`` component of this value.
        size: File size; stored unchanged in both the ``distribution`` and
            the ``files`` entries.
        checksum: Checksum value as a string; stored with an ``adler32:``
            prefix.

    Returns:
        A dict describing the logbook record, ready for JSON serialization.
    """
    # A path that does not look like "...Log<digits>.pdf" is not fatal: the
    # record is still produced, with a visible "FIXME" placeholder instead of
    # the logbook number, so that it can be corrected by hand afterwards.
    match = re.match(r"^.*Log([0-9]+)\.pdf", path)
    logbook_number = match.group(1) if match else "FIXME"

    # Placeholder value; the actual logbook period is not derived here.
    logbook_period = "1970"

    return {
        "abstract": {
            "description": (
                f"This is JADE logbook {logbook_number} from {logbook_period}."
                " FIXME add more description."
            ),
        },
        "accelerator": "DESY-PETRA",
        "collaboration": {
            "name": "JADE collaboration",
            "recid": "451",
        },
        "collections": [
            "JADE-Logbooks",
        ],
        "date_created": [
            logbook_period,
        ],
        "date_published": year_published,
        "distribution": {
            "formats": [
                "pdf",
            ],
            "number_files": 1,
            "size": size,
        },
        "experiment": "JADE",
        "files": [
            {
                "checksum": "adler32:" + checksum,
                "size": size,
                "uri": path,
            },
        ],
        "license": {
            "attribution": "CC0",
        },
        "publisher": "CERN Open Data Portal",
        "recid": str(recid),
        "title": f"JADE logbook number {logbook_number}",
        "type": {
            "primary": "Supplementaries",
            "secondary": [
                "Logbook",
            ],
        },
    }
|
||
|
||
def create_records(input_path="./inputs/eos-file-information-logbooks.txt"):
    """Create one record per logbook file listed in the EOS file index.

    Each index line is expected to have the form
    ``path=<uri> size=<integer> checksum=<value>``. Lines that do not have
    this form are skipped and do not consume a record ID.

    Args:
        input_path: Location of the EOS file index to read. Defaults to the
            index file kept under ``./inputs``.

    Returns:
        A list of record dicts, in the order of the index lines, with record
        IDs counting up from ``recid_start``.

    Raises:
        OSError: If the index file cannot be opened.
        ValueError: If the ``size`` field of a matching line is not an integer.
    """
    line_pattern = re.compile(r"^path=(.*) size=(.*) checksum=(.*)$")
    records = []
    recid = recid_start
    with open(input_path, "r", encoding="utf-8") as f:
        for line in f:
            match = line_pattern.match(line.strip())
            if not match:
                continue
            path, size, checksum = match.groups()
            records.append(create_record(recid, path, int(size), checksum))
            recid += 1
    return records
|
||
|
||
def print_records(records):
    """Write the records to standard output as indented, key-sorted JSON."""
    dump_options = {
        "indent": 2,
        "sort_keys": True,
        "ensure_ascii": False,
        "separators": (",", ": "),
    }
    serialized = json.dumps(records, **dump_options)
    print(serialized)
|
||
|
||
def main():
    """Create the JADE logbook records and print them as JSON."""
    print_records(create_records())


if __name__ == "__main__":
    main()
21 changes: 21 additions & 0 deletions
21
jade-2023-raw-datasets/inputs/eos-file-information-logbooks.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log01.pdf size=37913903 checksum=5537cb30 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log02.pdf size=38811120 checksum=8eb74619 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log03.pdf size=50652976 checksum=970d45ef | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log04.pdf size=50162033 checksum=dbcf6160 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log05.pdf size=47007176 checksum=8af84e12 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log06.pdf size=48306806 checksum=da5c1a3e | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log07.pdf size=50320784 checksum=ae36da39 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log08.pdf size=41087938 checksum=50e06402 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log09.pdf size=41056855 checksum=8c6b28a5 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log10.pdf size=40359158 checksum=a3936655 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log11.pdf size=41105169 checksum=20a2305c | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log12.pdf size=39146329 checksum=9e547ac6 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log13.pdf size=38893971 checksum=8004225d | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log14.pdf size=43056879 checksum=ed017f53 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log15.pdf size=41640363 checksum=8f39d465 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log16.pdf size=39921243 checksum=9f2a1c0f | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log17.pdf size=43588114 checksum=9efd7aa2 | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log18.pdf size=40612426 checksum=3b083c0b | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log19.pdf size=42436620 checksum=996c06fe | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log20.pdf size=40799092 checksum=ea108b6a | ||
path=root://eospublic.cern.ch//eos/opendata/jade/documentation/logbooks/Log21.pdf size=17001411 checksum=5d91f901 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/sh

# Prepare the JADE logbook records and install them as fixtures.
#
# Stop at the first failing step, so that a failed generation or validation
# never results in JSON being copied into the fixtures directory.
set -e

## 1) create EOS file indexes:

# (left commented out; the "lxplus>" prompt suggests it is run by hand there)
# lxplus> eos find --xurl --size --checksum /eos/opendata/jade/upload/documentation/LogBooks | grep pdf > ./inputs/eos-file-information-logbooks.txt

## 2) create JADE logbook records

mkdir -p outputs
python ./code/create_logbook_records.py > ./outputs/jade-logbooks.json

## 3) check the validity of resulting JSON files

jsonlint -q ./outputs/*.json

## 4) copy them to CERN Open Data fixtures directory

# The leading backslash bypasses any shell alias defined for cp.
\cp outputs/*.json ../../opendata.cern.ch/cernopendata/modules/fixtures/data/records
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters