# Check and test ChEBI updates #18
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow that periodically checks whether a new ChEBI release is available
# and, if so, downloads and preprocesses it.
name: chebi

# Triggers: manual runs from the Actions tab plus a twice-monthly schedule.
on:
  workflow_dispatch:
  schedule:
    - cron: "0 0 1,15 * *"  # Run the workflow on the 1st and 15th day of each month

# Token permissions granted to every job in this workflow.
# NOTE(review): no step in this workflow deploys to Pages or requests an OIDC
# token, so `pages` and `id-token` look unnecessary - confirm before removing.
permissions:
  contents: write
  pages: write
  id-token: write

jobs:
  # Job 1: compare the date of the ChEBI SDF file on the FTP server with the
  # release date recorded in datasources/chebi/README.md.
  check_release:
    runs-on: ubuntu-latest
    name: Check latest release date
    # Variables written to $GITHUB_ENV are only visible to later steps of the
    # same job, so the decision is published as a job output that the
    # download_file job reads through the `needs` context.
    outputs:
      download_file: ${{ steps.check_date.outputs.download_file }}
    steps:
      # step 1: checkout the repository
      - name: Download GitHub repo
        uses: actions/checkout@v4
      # step 2: check the release date for the latest ChEBI release
      - name: Check date
        id: check_date
        run: |
          ## Fetch the directory listing into a temp file so that no file of
          ## the checked-out repository is overwritten or removed
          ## NOTE(review): the URL is pinned to rel223 - confirm that this is
          ## the listing that should be watched for new releases
          listing=$(mktemp)
          wget -q -O "$listing" https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel223/SDF/
          ##Extract the string containing pattern 'ChEBI_complete_3star.sdf.gz'
          string=$(grep ">ChEBI_complete_3star.sdf.gz<" "$listing")
          echo "$string"
          ##Extract the date from the latest release (up to the day)
          date_new=$(echo "$string" | grep -oP '<td align="right">\K[0-9-]+\s[0-9:]+(?=\s+</td>)' | awk '{print $1}')
          echo "$date_new"
          ##Extract the date from the ChEBI README file (up to the day)
          date_old=$(grep -oP 'released on \K[0-9-]+' datasources/chebi/README.md | awk '{print $1}')
          echo "$date_old"
          ##Clean up
          rm -f "$listing"
          ##Fail loudly instead of silently reporting "no update" when the
          ##listing could not be parsed
          if [[ -z "$date_new" ]]; then
            echo "::error::Could not extract the release date from the ChEBI directory listing."
            exit 1
          fi
          ##Print the dates
          echo "Date of latest release: $date_new", "Date of release of the current version: $date_old"
          ##Compare the dates (string comparison; correct for YYYY-MM-DD) and
          ##publish the result as a step output
          if [[ "$date_new" > "$date_old" ]]; then
            download=true
          else
            download=false
          fi
          echo "download_file=$download" >> "$GITHUB_OUTPUT"
          ##Download?
          echo "download is $download"

  # Job 2: download the SDF file and run the preprocessing; skipped unless
  # check_release reported a newer release.
  download_file:
    needs: check_release
    name: Download release
    if: needs.check_release.outputs.download_file == 'true'
    runs-on: ubuntu-latest
    steps:
      # Every job starts with an empty workspace; the repository is needed
      # for the repo-relative paths used by the steps below.
      - name: Download GitHub repo
        uses: actions/checkout@v4
      - name: Download data
        run: |
          ##Download ChEBI SDF file
          wget https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel223/SDF/ChEBI_complete_3star.sdf.gz
          ##Unzip gz file:
          gunzip ChEBI_complete_3star.sdf.gz
          ##Check file size if available
          ls
          ##Print file size
          FILENAME=ChEBI_complete_3star.sdf
          FILESIZE=$(stat -c%s "$FILENAME")
          echo "Size of $FILENAME = $FILESIZE bytes."
          ##Create temp. folder to store the data in
          mkdir -p mapping_preprocessing/datasources/chebi/data
          ##Move the SDF file to the expected location
          mv ChEBI_complete_3star.sdf mapping_preprocessing/datasources/chebi/data
      # step 3: run the R script for ChEBI preprocessing (to be made into a separate job)
      - name: Run R script to process SDF data
        run: |
          ##Check R version:
          R --version
          ##Install the readr package:
          #Rscript -e 'install.packages("readr")'
          ##Running the R script:
          #Rscript r/chebi_processing.R
          #echo "R preprocessing of ChEBI data is done"
      # step 4: run the Java .jar for ChEBI preprocessing (to be made into a separate job)
      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
      - name: Run .jar to process SDF data
        run: |
          inputFile="mapping_preprocessing/datasources/chebi/data/ChEBI_complete_3star.sdf"
          outputDir="mapping_preprocessing/datasources/chebi/data"
          ## NOTE(review): no step here builds the jar, so it must already
          ## exist at this path in the repository; the "prerocessing"
          ## spelling is kept as-is - confirm it matches the built artifact
          java -cp target/mapping_prerocessing-0.0.1-jar-with-dependencies.jar org.sec2pri.chebi_sdf "$inputFile" "$outputDir"
          echo "Java preprocessing of ChEBI data is done."
      # TODO: add a commit/push step if we intend on pushing the processed data from the workflow