Skip to content

Commit

Permalink
Update chebi.yml
Browse files (browse the repository at this point in the history)
  • Loading branch information
jmillanacosta authored Jan 26, 2024
1 parent 0251590 commit 0d35816
Showing 1 changed file with 37 additions and 84 deletions.
121 changes: 37 additions & 84 deletions .github/workflows/chebi.yml
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,11 @@ name: chebi

on:
workflow_dispatch:
pull_request: # tests whether it is working on PR
paths:
- '.github/workflows/chebi.yml'
pull_request:
paths:
- '.github/workflows/chebi.yml' # Corrected the path syntax
schedule:
- cron: "0 0 1,15 * *" # Run the workflow on the 1st and 15th day of each month
- cron: "0 0 1,15 * *"

permissions:
contents: write
@@ -19,97 +19,65 @@ jobs:
name: Check latest release date

steps:
# checkout the repository
- name: Checkout
uses: actions/checkout@v4
# Download current version from Zenodo

- name: Download file from Zenodo
env:
zenodo_token: ${{ secrets.ZENODO }}
run: |
# Set up variables
chmod +x datasources/chebi/config
datasources/chebi/config .
zenodo_file_id="8348142"
file_name=$to_check_from_zenodo
echo file name: $file_name
echo 'Dowloading from: https://zenodo.org/api/files/${zenodo_file_id}/${file_name}'
echo ZENODO_FILE_NAME=$file_name >>$GITHUB_ENV
echo "File name: $file_name"
echo "Downloading from: https://zenodo.org/api/files/${zenodo_file_id}/${file_name}"
echo "ZENODO_FILE_NAME=$file_name" >> $GITHUB_ENV
# Request Zenodo API to download the file
curl -H "Authorization: Bearer $zenodo_token" -LJO 'https://zenodo.org/api/files/${zenodo_file_id}/${file_name}'
# Verify the downloaded file
curl -H "Authorization: Bearer $zenodo_token" -LJO "https://zenodo.org/api/files/${zenodo_file_id}/${file_name}"
ls -l $file_name
# check the release date for the latest ChEBI release
- name: Check for new ChEBI release
id: check_download
run: |
echo 'Accessing the ChEBI archive'
wget https://ftp.ebi.ac.uk/pub/databases/chebi/archive/
## Read config
. datasources/chebi/config .
## Check date for last element in index
string=$(tail -4 index.html | head -1)
##Extract the date from the latest release (up to the day)
date_new=$(echo "$string" | grep -oP '<td align="right">\K[0-9-]+\s[0-9:]+(?=\s+</td>)' | awk '{print $1}')
release=$(echo "$string" | grep -oP '(?<=a href="rel)\d\d\d')
echo "RELEASE=$release" >>$GITHUB_ENV
echo "RELEASE=$release" >> $GITHUB_ENV
echo "DATE_NEW=$date_new" >> $GITHUB_ENV
##Extract the date from the ChEBI README file
date_old=$date
##Compare the dates and set GITHUB_ENV variable if date_new is more recent
timestamp1=$(date -d "$date_new" +%s)
timestamp2=$(date -d "$date_old" +%s)
if [ "$timestamp1" -gt "$timestamp2" ]; then
echo "DOWNLOAD_FILE=true" >>$GITHUB_ENV
echo 'New release available', "$release"
echo "DOWNLOAD_FILE=true" >> $GITHUB_ENV
echo "New release available: $release"
else
echo "DOWNLOAD_FILE=false" >>$GITHUB_ENV
echo 'No new release available'
echo "DOWNLOAD_FILE=false" >> $GITHUB_ENV
echo "No new release available"
fi
echo "Date of latest release: $date_new", "Date of release of the current version: $date_old"
##Clean up
echo "Date of latest release: $date_new, Date of release of the current version: $date_old"
rm index.htm*
process_file:
name: Download and test latest ChEBI release
needs: check_release
# if: contains(github.DOWNLOAD_FILE, 'true')
runs-on: ubuntu-latest
steps:
- name: Download data
run: |
##Download ChEBI SDF file
wget https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${{ github.RELEASE }}/SDF/ChEBI_complete_3star.sdf.gz
##Unzip gz file:
wget https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${{ env.RELEASE }}/SDF/ChEBI_complete_3star.sdf.gz
gunzip ChEBI_complete_3star.sdf.gz
##Check file size if available
ls
##Print file size
FILENAME=ChEBI_complete_3star.sdf
FILESIZE=$(stat -c%s "$FILENAME")
echo "Size of $FILENAME = $FILESIZE bytes."
##Create temp. folder to store the data in
mkdir -p mapping_preprocessing/datasources/chebi/data
##Move the SDF file to the expected location
mv ChEBI_complete_3star.sdf mapping_preprocessing/datasources/chebi/data
# step 3: run the R script for ChEBI preprocessing (to be made into a separate job)
- name: Run R script to process SDF data
run: |
##Check R version:
R --version
##Install the readr package:
#Rscript -e 'install.packages("readr")'
##Running the R script:
#Rscript r/chebi_processing.R
#echo "R preprocessing of ChEBI data is done"
# step 4: run the Java .jar for ChEBI preprocessing (to be made into a separate job)
- name: Set up Java
uses: actions/setup-java@v2
with:
@@ -120,72 +88,58 @@ jobs:
run: |
inputFile="mapping_preprocessing/datasources/chebi/data/ChEBI_complete_3star.sdf"
outputDir="mapping_preprocessing/datasources/chebi/data"
# Run Java program and capture its exit code
java -cp target/mapping_prerocessing-0.0.1-jar-with-dependencies.jar org.sec2pri.chebi_sdf "$inputFile" "$outputDir"
# Check the exit status of the Java program
if [ $? -eq 0 ]; then
# Java program succeeded
echo "Java preprocessing of ChEBI data is done."
echo "FAILED=false" >>$GITHUB_ENV
echo "FAILED=false" >> $GITHUB_ENV
else
# Java program failed
echo "Java preprocessing of ChEBI data failed."
echo "FAILED=true" >>$GITHUB_ENV
echo "FAILED=true" >> $GITHUB_ENV
fi
- name: Diff versions
run: |
old=${{ github.ZENODO_FILE_NAME }}
new=mapping_preprocessing/datasources/chebi/data/${{ github.ZENODO_FILE_NAME }}
column_name="secondaryID"
# Extract the secondaryID column from both files and sort them
old=${{ env.ZENODO_FILE_NAME }}
new=mapping_preprocessing/datasources/chebi/data/${{ env.ZENODO_FILE_NAME }}
ids_old=$(cut -f 2 "$old" | sort | tr -d "\r")
ids_new=$(cut -f 2 "$new" | sort | tr -d "\r")
# Perform a diff between the sorted lists of secondaryIDs
diff_ENV=$(diff -u <(echo "$ids_old") <(echo "$ids_new"))
# Store the output
output_file="diff_ENV.txt"
# Additions
added=$(grep '^+CHEBI' "$output_file")
echo ADDED=$added >>$GITHUB_ENV
# Removals
echo "ADDED=$added" >> $GITHUB_ENV
removed=$(grep '^-CHEBI' "$output_file")
echo REMOVED=$removed >>$GITHUB_ENV
echo "REMOVED=$removed" >> $GITHUB_ENV
post-issue_update:
needs: process_file
name: Post issue about updating data
runs-on: ubuntu-latest
if: contains(github.DOWNLOAD_FILE, 'true')
if: contains(env.DOWNLOAD_FILE, 'true')
permissions:
contents: read
issues: write
steps:
- uses: actions/checkout@v3
- run: |
echo "---" >> issue.md
echo "title: Update ChEBI - release on github.DATE_NEW }}" >> issue.md
echo "title: Update ChEBI - release on ${{ env.DATE_NEW }}" >> issue.md
echo "assignees: tabbassidaloii" >> issue.md
echo "---" >> issue.md
echo "[New release for ChEBI](https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${{ github.RELEASE }}/SDF/) available from ${{ github.DATE_NEW }}." >> issue.md
echo ## Removed secondary IDs
echo ${{ github.REMOVED }}
echo ## Added secondary IDs
echo ${{ github.ADDED }}
echo "[New release for ChEBI](https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${{ env.RELEASE }}/SDF/) available from ${{ env.DATE_NEW }}." >> issue.md
echo "## Removed secondary IDs" >> issue.md
echo "${{ env.REMOVED }}" >> issue.md
echo "## Added secondary IDs" >> issue.md
echo "${{ env.ADDED }}" >> issue.md
- uses: JasonEtco/create-an-issue@v2
env:
GITHUB_TOKEN: secrets.GITHUB_TOKEN }}
with:
filename: issue.md
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

post-issue-fail:
needs: process_file
if: contains(github.FAILED, 'true')
if: contains(env.FAILED, 'true')
name: Post issue about failed test
runs-on: ubuntu-latest
permissions:
@@ -195,13 +149,12 @@ jobs:
- uses: actions/checkout@v3
- run: |
echo "---" >> issue.md
echo "title: Failed ChEBI processing for release ${{ github.RELEASE }}" >> issue.md
echo "title: Failed ChEBI processing for release ${{ env.RELEASE }}" >> issue.md
echo "assignees: tabbassidaloii" >> issue.md
echo "---" >> issue.md
echo "Processing failed for the [new release for ChEBI](https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${{ github.RELEASE }}/SDF/) available from github.DATE_NEW }}." >> issue.md
echo "Processing failed for the [new release for ChEBI](https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${{ env.RELEASE }}/SDF/) available from ${{ env.DATE_NEW }}." >> issue.md
- uses: JasonEtco/create-an-issue@v2
env:
GITHUB_TOKEN: secrets.GITHUB_TOKEN }}
with:
filename: issue.md

env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

0 comments on commit 0d35816

Please sign in to comment.