# Check and test ChEBI updates (run #124)
# Workflow file for this run:

name: chebi

# Purpose: look up the newest release in the ChEBI archive, compare its date
# with the date provided by datasources/chebi/config and, when a newer release
# exists, test-process its SDF file. An issue is then opened that either lists
# the added/removed secondary IDs or reports that processing failed.
on:
  workflow_dispatch:
  pull_request: # tests whether it is working on PR
    paths:
      - '.github/workflows/chebi.yml'
  schedule:
    - cron: "0 0 1,15 * *" # Run the workflow on the 1st and 15th day of each month

permissions:
  contents: write
  pages: write
  id-token: write
  issues: write

jobs:
  check_new_release:
    runs-on: ubuntu-latest
    name: Check latest release date
    outputs:
      RELEASE_NUMBER: ${{ steps.check_download.outputs.RELEASE_NUMBER }}
      NEW_RELEASE: ${{ steps.check_download.outputs.NEW_RELEASE }}
      DATE_NEW: ${{ steps.check_download.outputs.DATE_NEW }}
      DATE_OLD: ${{ steps.check_download.outputs.DATE_OLD }}
    steps:
      # checkout the repository
      - name: Checkout
        uses: actions/checkout@v4
      # check the release date for the latest ChEBI release
      - name: Check for new ChEBI release
        id: check_download
        run: |
          ## Read config (sourced, so its variables are visible in this shell)
          . datasources/chebi/config .
          echo 'Accessing the ChEBI archive'
          wget https://ftp.ebi.ac.uk/pub/databases/chebi/archive/ -O chebi_index.html
          ## The latest release is the 4th line from the end of the index page.
          ## Extract its date (up to the day) and its three-digit release number.
          latest_row=$(tail -4 chebi_index.html | head -1)
          date_new=$(echo "$latest_row" | grep -oP '<td align="right">\K[0-9-]+\s[0-9:]+(?=\s+</td>)' | awk '{print $1}')
          release=$(echo "$latest_row" | grep -oP '(?<=a href="rel)\d\d\d')
          ## Date of the currently used release; `date` is expected to be set
          ## by the sourced config file -- TODO confirm against the config.
          date_old=$date
          ## `date -d ""` silently resolves to today, so refuse empty values
          ## instead of comparing against a meaningless timestamp.
          if [ -z "$date_new" ] || [ -z "$date_old" ] || [ -z "$release" ]; then
            echo "::error::Could not determine release info (new='$date_new', old='$date_old', release='$release')"
            exit 1
          fi
          # https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-an-output-parameter
          echo "RELEASE_NUMBER=$release" >> "$GITHUB_OUTPUT"
          echo "DATE_OLD=$date_old" >> "$GITHUB_OUTPUT"
          echo "DATE_NEW=$date_new" >> "$GITHUB_OUTPUT"
          ## Compare the dates and flag a new release if date_new is more recent
          timestamp1=$(date -d "$date_new" +%s)
          timestamp2=$(date -d "$date_old" +%s)
          if [ "$timestamp1" -gt "$timestamp2" ]; then
            echo 'New release available', "$release"
            echo "NEW_RELEASE=true" >> "$GITHUB_OUTPUT"
          else
            echo 'No new release available'
            echo "NEW_RELEASE=false" >> "$GITHUB_OUTPUT"
          fi
          echo "Date of latest release: $date_new", "Date of release of the current version: $date_old"
          ## Clean up
          rm chebi_index.html

  test_sdf_processing:
    name: Test release
    needs: check_new_release
    # Compare explicitly with 'true': any non-empty string (including 'false')
    # is truthy in a GitHub Actions expression.
    if: ${{ needs.check_new_release.outputs.NEW_RELEASE == 'true' }}
    env:
      RELEASE_NUMBER: ${{ needs.check_new_release.outputs.RELEASE_NUMBER }}
      DATE_OLD: ${{ needs.check_new_release.outputs.DATE_OLD }}
      DATE_NEW: ${{ needs.check_new_release.outputs.DATE_NEW }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Download SDF for new release
        run: |
          ## Download ChEBI SDF file
          echo "$RELEASE_NUMBER"
          wget "https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${RELEASE_NUMBER}/SDF/ChEBI_complete_3star.sdf.gz"
          ## Unzip gz file:
          gunzip ChEBI_complete_3star.sdf.gz # TODO replace by config var
          ## Print the size of the extracted file
          ls -lh ChEBI_complete_3star.sdf
          ## Create temp. folder to store the data in
          mkdir -p mapping_preprocessing/datasources/chebi/data
      # step 4: run the Java .jar for ChEBI preprocessing
      - name: Set up Java
        uses: actions/setup-java@v4
        with:
          java-version: '11'
          distribution: 'temurin'
      # Download current version from Zenodo
      - name: Download current mapping file from Zenodo
        id: download
        env:
          zenodo_token: ${{ secrets.ZENODO }}
        run: |
          # Shell variables do not survive between steps, so the config must be
          # sourced here. It is expected to define zenodo_file_id -- TODO confirm.
          . datasources/chebi/config .
          : "${zenodo_file_id:?zenodo_file_id is not set by datasources/chebi/config}"
          # The file name is fixed here and deliberately overrides any value
          # coming from the config file.
          file_name="ChEBI_secID2priID.tsv"
          echo "file name: $file_name"
          # Exported through GITHUB_ENV so the "Diff versions" step can read it
          echo "file_name=$file_name" >> "$GITHUB_ENV"
          # Double quotes are required for the variables in the URL to expand
          echo "Dowloading from: https://zenodo.org/api/files/${zenodo_file_id}/${file_name}"
          # Request Zenodo API to download the file; --fail turns an HTTP error
          # into a step failure instead of saving the error body as the data file
          curl --fail -H "Authorization: Bearer $zenodo_token" -LJO "https://zenodo.org/api/files/${zenodo_file_id}/${file_name}"
      - name: Test SDF processing
        id: sdf_process
        run: |
          inputFile="ChEBI_complete_3star.sdf"
          outputDir="new/"
          mkdir -p "$outputDir"
          # Run steps are executed with `bash -e`, so the Java call has to sit
          # inside the `if` condition; otherwise a non-zero exit code would
          # abort the step before FAILED=true could be recorded.
          if java -cp java/target/mapping_prerocessing-0.0.1-jar-with-dependencies.jar org.sec2pri.chebi_sdf "$inputFile" "$outputDir"; then
            # Java program succeeded
            echo "Successful preprocessing of ChEBI data."
            echo "FAILED=false" >> "$GITHUB_ENV"
          else
            # Java program failed
            echo "Failed preprocessing of ChEBI data."
            echo "FAILED=true" >> "$GITHUB_ENV"
          fi
          ls
          ls "$outputDir"
      - name: Diff versions
        if: ${{ env.FAILED == 'false' }}
        run: |
          old="$file_name"
          new="new/$file_name"
          output_file="diff_ENV.txt"
          for f in "$old" "$new"; do
            if [ ! -f "$f" ]; then
              echo "::error::Expected mapping file '$f' not found"
              exit 1
            fi
          done
          # Extract the second column (secondaryID) from both files, strip
          # carriage returns and sort, then diff the two sorted lists.
          # diff exits with 1 when the inputs differ, which is the expected
          # case here; only an exit code above 1 signals a real error.
          status=0
          diff -u <(cut -f 2 "$old" | tr -d "\r" | sort) \
                  <(cut -f 2 "$new" | tr -d "\r" | sort) > "$output_file" || status=$?
          if [ "$status" -gt 1 ]; then
            echo "::error::diff failed with exit code $status"
            exit "$status"
          fi
          # Additions and removals, one ID per line. grep exits with 1 when
          # nothing matches, which must not fail the step.
          #TODO echo the added IDs to the job output
          grep '^+CHEBI' "$output_file" > added.txt || true
          grep '^-CHEBI' "$output_file" > removed.txt || true
      - id: create_issue_new
        name: Create issue for new release availability
        # added.txt / removed.txt only exist when processing succeeded
        if: ${{ env.FAILED == 'false' }}
        run: |
          # The code fences are single-quoted: inside double quotes bash would
          # treat the backticks as command substitution.
          {
            echo "---"
            echo "title: Update ChEBI - release on $DATE_NEW"
            echo "assignees: tabbassidaloii"
            echo "---"
            echo "[New release for ChEBI](https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel$RELEASE_NUMBER/SDF/) available from $DATE_NEW."
            echo "## Removed secondary IDs"
            echo '```'
            cat removed.txt
            echo '```'
            echo "## Added secondary IDs"
            echo '```'
            cat added.txt
            echo '```'
          } > issue.md
      - uses: JasonEtco/create-an-issue@v2
        name: Post issue about release availability
        if: ${{ env.FAILED == 'false' }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          filename: issue.md
      - name: Create issue about failing test
        if: ${{ env.FAILED == 'true' }}
        run: |
          # RELEASE_NUMBER comes from the job-level env
          {
            echo "---"
            echo "title: Failed ChEBI processing for release $RELEASE_NUMBER"
            echo "assignees: tabbassidaloii"
            echo "---"
            echo "Processing failed for the [new release for ChEBI](https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel$RELEASE_NUMBER/SDF/)."
          } > issue.md
      - uses: JasonEtco/create-an-issue@v2
        name: Post issue about failing test
        # Without this condition the success issue would be posted a second time
        if: ${{ env.FAILED == 'true' }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          filename: issue.md