chebi

Fixes to ChEBI workflow #52

Workflow file for this run

	# Workflow header: display name, triggers and default token permissions.
	name: chebi

	# Triggers: manual dispatch, every pull request, and a twice-monthly schedule.
	on:
	  workflow_dispatch:
	  pull_request:  # tests whether it is working on PR
	  schedule:
	    - cron: "0 0 1,15 * *"  # Run the workflow on the 1st and 15th day of each month

	# Default GITHUB_TOKEN permissions for jobs that do not declare their own.
	# NOTE(review): no job in this workflow appears to push commits, deploy
	# Pages or request an OIDC token - confirm whether these write scopes are
	# still needed or can be reduced to `contents: read`.
	permissions:
	  contents: write
	  pages: write
	  id-token: write

	# Jobs:
	#   check_release     - downloads the current mapping file from Zenodo and
	#                       compares its release date with the ChEBI archive.
	#   process_file      - downloads the new ChEBI SDF release, runs the Java
	#                       preprocessing and diffs the secondary IDs.
	#   post-issue_update - opens an issue describing the new release.
	#   post-issue-fail   - opens an issue when the preprocessing failed.
	#
	# Values are handed from one job to the next through job outputs and the
	# `needs` context: variables written to $GITHUB_ENV are only visible to
	# later steps of the SAME job.
	jobs:
	  check_release:
	    runs-on: ubuntu-latest
	    name: Check latest release date
	    outputs:
	      download_file: ${{ steps.check.outputs.download_file }}
	      release: ${{ steps.check.outputs.release }}
	      date_new: ${{ steps.check.outputs.date_new }}
	      zenodo_file_name: ${{ steps.zenodo.outputs.file_name }}

	    steps:
	      # checkout the repository
	      - name: Checkout
	        uses: actions/checkout@v4

	      # Download current version from Zenodo
	      - name: Download file from Zenodo
	        id: zenodo
	        env:
	          zenodo_token: ${{ secrets.ZENODO }}
	        run: |
	          # Set up variables. The config file is sourced (not executed) so
	          # that the variables it defines are visible in this shell.
	          . datasources/chebi/config .
	          zenodo_file_id="8348142"
	          file_name=$to_check_from_zenodo
	          echo "file name: $file_name"
	          echo "Downloading from: https://zenodo.org/api/files/${zenodo_file_id}/${file_name}"
	          echo "file_name=$file_name" >> "$GITHUB_OUTPUT"

	          # Request Zenodo API to download the file; --fail turns HTTP
	          # errors into a failing step, -o pins the local file name.
	          curl --fail -H "Authorization: Bearer $zenodo_token" -L -o "$file_name" "https://zenodo.org/api/files/$zenodo_file_id/$file_name"

	          # Verify the downloaded file
	          ls -l "$file_name"

	      # check the release date for the latest ChEBI release
	      - name: Check for new ChEBI release
	        id: check
	        run: |
	          echo 'Accessing the ChEBI archive'
	          wget https://ftp.ebi.ac.uk/pub/databases/chebi/archive/
	          ## Read config
	          . datasources/chebi/config .
	          ## Check date for last element in index
	          string=$(tail -4 index.html | head -1)
	          ## Extract the date from the latest release (up to the day)
	          date_new=$(echo "$string" | grep -oP '<td align="right">\K[0-9-]+\s[0-9:]+(?=\s+</td>)' | awk '{print $1}')
	          release=$(echo "$string" | grep -oP '(?<=a href="rel)\d\d\d' || true)
	          ## Stop early if the index layout changed and nothing was parsed
	          if [ -z "$release" ] || [ -z "$date_new" ]; then
	            echo "::error::Could not parse the latest release from the ChEBI archive index"
	            exit 1
	          fi
	          {
	            echo "release=$release"
	            echo "date_new=$date_new"
	          } >> "$GITHUB_OUTPUT"
	          ## Release date of the current version, as defined by the config file
	          date_old=$date
	          ## Compare the dates and flag a download if date_new is more recent
	          timestamp1=$(date -d "$date_new" +%s)
	          timestamp2=$(date -d "$date_old" +%s)

	          if [ "$timestamp1" -gt "$timestamp2" ]; then
	            echo "download_file=true" >> "$GITHUB_OUTPUT"
	            echo "New release available: $release"
	          else
	            echo "download_file=false" >> "$GITHUB_OUTPUT"
	            echo 'No new release available'
	          fi
	          echo "Date of latest release: $date_new", "Date of release of the current version: $date_old"
	          ## Clean up
	          rm index.htm*

	      # Each job runs on a fresh runner, so the Zenodo file is handed to
	      # process_file as an artifact.
	      - name: Upload current Zenodo file
	        if: steps.check.outputs.download_file == 'true'
	        uses: actions/upload-artifact@v4
	        with:
	          name: zenodo-current
	          path: ${{ steps.zenodo.outputs.file_name }}
	          if-no-files-found: error

	  process_file:
	    needs: check_release
	    name: Download and test latest ChEBI release
	    if: needs.check_release.outputs.download_file == 'true'
	    runs-on: ubuntu-latest
	    outputs:
	      failed: ${{ steps.sdf.outputs.failed }}
	      added: ${{ steps.diff.outputs.added }}
	      removed: ${{ steps.diff.outputs.removed }}
	    env:
	      RELEASE: ${{ needs.check_release.outputs.release }}
	      ZENODO_FILE_NAME: ${{ needs.check_release.outputs.zenodo_file_name }}
	    steps:
	      # The Java step below reads a jar from the repository tree.
	      # NOTE(review): the jar under target/ must be committed or built
	      # before the "Test SDF processing" step - confirm which applies.
	      - name: Checkout
	        uses: actions/checkout@v4

	      - name: Download data
	        run: |
	          ## Download ChEBI SDF file of the release found by check_release
	          wget "https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${RELEASE}/SDF/ChEBI_complete_3star.sdf.gz"
	          ## Unzip gz file:
	          gunzip ChEBI_complete_3star.sdf.gz

	          ## Check file size if available
	          ls
	          ## Print file size
	          FILENAME=ChEBI_complete_3star.sdf
	          FILESIZE=$(stat -c%s "$FILENAME")
	          echo "Size of $FILENAME = $FILESIZE bytes."

	          ## Create temp. folder to store the data in
	          mkdir -p mapping_preprocessing/datasources/chebi/data
	          ## Move the SDF file to the expected location
	          mv ChEBI_complete_3star.sdf mapping_preprocessing/datasources/chebi/data

	      # step 3: run the R script for ChEBI preprocessing (to be made into a separate job)
	      - name: Run R script to process SDF data
	        run: |
	          ## Check R version:
	          R --version
	          ## Install the readr package:
	          #Rscript -e 'install.packages("readr")'
	          ## Running the R script:
	          #Rscript r/chebi_processing.R
	          #echo "R preprocessing of ChEBI data is done"

	      # step 4: run the Java .jar for ChEBI preprocessing (to be made into a separate job)
	      - name: Set up Java
	        uses: actions/setup-java@v4
	        with:
	          java-version: '11'
	          distribution: 'temurin'

	      - name: Test SDF processing
	        id: sdf
	        run: |
	          inputFile="mapping_preprocessing/datasources/chebi/data/ChEBI_complete_3star.sdf"
	          outputDir="mapping_preprocessing/datasources/chebi/data"
	          # Run the Java program inside the `if` condition: the step shell
	          # runs with `-e`, so a separate `$?` check would never be reached
	          # after a failure.
	          if java -cp target/mapping_prerocessing-0.0.1-jar-with-dependencies.jar org.sec2pri.chebi_sdf "$inputFile" "$outputDir"; then
	            # Java program succeeded
	            echo "Java preprocessing of ChEBI data is done."
	            echo "failed=false" >> "$GITHUB_OUTPUT"
	          else
	            # Java program failed
	            echo "Java preprocessing of ChEBI data failed."
	            echo "failed=true" >> "$GITHUB_OUTPUT"
	          fi

	      - name: Fetch current Zenodo file
	        if: steps.sdf.outputs.failed == 'false'
	        uses: actions/download-artifact@v4
	        with:
	          name: zenodo-current
	          path: zenodo_current

	      - name: Diff versions
	        id: diff
	        if: steps.sdf.outputs.failed == 'false'
	        run: |
	          old="zenodo_current/${ZENODO_FILE_NAME}"
	          new="mapping_preprocessing/datasources/chebi/data/${ZENODO_FILE_NAME}"
	          # Extract the second (secondaryID) column from both files and sort them
	          ids_old=$(cut -f 2 "$old" | sort | tr -d "\r")
	          ids_new=$(cut -f 2 "$new" | sort | tr -d "\r")

	          # Perform a diff between the sorted lists of secondaryIDs and
	          # store the output. diff exits with 1 when the inputs differ,
	          # which is expected here; any other non-zero status is an error.
	          output_file="diff_output.txt"
	          diff -u <(echo "$ids_old") <(echo "$ids_new") > "$output_file" || [ $? -eq 1 ]

	          # Additions / removals (grep exits with 1 when nothing matches)
	          added=$(grep '^+CHEBI' "$output_file" || true)
	          removed=$(grep '^-CHEBI' "$output_file" || true)

	          # Multi-line values need the delimiter syntax in $GITHUB_OUTPUT
	          {
	            echo "added<<CHEBI_DIFF_EOF"
	            echo "$added"
	            echo "CHEBI_DIFF_EOF"
	            echo "removed<<CHEBI_DIFF_EOF"
	            echo "$removed"
	            echo "CHEBI_DIFF_EOF"
	          } >> "$GITHUB_OUTPUT"

	  post-issue_update:
	    # check_release is listed explicitly because the `needs` context only
	    # exposes outputs of direct dependencies.
	    needs: [check_release, process_file]
	    name: Post issue about updating data
	    runs-on: ubuntu-latest
	    # Skipped when processing failed: post-issue-fail reports that case and
	    # the added/removed lists would be empty.
	    if: needs.check_release.outputs.download_file == 'true' && needs.process_file.outputs.failed != 'true'
	    permissions:
	      contents: read
	      issues: write
	    steps:
	      - uses: actions/checkout@v4
	      # Values are passed through the environment rather than interpolated
	      # into the script, so multi-line content cannot break the shell code.
	      - env:
	          DATE_NEW: ${{ needs.check_release.outputs.date_new }}
	          RELEASE: ${{ needs.check_release.outputs.release }}
	          ADDED: ${{ needs.process_file.outputs.added }}
	          REMOVED: ${{ needs.process_file.outputs.removed }}
	        run: |
	          {
	            echo "---"
	            echo "title: Update ChEBI - release on ${DATE_NEW}"
	            echo "assignees: tabbassidaloii"
	            echo "---"
	            echo "[New release for ChEBI](https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${RELEASE}/SDF/) available from ${DATE_NEW}."
	            echo
	            echo "## Removed secondary IDs"
	            echo '```'
	            printf '%s\n' "$REMOVED"
	            echo '```'
	            echo
	            echo "## Added secondary IDs"
	            echo '```'
	            printf '%s\n' "$ADDED"
	            echo '```'
	          } > issue.md
	      - uses: JasonEtco/create-an-issue@v2
	        env:
	          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        with:
	          filename: issue.md

	  post-issue-fail:
	    needs: [check_release, process_file]
	    if: needs.process_file.outputs.failed == 'true'
	    name: Post issue about failed test
	    runs-on: ubuntu-latest
	    permissions:
	      contents: read
	      issues: write
	    steps:
	      - uses: actions/checkout@v4
	      - env:
	          DATE_NEW: ${{ needs.check_release.outputs.date_new }}
	          RELEASE: ${{ needs.check_release.outputs.release }}
	        run: |
	          {
	            echo "---"
	            echo "title: Failed ChEBI processing for release ${RELEASE}"
	            echo "assignees: tabbassidaloii"
	            echo "---"
	            echo "Processing failed for the [new release for ChEBI](https://ftp.ebi.ac.uk/pub/databases/chebi/archive/rel${RELEASE}/SDF/) available from ${DATE_NEW}."
	          } > issue.md
	      - uses: JasonEtco/create-an-issue@v2
	        env:
	          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        with:
	          filename: issue.md

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fixes to ChEBI workflow #52

Workflow file

Fixes to ChEBI workflow #52

Jobs

Run details

Workflow file for this run