# Create uniprot.yml #4
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow: compares the release date recorded in datasources/uniprot/config
# with the date shown for uniprot_sprot.fasta.gz on the EBI index page, then
# downloads the current UniProt files into datasources/uniprot/data.
name: Check and test UniProt updates

on:
  workflow_dispatch:
  pull_request:  # tests whether it is working on PR
    paths:
      - '.github/workflows/uniprot.yml'
  schedule:
    - cron: "0 0 1,15 * *"  # Run the workflow on the 1st and 15th day of each month

permissions:
  contents: write
  pages: write
  id-token: write
  issues: write

jobs:
  # Job 1: read the stored release date and the latest one published by EBI,
  # and expose both (plus the data file names) as job outputs.
  check_new_data:
    runs-on: ubuntu-latest
    name: Check the date of the latest data
    outputs:
      UNIPROT_SPROT_NEW: ${{ steps.check_download.outputs.UNIPROT_SPROT_NEW }}
      SEC_AC_NEW: ${{ steps.check_download.outputs.SEC_AC_NEW }}
      DELAC_SP_NEW: ${{ steps.check_download.outputs.DELAC_SP_NEW }}
      DATE_NEW: ${{ steps.check_download.outputs.DATE_NEW }}
      DATE_OLD: ${{ steps.check_download.outputs.DATE_OLD }}
    steps:
      # step 1: check the release date for the latest UniProt files
      - name: Checkout
        uses: actions/checkout@v4
      - name: Check for new uniprot files
        id: check_download
        run: |
          # Extract the date from the uniprot config file
          date_old=$(grep -E '^date=' datasources/uniprot/config | cut -d'=' -f2)
          if [ -z "$date_old" ]; then
            # Best effort: an absent date is reported but does not stop the run.
            echo "::warning::No 'date=' entry found in datasources/uniprot/config"
          fi

          echo 'Accessing the uniprot data'
          wget https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/ -O uniprot_index.html
          # Dump the index to the log (cat instead of a pager in a non-interactive job)
          cat uniprot_index.html

          # Take the first match only, so a multi-line value can never be
          # written to the output file.
          date_new=$(grep -oP 'uniprot_sprot\.fasta\.gz</a></td><td[^>]*>\K[0-9]{4}-[0-9]{2}-[0-9]{2}' uniprot_index.html | head -n 1)
          if [ -z "$date_new" ]; then
            echo "::error::Could not find the release date of uniprot_sprot.fasta.gz in the index page"
            exit 1
          fi

          # Store dates to output. The file names are emitted as well because
          # the job declares them as outputs; the values mirror the names the
          # download job uses (NOTE(review): confirm these are the intended values).
          {
            echo "DATE_OLD=$date_old"
            echo "DATE_NEW=$date_new"
            echo "UNIPROT_SPROT_NEW=uniprot_sprot.fasta.gz"
            echo "SEC_AC_NEW=sec_ac.txt"
            echo "DELAC_SP_NEW=delac_sp.txt"
          } >> "$GITHUB_OUTPUT"
          echo "Date of latest release: $date_new", "Date of release of the current version: $date_old"

  # Job 2: download the current UniProt files for processing.
  test_new_data_processing:
    name: Processing new data and check updates
    needs: check_new_data
    env:
      DATE_OLD: ${{ needs.check_new_data.outputs.DATE_OLD }}
      DATE_NEW: ${{ needs.check_new_data.outputs.DATE_NEW }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      # step 2: download the recent data
      - name: Download the recent data
        run: |
          # Store outputs from previous job in environment variables.
          # The key must be the literal name DATE_NEW, not its expansion.
          echo "DATE_NEW=$DATE_NEW" >> "$GITHUB_ENV"
          UNIPROT_SPROT_NEW=uniprot_sprot.fasta.gz
          SEC_AC_NEW=sec_ac.txt
          DELAC_SP_NEW=delac_sp.txt
          base_url=https://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete

          # Create temp. folder to store the data in
          mkdir -p datasources/uniprot/data

          # Download uniprot files
          wget "${base_url}/${SEC_AC_NEW}"
          wget "${base_url}/${DELAC_SP_NEW}"
          wget "${base_url}/${UNIPROT_SPROT_NEW}"
          mv "$DELAC_SP_NEW" "$SEC_AC_NEW" "$UNIPROT_SPROT_NEW" datasources/uniprot/data

          # Check file size if available
          ls -trlh datasources/uniprot/data