Merge pull request #326 from CanDIG/stable-candidate-v4.2.0
v4.2.0: Updates to logging, auth, server
daisieh authored Oct 4, 2024
2 parents 9d00ade + e759f15 commit 07de505
Showing 26 changed files with 407 additions and 299 deletions.
34 changes: 18 additions & 16 deletions .github/workflows/candig-dispatch.yml
@@ -1,9 +1,9 @@
---
name: Submodule PR
on:
push:
branches:
- develop
pull_request:
branches: [develop]
types: [closed]
jobs:
CanDIG-dispatch:
runs-on: ubuntu-latest
@@ -12,21 +12,23 @@ jobs:
CHECKOUT_BRANCH: develop
PR_AGAINST_BRANCH: develop
OWNER: CanDIG
if: github.event.pull_request.merged == true
steps:
- name: Check out repository code
uses: actions/checkout@v4
- name: get PR data
uses: actions/github-script@v7
id: get_pr_data
with:
script: |
return (
await github.rest.repos.listPullRequestsAssociatedWithCommit({
commit_sha: context.sha,
owner: context.repo.owner,
repo: context.repo.repo,
})
).data[0];
shell: python
run: |
import json
import os
with open('${{ github.event_path }}') as fh:
event = json.load(fh)
escaped = event['pull_request']['title'].replace("'", '"')
pr_number = event["number"]
print(escaped)
with open(os.environ['GITHUB_ENV'], 'a') as fh:
print(f'PR_TITLE={escaped}', file=fh)
print(f'PR_NUMBER={pr_number}', file=fh)
- name: Create PR in CanDIGv2
id: make_pr
uses: CanDIG/github-action-pr-expanded@v4
@@ -35,7 +37,7 @@ jobs:
parent_repository: ${{ env.PARENT_REPOSITORY }}
checkout_branch: ${{ env.CHECKOUT_BRANCH}}
pr_against_branch: ${{ env.PR_AGAINST_BRANCH }}
pr_title: "${{ github.repository }} merging: ${{ fromJson(steps.get_pr_data.outputs.result).title }}"
pr_description: "PR triggered by update to develop branch on ${{ github.repository }}. Commit hash: `${{ github.sha }}`. PR link:[#${{ fromJson(steps.get_pr_data.outputs.result).number }}](https://github.com/${{ github.repository }}/pull/${{ fromJson(steps.get_pr_data.outputs.result).number }})"
pr_title: "${{ github.repository }} merging: ${{ env.PR_TITLE }}"
pr_description: "PR triggered by update to develop branch on ${{ github.repository }}. Commit hash: `${{ github.sha }}`. PR link: [#${{ env.PR_NUMBER }}](https://github.com/${{ github.repository }}/pull/${{ env.PR_NUMBER }})"
owner: ${{ env.OWNER }}
submodule_path: lib/htsget/htsget_app
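
The dispatch workflow now reads the merged pull request's title and number straight from the event payload in an inline Python step and hands them to later steps through the GITHUB_ENV file, replacing the earlier actions/github-script API lookup. A minimal sketch of the same handoff, runnable outside Actions by pointing it at a saved event payload (the event.json path and the fallback env file are assumptions for local testing):

```python
# Local sketch of the new "get PR data" step: parse a saved pull_request
# event payload and emit the same PR_TITLE / PR_NUMBER lines the workflow
# appends to $GITHUB_ENV. In Actions the payload path comes from
# github.event_path; event.json here is only a local stand-in.
import json
import os

with open("event.json") as fh:  # hypothetical local copy of the event payload
    event = json.load(fh)

escaped = event["pull_request"]["title"].replace("'", '"')
pr_number = event["number"]

env_file = os.environ.get("GITHUB_ENV", "github_env.txt")  # local fallback file
with open(env_file, "a") as fh:
    print(f"PR_TITLE={escaped}", file=fh)
    print(f"PR_NUMBER={pr_number}", file=fh)
```

Later steps then read the values as ${{ env.PR_TITLE }} and ${{ env.PR_NUMBER }}, as the updated pr_title and pr_description inputs above do.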
61 changes: 61 additions & 0 deletions .github/workflows/test.yml
@@ -0,0 +1,61 @@
name: Github Actions Test

on: [push]
jobs:
build:
env:
POSTGRES_USER: admin
POSTGRES_PASSWORD: OG0pjmnQDWUTvLotYrxPrg
POSTGRES_HOST: localhost
POSTGRES_HOST_AUTH_METHOD: password
POSTGRES_DB: metadata
POSTGRES_PORT: 5432
HTSGET_TEST_KEY: thisisatest
DB_PATH: localhost
SERVER_LOCAL_DATA: ${{github.workspace}}/data
PGPASSWORD: OG0pjmnQDWUTvLotYrxPrg
INDEXING_PATH: ${{github.workspace}}/tmp
TESTENV_URL: http://localhost:3000
AGGREGATE_COUNT_THRESHOLD: 5
POSTGRES_USERNAME: admin
runs-on: ubuntu-latest
# Add in environment variables for the entire "build" job
services:
postgres_main:
image: postgres:latest
env:
POSTGRES_USER: ${{ env.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ env.POSTGRES_PASSWORD }}
POSTGRES_DB: ${{ env.POSTGRES_DB }}
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
strategy:
matrix:
python-version: ['3.12']
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
submodules: true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install requirements
run: |
pip install -r requirements.txt
bash create_db.sh
mkdir -p ${{ env.INDEXING_PATH }}
sed -i s@\<AGGREGATE_COUNT_THRESHOLD\>@${{env.AGGREGATE_COUNT_THRESHOLD}}@ config.ini
sed -i s@\<POSTGRES_USERNAME\>@${{env.POSTGRES_USERNAME}}@ config.ini
- name: Test
run: |
python htsget_server/server.py &
python htsget_server/indexing.py &
sleep 5
pytest tests/test_htsget_server.py
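
The new CI workflow starts the server and the indexer in the background and waits a fixed five seconds before invoking pytest. A minimal sketch of an equivalent readiness wait, assuming the server accepts TCP connections on localhost:3000 as implied by TESTENV_URL (the helper below is illustrative, not part of the repository):

```python
# Poll the htsget server port until it accepts a connection, instead of a
# fixed sleep. Assumes localhost:3000, matching TESTENV_URL in the workflow.
import socket
import sys
import time


def wait_for_port(host: str = "localhost", port: int = 3000, timeout: float = 30.0) -> bool:
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1.0):
                return True
        except OSError:
            time.sleep(0.5)  # server not up yet; retry
    return False


if __name__ == "__main__":
    sys.exit(0 if wait_for_port() else 1)
```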
3 changes: 3 additions & 0 deletions .gitignore
@@ -9,3 +9,6 @@ htsget_server/__pycache__/
*.pyc
__pycache__
.nova/Configuration.json

.DS_Store
.idea/
30 changes: 0 additions & 30 deletions .travis.yml

This file was deleted.

4 changes: 2 additions & 2 deletions config.ini
@@ -3,12 +3,12 @@ Port = 3000
BasePath = /htsget/v1
ChunkSize = 1000000
BucketSize = 10000
AGGREGATE_COUNT_THRESHOLD = <AGGREGATE_COUNT_THRESHOLD>

[paths]
DBPath = sqlite:///./data/files.db
PGPath = postgresql+psycopg2://admin:PASSWORD@HOST:5432/genomic
PGPath = postgresql+psycopg2://<POSTGRES_USERNAME>:PASSWORD@HOST:5432/genomic

[authz]
CANDIG_OPA_SECRET = <CANDIG_OPA_SECRET>
CANDIG_OPA_URL = <OPA_URL>
CANDIG_VAULT_URL = <VAULT_URL>
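
config.ini now carries an AGGREGATE_COUNT_THRESHOLD placeholder and a <POSTGRES_USERNAME> placeholder in the Postgres URL; both are filled in by sed at deploy or CI time. A hedged sketch of reading the substituted file — the section and key names come from the diff, but reading it with configparser is an assumption about how the server consumes it:

```python
# Read the substituted config.ini; section and key names are taken from the
# diff above, the configparser usage itself is illustrative.
from configparser import ConfigParser

config = ConfigParser()
config.read("config.ini")

pg_path = config["paths"]["PGPath"]          # postgresql+psycopg2://<user>:<password>@<host>:5432/genomic
opa_url = config["authz"]["CANDIG_OPA_URL"]  # filled in from <OPA_URL> at startup
```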
10 changes: 5 additions & 5 deletions create_db.sh
@@ -3,11 +3,11 @@
set -Euo pipefail

# db=${DB_PATH:-data/files.db}
db=${DB_PATH:-"metadata-db"}
db=${DB_PATH:-"postgres-db"}

# PGPASSWORD is used by psql to avoid the password prompt
export PGPASSWORD=${PGPASSWORD:-`cat /run/secrets/metadata-db-secret`}
export PGUSER=${PGUSER:-`cat /run/secrets/metadata-db-user`}
export PGPASSWORD=${PGPASSWORD:-$(cat $POSTGRES_PASSWORD_FILE)}
export PGUSER=$POSTGRES_USERNAME

until pg_isready -h "$db" -p 5432 -U $PGUSER; do
echo "Waiting for the database at $db to be ready..."
@@ -32,13 +32,13 @@ if [[ $numgenes -lt 5 ]]; then
echo "adding data to ncbirefseq..."
awk '{ print "INSERT INTO ncbirefseq (reference_genome, transcript_name, contig, start, endpos, gene_name) VALUES (" "\047hg37\047, \047" $1 "\047, \047" $2 "\047, " $3 ", " $4 ", \047" $5 "\047) ON CONFLICT DO NOTHING;"}' data/refseq/ncbiRefSeqSelect.hg37.txt >> genes.sql
awk '{ print "INSERT INTO ncbirefseq (reference_genome, transcript_name, contig, start, endpos, gene_name) VALUES (" "\047hg38\047, \047" $1 "\047, \047" $2 "\047, " $3 ", " $4 ", \047" $5 "\047) ON CONFLICT DO NOTHING;"}' data/refseq/ncbiRefSeqSelect.hg38.txt >> genes.sql

psql --quiet -h "$db" -U $PGUSER -d genomic -a -f genes.sql >>setup_out.txt
psql --quiet -h "$db" -U $PGUSER -d genomic -a -c 'SET synchronous_commit TO off;' -c '\i genes.sql' -c 'SET synchronous_commit TO on;' >> setup_out.txt
# rm genes.sql
echo "...done"
fi

# run any migrations:
echo "running migrations..."
psql --quiet -h "$db" -U $PGUSER -d genomic -a -f data/pr_288.sql >>setup_out.txt
psql --quiet -h "$db" -U $PGUSER -d genomic -a -f data/pr_315.sql >>setup_out.txt
echo "...done"
2 changes: 1 addition & 1 deletion data/files.sql
@@ -7,7 +7,7 @@ CREATE TABLE drs_object (
id VARCHAR NOT NULL,
name VARCHAR,
self_uri VARCHAR,
size INTEGER,
size BIGINT,
created_time VARCHAR,
updated_time VARCHAR,
version VARCHAR,
7 changes: 7 additions & 0 deletions data/pr_315.sql
@@ -0,0 +1,7 @@
-- a drs_object's size can be really big
DO
$$
BEGIN
ALTER TABLE drs_object ALTER size TYPE bigint;
END;
$$;
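
The pr_315 migration widens drs_object.size to bigint, matching the BIGINT column in data/files.sql above; create_db.sh applies it with psql on startup. A hedged way to confirm the change took effect, via information_schema (connection parameters are placeholders):

```python
# Check the migrated column type for drs_object.size; expects ('bigint',).
import psycopg2

conn = psycopg2.connect(host="localhost", dbname="genomic", user="admin", password="...")  # placeholders
with conn.cursor() as cur:
    cur.execute(
        "SELECT data_type FROM information_schema.columns "
        "WHERE table_name = 'drs_object' AND column_name = 'size';"
    )
    print(cur.fetchone())  # expected: ('bigint',)
conn.close()
```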
23 changes: 8 additions & 15 deletions entrypoint.sh
@@ -2,35 +2,28 @@

set -Euo pipefail

export VAULT_S3_TOKEN=$(cat /run/secrets/vault-s3-token)
export OPA_SECRET=$(cat /run/secrets/opa-service-token)
export VAULT_URL=$VAULT_URL
export AGGREGATE_COUNT_THRESHOLD=$AGGREGATE_COUNT_THRESHOLD

if [[ -f "initial_setup" ]]; then
if [[ -f "/run/secrets/cert.pem" ]]; then
CERT=$(head -n 2 /run/secrets/cert.pem | tail -n 1)
SITE_PKGS=$(python -c 'import site; print(site.getsitepackages()[0])')
echo $SITE_PKGS
if grep -q "$CERT" $SITE_PKGS/certifi/cacert.pem
then
echo "hi"
cat /run/secrets/cert.pem >> ${SITE_PKGS}/certifi/cacert.pem
fi
fi

sed -i s@\<CANDIG_OPA_SECRET\>@$OPA_SECRET@ config.ini
sed -i s@\<OPA_URL\>@$OPA_URL@ config.ini
sed -i s@\<VAULT_URL\>@$VAULT_URL@ config.ini
sed -i s@\<AGGREGATE_COUNT_THRESHOLD\>@$AGGREGATE_COUNT_THRESHOLD@ config.ini
sed -i s@\<POSTGRES_USERNAME\>@$POSTGRES_USERNAME@ config.ini

bash create_db.sh
mkdir $INDEXING_PATH
rm initial_setup
fi

python -c "import candigv2_logging.logging
candigv2_logging.logging.initialize()"

# use the following for development
#python3 htsget_server/server.py

python htsget_server/indexing.py &

# use the following instead for production deployment
uwsgi uwsgi.ini
cd htsget_server
gunicorn server:app
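
On first start the entrypoint now appends the site certificate to certifi's CA bundle, substitutes the config.ini placeholders, creates the database, then runs the indexer in the background and serves the app with gunicorn instead of uwsgi. A minimal Python sketch of the certificate step, written to append the PEM only when its second line is not already found in the bundle (paths mirror the entrypoint; the only-if-absent check is this sketch's reading of the intent):

```python
# Append a site certificate to certifi's bundled cacert.pem when it is not
# already present. The /run/secrets/cert.pem path mirrors entrypoint.sh.
import site
from pathlib import Path

cert_path = Path("/run/secrets/cert.pem")
if cert_path.exists():
    cert_text = cert_path.read_text()
    # second line of the PEM is used as a fingerprint, as in entrypoint.sh
    marker = cert_text.splitlines()[1]
    bundle = Path(site.getsitepackages()[0]) / "certifi" / "cacert.pem"
    if marker not in bundle.read_text():
        with bundle.open("a") as fh:
            fh.write(cert_text)
```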
34 changes: 24 additions & 10 deletions htsget_server/authz.py
@@ -3,15 +3,18 @@
from flask import Flask
import database
import authx.auth
from candigv2_logging.logging import CanDIGLogger


logger = CanDIGLogger(__file__)


app = Flask(__name__)


def is_testing(request):
if request.headers.get("Authorization") == f"Bearer {TEST_KEY}":
print("WARNING: TEST MODE, AUTHORIZATION IS DISABLED")
app.logger.warning("WARNING: TEST MODE, AUTHORIZATION IS DISABLED")
logger.warning("TEST MODE, AUTHORIZATION IS DISABLED")
return True


@@ -20,10 +23,13 @@ def is_authed(id_, request):
return 401
if is_testing(request):
return 200 # no auth
if request_is_from_ingest(request):
return 200
if "Authorization" in request.headers:
obj = database.get_drs_object(id_)
if obj is not None and 'cohort' in obj:
if is_cohort_authorized(request, obj['cohort']):
if is_cohort_authorized(request, obj['cohort']) \
or request_is_from_query(request):
return 200
else:
return 404
@@ -36,17 +42,18 @@ def get_authorized_cohorts(request):
if is_testing(request):
return []
try:
return authx.auth.get_opa_datasets(request, admin_secret=AUTHZ['CANDIG_OPA_SECRET'])
return authx.auth.get_opa_datasets(request)
except Exception as e:
print(f"Couldn't authorize cohorts: {type(e)} {str(e)}")
app.logger.warning(f"Couldn't authorize cohorts: {type(e)} {str(e)}")
logger.warning(f"Couldn't authorize cohorts: {type(e)} {str(e)}")
return []


def is_cohort_authorized(request, cohort_id):
if is_testing(request):
return True
return authx.auth.is_action_allowed_for_program(authx.auth.get_auth_token(request), method=request.method, path=request.path, program=cohort_id, admin_secret=AUTHZ['CANDIG_OPA_SECRET'])
if request_is_from_ingest(request):
return True
return authx.auth.is_action_allowed_for_program(authx.auth.get_auth_token(request), method=request.method, path=request.path, program=cohort_id)


def is_site_admin(request):
@@ -55,12 +62,13 @@ def is_site_admin(request):
"""
if is_testing(request):
return True
if request_is_from_ingest(request):
return True
if "Authorization" in request.headers:
try:
return authx.auth.is_site_admin(request, admin_secret=AUTHZ['CANDIG_OPA_SECRET'])
return authx.auth.is_site_admin(request)
except Exception as e:
print(f"Couldn't authorize site_admin: {type(e)} {str(e)}")
app.logger.warning(f"Couldn't authorize site_admin: {type(e)} {str(e)}")
logger.warning(f"Couldn't authorize site_admin: {type(e)} {str(e)}")
return False
return False

@@ -73,3 +81,9 @@ def request_is_from_query(request):
if "X-Service-Token" in request.headers:
return authx.auth.verify_service_token(service="query", token=request.headers["X-Service-Token"])
return False


def request_is_from_ingest(request):
if "X-Service-Token" in request.headers:
return authx.auth.verify_service_token(service="candig-ingest", token=request.headers["X-Service-Token"])
return False
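
authz.py now logs through CanDIGLogger instead of print/app.logger, drops the explicit admin_secret arguments to the authx calls, and short-circuits authorization for requests bearing a valid service token from candig-ingest (request_is_from_ingest), alongside the existing query-service check. A hedged sketch of a call the new check would accept — the X-Service-Token header name, port, and base path come from this commit, while the route and token value are placeholders:

```python
# Service-to-service request that request_is_from_ingest() would accept:
# the ingest service attaches its service token in the X-Service-Token
# header. The /objects/... route and the token value are placeholders;
# only the header name, port, and base path come from the diff.
import requests

HTSGET_BASE = "http://localhost:3000/htsget/v1"  # Port and BasePath as in config.ini
SERVICE_TOKEN = "replace-with-candig-ingest-service-token"  # placeholder

resp = requests.get(
    f"{HTSGET_BASE}/objects/some-drs-object-id",  # illustrative route
    headers={"X-Service-Token": SERVICE_TOKEN},
    timeout=30,
)
resp.raise_for_status()
```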
3 changes: 1 addition & 2 deletions htsget_server/beacon_openapi.yaml
@@ -488,8 +488,7 @@ components:
type: array
NumTotalResults:
description: Total number of results. NOT the number of results returned in this batch (after pagination) but the total obtained by the query.
minimum: 0
type: integer
type: string
PageToken:
description: A hash or similar that allows the server to retrieve a "page", e.g. (a subset of) a query response.
example: ab0sc&fe1dd
Expand Down
(Diff for the remaining changed files is not shown.)
