Skip to content

Commit

Permalink
Merge branch 'develop' into daisieh/cohort
Browse files Browse the repository at this point in the history
  • Loading branch information
daisieh committed Nov 25, 2024
2 parents 537e472 + fb58be0 commit 737f08a
Show file tree
Hide file tree
Showing 13 changed files with 128 additions and 99 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/candig-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,6 @@ jobs:
with:
name: Post-build error log
path: |
tmp/error.txt
tmp/progress.txt
tmp/container_logs.txt
tmp/vault_audit.log
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,8 @@ pnpm-debug.log*

# macOS-specific files
.DS_Store

# minio-related files
lib/minio/access-key
lib/minio/secret-key
lib/minio/aws-credentials
57 changes: 29 additions & 28 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ SHELL = bash
CONDA = $(CONDA_INSTALL)/bin/conda
CONDA_ENV_SETTINGS = $(CONDA_INSTALL)/etc/profile.d/conda.sh

LOGFILE = tmp/progress.txt

.PHONY: all
all:
Expand Down Expand Up @@ -56,7 +55,7 @@ ifndef CONDA_INSTALL
echo "ERROR: Conda install location not specified. Do you have a .env?"
exit 1
endif
echo " started bin-conda" >> $(LOGFILE)
@printf "\nOutput of bin-conda:\n" | tee -a $(LOGFILE)
ifeq ($(VENV_OS), linux)
curl -Lo bin/miniconda_install.sh \
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
Expand Down Expand Up @@ -84,7 +83,6 @@ endif
$(CONDA) config --remove channels defaults
$(CONDA) config --add channels conda-forge
$(CONDA) config --set channel_priority strict
echo " finished bin-conda" >> $(LOGFILE)


#>>>
Expand All @@ -93,7 +91,7 @@ endif
#<<<
.PHONY: build-all
build-all: mkdir
printf "Build started at `date '+%D %T'`.\n\n" >> $(ERRORLOG)
@printf "Build started at `date '+%D %T'`.\n\n" >> $(LOGFILE)
./pre-build-check.sh $(ARGS)

# Setup the entire stack
Expand Down Expand Up @@ -132,15 +130,14 @@ build-images: #toil-docker

#<<<
build-%:
printf "\nOutput of build-$*: \n" >> $(ERRORLOG)
echo " started build-$*" >> $(LOGFILE)
@printf "\nOutput of build-$*: \n" | tee -a $(LOGFILE)
source setup_hosts.sh
if [ -f lib/$*/$*_preflight.sh ]; then \
source lib/$*/$*_preflight.sh 2>&1 | tee -a $(ERRORLOG); \
source lib/$*/$*_preflight.sh 2>&1 | tee -a $(LOGFILE); \
fi
export SERVICE_NAME=$*; \
DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 \
docker compose -f lib/candigv2/docker-compose.yml -f lib/$*/docker-compose.yml build $(BUILD_OPTS) 2>&1 | tee -a $(ERRORLOG)
docker compose -f lib/candigv2/docker-compose.yml -f lib/$*/docker-compose.yml build $(BUILD_OPTS) 2>&1 | tee -a $(LOGFILE)
echo " finished build-$*" >> $(LOGFILE)


Expand Down Expand Up @@ -188,7 +185,6 @@ clean-authx:
# Empties error and progress logs
.PHONY: clean-logs
clean-logs:
> $(ERRORLOG)
> $(LOGFILE)

#>>>
Expand Down Expand Up @@ -259,7 +255,7 @@ clean-secrets:


#>>>
# remove all peristant volumes and local data
# remove all persistent volumes and local data
# make clean-volumes

#<<<
Expand Down Expand Up @@ -289,20 +285,18 @@ containers=$(shell cat lib/$*/docker-compose.yml | yq -ojson '.services' | jq '
found=$(shell grep -ch $(containers) tmp/containers.txt)
#<<<
compose-%:
printf "\nOutput of compose-$*: \n" >> $(ERRORLOG)
echo " started compose-$*" >> $(LOGFILE)
@printf "\nOutput of compose-$*: \n" | tee -a $(LOGFILE)
source setup_hosts.sh; \
python settings.py; source env.sh; \
export SERVICE_NAME=$*; \
docker compose -f lib/candigv2/docker-compose.yml -f lib/$*/docker-compose.yml --compatibility up -d 2>&1 | tee -a $(ERRORLOG)
docker compose -f lib/candigv2/docker-compose.yml -f lib/$*/docker-compose.yml --compatibility up -d 2>&1 | tee -a $(LOGFILE)
cat tmp/containers.txt
if [ $(found) -eq 0 ]; then \
echo $(containers) >> tmp/containers.txt; \
fi
if [ -f lib/$*/$*_setup.sh ]; then \
source lib/$*/$*_setup.sh 2>&1 | tee -a $(ERRORLOG); \
source lib/$*/$*_setup.sh 2>&1 | tee -a $(LOGFILE); \
fi
echo " finished compose-$*" >> $(LOGFILE)


#>>>
Expand All @@ -324,8 +318,7 @@ recompose-%:

#<<<
down-%:
printf "\nOutput of down-$*: \n" >> $(ERRORLOG)
echo " started down-$*" >> $(LOGFILE)
@printf "\nOutput of down-$*: \n" | tee -a $(LOGFILE)
source setup_hosts.sh; \
export SERVICE_NAME=$*; \
docker compose -f lib/candigv2/docker-compose.yml -f lib/$*/docker-compose.yml --compatibility down 2>&1
Expand Down Expand Up @@ -360,7 +353,7 @@ docker-push:

#<<<
.PHONY: docker-secrets
docker-secrets: mkdir authx-secrets data-secrets #minio-secrets
docker-secrets: mkdir authx-secrets data-secrets


data-secrets: mkdir
Expand All @@ -383,11 +376,12 @@ authx-secrets: mkdir

minio-secrets: mkdir
@echo "making minio secrets"
@echo $(DEFAULT_ADMIN_USER) > tmp/secrets/minio-access-key
@echo $(DEFAULT_ADMIN_USER) > lib/minio/access-key
$(MAKE) secret-minio-secret-key
@echo '[default]' > tmp/secrets/aws-credentials
@echo "aws_access_key_id=`cat tmp/secrets/minio-access-key`" >> tmp/secrets/aws-credentials
@echo "aws_secret_access_key=`cat tmp/secrets/minio-secret-key`" >> tmp/secrets/aws-credentials
mv tmp/secrets/minio-secret-key lib/minio/secret-key
@echo '[default]' > lib/minio/aws-credentials
@echo "aws_access_key_id=`cat lib/minio/access-key`" >> lib/minio/aws-credentials
@echo "aws_secret_access_key=`cat lib/minio/secret-key`" >> lib/minio/aws-credentials


#>>>
Expand All @@ -399,8 +393,6 @@ minio-secrets: mkdir
docker-volumes:
docker volume create grafana-data --label candigv2=volume
docker volume create jupyter-data --label candigv2=volume
# docker volume create minio-config --label candigv2=volume
# docker volume create minio-data $(MINIO_VOLUME_OPT) --label candigv2=volume
docker volume create prometheus-data --label candigv2=volume
docker volume create toil-jobstore --label candigv2=volume
docker volume create keycloak-data --label candigv2=volume
Expand All @@ -425,14 +417,25 @@ init-authx: mkdir
$(foreach MODULE, $(CANDIG_AUTH_MODULES), $(MAKE) build-$(MODULE); $(MAKE) compose-$(MODULE); python settings.py;)


#>>>
# create a minio container (that won't be removed as part of clean-all)
# make init-minio

#<<<
init-minio: minio-secrets
docker volume create minio-config
docker volume create minio-data $(MINIO_VOLUME_OPT)
docker compose -f lib/candigv2/docker-compose.yml -f lib/minio/docker-compose.yml --compatibility up -d 2>&1 | tee -a $(LOGFILE)


#>>>
# initialize conda environment
# make init-conda

#<<<
.PHONY: init-conda
init-conda:
echo " started init-conda" >> $(LOGFILE)
@printf "\nOutput of init-conda: \n" | tee -a $(LOGFILE)
# source conda's script to be safe, so the conda command is found
source $(CONDA_ENV_SETTINGS) \
&& $(CONDA) create -y -n $(VENV_NAME) python=$(VENV_PYTHON) pip=$(VENV_PIP)
Expand All @@ -445,7 +448,6 @@ init-conda:
#@echo "Load local conda: source bin/miniconda3/etc/profile.d/conda.sh"
#@echo "Activate conda env: conda activate $(VENV_NAME)"
#@echo "Install requirements: pip install -U -r etc/venv/requirements.txt"
echo " finished init-conda" >> $(LOGFILE)


#>>>
Expand Down Expand Up @@ -494,7 +496,7 @@ secret-%:
#<<<
.PHONY: toil-docker
toil-docker:
echo " started toil-docker" >> $(LOGFILE)
@printf "\nOutput of toil-docker: \n" | tee -a $(LOGFILE)
VIRTUAL_ENV=1 DOCKER_BUILDKIT=1 COMPOSE_DOCKER_CLI_BUILD=1 TOIL_DOCKER_REGISTRY=$(DOCKER_REGISTRY) \
$(MAKE) -C lib/toil/toil-docker docker
$(foreach MODULE,$(TOIL_MODULES), \
Expand All @@ -504,7 +506,6 @@ toil-docker:
docker tag $(DOCKER_REGISTRY)/$(MODULE):$(TOIL_VERSION) \
$(DOCKER_REGISTRY)/$(MODULE):latest;)
$(foreach MODULE, $(TOIL_MODULES), docker push $(DOCKER_REGISTRY)/$(MODULE):latest;)
echo " finished toil-docker" >> $(LOGFILE)


#>>>
Expand Down
18 changes: 12 additions & 6 deletions docs/astro.config.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -41,31 +41,31 @@ export default defineConfig({
}),
starlightOpenAPI([
{
base: 'technical/ingest',
base: 'technical/ingest-api',
label: 'ingest api',
schema: 'https://raw.githubusercontent.com/CanDIG/candigv2-ingest/refs/heads/develop/ingest_openapi.yaml',
collapsed: true
},
{
base: 'technical/query',
base: 'technical/query-api',
label: 'query api',
schema: 'https://raw.githubusercontent.com/CanDIG/candigv2-query/refs/heads/stable/query_server/openapi.yaml',
collapsed: true
},
{
base: 'technical/katsu',
base: 'technical/katsu-api',
label: 'katsu api',
schema: 'https://raw.githubusercontent.com/CanDIG/katsu/refs/heads/stable/chord_metadata_service/mohpackets/docs/schemas/schema.yml',
collapsed: true
},
{
base: 'technical/htsget/drs',
base: 'technical/htsget/drs-api',
label: 'htsget drs api',
schema: 'https://raw.githubusercontent.com/CanDIG/htsget_app/refs/heads/stable/htsget_server/drs_openapi.yaml',
collapsed: true
},
{
base: 'technical/htsget/beacon',
base: 'technical/htsget/beacon-api',
label: 'htsget beacon api',
schema: 'https://raw.githubusercontent.com/CanDIG/htsget_app/refs/heads/stable/htsget_server/beacon_openapi.yaml',
collapsed: true
Expand All @@ -76,6 +76,12 @@ export default defineConfig({
schema: 'https://raw.githubusercontent.com/CanDIG/htsget_app/refs/heads/stable/htsget_server/htsget_openapi.yaml',
collapsed: true
},
{
base: 'technical/federation-api',
label: 'htsget operations api',
schema: 'https://raw.githubusercontent.com/CanDIG/federation_service/refs/heads/develop/candig_federation/federation.yaml',
collapsed: true
},
])
],
sidebar: [
Expand Down Expand Up @@ -131,4 +137,4 @@ export default defineConfig({
},
],
})]
});
});
8 changes: 2 additions & 6 deletions docs/src/content/docs/ingest/ingest-help.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ If you receive an error when using the `/ingest/genomic` endpoint something like
This means that the token used by ingest to submit to hts-get has expired. At the moment this happens after 30 minutes. Get a fresh token and try again.
### What if I need to delete or edit data that I already ingested into the system?
## What if I need to delete or edit data that I already ingested into the system?
Currently, there is no way to edit data that is already ingested into CanDIG. To change any data, the data must be deleted and re-ingested. Follow the steps below in order to delete data in CanDIG.
Expand Down Expand Up @@ -125,7 +125,7 @@ This token should be kept secure, it lasts for 30 mins
```bash
curl --request DELETE \
--url $CANDIG_URL'/katsu/v2/ingest/program/$PROGRAM_ID/' \
--url $CANDIG_URL'/katsu/v3/ingest/program/$PROGRAM_ID/' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer '$TOKEN
Expand Down Expand Up @@ -179,7 +179,3 @@ When attempting clinical data ingest into katsu, if you get a response such as b
```
It means you have not yet registered your program before ingesting. Please follow the instructions in [Register Programs](register-programs/) to submit a program authorization before attempting clinical ingest again.
:::danger
This token should be kept secure, it lasts for 30 mins.
:::
8 changes: 5 additions & 3 deletions etc/env/example.env
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# site options
CANDIG_MODULES=logging keycloak vault redis postgres htsget katsu query tyk opa federation candig-ingest candig-data-portal
#minio drs-server wes-server monitoring
#drs-server wes-server monitoring
CANDIG_AUTH_MODULES=keycloak vault tyk opa federation
CANDIG_DATA_MODULES=keycloak vault redis postgres logging

Expand Down Expand Up @@ -301,7 +301,9 @@ CANDIG_DATA_PORTAL_PRIVATE_URL=http://candig-data-portal:3000
TOKEN_PATH = ${PWD}/Vault-Helper-Tool/token.txt
PROGRESS_FILE = ${PWD}/tmp/progress.txt

# error logging
ERRORLOG=tmp/error.txt
# install logging
LOGFILE = tmp/progress.txt

CONDA_INSTALL=bin/miniconda3

COMPOSE_IGNORE_ORPHANS=True
37 changes: 15 additions & 22 deletions etc/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,7 @@ def test_ingest_not_admin_katsu():
while response.status_code == 200 and "status" in response.json():
time.sleep(2)
response = requests.get(f"{ENV['CANDIG_URL']}/ingest/status/{queue_id}", headers=headers)
print(response.text)
assert len(response.json()[f"{ENV['CANDIG_ENV']['CANDIG_SITE_LOCATION']}-SYNTH_01"]["errors"]) == 0
assert len(response.json()[f"{ENV['CANDIG_ENV']['CANDIG_SITE_LOCATION']}-SYNTH_01"]["results"]) == 13
katsu_response = requests.get(f"{ENV['CANDIG_ENV']['KATSU_INGEST_URL']}/v3/discovery/programs/")
Expand Down Expand Up @@ -1077,6 +1078,7 @@ def test_query_donor_search():
assert summary_stats[category][value] == expected_response[category][value]


# Can we find donors by querying a specific region of the genome?
def test_query_genomic():
# tests that a request sent via query to htsget-beacon properly prunes the data
token = get_token(username=ENV['CANDIG_NOT_ADMIN2_USER'],
Expand Down Expand Up @@ -1150,29 +1152,8 @@ def test_query_genomic():
print(f"{donor["program_id"]}: {donor["submitter_donor_id"]}")
assert response and len(response.json()["results"]) == 1

# token = get_token(username=ENV['CANDIG_NOT_ADMIN_USER'],
# password=ENV['CANDIG_NOT_ADMIN_PASSWORD'])
# headers = {
# "Authorization": f"Bearer {token}",
# "Content-Type": "application/json; charset=utf-8",
# }
# params = {
# "gene": "TP53",
# "assembly": "hg38"
# }
# response = requests.get(
# f"{ENV['CANDIG_URL']}/query/query", headers=headers, params=params
# )
# pprint.pprint(response.json())
# if len(response.json()["results"]) != 0:
# print(f"\n\nExpected 0 results from the genomic query using gene name 'TP53' but got {len(response.json()["results"])}")
# if len(response.json()["results"]) > 0:
# print("Got results from:")
# for donor in response.json()["results"]:
# print(f"{donor["program_id"]}: {donor["submitter_donor_id"]}")
# assert response and len(response.json()["results"]) == 0


# Can we use a discovery query to get counts of donors we do not have access to?
def test_query_discovery():
katsu_response = requests.get(
f"{ENV['CANDIG_ENV']['KATSU_INGEST_URL']}/v3/discovery/programs/"
Expand Down Expand Up @@ -1203,6 +1184,18 @@ def test_query_discovery():
assert field in query_response["site"]["required_but_missing"][category]


# Can we check how many donors have genomics data?
def test_query_completeness():
query_response = requests.get(
f"{ENV['CANDIG_ENV']['QUERY_INTERNAL_URL']}/genomic_completeness").json()
pprint.pprint(query_response)
# Verify that the synthetic data shows up
assert "LOCAL-SYNTH_01" in query_response
assert query_response["LOCAL-SYNTH_01"]["genomes"] == 6
assert "LOCAL-SYNTH_02" in query_response
assert query_response["LOCAL-SYNTH_02"]["genomes"] == 5


def test_clean_up():
clean_up_program(f"{ENV['CANDIG_ENV']['CANDIG_SITE_LOCATION']}-SYNTH_01")
clean_up_program(f"{ENV['CANDIG_ENV']['CANDIG_SITE_LOCATION']}-SYNTH_02")
Expand Down
Loading

0 comments on commit 737f08a

Please sign in to comment.