From 588803a1de6bfd95f60d28d9d14caabf2407ccbb Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Thu, 19 Oct 2023 15:47:43 -0400 Subject: [PATCH 01/19] Adding digest_unanalyzed route + debug Operation being linked to Readset when ingesting transfer and GenPipes --- project_tracking/api/project.py | 34 ++++++++--- project_tracking/db_action.py | 90 ++++++++++++++++++++++++---- project_tracking/model.py | 23 +++++++ tests/data/genpipes_rnaseqlight.json | 2 +- tests/data/genpipes_tumourpair.json | 2 +- 5 files changed, 131 insertions(+), 20 deletions(-) diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index c72381d..1299897 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -433,13 +433,14 @@ def ingest_transfer(project_id: str): Add new location to file that has already been moved before the db was created """ - try: - ingest_data = request.get_json(force=True) - except: - flash('Data does not seems to be json') - return redirect(request.url) + if request.method == 'POST': + try: + ingest_data = request.get_json(force=True) + except: + flash('Data does not seems to be json') + return redirect(request.url) - return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)] + return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)] @bp.route('//ingest_genpipes', methods=['GET', 'POST']) # @capitalize @@ -463,13 +464,30 @@ def ingest_genpipes(project_id: str): return redirect(request.url) project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) - if project_id != project_id_from_name: + if [int(project_id)] != project_id_from_name: return abort( 400, - f"project name in POST {ingest_data[vc.PROJECT_NAME].upper()} not Valid, {project_id} requires" + f"project name in POST {ingest_data[vc.PROJECT_NAME].upper()} not in the database, {project_id} required" ) output = 
db_action.ingest_genpipes(project_id=project_id, ingest_data=ingest_data) operation = output[0].flat_dict jobs = [job.flat_dict for job in output[1]] return [operation, jobs] + +@bp.route('//digest_unanalyzed', methods=['POST']) +def digest_unanalyzed(project_id: str): + """ + POST: list of Readset/Sample Name or id + return: Readsets or Samples unanalyzed + """ + logger.debug(f"\n\n{project_id}\n\n") + if request.method == 'POST': + try: + ingest_data = request.get_json(force=True) + except: + flash('Data does not seems to be json') + return redirect(request.url) + + return db_action.digest_unanalyzed(project_id=project_id, digest_data=ingest_data) + # return [i.flat_dict for i in db_action.digest_unanalyzed(project_id=project_id, digest_data=ingest_data)] diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index f0a301b..c0c3a4e 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -521,7 +521,7 @@ def create_project(project_name, fms_id=None, session=None): return session.scalars(select(Project).where(Project.name == project_name)).one() -def ingest_run_processing(project_id, ingest_data, session=None): +def ingest_run_processing(project_id: str, ingest_data, session=None): """Ingesting run for MoH""" if not isinstance(ingest_data, dict): ingest_data = json.loads(ingest_data) @@ -658,7 +658,7 @@ def ingest_run_processing(project_id, ingest_data, session=None): return [operation, job] -def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=True): +def ingest_transfer(project_id: str, ingest_data, session=None, check_readset_name=True): """Ingesting transfer""" if not isinstance(ingest_data, dict): ingest_data = json.loads(ingest_data) @@ -682,9 +682,10 @@ def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=Tr stop=datetime.now(), operation=operation ) - + readset_list = [] for readset_json in ingest_data[vb.READSET]: readset_name = readset_json[vb.READSET_NAME] + 
readset_list.append(session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first()) for file_json in readset_json[vb.FILE]: src_uri = file_json[vb.SRC_LOCATION_URI] dest_uri = file_json[vb.DEST_LOCATION_URI] @@ -692,8 +693,8 @@ def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=Tr file = session.scalars( select(File) .join(File.readsets) - .where(Readset.name == readset_name ) - .join(File.locations ) + .where(Readset.name == readset_name) + .join(File.locations) .where(Location.uri == src_uri) ).unique().first() if not file: @@ -712,6 +713,7 @@ def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=Tr new_location = Location.from_uri(uri=dest_uri, file=file, session=session) file.jobs.append(job) session.add(new_location) + operation.readsets = readset_list session.add(job) session.flush() @@ -733,7 +735,7 @@ def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=Tr return [operation, job] -def digest_readset_file(project_id, digest_data, session=None): +def digest_readset_file(project_id: str, digest_data, session=None): """Digesting readset file fields for GenPipes""" if not session: session = database.get_session() @@ -840,7 +842,7 @@ def digest_readset_file(project_id, digest_data, session=None): output.append(readset_line) return json.dumps(output) -def digest_pair_file(project_id, digest_data, session=None): +def digest_pair_file(project_id: str, digest_data, session=None): """Digesting pair file fields for GenPipes""" if not session: session = database.get_session() @@ -904,7 +906,7 @@ def digest_pair_file(project_id, digest_data, session=None): return json.dumps(output) -def ingest_genpipes(project_id, ingest_data, session=None): +def ingest_genpipes(project_id: str, ingest_data, session=None): """Ingesting GenPipes run""" if not isinstance(ingest_data, dict): ingest_data = json.loads(ingest_data) @@ -914,7 +916,7 @@ def ingest_genpipes(project_id, ingest_data, 
session=None): project = projects(project_id=project_id, session=session)[0] - operation_config = OperationConfig( + operation_config = OperationConfig.from_attributes( name=ingest_data[vb.OPERATION_CONFIG_NAME], version=ingest_data[vb.OPERATION_CONFIG_VERSION], md5sum=ingest_data[vb.OPERATION_CONFIG_MD5SUM], @@ -930,6 +932,7 @@ def ingest_genpipes(project_id, ingest_data, session=None): operation_config=operation_config ) + readset_list = [] for sample_json in ingest_data[vb.SAMPLE]: sample = session.scalars( select(Sample) @@ -942,6 +945,7 @@ def ingest_genpipes(project_id, ingest_data, session=None): select(Readset) .where(Readset.name == readset_json[vb.READSET_NAME]) ).unique().first() + readset_list.append(readset) if not readset: raise DidNotFindError(f"No readset named {readset_json[vb.READSET_NAME]}") if readset.sample != sample: @@ -1007,7 +1011,7 @@ def ingest_genpipes(project_id, ingest_data, session=None): session.add(job) session.flush() - + operation.readsets = readset_list operation_id = operation.id job_ids = [job.id for job in operation.jobs] try: @@ -1022,3 +1026,69 @@ def ingest_genpipes(project_id, ingest_data, session=None): jobs = [session.scalars(select(Job).where(Job.id == job_id)).first() for job_id in job_ids] return [operation, jobs] + + +def digest_unanalyzed(project_id: str, digest_data, session=None): + """ + Getting unanalyzed samples or readsets + """ + if not session: + session = database.get_session() + + session = database.get_session() + + if isinstance(project_id, str): + project_id = [project_id] + + sample_name_flag = digest_data["sample_name"] + sample_id_flag = digest_data["sample_id"] + readset_name_flag = digest_data["readset_name"] + readset_id_flag = digest_data["readset_id"] + run_id = digest_data["run_id"] + run_name = digest_data["run_name"] + if run_name: + run_id = name_to_id("Run", run_name)[0] + experiment_sequencing_technology = digest_data["experiment_sequencing_technology"] + location_endpoint = 
digest_data["location_endpoint"] + + if sample_name_flag: + stmt = select(Sample.name) + key = "sample_name" + elif sample_id_flag: + stmt = select(Sample.id) + key = "sample_id" + elif readset_name_flag: + stmt = select(Readset.name) + key = "readset_name" + elif readset_id_flag: + stmt = select(Readset.id) + key = "readset_id" + + stmt = ( + stmt.join(Sample.readsets) + .join(Readset.operations) + .where(Operation.name.ilike(f"%genpipes%")) + .join(Sample.patient) + .join(Patient.project) + .where(Project.id.in_(project_id)) + ) + + if run_id: + stmt = ( + stmt.where(Run.id == run_id) + .join(Readset.run) + ) + if experiment_sequencing_technology: + stmt = ( + stmt.where(Experiment.sequencing_technology == experiment_sequencing_technology) + .join(Readset.experiment) + ) + + # logger.debug(f"\n\n{stmt}\n\n") + output = { + "location_endpoint": location_endpoint, + key: session.scalars(stmt).unique().all() + } + # logger.debug(f"\n\n{session.scalars(stmt).unique().all()}\n\n") + + return json.dumps(output) diff --git a/project_tracking/model.py b/project_tracking/model.py index 4abbc71..2b74951 100644 --- a/project_tracking/model.py +++ b/project_tracking/model.py @@ -556,6 +556,29 @@ def config_data(cls, data): """ pass + @classmethod + def from_attributes(cls, name=None, version=None, md5sum=None, data=None, session=None): + """ + get operation_config if it exist, set it if it does not exist + """ + if not session: + session = database.get_session() + operation_config = session.scalars( + select(cls) + .where(cls.name == name) + .where(cls.version == version) + .where(cls.md5sum == md5sum) + .where(cls.data == data) + ).first() + if not operation_config: + operation_config = cls( + name=name, + version=version, + md5sum=md5sum, + data=data + ) + return operation_config + class Job(BaseTable): """ diff --git a/tests/data/genpipes_rnaseqlight.json b/tests/data/genpipes_rnaseqlight.json index 516b6c1..beb4d21 100644 --- a/tests/data/genpipes_rnaseqlight.json +++ 
b/tests/data/genpipes_rnaseqlight.json @@ -6,7 +6,7 @@ "operation_config_data": "[DEFAULT]\ncluster_server = beluga.genome.mcgill.ca\nsequencing_center = McGill Genome Centre\ncluster_submit_cmd_suffix = | grep \"[0-9]\" | cut -d\\ -f4\ncluster_other_arg = --mail-type=END,FAIL --mail-user=$JOB_MAIL -A $RAP_ID\ncluster_work_dir_arg = -D\ncluster_output_dir_arg = -o\ncluster_job_name_arg = -J\ncluster_cmd_produces_job_id = true\ncluster_dependency_arg = --depend=afterok:\ncluster_dependency_sep = :\ncluster_max_jobs = 3000\ntmp_dir = ${SLURM_TMPDIR}\nportal_output_dir = $PORTAL_OUTPUT_DIR\ncluster_walltime = 24:00:00\ncluster_cpu = 1\ncluster_node = 1\nALL_CPU = 40\nHALF_CPU = 20\nQUART_CPU = 10\nPINT_CPU = 5\nALL_MEM = 187G\nHALF_MEM = 90G\nQUART_MEM = 60G\nPINT_MEM = 30G\ncluster_mem = 4700M per cpu\ncluster_queue = \nLARGE_QUEUE = \nmodule_java = mugqic/java/openjdk-jdk1.8.0_72\nmodule_mugqic_R_packages = mugqic/mugqic_R_packages/1.0.6\nmodule_mugqic_tools = mugqic/mugqic_tools/2.8.1\nmodule_pandoc = mugqic/pandoc/2.16.1\nmodule_picard = mugqic/picard/2.0.1\nmodule_python = mugqic/python/2.7.11\nmodule_R = mugqic/R_Bioconductor/3.5.0_3.7\nmodule_trimmomatic = mugqic/trimmomatic/0.35\nmodule_kallisto = mugqic/kallisto/0.44.0\nmodule_perl = mugqic/perl/5.22.1\nscientific_name = Homo_sapiens\nassembly = GRCh38\nsource = Ensembl\nversion = 104\nassembly_dir = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s\nannotations_prefix = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.%(source)s%(version)s\ngenome_fasta = %(assembly_dir)s/genome/%(scientific_name)s.%(assembly)s.fa\ngtf = %(annotations_prefix)s.gtf\ngtf_transcript_id = %(annotations_prefix)s.transcript_id.gtf\nribosomal_fasta = %(assembly_dir)s/annotations/rrna_bwa_index/%(scientific_name)s.%(assembly)s.%(source)s%(version)s.rrna.fa\nchromosome_size = %(assembly_dir)s/genome/%(scientific_name)s.%(assembly)s.fa.fai\ngenes = %(annotations_prefix)s.genes.tsv\ngene_size = 
%(annotations_prefix)s.genes.length.tsv\ngene_ontology = %(annotations_prefix)s.GO.tsv\nannotation_flat = %(annotations_prefix)s.ref_flat.tsv\njava_other_options = -XX:ParallelGCThreads=4\nprotocol = TrueSeq mRNA\ncycle_number = 100\nstrand_info = fr-firststrand\nALL_MEM_EPS = 180G\nGPU_QUEUE = \ncommon_name = Human\nassembly_synonyms = hg38\ndbsnp_version = 142\ngnomad_exome = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.gnomad.exomes.r2.0.2.sites.no-VEP.nohist.tidy.vcf.gz\ndbnsfp = %(assembly_dir)s/annotations/dbNSFPv3.5a/dbNSFPv3.5a.txt.gz.txt.gz\naf_gnomad = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.af-only-gnomad.vcf.gz\nCTAT_bundle_version = GRCh38_gencode_v32_CTAT_lib_Dec062019\ngenome_mappability_bed_indexed = %(assembly_dir)s/annotations/mappabilityGC/GRCh38_100bpPAIREDEND.exclusion.bed\npopulation_AF = 1000Gp1_EUR_AF\nexcluded_chromosome = chrM,chr1_KI270706v1_random,chr1_KI270707v1_random,chr1_KI270708v1_random,chr1_KI270709v1_random,chr1_KI270710v1_random,chr1_KI270711v1_random,chr1_KI270712v1_random,chr1_KI270713v1_random,chr1_KI270714v1_random,chr2_KI270715v1_random,chr2_KI270716v1_random,chr3_GL000221v1_random,chr4_GL000008v2_random,chr5_GL000208v1_random,chr9_KI270717v1_random,chr9_KI270718v1_random,chr9_KI270719v1_random,chr9_KI270720v1_random,chr11_KI270721v1_random,chr14_GL000009v2_random,chr14_GL000225v1_random,chr14_KI270722v1_random,chr14_GL000194v1_random,chr14_KI270723v1_random,chr14_KI270724v1_random,chr14_KI270725v1_random,chr14_KI270726v1_random,chr15_KI270727v1_random,chr16_KI270728v1_random,chr17_GL000205v2_random,chr17_KI270729v1_random,chr17_KI270730v1_random,chr22_KI270731v1_random,chr22_KI270732v1_random,chr22_KI270733v1_random,chr22_KI270734v1_random,chr22_KI270735v1_random,chr22_KI270736v1_random,chr22_KI270737v1_random,chr22_KI270738v1_random,chr22_KI270739v1_random,chrY_KI270740v1_random,chrUn_KI270302v1,chrUn_KI270304v1,chrUn_KI270303v1,chrUn_KI270305v1,chrUn_KI270322v1,chrUn_KI270320v1,chrUn
_KI270310v1,chrUn_KI270316v1,chrUn_KI270315v1,chrUn_KI270312v1,chrUn_KI270311v1,chrUn_KI270317v1,chrUn_KI270412v1,chrUn_KI270411v1,chrUn_KI270414v1,chrUn_KI270419v1,chrUn_KI270418v1,chrUn_KI270420v1,chrUn_KI270424v1,chrUn_KI270417v1,chrUn_KI270422v1,chrUn_KI270423v1,chrUn_KI270425v1,chrUn_KI270429v1,chrUn_KI270442v1,chrUn_KI270466v1,chrUn_KI270465v1,chrUn_KI270467v1,chrUn_KI270435v1,chrUn_KI270438v1,chrUn_KI270468v1,chrUn_KI270510v1,chrUn_KI270509v1,chrUn_KI270518v1,chrUn_KI270508v1,chrUn_KI270516v1,chrUn_KI270512v1,chrUn_KI270519v1,chrUn_KI270522v1,chrUn_KI270511v1,chrUn_KI270515v1,chrUn_KI270507v1,chrUn_KI270517v1,chrUn_KI270529v1,chrUn_KI270528v1,chrUn_KI270530v1,chrUn_KI270539v1,chrUn_KI270538v1,chrUn_KI270544v1,chrUn_KI270548v1,chrUn_KI270583v1,chrUn_KI270587v1,chrUn_KI270580v1,chrUn_KI270581v1,chrUn_KI270579v1,chrUn_KI270589v1,chrUn_KI270590v1,chrUn_KI270584v1,chrUn_KI270582v1,chrUn_KI270588v1,chrUn_KI270593v1,chrUn_KI270591v1,chrUn_KI270330v1,chrUn_KI270329v1,chrUn_KI270334v1,chrUn_KI270333v1,chrUn_KI270335v1,chrUn_KI270338v1,chrUn_KI270340v1,chrUn_KI270336v1,chrUn_KI270337v1,chrUn_KI270363v1,chrUn_KI270364v1,chrUn_KI270362v1,chrUn_KI270366v1,chrUn_KI270378v1,chrUn_KI270379v1,chrUn_KI270389v1,chrUn_KI270390v1,chrUn_KI270387v1,chrUn_KI270395v1,chrUn_KI270396v1,chrUn_KI270388v1,chrUn_KI270394v1,chrUn_KI270386v1,chrUn_KI270391v1,chrUn_KI270383v1,chrUn_KI270393v1,chrUn_KI270384v1,chrUn_KI270392v1,chrUn_KI270381v1,chrUn_KI270385v1,chrUn_KI270382v1,chrUn_KI270376v1,chrUn_KI270374v1,chrUn_KI270372v1,chrUn_KI270373v1,chrUn_KI270375v1,chrUn_KI270371v1,chrUn_KI270448v1,chrUn_KI270521v1,chrUn_GL000195v1,chrUn_GL000219v1,chrUn_GL000220v1,chrUn_GL000224v1,chrUn_KI270741v1,chrUn_GL000226v1,chrUn_GL000213v1,chrUn_KI270743v1,chrUn_KI270744v1,chrUn_KI270745v1,chrUn_KI270746v1,chrUn_KI270747v1,chrUn_KI270748v1,chrUn_KI270749v1,chrUn_KI270750v1,chrUn_KI270751v1,chrUn_KI270752v1,chrUn_KI270753v1,chrUn_KI270754v1,chrUn_KI270755v1,chrUn_KI270756v1,chrUn_KI270757v1,chrUn_GL000214v1
,chrUn_KI270742v1,chrUn_GL000216v2,chrUn_GL000218v1,chrEBV\n\n[picard_sam_to_fastq]\njava_other_options = -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\nram = 16000G\ncluster_mem = 16G\n\n[trimmomatic]\ncluster_mem = 20G\nram = 20G\ncluster_cpu = %(QUART_CPU)s\nthreads = %(cluster_cpu)s\ntrailing_min_quality = 30\nmin_length = 32\nillumina_clip_settings = :2:30:15:8:true\ncluster_walltime = --time=24:00:0\njava_other_options = -XX:ParallelGCThreads=5 -Dsamjdk.buffer_size=1048576\n\n[kallisto]\ntranscriptome_idx = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s/annotations/cdna_kallisto_index/%(scientific_name)s.%(assembly)s.%(source)s%(version)s.cdna.fa.idx\ntranscript2genes = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s/annotations/cdna_kallisto_index/%(scientific_name)s.%(assembly)s.%(source)s%(version)s.tx2gene\ncluster_walltime = 23:00:0\ncluster_cpu = 10\nbootstraps = 120\nfragment_length = 120\nfragment_length_sd = 20\ncluster_mem = 42G\n\n[kallisto_count_matrix]\ncluster_walltime = --time=23:30:0\ncluster_cpu = 2\ncluster_mem = 24G\n\n[gq_seq_utils_exploratory_analysis_rnaseq_light]\ncluster_walltime = 00:30:0\ncluster_cpu = 2\ncluster_mem = 24G\n\n[sleuth_differential_expression]\ntx2gene = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s/annotations/%(scientific_name)s.%(assembly)s.%(source)s%(version)s.tx2gene.csv\ncluster_mem = 16G\n\n[report]\ntitle = RNA-Seq Light Analysis Report\ncluster_walltime = 30:00\n\n[run_checkmate]\nbed = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.SNP_GRCh38_hg19_woChr.bed\n\n[compute_effects]\nsnpeff_genome = %(assembly_synonyms)s\n\n[conpair_concordance_contamination]\nmarkers_bed = ${CONPAIR_DATA}/markers/%(assembly)s.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.liftover.bed\nmarkers_txt = 
${CONPAIR_DATA}/markers/%(assembly)s.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.liftover.txt\n\n[gatk_mutect2]\npon = \n\n[amber]\nloci = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GermlineHetPon.vcf.gz\n\n[cobalt]\ngc_profile = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GC_profile.1000bp.cnp\n\n[purple]\ngc_profile = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GC_profile.1000bp.cnp\n\n[report_cpsr]\nassembly = grch38\n\n[report_pcgr]\nassembly = grch38\n\n[delly_call_filter]\nexclude_list = ${DELLY_PATH}/excludeTemplates/human.hg38.excl.tsv\n\n[cnvkit_batch]\naccess = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.access-5k-mappable.bed\nrefFlat = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.UCSC.ref_flat.tsv\n\n[wham_call_sv]\nexclude = chr1_KI270706v1_random,chr1_KI270707v1_random,chr1_KI270708v1_random,chr1_KI270709v1_random,chr1_KI270710v1_random,chr1_KI270711v1_random,chr1_KI270712v1_random,chr1_KI270713v1_random,chr1_KI270714v1_random,chr2_KI270715v1_random,chr2_KI270716v1_random,chr3_GL000221v1_random,chr4_GL000008v2_random,chr5_GL000208v1_random,chr9_KI270717v1_random,chr9_KI270718v1_random,chr9_KI270719v1_random,chr9_KI270720v1_random,chr11_KI270721v1_random,chr14_GL000009v2_random,chr14_GL000225v1_random,chr14_KI270722v1_random,chr14_GL000194v1_random,chr14_KI270723v1_random,chr14_KI270724v1_random,chr14_KI270725v1_random,chr14_KI270726v1_random,chr15_KI270727v1_random,chr16_KI270728v1_random,chr17_GL000205v2_random,chr17_KI270729v1_random,chr17_KI270730v1_random,chr22_KI270731v1_random,chr22_KI270732v1_random,chr22_KI270733v1_random,chr22_KI270734v1_random,chr22_KI270735v1_random,chr22_KI270736v1_random,chr22_KI270737v1_random,chr22_KI270738v1_random,chr22_KI270739v1_random,chrY_KI270740v1_random,chrUn_KI270302v1,chrUn_KI270304v1,chrUn_KI270303v1,chrUn_KI270305v1,chrUn_KI270322v1,chrUn_KI270320v1,chrUn_KI270310v1,chrUn
_KI270316v1,chrUn_KI270315v1,chrUn_KI270312v1,chrUn_KI270311v1,chrUn_KI270317v1,chrUn_KI270412v1,chrUn_KI270411v1,chrUn_KI270414v1,chrUn_KI270419v1,chrUn_KI270418v1,chrUn_KI270420v1,chrUn_KI270424v1,chrUn_KI270417v1,chrUn_KI270422v1,chrUn_KI270423v1,chrUn_KI270425v1,chrUn_KI270429v1,chrUn_KI270442v1,chrUn_KI270466v1,chrUn_KI270465v1,chrUn_KI270467v1,chrUn_KI270435v1,chrUn_KI270438v1,chrUn_KI270468v1,chrUn_KI270510v1,chrUn_KI270509v1,chrUn_KI270518v1,chrUn_KI270508v1,chrUn_KI270516v1,chrUn_KI270512v1,chrUn_KI270519v1,chrUn_KI270522v1,chrUn_KI270511v1,chrUn_KI270515v1,chrUn_KI270507v1,chrUn_KI270517v1,chrUn_KI270529v1,chrUn_KI270528v1,chrUn_KI270530v1,chrUn_KI270539v1,chrUn_KI270538v1,chrUn_KI270544v1,chrUn_KI270548v1,chrUn_KI270583v1,chrUn_KI270587v1,chrUn_KI270580v1,chrUn_KI270581v1,chrUn_KI270579v1,chrUn_KI270589v1,chrUn_KI270590v1,chrUn_KI270584v1,chrUn_KI270582v1,chrUn_KI270588v1,chrUn_KI270593v1,chrUn_KI270591v1,chrUn_KI270330v1,chrUn_KI270329v1,chrUn_KI270334v1,chrUn_KI270333v1,chrUn_KI270335v1,chrUn_KI270338v1,chrUn_KI270340v1,chrUn_KI270336v1,chrUn_KI270337v1,chrUn_KI270363v1,chrUn_KI270364v1,chrUn_KI270362v1,chrUn_KI270366v1,chrUn_KI270378v1,chrUn_KI270379v1,chrUn_KI270389v1,chrUn_KI270390v1,chrUn_KI270387v1,chrUn_KI270395v1,chrUn_KI270396v1,chrUn_KI270388v1,chrUn_KI270394v1,chrUn_KI270386v1,chrUn_KI270391v1,chrUn_KI270383v1,chrUn_KI270393v1,chrUn_KI270384v1,chrUn_KI270392v1,chrUn_KI270381v1,chrUn_KI270385v1,chrUn_KI270382v1,chrUn_KI270376v1,chrUn_KI270374v1,chrUn_KI270372v1,chrUn_KI270373v1,chrUn_KI270375v1,chrUn_KI270371v1,chrUn_KI270448v1,chrUn_KI270521v1,chrUn_GL000195v1,chrUn_GL000219v1,chrUn_GL000220v1,chrUn_GL000224v1,chrUn_KI270741v1,chrUn_GL000226v1,chrUn_GL000213v1,chrUn_KI270743v1,chrUn_KI270744v1,chrUn_KI270745v1,chrUn_KI270746v1,chrUn_KI270747v1,chrUn_KI270748v1,chrUn_KI270749v1,chrUn_KI270750v1,chrUn_KI270751v1,chrUn_KI270752v1,chrUn_KI270753v1,chrUn_KI270754v1,chrUn_KI270755v1,chrUn_KI270756v1,chrUn_KI270757v1,chrUn_GL000214v1,chrUn_KI270742v1
,chrUn_GL000216v2,chrUn_GL000218v1,chrEBV\n\n[run_arriba]\nblacklist = $ARRIBA_HOME/database/blacklist_hg38_GRCh38_2018-01-13.tsv", "operation_platform": "beluga", "operation_cmd_line": "module purge\nmodule load python/3.10.2 mugqic/genpipes/4.2.0\nrnaseq_light.py \n -j slurm \n -r readset.txt \n -s 1-5 \n -c $MUGQIC_PIPELINES_HOME/pipelines/rnaseq_light/rnaseq_light.base.ini \n $MUGQIC_PIPELINES_HOME/pipelines/common_ini/beluga.ini \n $MUGQIC_PIPELINES_HOME/resources/genomes/config/Homo_sapiens.GRCh38.ini \n RNA_light.custom.ini \n > RNASeq_light_run.sh\nrm -r RNA_CHUNKS;\nmkdir RNA_CHUNKS;\n$MUGQIC_PIPELINES_HOME/utils/chunk_genpipes.sh -n 100 RNASeq_light_run.sh RNA_CHUNKS", - "operation_name": "genpipes_rnaseq_light", + "operation_name": "GenPipes_RnaSeq.cancer", "sample": [ { "sample_name": "MoHQ-CM-1-3-6929-1RT", diff --git a/tests/data/genpipes_tumourpair.json b/tests/data/genpipes_tumourpair.json index dc201de..526b011 100644 --- a/tests/data/genpipes_tumourpair.json +++ b/tests/data/genpipes_tumourpair.json @@ -5,7 +5,7 @@ "operation_config_data": "\\\n[DEFAULT]\\\ncluster_server = beluga.genome.mcgill.ca\\\nsequencing_center = McGill Genome Centre\\\ncluster_submit_cmd_suffix = | grep \"[0-9]\" | cut -d\\ -f4\\\ncluster_other_arg = --mail-type=FAIL --mail-user=$JOB_MAIL -A $RAP_ID\\\ncluster_work_dir_arg = -D\\\ncluster_output_dir_arg = -o\\\ncluster_job_name_arg = -J\\\ncluster_cmd_produces_job_id = true\\\ncluster_dependency_arg = --depend=afterok:\\\ncluster_dependency_sep = :\\\ncluster_max_jobs = 3000\\\ntmp_dir = ${SLURM_TMPDIR}\\\nportal_output_dir = $PORTAL_OUTPUT_DIR\\\ncluster_walltime = 24:00:00\\\ncluster_cpu = 1\\\ncluster_node = 1\\\nALL_CPU = 40\\\nHALF_CPU = 20\\\nQUART_CPU = 10\\\nPINT_CPU = 5\\\nALL_MEM = 187G\\\nHALF_MEM = 90G\\\nQUART_MEM = 60G\\\nPINT_MEM = 30G\\\ncluster_mem = 4700M per cpu\\\ncluster_queue = \\\nLARGE_QUEUE = \\\nmodule_bvatools = mugqic/bvatools/1.6\\\nmodule_bwa = mugqic/bwa/0.7.17\\\nmodule_bwakit = 
mugqic/bwakit/0.7.15\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.1.8.1\\\nmodule_sambamba = mugqic/sambamba/0.8.0\\\nmodule_igvtools = mugqic/igvtools/2.3.14\\\nmodule_java = mugqic/java/openjdk-jdk1.8.0_72\\\nmodule_mugqic_R_packages = mugqic/mugqic_R_packages/1.0.4\\\nmodule_mugqic_tools = mugqic/mugqic_tools/2.10.5\\\nmodule_pandoc = mugqic/pandoc/2.16.1\\\nmodule_perl = mugqic/perl/5.34.0\\\nmodule_picard = mugqic/picard/2.9.0\\\nmodule_python = mugqic/python/3.9.1\\\nmodule_R = mugqic/R_Bioconductor/3.5.0_3.7\\\nmodule_samtools = mugqic/samtools/1.12\\\nmodule_bcftools = mugqic/bcftools/1.15\\\nmodule_snpeff = mugqic/snpEff/4.3\\\nmodule_trimmomatic = mugqic/trimmomatic/0.35\\\nmodule_vcftools = mugqic/vcftools/0.1.14\\\nmodule_skewer = mugqic/skewer/0.2.2\\\nmodule_qualimap = mugqic/qualimap/2.2.2dev\\\nmodule_fastqc = mugqic/fastqc/0.11.5\\\nmodule_htslib = mugqic/htslib/1.14\\\nmodule_verify_bam_id = mugqic/verifyBamID/1.1.3\\\nmodule_vt = mugqic/vt/0.57\\\nmodule_gemini = mugqic/gemini/0.20.1\\\nmodule_multiqc = mugqic/MultiQC/1.9\\\nmodule_checkmate = mugqic/NGSCheckMate/1.0.0_rjme\\\nmodule_variantBam = mugqic/variantbam/1.4.3\\\nmodule_cnvkit = mugqic/CNVkit/0.9.9\\\nmodule_delly = mugqic/Delly/0.8.1\\\nmodule_sv_annotations = mugqic/simple_sv_annotation/1.0.0\\\nmodule_manta = mugqic/Manta/1.5.0\\\nmodule_samblaster = mugqic/samblaster/0.1.24\\\nmodule_lumpy = mugqic/LUMPY-SV/0.2.13\\\nmodule_wham = mugqic/WHAM/1.8.0\\\nmodule_breakseq2 = mugqic/breakseq2/2.2\\\nmodule_vcflib = mugqic/vcflib/1.0.0\\\nmodule_spades = mugqic/SPAdes/3.10.0\\\nmodule_age = mugqic/AGE/master-20181210\\\nmodule_bedtools = mugqic/bedtools/2.26.0\\\nmodule_vawk = mugqic/vawk/0.0.2\\\nmodule_svaba = mugqic/SvABA/1.1.0\\\nscientific_name = Homo_sapiens\\\nassembly = GRCh38\\\nsource = Ensembl\\\nversion = 102\\\nassembly_alias = b37\\\ndbsnp_version = 142\\\nassembly_dir = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s\\\ngenome_fasta = 
%(assembly_dir)s/genome/%(scientific_name)s.%(assembly)s.fa\\\ngenome_dictionary = %(assembly_dir)s/genome/%(scientific_name)s.%(assembly)s.dict\\\ngenome_bwa_index = %(assembly_dir)s/genome/bwa_index/%(scientific_name)s.%(assembly)s.fa\\\nknown_variants = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.dbSNP%(dbsnp_version)s.vcf.gz\\\nigv_genome = %(genome_fasta)s.fai\\\nsnpeff_genome = %(assembly)s.%(version)s\\\nhapmap = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.hapmap_3.3.vcf.gz\\\n1000G = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.1000G_phase1.snps.high_confidence.vcf.gz\\\ndbsnp = %(known_variants)s\\\nomni = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.1000G_omni2.5.vcf.gz\\\nmills = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.Mills_and_1000G_gold_standard.indels.vcf.gz\\\ngenome_mappability_bed_indexed = %(assembly_dir)s/annotations/mappabilityGC/GRCh38_100bpPAIREDEND.exclusion.bed\\\ndbnsfp = %(assembly_dir)s/annotations/dbNSFPv3.5a/dbNSFPv3.5a.txt.gz.txt.gz\\\ncommon_snp_positions = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.1000G_phase1.snps.high_confidence.allpop_q200.tsv\\\ngnomad_exome = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.gnomad.exomes.r2.0.2.sites.no-VEP.nohist.tidy.vcf.gz\\\naf_gnomad = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.af-only-gnomad.vcf.gz\\\nvcf_header = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.vcf_header.tsv\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\ngatk4_java_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304 -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false\\\nexperiment_type = wholeGenome\\\nexperiment_type_abrev = wgs\\\npopulation_AF = 1000Gp1_EUR_AF\\\nverifyBamID_variants_file = 
%(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.dbSNP%(dbsnp_version)s_1000Gp1_%(population_AF)s_AF.vcf\\\nspecies_vcf_format_descriptor = %(assembly_dir)s/annotations/HumanVCFformatDescriptor.tsv\\\nexcluded_chromosome = chrM,chr1_KI270706v1_random,chr1_KI270707v1_random,chr1_KI270708v1_random,chr1_KI270709v1_random,chr1_KI270710v1_random,chr1_KI270711v1_random,chr1_KI270712v1_random,chr1_KI270713v1_random,chr1_KI270714v1_random,chr2_KI270715v1_random,chr2_KI270716v1_random,chr3_GL000221v1_random,chr4_GL000008v2_random,chr5_GL000208v1_random,chr9_KI270717v1_random,chr9_KI270718v1_random,chr9_KI270719v1_random,chr9_KI270720v1_random,chr11_KI270721v1_random,chr14_GL000009v2_random,chr14_GL000225v1_random,chr14_KI270722v1_random,chr14_GL000194v1_random,chr14_KI270723v1_random,chr14_KI270724v1_random,chr14_KI270725v1_random,chr14_KI270726v1_random,chr15_KI270727v1_random,chr16_KI270728v1_random,chr17_GL000205v2_random,chr17_KI270729v1_random,chr17_KI270730v1_random,chr22_KI270731v1_random,chr22_KI270732v1_random,chr22_KI270733v1_random,chr22_KI270734v1_random,chr22_KI270735v1_random,chr22_KI270736v1_random,chr22_KI270737v1_random,chr22_KI270738v1_random,chr22_KI270739v1_random,chrY_KI270740v1_random,chrUn_KI270302v1,chrUn_KI270304v1,chrUn_KI270303v1,chrUn_KI270305v1,chrUn_KI270322v1,chrUn_KI270320v1,chrUn_KI270310v1,chrUn_KI270316v1,chrUn_KI270315v1,chrUn_KI270312v1,chrUn_KI270311v1,chrUn_KI270317v1,chrUn_KI270412v1,chrUn_KI270411v1,chrUn_KI270414v1,chrUn_KI270419v1,chrUn_KI270418v1,chrUn_KI270420v1,chrUn_KI270424v1,chrUn_KI270417v1,chrUn_KI270422v1,chrUn_KI270423v1,chrUn_KI270425v1,chrUn_KI270429v1,chrUn_KI270442v1,chrUn_KI270466v1,chrUn_KI270465v1,chrUn_KI270467v1,chrUn_KI270435v1,chrUn_KI270438v1,chrUn_KI270468v1,chrUn_KI270510v1,chrUn_KI270509v1,chrUn_KI270518v1,chrUn_KI270508v1,chrUn_KI270516v1,chrUn_KI270512v1,chrUn_KI270519v1,chrUn_KI270522v1,chrUn_KI270511v1,chrUn_KI270515v1,chrUn_KI270507v1,chrUn_KI270517v1,chrUn_KI270529v1,chrUn_KI270528v1,chrUn
_KI270530v1,chrUn_KI270539v1,chrUn_KI270538v1,chrUn_KI270544v1,chrUn_KI270548v1,chrUn_KI270583v1,chrUn_KI270587v1,chrUn_KI270580v1,chrUn_KI270581v1,chrUn_KI270579v1,chrUn_KI270589v1,chrUn_KI270590v1,chrUn_KI270584v1,chrUn_KI270582v1,chrUn_KI270588v1,chrUn_KI270593v1,chrUn_KI270591v1,chrUn_KI270330v1,chrUn_KI270329v1,chrUn_KI270334v1,chrUn_KI270333v1,chrUn_KI270335v1,chrUn_KI270338v1,chrUn_KI270340v1,chrUn_KI270336v1,chrUn_KI270337v1,chrUn_KI270363v1,chrUn_KI270364v1,chrUn_KI270362v1,chrUn_KI270366v1,chrUn_KI270378v1,chrUn_KI270379v1,chrUn_KI270389v1,chrUn_KI270390v1,chrUn_KI270387v1,chrUn_KI270395v1,chrUn_KI270396v1,chrUn_KI270388v1,chrUn_KI270394v1,chrUn_KI270386v1,chrUn_KI270391v1,chrUn_KI270383v1,chrUn_KI270393v1,chrUn_KI270384v1,chrUn_KI270392v1,chrUn_KI270381v1,chrUn_KI270385v1,chrUn_KI270382v1,chrUn_KI270376v1,chrUn_KI270374v1,chrUn_KI270372v1,chrUn_KI270373v1,chrUn_KI270375v1,chrUn_KI270371v1,chrUn_KI270448v1,chrUn_KI270521v1,chrUn_GL000195v1,chrUn_GL000219v1,chrUn_GL000220v1,chrUn_GL000224v1,chrUn_KI270741v1,chrUn_GL000226v1,chrUn_GL000213v1,chrUn_KI270743v1,chrUn_KI270744v1,chrUn_KI270745v1,chrUn_KI270746v1,chrUn_KI270747v1,chrUn_KI270748v1,chrUn_KI270749v1,chrUn_KI270750v1,chrUn_KI270751v1,chrUn_KI270752v1,chrUn_KI270753v1,chrUn_KI270754v1,chrUn_KI270755v1,chrUn_KI270756v1,chrUn_KI270757v1,chrUn_GL000214v1,chrUn_KI270742v1,chrUn_GL000216v2,chrUn_GL000218v1,chrEBV\\\nALL_MEM_EPS = 180G\\\nGPU_QUEUE = \\\nmodule_vardict_java = mugqic/VarDictJava/1.4.8\\\nmodule_strelka2 = mugqic/Strelka2/2.9.10\\\nmodule_bcbio_variation_recall = mugqic/bcbio.variation.recall/0.2.6\\\nmodule_varscan = mugqic/VarScan/2.4.3\\\nmodule_conpair = mugqic/Conpair/0.2\\\nmodule_scones = mugqic/SCoNEs/2.1.2\\\nmodule_sequenza_utils = mugqic/Sequenza-utils/3.0.0\\\nmodule_gcc = mugqic/gcc/4.9.3\\\nmodule_amber = mugqic/amber/3.5\\\nmodule_cobalt = mugqic/cobalt/1.11\\\nmodule_purple = mugqic/purple/2.53\\\nmodule_circos = mugqic/circos/0.69-6\\\nmodule_cpsr = 
mugqic/cpsr/0.6.2\\\nmodule_pcgr = mugqic/pcgr/0.9.2\\\nstrelka2_bed_file = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.strelka2.bed.gz\\\ncommon_name = Human\\\nassembly_synonyms = hg38\\\nCTAT_bundle_version = GRCh38_gencode_v32_CTAT_lib_Dec062019\\\n\\\n[picard_sam_to_fastq]\\\ncluster_cpu = -c 12 -N 1\\\ncluster_mem = --mem=32G\\\nram = 18G\\\ncluster_walltime = --time=72:00:00\\\nother_options = --MAX_RECORDS_IN_RAM=5000000\\\n\\\n[samtools_cram_output]\\\noptions = -h -T %(genome_fasta)s -C\\\nvariantBam_options = --cram -strip-tags BI,OQ\\\ncluster_cpu = %(PINT_CPU)s\\\ncluster_walltime = 48:00:0\\\n\\\n[sym_link_fastq]\\\ncluster_walltime = 3:00:00\\\n\\\n[sym_link_bam]\\\ncluster_walltime = 3:00:00\\\n\\\n[trimmomatic]\\\ncluster_mem = 20G\\\nram = 19G\\\ncluster_cpu = 5\\\nthreads = %(cluster_cpu)s\\\ntrailing_min_quality = 30\\\nmin_length = 50\\\nillumina_clip_settings = :2:30:15\\\njava_other_options = -XX:ParallelGCThreads=5 -Dsamjdk.buffer_size=1048576\\\n\\\n[skewer_trimming]\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\noptions = --min 25 -q 25 --compress -f sanger\\\nadapter_fasta = \\\ncluster_walltime = 48:00:00\\\n\\\n[bwa_mem]\\\ncluster_cpu = %(HALF_CPU)s\\\nbwa_other_options = -t %(cluster_cpu)s -K 100000000 -Y\\\nsequencing_center = McGill University and Genome Quebec Innovation Centre\\\nsequencing_technology = Illumina\\\n\\\n[picard_sort_sam]\\\nram = 16G\\\nmax_records_in_ram = 3750000\\\n\\\n[sambamba_sort_sam]\\\noptions = -m 10G\\\n\\\n[bwa_mem_sambamba_sort_sam]\\\ncluster_cpu = %(HALF_CPU)s\\\ncluster_walltime = 48:00:00\\\ncluster_mem = 60G\\\n\\\n[bwakit_picard_sort_sam]\\\ncluster_cpu = 4\\\ncluster_walltime = 48:00:00\\\ncluster_mem = 60G\\\n\\\n[sambamba_merge_sam_extract_unmapped]\\\ncluster_cpu = 8\\\noptions = -t %(cluster_cpu)s\\\ncluster_walltime = 35:00:00\\\ncluster_mem = 32G\\\n\\\n[sambamba_extract_unmapped]\\\ncluster_cpu = %(PINT_CPU)s\\\noptions = -t %(PINT_CPU)s -f bam -F 
\"unmapped\"\\\ncluster_walltime = 3:00:00\\\n\\\n[gatk_indel_realigner]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nnb_jobs = 23\\\nram = 12G\\\nmax_reads_in_memory = 500000\\\ncluster_cpu = 1\\\nother_options = \\\ncluster_walltime = 35:00:0\\\ncluster_mem = 12G\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\n\\\n[gatk_realigner_target_creator]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nram = 12G\\\nknown_mills = %(mills)s\\\ncluster_cpu = 1\\\nother_options = -nt 1\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\nknown_indel_sites = %(mills)s\\\n\\\n[sambamba_merge_realigned]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\ncluster_walltime = 35:00:00\\\ncluster_mem = 48G\\\n\\\n[sambamba_merge_unmapped]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\ncluster_walltime = 12:00:00\\\n\\\n[merge_realigned]\\\ncluster_walltime = 35:00:00\\\ncluster_cpu = 3\\\ncluster_mem = --mem=32G\\\n\\\n[bvatools_groupfixmate]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nram = 16G\\\n\\\n[samtools_sort]\\\ncluster_cpu = 5\\\nother_options = -@ %(cluster_cpu)s -m 20000M\\\ncluster_walltime = 71:00:00\\\n\\\n[fix_mate_by_coordinate]\\\ncluster_cpu = 4\\\ncluster_walltime = 71:00:00\\\ncluster_mem = 52G\\\n\\\n[samtools_fixmate]\\\nmodule_samtools = mugqic/samtools/1.9\\\n\\\n[fix_mate_by_coordinate_samtools]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -m -O bam -@ %(cluster_cpu)s\\\ncluster_walltime = 35:00:00\\\ncluster_mem = 48G\\\n\\\n[picard_mark_duplicates]\\\ncluster_cpu = %(PINT_CPU)s\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Dsamjdk.buffer_size=4194304\\\nram = 14G\\\nmax_records_in_ram = 1000000\\\ncluster_walltime = 71:00:00\\\ncluster_mem = 15G\\\n\\\n[gatk_base_recalibrator]\\\ncluster_cpu = %(HALF_CPU)s\\\nthreads = %(cluster_cpu)s\\\ncluster_mem = 
55G\\\nram = %(cluster_mem)s\\\noptions = --bqsr-baq-gap-open-penalty 30\\\nknown_dbsnp = %(dbsnp)s\\\nknown_gnomad = %(gnomad_exome)s\\\nknown_mills = %(mills)s\\\ncluster_walltime = 35:00:0\\\n\\\n[gatk_print_reads]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=4 -Dsamjdk.buffer_size=4194304\\\ncluster_cpu = -l nodes=1:ppn=40\\\nthreads = %(cluster_cpu)s\\\ncluster_mem = 187G\\\nram = 180G\\\ncluster_walltime = 96:00:0\\\n\\\n[gatk_apply_bqsr]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Dsamjdk.buffer_size=4194304 -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false\\\ncluster_cpu = 40\\\nthreads = %(cluster_cpu)s\\\noptions = \\\ncluster_mem = 186G\\\nram = 180G\\\ncluster_walltime = 96:00:0\\\n\\\n[recalibration]\\\ncluster_walltime = 96:00:0\\\ncluster_mem = 51G\\\nram = 50G\\\n\\\n[sambamba_index]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\n\\\n[picard_collect_multiple_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_cpu = 1\\\ncluster_mem = 12G\\\nram = 11G\\\nmax_records_in_ram = 1000000\\\noptions = --FILE_EXTENSION \".txt\"\\\ncluster_walltime = 48:00:00\\\n\\\n[picard_calculate_hs_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_mem = 9G\\\nram = 8G\\\ncluster_walltime = 48:00:00\\\n\\\n[metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_walltime = walltime=24:00:0\\\ncluster_mem = 12G\\\n\\\n[picard_collect_oxog_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_mem = 13G\\\nram = 12G\\\nmax_records_in_ram = 4000000\\\ncluster_walltime = 35:00:0\\\n\\\n[picard_collect_gcbias_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_mem = 13G\\\nram = 12G\\\nmax_records_in_ram = 4000000\\\ncluster_walltime = 35:00:0\\\n\\\n[dna_sample_qualimap]\\\ncluster_mem = 120G\\\nram = 120G\\\nuse_bed = false\\\ncluster_cpu = -l 
nodes=1:ppn=24\\\nqualimap_options = --skip-duplicated -nt 1 -gd HUMAN\\\ncluster_walltime = 35:00:0\\\n\\\n[dna_sambamba_flagstat]\\\ncluster_cpu = 6\\\nflagstat_options = -t %(cluster_cpu)s\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[fastqc]\\\ncluster_cpu = 4\\\nthreads = %(cluster_cpu)s\\\ncluster_walltime = -l walltime=35:00:0\\\n\\\n[multiqc]\\\nmodule_python = mugqic/python/3.7.3\\\ncluster_walltime = -l walltime=2:00:0\\\noptions = \\\n\\\n[gatk_depth_of_coverage]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\njava_other_options = -XX:ParallelGCThreads=2\\\nram = 8000M\\\ncluster_mem = 8G\\\nsummary_coverage_thresholds = 10,25,50,75,100,500\\\ncluster_cpu = 1\\\ncluster_walltime = 48:00:00\\\n\\\n[bvatools_depth_of_coverage]\\\ncoverage_targets = auto\\\nother_options = --gc --maxDepth 1001 --summaryCoverageThresholds 1,5,10,25,50,75,100,500,1000 --minMappingQuality 15 --minBaseQuality 15 --ommitN\\\ncluster_mem = 35G\\\nram = 35G\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\ncluster_walltime = 96:00:00\\\n\\\n[igvtools_compute_tdf]\\\nram = 6G\\\ncluster_walltime = 96:00:00\\\ncluster_cpu = 1\\\noption = -f min,max,mean -w 25\\\ncluster_mem = 7G\\\n\\\n[gatk_callable_loci]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\njava_other_options = -XX:ParallelGCThreads=2\\\ncluster_mem = 11G\\\nram = 10G\\\nother_options = -dt none --minDepth 10 --maxDepth 500 --minDepthForLowMAPQ 10 --minMappingQuality 10 --minBaseQuality 15\\\ncluster_walltime = 35:00:00\\\n\\\n[bvatools_basefreq]\\\ncluster_mem = 8G\\\nthreads = 1\\\nram = 8G\\\n\\\n[extract_common_snp_freq]\\\ncluster_cpu = 2\\\ncluster_walltime = 35:00:00\\\ncluster_mem = 20G\\\n\\\n[bvatools_ratiobaf]\\\ncluster_cpu = 5\\\ncluster_mem = 71G\\\nram = 70G\\\nother_options = --plot --maxDepth 1000 --exclude %(excluded_chromosome)s\\\n\\\n[baf_plot]\\\ncluster_cpu = %(QUART_CPU)s\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 48G\\\n\\\n[vcftools_missing_indv]\\\noptions = 
\\\ncluster_walltime = 24:00:00\\\n\\\n[vcftools_depth]\\\noptions = \\\ncluster_walltime = 24:00:00\\\n\\\n[gatk_crosscheck_fingerprint]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304 -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false\\\ncluster_cpu = 4\\\noptions = --NUM_THREADS %(cluster_cpu)s --EXIT_CODE_WHEN_MISMATCH 0\\\ncluster_mem = 16G\\\nram = 15G\\\nhaplotype_database = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.fingerprint.map\\\nlod_threshold = 3.0\\\ncluster_walltime = 24:00:00\\\n\\\n[gatk_cluster_crosscheck_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304 -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false\\\noptions = \\\ncluster_mem = 16G\\\nram = 15G\\\nlod_threshold = 5.0\\\ncluster_walltime = 24:00:00\\\n\\\n[verify_bam_id]\\\noptions = --verbose --ignoreRG --noPhoneHome\\\nvcf = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.1000G_phase1.snps.high_confidence.EUR.vcf\\\ncluster_walltime = 24:00:00\\\ncluster_cpu = 2\\\ncluster_mem = 20G\\\n\\\n[run_checkmate]\\\nmodule_python = mugqic/python/2.7.14\\\noptions = -V\\\nbed = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.SNP_GRCh38_hg19_woChr.bed\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 12G\\\n\\\n[run_peddy]\\\noptions = \\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 24G\\\n\\\n[gatk_haplotype_caller]\\\ncluster_cpu = %(PINT_CPU)s\\\noptions = --useNewAFCalculator --emitRefConfidence GVCF -dt none -nct %(cluster_cpu)s -G StandardAnnotation -G StandardHCAnnotation\\\ncluster_mem = 36G\\\nram = 35G\\\nnb_jobs = 23\\\ncluster_walltime = 
35:00:00\\\ninterval_padding = \\\n\\\n[gatk_cat_variants]\\\noptions = \\\ncluster_mem = 8G\\\nram = 8G\\\ncluster_walltime = 24:00:00\\\n\\\n[picard_merge_vcfs]\\\nmax_records_in_ram = 2000000\\\noptions = \\\nram = 12G\\\n\\\n[gatk_merge_and_call_individual_gvcfs]\\\noptions = -nt 1\\\n\\\n[gatk_merge_and_call_combined_gvcfs]\\\noptions = -nt 1 -G StandardAnnotation -G StandardHCAnnotation -A FisherStrand -A QualByDepth -A ChromosomeCounts\\\n\\\n[gatk_genotype_gvcf]\\\noptions = --useNewAFCalculator -G StandardAnnotation -G StandardHCAnnotation\\\ncluster_mem = 30G\\\nram = 30G\\\ncluster_walltime = 35:00:00\\\ncluster_cpu = 2\\\n\\\n[gatk_combine_gvcf]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=4 -Dsamjdk.buffer_size=4194304\\\ncluster_mem = 25G\\\nram = 24G\\\nnb_haplotype = 4\\\nnb_batch = 1\\\ncluster_cpu = %(QUART_CPU)s\\\nother_options = \\\ncluster_walltime = 24:00:00\\\n\\\n[merge_and_call_combined_gvcf]\\\ncluster_mem = 25G\\\nram = 24G\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Dsamjdk.buffer_size=4194304\\\ncluster_walltime = 96:00:00\\\ncluster_cpu = 2\\\n\\\n[merge_and_call_individual_gvcf]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Dsamjdk.buffer_size=4194304\\\ncluster_mem = 32G\\\nram = 32G\\\ncluster_walltime = 96:00:00\\\ncluster_cpu = 2\\\n\\\n[variant_recalibrator]\\\ntranch_other_options_snps = --resource:hapmap,known=false,training=true,truth=true,prior=15.0 %(hapmap)s --resource:omni,known=false,training=true,truth=false,prior=12.0 %(omni)s --resource:1000G,known=false,training=true,truth=false,prior=10.0 %(1000G)s --resource:dbsnp,known=true,training=false,truth=false,prior=6.0 %(dbsnp)s -an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR --truth-sensitivity-tranche 100.0 --truth-sensitivity-tranche 99.95 --truth-sensitivity-tranche 99.9 --truth-sensitivity-tranche 99.95 --truth-sensitivity-tranche 99.5 --truth-sensitivity-tranche 99.0 
--truth-sensitivity-tranche 90.0 -mode SNP\\\ntranch_other_options_indels = --resource:mills,known=false,training=true,truth=true,prior=12.0 %(mills)s --resource:dbsnp,known=true,training=false,truth=false,prior=2.0 %(dbsnp)s -an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR --truth-sensitivity-tranche 100.0 --truth-sensitivity-tranche 99.9 --truth-sensitivity-tranche 99.4 --truth-sensitivity-tranche 99.0 --truth-sensitivity-tranche 90.0 -mode INDEL\\\napply_other_options_snps = --truth-sensitivity-filter-level 99.95 --mode SNP\\\napply_other_options_indels = --truth-sensitivity-filter-level 99.4 --mode INDEL\\\ncluster_walltime = 35:00:00\\\ncluster_cpu = 2\\\ncluster_mem = 30G\\\n\\\n[gatk_variant_recalibrator]\\\nsmall_sample_option = \\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -nt %(cluster_cpu)s\\\ncluster_mem = 25G\\\nram = 24G\\\n\\\n[gatk_apply_recalibration]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -nt %(cluster_cpu)s\\\ncluster_mem = 24G\\\nram = 24G\\\n\\\n[rawmpileup]\\\nnb_jobs = 25\\\nmpileup_other_options = -d 1000 -L 1000 -B -q 11 -Q 10\\\ncluster_walltime = -l walltime=12:00:0\\\ncluster_mem = 8G\\\n\\\n[snp_and_indel_bcf]\\\napproximate_nb_jobs = 15\\\nmpileup_other_options = -d 1000 -B -q 11 -Q 20 -Ou -a FORMAT/DP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/SP,FORMAT/SCR\\\nbcftools_other_options = -mvO b\\\ncluster_walltime = 48:00:00\\\ncluster_mem = 12G\\\n\\\n[snpsift_annotate]\\\ncluster_mem = 9G\\\nram = 8G\\\njava_other_options = -XX:ParallelGCThreads=2\\\ncluster_walltime = 24:00:00\\\n\\\n[snp_id_annotation]\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 12G\\\n\\\n[mpileup_snp_id_annotation]\\\ncluster_walltime = 24:00:00\\\n\\\n[haplotype_caller_snp_id_annotation]\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 16G\\\n\\\n[compute_effects]\\\njava_other_options = -XX:ParallelGCThreads=1\\\ncluster_mem = 12G\\\nram = 12000M\\\noptions = -lof\\\nsnpeff_genome = %(assembly_synonyms)s\\\ncluster_walltime = 
12:00:0\\\n\\\n[mpileup_snp_effect]\\\ncluster_cpu = 2\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 24G\\\n\\\n[haplotype_caller_snp_effect]\\\ncluster_cpu = 1\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 16G\\\n\\\n[snpsift_dbnsfp]\\\ncluster_cpu = 2\\\nram = 24G\\\njava_other_options = -XX:ParallelGCThreads=2\\\n\\\n[dbnsfp_annotation]\\\ncluster_cpu = %(QUART_CPU)s\\\ncluster_walltime = 35:00:00\\\ncluster_mem = --mem=40G\\\n\\\n[gemini_annotations]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t snpEff --cores %(cluster_cpu)s --save-info-string\\\ncluster_walltime = -l walltime=72:00:0\\\ntmp_dir = ${SLURM_TMPDIR}\\\n\\\n[vcf_stats]\\\nmodule_python = mugqic/python/2.7.14\\\n\\\n[report]\\\ntitle = DNA-Seq Analysis Report\\\n\\\n[sv_annotation]\\\ncluster_walltime = 3:00:0\\\ncluster_mem = 12G\\\n\\\n[delly_call_filter]\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\noptions = -q 15\\\nsv_types_options = DEL,INS,DUP,INV,BND\\\nexclude_list = ${DELLY_PATH}/excludeTemplates/human.hg38.excl.tsv\\\ncluster_walltime = 35:00:0\\\ncluster_mem = 60G\\\n\\\n[delly_call_filter_germline]\\\ntype_options = germline\\\nDUP_options = -m 300 -a 0.1 -r 0.75\\\nDEL_options = -m 300 -a 0.1 -r 0.75\\\nINV_options = -m 300 -a 0.1 -r 0.75\\\nBND_options = -m 0 -a 0.1 -r 0.75\\\nINS_options = \\\nbcftools_options = \\\n\\\n[manta_sv]\\\nmodule_python = mugqic/python/2.7.14\\\noption_mode = local\\\ncluster_cpu = %(HALF_CPU)s\\\noption_nodes = %(cluster_cpu)s\\\ncluster_mem = 55G\\\nram = 55G\\\nexperiment_type_option = \\\ncluster_walltime = 24:00:0\\\n\\\n[extract_discordant_reads]\\\ncluster_cpu = %(QUART_CPU)s\\\nsambamba_options = -t %(cluster_cpu)s\\\ndiscordants_sort_option = -t %(cluster_cpu)s\\\ncluster_walltime = -l walltime=35:00:0\\\ncluster_mem = 32G\\\noptions = \\\n\\\n[extract_split_reads]\\\ncluster_cpu = %(QUART_CPU)s\\\nsambamba_options = -t %(cluster_cpu)s\\\nsplit_sort_option = -t %(cluster_cpu)s\\\ncluster_walltime = -l 
walltime=35:00:0\\\ncluster_mem = 32G\\\noptions = \\\n\\\n[lumpy_paired_sv_calls]\\\nmodule_python = mugqic/python/2.7.13\\\noptions = \\\nheader_options = -h %(vcf_header)s\\\ncluster_walltime = -l walltime=72:00:0\\\ncluster_cpu = 3\\\ncluster_mem = 16G\\\nmodule_samtools = mugqic/samtools/1.3\\\n\\\n[wham_call_sv]\\\nexclude = chr1_KI270706v1_random,chr1_KI270707v1_random,chr1_KI270708v1_random,chr1_KI270709v1_random,chr1_KI270710v1_random,chr1_KI270711v1_random,chr1_KI270712v1_random,chr1_KI270713v1_random,chr1_KI270714v1_random,chr2_KI270715v1_random,chr2_KI270716v1_random,chr3_GL000221v1_random,chr4_GL000008v2_random,chr5_GL000208v1_random,chr9_KI270717v1_random,chr9_KI270718v1_random,chr9_KI270719v1_random,chr9_KI270720v1_random,chr11_KI270721v1_random,chr14_GL000009v2_random,chr14_GL000225v1_random,chr14_KI270722v1_random,chr14_GL000194v1_random,chr14_KI270723v1_random,chr14_KI270724v1_random,chr14_KI270725v1_random,chr14_KI270726v1_random,chr15_KI270727v1_random,chr16_KI270728v1_random,chr17_GL000205v2_random,chr17_KI270729v1_random,chr17_KI270730v1_random,chr22_KI270731v1_random,chr22_KI270732v1_random,chr22_KI270733v1_random,chr22_KI270734v1_random,chr22_KI270735v1_random,chr22_KI270736v1_random,chr22_KI270737v1_random,chr22_KI270738v1_random,chr22_KI270739v1_random,chrY_KI270740v1_random,chrUn_KI270302v1,chrUn_KI270304v1,chrUn_KI270303v1,chrUn_KI270305v1,chrUn_KI270322v1,chrUn_KI270320v1,chrUn_KI270310v1,chrUn_KI270316v1,chrUn_KI270315v1,chrUn_KI270312v1,chrUn_KI270311v1,chrUn_KI270317v1,chrUn_KI270412v1,chrUn_KI270411v1,chrUn_KI270414v1,chrUn_KI270419v1,chrUn_KI270418v1,chrUn_KI270420v1,chrUn_KI270424v1,chrUn_KI270417v1,chrUn_KI270422v1,chrUn_KI270423v1,chrUn_KI270425v1,chrUn_KI270429v1,chrUn_KI270442v1,chrUn_KI270466v1,chrUn_KI270465v1,chrUn_KI270467v1,chrUn_KI270435v1,chrUn_KI270438v1,chrUn_KI270468v1,chrUn_KI270510v1,chrUn_KI270509v1,chrUn_KI270518v1,chrUn_KI270508v1,chrUn_KI270516v1,chrUn_KI270512v1,chrUn_KI270519v1,chrUn_KI270522v1,chrUn_KI270511v
1,chrUn_KI270515v1,chrUn_KI270507v1,chrUn_KI270517v1,chrUn_KI270529v1,chrUn_KI270528v1,chrUn_KI270530v1,chrUn_KI270539v1,chrUn_KI270538v1,chrUn_KI270544v1,chrUn_KI270548v1,chrUn_KI270583v1,chrUn_KI270587v1,chrUn_KI270580v1,chrUn_KI270581v1,chrUn_KI270579v1,chrUn_KI270589v1,chrUn_KI270590v1,chrUn_KI270584v1,chrUn_KI270582v1,chrUn_KI270588v1,chrUn_KI270593v1,chrUn_KI270591v1,chrUn_KI270330v1,chrUn_KI270329v1,chrUn_KI270334v1,chrUn_KI270333v1,chrUn_KI270335v1,chrUn_KI270338v1,chrUn_KI270340v1,chrUn_KI270336v1,chrUn_KI270337v1,chrUn_KI270363v1,chrUn_KI270364v1,chrUn_KI270362v1,chrUn_KI270366v1,chrUn_KI270378v1,chrUn_KI270379v1,chrUn_KI270389v1,chrUn_KI270390v1,chrUn_KI270387v1,chrUn_KI270395v1,chrUn_KI270396v1,chrUn_KI270388v1,chrUn_KI270394v1,chrUn_KI270386v1,chrUn_KI270391v1,chrUn_KI270383v1,chrUn_KI270393v1,chrUn_KI270384v1,chrUn_KI270392v1,chrUn_KI270381v1,chrUn_KI270385v1,chrUn_KI270382v1,chrUn_KI270376v1,chrUn_KI270374v1,chrUn_KI270372v1,chrUn_KI270373v1,chrUn_KI270375v1,chrUn_KI270371v1,chrUn_KI270448v1,chrUn_KI270521v1,chrUn_GL000195v1,chrUn_GL000219v1,chrUn_GL000220v1,chrUn_GL000224v1,chrUn_KI270741v1,chrUn_GL000226v1,chrUn_GL000213v1,chrUn_KI270743v1,chrUn_KI270744v1,chrUn_KI270745v1,chrUn_KI270746v1,chrUn_KI270747v1,chrUn_KI270748v1,chrUn_KI270749v1,chrUn_KI270750v1,chrUn_KI270751v1,chrUn_KI270752v1,chrUn_KI270753v1,chrUn_KI270754v1,chrUn_KI270755v1,chrUn_KI270756v1,chrUn_KI270757v1,chrUn_GL000214v1,chrUn_KI270742v1,chrUn_GL000216v2,chrUn_GL000218v1,chrEBV\\\ncluster_cpu = %(QUART_CPU)s\\\ncores = %(cluster_cpu)s\\\nheader_options = -h %(vcf_header)s\\\ncluster_walltime = 24:00:0\\\ncluster_mem = %(HALF_MEM)s\\\n\\\n[cnvkit_batch]\\\nmin_background_samples = 20\\\nbatch_options = -m wgs --target-avg-size 5000 --short-names\\\nfix_options = --no-edge\\\nsegment_options = -m cbs -t 0.00001 --drop-low-coverage -p %(cluster_cpu)s\\\ncall_options = \\\nexport_options = vcf\\\nsegmetrics_options = --ci --pi\\\nmetrics_options = \\\nscatter_options = 
\\\ndiagram_options = \\\ncluster_cpu = 6\\\nthreads = 6\\\naccess = /cvmfs/soft.mugqic/CentOS6/genomes/species/Homo_sapiens.GRCh38/annotations/Homo_sapiens.GRCh38.access-5k-mappable.bed\\\nrefFlat = /cvmfs/soft.mugqic/CentOS6/genomes/species/Homo_sapiens.GRCh38/annotations/Homo_sapiens.GRCh38.Ensembl87.ref_flat.tsv\\\ncluster_walltime = -l walltime=48:00:0\\\nmodule_python = mugqic/python/2.7.14\\\nmodule_R = mugqic/R_Bioconductor/3.2.3_3.2\\\n\\\n[run_breakseq2]\\\nmodule_pyhton = mugqic/python/2.7.14\\\noptions = \\\nbcftools_options = -f PASS -Ov\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\ngff = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.breakseq2_bplib_20150129.gff\\\ncluster_walltime = 35:00:00\\\n\\\n[metasv_ensemble]\\\noptions = --boost_sc --filter_gaps --keep_standard_contigs --mean_read_length 150\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\nfilter_pass_options = \\\ncluster_walltime = 35:00:0\\\ncluster_mem = --mem=60G\\\n\\\n[svaba_run]\\\ncluster_cpu = %(HALF_CPU)s\\\noptions = -p %(cluster_cpu)s\\\nref = %(genome_bwa_index)s\\\ndbsnp = $SVABA_HOME/dbsnp_indel.vcf\\\ncluster_walltime = 35:00:0\\\ncluster_mem = 60G\\\n\\\n[haplotype_caller_dbnsfp_annotation]\\\ncluster_walltime = 24:00:00\\\ncluster_cpu = 3\\\ncluster_mem = 40G\\\n\\\n[sambamba_merge_sam_files]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\n\\\n[sym_link_pair]\\\ncluster_walltime = 3:00:0\\\n\\\n[sambamba_mark_duplicates]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\ntmp_dir = tmp_dir\\\ncluster_mem = 36G\\\ncluster_walltime = 35:00:0\\\n\\\n[conpair_concordance_contamination]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nmarkers_bed = ${CONPAIR_DATA}/markers/%(assembly)s.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.liftover.bed\\\nmarkers_txt = 
${CONPAIR_DATA}/markers/%(assembly)s.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.liftover.txt\\\nconcord_options = --normal_homozygous_markers_only\\\ncontam_options = \\\ncluster_mem = 6G\\\nram = 6G\\\ncluster_walltime = 24:00:0\\\n\\\n[picard_collect_sequencing_artifacts_metrics]\\\nFFPE = true\\\ncluster_mem = 12G\\\nram = 12000M\\\noptions = --FILE_EXTENSION \".txt\"\\\nmax_records_in_ram = 4000000\\\ncluster_walltime = -l walltime=35:00:0\\\n\\\n[qualimap]\\\ncluster_cpu = 4\\\ncluster_mem = 60G\\\nram = 60G\\\n\\\n[picard_ScatterIntervalsByNs]\\\ncluster_mem = 3G\\\nram = 3000M\\\n\\\n[gatk_splitInterval]\\\ncluster_mem = 3G\\\nram = 3000M\\\noptions = --subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION\\\n\\\n[gatk_interval_list2bed]\\\ncluster_mem = 3200M\\\nram = 3G\\\ncluster_walltime = -l walltime=3:00:0\\\ncluster_cpu = -l nodes=1:ppn=2\\\n\\\n[gatk_bed2interval_list]\\\ncluster_mem = 3200M\\\nram = 3G\\\n\\\n[gatk_preProcessInterval]\\\ncluster_mem = 3200M\\\nram = 3G\\\noptions = --interval-merging-rule OVERLAPPING_ONLY --bin-length 5000 --padding 250\\\n\\\n[rawmpileup_panel]\\\nnb_jobs = 25\\\nmodule_samtools = mugqic/samtools/1.3\\\npanel = test_rapid/SureSelectHumanAllExonV5.targets_ext1k.bed\\\nmpileup_other_options = -d 1000 -L 1000 -B -q 1 -Q 10\\\ncluster_walltime = 12:00:0\\\ncluster_cpu = 1\\\n\\\n[varscan2_somatic_panel]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nother_options = --min-coverage 3 --min-var-freq 0.05 --p-value 1 --somatic-p-value 0.05 --strand-filter 0\\\ncluster_mem = 4300M\\\nram = 4G\\\ncluster_walltime = 12:00:0\\\ncluster_cpu = 2\\\n\\\n[merge_varscan2]\\\nmodule_python = mugqic/python/2.7.18\\\nsomatic_filter_options = -Oz -i 'SS=\"2\"'\\\ngermline_filter_options = -Oz -i 'SS=\"1\"|SS=\"3\"'\\\ngenotype_filter_options = -e 'GT[*]=\"RR\"'\\\ntabix_options = -pvcf\\\ncluster_walltime = -l 
walltime=3:00:0\\\ncluster_cpu = 2\\\ncluster_mem = 8G\\\n\\\n[preprocess_vcf_panel]\\\ncluster_walltime = -l walltime=3:00:0\\\ncluster_mem = 8G\\\n\\\n[rawmpileup_cat]\\\ncluster_walltime = -l walltime=35:00:0\\\ncluster_mem = 8G\\\n\\\n[varscan2_somatic]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nother_options = --min-coverage 3 --min-var-freq 0.05 --p-value 0.10 --somatic-p-value 0.05 --strand-filter 0\\\ncluster_mem = 4G\\\nram = 4G\\\ncluster_walltime = -l walltime=12:00:0\\\ncluster_cpu = 2\\\n\\\n[varscan2_readcount_fpfilter]\\\nreadcount_options = -q 1 -b 20 -i -w 1000 -d 2500\\\nfpfilter_options = --dream3-settings --keep-failures\\\nsomatic_filter_options = -i 'SS=\"2\"'\\\ngermline_filter_options = -i 'SS=\"1\"|SS=\"3\"'\\\ngenotype_filter_options = -e 'GT[*]=\"RR\"'\\\nram = 12G\\\ncluster_cpu = 3\\\ncluster_mem = 12G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[gatk_mutect2]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = --pair-hmm-implementation AVX_LOGLESS_CACHING_OMP --native-pair-hmm-threads %(cluster_cpu)s --max-reads-per-alignment-start 0 --read-validation-stringency LENIENT --af-of-alleles-not-in-resource 0.0000025\\\ncluster_mem = 36G\\\nram = 36000M\\\nnb_jobs = 23\\\ncosmic = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.cosmic.coding.87.vcf.gz\\\nknown_sites = %(af_gnomad)s\\\npon = \\\ncluster_walltime = --time=72:00:0\\\n\\\n[gatk_learn_read_orientation_model]\\\noptions = \\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_walltime = 24:00:0\\\n\\\n[gatk_merge_stats]\\\nram = 6G\\\noptions = \\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[gatk_get_pileup_summaries]\\\noptions = \\\nknown_sites = %(af_gnomad)s\\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_walltime = 24:00:0\\\n\\\n[gatk_calculate_contamination]\\\noptions = \\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_walltime = 24:00:0\\\n\\\n[gatk_filter_mutect_calls]\\\noptions = \\\ncluster_mem = 16G\\\nram 
= 16000M\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[gatk_merge_vcfs]\\\noptions = \\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[merge_filter_mutect2]\\\nbcftools_options = \\\nfilter_options = -f PASS\\\ncluster_mem = 18G\\\nram = 18G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[samtools_paired]\\\nmodule_bcftools = mugqic/bcftools/1.9\\\nnb_jobs = 23\\\nmpileup_other_options = -d 1000 -B -q 10 -Q 10 -Ou -a DP,AD,ADF,ADR,SP\\\nbcftools_calls_options = -mvO b\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[merge_filter_paired_samtools]\\\nconcat_options = -Ob\\\nsomatic_filter_options = -i 'INFO/STATUS~\"somatic\"'\\\nsomatic_vcffilter_options = -g \"! ( GT = 1/1 )\"\\\ngermline_loh_filter_options = -i 'INFO/STATUS~\"germline\"|INFO/STATUS~\"loh\"'\\\ngenotype_filter_options = -e 'GT[*]=\"RR\"'\\\ncluster_mem = 12G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[vardict_paired]\\\nmodule_python = mugqic/python/2.7.18\\\ncluster_cpu = 4\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nuse_bed = false\\\ndict2bed_options = -c 5000 -o 250\\\nnb_jobs = 22\\\nvardict_options = -f 0.03 -Q 11 -c 1 -S 2 -E 3 -g 4 -th %(cluster_cpu)s\\\nvar2vcf_options = -f 0.03 -P 0.9 -m 4.25 -M\\\njava_options = -Xms768m\\\ncluster_mem = 16G\\\nram = 16000M\\\nclasspath = $VARDICT_HOME/lib/VarDict-1.4.8.jar:$VARDICT_HOME/lib/commons-cli-1.2.jar:$VARDICT_HOME/lib/jregex-1.2_01.jar:$VARDICT_HOME/lib/htsjdk-2.8.0.jar com.astrazeneca.vardict.Main\\\ncluster_walltime = -l walltime=72:00:0\\\n\\\n[merge_filter_paired_vardict]\\\nsomatic_filter_options = -f PASS -i 'INFO/STATUS~\".*Somatic\"'\\\ngermline_filter_options = -f PASS -i 'INFO/STATUS~\"Germline\"|INFO/STATUS~\".*LOH\"'\\\ngenotype_filter_options = -e 'GT[*]=\"RR\"'\\\ncluster_mem = 12G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[strelka2_paired_somatic]\\\nmodule_python = mugqic/python/2.7.18\\\noption_mode = 
local\\\ncluster_cpu = %(HALF_CPU)s\\\noption_nodes = %(cluster_cpu)s\\\ncluster_mem = 55G\\\nram = 55G\\\nbed_file = %(strelka2_bed_file)s\\\nexperiment_type_option = \\\nfilter_options = -f PASS -Oz\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[strelka2_paired_germline]\\\nmodule_python = mugqic/python/2.7.18\\\noption_mode = local\\\ncluster_cpu = %(HALF_CPU)s\\\noption_nodes = %(cluster_cpu)s\\\ncluster_mem = 55G\\\nram = 55G\\\nbed_file = %(strelka2_bed_file)s\\\nexperiment_type_option = \\\nfilter_options = -f PASS -Oz -e 'GT[*]=\"RR\"'\\\ncluster_walltime = -l walltime=48:00:0\\\n\\\n[strelka2_paired_germline_snpeff]\\\nsplit_options = -Oz -i'GT=\"alt\"'\\\noptions = \\\ncluster_mem = 12G\\\nram = 12000M\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[bcbio_ensemble]\\\ncluster_cpu = 2\\\ncluster_mem = 24G\\\nram = 24G\\\n\\\n[bcbio_ensemble_somatic]\\\ncluster_cpu = 6\\\noptions = --cores %(cluster_cpu)s --numpass 1 --names mutect2,strelka2,vardict,varscan2\\\ncluster_mem = 24G\\\nram = 24G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[bcbio_ensemble_germline]\\\ncluster_cpu = 6\\\noptions = --cores %(cluster_cpu)s --numpass 1 --names strelka2,vardict,varscan2\\\ncluster_mem = 24G\\\nram = 24G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[gatk_variant_annotator]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.7\\\ncluster_mem = 12G\\\nram = 12000M\\\nnb_jobs = 20\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[gatk_variant_annotator_somatic]\\\ncluster_cpu = 2\\\nother_options = -nt 2 --dbsnp %(known_variants)s -G StandardAnnotation -G StandardSomaticAnnotation -A HomopolymerRun -A Coverage -A DepthPerAlleleBySample -A ClippingRankSumTest -A BaseQualityRankSumTest -A MappingQualityRankSumTest -A MappingQualityZeroBySample -A LowMQ -A ReadPosRankSumTest -A GCContent\\\n\\\n[gatk_variant_annotator_germline]\\\ncluster_cpu = 2\\\nother_options = -nt 2 
--dbsnp %(known_variants)s -A HomopolymerRun -A Coverage -A DepthPerAlleleBySample -A ClippingRankSumTest -A BaseQualityRankSumTest -A MappingQualityRankSumTest -A MappingQualityZeroBySample -A LowMQ -A ReadPosRankSumTest -A GCContent\\\n\\\n[merge_gatk_variant_annotator]\\\ncluster_cpu = 2\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[filter_ensemble]\\\nmodule_python = mugqic/python/3.10.4\\\ncall_filter = 2\\\nsomatic_filter_options = -Oz -i'TDP>=10 && TVAF>=0.05 && NDP>=10 && NVAF<=0.05'\\\ngermline_filter_options = -Oz -i'TDP>=10 && TVAF>=0.05 && NDP>=10 && NVAF>=0.05'\\\ncluster_mem = 12G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[report_cpsr]\\\noptions = --no_vcf_validate --force_overwrite --no_docker --secondary_findings --gwas_findings --panel_id 0\\\nassembly = grch38\\\ncluster_cpu = %(PINT_CPU)s\\\ncluster_mem = 36G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[report_pcgr]\\\noptions = --no_vcf_validate --force_overwrite --no_docker --vep_buffer_size 500 --vep_regulatory --show_noncoding --vcf2maf\\\ntumor_type = --tumor_site 0\\\ntumor_options = --call_conf_tag TAL --tumor_dp_tag TDP --tumor_af_tag TVAF --tumor_dp_min 10 --tumor_af_min 0.05\\\nnormal_options = --control_dp_tag NDP --control_af_tag NVAF --control_dp_min 10 --control_af_max 0.05\\\nmutsig_options = --estimate_signatures\\\ntmb_options = --estimate_tmb --tmb_algorithm nonsyn\\\nmsi_options = --estimate_msi_status\\\nassembly = grch38\\\nassay = --assay WGS\\\ncluster_cpu = %(HALF_CPU)s\\\ncluster_mem = 36G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[compute_cancer_effects_somatic]\\\njava_other_options = -XX:ParallelGCThreads=1\\\noptions = -cancer -lof\\\nsnpeff_genome = hg19\\\ncluster_walltime = 12:00:0\\\ncluster_mem = 12G\\\nram = 12000M\\\n\\\n[compute_cancer_effects_germline]\\\njava_other_options = -XX:ParallelGCThreads=1\\\noptions = -lof\\\nsnpeff_genome = hg19\\\ncluster_walltime = -l walltime=12:00:0\\\nram = 12000M\\\ncluster_mem = 
12G\\\n\\\n[gatk_combine_variants]\\\n\\\n[decompose_and_normalize_mnps]\\\ncluster_walltime = 12:00:0\\\n\\\n[set_somatic_and_actionable_mutations]\\\nset_somatic = --min-depth 30 --min-tumor-depth 10 --min-norm-depth 10\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[mpileup_sequenza]\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[sequenza]\\\nnb_jobs = 23\\\ngc_file = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.gc50Base.txt\\\nbin_window_size = 50\\\nmpileup_options = -d 1000 -L 1000 -B -Q 25 -q 15\\\npileup_options = -q 20 -N 30\\\ncluster_walltime = -l walltime=24:00:0\\\ncluster_mem = 12G\\\n\\\n[amber]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_cpu = %(PINT_CPU)s\\\nthreads = %(cluster_cpu)s\\\nloci = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GermlineHetPon.vcf.gz\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[cobalt]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_cpu = %(PINT_CPU)s\\\nthreads = %(cluster_cpu)s\\\ngc_profile = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GC_profile.1000bp.cnp\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[purple]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nmodule_perl = mugqic/perl/5.34.0\\\ncluster_mem = 17G\\\nram = 16G\\\ncluster_cpu = %(PINT_CPU)s\\\nthreads = %(cluster_cpu)s\\\ngc_profile = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GC_profile.1000bp.cnp\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[samtools_single]\\\nnb_jobs = 25\\\nmodule_samtools = mugqic/samtools/0.1.19\\\nmpileup_other_options = -B -q 15 -Q 25 -D -S -g\\\nbcftools_view_options = -bvcg\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[merge_samtools_single]\\\ncluster_walltime = -l 
walltime=12:00:0\\\n\\\n[tabix_split]\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[shapeit]\\\ncluster_cpu = %(PINT_CPU)s\\\ncheck_options = \\\ncheck_threads = %(cluster_cpu)s\\\nphase_options = \\\nphase_threads = %(cluster_cpu)s\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[scnaphase]\\\ncluster_walltime = 12:00:0\\\ncluster_mem = 16G\\\n\\\n[delly_call_filter_somatic]\\\ntype_options = somatic\\\nDUP_options = -m 300 -a 0.1 -r 0.75\\\nDEL_options = -m 300 -a 0.1 -r 0.75\\\nINV_options = -m 300 -a 0.1 -r 0.75\\\nBND_options = -m 0 -a 0.1 -r 0.75\\\nINS_options = \\\nbcftools_options = -i 'FORMAT/DV[0]>4|(INFO/SR>1&INFO/PE>5)|(INFO/SR>5&INFO/PE>1)'\\\n\\\n[scones]\\\nwindow = 10000\\\nbest_model = 0\\\n\\\n[bvatools_bincounter]\\\nother_options = --minMapQ 15\\\ncluster_cpu = 2\\\njava_other_options = -XX:ParallelGCThreads=1\\\ncluster_mem = 31G\\\nram = 30G\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[scones_pair]\\\ncluster_cpu = %(HALF_CPU)s\\\nother_options = -t %(cluster_cpu)s\\\ngc_map_bedfile = %(assembly_dir)s/annotations/mappabilityGC/hg1k_v37_bin10kb_GCMAP.bed\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[scones_filter]\\\nbest_model = 3\\\ncluster_walltime = 3:00:0\\\ncluster_mem = 8G\\\n\\\n[scones_annotate]\\\nexcluded_regions_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.TeloCentro_1Mb.bed\\\ngenes_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.gene.bed\\\ndgv_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.DGV.bed\\\nmicrosat_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.microsat.bed\\\nrepeat_masker_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.repeatMasker.bed\\\nautosome_size_file = %(assembly_dir)s/%(scientific_name)s.%(assembly)s.AutosomeSize.txt\\\ncluster_walltime = -l walltime=3:00:0\\\ncluster_mem = 8G\\\n\\\n[run_arriba]\\\nblacklist = 
$ARRIBA_HOME/database/blacklist_hg38_GRCh38_2018-01-13.tsv\\\n\\\n", "operation_platform": "beluga", "operation_cmd_line": "module purge\nmodule load python/3.10.2 mugqic/genpipes/4.2.0\ntumor_pair.py \\\n -j slurm \\\n -t ensemble \\\n -r readset.txt \\\n -p pair.txt \\\n -s 1-40 \\\n -c $MUGQIC_PIPELINES_HOME/pipelines/tumor_pair/tumor_pair.base.ini \\\n $MUGQIC_PIPELINES_HOME/pipelines/common_ini/beluga.ini \\\n $MUGQIC_PIPELINES_HOME/pipelines/tumor_pair/tumor_pair.extras.ini \\\n $MUGQIC_PIPELINES_HOME/resources/genomes/config/Homo_sapiens.GRCh38.ini \\\n > Tumour_Pair_run.sh\nrm -r Tumour_Pair_CHUNKS;\nmkdir Tumour_Pair_CHUNKS;\n$MUGQIC_PIPELINES_HOME/utils/chunk_genpipes.sh -n 100 Tumour_Pair_run.sh Tumour_Pair_CHUNKS", - "operation_name": "genpipes_tumour_pair", + "operation_name": "GenPipes_TumorPair.ensemble", "sample": [ { "sample_name": "MoHQ-CM-1-10-3393-1DT", From d262f872a18564735a5ab185233b5498b4ca61e9 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Thu, 19 Oct 2023 16:01:09 -0400 Subject: [PATCH 02/19] Debug missing arg --- project_tracking/db_action.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index c0c3a4e..78b8812 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -920,7 +920,8 @@ def ingest_genpipes(project_id: str, ingest_data, session=None): name=ingest_data[vb.OPERATION_CONFIG_NAME], version=ingest_data[vb.OPERATION_CONFIG_VERSION], md5sum=ingest_data[vb.OPERATION_CONFIG_MD5SUM], - data=bytes(ingest_data[vb.OPERATION_CONFIG_DATA], 'utf-8') + data=bytes(ingest_data[vb.OPERATION_CONFIG_DATA], 'utf-8'), + session=session ) operation = Operation( From d91dcfcc09a1ad0655f90e6a27a3de796a2ed41f Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Thu, 26 Oct 2023 13:06:44 -0400 Subject: [PATCH 03/19] Using project rather than project_name or project_id in routes - test 1 --- project_tracking/api/project.py | 51 
++++++++++++++++----------------- project_tracking/db_action.py | 11 +++++-- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index 1299897..6593fdd 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -29,43 +29,40 @@ def unroll(string): return unroll_list -def capitalize(func): +def standardize_project(func): """ - Capitalize project_name + Standardize project used by the client: allowing ID and name to be used """ @functools.wraps(func) - def wrap(*args, project_name = None, **kwargs): - if isinstance(project_name, str): - project_name = project_name.upper() - if project_name not in [p.name for p in db_action.projects(project_name)]: - return abort( - 404, - f"Project {project_name} not found" - ) - return func(*args, project_name=project_name, **kwargs) + def wrap(*args, project=None, **kwargs): + project_dict = { + "id": None, + "name": None + } + if project is None: + pass + elif project.isdigit(): + project_dict["id"] = project + else: + project_dict["name"] = project.upper() + project_dict["id"] = db_action.name_to_id("Project", project_dict["name"]) + + return func(*args, project=project_dict, **kwargs) return wrap @bp.route('/') -@bp.route('/') -# @capitalize -def projects(project_id: str = None): +@bp.route('/') +@standardize_project +def projects(project: str = None): """ - patient_id: uses the form "/project/1" - patient_name: uses the form "/project/'?name='" + project: uses the form "/project/1" for project ID and "/project/name" for project name return: list of all the details of the poject with name "project_name" or ID "project_id" """ - query = request.args - # valid query - name = None - if query.get('name'): - name = query['name'] - if name: - project_id = db_action.name_to_id("Project", name) - if project_id is None: - return {"Project list": [f"id: {i.id}, name: {i.name}" for i in db_action.projects(project_id)]} - return 
[i.flat_dict for i in db_action.projects(project_id)] + if project["id"] is None: + return {"Project list": [f"id: {project.id}, name: {project.name}" for project in db_action.projects(project["id"])]} + return [i.flat_dict for i in db_action.projects(project["id"])] @@ -255,7 +252,7 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id @bp.route('//patients//metrics') @bp.route('//samples//metrics') @bp.route('//readsets//metrics') -@capitalize +# @capitalize def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, metric_id: str=None): """ metric_id: uses the form "1,3-8,9". Select metric by ids diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index 78b8812..6dee400 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -92,9 +92,16 @@ def projects(project_id=None, session=None): .where(Project.id.in_(project_id)) ) else: - raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {session.scalars(select(Project.name)).unique().all()}") + all_available = [f"id: {project.id}, name: {project.name}" for project in session.scalars(select(Project)).unique().all()] + raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {all_available}") - return session.scalars(stmt).unique().all() + ret = session.scalars(stmt).unique().all() + + if not ret: + all_available = [f"id: {project.id}, name: {project.name}" for project in session.scalars(select(Project)).unique().all()] + raise DidNotFindError(f"Requested Project doesn't exist. 
Please try again with one of the following: {all_available}") + + return ret def metrics_deliverable(project_id: str, deliverable: bool, patient_id=None, sample_id=None, readset_id=None, metric_id=None): """ From 8a982eebaabe12940bca994924db80602c35afc6 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Tue, 31 Oct 2023 11:39:58 -0400 Subject: [PATCH 04/19] Adding project converter/checker --- README.md | 4 +- project_tracking/api/project.py | 220 +++++++++++++++++++------------- project_tracking/db_action.py | 88 +++++++------ 3 files changed, 183 insertions(+), 129 deletions(-) diff --git a/README.md b/README.md index 2323edb..0c5f8bd 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ python -m venv venv source ./venv/bin/activate pip install --upgrade pip pip install -e . -# Seting the db url is optiopnal, the default will be in the app installation folder +# Setting the db url is optional, the default will be in the app installation folder export C3G_SQLALCHEMY_DATABASE_URI="sqlite:////tmp/my_test_db.sql" # initialyse the db flask --app project_tracking init-db @@ -45,7 +45,7 @@ DOC: -Once the server is running, you can still initialise the database, you can even flush it clear of any entry with +Once the server is running, you can still initialise the database, you can even flush it clear of any entry with: ```bash # WARNING this will erase all entry to you Database! 
diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index 6593fdd..266777d 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -29,46 +29,56 @@ def unroll(string): return unroll_list -def standardize_project(func): +def convcheck_project(func): """ - Standardize project used by the client: allowing ID and name to be used + Converting project name to project id and checking if project found """ @functools.wraps(func) def wrap(*args, project=None, **kwargs): - project_dict = { - "id": None, - "name": None - } if project is None: - pass + project_id = None elif project.isdigit(): - project_dict["id"] = project + project_id = project + if not db_action.projects(project_id): + all_available = [f"id: {project.id}, name: {project.name}" for project in db_action.projects()] + project_id = {"DB_ACTION_WARNING": f"Requested Project '{project}' doesn't exist. Please try again with one of the following: {all_available}"} else: - project_dict["name"] = project.upper() - project_dict["id"] = db_action.name_to_id("Project", project_dict["name"]) + project_id = db_action.name_to_id("Project", project.upper()) + if not project_id: + all_available = [f"id: {project.id}, name: {project.name}" for project in db_action.projects()] + project_id = {"DB_ACTION_WARNING": f"Requested Project '{project}' doesn't exist. 
Please try again with one of the following: {all_available}"} - return func(*args, project=project_dict, **kwargs) + return func(*args, project_id=project_id, **kwargs) return wrap +def sanity_check(item, action_output): + if not action_output: + ret = {"DB_ACTION_WARNING": f"Requested {item} doesn't exist."} + else: + ret = [i.flat_dict for i in action_output] + return ret + @bp.route('/') @bp.route('/') -@standardize_project -def projects(project: str = None): +@convcheck_project +def projects(project_id: str = None): """ project: uses the form "/project/1" for project ID and "/project/name" for project name return: list of all the details of the poject with name "project_name" or ID "project_id" """ - if project["id"] is None: - return {"Project list": [f"id: {project.id}, name: {project.name}" for project in db_action.projects(project["id"])]} - return [i.flat_dict for i in db_action.projects(project["id"])] + if project_id is None: + return {"Project list": [f"id: {project.id}, name: {project.name}" for project in db_action.projects(project_id)]} + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + return [i.flat_dict for i in db_action.projects(project_id)] -@bp.route('//patients') -@bp.route('//patients/') -# @capitalize +@bp.route('//patients') +@bp.route('//patients/') +@convcheck_project def patients(project_id: str, patient_id: str = None): """ patient_id: uses the form "1,3-8,9" @@ -110,29 +120,28 @@ def patients(project_id: str, patient_id: str = None): for patient_name in name.split(","): patient_id.extend(db_action.name_to_id("Patient", patient_name)) + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + # pair being either True or False if pair is not None: - return [ - i.flat_dict for i in db_action.patient_pair( + action_output = db_action.patient_pair( project_id, patient_id=patient_id, pair=pair, tumor=tumor ) - ] else: - return [ - i.flat_dict for i in 
db_action.patients( + action_output = db_action.patients( project_id, patient_id=patient_id ) - ] - + return sanity_check("Patient", action_output) -@bp.route('//samples') -@bp.route('//samples/') -# @capitalize +@bp.route('//samples') +@bp.route('//samples/') +@convcheck_project def samples(project_id: str, sample_id: str = None): """ sample_id: uses the form "1,3-8,9", if not provides, all sample are returned @@ -153,11 +162,16 @@ def samples(project_id: str, sample_id: str = None): for sample_name in name.split(","): sample_id.extend(db_action.name_to_id("Sample", sample_name)) - return [i.flat_dict for i in db_action.samples(project_id, sample_id=sample_id)] + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id -@bp.route('//readsets') -@bp.route('//readsets/') -# @capitalize + action_output = db_action.samples(project_id, sample_id=sample_id) + + return sanity_check("Sample", action_output) + +@bp.route('//readsets') +@bp.route('//readsets/') +@convcheck_project def readsets(project_id: str, readset_id: str=None): """ readset_id: uses the form "1,3-8,9", if not provided, all readsets are returned @@ -178,14 +192,19 @@ def readsets(project_id: str, readset_id: str=None): for readset_name in name.split(","): readset_id.extend(db_action.name_to_id("Readset", readset_name)) - return [i.flat_dict for i in db_action.readsets(project_id, readset_id=readset_id)] + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + + action_output = db_action.readsets(project_id, readset_id=readset_id) + return sanity_check("Readset", action_output) -@bp.route('//files/') -@bp.route('//patients//files') -@bp.route('//samples//files') -@bp.route('//readsets//files') -# @capitalize + +@bp.route('//files/') +@bp.route('//patients//files') +@bp.route('//samples//files') +@bp.route('//readsets//files') +@convcheck_project def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: 
str=None, file_id: str=None): """ file_id: uses the form "1,3-8,9". Select file by ids @@ -225,8 +244,7 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id file_id = unroll(file_id) if deliverable is not None: - return [ - i.flat_dict for i in db_action.files_deliverable( + action_output = db_action.files_deliverable( project_id=project_id, patient_id=patient_id, sample_id=sample_id, @@ -234,25 +252,28 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id file_id=file_id, deliverable=deliverable ) - ] else: - return [ - i.flat_dict for i in db_action.files( + action_output = db_action.files( project_id=project_id, patient_id=patient_id, sample_id=sample_id, readset_id=readset_id, file_id=file_id ) - ] + + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + + return sanity_check("File", action_output) -@bp.route('//metrics', methods=['POST']) -@bp.route('//metrics/') -@bp.route('//patients//metrics') -@bp.route('//samples//metrics') -@bp.route('//readsets//metrics') -# @capitalize + +@bp.route('//metrics', methods=['POST']) +@bp.route('//metrics/') +@bp.route('//patients//metrics') +@bp.route('//samples//metrics') +@bp.route('//readsets//metrics') +@convcheck_project def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, metric_id: str=None): """ metric_id: uses the form "1,3-8,9". 
Select metric by ids @@ -313,8 +334,7 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_ metric_id = unroll(metric_id) if deliverable is not None: - return [ - i.flat_dict for i in db_action.metrics_deliverable( + action_output = db_action.metrics_deliverable( project_id=project_id, patient_id=patient_id, sample_id=sample_id, @@ -322,24 +342,26 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_ metric_id=metric_id, deliverable=deliverable ) - ] else: - return [ - i.flat_dict for i in db_action.metrics( + action_output = db_action.metrics( project_id=project_id, patient_id=patient_id, sample_id=sample_id, readset_id=readset_id, metric_id=metric_id ) - ] -@bp.route('//samples//readsets') -# @capitalize + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + + return sanity_check("Metric", action_output) + +@bp.route('//samples//readsets') +@convcheck_project def readsets_from_samples(project_id: str, sample_id: str): """ sample_id: uses the form "1,3-8,9" - return: readsets for slected sample_id + return: readsets for selected sample_id """ query = request.args @@ -355,11 +377,16 @@ def readsets_from_samples(project_id: str, sample_id: str): for sample_name in name.split(","): sample_id.extend(db_action.name_to_id("Sample", sample_name)) - return [i.flat_dict for i in db_action.readsets(project_id, sample_id)] + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + action_output = db_action.readsets(project_id, sample_id) -@bp.route('//digest_readset_file', methods=['POST']) -# @capitalize + return sanity_check("Metric", action_output) + + +@bp.route('//digest_readset_file', methods=['POST']) +@convcheck_project def digest_readset_file(project_id: str): """ POST: list of Readset/Sample Name or id @@ -373,10 +400,14 @@ def digest_readset_file(project_id: str): flash('Data does not seems to be json') return 
redirect(request.url) + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + return db_action.digest_readset_file(project_id=project_id, digest_data=ingest_data) -@bp.route('//digest_pair_file', methods=['POST']) -# @capitalize + +@bp.route('//digest_pair_file', methods=['POST']) +@convcheck_project def digest_pair_file(project_id: str): """ POST: list of Readset/Sample Name or id @@ -390,16 +421,21 @@ def digest_pair_file(project_id: str): flash('Data does not seems to be json') return redirect(request.url) + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + return db_action.digest_pair_file(project_id=project_id, digest_data=ingest_data) -@bp.route('//ingest_run_processing', methods=['GET', 'POST']) -# @capitalize + +@bp.route('//ingest_run_processing', methods=['GET', 'POST']) +@convcheck_project def ingest_run_processing(project_id: str): """ - POST: json describing run processing + POST: json describing run processing return: The Operation object """ + # Is this if required? 
if request.method == 'GET': return abort( 405, @@ -413,22 +449,23 @@ def ingest_run_processing(project_id: str): flash('Data does not seems to be json') return redirect(request.url) + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) - if [int(project_id)] != project_id_from_name: - return abort( - 400, - f"project name in POST {ingest_data[vc.PROJECT_NAME].upper()} not Valid" - ) + + if project_id != project_id_from_name: + return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"} return [i.flat_dict for i in db_action.ingest_run_processing(project_id=project_id, ingest_data=ingest_data)] -@bp.route('//ingest_transfer', methods=['POST']) -# @capitalize +@bp.route('//ingest_transfer', methods=['POST']) +@convcheck_project def ingest_transfer(project_id: str): """ - Add new location to file that has already been moved before - the db was created + POST: json describing a transfer + return: The Operation object """ if request.method == 'POST': try: @@ -437,16 +474,20 @@ def ingest_transfer(project_id: str): flash('Data does not seems to be json') return redirect(request.url) + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)] -@bp.route('//ingest_genpipes', methods=['GET', 'POST']) -# @capitalize +@bp.route('//ingest_genpipes', methods=['GET', 'POST']) +@convcheck_project def ingest_genpipes(project_id: str): """ - POST: json describing genpipes + POST: json describing genpipes return: The Operation object and Jobs associated """ + # Is this if required? 
if request.method == 'GET': return abort( 405, @@ -460,25 +501,26 @@ def ingest_genpipes(project_id: str): flash('Data does not seems to be json') return redirect(request.url) + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) - if [int(project_id)] != project_id_from_name: - return abort( - 400, - f"project name in POST {ingest_data[vc.PROJECT_NAME].upper()} not in the database, {project_id} required" - ) + + if project_id != project_id_from_name: + return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"} output = db_action.ingest_genpipes(project_id=project_id, ingest_data=ingest_data) operation = output[0].flat_dict jobs = [job.flat_dict for job in output[1]] return [operation, jobs] -@bp.route('//digest_unanalyzed', methods=['POST']) +@bp.route('//digest_unanalyzed', methods=['POST']) +@convcheck_project def digest_unanalyzed(project_id: str): """ POST: list of Readset/Sample Name or id return: Readsets or Samples unanalyzed """ - logger.debug(f"\n\n{project_id}\n\n") if request.method == 'POST': try: ingest_data = request.get_json(force=True) @@ -486,5 +528,7 @@ def digest_unanalyzed(project_id: str): flash('Data does not seems to be json') return redirect(request.url) + if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): + return project_id + return db_action.digest_unanalyzed(project_id=project_id, digest_data=ingest_data) - # return [i.flat_dict for i in db_action.digest_unanalyzed(project_id=project_id, digest_data=ingest_data)] diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index 6dee400..4f3ee18 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -91,17 +91,8 @@ def projects(project_id=None, session=None): select(Project) 
.where(Project.id.in_(project_id)) ) - else: - all_available = [f"id: {project.id}, name: {project.name}" for project in session.scalars(select(Project)).unique().all()] - raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {all_available}") - - ret = session.scalars(stmt).unique().all() - - if not ret: - all_available = [f"id: {project.id}, name: {project.name}" for project in session.scalars(select(Project)).unique().all()] - raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {all_available}") - return ret + return session.scalars(stmt).unique().all() def metrics_deliverable(project_id: str, deliverable: bool, patient_id=None, sample_id=None, readset_id=None, metric_id=None): """ @@ -469,14 +460,22 @@ def patients(project_id=None, patient_id=None): project_id = [project_id] if project_id is None and patient_id is None: - stmt = (select(Patient)) + stmt = select(Patient) elif patient_id is None and project_id: - stmt = (select(Patient).join(Patient.project).where(Project.id.in_(project_id))) + stmt = ( + select(Patient) + .join(Patient.project) + .where(Project.id.in_(project_id)) + ) else: if isinstance(patient_id, int): patient_id = [patient_id] - stmt = (select(Patient).where(Patient.id.in_(patient_id)) - .where(Project.id.in_(project_id))) + stmt = ( + select(Patient) + .where(Patient.id.in_(patient_id)) + .join(Patient.project) + .where(Project.id.in_(project_id)) + ) return session.scalars(stmt).unique().all() @@ -488,20 +487,26 @@ def samples(project_id=None, sample_id=None): session = database.get_session() if isinstance(project_id, str): project_id = [project_id] + if project_id is None: stmt = (select(Sample)) elif sample_id is None: - stmt = (select(Sample).join(Sample.patient).join(Patient.project) - .where(Project.id.in_(project_id))) + stmt = ( + select(Sample) + .join(Sample.patient) + .join(Patient.project) + .where(Project.id.in_(project_id)) + ) 
else: if isinstance(sample_id, int): sample_id = [sample_id] - stmt = (select(Sample) - .where(Sample.id.in_(sample_id)) - .join(Sample.patient) - .join(Patient.project) - .where(Project.id.in_(project_id)) - ) + stmt = ( + select(Sample) + .where(Sample.id.in_(sample_id)) + .join(Sample.patient) + .join(Patient.project) + .where(Project.id.in_(project_id)) + ) return session.scalars(stmt).unique().all() @@ -750,11 +755,14 @@ def digest_readset_file(project_id: str, digest_data, session=None): samples = [] readsets = [] output = [] + errors = { + "DB_ACTION_WARNING": [] + } + + location_endpoint = None if vb.LOCATION_ENDPOINT in digest_data.keys(): location_endpoint = digest_data[vb.LOCATION_ENDPOINT] - else: - location_endpoint = None if vb.SAMPLE_NAME in digest_data.keys(): for sample_name in digest_data[vb.SAMPLE_NAME]: @@ -792,9 +800,13 @@ def digest_readset_file(project_id: str, digest_data, session=None): raise DidNotFindError(table="Readset", attribute="id", query=readset_id) if readsets: set(readsets) - readset_files = [] for readset in readsets: - bed = "" + readset_files = [] + logger.debug(f"\n\n{readset}\n\n") + bed = None + fastq1 = None + fastq2 = None + bam = None for operation in [operation for operation in readset.operations if operation.name == 'run_processing']: for job in operation.jobs: for file in job.files: @@ -802,33 +814,27 @@ def digest_readset_file(project_id: str, digest_data, session=None): readset_files.append(file) for file in readset_files: if file.type in ["fastq", "fq", "fq.gz", "fastq.gz"]: - bam = "" if file.extra_metadata["read_type"] == "R1": if location_endpoint: for location in file.locations: if location_endpoint == location.endpoint: fastq1 = location.uri.split("://")[-1] - else: - fastq1 = file.locations[-1].uri.split("://")[-1] + if not fastq1: + errors["DB_ACTION_WARNING"].append(f"Looking for fastq R1 file for Sample {readset.sample.name} and Readset {readset.name} in '{location_endpoint}', file only exists on 
{[l.endpoint for l in file.locations]} system") elif file.extra_metadata["read_type"] == "R2": if location_endpoint: for location in file.locations: if location_endpoint == location.endpoint: - fastq1 = location.uri.split("://")[-1] - else: - fastq1 = file.locations[-1].uri.split("://")[-1] + fastq2 = location.uri.split("://")[-1] + if not fastq2: + errors["DB_ACTION_WARNING"].append(f"Looking for fastq R2 file for Sample {readset.sample.name} and Readset {readset.name} in '{location_endpoint}', file only exists on {[l.endpoint for l in file.locations]} system") elif file.type == "bam": - # bam = "" if location_endpoint: for location in file.locations: if location_endpoint == location.endpoint: bam = location.uri.split("://")[-1] if not bam: - raise DidNotFindError(f"looking for bam file for Sample {readset.sample.name} and Readset {readset.name} in '{location_endpoint}', file only exists on {[l.endpoint for l in file.locations]} system") - else: - bam = file.locations[-1].uri.split("://")[-1] - fastq1 = "" - fastq2 = "" + errors["DB_ACTION_WARNING"].append(f"Looking for bam file for Sample {readset.sample.name} and Readset {readset.name} in '{location_endpoint}', file only exists on {[l.endpoint for l in file.locations]} system") if file.type == "bed": bed = file.name readset_line = { @@ -847,7 +853,11 @@ def digest_readset_file(project_id: str, digest_data, session=None): "BAM": bam } output.append(readset_line) - return json.dumps(output) + if errors["DB_ACTION_WARNING"]: + ret = errors + else: + ret = output + return json.dumps(ret) def digest_pair_file(project_id: str, digest_data, session=None): """Digesting pair file fields for GenPipes""" From 83bc30cb571cb3e59d239a67fd572e4577ce75af Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Tue, 31 Oct 2023 11:48:24 -0400 Subject: [PATCH 05/19] Debug pytest --- tests/test_ingestion.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_ingestion.py b/tests/test_ingestion.py index 
c19f81e..a92047b 100644 --- a/tests/test_ingestion.py +++ b/tests/test_ingestion.py @@ -14,13 +14,11 @@ def test_create_api(client, run_processing_json, app): project_name = run_processing_json[vb.PROJECT_NAME] - project_id = "1" response = client.get(f'admin/create_project/{project_name}') assert response.status_code == 200 assert json.loads(response.data)['name'] == f"{project_name}" assert json.loads(response.data)['id'] == 1 - # project_id = db_action.name_to_id("Project", project_name) - response = client.post(f'project/{project_id}/ingest_run_processing', data=json.dumps(run_processing_json)) + response = client.post(f'project/{project_name}/ingest_run_processing', data=json.dumps(run_processing_json)) assert response.status_code == 200 assert json.loads(response.data)[0]['name'] == "run_processing" assert json.loads(response.data)[0]['id'] == 1 From 84bbdc07583b11708c2404a60f220a140b1539d5 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Thu, 2 Nov 2023 15:48:11 -0400 Subject: [PATCH 06/19] Adding if to avoid failure if project_name in json set to null --- project_tracking/api/project.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index 266777d..6e2d1b3 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -452,10 +452,11 @@ def ingest_run_processing(project_id: str): if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): return project_id - project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) - if project_id != project_id_from_name: - return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"} + if ingest_data[vc.PROJECT_NAME]: + project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) + if project_id != project_id_from_name: + return 
{"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"} return [i.flat_dict for i in db_action.ingest_run_processing(project_id=project_id, ingest_data=ingest_data)] @@ -504,10 +505,11 @@ def ingest_genpipes(project_id: str): if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"): return project_id - project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) - if project_id != project_id_from_name: - return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"} + if ingest_data[vc.PROJECT_NAME]: + project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) + if project_id != project_id_from_name: + return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"} output = db_action.ingest_genpipes(project_id=project_id, ingest_data=ingest_data) operation = output[0].flat_dict From 85e6a2feacba838639895ebf6a9ec873d292c0cc Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Thu, 9 Nov 2023 14:55:32 -0500 Subject: [PATCH 07/19] Small fix --- project_tracking/api/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index 6e2d1b3..2e29d5c 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -454,7 +454,7 @@ def ingest_run_processing(project_id: str): if ingest_data[vc.PROJECT_NAME]: - project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) + project_id_from_name = str(db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())[0]) if project_id != project_id_from_name: return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not 
matching the Project in the route {project_id}"} @@ -507,7 +507,7 @@ def ingest_genpipes(project_id: str): if ingest_data[vc.PROJECT_NAME]: - project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper()) + project_id_from_name = str(db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())[0]) if project_id != project_id_from_name: return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"} From e6389d2e8ea9bb94e4229580075cc8329addc04b Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Thu, 9 Nov 2023 15:04:45 -0500 Subject: [PATCH 08/19] Need to convert project_id from name_to_id from list of int into str --- project_tracking/api/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index 2e29d5c..c9ceb47 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -43,7 +43,7 @@ def wrap(*args, project=None, **kwargs): all_available = [f"id: {project.id}, name: {project.name}" for project in db_action.projects()] project_id = {"DB_ACTION_WARNING": f"Requested Project '{project}' doesn't exist. Please try again with one of the following: {all_available}"} else: - project_id = db_action.name_to_id("Project", project.upper()) + project_id = str(db_action.name_to_id("Project", project.upper())[0]) if not project_id: all_available = [f"id: {project.id}, name: {project.name}" for project in db_action.projects()] project_id = {"DB_ACTION_WARNING": f"Requested Project '{project}' doesn't exist. 
Please try again with one of the following: {all_available}"} From d935efd0f746e9aa5602d5a4ee11dfae28b05bd2 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Mon, 20 Nov 2023 15:44:10 -0500 Subject: [PATCH 09/19] Adding a new attribute for experiment: nucleic_acid_type --- project_tracking/db_action.py | 169 +++++++++++++++++++++++++++++---- project_tracking/model.py | 25 ++++- project_tracking/vocabulary.py | 2 + tests/conftest.py | 2 +- tests/data/run_processing.json | 5 + tests/test_serialization.py | 3 +- 6 files changed, 182 insertions(+), 24 deletions(-) diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index 4f3ee18..a163a82 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -53,13 +53,22 @@ def to_dict(self): return rv class DidNotFindError(Error): - """DidNotFind""" + """DidNotFindError""" def __init__(self, message=None, table=None, attribute=None, query=None): super().__init__(message) if message: self.message = message else: - self.message = f"{table} with {attribute} {query} doesn't exist on database" + self.message = f"'{table}' with '{attribute}' '{query}' doesn't exist on database" + +class RequestError(Error): + """RequestError""" + def __init__(self, message=None, argument=None): + super().__init__(message) + if message: + self.message = message + else: + self.message = f"For current request '{argument}' is required" def name_to_id(model_class, name, session=None): """ @@ -590,6 +599,7 @@ def ingest_run_processing(project_id: str, ingest_data, session=None): experiment = Experiment.from_attributes( sequencing_technology=readset_json[vb.EXPERIMENT_SEQUENCING_TECHNOLOGY], type=readset_json[vb.EXPERIMENT_TYPE], + nucleic_acid_type=readset_json[vb.EXPERIMENT_NUCLEIC_ACID_TYPE], library_kit=readset_json[vb.EXPERIMENT_LIBRARY_KIT], kit_expiration_date=kit_expiration_date, session=session @@ -752,6 +762,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if not session: 
session = database.get_session() + patients = [] samples = [] readsets = [] output = [] @@ -760,13 +771,59 @@ def digest_readset_file(project_id: str, digest_data, session=None): } location_endpoint = None - if vb.LOCATION_ENDPOINT in digest_data.keys(): location_endpoint = digest_data[vb.LOCATION_ENDPOINT] + if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys(): + nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE] + else: + raise RequestError(argument="experiment_nucleic_acid_type") + + if vb.PATIENT_NAME in digest_data.keys(): + for patient_name in digest_data[vb.PATIENT_NAME]: + patient = session.scalars( + select(Patient) + .where(Patient.name == patient_name) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="name", query=patient_name) + if vb.PATIENT_ID in digest_data.keys(): + for patient_id in digest_data[vb.PATIENT_ID]: + # logger.debug(f"\n\n{patient_id}\n\n") + patient = session.scalars( + select(Patient) + .where(Patient.id == patient_id) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="id", query=patient_id) + if patients: + set(patients) + for patient in patients: + for sample in patient.samples: + for readset in sample.readsets: + readsets.append(readset) + if vb.SAMPLE_NAME in digest_data.keys(): for sample_name in digest_data[vb.SAMPLE_NAME]: - sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.name == sample_name) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == 
nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: @@ -774,7 +831,13 @@ def digest_readset_file(project_id: str, digest_data, session=None): if vb.SAMPLE_ID in digest_data.keys(): for sample_id in digest_data[vb.SAMPLE_ID]: # logger.debug(f"\n\n{sample_id}\n\n") - sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.id == sample_id) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: @@ -784,16 +847,27 @@ def digest_readset_file(project_id: str, digest_data, session=None): for sample in samples: for readset in sample.readsets: readsets.append(readset) + if vb.READSET_NAME in digest_data.keys(): for readset_name in digest_data[vb.READSET_NAME]: - readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.name == readset_name) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: readsets.append(readset) else: raise DidNotFindError(table="Readset", attribute="name", query=readset_name) if vb.READSET_ID in digest_data.keys(): for readset_id in digest_data[vb.READSET_ID]: - readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.id == readset_id) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: readsets.append(readset) else: @@ -802,7 +876,6 @@ def digest_readset_file(project_id: str, digest_data, session=None): set(readsets) for readset in readsets: readset_files = [] - logger.debug(f"\n\n{readset}\n\n") bed = None fastq1 = None fastq2 = None @@ -869,35 +942,93 @@ def 
digest_pair_file(project_id: str, digest_data, session=None): # readsets = [] output = [] + if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys(): + nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE] + else: + raise RequestError(argument="experiment_nucleic_acid_type") + + if vb.PATIENT_NAME in digest_data.keys(): + for patient_name in digest_data[vb.PATIENT_NAME]: + patient = session.scalars( + select(Patient) + .where(Patient.name == patient_name) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="name", query=patient_name) + if vb.PATIENT_ID in digest_data.keys(): + for patient_id in digest_data[vb.PATIENT_ID]: + patient = session.scalars( + select(Patient) + .where(Patient.id == patient_id) + .join(Patient.samples) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() + if patient: + patients.append(patient) + else: + raise DidNotFindError(table="Patient", attribute="id", query=patient_id) + if patients: + set(patients) + for patient in patients: + for sample in patient.samples: + samples.append(sample) + if vb.SAMPLE_NAME in digest_data.keys(): for sample_name in digest_data[vb.SAMPLE_NAME]: - sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first() - # logger.info(f"\n\n{sample}\n\n") + sample = session.scalars( + select(Sample) + .where(Sample.name == sample_name) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: raise DidNotFindError(table="Sample", attribute="name", query=sample_name) if vb.SAMPLE_ID in digest_data.keys(): for sample_id in digest_data[vb.SAMPLE_ID]: - sample = 
session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first() + sample = session.scalars( + select(Sample) + .where(Sample.id == sample_id) + .join(Sample.readsets) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if sample: samples.append(sample) else: raise DidNotFindError(table="Sample", attribute="id", query=sample_id) if vb.READSET_NAME in digest_data.keys(): for readset_name in digest_data[vb.READSET_NAME]: - readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.name == readset_name) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: samples.append(readset.sample) - # readsets.append(readset) else: raise DidNotFindError(table="Readset", attribute="name", query=readset_name) if vb.READSET_ID in digest_data.keys(): for readset_id in digest_data[vb.READSET_ID]: - readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first() + readset = session.scalars( + select(Readset) + .where(Readset.id == readset_id) + .join(Readset.experiment) + .where(Experiment.nucleic_acid_type == nucleic_acid_type) + ).unique().first() if readset: samples.append(readset.sample) - # readsets.append(readset) else: raise DidNotFindError(table="Readset", attribute="id", query=readset_id) if samples: @@ -1066,7 +1197,7 @@ def digest_unanalyzed(project_id: str, digest_data, session=None): run_name = digest_data["run_name"] if run_name: run_id = name_to_id("Run", run_name)[0] - experiment_sequencing_technology = digest_data["experiment_sequencing_technology"] + experiment_nucleic_acid_type = digest_data["experiment_nucleic_acid_type"] location_endpoint = digest_data["location_endpoint"] if sample_name_flag: @@ -1096,17 +1227,15 @@ def digest_unanalyzed(project_id: str, digest_data, session=None): stmt.where(Run.id 
== run_id) .join(Readset.run) ) - if experiment_sequencing_technology: + if experiment_nucleic_acid_type: stmt = ( - stmt.where(Experiment.sequencing_technology == experiment_sequencing_technology) + stmt.where(Experiment.nucleic_acid_type == experiment_nucleic_acid_type) .join(Readset.experiment) ) - # logger.debug(f"\n\n{stmt}\n\n") output = { "location_endpoint": location_endpoint, key: session.scalars(stmt).unique().all() } - # logger.debug(f"\n\n{session.scalars(stmt).unique().all()}\n\n") return json.dumps(output) diff --git a/project_tracking/model.py b/project_tracking/model.py index 2b74951..8ad1ee6 100644 --- a/project_tracking/model.py +++ b/project_tracking/model.py @@ -35,6 +35,12 @@ from . import database +class NucleicAcidTypeEnum(enum.Enum): + """nucleic_acid_type enum""" + DNA = "DNA" + RNA = "RNA" + + class LaneEnum(enum.Enum): """ lane enum @@ -43,6 +49,10 @@ class LaneEnum(enum.Enum): TWO = "2" THREE = "3" FOUR = "4" + FIVE = "5" + SIX = "6" + SEVEN = "7" + EIGHT = "8" class SequencingTypeEnum(enum.Enum): @@ -90,6 +100,7 @@ class Base(DeclarativeBase): # this is needed for the enum to work properly right now # see https://github.com/sqlalchemy/sqlalchemy/discussions/8856 type_annotation_map = { + NucleicAcidTypeEnum: Enum(NucleicAcidTypeEnum), LaneEnum: Enum(LaneEnum), SequencingTypeEnum: Enum(SequencingTypeEnum), StatusEnum: Enum(StatusEnum), @@ -346,6 +357,7 @@ class Experiment(BaseTable): id integer [PK] sequencing_technology text type text + nucleic_acid_type nucleic_acid_type library_kit text kit_expiration_date text deprecated boolean @@ -358,13 +370,22 @@ class Experiment(BaseTable): sequencing_technology: Mapped[str] = mapped_column(default=None, nullable=True) type: Mapped[str] = mapped_column(default=None, nullable=True) + nucleic_acid_type: Mapped[NucleicAcidTypeEnum] = mapped_column(default=None, nullable=False) library_kit: Mapped[str] = mapped_column(default=None, nullable=True) kit_expiration_date: Mapped[datetime] = 
mapped_column(default=None, nullable=True) readsets: Mapped[list["Readset"]] = relationship(back_populates="experiment") @classmethod - def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None, kit_expiration_date=None, session=None): + def from_attributes( + cls, + nucleic_acid_type, + sequencing_technology=None, + type=None, + library_kit=None, + kit_expiration_date=None, + session=None + ): """ get experiment if it exist, set it if it does not exist """ @@ -374,6 +395,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None select(cls) .where(cls.sequencing_technology == sequencing_technology) .where(cls.type == type) + .where(cls.nucleic_acid_type == nucleic_acid_type) .where(cls.library_kit == library_kit) .where(cls.kit_expiration_date == kit_expiration_date) ).first() @@ -381,6 +403,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None experiment = cls( sequencing_technology=sequencing_technology, type=type, + nucleic_acid_type=nucleic_acid_type, library_kit=library_kit, kit_expiration_date=kit_expiration_date ) diff --git a/project_tracking/vocabulary.py b/project_tracking/vocabulary.py index 90d8cfc..28920d5 100644 --- a/project_tracking/vocabulary.py +++ b/project_tracking/vocabulary.py @@ -10,6 +10,7 @@ # patient table PATIENT = "patient" +PATIENT_ID = "patient_id" PATIENT_FMS_ID = "patient_fms_id" PATIENT_NAME = "patient_name" PATIENT_COHORT = "patient_cohort" @@ -25,6 +26,7 @@ # experiment table EXPERIMENT_SEQUENCING_TECHNOLOGY = "experiment_sequencing_technology" EXPERIMENT_TYPE = "experiment_type" +EXPERIMENT_NUCLEIC_ACID_TYPE = "experiment_nucleic_acid_type" EXPERIMENT_LIBRARY_KIT = "experiment_library_kit" EXPERIMENT_KIT_EXPIRATION_DATE = "experiment_kit_expiration_date" EXPERIMENT_TYPE_LIST = ["PCR-FREE", "RNASEQ"] diff --git a/tests/conftest.py b/tests/conftest.py index 5a9d125..99b407a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -28,7 +28,7 @@ def 
pre_filled_model(): project=project) sequencing_technology = 'Fancy Buzzword' - exp = model.Experiment(sequencing_technology=sequencing_technology) + exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA) pa_name = "P_O" pa = model.Patient(name=pa_name, project=project) sa_name = 'gros_bobo' diff --git a/tests/data/run_processing.json b/tests/data/run_processing.json index 4ff31b8..5e85ebd 100644 --- a/tests/data/run_processing.json +++ b/tests/data/run_processing.json @@ -21,6 +21,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-JG-9-23-15000863775-19933DT.A01433_0157_1", @@ -73,6 +74,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-JG-9-23-15000936286-19866DN.A01433_0157_2", @@ -133,6 +135,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "RNASeq", + "experiment_nucleic_acid_type": "RNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-6929-1RT.A01433_0157_3", @@ -191,6 +194,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-15000863775-19933DT.A01433_0157_1", @@ -243,6 +247,7 @@ { "experiment_sequencing_technology": null, "experiment_type": "PCR-free", + "experiment_nucleic_acid_type": "DNA", "experiment_library_kit": null, "experiment_kit_expiration_date": null, "readset_name": "MoHQ-CM-1-3-15000936286-19866DN.A01433_0157_2", diff --git a/tests/test_serialization.py b/tests/test_serialization.py index fa75b15..473c8c9 100644 --- a/tests/test_serialization.py +++ 
b/tests/test_serialization.py @@ -8,7 +8,6 @@ def test_serialization(not_app_db): op_config_version = 0.1 op_config_name = 'generic_index' op_name = 'ingest' - sequencing_technology = 'Fancy Buzzword' pa_name = "P_O" sa_name = 'gros_bobo' ru_name = "cure the Conglomerat old director's partner 01" @@ -28,7 +27,7 @@ def test_serialization(not_app_db): operation_config=op_c, project=project) - exp = model.Experiment(sequencing_technology=sequencing_technology) + exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA) pa = model.Patient(name=pa_name, project=project) sa = model.Sample(name=sa_name, patient=pa) ru = model.Run(instrument=instrument, name=ru_name) From dae290ac167c9619961404e510088e6755a8a190 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Tue, 21 Nov 2023 13:59:31 -0500 Subject: [PATCH 10/19] Debug --- project_tracking/db_action.py | 1 + 1 file changed, 1 insertion(+) diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index a163a82..c293589 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -939,6 +939,7 @@ def digest_pair_file(project_id: str, digest_data, session=None): pair_dict = {} samples = [] + patients = [] # readsets = [] output = [] From 66e19443dca7d17c71c0e9a74c6e391925370d6a Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Tue, 21 Nov 2023 16:41:30 -0500 Subject: [PATCH 11/19] Consolidating docstring + adding experiment_nucleic_acid_type to digest_unanalyzed output --- project_tracking/api/project.py | 32 ++++++++++++++++---------------- project_tracking/db_action.py | 1 + 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index c9ceb47..6591009 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -81,8 +81,8 @@ def projects(project_id: str = None): @convcheck_project def patients(project_id: str, patient_id: str = None): """ - patient_id: uses the 
form "1,3-8,9" - return: list all patient or selected patient that are also par of + patient_id: uses the form "1,3-8,9", if not provided all patients are returned + return: list all patients or selected patients, belonging to Query: (pair, tumor): Default (None, true) @@ -144,8 +144,8 @@ def patients(project_id: str, patient_id: str = None): @convcheck_project def samples(project_id: str, sample_id: str = None): """ - sample_id: uses the form "1,3-8,9", if not provides, all sample are returned - return: all or selected sample that are in sample_id and part of project + sample_id: uses the form "1,3-8,9", if not provided all samples are returned + return: list all patients or selected samples, belonging to """ query = request.args @@ -174,8 +174,8 @@ def samples(project_id: str, sample_id: str = None): @convcheck_project def readsets(project_id: str, readset_id: str=None): """ - readset_id: uses the form "1,3-8,9", if not provided, all readsets are returned - return: selected readsets that are in sample_id and part of project + readset_id: uses the form "1,3-8,9", if not provided all readsets are returned + return: list all patients or selected readsets, belonging to """ query = request.args @@ -212,7 +212,7 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id sample_id: uses the form "1,3-8,9". Select file by sample ids redeaset_id: uses the form "1,3-8,9". Select file by readset ids - return: selected files + return: selected files, belonging to Query: (deliverable): Default (None) @@ -281,13 +281,13 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_ sample_id: uses the form "1,3-8,9". Select metric by sample ids redeaset_id: uses the form "1,3-8,9". Select metric by readset ids - We also accespt POST data with comma separeted list + We also accept POST data with comma separeted list metric_name = [,NAME] [...] readset_name = [,NAME] [...] sample_name = [,NAME] [...] patient_name = [,NAME] [...] 
- return: selected metrics + return: selected metrics, belonging to Query: (deliverable): Default (None) @@ -361,7 +361,7 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_ def readsets_from_samples(project_id: str, sample_id: str): """ sample_id: uses the form "1,3-8,9" - return: readsets for selected sample_id + return: selected readsets belonging to """ query = request.args @@ -382,14 +382,14 @@ def readsets_from_samples(project_id: str, sample_id: str): action_output = db_action.readsets(project_id, sample_id) - return sanity_check("Metric", action_output) + return sanity_check("Readset", action_output) @bp.route('//digest_readset_file', methods=['POST']) @convcheck_project def digest_readset_file(project_id: str): """ - POST: list of Readset/Sample Name or id + POST: json holding the list of Patient/Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type return: all information to create a "Genpipes readset file" """ @@ -410,7 +410,7 @@ def digest_readset_file(project_id: str): @convcheck_project def digest_pair_file(project_id: str): """ - POST: list of Readset/Sample Name or id + POST: json holding the list of Patient/Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type return: all information to create a "Genpipes pair file" """ @@ -484,7 +484,7 @@ def ingest_transfer(project_id: str): @convcheck_project def ingest_genpipes(project_id: str): """ - POST: json describing genpipes + POST: json describing genpipes analysis return: The Operation object and Jobs associated """ @@ -520,8 +520,8 @@ def ingest_genpipes(project_id: str): @convcheck_project def digest_unanalyzed(project_id: str): """ - POST: list of Readset/Sample Name or id - return: Readsets or Samples unanalyzed + POST: json holding the list of Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type + return: Samples/Readsets unanalyzed with location endpoint + experiment nucleic_acid_type """ 
if request.method == 'POST': try: diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index c293589..4c1ad22 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -1236,6 +1236,7 @@ def digest_unanalyzed(project_id: str, digest_data, session=None): output = { "location_endpoint": location_endpoint, + "experiment_nucleic_acid_type": experiment_nucleic_acid_type, key: session.scalars(stmt).unique().all() } From 620fdf44a9aea3e01cd8772df8e3c471d2e7a16b Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Tue, 21 Nov 2023 16:56:11 -0500 Subject: [PATCH 12/19] Code cleaning + docstring consolidation --- project_tracking/api/project.py | 55 ++++++++++++++------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index 6591009..98d218f 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -64,7 +64,8 @@ def sanity_check(item, action_output): @convcheck_project def projects(project_id: str = None): """ - project: uses the form "/project/1" for project ID and "/project/name" for project name + GET: + project: uses the form "/project/1" for project ID and "/project/name" for project name return: list of all the details of the poject with name "project_name" or ID "project_id" """ @@ -81,7 +82,8 @@ def projects(project_id: str = None): @convcheck_project def patients(project_id: str, patient_id: str = None): """ - patient_id: uses the form "1,3-8,9", if not provided all patients are returned + GET: + patient_id: uses the form "1,3-8,9", if not provided all patients are returned return: list all patients or selected patients, belonging to Query: @@ -144,7 +146,8 @@ def patients(project_id: str, patient_id: str = None): @convcheck_project def samples(project_id: str, sample_id: str = None): """ - sample_id: uses the form "1,3-8,9", if not provided all samples are returned + GET: + sample_id: uses the form 
"1,3-8,9", if not provided all samples are returned return: list all patients or selected samples, belonging to """ @@ -174,7 +177,8 @@ def samples(project_id: str, sample_id: str = None): @convcheck_project def readsets(project_id: str, readset_id: str=None): """ - readset_id: uses the form "1,3-8,9", if not provided all readsets are returned + GET: + readset_id: uses the form "1,3-8,9", if not provided all readsets are returned return: list all patients or selected readsets, belonging to """ @@ -207,11 +211,11 @@ def readsets(project_id: str, readset_id: str=None): @convcheck_project def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, file_id: str=None): """ - file_id: uses the form "1,3-8,9". Select file by ids - patient_id: uses the form "1,3-8,9". Select file by patient ids - sample_id: uses the form "1,3-8,9". Select file by sample ids - redeaset_id: uses the form "1,3-8,9". Select file by readset ids - + GET: + file_id: uses the form "1,3-8,9". Select file by ids + patient_id: uses the form "1,3-8,9". Select file by patient ids + sample_id: uses the form "1,3-8,9". Select file by sample ids + redeaset_id: uses the form "1,3-8,9". Select file by readset ids return: selected files, belonging to Query: @@ -276,10 +280,12 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id @convcheck_project def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, metric_id: str=None): """ - metric_id: uses the form "1,3-8,9". Select metric by ids - patient_id: uses the form "1,3-8,9". Select metric by patient ids - sample_id: uses the form "1,3-8,9". Select metric by sample ids - redeaset_id: uses the form "1,3-8,9". Select metric by readset ids + GET: + metric_id: uses the form "1,3-8,9". Select metric by ids + patient_id: uses the form "1,3-8,9". Select metric by patient ids + sample_id: uses the form "1,3-8,9". 
Select metric by sample ids + redeaset_id: uses the form "1,3-8,9". Select metric by readset ids + return: selected metrics, belonging to We also accept POST data with comma separeted list metric_name = [,NAME] [...] @@ -287,8 +293,6 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_ sample_name = [,NAME] [...] patient_name = [,NAME] [...] - return: selected metrics, belonging to - Query: (deliverable): Default (None) The deliverable query allows to get all metrics labelled as deliverable @@ -360,7 +364,8 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_ @convcheck_project def readsets_from_samples(project_id: str, sample_id: str): """ - sample_id: uses the form "1,3-8,9" + GET: + sample_id: uses the form "1,3-8,9" return: selected readsets belonging to """ @@ -427,7 +432,7 @@ def digest_pair_file(project_id: str): return db_action.digest_pair_file(project_id=project_id, digest_data=ingest_data) -@bp.route('//ingest_run_processing', methods=['GET', 'POST']) +@bp.route('//ingest_run_processing', methods=['POST']) @convcheck_project def ingest_run_processing(project_id: str): """ @@ -435,13 +440,6 @@ def ingest_run_processing(project_id: str): return: The Operation object """ - # Is this if required? - if request.method == 'GET': - return abort( - 405, - "Use post method to ingest runs" - ) - if request.method == 'POST': try: ingest_data = request.get_json(force=True) @@ -480,7 +478,7 @@ def ingest_transfer(project_id: str): return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)] -@bp.route('//ingest_genpipes', methods=['GET', 'POST']) +@bp.route('//ingest_genpipes', methods=['POST']) @convcheck_project def ingest_genpipes(project_id: str): """ @@ -488,13 +486,6 @@ def ingest_genpipes(project_id: str): return: The Operation object and Jobs associated """ - # Is this if required? 
- if request.method == 'GET': - return abort( - 405, - "Use post method to ingest genpipes analysis" - ) - if request.method == 'POST': try: ingest_data = request.get_json(force=True) From 8674a8a2cfb5b1d6e947075ba1c033d9e7fe67dc Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Wed, 29 Nov 2023 13:19:26 -0500 Subject: [PATCH 13/19] Changing error message --- project_tracking/api/project.py | 2 +- project_tracking/db_action.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py index 98d218f..4edd7b1 100644 --- a/project_tracking/api/project.py +++ b/project_tracking/api/project.py @@ -95,7 +95,7 @@ def patients(project_id: str, patient_id: str = None): Return: a subset of patient who have Tumor=False & Tumor=True samples (false, true): return: a subset of patient who only have Tumor=True samples - (false, true): + (false, false): return: a subset of patient who only have Tumor=false samples """ diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index 4c1ad22..d6af3c7 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -792,7 +792,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if patient: patients.append(patient) else: - raise DidNotFindError(table="Patient", attribute="name", query=patient_name) + raise DidNotFindError(f"'Patient' with 'name' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if vb.PATIENT_ID in digest_data.keys(): for patient_id in digest_data[vb.PATIENT_ID]: # logger.debug(f"\n\n{patient_id}\n\n") @@ -807,7 +807,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if patient: patients.append(patient) else: - raise DidNotFindError(table="Patient", attribute="id", query=patient_id) + raise DidNotFindError(f"'Patient' with 'id' '{patient_id}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on 
database") if patients: set(patients) for patient in patients: @@ -827,7 +827,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if sample: samples.append(sample) else: - raise DidNotFindError(table="Sample", attribute="name", query=sample_name) + raise DidNotFindError(f"'Sample' with 'name' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if vb.SAMPLE_ID in digest_data.keys(): for sample_id in digest_data[vb.SAMPLE_ID]: # logger.debug(f"\n\n{sample_id}\n\n") @@ -841,7 +841,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if sample: samples.append(sample) else: - raise DidNotFindError(table="Sample", attribute="id", query=sample_id) + raise DidNotFindError(f"'Sample' with 'id' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if samples: set(samples) for sample in samples: @@ -859,7 +859,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if readset: readsets.append(readset) else: - raise DidNotFindError(table="Readset", attribute="name", query=readset_name) + raise DidNotFindError(f"'Readset' with 'name' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if vb.READSET_ID in digest_data.keys(): for readset_id in digest_data[vb.READSET_ID]: readset = session.scalars( @@ -871,7 +871,7 @@ def digest_readset_file(project_id: str, digest_data, session=None): if readset: readsets.append(readset) else: - raise DidNotFindError(table="Readset", attribute="id", query=readset_id) + raise DidNotFindError(f"'Readset' with 'id' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database") if readsets: set(readsets) for readset in readsets: From 0f80cf8162db7fb5e31a6626c9cfb500d8a42be6 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Wed, 29 Nov 2023 15:21:07 -0500 Subject: [PATCH 14/19] Accepting metric without flag set --- project_tracking/db_action.py | 
12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index d6af3c7..1d49502 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -651,10 +651,14 @@ def ingest_run_processing(project_id: str, ingest_data, session=None): metric_deliverable = metric_json[vb.METRIC_DELIVERABLE] else: metric_deliverable = False + if vb.METRIC_FLAG in metric_json: + metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG]) + else: + metric_flag = None Metric( name=metric_json[vb.METRIC_NAME], value=metric_json[vb.METRIC_VALUE], - flag=FlagEnum(metric_json[vb.METRIC_FLAG]), + flag=metric_flag, deliverable=metric_deliverable, job=job, readsets=[readset] @@ -1150,10 +1154,14 @@ def ingest_genpipes(project_id: str, ingest_data, session=None): metric_deliverable = metric_json[vb.METRIC_DELIVERABLE] else: metric_deliverable = False + if vb.METRIC_FLAG in metric_json: + metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG]) + else: + metric_flag = None Metric( name=metric_json[vb.METRIC_NAME], value=metric_json[vb.METRIC_VALUE], - flag=FlagEnum(metric_json[vb.METRIC_FLAG]), + flag=metric_flag, deliverable=metric_deliverable, job=job, readsets=[readset] From 24319cbb5508ca62c1da7c3bda1787dce7156176 Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Wed, 29 Nov 2023 15:47:42 -0500 Subject: [PATCH 15/19] Allowing job_status being null if job hasn't been submitted --- project_tracking/db_action.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index 1d49502..575f345 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -1113,9 +1113,13 @@ def ingest_genpipes(project_id: str, ingest_data, session=None): job_stop = datetime.strptime(job_json[vb.JOB_STOP], vb.DATE_LONG_FMT) except TypeError: job_stop = None + if job_json[vb.JOB_STATUS]: + job_status = 
StatusEnum(job_json[vb.JOB_STATUS]) + else: + job_status = None job = Job( name=job_json[vb.JOB_NAME], - status=StatusEnum(job_json[vb.JOB_STATUS]), + status=job_status, start=job_start, stop=job_stop, operation=operation From 2375fdd7935daa3c8c119e1cf5a27a9a2cbdc96b Mon Sep 17 00:00:00 2001 From: Paul Stretenowich Date: Wed, 29 Nov 2023 16:11:50 -0500 Subject: [PATCH 16/19] Skipping null job during ingesting genpipes as we don't want files not generated --- project_tracking/db_action.py | 109 +++++++++++++++++----------------- 1 file changed, 55 insertions(+), 54 deletions(-) diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py index 575f345..261494e 100644 --- a/project_tracking/db_action.py +++ b/project_tracking/db_action.py @@ -1113,63 +1113,64 @@ def ingest_genpipes(project_id: str, ingest_data, session=None): job_stop = datetime.strptime(job_json[vb.JOB_STOP], vb.DATE_LONG_FMT) except TypeError: job_stop = None + # Check if job_status exists otherwise skip it if job_json[vb.JOB_STATUS]: - job_status = StatusEnum(job_json[vb.JOB_STATUS]) - else: - job_status = None - job = Job( - name=job_json[vb.JOB_NAME], - status=job_status, - start=job_start, - stop=job_stop, - operation=operation - ) - for file_json in job_json[vb.FILE]: - suffixes = Path(file_json[vb.FILE_NAME]).suffixes - file_type = os.path.splitext(file_json[vb.FILE_NAME])[-1][1:] - if ".gz" in suffixes: - file_type = "".join(suffixes[-2:]) - if vb.FILE_DELIVERABLE in file_json: - file_deliverable = file_json[vb.FILE_DELIVERABLE] - else: - file_deliverable = False - # Need to have an the following otherwise assigning extra_metadata to None converts null into json in the db - if vb.FILE_EXTRA_METADATA in file_json.keys(): - file = File( - name=file_json[vb.FILE_NAME], - type=file_type, - extra_metadata=file_json[vb.FILE_EXTRA_METADATA], - deliverable=file_deliverable, - readsets=[readset], - jobs=[job] - ) - else: - file = File( - name=file_json[vb.FILE_NAME], - type=file_type, - 
deliverable=file_deliverable, - readsets=[readset], - jobs=[job] - ) - location = Location.from_uri(uri=file_json[vb.LOCATION_URI], file=file, session=session) - if vb.METRIC in job_json.keys(): - for metric_json in job_json[vb.METRIC]: - if vb.METRIC_DELIVERABLE in metric_json: - metric_deliverable = metric_json[vb.METRIC_DELIVERABLE] + job = Job( + name=job_json[vb.JOB_NAME], + status=StatusEnum(job_json[vb.JOB_STATUS]), + start=job_start, + stop=job_stop, + operation=operation + ) + for file_json in job_json[vb.FILE]: + suffixes = Path(file_json[vb.FILE_NAME]).suffixes + file_type = os.path.splitext(file_json[vb.FILE_NAME])[-1][1:] + if ".gz" in suffixes: + file_type = "".join(suffixes[-2:]) + if vb.FILE_DELIVERABLE in file_json: + file_deliverable = file_json[vb.FILE_DELIVERABLE] else: - metric_deliverable = False - if vb.METRIC_FLAG in metric_json: - metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG]) + file_deliverable = False + # Need to have an the following otherwise assigning extra_metadata to None converts null into json in the db + if vb.FILE_EXTRA_METADATA in file_json.keys(): + file = File( + name=file_json[vb.FILE_NAME], + type=file_type, + extra_metadata=file_json[vb.FILE_EXTRA_METADATA], + deliverable=file_deliverable, + readsets=[readset], + jobs=[job] + ) else: - metric_flag = None - Metric( - name=metric_json[vb.METRIC_NAME], - value=metric_json[vb.METRIC_VALUE], - flag=metric_flag, - deliverable=metric_deliverable, - job=job, - readsets=[readset] - ) + file = File( + name=file_json[vb.FILE_NAME], + type=file_type, + deliverable=file_deliverable, + readsets=[readset], + jobs=[job] + ) + location = Location.from_uri(uri=file_json[vb.LOCATION_URI], file=file, session=session) + if vb.METRIC in job_json.keys(): + for metric_json in job_json[vb.METRIC]: + if vb.METRIC_DELIVERABLE in metric_json: + metric_deliverable = metric_json[vb.METRIC_DELIVERABLE] + else: + metric_deliverable = False + if vb.METRIC_FLAG in metric_json: + metric_flag = 
FlagEnum(metric_json[vb.METRIC_FLAG]) + else: + metric_flag = None + Metric( + name=metric_json[vb.METRIC_NAME], + value=metric_json[vb.METRIC_VALUE], + flag=metric_flag, + deliverable=metric_deliverable, + job=job, + readsets=[readset] + ) + # If job status is null then skip it as we don't want to ingest data not generated + else: + pass session.add(job) session.flush() From 3a126312281290a48a5fbb1b8944c213f37bd55b Mon Sep 17 00:00:00 2001 From: P-O Quirion Date: Tue, 19 Dec 2023 16:04:18 -0500 Subject: [PATCH 17/19] Build and push Image. Dockerfile to Containerfile --- .github/workflows/run_test.yml | 38 ++++++++++++++++++++++++++++++++-- Dockerfile => Containerfile | 3 ++- 2 files changed, 38 insertions(+), 3 deletions(-) rename Dockerfile => Containerfile (85%) diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml index 0222484..44235a0 100644 --- a/.github/workflows/run_test.yml +++ b/.github/workflows/run_test.yml @@ -11,9 +11,17 @@ on: - 'main' - 'dev' -jobs: - build: + tags: + - '[0-9]+.[0-9]+.[0-9]+' + +env: + REGISTRY_USER: c3genomics+github_pusher + IMAGE_REGISTRY: quay.io + REGISTRY_PASSWORD: ${{ secrets.QUAY_ROBOT_TOKEN }} + IMAGE: c3genomics/project_tracking +jobs: + test: runs-on: ${{ matrix.os }} strategy: matrix: @@ -41,3 +49,29 @@ jobs: - name: Test with pytest run: | pytest -v + build: + needs: test + if: startsWith(github.ref, 'refs/tags') + name: Build image + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: set tag + run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV + - name: Buildah Action + uses: redhat-actions/buildah-build@v2 + with: + image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }} + tag: ${{ env.RELEASE_VERSION }} latest_release + containerfiles: ./Containerfile + - name: Push to repo + uses: redhat-actions/push-to-registry@v2 + with: + username: ${{ env.REGISTRY_USER }} + password: ${{ env.REGISTRY_PASSWORD }} + registry: ${{ env.IMAGE_REGISTRY }} + image: ${{ 
env.IMAGE_REGISTRY }}/${{ env.IMAGE }} + tag: ${{ env.RELEASE_VERSION }} latest_release + - name: Print image url + run: echo "Image pushed to ${{ steps.push-to-repo.outputs.registry-paths }}" + diff --git a/Dockerfile b/Containerfile similarity index 85% rename from Dockerfile rename to Containerfile index 5a421ca..55e7aaf 100644 --- a/Dockerfile +++ b/Containerfile @@ -1,4 +1,5 @@ -FROM fedora:36 +FROM fedora:39 +MAINTAINER P-O Quirion po.quirion@mcgill.ca ENV APP=project_tracking RUN mkdir /app /sqlite From 476ae6efdf842dc902c413aa9697392fa69e74e7 Mon Sep 17 00:00:00 2001 From: P-O Quirion Date: Tue, 19 Dec 2023 16:16:49 -0500 Subject: [PATCH 18/19] typo in tag --- .github/workflows/run_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml index 44235a0..572295d 100644 --- a/.github/workflows/run_test.yml +++ b/.github/workflows/run_test.yml @@ -62,7 +62,7 @@ jobs: uses: redhat-actions/buildah-build@v2 with: image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }} - tag: ${{ env.RELEASE_VERSION }} latest_release + tags: ${{ env.RELEASE_VERSION }} latest_release containerfiles: ./Containerfile - name: Push to repo uses: redhat-actions/push-to-registry@v2 @@ -71,7 +71,7 @@ jobs: password: ${{ env.REGISTRY_PASSWORD }} registry: ${{ env.IMAGE_REGISTRY }} image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }} - tag: ${{ env.RELEASE_VERSION }} latest_release + tags: ${{ env.RELEASE_VERSION }} latest_release - name: Print image url run: echo "Image pushed to ${{ steps.push-to-repo.outputs.registry-paths }}" From 556206ed6e8a3250d85b84260081859e83cd4fd9 Mon Sep 17 00:00:00 2001 From: P-O Quirion Date: Tue, 19 Dec 2023 16:35:47 -0500 Subject: [PATCH 19/19] typo in action --- .github/workflows/run_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml index 572295d..14ad43c 100644 --- 
a/.github/workflows/run_test.yml +++ b/.github/workflows/run_test.yml @@ -70,7 +70,7 @@ jobs: username: ${{ env.REGISTRY_USER }} password: ${{ env.REGISTRY_PASSWORD }} registry: ${{ env.IMAGE_REGISTRY }} - image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }} + image: ${{ env.IMAGE }} tags: ${{ env.RELEASE_VERSION }} latest_release - name: Print image url run: echo "Image pushed to ${{ steps.push-to-repo.outputs.registry-paths }}"