From 588803a1de6bfd95f60d28d9d14caabf2407ccbb Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Thu, 19 Oct 2023 15:47:43 -0400
Subject: [PATCH 01/19] Adding digest_unanalyzed route + debug Operation being
 linked to Readset when ingesting transfer and GenPipes

---
 project_tracking/api/project.py      | 34 ++++++++---
 project_tracking/db_action.py        | 90 ++++++++++++++++++++++++----
 project_tracking/model.py            | 23 +++++++
 tests/data/genpipes_rnaseqlight.json |  2 +-
 tests/data/genpipes_tumourpair.json  |  2 +-
 5 files changed, 131 insertions(+), 20 deletions(-)

diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index c72381d..1299897 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -433,13 +433,14 @@ def ingest_transfer(project_id: str):
     Add new location to file that has already been moved before
     the db was created
     """
-    try:
-        ingest_data = request.get_json(force=True)
-    except:
-        flash('Data does not seems to be json')
-        return redirect(request.url)
+    if request.method == 'POST':
+        try:
+            ingest_data = request.get_json(force=True)
+        except:
+            flash('Data does not seems to be json')
+            return redirect(request.url)
 
-    return  [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)]
+        return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)]
 
 @bp.route('/<string:project_id>/ingest_genpipes', methods=['GET', 'POST'])
 # @capitalize
@@ -463,13 +464,30 @@ def ingest_genpipes(project_id: str):
             return redirect(request.url)
 
         project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
-        if project_id != project_id_from_name:
+        if [int(project_id)] != project_id_from_name:
             return abort(
                 400,
-                f"project name in POST {ingest_data[vc.PROJECT_NAME].upper()} not Valid, {project_id} requires"
+                f"project name in POST {ingest_data[vc.PROJECT_NAME].upper()} not in the database, {project_id} required"
                 )
 
         output = db_action.ingest_genpipes(project_id=project_id, ingest_data=ingest_data)
         operation = output[0].flat_dict
         jobs = [job.flat_dict for job in output[1]]
         return [operation, jobs]
+
+@bp.route('/<string:project_id>/digest_unanalyzed', methods=['POST'])
+def digest_unanalyzed(project_id: str):
+    """
+    POST: json holding the flags and filters read by db_action.digest_unanalyzed
+    return: json string built by db_action.digest_unanalyzed
+
+    Aborts with 400 when the body is not json: this route only accepts POST,
+    so redirecting to request.url would send the client to a URL without GET.
+    """
+    logger.debug(f"\n\n{project_id}\n\n")
+    try:
+        digest_data = request.get_json(force=True)
+    except Exception:
+        return abort(400, "Data does not seems to be json")
+
+    return db_action.digest_unanalyzed(project_id=project_id, digest_data=digest_data)
diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index f0a301b..c0c3a4e 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -521,7 +521,7 @@ def create_project(project_name, fms_id=None, session=None):
     return session.scalars(select(Project).where(Project.name == project_name)).one()
 
 
-def ingest_run_processing(project_id, ingest_data, session=None):
+def ingest_run_processing(project_id: str, ingest_data, session=None):
     """Ingesting run for MoH"""
     if not isinstance(ingest_data, dict):
         ingest_data = json.loads(ingest_data)
@@ -658,7 +658,7 @@ def ingest_run_processing(project_id, ingest_data, session=None):
     return [operation, job]
 
 
-def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=True):
+def ingest_transfer(project_id: str, ingest_data, session=None, check_readset_name=True):
     """Ingesting transfer"""
     if not isinstance(ingest_data, dict):
         ingest_data = json.loads(ingest_data)
@@ -682,9 +682,10 @@ def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=Tr
         stop=datetime.now(),
         operation=operation
         )
-
+    readset_list = []
     for readset_json in ingest_data[vb.READSET]:
         readset_name = readset_json[vb.READSET_NAME]
+        readset_list.append(session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first())
         for file_json in readset_json[vb.FILE]:
             src_uri = file_json[vb.SRC_LOCATION_URI]
             dest_uri = file_json[vb.DEST_LOCATION_URI]
@@ -692,8 +693,8 @@ def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=Tr
                 file = session.scalars(
                     select(File)
                     .join(File.readsets)
-                    .where(Readset.name ==  readset_name )
-                    .join(File.locations    )
+                    .where(Readset.name == readset_name)
+                    .join(File.locations)
                     .where(Location.uri == src_uri)
                     ).unique().first()
                 if not file:
@@ -712,6 +713,7 @@ def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=Tr
             new_location = Location.from_uri(uri=dest_uri, file=file, session=session)
             file.jobs.append(job)
             session.add(new_location)
+    operation.readsets = readset_list
 
     session.add(job)
     session.flush()
@@ -733,7 +735,7 @@ def ingest_transfer(project_id, ingest_data, session=None, check_readset_name=Tr
     return [operation, job]
 
 
-def digest_readset_file(project_id, digest_data, session=None):
+def digest_readset_file(project_id: str, digest_data, session=None):
     """Digesting readset file fields for GenPipes"""
     if not session:
         session = database.get_session()
@@ -840,7 +842,7 @@ def digest_readset_file(project_id, digest_data, session=None):
             output.append(readset_line)
     return json.dumps(output)
 
-def digest_pair_file(project_id, digest_data, session=None):
+def digest_pair_file(project_id: str, digest_data, session=None):
     """Digesting pair file fields for GenPipes"""
     if not session:
         session = database.get_session()
@@ -904,7 +906,7 @@ def digest_pair_file(project_id, digest_data, session=None):
 
     return json.dumps(output)
 
-def ingest_genpipes(project_id, ingest_data, session=None):
+def ingest_genpipes(project_id: str, ingest_data, session=None):
     """Ingesting GenPipes run"""
     if not isinstance(ingest_data, dict):
         ingest_data = json.loads(ingest_data)
@@ -914,7 +916,7 @@ def ingest_genpipes(project_id, ingest_data, session=None):
 
     project = projects(project_id=project_id, session=session)[0]
 
-    operation_config = OperationConfig(
+    operation_config = OperationConfig.from_attributes(
         name=ingest_data[vb.OPERATION_CONFIG_NAME],
         version=ingest_data[vb.OPERATION_CONFIG_VERSION],
         md5sum=ingest_data[vb.OPERATION_CONFIG_MD5SUM],
@@ -930,6 +932,7 @@ def ingest_genpipes(project_id, ingest_data, session=None):
         operation_config=operation_config
         )
 
+    readset_list = []
     for sample_json in ingest_data[vb.SAMPLE]:
         sample = session.scalars(
             select(Sample)
@@ -942,6 +945,7 @@ def ingest_genpipes(project_id, ingest_data, session=None):
                 select(Readset)
                 .where(Readset.name == readset_json[vb.READSET_NAME])
                 ).unique().first()
+            readset_list.append(readset)
             if not readset:
                 raise DidNotFindError(f"No readset named {readset_json[vb.READSET_NAME]}")
             if readset.sample != sample:
@@ -1007,7 +1011,7 @@ def ingest_genpipes(project_id, ingest_data, session=None):
 
                 session.add(job)
                 session.flush()
-
+    operation.readsets = readset_list
     operation_id = operation.id
     job_ids = [job.id for job in operation.jobs]
     try:
@@ -1022,3 +1026,69 @@ def ingest_genpipes(project_id, ingest_data, session=None):
     jobs = [session.scalars(select(Job).where(Job.id == job_id)).first() for job_id in job_ids]
 
     return [operation, jobs]
+
+
+def digest_unanalyzed(project_id: str, digest_data, session=None):
+    """
+    Digest samples or readsets of a project linked to a GenPipes operation.
+
+    project_id: a single project id or a list of project ids.
+    digest_data: dict; the first truthy flag among "sample_name",
+    "sample_id", "readset_name" and "readset_id" selects the returned column,
+    "run_id"/"run_name" and "experiment_sequencing_technology" are optional
+    filters and "location_endpoint" is echoed back in the output.
+    session: optional database session, one is opened when not given.
+    Returns a JSON string; raises ValueError when no selection flag is set.
+
+    NOTE(review): the query keeps readsets that DO have a GenPipes
+    operation, which looks like the opposite of "unanalyzed" - to confirm.
+    """
+    # Reuse the session of the caller instead of always opening a new one
+    if not session:
+        session = database.get_session()
+
+    if isinstance(project_id, str):
+        project_id = [project_id]
+
+    run_id = digest_data.get("run_id")
+    run_name = digest_data.get("run_name")
+    if run_name:
+        run_id = name_to_id("Run", run_name)[0]
+    experiment_sequencing_technology = digest_data.get("experiment_sequencing_technology")
+    location_endpoint = digest_data.get("location_endpoint")
+
+    # Flags are checked in this order, the first truthy one wins
+    columns = {
+        "sample_name": Sample.name,
+        "sample_id": Sample.id,
+        "readset_name": Readset.name,
+        "readset_id": Readset.id,
+    }
+    key = next((flag for flag in columns if digest_data.get(flag)), None)
+    if key is None:
+        raise ValueError(f"digest_data requires one of {', '.join(columns)} to be set")
+
+    stmt = (
+        select(columns[key])
+        .join(Sample.readsets)
+        .join(Readset.operations)
+        .where(Operation.name.ilike("%genpipes%"))
+        .join(Sample.patient)
+        .join(Patient.project)
+        .where(Project.id.in_(project_id))
+        )
+
+    if run_id:
+        stmt = (
+            stmt.where(Run.id == run_id)
+            .join(Readset.run)
+            )
+    if experiment_sequencing_technology:
+        stmt = (
+            stmt.where(Experiment.sequencing_technology == experiment_sequencing_technology)
+            .join(Readset.experiment)
+            )
+
+    output = {"location_endpoint": location_endpoint, key: session.scalars(stmt).unique().all()}
+
+    return json.dumps(output)
diff --git a/project_tracking/model.py b/project_tracking/model.py
index 4abbc71..2b74951 100644
--- a/project_tracking/model.py
+++ b/project_tracking/model.py
@@ -556,6 +556,29 @@ def config_data(cls, data):
         """
         pass
 
+    @classmethod
+    def from_attributes(cls, name=None, version=None, md5sum=None, data=None, session=None):
+        """
+        Fetch the operation_config whose name, version, md5sum and data all
+        match the given values; when no row matches, build a new instance
+        from those values instead.
+
+        The new instance is only returned: this method does not add it to
+        the session.
+        """
+        if not session:
+            session = database.get_session()
+        attributes = {"name": name, "version": version, "md5sum": md5sum, "data": data}
+        # One equality filter per attribute, in the order listed above
+        stmt = select(cls)
+        for column, value in attributes.items():
+            stmt = stmt.where(getattr(cls, column) == value)
+
+        existing = session.scalars(stmt).first()
+        if existing:
+            return existing
+        return cls(**attributes)
+
 
 class Job(BaseTable):
     """
diff --git a/tests/data/genpipes_rnaseqlight.json b/tests/data/genpipes_rnaseqlight.json
index 516b6c1..beb4d21 100644
--- a/tests/data/genpipes_rnaseqlight.json
+++ b/tests/data/genpipes_rnaseqlight.json
@@ -6,7 +6,7 @@
     "operation_config_data": "[DEFAULT]\ncluster_server = beluga.genome.mcgill.ca\nsequencing_center = McGill Genome Centre\ncluster_submit_cmd_suffix = | grep \"[0-9]\" | cut -d\\  -f4\ncluster_other_arg = --mail-type=END,FAIL --mail-user=$JOB_MAIL -A $RAP_ID\ncluster_work_dir_arg = -D\ncluster_output_dir_arg = -o\ncluster_job_name_arg = -J\ncluster_cmd_produces_job_id = true\ncluster_dependency_arg = --depend=afterok:\ncluster_dependency_sep = :\ncluster_max_jobs = 3000\ntmp_dir = ${SLURM_TMPDIR}\nportal_output_dir = $PORTAL_OUTPUT_DIR\ncluster_walltime = 24:00:00\ncluster_cpu = 1\ncluster_node = 1\nALL_CPU = 40\nHALF_CPU = 20\nQUART_CPU = 10\nPINT_CPU = 5\nALL_MEM = 187G\nHALF_MEM = 90G\nQUART_MEM = 60G\nPINT_MEM = 30G\ncluster_mem = 4700M per cpu\ncluster_queue = \nLARGE_QUEUE = \nmodule_java = mugqic/java/openjdk-jdk1.8.0_72\nmodule_mugqic_R_packages = mugqic/mugqic_R_packages/1.0.6\nmodule_mugqic_tools = mugqic/mugqic_tools/2.8.1\nmodule_pandoc = mugqic/pandoc/2.16.1\nmodule_picard = mugqic/picard/2.0.1\nmodule_python = mugqic/python/2.7.11\nmodule_R = mugqic/R_Bioconductor/3.5.0_3.7\nmodule_trimmomatic = mugqic/trimmomatic/0.35\nmodule_kallisto = mugqic/kallisto/0.44.0\nmodule_perl = mugqic/perl/5.22.1\nscientific_name = Homo_sapiens\nassembly = GRCh38\nsource = Ensembl\nversion = 104\nassembly_dir = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s\nannotations_prefix = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.%(source)s%(version)s\ngenome_fasta = %(assembly_dir)s/genome/%(scientific_name)s.%(assembly)s.fa\ngtf = %(annotations_prefix)s.gtf\ngtf_transcript_id = %(annotations_prefix)s.transcript_id.gtf\nribosomal_fasta = %(assembly_dir)s/annotations/rrna_bwa_index/%(scientific_name)s.%(assembly)s.%(source)s%(version)s.rrna.fa\nchromosome_size = %(assembly_dir)s/genome/%(scientific_name)s.%(assembly)s.fa.fai\ngenes = %(annotations_prefix)s.genes.tsv\ngene_size = %(annotations_prefix)s.genes.length.tsv\ngene_ontology = 
%(annotations_prefix)s.GO.tsv\nannotation_flat = %(annotations_prefix)s.ref_flat.tsv\njava_other_options = -XX:ParallelGCThreads=4\nprotocol = TrueSeq mRNA\ncycle_number = 100\nstrand_info = fr-firststrand\nALL_MEM_EPS = 180G\nGPU_QUEUE = \ncommon_name = Human\nassembly_synonyms = hg38\ndbsnp_version = 142\ngnomad_exome = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.gnomad.exomes.r2.0.2.sites.no-VEP.nohist.tidy.vcf.gz\ndbnsfp = %(assembly_dir)s/annotations/dbNSFPv3.5a/dbNSFPv3.5a.txt.gz.txt.gz\naf_gnomad = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.af-only-gnomad.vcf.gz\nCTAT_bundle_version = GRCh38_gencode_v32_CTAT_lib_Dec062019\ngenome_mappability_bed_indexed = %(assembly_dir)s/annotations/mappabilityGC/GRCh38_100bpPAIREDEND.exclusion.bed\npopulation_AF = 1000Gp1_EUR_AF\nexcluded_chromosome = chrM,chr1_KI270706v1_random,chr1_KI270707v1_random,chr1_KI270708v1_random,chr1_KI270709v1_random,chr1_KI270710v1_random,chr1_KI270711v1_random,chr1_KI270712v1_random,chr1_KI270713v1_random,chr1_KI270714v1_random,chr2_KI270715v1_random,chr2_KI270716v1_random,chr3_GL000221v1_random,chr4_GL000008v2_random,chr5_GL000208v1_random,chr9_KI270717v1_random,chr9_KI270718v1_random,chr9_KI270719v1_random,chr9_KI270720v1_random,chr11_KI270721v1_random,chr14_GL000009v2_random,chr14_GL000225v1_random,chr14_KI270722v1_random,chr14_GL000194v1_random,chr14_KI270723v1_random,chr14_KI270724v1_random,chr14_KI270725v1_random,chr14_KI270726v1_random,chr15_KI270727v1_random,chr16_KI270728v1_random,chr17_GL000205v2_random,chr17_KI270729v1_random,chr17_KI270730v1_random,chr22_KI270731v1_random,chr22_KI270732v1_random,chr22_KI270733v1_random,chr22_KI270734v1_random,chr22_KI270735v1_random,chr22_KI270736v1_random,chr22_KI270737v1_random,chr22_KI270738v1_random,chr22_KI270739v1_random,chrY_KI270740v1_random,chrUn_KI270302v1,chrUn_KI270304v1,chrUn_KI270303v1,chrUn_KI270305v1,chrUn_KI270322v1,chrUn_KI270320v1,chrUn_KI270310v1,chrUn_KI270316v1,chrUn_KI270315v1,chrUn_KI270
312v1,chrUn_KI270311v1,chrUn_KI270317v1,chrUn_KI270412v1,chrUn_KI270411v1,chrUn_KI270414v1,chrUn_KI270419v1,chrUn_KI270418v1,chrUn_KI270420v1,chrUn_KI270424v1,chrUn_KI270417v1,chrUn_KI270422v1,chrUn_KI270423v1,chrUn_KI270425v1,chrUn_KI270429v1,chrUn_KI270442v1,chrUn_KI270466v1,chrUn_KI270465v1,chrUn_KI270467v1,chrUn_KI270435v1,chrUn_KI270438v1,chrUn_KI270468v1,chrUn_KI270510v1,chrUn_KI270509v1,chrUn_KI270518v1,chrUn_KI270508v1,chrUn_KI270516v1,chrUn_KI270512v1,chrUn_KI270519v1,chrUn_KI270522v1,chrUn_KI270511v1,chrUn_KI270515v1,chrUn_KI270507v1,chrUn_KI270517v1,chrUn_KI270529v1,chrUn_KI270528v1,chrUn_KI270530v1,chrUn_KI270539v1,chrUn_KI270538v1,chrUn_KI270544v1,chrUn_KI270548v1,chrUn_KI270583v1,chrUn_KI270587v1,chrUn_KI270580v1,chrUn_KI270581v1,chrUn_KI270579v1,chrUn_KI270589v1,chrUn_KI270590v1,chrUn_KI270584v1,chrUn_KI270582v1,chrUn_KI270588v1,chrUn_KI270593v1,chrUn_KI270591v1,chrUn_KI270330v1,chrUn_KI270329v1,chrUn_KI270334v1,chrUn_KI270333v1,chrUn_KI270335v1,chrUn_KI270338v1,chrUn_KI270340v1,chrUn_KI270336v1,chrUn_KI270337v1,chrUn_KI270363v1,chrUn_KI270364v1,chrUn_KI270362v1,chrUn_KI270366v1,chrUn_KI270378v1,chrUn_KI270379v1,chrUn_KI270389v1,chrUn_KI270390v1,chrUn_KI270387v1,chrUn_KI270395v1,chrUn_KI270396v1,chrUn_KI270388v1,chrUn_KI270394v1,chrUn_KI270386v1,chrUn_KI270391v1,chrUn_KI270383v1,chrUn_KI270393v1,chrUn_KI270384v1,chrUn_KI270392v1,chrUn_KI270381v1,chrUn_KI270385v1,chrUn_KI270382v1,chrUn_KI270376v1,chrUn_KI270374v1,chrUn_KI270372v1,chrUn_KI270373v1,chrUn_KI270375v1,chrUn_KI270371v1,chrUn_KI270448v1,chrUn_KI270521v1,chrUn_GL000195v1,chrUn_GL000219v1,chrUn_GL000220v1,chrUn_GL000224v1,chrUn_KI270741v1,chrUn_GL000226v1,chrUn_GL000213v1,chrUn_KI270743v1,chrUn_KI270744v1,chrUn_KI270745v1,chrUn_KI270746v1,chrUn_KI270747v1,chrUn_KI270748v1,chrUn_KI270749v1,chrUn_KI270750v1,chrUn_KI270751v1,chrUn_KI270752v1,chrUn_KI270753v1,chrUn_KI270754v1,chrUn_KI270755v1,chrUn_KI270756v1,chrUn_KI270757v1,chrUn_GL000214v1,chrUn_KI270742v1,chrUn_GL000216v2,chrUn_GL000218v1,chrEB
V\n\n[picard_sam_to_fastq]\njava_other_options = -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\nram = 16000G\ncluster_mem = 16G\n\n[trimmomatic]\ncluster_mem = 20G\nram = 20G\ncluster_cpu = %(QUART_CPU)s\nthreads = %(cluster_cpu)s\ntrailing_min_quality = 30\nmin_length = 32\nillumina_clip_settings = :2:30:15:8:true\ncluster_walltime = --time=24:00:0\njava_other_options = -XX:ParallelGCThreads=5 -Dsamjdk.buffer_size=1048576\n\n[kallisto]\ntranscriptome_idx = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s/annotations/cdna_kallisto_index/%(scientific_name)s.%(assembly)s.%(source)s%(version)s.cdna.fa.idx\ntranscript2genes = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s/annotations/cdna_kallisto_index/%(scientific_name)s.%(assembly)s.%(source)s%(version)s.tx2gene\ncluster_walltime = 23:00:0\ncluster_cpu = 10\nbootstraps = 120\nfragment_length = 120\nfragment_length_sd = 20\ncluster_mem = 42G\n\n[kallisto_count_matrix]\ncluster_walltime = --time=23:30:0\ncluster_cpu = 2\ncluster_mem = 24G\n\n[gq_seq_utils_exploratory_analysis_rnaseq_light]\ncluster_walltime = 00:30:0\ncluster_cpu = 2\ncluster_mem = 24G\n\n[sleuth_differential_expression]\ntx2gene = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s/annotations/%(scientific_name)s.%(assembly)s.%(source)s%(version)s.tx2gene.csv\ncluster_mem = 16G\n\n[report]\ntitle = RNA-Seq Light Analysis Report\ncluster_walltime = 30:00\n\n[run_checkmate]\nbed = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.SNP_GRCh38_hg19_woChr.bed\n\n[compute_effects]\nsnpeff_genome = %(assembly_synonyms)s\n\n[conpair_concordance_contamination]\nmarkers_bed = ${CONPAIR_DATA}/markers/%(assembly)s.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.liftover.bed\nmarkers_txt = 
${CONPAIR_DATA}/markers/%(assembly)s.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.liftover.txt\n\n[gatk_mutect2]\npon = \n\n[amber]\nloci = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GermlineHetPon.vcf.gz\n\n[cobalt]\ngc_profile = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GC_profile.1000bp.cnp\n\n[purple]\ngc_profile = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GC_profile.1000bp.cnp\n\n[report_cpsr]\nassembly = grch38\n\n[report_pcgr]\nassembly = grch38\n\n[delly_call_filter]\nexclude_list = ${DELLY_PATH}/excludeTemplates/human.hg38.excl.tsv\n\n[cnvkit_batch]\naccess = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.access-5k-mappable.bed\nrefFlat = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.UCSC.ref_flat.tsv\n\n[wham_call_sv]\nexclude = chr1_KI270706v1_random,chr1_KI270707v1_random,chr1_KI270708v1_random,chr1_KI270709v1_random,chr1_KI270710v1_random,chr1_KI270711v1_random,chr1_KI270712v1_random,chr1_KI270713v1_random,chr1_KI270714v1_random,chr2_KI270715v1_random,chr2_KI270716v1_random,chr3_GL000221v1_random,chr4_GL000008v2_random,chr5_GL000208v1_random,chr9_KI270717v1_random,chr9_KI270718v1_random,chr9_KI270719v1_random,chr9_KI270720v1_random,chr11_KI270721v1_random,chr14_GL000009v2_random,chr14_GL000225v1_random,chr14_KI270722v1_random,chr14_GL000194v1_random,chr14_KI270723v1_random,chr14_KI270724v1_random,chr14_KI270725v1_random,chr14_KI270726v1_random,chr15_KI270727v1_random,chr16_KI270728v1_random,chr17_GL000205v2_random,chr17_KI270729v1_random,chr17_KI270730v1_random,chr22_KI270731v1_random,chr22_KI270732v1_random,chr22_KI270733v1_random,chr22_KI270734v1_random,chr22_KI270735v1_random,chr22_KI270736v1_random,chr22_KI270737v1_random,chr22_KI270738v1_random,chr22_KI270739v1_random,chrY_KI270740v1_random,chrUn_KI270302v1,chrUn_KI270304v1,chrUn_KI270303v1,chrUn_KI270305v1,chrUn_KI270322v1,chrUn_KI270320v1,chrUn_KI270310v1,chrUn
_KI270316v1,chrUn_KI270315v1,chrUn_KI270312v1,chrUn_KI270311v1,chrUn_KI270317v1,chrUn_KI270412v1,chrUn_KI270411v1,chrUn_KI270414v1,chrUn_KI270419v1,chrUn_KI270418v1,chrUn_KI270420v1,chrUn_KI270424v1,chrUn_KI270417v1,chrUn_KI270422v1,chrUn_KI270423v1,chrUn_KI270425v1,chrUn_KI270429v1,chrUn_KI270442v1,chrUn_KI270466v1,chrUn_KI270465v1,chrUn_KI270467v1,chrUn_KI270435v1,chrUn_KI270438v1,chrUn_KI270468v1,chrUn_KI270510v1,chrUn_KI270509v1,chrUn_KI270518v1,chrUn_KI270508v1,chrUn_KI270516v1,chrUn_KI270512v1,chrUn_KI270519v1,chrUn_KI270522v1,chrUn_KI270511v1,chrUn_KI270515v1,chrUn_KI270507v1,chrUn_KI270517v1,chrUn_KI270529v1,chrUn_KI270528v1,chrUn_KI270530v1,chrUn_KI270539v1,chrUn_KI270538v1,chrUn_KI270544v1,chrUn_KI270548v1,chrUn_KI270583v1,chrUn_KI270587v1,chrUn_KI270580v1,chrUn_KI270581v1,chrUn_KI270579v1,chrUn_KI270589v1,chrUn_KI270590v1,chrUn_KI270584v1,chrUn_KI270582v1,chrUn_KI270588v1,chrUn_KI270593v1,chrUn_KI270591v1,chrUn_KI270330v1,chrUn_KI270329v1,chrUn_KI270334v1,chrUn_KI270333v1,chrUn_KI270335v1,chrUn_KI270338v1,chrUn_KI270340v1,chrUn_KI270336v1,chrUn_KI270337v1,chrUn_KI270363v1,chrUn_KI270364v1,chrUn_KI270362v1,chrUn_KI270366v1,chrUn_KI270378v1,chrUn_KI270379v1,chrUn_KI270389v1,chrUn_KI270390v1,chrUn_KI270387v1,chrUn_KI270395v1,chrUn_KI270396v1,chrUn_KI270388v1,chrUn_KI270394v1,chrUn_KI270386v1,chrUn_KI270391v1,chrUn_KI270383v1,chrUn_KI270393v1,chrUn_KI270384v1,chrUn_KI270392v1,chrUn_KI270381v1,chrUn_KI270385v1,chrUn_KI270382v1,chrUn_KI270376v1,chrUn_KI270374v1,chrUn_KI270372v1,chrUn_KI270373v1,chrUn_KI270375v1,chrUn_KI270371v1,chrUn_KI270448v1,chrUn_KI270521v1,chrUn_GL000195v1,chrUn_GL000219v1,chrUn_GL000220v1,chrUn_GL000224v1,chrUn_KI270741v1,chrUn_GL000226v1,chrUn_GL000213v1,chrUn_KI270743v1,chrUn_KI270744v1,chrUn_KI270745v1,chrUn_KI270746v1,chrUn_KI270747v1,chrUn_KI270748v1,chrUn_KI270749v1,chrUn_KI270750v1,chrUn_KI270751v1,chrUn_KI270752v1,chrUn_KI270753v1,chrUn_KI270754v1,chrUn_KI270755v1,chrUn_KI270756v1,chrUn_KI270757v1,chrUn_GL000214v1,chrUn_KI270742v1
,chrUn_GL000216v2,chrUn_GL000218v1,chrEBV\n\n[run_arriba]\nblacklist = $ARRIBA_HOME/database/blacklist_hg38_GRCh38_2018-01-13.tsv",
     "operation_platform": "beluga",
     "operation_cmd_line": "module purge\nmodule load python/3.10.2 mugqic/genpipes/4.2.0\nrnaseq_light.py \n    -j slurm \n    -r readset.txt \n    -s 1-5 \n    -c $MUGQIC_PIPELINES_HOME/pipelines/rnaseq_light/rnaseq_light.base.ini \n        $MUGQIC_PIPELINES_HOME/pipelines/common_ini/beluga.ini \n        $MUGQIC_PIPELINES_HOME/resources/genomes/config/Homo_sapiens.GRCh38.ini \n        RNA_light.custom.ini \n  > RNASeq_light_run.sh\nrm -r RNA_CHUNKS;\nmkdir RNA_CHUNKS;\n$MUGQIC_PIPELINES_HOME/utils/chunk_genpipes.sh -n 100 RNASeq_light_run.sh RNA_CHUNKS",
-    "operation_name": "genpipes_rnaseq_light",
+    "operation_name": "GenPipes_RnaSeq.cancer",
     "sample": [
         {
             "sample_name": "MoHQ-CM-1-3-6929-1RT",
diff --git a/tests/data/genpipes_tumourpair.json b/tests/data/genpipes_tumourpair.json
index dc201de..526b011 100644
--- a/tests/data/genpipes_tumourpair.json
+++ b/tests/data/genpipes_tumourpair.json
@@ -5,7 +5,7 @@
     "operation_config_data": "\\\n[DEFAULT]\\\ncluster_server = beluga.genome.mcgill.ca\\\nsequencing_center = McGill Genome Centre\\\ncluster_submit_cmd_suffix = | grep \"[0-9]\" | cut -d\\  -f4\\\ncluster_other_arg = --mail-type=FAIL --mail-user=$JOB_MAIL -A $RAP_ID\\\ncluster_work_dir_arg = -D\\\ncluster_output_dir_arg = -o\\\ncluster_job_name_arg = -J\\\ncluster_cmd_produces_job_id = true\\\ncluster_dependency_arg = --depend=afterok:\\\ncluster_dependency_sep = :\\\ncluster_max_jobs = 3000\\\ntmp_dir = ${SLURM_TMPDIR}\\\nportal_output_dir = $PORTAL_OUTPUT_DIR\\\ncluster_walltime = 24:00:00\\\ncluster_cpu = 1\\\ncluster_node = 1\\\nALL_CPU = 40\\\nHALF_CPU = 20\\\nQUART_CPU = 10\\\nPINT_CPU = 5\\\nALL_MEM = 187G\\\nHALF_MEM = 90G\\\nQUART_MEM = 60G\\\nPINT_MEM = 30G\\\ncluster_mem = 4700M per cpu\\\ncluster_queue = \\\nLARGE_QUEUE = \\\nmodule_bvatools = mugqic/bvatools/1.6\\\nmodule_bwa = mugqic/bwa/0.7.17\\\nmodule_bwakit = mugqic/bwakit/0.7.15\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.1.8.1\\\nmodule_sambamba = mugqic/sambamba/0.8.0\\\nmodule_igvtools = mugqic/igvtools/2.3.14\\\nmodule_java = mugqic/java/openjdk-jdk1.8.0_72\\\nmodule_mugqic_R_packages = mugqic/mugqic_R_packages/1.0.4\\\nmodule_mugqic_tools = mugqic/mugqic_tools/2.10.5\\\nmodule_pandoc = mugqic/pandoc/2.16.1\\\nmodule_perl = mugqic/perl/5.34.0\\\nmodule_picard = mugqic/picard/2.9.0\\\nmodule_python = mugqic/python/3.9.1\\\nmodule_R = mugqic/R_Bioconductor/3.5.0_3.7\\\nmodule_samtools = mugqic/samtools/1.12\\\nmodule_bcftools = mugqic/bcftools/1.15\\\nmodule_snpeff = mugqic/snpEff/4.3\\\nmodule_trimmomatic = mugqic/trimmomatic/0.35\\\nmodule_vcftools = mugqic/vcftools/0.1.14\\\nmodule_skewer = mugqic/skewer/0.2.2\\\nmodule_qualimap = mugqic/qualimap/2.2.2dev\\\nmodule_fastqc = mugqic/fastqc/0.11.5\\\nmodule_htslib = mugqic/htslib/1.14\\\nmodule_verify_bam_id = mugqic/verifyBamID/1.1.3\\\nmodule_vt = mugqic/vt/0.57\\\nmodule_gemini = mugqic/gemini/0.20.1\\\nmodule_multiqc = 
mugqic/MultiQC/1.9\\\nmodule_checkmate = mugqic/NGSCheckMate/1.0.0_rjme\\\nmodule_variantBam = mugqic/variantbam/1.4.3\\\nmodule_cnvkit = mugqic/CNVkit/0.9.9\\\nmodule_delly = mugqic/Delly/0.8.1\\\nmodule_sv_annotations = mugqic/simple_sv_annotation/1.0.0\\\nmodule_manta = mugqic/Manta/1.5.0\\\nmodule_samblaster = mugqic/samblaster/0.1.24\\\nmodule_lumpy = mugqic/LUMPY-SV/0.2.13\\\nmodule_wham = mugqic/WHAM/1.8.0\\\nmodule_breakseq2 = mugqic/breakseq2/2.2\\\nmodule_vcflib = mugqic/vcflib/1.0.0\\\nmodule_spades = mugqic/SPAdes/3.10.0\\\nmodule_age = mugqic/AGE/master-20181210\\\nmodule_bedtools = mugqic/bedtools/2.26.0\\\nmodule_vawk = mugqic/vawk/0.0.2\\\nmodule_svaba = mugqic/SvABA/1.1.0\\\nscientific_name = Homo_sapiens\\\nassembly = GRCh38\\\nsource = Ensembl\\\nversion = 102\\\nassembly_alias = b37\\\ndbsnp_version = 142\\\nassembly_dir = $MUGQIC_INSTALL_HOME/genomes/species/%(scientific_name)s.%(assembly)s\\\ngenome_fasta = %(assembly_dir)s/genome/%(scientific_name)s.%(assembly)s.fa\\\ngenome_dictionary = %(assembly_dir)s/genome/%(scientific_name)s.%(assembly)s.dict\\\ngenome_bwa_index = %(assembly_dir)s/genome/bwa_index/%(scientific_name)s.%(assembly)s.fa\\\nknown_variants = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.dbSNP%(dbsnp_version)s.vcf.gz\\\nigv_genome = %(genome_fasta)s.fai\\\nsnpeff_genome = %(assembly)s.%(version)s\\\nhapmap = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.hapmap_3.3.vcf.gz\\\n1000G = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.1000G_phase1.snps.high_confidence.vcf.gz\\\ndbsnp = %(known_variants)s\\\nomni = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.1000G_omni2.5.vcf.gz\\\nmills = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.Mills_and_1000G_gold_standard.indels.vcf.gz\\\ngenome_mappability_bed_indexed = %(assembly_dir)s/annotations/mappabilityGC/GRCh38_100bpPAIREDEND.exclusion.bed\\\ndbnsfp = 
%(assembly_dir)s/annotations/dbNSFPv3.5a/dbNSFPv3.5a.txt.gz.txt.gz\\\ncommon_snp_positions = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.1000G_phase1.snps.high_confidence.allpop_q200.tsv\\\ngnomad_exome = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.gnomad.exomes.r2.0.2.sites.no-VEP.nohist.tidy.vcf.gz\\\naf_gnomad = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.af-only-gnomad.vcf.gz\\\nvcf_header = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.vcf_header.tsv\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\ngatk4_java_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304 -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false\\\nexperiment_type = wholeGenome\\\nexperiment_type_abrev = wgs\\\npopulation_AF = 1000Gp1_EUR_AF\\\nverifyBamID_variants_file = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.dbSNP%(dbsnp_version)s_1000Gp1_%(population_AF)s_AF.vcf\\\nspecies_vcf_format_descriptor = %(assembly_dir)s/annotations/HumanVCFformatDescriptor.tsv\\\nexcluded_chromosome = 
chrM,chr1_KI270706v1_random,chr1_KI270707v1_random,chr1_KI270708v1_random,chr1_KI270709v1_random,chr1_KI270710v1_random,chr1_KI270711v1_random,chr1_KI270712v1_random,chr1_KI270713v1_random,chr1_KI270714v1_random,chr2_KI270715v1_random,chr2_KI270716v1_random,chr3_GL000221v1_random,chr4_GL000008v2_random,chr5_GL000208v1_random,chr9_KI270717v1_random,chr9_KI270718v1_random,chr9_KI270719v1_random,chr9_KI270720v1_random,chr11_KI270721v1_random,chr14_GL000009v2_random,chr14_GL000225v1_random,chr14_KI270722v1_random,chr14_GL000194v1_random,chr14_KI270723v1_random,chr14_KI270724v1_random,chr14_KI270725v1_random,chr14_KI270726v1_random,chr15_KI270727v1_random,chr16_KI270728v1_random,chr17_GL000205v2_random,chr17_KI270729v1_random,chr17_KI270730v1_random,chr22_KI270731v1_random,chr22_KI270732v1_random,chr22_KI270733v1_random,chr22_KI270734v1_random,chr22_KI270735v1_random,chr22_KI270736v1_random,chr22_KI270737v1_random,chr22_KI270738v1_random,chr22_KI270739v1_random,chrY_KI270740v1_random,chrUn_KI270302v1,chrUn_KI270304v1,chrUn_KI270303v1,chrUn_KI270305v1,chrUn_KI270322v1,chrUn_KI270320v1,chrUn_KI270310v1,chrUn_KI270316v1,chrUn_KI270315v1,chrUn_KI270312v1,chrUn_KI270311v1,chrUn_KI270317v1,chrUn_KI270412v1,chrUn_KI270411v1,chrUn_KI270414v1,chrUn_KI270419v1,chrUn_KI270418v1,chrUn_KI270420v1,chrUn_KI270424v1,chrUn_KI270417v1,chrUn_KI270422v1,chrUn_KI270423v1,chrUn_KI270425v1,chrUn_KI270429v1,chrUn_KI270442v1,chrUn_KI270466v1,chrUn_KI270465v1,chrUn_KI270467v1,chrUn_KI270435v1,chrUn_KI270438v1,chrUn_KI270468v1,chrUn_KI270510v1,chrUn_KI270509v1,chrUn_KI270518v1,chrUn_KI270508v1,chrUn_KI270516v1,chrUn_KI270512v1,chrUn_KI270519v1,chrUn_KI270522v1,chrUn_KI270511v1,chrUn_KI270515v1,chrUn_KI270507v1,chrUn_KI270517v1,chrUn_KI270529v1,chrUn_KI270528v1,chrUn_KI270530v1,chrUn_KI270539v1,chrUn_KI270538v1,chrUn_KI270544v1,chrUn_KI270548v1,chrUn_KI270583v1,chrUn_KI270587v1,chrUn_KI270580v1,chrUn_KI270581v1,chrUn_KI270579v1,chrUn_KI270589v1,chrUn_KI270590v1,chrUn_KI270584v1,chrUn_KI270582v1,chr
Un_KI270588v1,chrUn_KI270593v1,chrUn_KI270591v1,chrUn_KI270330v1,chrUn_KI270329v1,chrUn_KI270334v1,chrUn_KI270333v1,chrUn_KI270335v1,chrUn_KI270338v1,chrUn_KI270340v1,chrUn_KI270336v1,chrUn_KI270337v1,chrUn_KI270363v1,chrUn_KI270364v1,chrUn_KI270362v1,chrUn_KI270366v1,chrUn_KI270378v1,chrUn_KI270379v1,chrUn_KI270389v1,chrUn_KI270390v1,chrUn_KI270387v1,chrUn_KI270395v1,chrUn_KI270396v1,chrUn_KI270388v1,chrUn_KI270394v1,chrUn_KI270386v1,chrUn_KI270391v1,chrUn_KI270383v1,chrUn_KI270393v1,chrUn_KI270384v1,chrUn_KI270392v1,chrUn_KI270381v1,chrUn_KI270385v1,chrUn_KI270382v1,chrUn_KI270376v1,chrUn_KI270374v1,chrUn_KI270372v1,chrUn_KI270373v1,chrUn_KI270375v1,chrUn_KI270371v1,chrUn_KI270448v1,chrUn_KI270521v1,chrUn_GL000195v1,chrUn_GL000219v1,chrUn_GL000220v1,chrUn_GL000224v1,chrUn_KI270741v1,chrUn_GL000226v1,chrUn_GL000213v1,chrUn_KI270743v1,chrUn_KI270744v1,chrUn_KI270745v1,chrUn_KI270746v1,chrUn_KI270747v1,chrUn_KI270748v1,chrUn_KI270749v1,chrUn_KI270750v1,chrUn_KI270751v1,chrUn_KI270752v1,chrUn_KI270753v1,chrUn_KI270754v1,chrUn_KI270755v1,chrUn_KI270756v1,chrUn_KI270757v1,chrUn_GL000214v1,chrUn_KI270742v1,chrUn_GL000216v2,chrUn_GL000218v1,chrEBV\\\nALL_MEM_EPS = 180G\\\nGPU_QUEUE = \\\nmodule_vardict_java = mugqic/VarDictJava/1.4.8\\\nmodule_strelka2 = mugqic/Strelka2/2.9.10\\\nmodule_bcbio_variation_recall = mugqic/bcbio.variation.recall/0.2.6\\\nmodule_varscan = mugqic/VarScan/2.4.3\\\nmodule_conpair = mugqic/Conpair/0.2\\\nmodule_scones = mugqic/SCoNEs/2.1.2\\\nmodule_sequenza_utils = mugqic/Sequenza-utils/3.0.0\\\nmodule_gcc = mugqic/gcc/4.9.3\\\nmodule_amber = mugqic/amber/3.5\\\nmodule_cobalt = mugqic/cobalt/1.11\\\nmodule_purple = mugqic/purple/2.53\\\nmodule_circos = mugqic/circos/0.69-6\\\nmodule_cpsr = mugqic/cpsr/0.6.2\\\nmodule_pcgr = mugqic/pcgr/0.9.2\\\nstrelka2_bed_file = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.strelka2.bed.gz\\\ncommon_name = Human\\\nassembly_synonyms = hg38\\\nCTAT_bundle_version = 
GRCh38_gencode_v32_CTAT_lib_Dec062019\\\n\\\n[picard_sam_to_fastq]\\\ncluster_cpu = -c 12 -N 1\\\ncluster_mem = --mem=32G\\\nram = 18G\\\ncluster_walltime = --time=72:00:00\\\nother_options = --MAX_RECORDS_IN_RAM=5000000\\\n\\\n[samtools_cram_output]\\\noptions = -h -T %(genome_fasta)s -C\\\nvariantBam_options = --cram -strip-tags BI,OQ\\\ncluster_cpu = %(PINT_CPU)s\\\ncluster_walltime = 48:00:0\\\n\\\n[sym_link_fastq]\\\ncluster_walltime = 3:00:00\\\n\\\n[sym_link_bam]\\\ncluster_walltime = 3:00:00\\\n\\\n[trimmomatic]\\\ncluster_mem = 20G\\\nram = 19G\\\ncluster_cpu = 5\\\nthreads = %(cluster_cpu)s\\\ntrailing_min_quality = 30\\\nmin_length = 50\\\nillumina_clip_settings = :2:30:15\\\njava_other_options = -XX:ParallelGCThreads=5 -Dsamjdk.buffer_size=1048576\\\n\\\n[skewer_trimming]\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\noptions = --min 25 -q 25 --compress -f sanger\\\nadapter_fasta = \\\ncluster_walltime = 48:00:00\\\n\\\n[bwa_mem]\\\ncluster_cpu = %(HALF_CPU)s\\\nbwa_other_options = -t %(cluster_cpu)s -K 100000000 -Y\\\nsequencing_center = McGill University and Genome Quebec Innovation Centre\\\nsequencing_technology = Illumina\\\n\\\n[picard_sort_sam]\\\nram = 16G\\\nmax_records_in_ram = 3750000\\\n\\\n[sambamba_sort_sam]\\\noptions = -m 10G\\\n\\\n[bwa_mem_sambamba_sort_sam]\\\ncluster_cpu = %(HALF_CPU)s\\\ncluster_walltime = 48:00:00\\\ncluster_mem = 60G\\\n\\\n[bwakit_picard_sort_sam]\\\ncluster_cpu = 4\\\ncluster_walltime = 48:00:00\\\ncluster_mem = 60G\\\n\\\n[sambamba_merge_sam_extract_unmapped]\\\ncluster_cpu = 8\\\noptions = -t %(cluster_cpu)s\\\ncluster_walltime = 35:00:00\\\ncluster_mem = 32G\\\n\\\n[sambamba_extract_unmapped]\\\ncluster_cpu = %(PINT_CPU)s\\\noptions = -t %(PINT_CPU)s -f bam -F \"unmapped\"\\\ncluster_walltime = 3:00:00\\\n\\\n[gatk_indel_realigner]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nnb_jobs = 23\\\nram = 12G\\\nmax_reads_in_memory = 
500000\\\ncluster_cpu = 1\\\nother_options = \\\ncluster_walltime = 35:00:0\\\ncluster_mem = 12G\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\n\\\n[gatk_realigner_target_creator]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nram = 12G\\\nknown_mills = %(mills)s\\\ncluster_cpu = 1\\\nother_options = -nt 1\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\nknown_indel_sites = %(mills)s\\\n\\\n[sambamba_merge_realigned]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\ncluster_walltime = 35:00:00\\\ncluster_mem = 48G\\\n\\\n[sambamba_merge_unmapped]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\ncluster_walltime = 12:00:00\\\n\\\n[merge_realigned]\\\ncluster_walltime = 35:00:00\\\ncluster_cpu = 3\\\ncluster_mem = --mem=32G\\\n\\\n[bvatools_groupfixmate]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nram = 16G\\\n\\\n[samtools_sort]\\\ncluster_cpu = 5\\\nother_options = -@ %(cluster_cpu)s -m 20000M\\\ncluster_walltime = 71:00:00\\\n\\\n[fix_mate_by_coordinate]\\\ncluster_cpu = 4\\\ncluster_walltime = 71:00:00\\\ncluster_mem = 52G\\\n\\\n[samtools_fixmate]\\\nmodule_samtools = mugqic/samtools/1.9\\\n\\\n[fix_mate_by_coordinate_samtools]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -m -O bam -@ %(cluster_cpu)s\\\ncluster_walltime = 35:00:00\\\ncluster_mem = 48G\\\n\\\n[picard_mark_duplicates]\\\ncluster_cpu = %(PINT_CPU)s\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Dsamjdk.buffer_size=4194304\\\nram = 14G\\\nmax_records_in_ram = 1000000\\\ncluster_walltime = 71:00:00\\\ncluster_mem = 15G\\\n\\\n[gatk_base_recalibrator]\\\ncluster_cpu = %(HALF_CPU)s\\\nthreads = %(cluster_cpu)s\\\ncluster_mem = 55G\\\nram = %(cluster_mem)s\\\noptions = --bqsr-baq-gap-open-penalty 30\\\nknown_dbsnp = %(dbsnp)s\\\nknown_gnomad = %(gnomad_exome)s\\\nknown_mills = %(mills)s\\\ncluster_walltime = 
35:00:0\\\n\\\n[gatk_print_reads]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=4 -Dsamjdk.buffer_size=4194304\\\ncluster_cpu = -l nodes=1:ppn=40\\\nthreads = %(cluster_cpu)s\\\ncluster_mem = 187G\\\nram = 180G\\\ncluster_walltime = 96:00:0\\\n\\\n[gatk_apply_bqsr]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Dsamjdk.buffer_size=4194304 -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false\\\ncluster_cpu = 40\\\nthreads = %(cluster_cpu)s\\\noptions = \\\ncluster_mem = 186G\\\nram = 180G\\\ncluster_walltime = 96:00:0\\\n\\\n[recalibration]\\\ncluster_walltime = 96:00:0\\\ncluster_mem = 51G\\\nram = 50G\\\n\\\n[sambamba_index]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\n\\\n[picard_collect_multiple_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_cpu = 1\\\ncluster_mem = 12G\\\nram = 11G\\\nmax_records_in_ram = 1000000\\\noptions = --FILE_EXTENSION \".txt\"\\\ncluster_walltime = 48:00:00\\\n\\\n[picard_calculate_hs_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_mem = 9G\\\nram = 8G\\\ncluster_walltime = 48:00:00\\\n\\\n[metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_walltime = walltime=24:00:0\\\ncluster_mem = 12G\\\n\\\n[picard_collect_oxog_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_mem = 13G\\\nram = 12G\\\nmax_records_in_ram = 4000000\\\ncluster_walltime = 35:00:0\\\n\\\n[picard_collect_gcbias_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\ncluster_mem = 13G\\\nram = 12G\\\nmax_records_in_ram = 4000000\\\ncluster_walltime = 35:00:0\\\n\\\n[dna_sample_qualimap]\\\ncluster_mem = 120G\\\nram = 120G\\\nuse_bed = false\\\ncluster_cpu = -l nodes=1:ppn=24\\\nqualimap_options = --skip-duplicated -nt 1 -gd HUMAN\\\ncluster_walltime = 35:00:0\\\n\\\n[dna_sambamba_flagstat]\\\ncluster_cpu = 6\\\nflagstat_options = -t 
%(cluster_cpu)s\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[fastqc]\\\ncluster_cpu = 4\\\nthreads = %(cluster_cpu)s\\\ncluster_walltime = -l walltime=35:00:0\\\n\\\n[multiqc]\\\nmodule_python = mugqic/python/3.7.3\\\ncluster_walltime = -l walltime=2:00:0\\\noptions = \\\n\\\n[gatk_depth_of_coverage]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\njava_other_options = -XX:ParallelGCThreads=2\\\nram = 8000M\\\ncluster_mem = 8G\\\nsummary_coverage_thresholds = 10,25,50,75,100,500\\\ncluster_cpu = 1\\\ncluster_walltime = 48:00:00\\\n\\\n[bvatools_depth_of_coverage]\\\ncoverage_targets = auto\\\nother_options = --gc --maxDepth 1001 --summaryCoverageThresholds 1,5,10,25,50,75,100,500,1000 --minMappingQuality 15 --minBaseQuality 15 --ommitN\\\ncluster_mem = 35G\\\nram = 35G\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\ncluster_walltime = 96:00:00\\\n\\\n[igvtools_compute_tdf]\\\nram = 6G\\\ncluster_walltime = 96:00:00\\\ncluster_cpu = 1\\\noption = -f min,max,mean -w 25\\\ncluster_mem = 7G\\\n\\\n[gatk_callable_loci]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\njava_other_options = -XX:ParallelGCThreads=2\\\ncluster_mem = 11G\\\nram = 10G\\\nother_options = -dt none --minDepth 10 --maxDepth 500 --minDepthForLowMAPQ 10 --minMappingQuality 10 --minBaseQuality 15\\\ncluster_walltime = 35:00:00\\\n\\\n[bvatools_basefreq]\\\ncluster_mem = 8G\\\nthreads = 1\\\nram = 8G\\\n\\\n[extract_common_snp_freq]\\\ncluster_cpu = 2\\\ncluster_walltime = 35:00:00\\\ncluster_mem = 20G\\\n\\\n[bvatools_ratiobaf]\\\ncluster_cpu = 5\\\ncluster_mem = 71G\\\nram = 70G\\\nother_options = --plot --maxDepth 1000  --exclude %(excluded_chromosome)s\\\n\\\n[baf_plot]\\\ncluster_cpu = %(QUART_CPU)s\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 48G\\\n\\\n[vcftools_missing_indv]\\\noptions = \\\ncluster_walltime = 24:00:00\\\n\\\n[vcftools_depth]\\\noptions = \\\ncluster_walltime = 24:00:00\\\n\\\n[gatk_crosscheck_fingerprint]\\\nmodule_gatk = 
mugqic/GenomeAnalysisTK/4.2.2.0\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304 -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false\\\ncluster_cpu = 4\\\noptions = --NUM_THREADS %(cluster_cpu)s --EXIT_CODE_WHEN_MISMATCH 0\\\ncluster_mem = 16G\\\nram = 15G\\\nhaplotype_database = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.fingerprint.map\\\nlod_threshold = 3.0\\\ncluster_walltime = 24:00:00\\\n\\\n[gatk_cluster_crosscheck_metrics]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/4.2.2.0\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304 -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=false\\\noptions = \\\ncluster_mem = 16G\\\nram = 15G\\\nlod_threshold = 5.0\\\ncluster_walltime = 24:00:00\\\n\\\n[verify_bam_id]\\\noptions = --verbose --ignoreRG --noPhoneHome\\\nvcf = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.1000G_phase1.snps.high_confidence.EUR.vcf\\\ncluster_walltime = 24:00:00\\\ncluster_cpu = 2\\\ncluster_mem = 20G\\\n\\\n[run_checkmate]\\\nmodule_python = mugqic/python/2.7.14\\\noptions = -V\\\nbed = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.SNP_GRCh38_hg19_woChr.bed\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 12G\\\n\\\n[run_peddy]\\\noptions = \\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 24G\\\n\\\n[gatk_haplotype_caller]\\\ncluster_cpu = %(PINT_CPU)s\\\noptions = --useNewAFCalculator --emitRefConfidence GVCF -dt none -nct %(cluster_cpu)s -G StandardAnnotation -G StandardHCAnnotation\\\ncluster_mem = 36G\\\nram = 35G\\\nnb_jobs = 23\\\ncluster_walltime = 35:00:00\\\ninterval_padding = \\\n\\\n[gatk_cat_variants]\\\noptions = \\\ncluster_mem = 8G\\\nram = 8G\\\ncluster_walltime = 
24:00:00\\\n\\\n[picard_merge_vcfs]\\\nmax_records_in_ram = 2000000\\\noptions = \\\nram = 12G\\\n\\\n[gatk_merge_and_call_individual_gvcfs]\\\noptions = -nt 1\\\n\\\n[gatk_merge_and_call_combined_gvcfs]\\\noptions = -nt 1 -G StandardAnnotation -G StandardHCAnnotation -A FisherStrand -A QualByDepth -A ChromosomeCounts\\\n\\\n[gatk_genotype_gvcf]\\\noptions = --useNewAFCalculator -G StandardAnnotation -G StandardHCAnnotation\\\ncluster_mem = 30G\\\nram = 30G\\\ncluster_walltime = 35:00:00\\\ncluster_cpu = 2\\\n\\\n[gatk_combine_gvcf]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=4 -Dsamjdk.buffer_size=4194304\\\ncluster_mem = 25G\\\nram = 24G\\\nnb_haplotype = 4\\\nnb_batch = 1\\\ncluster_cpu = %(QUART_CPU)s\\\nother_options = \\\ncluster_walltime = 24:00:00\\\n\\\n[merge_and_call_combined_gvcf]\\\ncluster_mem = 25G\\\nram = 24G\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Dsamjdk.buffer_size=4194304\\\ncluster_walltime = 96:00:00\\\ncluster_cpu = 2\\\n\\\n[merge_and_call_individual_gvcf]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=2 -Dsamjdk.buffer_size=4194304\\\ncluster_mem = 32G\\\nram = 32G\\\ncluster_walltime = 96:00:00\\\ncluster_cpu = 2\\\n\\\n[variant_recalibrator]\\\ntranch_other_options_snps = --resource:hapmap,known=false,training=true,truth=true,prior=15.0 %(hapmap)s --resource:omni,known=false,training=true,truth=false,prior=12.0 %(omni)s --resource:1000G,known=false,training=true,truth=false,prior=10.0 %(1000G)s --resource:dbsnp,known=true,training=false,truth=false,prior=6.0 %(dbsnp)s -an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR --truth-sensitivity-tranche 100.0 --truth-sensitivity-tranche 99.95 --truth-sensitivity-tranche 99.9 --truth-sensitivity-tranche 99.95 --truth-sensitivity-tranche 99.5 --truth-sensitivity-tranche 99.0 --truth-sensitivity-tranche 90.0 -mode SNP\\\ntranch_other_options_indels = --resource:mills,known=false,training=true,truth=true,prior=12.0 
%(mills)s --resource:dbsnp,known=true,training=false,truth=false,prior=2.0 %(dbsnp)s -an QD -an MQ -an MQRankSum -an ReadPosRankSum -an FS -an SOR --truth-sensitivity-tranche 100.0 --truth-sensitivity-tranche 99.9 --truth-sensitivity-tranche 99.4 --truth-sensitivity-tranche 99.0 --truth-sensitivity-tranche 90.0 -mode INDEL\\\napply_other_options_snps = --truth-sensitivity-filter-level 99.95 --mode SNP\\\napply_other_options_indels = --truth-sensitivity-filter-level 99.4 --mode INDEL\\\ncluster_walltime = 35:00:00\\\ncluster_cpu = 2\\\ncluster_mem = 30G\\\n\\\n[gatk_variant_recalibrator]\\\nsmall_sample_option = \\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -nt %(cluster_cpu)s\\\ncluster_mem = 25G\\\nram = 24G\\\n\\\n[gatk_apply_recalibration]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -nt %(cluster_cpu)s\\\ncluster_mem = 24G\\\nram = 24G\\\n\\\n[rawmpileup]\\\nnb_jobs = 25\\\nmpileup_other_options = -d 1000 -L 1000 -B -q 11 -Q 10\\\ncluster_walltime = -l walltime=12:00:0\\\ncluster_mem = 8G\\\n\\\n[snp_and_indel_bcf]\\\napproximate_nb_jobs = 15\\\nmpileup_other_options = -d 1000 -B -q 11 -Q 20 -Ou -a FORMAT/DP,FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/SP,FORMAT/SCR\\\nbcftools_other_options = -mvO b\\\ncluster_walltime = 48:00:00\\\ncluster_mem = 12G\\\n\\\n[snpsift_annotate]\\\ncluster_mem = 9G\\\nram = 8G\\\njava_other_options = -XX:ParallelGCThreads=2\\\ncluster_walltime = 24:00:00\\\n\\\n[snp_id_annotation]\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 12G\\\n\\\n[mpileup_snp_id_annotation]\\\ncluster_walltime = 24:00:00\\\n\\\n[haplotype_caller_snp_id_annotation]\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 16G\\\n\\\n[compute_effects]\\\njava_other_options = -XX:ParallelGCThreads=1\\\ncluster_mem = 12G\\\nram = 12000M\\\noptions = -lof\\\nsnpeff_genome = %(assembly_synonyms)s\\\ncluster_walltime = 12:00:0\\\n\\\n[mpileup_snp_effect]\\\ncluster_cpu = 2\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 24G\\\n\\\n[haplotype_caller_snp_effect]\\\ncluster_cpu = 
1\\\ncluster_walltime = 24:00:00\\\ncluster_mem = 16G\\\n\\\n[snpsift_dbnsfp]\\\ncluster_cpu = 2\\\nram = 24G\\\njava_other_options = -XX:ParallelGCThreads=2\\\n\\\n[dbnsfp_annotation]\\\ncluster_cpu = %(QUART_CPU)s\\\ncluster_walltime = 35:00:00\\\ncluster_mem = --mem=40G\\\n\\\n[gemini_annotations]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t snpEff --cores %(cluster_cpu)s --save-info-string\\\ncluster_walltime = -l walltime=72:00:0\\\ntmp_dir = ${SLURM_TMPDIR}\\\n\\\n[vcf_stats]\\\nmodule_python = mugqic/python/2.7.14\\\n\\\n[report]\\\ntitle = DNA-Seq Analysis Report\\\n\\\n[sv_annotation]\\\ncluster_walltime = 3:00:0\\\ncluster_mem = 12G\\\n\\\n[delly_call_filter]\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\noptions = -q 15\\\nsv_types_options = DEL,INS,DUP,INV,BND\\\nexclude_list = ${DELLY_PATH}/excludeTemplates/human.hg38.excl.tsv\\\ncluster_walltime = 35:00:0\\\ncluster_mem = 60G\\\n\\\n[delly_call_filter_germline]\\\ntype_options = germline\\\nDUP_options = -m 300 -a 0.1 -r 0.75\\\nDEL_options = -m 300 -a 0.1 -r 0.75\\\nINV_options = -m 300 -a 0.1 -r 0.75\\\nBND_options = -m 0 -a 0.1 -r 0.75\\\nINS_options = \\\nbcftools_options = \\\n\\\n[manta_sv]\\\nmodule_python = mugqic/python/2.7.14\\\noption_mode = local\\\ncluster_cpu = %(HALF_CPU)s\\\noption_nodes = %(cluster_cpu)s\\\ncluster_mem = 55G\\\nram = 55G\\\nexperiment_type_option = \\\ncluster_walltime = 24:00:0\\\n\\\n[extract_discordant_reads]\\\ncluster_cpu = %(QUART_CPU)s\\\nsambamba_options = -t %(cluster_cpu)s\\\ndiscordants_sort_option = -t %(cluster_cpu)s\\\ncluster_walltime = -l walltime=35:00:0\\\ncluster_mem = 32G\\\noptions = \\\n\\\n[extract_split_reads]\\\ncluster_cpu = %(QUART_CPU)s\\\nsambamba_options = -t %(cluster_cpu)s\\\nsplit_sort_option = -t %(cluster_cpu)s\\\ncluster_walltime = -l walltime=35:00:0\\\ncluster_mem = 32G\\\noptions = \\\n\\\n[lumpy_paired_sv_calls]\\\nmodule_python = mugqic/python/2.7.13\\\noptions = \\\nheader_options = -h 
%(vcf_header)s\\\ncluster_walltime = -l walltime=72:00:0\\\ncluster_cpu = 3\\\ncluster_mem = 16G\\\nmodule_samtools = mugqic/samtools/1.3\\\n\\\n[wham_call_sv]\\\nexclude = chr1_KI270706v1_random,chr1_KI270707v1_random,chr1_KI270708v1_random,chr1_KI270709v1_random,chr1_KI270710v1_random,chr1_KI270711v1_random,chr1_KI270712v1_random,chr1_KI270713v1_random,chr1_KI270714v1_random,chr2_KI270715v1_random,chr2_KI270716v1_random,chr3_GL000221v1_random,chr4_GL000008v2_random,chr5_GL000208v1_random,chr9_KI270717v1_random,chr9_KI270718v1_random,chr9_KI270719v1_random,chr9_KI270720v1_random,chr11_KI270721v1_random,chr14_GL000009v2_random,chr14_GL000225v1_random,chr14_KI270722v1_random,chr14_GL000194v1_random,chr14_KI270723v1_random,chr14_KI270724v1_random,chr14_KI270725v1_random,chr14_KI270726v1_random,chr15_KI270727v1_random,chr16_KI270728v1_random,chr17_GL000205v2_random,chr17_KI270729v1_random,chr17_KI270730v1_random,chr22_KI270731v1_random,chr22_KI270732v1_random,chr22_KI270733v1_random,chr22_KI270734v1_random,chr22_KI270735v1_random,chr22_KI270736v1_random,chr22_KI270737v1_random,chr22_KI270738v1_random,chr22_KI270739v1_random,chrY_KI270740v1_random,chrUn_KI270302v1,chrUn_KI270304v1,chrUn_KI270303v1,chrUn_KI270305v1,chrUn_KI270322v1,chrUn_KI270320v1,chrUn_KI270310v1,chrUn_KI270316v1,chrUn_KI270315v1,chrUn_KI270312v1,chrUn_KI270311v1,chrUn_KI270317v1,chrUn_KI270412v1,chrUn_KI270411v1,chrUn_KI270414v1,chrUn_KI270419v1,chrUn_KI270418v1,chrUn_KI270420v1,chrUn_KI270424v1,chrUn_KI270417v1,chrUn_KI270422v1,chrUn_KI270423v1,chrUn_KI270425v1,chrUn_KI270429v1,chrUn_KI270442v1,chrUn_KI270466v1,chrUn_KI270465v1,chrUn_KI270467v1,chrUn_KI270435v1,chrUn_KI270438v1,chrUn_KI270468v1,chrUn_KI270510v1,chrUn_KI270509v1,chrUn_KI270518v1,chrUn_KI270508v1,chrUn_KI270516v1,chrUn_KI270512v1,chrUn_KI270519v1,chrUn_KI270522v1,chrUn_KI270511v1,chrUn_KI270515v1,chrUn_KI270507v1,chrUn_KI270517v1,chrUn_KI270529v1,chrUn_KI270528v1,chrUn_KI270530v1,chrUn_KI270539v1,chrUn_KI270538v1,chrUn_KI270544v1,chrUn
_KI270548v1,chrUn_KI270583v1,chrUn_KI270587v1,chrUn_KI270580v1,chrUn_KI270581v1,chrUn_KI270579v1,chrUn_KI270589v1,chrUn_KI270590v1,chrUn_KI270584v1,chrUn_KI270582v1,chrUn_KI270588v1,chrUn_KI270593v1,chrUn_KI270591v1,chrUn_KI270330v1,chrUn_KI270329v1,chrUn_KI270334v1,chrUn_KI270333v1,chrUn_KI270335v1,chrUn_KI270338v1,chrUn_KI270340v1,chrUn_KI270336v1,chrUn_KI270337v1,chrUn_KI270363v1,chrUn_KI270364v1,chrUn_KI270362v1,chrUn_KI270366v1,chrUn_KI270378v1,chrUn_KI270379v1,chrUn_KI270389v1,chrUn_KI270390v1,chrUn_KI270387v1,chrUn_KI270395v1,chrUn_KI270396v1,chrUn_KI270388v1,chrUn_KI270394v1,chrUn_KI270386v1,chrUn_KI270391v1,chrUn_KI270383v1,chrUn_KI270393v1,chrUn_KI270384v1,chrUn_KI270392v1,chrUn_KI270381v1,chrUn_KI270385v1,chrUn_KI270382v1,chrUn_KI270376v1,chrUn_KI270374v1,chrUn_KI270372v1,chrUn_KI270373v1,chrUn_KI270375v1,chrUn_KI270371v1,chrUn_KI270448v1,chrUn_KI270521v1,chrUn_GL000195v1,chrUn_GL000219v1,chrUn_GL000220v1,chrUn_GL000224v1,chrUn_KI270741v1,chrUn_GL000226v1,chrUn_GL000213v1,chrUn_KI270743v1,chrUn_KI270744v1,chrUn_KI270745v1,chrUn_KI270746v1,chrUn_KI270747v1,chrUn_KI270748v1,chrUn_KI270749v1,chrUn_KI270750v1,chrUn_KI270751v1,chrUn_KI270752v1,chrUn_KI270753v1,chrUn_KI270754v1,chrUn_KI270755v1,chrUn_KI270756v1,chrUn_KI270757v1,chrUn_GL000214v1,chrUn_KI270742v1,chrUn_GL000216v2,chrUn_GL000218v1,chrEBV\\\ncluster_cpu = %(QUART_CPU)s\\\ncores = %(cluster_cpu)s\\\nheader_options = -h %(vcf_header)s\\\ncluster_walltime = 24:00:0\\\ncluster_mem = %(HALF_MEM)s\\\n\\\n[cnvkit_batch]\\\nmin_background_samples = 20\\\nbatch_options = -m wgs --target-avg-size 5000 --short-names\\\nfix_options = --no-edge\\\nsegment_options = -m cbs -t 0.00001 --drop-low-coverage -p %(cluster_cpu)s\\\ncall_options = \\\nexport_options = vcf\\\nsegmetrics_options = --ci --pi\\\nmetrics_options = \\\nscatter_options = \\\ndiagram_options = \\\ncluster_cpu = 6\\\nthreads = 6\\\naccess = 
/cvmfs/soft.mugqic/CentOS6/genomes/species/Homo_sapiens.GRCh38/annotations/Homo_sapiens.GRCh38.access-5k-mappable.bed\\\nrefFlat = /cvmfs/soft.mugqic/CentOS6/genomes/species/Homo_sapiens.GRCh38/annotations/Homo_sapiens.GRCh38.Ensembl87.ref_flat.tsv\\\ncluster_walltime = -l walltime=48:00:0\\\nmodule_python = mugqic/python/2.7.14\\\nmodule_R = mugqic/R_Bioconductor/3.2.3_3.2\\\n\\\n[run_breakseq2]\\\nmodule_pyhton = mugqic/python/2.7.14\\\noptions = \\\nbcftools_options = -f PASS -Ov\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\ngff = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.breakseq2_bplib_20150129.gff\\\ncluster_walltime = 35:00:00\\\n\\\n[metasv_ensemble]\\\noptions = --boost_sc --filter_gaps --keep_standard_contigs --mean_read_length 150\\\ncluster_cpu = %(QUART_CPU)s\\\nthreads = %(cluster_cpu)s\\\nfilter_pass_options = \\\ncluster_walltime = 35:00:0\\\ncluster_mem = --mem=60G\\\n\\\n[svaba_run]\\\ncluster_cpu = %(HALF_CPU)s\\\noptions = -p %(cluster_cpu)s\\\nref = %(genome_bwa_index)s\\\ndbsnp = $SVABA_HOME/dbsnp_indel.vcf\\\ncluster_walltime = 35:00:0\\\ncluster_mem = 60G\\\n\\\n[haplotype_caller_dbnsfp_annotation]\\\ncluster_walltime = 24:00:00\\\ncluster_cpu = 3\\\ncluster_mem = 40G\\\n\\\n[sambamba_merge_sam_files]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\n\\\n[sym_link_pair]\\\ncluster_walltime = 3:00:0\\\n\\\n[sambamba_mark_duplicates]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = -t %(cluster_cpu)s\\\ntmp_dir = tmp_dir\\\ncluster_mem = 36G\\\ncluster_walltime = 35:00:0\\\n\\\n[conpair_concordance_contamination]\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.8\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nmarkers_bed = ${CONPAIR_DATA}/markers/%(assembly)s.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.liftover.bed\\\nmarkers_txt = 
${CONPAIR_DATA}/markers/%(assembly)s.autosomes.phase3_shapeit2_mvncall_integrated.20130502.SNV.genotype.sselect_v4_MAF_0.4_LD_0.8.liftover.txt\\\nconcord_options = --normal_homozygous_markers_only\\\ncontam_options = \\\ncluster_mem = 6G\\\nram = 6G\\\ncluster_walltime = 24:00:0\\\n\\\n[picard_collect_sequencing_artifacts_metrics]\\\nFFPE = true\\\ncluster_mem = 12G\\\nram = 12000M\\\noptions = --FILE_EXTENSION \".txt\"\\\nmax_records_in_ram = 4000000\\\ncluster_walltime = -l walltime=35:00:0\\\n\\\n[qualimap]\\\ncluster_cpu = 4\\\ncluster_mem = 60G\\\nram = 60G\\\n\\\n[picard_ScatterIntervalsByNs]\\\ncluster_mem = 3G\\\nram = 3000M\\\n\\\n[gatk_splitInterval]\\\ncluster_mem = 3G\\\nram = 3000M\\\noptions = --subdivision-mode BALANCING_WITHOUT_INTERVAL_SUBDIVISION\\\n\\\n[gatk_interval_list2bed]\\\ncluster_mem = 3200M\\\nram = 3G\\\ncluster_walltime = -l walltime=3:00:0\\\ncluster_cpu = -l nodes=1:ppn=2\\\n\\\n[gatk_bed2interval_list]\\\ncluster_mem = 3200M\\\nram = 3G\\\n\\\n[gatk_preProcessInterval]\\\ncluster_mem = 3200M\\\nram = 3G\\\noptions = --interval-merging-rule OVERLAPPING_ONLY --bin-length 5000 --padding 250\\\n\\\n[rawmpileup_panel]\\\nnb_jobs = 25\\\nmodule_samtools = mugqic/samtools/1.3\\\npanel = test_rapid/SureSelectHumanAllExonV5.targets_ext1k.bed\\\nmpileup_other_options = -d 1000 -L 1000 -B -q 1 -Q 10\\\ncluster_walltime = 12:00:0\\\ncluster_cpu = 1\\\n\\\n[varscan2_somatic_panel]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nother_options = --min-coverage 3 --min-var-freq 0.05 --p-value 1 --somatic-p-value 0.05 --strand-filter 0\\\ncluster_mem = 4300M\\\nram = 4G\\\ncluster_walltime = 12:00:0\\\ncluster_cpu = 2\\\n\\\n[merge_varscan2]\\\nmodule_python = mugqic/python/2.7.18\\\nsomatic_filter_options = -Oz -i 'SS=\"2\"'\\\ngermline_filter_options = -Oz -i 'SS=\"1\"|SS=\"3\"'\\\ngenotype_filter_options = -e 'GT[*]=\"RR\"'\\\ntabix_options = -pvcf\\\ncluster_walltime = -l 
walltime=3:00:0\\\ncluster_cpu = 2\\\ncluster_mem = 8G\\\n\\\n[preprocess_vcf_panel]\\\ncluster_walltime = -l walltime=3:00:0\\\ncluster_mem = 8G\\\n\\\n[rawmpileup_cat]\\\ncluster_walltime = -l walltime=35:00:0\\\ncluster_mem = 8G\\\n\\\n[varscan2_somatic]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nother_options = --min-coverage 3 --min-var-freq 0.05 --p-value 0.10 --somatic-p-value 0.05 --strand-filter 0\\\ncluster_mem = 4G\\\nram = 4G\\\ncluster_walltime = -l walltime=12:00:0\\\ncluster_cpu = 2\\\n\\\n[varscan2_readcount_fpfilter]\\\nreadcount_options = -q 1 -b 20 -i -w 1000 -d 2500\\\nfpfilter_options = --dream3-settings --keep-failures\\\nsomatic_filter_options = -i 'SS=\"2\"'\\\ngermline_filter_options = -i 'SS=\"1\"|SS=\"3\"'\\\ngenotype_filter_options = -e 'GT[*]=\"RR\"'\\\nram = 12G\\\ncluster_cpu = 3\\\ncluster_mem = 12G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[gatk_mutect2]\\\ncluster_cpu = %(QUART_CPU)s\\\noptions = --pair-hmm-implementation AVX_LOGLESS_CACHING_OMP --native-pair-hmm-threads %(cluster_cpu)s --max-reads-per-alignment-start 0 --read-validation-stringency LENIENT --af-of-alleles-not-in-resource 0.0000025\\\ncluster_mem = 36G\\\nram = 36000M\\\nnb_jobs = 23\\\ncosmic = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.cosmic.coding.87.vcf.gz\\\nknown_sites = %(af_gnomad)s\\\npon = \\\ncluster_walltime = --time=72:00:0\\\n\\\n[gatk_learn_read_orientation_model]\\\noptions = \\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_walltime = 24:00:0\\\n\\\n[gatk_merge_stats]\\\nram = 6G\\\noptions = \\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[gatk_get_pileup_summaries]\\\noptions = \\\nknown_sites = %(af_gnomad)s\\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_walltime = 24:00:0\\\n\\\n[gatk_calculate_contamination]\\\noptions = \\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_walltime = 24:00:0\\\n\\\n[gatk_filter_mutect_calls]\\\noptions = \\\ncluster_mem = 16G\\\nram 
= 16000M\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[gatk_merge_vcfs]\\\noptions = \\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[merge_filter_mutect2]\\\nbcftools_options = \\\nfilter_options = -f PASS\\\ncluster_mem = 18G\\\nram = 18G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[samtools_paired]\\\nmodule_bcftools = mugqic/bcftools/1.9\\\nnb_jobs = 23\\\nmpileup_other_options = -d 1000 -B -q 10 -Q 10 -Ou -a DP,AD,ADF,ADR,SP\\\nbcftools_calls_options = -mvO b\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[merge_filter_paired_samtools]\\\nconcat_options = -Ob\\\nsomatic_filter_options = -i 'INFO/STATUS~\"somatic\"'\\\nsomatic_vcffilter_options = -g \"! ( GT = 1/1 )\"\\\ngermline_loh_filter_options = -i 'INFO/STATUS~\"germline\"|INFO/STATUS~\"loh\"'\\\ngenotype_filter_options = -e 'GT[*]=\"RR\"'\\\ncluster_mem = 12G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[vardict_paired]\\\nmodule_python = mugqic/python/2.7.18\\\ncluster_cpu = 4\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nuse_bed = false\\\ndict2bed_options = -c 5000 -o 250\\\nnb_jobs = 22\\\nvardict_options = -f 0.03 -Q 11 -c 1 -S 2 -E 3 -g 4 -th %(cluster_cpu)s\\\nvar2vcf_options = -f 0.03 -P 0.9 -m 4.25 -M\\\njava_options = -Xms768m\\\ncluster_mem = 16G\\\nram = 16000M\\\nclasspath = $VARDICT_HOME/lib/VarDict-1.4.8.jar:$VARDICT_HOME/lib/commons-cli-1.2.jar:$VARDICT_HOME/lib/jregex-1.2_01.jar:$VARDICT_HOME/lib/htsjdk-2.8.0.jar com.astrazeneca.vardict.Main\\\ncluster_walltime = -l walltime=72:00:0\\\n\\\n[merge_filter_paired_vardict]\\\nsomatic_filter_options = -f PASS -i 'INFO/STATUS~\".*Somatic\"'\\\ngermline_filter_options = -f PASS -i 'INFO/STATUS~\"Germline\"|INFO/STATUS~\".*LOH\"'\\\ngenotype_filter_options = -e 'GT[*]=\"RR\"'\\\ncluster_mem = 12G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[strelka2_paired_somatic]\\\nmodule_python = mugqic/python/2.7.18\\\noption_mode = 
local\\\ncluster_cpu = %(HALF_CPU)s\\\noption_nodes = %(cluster_cpu)s\\\ncluster_mem = 55G\\\nram = 55G\\\nbed_file = %(strelka2_bed_file)s\\\nexperiment_type_option = \\\nfilter_options = -f PASS -Oz\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[strelka2_paired_germline]\\\nmodule_python = mugqic/python/2.7.18\\\noption_mode = local\\\ncluster_cpu = %(HALF_CPU)s\\\noption_nodes = %(cluster_cpu)s\\\ncluster_mem = 55G\\\nram = 55G\\\nbed_file = %(strelka2_bed_file)s\\\nexperiment_type_option = \\\nfilter_options = -f PASS -Oz -e 'GT[*]=\"RR\"'\\\ncluster_walltime = -l walltime=48:00:0\\\n\\\n[strelka2_paired_germline_snpeff]\\\nsplit_options = -Oz -i'GT=\"alt\"'\\\noptions = \\\ncluster_mem = 12G\\\nram = 12000M\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[bcbio_ensemble]\\\ncluster_cpu = 2\\\ncluster_mem = 24G\\\nram = 24G\\\n\\\n[bcbio_ensemble_somatic]\\\ncluster_cpu = 6\\\noptions = --cores %(cluster_cpu)s --numpass 1 --names mutect2,strelka2,vardict,varscan2\\\ncluster_mem = 24G\\\nram = 24G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[bcbio_ensemble_germline]\\\ncluster_cpu = 6\\\noptions = --cores %(cluster_cpu)s --numpass 1 --names strelka2,vardict,varscan2\\\ncluster_mem = 24G\\\nram = 24G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[gatk_variant_annotator]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nmodule_gatk = mugqic/GenomeAnalysisTK/3.7\\\ncluster_mem = 12G\\\nram = 12000M\\\nnb_jobs = 20\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[gatk_variant_annotator_somatic]\\\ncluster_cpu = 2\\\nother_options = -nt 2 --dbsnp %(known_variants)s -G StandardAnnotation -G StandardSomaticAnnotation -A HomopolymerRun -A Coverage -A DepthPerAlleleBySample -A ClippingRankSumTest -A BaseQualityRankSumTest -A MappingQualityRankSumTest -A MappingQualityZeroBySample -A LowMQ -A ReadPosRankSumTest -A GCContent\\\n\\\n[gatk_variant_annotator_germline]\\\ncluster_cpu = 2\\\nother_options = -nt 2 
--dbsnp %(known_variants)s -A HomopolymerRun -A Coverage -A DepthPerAlleleBySample -A ClippingRankSumTest -A BaseQualityRankSumTest -A MappingQualityRankSumTest -A MappingQualityZeroBySample -A LowMQ -A ReadPosRankSumTest -A GCContent\\\n\\\n[merge_gatk_variant_annotator]\\\ncluster_cpu = 2\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[filter_ensemble]\\\nmodule_python = mugqic/python/3.10.4\\\ncall_filter = 2\\\nsomatic_filter_options = -Oz -i'TDP>=10 && TVAF>=0.05 && NDP>=10 && NVAF<=0.05'\\\ngermline_filter_options = -Oz -i'TDP>=10 && TVAF>=0.05 && NDP>=10 && NVAF>=0.05'\\\ncluster_mem = 12G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[report_cpsr]\\\noptions = --no_vcf_validate --force_overwrite --no_docker --secondary_findings --gwas_findings --panel_id 0\\\nassembly = grch38\\\ncluster_cpu = %(PINT_CPU)s\\\ncluster_mem = 36G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[report_pcgr]\\\noptions = --no_vcf_validate --force_overwrite --no_docker --vep_buffer_size 500 --vep_regulatory --show_noncoding --vcf2maf\\\ntumor_type = --tumor_site 0\\\ntumor_options = --call_conf_tag TAL --tumor_dp_tag TDP --tumor_af_tag TVAF --tumor_dp_min 10 --tumor_af_min 0.05\\\nnormal_options = --control_dp_tag NDP --control_af_tag NVAF --control_dp_min 10 --control_af_max 0.05\\\nmutsig_options = --estimate_signatures\\\ntmb_options = --estimate_tmb --tmb_algorithm nonsyn\\\nmsi_options = --estimate_msi_status\\\nassembly = grch38\\\nassay = --assay WGS\\\ncluster_cpu = %(HALF_CPU)s\\\ncluster_mem = 36G\\\ncluster_walltime = -l walltime=3:00:0\\\n\\\n[compute_cancer_effects_somatic]\\\njava_other_options = -XX:ParallelGCThreads=1\\\noptions = -cancer -lof\\\nsnpeff_genome = hg19\\\ncluster_walltime = 12:00:0\\\ncluster_mem = 12G\\\nram = 12000M\\\n\\\n[compute_cancer_effects_germline]\\\njava_other_options = -XX:ParallelGCThreads=1\\\noptions = -lof\\\nsnpeff_genome = hg19\\\ncluster_walltime = -l walltime=12:00:0\\\nram = 12000M\\\ncluster_mem = 
12G\\\n\\\n[gatk_combine_variants]\\\n\\\n[decompose_and_normalize_mnps]\\\ncluster_walltime = 12:00:0\\\n\\\n[set_somatic_and_actionable_mutations]\\\nset_somatic = --min-depth 30 --min-tumor-depth 10 --min-norm-depth 10\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[mpileup_sequenza]\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[sequenza]\\\nnb_jobs = 23\\\ngc_file = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.gc50Base.txt\\\nbin_window_size = 50\\\nmpileup_options = -d 1000 -L 1000 -B -Q 25 -q 15\\\npileup_options = -q 20 -N 30\\\ncluster_walltime = -l walltime=24:00:0\\\ncluster_mem = 12G\\\n\\\n[amber]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_cpu = %(PINT_CPU)s\\\nthreads = %(cluster_cpu)s\\\nloci = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GermlineHetPon.vcf.gz\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[cobalt]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\ncluster_mem = 16G\\\nram = 16000M\\\ncluster_cpu = %(PINT_CPU)s\\\nthreads = %(cluster_cpu)s\\\ngc_profile = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GC_profile.1000bp.cnp\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[purple]\\\njava_other_options = -XX:+UseParallelGC -XX:ParallelGCThreads=1 -Dsamjdk.buffer_size=4194304\\\nmodule_perl = mugqic/perl/5.34.0\\\ncluster_mem = 17G\\\nram = 16G\\\ncluster_cpu = %(PINT_CPU)s\\\nthreads = %(cluster_cpu)s\\\ngc_profile = %(assembly_dir)s/annotations/%(scientific_name)s.%(assembly)s.GC_profile.1000bp.cnp\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[samtools_single]\\\nnb_jobs = 25\\\nmodule_samtools = mugqic/samtools/0.1.19\\\nmpileup_other_options = -B -q 15 -Q 25 -D -S -g\\\nbcftools_view_options = -bvcg\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[merge_samtools_single]\\\ncluster_walltime = -l 
walltime=12:00:0\\\n\\\n[tabix_split]\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[shapeit]\\\ncluster_cpu = %(PINT_CPU)s\\\ncheck_options = \\\ncheck_threads = %(cluster_cpu)s\\\nphase_options = \\\nphase_threads = %(cluster_cpu)s\\\ncluster_walltime = -l walltime=12:00:0\\\n\\\n[scnaphase]\\\ncluster_walltime = 12:00:0\\\ncluster_mem = 16G\\\n\\\n[delly_call_filter_somatic]\\\ntype_options = somatic\\\nDUP_options = -m 300 -a 0.1 -r 0.75\\\nDEL_options = -m 300 -a 0.1 -r 0.75\\\nINV_options = -m 300 -a 0.1 -r 0.75\\\nBND_options = -m 0 -a 0.1 -r 0.75\\\nINS_options = \\\nbcftools_options = -i 'FORMAT/DV[0]>4|(INFO/SR>1&INFO/PE>5)|(INFO/SR>5&INFO/PE>1)'\\\n\\\n[scones]\\\nwindow = 10000\\\nbest_model = 0\\\n\\\n[bvatools_bincounter]\\\nother_options = --minMapQ 15\\\ncluster_cpu = 2\\\njava_other_options = -XX:ParallelGCThreads=1\\\ncluster_mem = 31G\\\nram = 30G\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[scones_pair]\\\ncluster_cpu = %(HALF_CPU)s\\\nother_options = -t %(cluster_cpu)s\\\ngc_map_bedfile = %(assembly_dir)s/annotations/mappabilityGC/hg1k_v37_bin10kb_GCMAP.bed\\\ncluster_walltime = -l walltime=24:00:0\\\n\\\n[scones_filter]\\\nbest_model = 3\\\ncluster_walltime = 3:00:0\\\ncluster_mem = 8G\\\n\\\n[scones_annotate]\\\nexcluded_regions_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.TeloCentro_1Mb.bed\\\ngenes_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.gene.bed\\\ndgv_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.DGV.bed\\\nmicrosat_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.microsat.bed\\\nrepeat_masker_bed = %(assembly_dir)s/annotations/bed/%(scientific_name)s.%(assembly)s.repeatMasker.bed\\\nautosome_size_file = %(assembly_dir)s/%(scientific_name)s.%(assembly)s.AutosomeSize.txt\\\ncluster_walltime = -l walltime=3:00:0\\\ncluster_mem = 8G\\\n\\\n[run_arriba]\\\nblacklist = 
$ARRIBA_HOME/database/blacklist_hg38_GRCh38_2018-01-13.tsv\\\n\\\n",
     "operation_platform": "beluga",
     "operation_cmd_line": "module purge\nmodule load python/3.10.2 mugqic/genpipes/4.2.0\ntumor_pair.py \\\n    -j slurm \\\n    -t ensemble \\\n    -r readset.txt \\\n    -p pair.txt \\\n    -s 1-40 \\\n    -c $MUGQIC_PIPELINES_HOME/pipelines/tumor_pair/tumor_pair.base.ini \\\n        $MUGQIC_PIPELINES_HOME/pipelines/common_ini/beluga.ini \\\n        $MUGQIC_PIPELINES_HOME/pipelines/tumor_pair/tumor_pair.extras.ini \\\n        $MUGQIC_PIPELINES_HOME/resources/genomes/config/Homo_sapiens.GRCh38.ini \\\n  > Tumour_Pair_run.sh\nrm -r Tumour_Pair_CHUNKS;\nmkdir Tumour_Pair_CHUNKS;\n$MUGQIC_PIPELINES_HOME/utils/chunk_genpipes.sh -n 100 Tumour_Pair_run.sh Tumour_Pair_CHUNKS",
-    "operation_name": "genpipes_tumour_pair",
+    "operation_name": "GenPipes_TumorPair.ensemble",
     "sample": [
                 {
                     "sample_name": "MoHQ-CM-1-10-3393-1DT",

From d262f872a18564735a5ab185233b5498b4ca61e9 Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Thu, 19 Oct 2023 16:01:09 -0400
Subject: [PATCH 02/19] Debug missing arg

---
 project_tracking/db_action.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index c0c3a4e..78b8812 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -920,7 +920,8 @@ def ingest_genpipes(project_id: str, ingest_data, session=None):
         name=ingest_data[vb.OPERATION_CONFIG_NAME],
         version=ingest_data[vb.OPERATION_CONFIG_VERSION],
         md5sum=ingest_data[vb.OPERATION_CONFIG_MD5SUM],
-        data=bytes(ingest_data[vb.OPERATION_CONFIG_DATA], 'utf-8')
+        data=bytes(ingest_data[vb.OPERATION_CONFIG_DATA], 'utf-8'),
+        session=session
         )
 
     operation = Operation(

From d91dcfcc09a1ad0655f90e6a27a3de796a2ed41f Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Thu, 26 Oct 2023 13:06:44 -0400
Subject: [PATCH 03/19] Using project rather than project_name or project_id in
 routes - test 1

---
 project_tracking/api/project.py | 51 ++++++++++++++++-----------------
 project_tracking/db_action.py   | 11 +++++--
 2 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index 1299897..6593fdd 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -29,43 +29,40 @@ def unroll(string):
 
     return unroll_list
 
-def capitalize(func):
+def standardize_project(func):
     """
-    Capitalize project_name
+    Standardize project used by the client: allowing ID and name to be used
     """
     @functools.wraps(func)
-    def wrap(*args, project_name = None, **kwargs):
-        if isinstance(project_name, str):
-            project_name = project_name.upper()
-            if project_name not in [p.name for p in db_action.projects(project_name)]:
-                return abort(
-                    404,
-                    f"Project {project_name} not found"
-                    )
-        return func(*args, project_name=project_name, **kwargs)
+    def wrap(*args, project=None, **kwargs):
+        project_dict = {
+            "id": None,
+            "name": None
+        }
+        if project is None:
+            pass
+        elif project.isdigit():
+            project_dict["id"] = project
+        else:
+            project_dict["name"] = project.upper()
+            project_dict["id"] = db_action.name_to_id("Project", project_dict["name"])
+
+        return func(*args, project=project_dict, **kwargs)
     return wrap
 
 
 @bp.route('/')
-@bp.route('/<string:project_id>')
-# @capitalize
-def projects(project_id: str = None):
+@bp.route('/<string:project>')
+@standardize_project
+def projects(project: str = None):
     """
-    patient_id: uses the form "/project/1"
-    patient_name: uses the form "/project/'?name=<project_name>'"
+    project: uses the form "/project/1" for project ID and "/project/name" for project name
     return: list of all the details of the poject with name "project_name" or ID "project_id"
     """
-    query = request.args
-    # valid query
-    name = None
-    if query.get('name'):
-        name = query['name']
-    if name:
-        project_id = db_action.name_to_id("Project", name)
 
-    if project_id is None:
-        return {"Project list": [f"id: {i.id}, name: {i.name}" for i in db_action.projects(project_id)]}
-    return [i.flat_dict for i in db_action.projects(project_id)]
+    if project["id"] is None:
+        return {"Project list": [f"id: {project.id}, name: {project.name}" for project in db_action.projects(project["id"])]}
+    return [i.flat_dict for i in db_action.projects(project["id"])]
 
 
 
@@ -255,7 +252,7 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id
 @bp.route('/<string:project_id>/patients/<string:patient_id>/metrics')
 @bp.route('/<string:project_id>/samples/<string:sample_id>/metrics')
 @bp.route('/<string:project_id>/readsets/<string:readset_id>/metrics')
-@capitalize
+# @capitalize
 def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, metric_id: str=None):
     """
     metric_id: uses the form "1,3-8,9". Select metric by ids
diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index 78b8812..6dee400 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -92,9 +92,16 @@ def projects(project_id=None, session=None):
             .where(Project.id.in_(project_id))
             )
     else:
-        raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {session.scalars(select(Project.name)).unique().all()}")
+        all_available = [f"id: {project.id}, name: {project.name}" for project in session.scalars(select(Project)).unique().all()]
+        raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {all_available}")
 
-    return session.scalars(stmt).unique().all()
+    ret = session.scalars(stmt).unique().all()
+
+    if not ret:
+        all_available = [f"id: {project.id}, name: {project.name}" for project in session.scalars(select(Project)).unique().all()]
+        raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {all_available}")
+
+    return ret
 
 def metrics_deliverable(project_id: str, deliverable: bool, patient_id=None, sample_id=None, readset_id=None, metric_id=None):
     """

From 8a982eebaabe12940bca994924db80602c35afc6 Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Tue, 31 Oct 2023 11:39:58 -0400
Subject: [PATCH 04/19] Adding project converter/checker

---
 README.md                       |   4 +-
 project_tracking/api/project.py | 220 +++++++++++++++++++-------------
 project_tracking/db_action.py   |  88 +++++++------
 3 files changed, 183 insertions(+), 129 deletions(-)

diff --git a/README.md b/README.md
index 2323edb..0c5f8bd 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ python -m venv venv
 source ./venv/bin/activate
 pip install --upgrade pip
 pip install -e  .
-# Seting the db url is optiopnal, the default will be in the app installation folder
+# Setting the db url is optional, the default will be in the app installation folder
 export C3G_SQLALCHEMY_DATABASE_URI="sqlite:////tmp/my_test_db.sql"
 # initialyse the db
 flask --app project_tracking init-db
@@ -45,7 +45,7 @@ DOC:
 
 
 
-Once the server is running, you can still initialise the database, you can even flush it clear of any entry with 
+Once the server is running, you can still initialise the database, you can even flush it clear of any entry with:
 
 ```bash
 # WARNING this will erase all entry to you Database!
diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index 6593fdd..266777d 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -29,46 +29,56 @@ def unroll(string):
 
     return unroll_list
 
-def standardize_project(func):
+def convcheck_project(func):
     """
-    Standardize project used by the client: allowing ID and name to be used
+    Converting project name to project id and checking if project found
     """
     @functools.wraps(func)
     def wrap(*args, project=None, **kwargs):
-        project_dict = {
-            "id": None,
-            "name": None
-        }
         if project is None:
-            pass
+            project_id = None
         elif project.isdigit():
-            project_dict["id"] = project
+            project_id = project
+            if not db_action.projects(project_id):
+                all_available = [f"id: {project.id}, name: {project.name}" for project in db_action.projects()]
+                project_id = {"DB_ACTION_WARNING": f"Requested Project '{project}' doesn't exist. Please try again with one of the following: {all_available}"}
         else:
-            project_dict["name"] = project.upper()
-            project_dict["id"] = db_action.name_to_id("Project", project_dict["name"])
+            project_id = db_action.name_to_id("Project", project.upper())
+            if not project_id:
+                all_available = [f"id: {project.id}, name: {project.name}" for project in db_action.projects()]
+                project_id = {"DB_ACTION_WARNING": f"Requested Project '{project}' doesn't exist. Please try again with one of the following: {all_available}"}
 
-        return func(*args, project=project_dict, **kwargs)
+        return func(*args, project_id=project_id, **kwargs)
     return wrap
 
+def sanity_check(item, action_output):
+    if not action_output:
+        ret = {"DB_ACTION_WARNING": f"Requested {item} doesn't exist."}
+    else:
+        ret = [i.flat_dict for i in action_output]
+    return ret
+
 
 @bp.route('/')
 @bp.route('/<string:project>')
-@standardize_project
-def projects(project: str = None):
+@convcheck_project
+def projects(project_id: str = None):
     """
     project: uses the form "/project/1" for project ID and "/project/name" for project name
     return: list of all the details of the poject with name "project_name" or ID "project_id"
     """
 
-    if project["id"] is None:
-        return {"Project list": [f"id: {project.id}, name: {project.name}" for project in db_action.projects(project["id"])]}
-    return [i.flat_dict for i in db_action.projects(project["id"])]
+    if project_id is None:
+        return {"Project list": [f"id: {project.id}, name: {project.name}" for project in db_action.projects(project_id)]}
+    if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+        return project_id
 
+    return [i.flat_dict for i in db_action.projects(project_id)]
 
 
-@bp.route('/<string:project_id>/patients')
-@bp.route('/<string:project_id>/patients/<string:patient_id>')
-# @capitalize
+@bp.route('/<string:project>/patients')
+@bp.route('/<string:project>/patients/<string:patient_id>')
+@convcheck_project
 def patients(project_id: str, patient_id: str = None):
     """
     patient_id: uses the form "1,3-8,9"
@@ -110,29 +120,28 @@ def patients(project_id: str, patient_id: str = None):
         for patient_name in name.split(","):
             patient_id.extend(db_action.name_to_id("Patient", patient_name))
 
+    if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+        return project_id
+
     # pair being either True or False
     if pair is not None:
-        return [
-        i.flat_dict for i in db_action.patient_pair(
+        action_output = db_action.patient_pair(
             project_id,
             patient_id=patient_id,
             pair=pair,
             tumor=tumor
             )
-        ]
     else:
-        return [
-        i.flat_dict for i in db_action.patients(
+        action_output = db_action.patients(
             project_id,
             patient_id=patient_id
             )
-        ]
-
+    return sanity_check("Patient", action_output)
 
 
-@bp.route('/<string:project_id>/samples')
-@bp.route('/<string:project_id>/samples/<string:sample_id>')
-# @capitalize
+@bp.route('/<string:project>/samples')
+@bp.route('/<string:project>/samples/<string:sample_id>')
+@convcheck_project
 def samples(project_id: str, sample_id: str = None):
     """
     sample_id: uses the form "1,3-8,9", if not provides, all sample are returned
@@ -153,11 +162,16 @@ def samples(project_id: str, sample_id: str = None):
         for sample_name in name.split(","):
             sample_id.extend(db_action.name_to_id("Sample", sample_name))
 
-    return [i.flat_dict for i in db_action.samples(project_id, sample_id=sample_id)]
+    if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+        return project_id
 
-@bp.route('/<string:project_id>/readsets')
-@bp.route('/<string:project_id>/readsets/<string:readset_id>')
-# @capitalize
+    action_output = db_action.samples(project_id, sample_id=sample_id)
+
+    return sanity_check("Sample", action_output)
+
+@bp.route('/<string:project>/readsets')
+@bp.route('/<string:project>/readsets/<string:readset_id>')
+@convcheck_project
 def readsets(project_id: str, readset_id: str=None):
     """
     readset_id: uses the form "1,3-8,9", if not provided, all readsets are returned
@@ -178,14 +192,19 @@ def readsets(project_id: str, readset_id: str=None):
         for readset_name in name.split(","):
             readset_id.extend(db_action.name_to_id("Readset", readset_name))
 
-    return [i.flat_dict for i in db_action.readsets(project_id, readset_id=readset_id)]
+    if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+        return project_id
+
+    action_output = db_action.readsets(project_id, readset_id=readset_id)
 
+    return sanity_check("Readset", action_output)
 
-@bp.route('/<string:project_id>/files/<string:file_id>')
-@bp.route('/<string:project_id>/patients/<string:patient_id>/files')
-@bp.route('/<string:project_id>/samples/<string:sample_id>/files')
-@bp.route('/<string:project_id>/readsets/<string:readset_id>/files')
-# @capitalize
+
+@bp.route('/<string:project>/files/<string:file_id>')
+@bp.route('/<string:project>/patients/<string:patient_id>/files')
+@bp.route('/<string:project>/samples/<string:sample_id>/files')
+@bp.route('/<string:project>/readsets/<string:readset_id>/files')
+@convcheck_project
 def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, file_id: str=None):
     """
     file_id: uses the form "1,3-8,9". Select file by ids
@@ -225,8 +244,7 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id
         file_id = unroll(file_id)
 
     if deliverable is not None:
-        return [
-        i.flat_dict for i in db_action.files_deliverable(
+        action_output = db_action.files_deliverable(
             project_id=project_id,
             patient_id=patient_id,
             sample_id=sample_id,
@@ -234,25 +252,28 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id
             file_id=file_id,
             deliverable=deliverable
             )
-        ]
     else:
-        return [
-        i.flat_dict for i in db_action.files(
+        action_output = db_action.files(
             project_id=project_id,
             patient_id=patient_id,
             sample_id=sample_id,
             readset_id=readset_id,
             file_id=file_id
             )
-        ]
+
+    if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+        return project_id
+
+    return sanity_check("File", action_output)
 
 
-@bp.route('/<string:project_id>/metrics', methods=['POST'])
-@bp.route('/<string:project_id>/metrics/<string:metric_id>')
-@bp.route('/<string:project_id>/patients/<string:patient_id>/metrics')
-@bp.route('/<string:project_id>/samples/<string:sample_id>/metrics')
-@bp.route('/<string:project_id>/readsets/<string:readset_id>/metrics')
-# @capitalize
+
+@bp.route('/<string:project>/metrics', methods=['POST'])
+@bp.route('/<string:project>/metrics/<string:metric_id>')
+@bp.route('/<string:project>/patients/<string:patient_id>/metrics')
+@bp.route('/<string:project>/samples/<string:sample_id>/metrics')
+@bp.route('/<string:project>/readsets/<string:readset_id>/metrics')
+@convcheck_project
 def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, metric_id: str=None):
     """
     metric_id: uses the form "1,3-8,9". Select metric by ids
@@ -313,8 +334,7 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_
         metric_id = unroll(metric_id)
 
     if deliverable is not None:
-        return [
-        i.flat_dict for i in db_action.metrics_deliverable(
+        action_output = db_action.metrics_deliverable(
             project_id=project_id,
             patient_id=patient_id,
             sample_id=sample_id,
@@ -322,24 +342,26 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_
             metric_id=metric_id,
             deliverable=deliverable
             )
-        ]
     else:
-        return [
-        i.flat_dict for i in db_action.metrics(
+        action_output = db_action.metrics(
             project_id=project_id,
             patient_id=patient_id,
             sample_id=sample_id,
             readset_id=readset_id,
             metric_id=metric_id
             )
-        ]
 
-@bp.route('/<string:project_id>/samples/<string:sample_id>/readsets')
-# @capitalize
+    if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+        return project_id
+
+    return sanity_check("Metric", action_output)
+
+@bp.route('/<string:project>/samples/<string:sample_id>/readsets')
+@convcheck_project
 def readsets_from_samples(project_id: str, sample_id: str):
     """
     sample_id: uses the form "1,3-8,9"
-    return: readsets for slected sample_id
+    return: readsets for selected sample_id
     """
 
     query = request.args
@@ -355,11 +377,16 @@ def readsets_from_samples(project_id: str, sample_id: str):
         for sample_name in name.split(","):
             sample_id.extend(db_action.name_to_id("Sample", sample_name))
 
-    return [i.flat_dict for i in db_action.readsets(project_id, sample_id)]
+    if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+        return project_id
 
+    action_output = db_action.readsets(project_id, sample_id)
 
-@bp.route('/<string:project_id>/digest_readset_file', methods=['POST'])
-# @capitalize
+    return sanity_check("Metric", action_output)
+
+
+@bp.route('/<string:project>/digest_readset_file', methods=['POST'])
+@convcheck_project
 def digest_readset_file(project_id: str):
     """
     POST: list of Readset/Sample Name or id
@@ -373,10 +400,14 @@ def digest_readset_file(project_id: str):
             flash('Data does not seems to be json')
             return redirect(request.url)
 
+        if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+            return project_id
+
         return db_action.digest_readset_file(project_id=project_id, digest_data=ingest_data)
 
-@bp.route('/<string:project_id>/digest_pair_file', methods=['POST'])
-# @capitalize
+
+@bp.route('/<string:project>/digest_pair_file', methods=['POST'])
+@convcheck_project
 def digest_pair_file(project_id: str):
     """
     POST: list of Readset/Sample Name or id
@@ -390,16 +421,21 @@ def digest_pair_file(project_id: str):
             flash('Data does not seems to be json')
             return redirect(request.url)
 
+        if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+            return project_id
+
         return db_action.digest_pair_file(project_id=project_id, digest_data=ingest_data)
 
-@bp.route('/<string:project_id>/ingest_run_processing', methods=['GET', 'POST'])
-# @capitalize
+
+@bp.route('/<string:project>/ingest_run_processing', methods=['GET', 'POST'])
+@convcheck_project
 def ingest_run_processing(project_id: str):
     """
-    POST:  json describing run processing
+    POST: json describing run processing
     return: The Operation object
     """
 
+    # Is this if required?
     if request.method == 'GET':
         return abort(
             405,
@@ -413,22 +449,23 @@ def ingest_run_processing(project_id: str):
             flash('Data does not seems to be json')
             return redirect(request.url)
 
+        if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+            return project_id
+
         project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
-        if [int(project_id)] != project_id_from_name:
-            return abort(
-                400,
-                f"project name in POST {ingest_data[vc.PROJECT_NAME].upper()} not Valid"
-                )
+
+        if project_id != project_id_from_name:
+            return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"}
 
         return [i.flat_dict for i in db_action.ingest_run_processing(project_id=project_id, ingest_data=ingest_data)]
 
 
-@bp.route('/<string:project_id>/ingest_transfer', methods=['POST'])
-# @capitalize
+@bp.route('/<string:project>/ingest_transfer', methods=['POST'])
+@convcheck_project
 def ingest_transfer(project_id: str):
     """
-    Add new location to file that has already been moved before
-    the db was created
+    POST: json describing a transfer
+    return: The Operation object
     """
     if request.method == 'POST':
         try:
@@ -437,16 +474,20 @@ def ingest_transfer(project_id: str):
             flash('Data does not seems to be json')
             return redirect(request.url)
 
+        if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+            return project_id
+
         return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)]
 
-@bp.route('/<string:project_id>/ingest_genpipes', methods=['GET', 'POST'])
-# @capitalize
+@bp.route('/<string:project>/ingest_genpipes', methods=['GET', 'POST'])
+@convcheck_project
 def ingest_genpipes(project_id: str):
     """
-    POST:  json describing genpipes
+    POST: json describing genpipes
     return: The Operation object and Jobs associated
     """
 
+    # Is this if required?
     if request.method == 'GET':
         return abort(
             405,
@@ -460,25 +501,26 @@ def ingest_genpipes(project_id: str):
             flash('Data does not seems to be json')
             return redirect(request.url)
 
+        if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+            return project_id
+
         project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
-        if [int(project_id)] != project_id_from_name:
-            return abort(
-                400,
-                f"project name in POST {ingest_data[vc.PROJECT_NAME].upper()} not in the database, {project_id} required"
-                )
+
+        if project_id != project_id_from_name:
+            return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"}
 
         output = db_action.ingest_genpipes(project_id=project_id, ingest_data=ingest_data)
         operation = output[0].flat_dict
         jobs = [job.flat_dict for job in output[1]]
         return [operation, jobs]
 
-@bp.route('/<string:project_id>/digest_unanalyzed', methods=['POST'])
+@bp.route('/<string:project>/digest_unanalyzed', methods=['POST'])
+@convcheck_project
 def digest_unanalyzed(project_id: str):
     """
     POST: list of Readset/Sample Name or id
     return: Readsets or Samples unanalyzed
     """
-    logger.debug(f"\n\n{project_id}\n\n")
     if request.method == 'POST':
         try:
             ingest_data = request.get_json(force=True)
@@ -486,5 +528,7 @@ def digest_unanalyzed(project_id: str):
             flash('Data does not seems to be json')
             return redirect(request.url)
 
+        if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
+            return project_id
+
         return db_action.digest_unanalyzed(project_id=project_id, digest_data=ingest_data)
-        # return [i.flat_dict for i in db_action.digest_unanalyzed(project_id=project_id, digest_data=ingest_data)]
diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index 6dee400..4f3ee18 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -91,17 +91,8 @@ def projects(project_id=None, session=None):
             select(Project)
             .where(Project.id.in_(project_id))
             )
-    else:
-        all_available = [f"id: {project.id}, name: {project.name}" for project in session.scalars(select(Project)).unique().all()]
-        raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {all_available}")
-
-    ret = session.scalars(stmt).unique().all()
-
-    if not ret:
-        all_available = [f"id: {project.id}, name: {project.name}" for project in session.scalars(select(Project)).unique().all()]
-        raise DidNotFindError(f"Requested Project doesn't exist. Please try again with one of the following: {all_available}")
 
-    return ret
+    return session.scalars(stmt).unique().all()
 
 def metrics_deliverable(project_id: str, deliverable: bool, patient_id=None, sample_id=None, readset_id=None, metric_id=None):
     """
@@ -469,14 +460,22 @@ def patients(project_id=None, patient_id=None):
         project_id = [project_id]
 
     if project_id is None and patient_id is None:
-        stmt = (select(Patient))
+        stmt = select(Patient)
     elif patient_id is None and project_id:
-        stmt = (select(Patient).join(Patient.project).where(Project.id.in_(project_id)))
+        stmt = (
+            select(Patient)
+            .join(Patient.project)
+            .where(Project.id.in_(project_id))
+            )
     else:
         if isinstance(patient_id, int):
             patient_id = [patient_id]
-        stmt = (select(Patient).where(Patient.id.in_(patient_id))
-                .where(Project.id.in_(project_id)))
+        stmt = (
+            select(Patient)
+            .where(Patient.id.in_(patient_id))
+            .join(Patient.project)
+            .where(Project.id.in_(project_id))
+            )
 
     return session.scalars(stmt).unique().all()
 
@@ -488,20 +487,26 @@ def samples(project_id=None, sample_id=None):
     session = database.get_session()
     if isinstance(project_id, str):
         project_id = [project_id]
+
     if project_id is None:
         stmt = (select(Sample))
     elif sample_id is None:
-        stmt = (select(Sample).join(Sample.patient).join(Patient.project)
-                .where(Project.id.in_(project_id)))
+        stmt = (
+            select(Sample)
+            .join(Sample.patient)
+            .join(Patient.project)
+            .where(Project.id.in_(project_id))
+            )
     else:
         if isinstance(sample_id, int):
             sample_id = [sample_id]
-        stmt = (select(Sample)
-                .where(Sample.id.in_(sample_id))
-                .join(Sample.patient)
-                .join(Patient.project)
-                .where(Project.id.in_(project_id))
-                )
+        stmt = (
+            select(Sample)
+            .where(Sample.id.in_(sample_id))
+            .join(Sample.patient)
+            .join(Patient.project)
+            .where(Project.id.in_(project_id))
+            )
 
     return session.scalars(stmt).unique().all()
 
@@ -750,11 +755,14 @@ def digest_readset_file(project_id: str, digest_data, session=None):
     samples = []
     readsets = []
     output = []
+    errors = {
+        "DB_ACTION_WARNING": []
+        }
+
+    location_endpoint = None
 
     if vb.LOCATION_ENDPOINT in digest_data.keys():
         location_endpoint = digest_data[vb.LOCATION_ENDPOINT]
-    else:
-        location_endpoint = None
 
     if vb.SAMPLE_NAME in digest_data.keys():
         for sample_name in digest_data[vb.SAMPLE_NAME]:
@@ -792,9 +800,13 @@ def digest_readset_file(project_id: str, digest_data, session=None):
                 raise DidNotFindError(table="Readset", attribute="id", query=readset_id)
     if readsets:
         set(readsets)
-        readset_files = []
         for readset in readsets:
-            bed = ""
+            readset_files = []
+            logger.debug(f"\n\n{readset}\n\n")
+            bed = None
+            fastq1 = None
+            fastq2 = None
+            bam = None
             for operation in [operation for operation in readset.operations if operation.name == 'run_processing']:
                 for job in operation.jobs:
                     for file in job.files:
@@ -802,33 +814,27 @@ def digest_readset_file(project_id: str, digest_data, session=None):
                             readset_files.append(file)
             for file in readset_files:
                 if file.type in ["fastq", "fq", "fq.gz", "fastq.gz"]:
-                    bam = ""
                     if file.extra_metadata["read_type"] == "R1":
                         if location_endpoint:
                             for location in file.locations:
                                 if location_endpoint == location.endpoint:
                                     fastq1 = location.uri.split("://")[-1]
-                        else:
-                            fastq1 = file.locations[-1].uri.split("://")[-1]
+                            if not fastq1:
+                                errors["DB_ACTION_WARNING"].append(f"Looking for fastq R1 file for Sample {readset.sample.name} and Readset {readset.name} in '{location_endpoint}', file only exists on {[l.endpoint for l in file.locations]} system")
                     elif file.extra_metadata["read_type"] == "R2":
                         if location_endpoint:
                             for location in file.locations:
                                 if location_endpoint == location.endpoint:
-                                    fastq1 = location.uri.split("://")[-1]
-                        else:
-                            fastq1 = file.locations[-1].uri.split("://")[-1]
+                                    fastq2 = location.uri.split("://")[-1]
+                            if not fastq2:
+                                errors["DB_ACTION_WARNING"].append(f"Looking for fastq R2 file for Sample {readset.sample.name} and Readset {readset.name} in '{location_endpoint}', file only exists on {[l.endpoint for l in file.locations]} system")
                 elif file.type == "bam":
-                    # bam = ""
                     if location_endpoint:
                         for location in file.locations:
                             if location_endpoint == location.endpoint:
                                 bam = location.uri.split("://")[-1]
                         if not bam:
-                            raise DidNotFindError(f"looking for bam file for Sample {readset.sample.name} and Readset {readset.name} in '{location_endpoint}', file only exists on {[l.endpoint for l in file.locations]} system")
-                    else:
-                        bam = file.locations[-1].uri.split("://")[-1]
-                    fastq1 = ""
-                    fastq2 = ""
+                            errors["DB_ACTION_WARNING"].append(f"Looking for bam file for Sample {readset.sample.name} and Readset {readset.name} in '{location_endpoint}', file only exists on {[l.endpoint for l in file.locations]} system")
                 if file.type == "bed":
                     bed = file.name
             readset_line = {
@@ -847,7 +853,11 @@ def digest_readset_file(project_id: str, digest_data, session=None):
                 "BAM": bam
                 }
             output.append(readset_line)
-    return json.dumps(output)
+    if errors["DB_ACTION_WARNING"]:
+        ret = errors
+    else:
+        ret = output
+    return json.dumps(ret)
 
 def digest_pair_file(project_id: str, digest_data, session=None):
     """Digesting pair file fields for GenPipes"""

From 83bc30cb571cb3e59d239a67fd572e4577ce75af Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Tue, 31 Oct 2023 11:48:24 -0400
Subject: [PATCH 05/19] Debug pytest

---
 tests/test_ingestion.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/test_ingestion.py b/tests/test_ingestion.py
index c19f81e..a92047b 100644
--- a/tests/test_ingestion.py
+++ b/tests/test_ingestion.py
@@ -14,13 +14,11 @@
 
 def test_create_api(client, run_processing_json, app):
     project_name = run_processing_json[vb.PROJECT_NAME]
-    project_id = "1"
     response = client.get(f'admin/create_project/{project_name}')
     assert response.status_code == 200
     assert json.loads(response.data)['name'] == f"{project_name}"
     assert json.loads(response.data)['id'] == 1
-    # project_id = db_action.name_to_id("Project", project_name)
-    response = client.post(f'project/{project_id}/ingest_run_processing', data=json.dumps(run_processing_json))
+    response = client.post(f'project/{project_name}/ingest_run_processing', data=json.dumps(run_processing_json))
     assert response.status_code == 200
     assert json.loads(response.data)[0]['name'] == "run_processing"
     assert json.loads(response.data)[0]['id'] == 1

From 84bbdc07583b11708c2404a60f220a140b1539d5 Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Thu, 2 Nov 2023 15:48:11 -0400
Subject: [PATCH 06/19] Add a guard to avoid failure when project_name in the
 json is set to null

---
 project_tracking/api/project.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index 266777d..6e2d1b3 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -452,10 +452,11 @@ def ingest_run_processing(project_id: str):
         if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
             return project_id
 
-        project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
 
-        if project_id != project_id_from_name:
-            return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"}
+        if ingest_data[vc.PROJECT_NAME]:
+            project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
+            if project_id != project_id_from_name:
+                return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"}
 
         return [i.flat_dict for i in db_action.ingest_run_processing(project_id=project_id, ingest_data=ingest_data)]
 
@@ -504,10 +505,11 @@ def ingest_genpipes(project_id: str):
         if isinstance(project_id, dict) and project_id.get("DB_ACTION_WARNING"):
             return project_id
 
-        project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
 
-        if project_id != project_id_from_name:
-            return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"}
+        if ingest_data[vc.PROJECT_NAME]:
+            project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
+            if project_id != project_id_from_name:
+                return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"}
 
         output = db_action.ingest_genpipes(project_id=project_id, ingest_data=ingest_data)
         operation = output[0].flat_dict

From 85e6a2feacba838639895ebf6a9ec873d292c0cc Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Thu, 9 Nov 2023 14:55:32 -0500
Subject: [PATCH 07/19] Small fix: cast name_to_id result to str in ingest routes

---
 project_tracking/api/project.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index 6e2d1b3..2e29d5c 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -454,7 +454,7 @@ def ingest_run_processing(project_id: str):
 
 
         if ingest_data[vc.PROJECT_NAME]:
-            project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
+            project_id_from_name = str(db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())[0])
             if project_id != project_id_from_name:
                 return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"}
 
@@ -507,7 +507,7 @@ def ingest_genpipes(project_id: str):
 
 
         if ingest_data[vc.PROJECT_NAME]:
-            project_id_from_name = db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())
+            project_id_from_name = str(db_action.name_to_id("Project", ingest_data[vc.PROJECT_NAME].upper())[0])
             if project_id != project_id_from_name:
                 return {"DB_ACTION_WARNING": f"Requested Project {project_id_from_name} in the input json is not matching the Project in the route {project_id}"}
 

From e6389d2e8ea9bb94e4229580075cc8329addc04b Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Thu, 9 Nov 2023 15:04:45 -0500
Subject: [PATCH 08/19] Convert the project_id returned by name_to_id from a
 list of int into a str

---
 project_tracking/api/project.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index 2e29d5c..c9ceb47 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -43,7 +43,7 @@ def wrap(*args, project=None, **kwargs):
                 all_available = [f"id: {project.id}, name: {project.name}" for project in db_action.projects()]
                 project_id = {"DB_ACTION_WARNING": f"Requested Project '{project}' doesn't exist. Please try again with one of the following: {all_available}"}
         else:
-            project_id = db_action.name_to_id("Project", project.upper())
+            project_id = str(db_action.name_to_id("Project", project.upper())[0])
             if not project_id:
                 all_available = [f"id: {project.id}, name: {project.name}" for project in db_action.projects()]
                 project_id = {"DB_ACTION_WARNING": f"Requested Project '{project}' doesn't exist. Please try again with one of the following: {all_available}"}

From d935efd0f746e9aa5602d5a4ee11dfae28b05bd2 Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Mon, 20 Nov 2023 15:44:10 -0500
Subject: [PATCH 09/19] Adding a new attribute for experiment:
 nucleic_acid_type

---
 project_tracking/db_action.py  | 169 +++++++++++++++++++++++++++++----
 project_tracking/model.py      |  25 ++++-
 project_tracking/vocabulary.py |   2 +
 tests/conftest.py              |   2 +-
 tests/data/run_processing.json |   5 +
 tests/test_serialization.py    |   3 +-
 6 files changed, 182 insertions(+), 24 deletions(-)

diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index 4f3ee18..a163a82 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -53,13 +53,22 @@ def to_dict(self):
         return rv
 
 class DidNotFindError(Error):
-    """DidNotFind"""
+    """DidNotFindError"""
     def __init__(self, message=None, table=None, attribute=None, query=None):
         super().__init__(message)
         if message:
             self.message = message
         else:
-            self.message = f"{table} with {attribute} {query} doesn't exist on database"
+            self.message = f"'{table}' with '{attribute}' '{query}' doesn't exist on database"
+
+class RequestError(Error):
+    """RequestError"""
+    def __init__(self, message=None, argument=None):
+        super().__init__(message)
+        if message:
+            self.message = message
+        else:
+            self.message = f"For current request '{argument}' is required"
 
 def name_to_id(model_class, name, session=None):
     """
@@ -590,6 +599,7 @@ def ingest_run_processing(project_id: str, ingest_data, session=None):
                 experiment = Experiment.from_attributes(
                     sequencing_technology=readset_json[vb.EXPERIMENT_SEQUENCING_TECHNOLOGY],
                     type=readset_json[vb.EXPERIMENT_TYPE],
+                    nucleic_acid_type=readset_json[vb.EXPERIMENT_NUCLEIC_ACID_TYPE],
                     library_kit=readset_json[vb.EXPERIMENT_LIBRARY_KIT],
                     kit_expiration_date=kit_expiration_date,
                     session=session
@@ -752,6 +762,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
     if not session:
         session = database.get_session()
 
+    patients = []
     samples = []
     readsets = []
     output = []
@@ -760,13 +771,59 @@ def digest_readset_file(project_id: str, digest_data, session=None):
         }
 
     location_endpoint = None
-
     if vb.LOCATION_ENDPOINT in digest_data.keys():
         location_endpoint = digest_data[vb.LOCATION_ENDPOINT]
 
+    if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys():
+        nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE]
+    else:
+        raise RequestError(argument="experiment_nucleic_acid_type")
+
+    if vb.PATIENT_NAME in digest_data.keys():
+        for patient_name in digest_data[vb.PATIENT_NAME]:
+            patient = session.scalars(
+                select(Patient)
+                .where(Patient.name == patient_name)
+                .join(Patient.samples)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
+            if patient:
+                patients.append(patient)
+            else:
+                raise DidNotFindError(table="Patient", attribute="name", query=patient_name)
+    if vb.PATIENT_ID in digest_data.keys():
+        for patient_id in digest_data[vb.PATIENT_ID]:
+            # logger.debug(f"\n\n{patient_id}\n\n")
+            patient = session.scalars(
+                select(Patient)
+                .where(Patient.id == patient_id)
+                .join(Patient.samples)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
+            if patient:
+                patients.append(patient)
+            else:
+                raise DidNotFindError(table="Patient", attribute="id", query=patient_id)
+    if patients:
+        set(patients)
+        for patient in patients:
+            for sample in patient.samples:
+                for readset in sample.readsets:
+                    readsets.append(readset)
+
     if vb.SAMPLE_NAME in digest_data.keys():
         for sample_name in digest_data[vb.SAMPLE_NAME]:
-            sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first()
+            sample = session.scalars(
+                select(Sample)
+                .where(Sample.name == sample_name)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if sample:
                 samples.append(sample)
             else:
@@ -774,7 +831,13 @@ def digest_readset_file(project_id: str, digest_data, session=None):
     if vb.SAMPLE_ID in digest_data.keys():
         for sample_id in digest_data[vb.SAMPLE_ID]:
             # logger.debug(f"\n\n{sample_id}\n\n")
-            sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first()
+            sample = session.scalars(
+                select(Sample)
+                .where(Sample.id == sample_id)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if sample:
                 samples.append(sample)
             else:
@@ -784,16 +847,27 @@ def digest_readset_file(project_id: str, digest_data, session=None):
         for sample in samples:
             for readset in sample.readsets:
                 readsets.append(readset)
+
     if vb.READSET_NAME in digest_data.keys():
         for readset_name in digest_data[vb.READSET_NAME]:
-            readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first()
+            readset = session.scalars(
+                select(Readset)
+                .where(Readset.name == readset_name)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if readset:
                 readsets.append(readset)
             else:
                 raise DidNotFindError(table="Readset", attribute="name", query=readset_name)
     if vb.READSET_ID in digest_data.keys():
         for readset_id in digest_data[vb.READSET_ID]:
-            readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first()
+            readset = session.scalars(
+                select(Readset)
+                .where(Readset.id == readset_id)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if readset:
                 readsets.append(readset)
             else:
@@ -802,7 +876,6 @@ def digest_readset_file(project_id: str, digest_data, session=None):
         set(readsets)
         for readset in readsets:
             readset_files = []
-            logger.debug(f"\n\n{readset}\n\n")
             bed = None
             fastq1 = None
             fastq2 = None
@@ -869,35 +942,93 @@ def digest_pair_file(project_id: str, digest_data, session=None):
     # readsets = []
     output = []
 
+    if vb.EXPERIMENT_NUCLEIC_ACID_TYPE in digest_data.keys():
+        nucleic_acid_type = digest_data[vb.EXPERIMENT_NUCLEIC_ACID_TYPE]
+    else:
+        raise RequestError(argument="experiment_nucleic_acid_type")
+
+    if vb.PATIENT_NAME in digest_data.keys():
+        for patient_name in digest_data[vb.PATIENT_NAME]:
+            patient = session.scalars(
+                select(Patient)
+                .where(Patient.name == patient_name)
+                .join(Patient.samples)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
+            if patient:
+                patients.append(patient)
+            else:
+                raise DidNotFindError(table="Patient", attribute="name", query=patient_name)
+    if vb.PATIENT_ID in digest_data.keys():
+        for patient_id in digest_data[vb.PATIENT_ID]:
+            patient = session.scalars(
+                select(Patient)
+                .where(Patient.id == patient_id)
+                .join(Patient.samples)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
+            if patient:
+                patients.append(patient)
+            else:
+                raise DidNotFindError(table="Patient", attribute="id", query=patient_id)
+    if patients:
+        set(patients)
+        for patient in patients:
+            for sample in patient.samples:
+                samples.append(sample)
+
     if vb.SAMPLE_NAME in digest_data.keys():
         for sample_name in digest_data[vb.SAMPLE_NAME]:
-            sample = session.scalars(select(Sample).where(Sample.name == sample_name)).unique().first()
-            # logger.info(f"\n\n{sample}\n\n")
+            sample = session.scalars(
+                select(Sample)
+                .where(Sample.name == sample_name)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if sample:
                 samples.append(sample)
             else:
                 raise DidNotFindError(table="Sample", attribute="name", query=sample_name)
     if vb.SAMPLE_ID in digest_data.keys():
         for sample_id in digest_data[vb.SAMPLE_ID]:
-            sample = session.scalars(select(Sample).where(Sample.id == sample_id)).unique().first()
+            sample = session.scalars(
+                select(Sample)
+                .where(Sample.id == sample_id)
+                .join(Sample.readsets)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if sample:
                 samples.append(sample)
             else:
                 raise DidNotFindError(table="Sample", attribute="id", query=sample_id)
     if vb.READSET_NAME in digest_data.keys():
         for readset_name in digest_data[vb.READSET_NAME]:
-            readset = session.scalars(select(Readset).where(Readset.name == readset_name)).unique().first()
+            readset = session.scalars(
+                select(Readset)
+                .where(Readset.name == readset_name)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if readset:
                 samples.append(readset.sample)
-                # readsets.append(readset)
             else:
                 raise DidNotFindError(table="Readset", attribute="name", query=readset_name)
     if vb.READSET_ID in digest_data.keys():
         for readset_id in digest_data[vb.READSET_ID]:
-            readset = session.scalars(select(Readset).where(Readset.id == readset_id)).unique().first()
+            readset = session.scalars(
+                select(Readset)
+                .where(Readset.id == readset_id)
+                .join(Readset.experiment)
+                .where(Experiment.nucleic_acid_type == nucleic_acid_type)
+                ).unique().first()
             if readset:
                 samples.append(readset.sample)
-                # readsets.append(readset)
             else:
                 raise DidNotFindError(table="Readset", attribute="id", query=readset_id)
     if samples:
@@ -1066,7 +1197,7 @@ def digest_unanalyzed(project_id: str, digest_data, session=None):
     run_name = digest_data["run_name"]
     if run_name:
         run_id = name_to_id("Run", run_name)[0]
-    experiment_sequencing_technology = digest_data["experiment_sequencing_technology"]
+    experiment_nucleic_acid_type = digest_data["experiment_nucleic_acid_type"]
     location_endpoint = digest_data["location_endpoint"]
 
     if sample_name_flag:
@@ -1096,17 +1227,15 @@ def digest_unanalyzed(project_id: str, digest_data, session=None):
             stmt.where(Run.id == run_id)
             .join(Readset.run)
             )
-    if experiment_sequencing_technology:
+    if experiment_nucleic_acid_type:
         stmt = (
-            stmt.where(Experiment.sequencing_technology == experiment_sequencing_technology)
+            stmt.where(Experiment.nucleic_acid_type == experiment_nucleic_acid_type)
             .join(Readset.experiment)
             )
 
-    # logger.debug(f"\n\n{stmt}\n\n")
     output = {
         "location_endpoint": location_endpoint,
         key: session.scalars(stmt).unique().all()
     }
-    # logger.debug(f"\n\n{session.scalars(stmt).unique().all()}\n\n")
 
     return json.dumps(output)
diff --git a/project_tracking/model.py b/project_tracking/model.py
index 2b74951..8ad1ee6 100644
--- a/project_tracking/model.py
+++ b/project_tracking/model.py
@@ -35,6 +35,12 @@
 
 from . import database
 
+class NucleicAcidTypeEnum(enum.Enum):
+    """nucleic_acid_type enum"""
+    DNA = "DNA"
+    RNA = "RNA"
+
+
 class LaneEnum(enum.Enum):
     """
     lane enum
@@ -43,6 +49,10 @@ class LaneEnum(enum.Enum):
     TWO = "2"
     THREE = "3"
     FOUR = "4"
+    FIVE = "5"
+    SIX = "6"
+    SEVEN = "7"
+    EIGHT = "8"
 
 
 class SequencingTypeEnum(enum.Enum):
@@ -90,6 +100,7 @@ class Base(DeclarativeBase):
     # this is needed for the enum to work properly right now
     # see https://github.com/sqlalchemy/sqlalchemy/discussions/8856
     type_annotation_map = {
+        NucleicAcidTypeEnum: Enum(NucleicAcidTypeEnum),
         LaneEnum: Enum(LaneEnum),
         SequencingTypeEnum: Enum(SequencingTypeEnum),
         StatusEnum: Enum(StatusEnum),
@@ -346,6 +357,7 @@ class Experiment(BaseTable):
         id integer [PK]
         sequencing_technology text
         type text
+        nucleic_acid_type nucleic_acid_type
         library_kit text
         kit_expiration_date text
         deprecated boolean
@@ -358,13 +370,22 @@ class Experiment(BaseTable):
 
     sequencing_technology: Mapped[str] = mapped_column(default=None, nullable=True)
     type: Mapped[str] = mapped_column(default=None, nullable=True)
+    nucleic_acid_type: Mapped[NucleicAcidTypeEnum] = mapped_column(default=None, nullable=False)
     library_kit: Mapped[str] = mapped_column(default=None, nullable=True)
     kit_expiration_date: Mapped[datetime] = mapped_column(default=None, nullable=True)
 
     readsets: Mapped[list["Readset"]] = relationship(back_populates="experiment")
 
     @classmethod
-    def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None, kit_expiration_date=None, session=None):
+    def from_attributes(
+        cls,
+        nucleic_acid_type,
+        sequencing_technology=None,
+        type=None,
+        library_kit=None,
+        kit_expiration_date=None,
+        session=None
+        ):
         """
         get experiment if it exist, set it if it does not exist
         """
@@ -374,6 +395,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None
             select(cls)
                 .where(cls.sequencing_technology == sequencing_technology)
                 .where(cls.type == type)
+                .where(cls.nucleic_acid_type == nucleic_acid_type)
                 .where(cls.library_kit == library_kit)
                 .where(cls.kit_expiration_date == kit_expiration_date)
         ).first()
@@ -381,6 +403,7 @@ def from_attributes(cls, sequencing_technology=None, type=None, library_kit=None
             experiment = cls(
                 sequencing_technology=sequencing_technology,
                 type=type,
+                nucleic_acid_type=nucleic_acid_type,
                 library_kit=library_kit,
                 kit_expiration_date=kit_expiration_date
             )
diff --git a/project_tracking/vocabulary.py b/project_tracking/vocabulary.py
index 90d8cfc..28920d5 100644
--- a/project_tracking/vocabulary.py
+++ b/project_tracking/vocabulary.py
@@ -10,6 +10,7 @@
 
 # patient table
 PATIENT = "patient"
+PATIENT_ID = "patient_id"
 PATIENT_FMS_ID = "patient_fms_id"
 PATIENT_NAME = "patient_name"
 PATIENT_COHORT = "patient_cohort"
@@ -25,6 +26,7 @@
 # experiment table
 EXPERIMENT_SEQUENCING_TECHNOLOGY = "experiment_sequencing_technology"
 EXPERIMENT_TYPE = "experiment_type"
+EXPERIMENT_NUCLEIC_ACID_TYPE = "experiment_nucleic_acid_type"
 EXPERIMENT_LIBRARY_KIT = "experiment_library_kit"
 EXPERIMENT_KIT_EXPIRATION_DATE = "experiment_kit_expiration_date"
 EXPERIMENT_TYPE_LIST = ["PCR-FREE", "RNASEQ"]
diff --git a/tests/conftest.py b/tests/conftest.py
index 5a9d125..99b407a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -28,7 +28,7 @@ def pre_filled_model():
                          project=project)
 
     sequencing_technology = 'Fancy Buzzword'
-    exp = model.Experiment(sequencing_technology=sequencing_technology)
+    exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA)
     pa_name = "P_O"
     pa = model.Patient(name=pa_name, project=project)
     sa_name = 'gros_bobo'
diff --git a/tests/data/run_processing.json b/tests/data/run_processing.json
index 4ff31b8..5e85ebd 100644
--- a/tests/data/run_processing.json
+++ b/tests/data/run_processing.json
@@ -21,6 +21,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "PCR-free",
+                            "experiment_nucleic_acid_type": "DNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-JG-9-23-15000863775-19933DT.A01433_0157_1",
@@ -73,6 +74,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "PCR-free",
+                            "experiment_nucleic_acid_type": "DNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-JG-9-23-15000936286-19866DN.A01433_0157_2",
@@ -133,6 +135,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "RNASeq",
+                            "experiment_nucleic_acid_type": "RNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-CM-1-3-6929-1RT.A01433_0157_3",
@@ -191,6 +194,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "PCR-free",
+                            "experiment_nucleic_acid_type": "DNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-CM-1-3-15000863775-19933DT.A01433_0157_1",
@@ -243,6 +247,7 @@
                         {
                             "experiment_sequencing_technology": null,
                             "experiment_type": "PCR-free",
+                            "experiment_nucleic_acid_type": "DNA",
                             "experiment_library_kit": null,
                             "experiment_kit_expiration_date": null,
                             "readset_name": "MoHQ-CM-1-3-15000936286-19866DN.A01433_0157_2",
diff --git a/tests/test_serialization.py b/tests/test_serialization.py
index fa75b15..473c8c9 100644
--- a/tests/test_serialization.py
+++ b/tests/test_serialization.py
@@ -8,7 +8,6 @@ def test_serialization(not_app_db):
     op_config_version = 0.1
     op_config_name = 'generic_index'
     op_name = 'ingest'
-    sequencing_technology = 'Fancy Buzzword'
     pa_name = "P_O"
     sa_name = 'gros_bobo'
     ru_name = "cure the Conglomerat old director's partner 01"
@@ -28,7 +27,7 @@ def test_serialization(not_app_db):
                          operation_config=op_c,
                          project=project)
 
-    exp = model.Experiment(sequencing_technology=sequencing_technology)
+    exp = model.Experiment(nucleic_acid_type=model.NucleicAcidTypeEnum.DNA)
     pa = model.Patient(name=pa_name, project=project)
     sa = model.Sample(name=sa_name, patient=pa)
     ru = model.Run(instrument=instrument, name=ru_name)

From dae290ac167c9619961404e510088e6755a8a190 Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Tue, 21 Nov 2023 13:59:31 -0500
Subject: [PATCH 10/19] Debug

---
 project_tracking/db_action.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index a163a82..c293589 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -939,6 +939,7 @@ def digest_pair_file(project_id: str, digest_data, session=None):
 
     pair_dict = {}
     samples = []
+    patients = []
     # readsets = []
     output = []
 

From 66e19443dca7d17c71c0e9a74c6e391925370d6a Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Tue, 21 Nov 2023 16:41:30 -0500
Subject: [PATCH 11/19] Consolidating docstring + adding
 experiment_nucleic_acid_type to digest_unanalyzed output

---
 project_tracking/api/project.py | 32 ++++++++++++++++----------------
 project_tracking/db_action.py   |  1 +
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index c9ceb47..6591009 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -81,8 +81,8 @@ def projects(project_id: str = None):
 @convcheck_project
 def patients(project_id: str, patient_id: str = None):
     """
-    patient_id: uses the form "1,3-8,9"
-    return: list all patient or selected patient that are also par of <project>
+    patient_id: uses the form "1,3-8,9", if not provided all patients are returned
+    return: list all patients or selected patients, belonging to <project>
 
     Query:
     (pair, tumor):  Default (None, true)
@@ -144,8 +144,8 @@ def patients(project_id: str, patient_id: str = None):
 @convcheck_project
 def samples(project_id: str, sample_id: str = None):
     """
-    sample_id: uses the form "1,3-8,9", if not provides, all sample are returned
-    return: all or selected sample that are in sample_id and part of project
+    sample_id: uses the form "1,3-8,9", if not provided all samples are returned
+    return: list all patients or selected samples, belonging to <project>
     """
 
     query = request.args
@@ -174,8 +174,8 @@ def samples(project_id: str, sample_id: str = None):
 @convcheck_project
 def readsets(project_id: str, readset_id: str=None):
     """
-    readset_id: uses the form "1,3-8,9", if not provided, all readsets are returned
-    return: selected readsets that are in sample_id and part of project
+    readset_id: uses the form "1,3-8,9", if not provided all readsets are returned
+    return: list all patients or selected readsets, belonging to <project>
     """
 
     query = request.args
@@ -212,7 +212,7 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id
     sample_id: uses the form "1,3-8,9". Select file by sample ids
     redeaset_id: uses the form "1,3-8,9". Select file by readset ids
 
-    return: selected files
+    return: selected files, belonging to <project>
 
     Query:
     (deliverable):  Default (None)
@@ -281,13 +281,13 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_
     sample_id: uses the form "1,3-8,9". Select metric by sample ids
     redeaset_id: uses the form "1,3-8,9". Select metric by readset ids
 
-    We also accespt POST data with comma separeted list
+    We also accept POST data with comma separeted list
     metric_name = <NAME> [,NAME] [...]
     readset_name = <NAME> [,NAME] [...]
     sample_name = <NAME> [,NAME] [...]
     patient_name = <NAME> [,NAME] [...]
 
-    return: selected metrics
+    return: selected metrics, belonging to <project>
 
     Query:
     (deliverable):  Default (None)
@@ -361,7 +361,7 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_
 def readsets_from_samples(project_id: str, sample_id: str):
     """
     sample_id: uses the form "1,3-8,9"
-    return: readsets for selected sample_id
+    return: selected readsets belonging to <sample_id>
     """
 
     query = request.args
@@ -382,14 +382,14 @@ def readsets_from_samples(project_id: str, sample_id: str):
 
     action_output = db_action.readsets(project_id, sample_id)
 
-    return sanity_check("Metric", action_output)
+    return sanity_check("Readset", action_output)
 
 
 @bp.route('/<string:project>/digest_readset_file', methods=['POST'])
 @convcheck_project
 def digest_readset_file(project_id: str):
     """
-    POST: list of Readset/Sample Name or id
+    POST: json holding the list of Patient/Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type
     return: all information to create a "Genpipes readset file"
     """
 
@@ -410,7 +410,7 @@ def digest_readset_file(project_id: str):
 @convcheck_project
 def digest_pair_file(project_id: str):
     """
-    POST: list of Readset/Sample Name or id
+    POST: json holding the list of Patient/Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type
     return: all information to create a "Genpipes pair file"
     """
 
@@ -484,7 +484,7 @@ def ingest_transfer(project_id: str):
 @convcheck_project
 def ingest_genpipes(project_id: str):
     """
-    POST: json describing genpipes
+    POST: json describing genpipes analysis
     return: The Operation object and Jobs associated
     """
 
@@ -520,8 +520,8 @@ def ingest_genpipes(project_id: str):
 @convcheck_project
 def digest_unanalyzed(project_id: str):
     """
-    POST: list of Readset/Sample Name or id
-    return: Readsets or Samples unanalyzed
+    POST: json holding the list of Sample/Readset Name or id AND location endpoint + experiment nucleic_acid_type
+    return: Samples/Readsets unanalyzed with location endpoint + experiment nucleic_acid_type
     """
     if request.method == 'POST':
         try:
diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index c293589..4c1ad22 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -1236,6 +1236,7 @@ def digest_unanalyzed(project_id: str, digest_data, session=None):
 
     output = {
         "location_endpoint": location_endpoint,
+        "experiment_nucleic_acid_type": experiment_nucleic_acid_type,
         key: session.scalars(stmt).unique().all()
     }
 

From 620fdf44a9aea3e01cd8772df8e3c471d2e7a16b Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Tue, 21 Nov 2023 16:56:11 -0500
Subject: [PATCH 12/19] Code cleaning + docstring consolidation

---
 project_tracking/api/project.py | 55 ++++++++++++++-------------------
 1 file changed, 23 insertions(+), 32 deletions(-)

diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index 6591009..98d218f 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -64,7 +64,8 @@ def sanity_check(item, action_output):
 @convcheck_project
 def projects(project_id: str = None):
     """
-    project: uses the form "/project/1" for project ID and "/project/name" for project name
+    GET:
+        project: uses the form "/project/1" for project ID and "/project/name" for project name
     return: list of all the details of the poject with name "project_name" or ID "project_id"
     """
 
@@ -81,7 +82,8 @@ def projects(project_id: str = None):
 @convcheck_project
 def patients(project_id: str, patient_id: str = None):
     """
-    patient_id: uses the form "1,3-8,9", if not provided all patients are returned
+    GET:
+        patient_id: uses the form "1,3-8,9", if not provided all patients are returned
     return: list all patients or selected patients, belonging to <project>
 
     Query:
@@ -144,7 +146,8 @@ def patients(project_id: str, patient_id: str = None):
 @convcheck_project
 def samples(project_id: str, sample_id: str = None):
     """
-    sample_id: uses the form "1,3-8,9", if not provided all samples are returned
+    GET:
+        sample_id: uses the form "1,3-8,9", if not provided all samples are returned
     return: list all patients or selected samples, belonging to <project>
     """
 
@@ -174,7 +177,8 @@ def samples(project_id: str, sample_id: str = None):
 @convcheck_project
 def readsets(project_id: str, readset_id: str=None):
     """
-    readset_id: uses the form "1,3-8,9", if not provided all readsets are returned
+    GET:
+        readset_id: uses the form "1,3-8,9", if not provided all readsets are returned
     return: list all patients or selected readsets, belonging to <project>
     """
 
@@ -207,11 +211,11 @@ def readsets(project_id: str, readset_id: str=None):
 @convcheck_project
 def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, file_id: str=None):
     """
-    file_id: uses the form "1,3-8,9". Select file by ids
-    patient_id: uses the form "1,3-8,9". Select file by patient ids
-    sample_id: uses the form "1,3-8,9". Select file by sample ids
-    redeaset_id: uses the form "1,3-8,9". Select file by readset ids
-
+    GET:
+        file_id: uses the form "1,3-8,9". Select file by ids
+        patient_id: uses the form "1,3-8,9". Select file by patient ids
+        sample_id: uses the form "1,3-8,9". Select file by sample ids
+        readset_id: uses the form "1,3-8,9". Select file by readset ids
     return: selected files, belonging to <project>
 
     Query:
@@ -276,10 +280,12 @@ def files(project_id: str, patient_id: str=None, sample_id: str=None, readset_id
 @convcheck_project
 def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_id: str=None, metric_id: str=None):
     """
-    metric_id: uses the form "1,3-8,9". Select metric by ids
-    patient_id: uses the form "1,3-8,9". Select metric by patient ids
-    sample_id: uses the form "1,3-8,9". Select metric by sample ids
-    redeaset_id: uses the form "1,3-8,9". Select metric by readset ids
+    GET:
+        metric_id: uses the form "1,3-8,9". Select metric by ids
+        patient_id: uses the form "1,3-8,9". Select metric by patient ids
+        sample_id: uses the form "1,3-8,9". Select metric by sample ids
+        readset_id: uses the form "1,3-8,9". Select metric by readset ids
+    return: selected metrics, belonging to <project>
 
     We also accept POST data with comma separeted list
     metric_name = <NAME> [,NAME] [...]
@@ -287,8 +293,6 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_
     sample_name = <NAME> [,NAME] [...]
     patient_name = <NAME> [,NAME] [...]
 
-    return: selected metrics, belonging to <project>
-
     Query:
     (deliverable):  Default (None)
     The deliverable query allows to get all metrics labelled as deliverable
@@ -360,7 +364,8 @@ def metrics(project_id: str, patient_id: str=None, sample_id: str=None, readset_
 @convcheck_project
 def readsets_from_samples(project_id: str, sample_id: str):
     """
-    sample_id: uses the form "1,3-8,9"
+    GET:
+        sample_id: uses the form "1,3-8,9"
     return: selected readsets belonging to <sample_id>
     """
 
@@ -427,7 +432,7 @@ def digest_pair_file(project_id: str):
         return db_action.digest_pair_file(project_id=project_id, digest_data=ingest_data)
 
 
-@bp.route('/<string:project>/ingest_run_processing', methods=['GET', 'POST'])
+@bp.route('/<string:project>/ingest_run_processing', methods=['POST'])
 @convcheck_project
 def ingest_run_processing(project_id: str):
     """
@@ -435,13 +440,6 @@ def ingest_run_processing(project_id: str):
     return: The Operation object
     """
 
-    # Is this if required?
-    if request.method == 'GET':
-        return abort(
-            405,
-            "Use post method to ingest runs"
-            )
-
     if request.method == 'POST':
         try:
             ingest_data = request.get_json(force=True)
@@ -480,7 +478,7 @@ def ingest_transfer(project_id: str):
 
         return [i.flat_dict for i in db_action.ingest_transfer(project_id=project_id, ingest_data=ingest_data)]
 
-@bp.route('/<string:project>/ingest_genpipes', methods=['GET', 'POST'])
+@bp.route('/<string:project>/ingest_genpipes', methods=['POST'])
 @convcheck_project
 def ingest_genpipes(project_id: str):
     """
@@ -488,13 +486,6 @@ def ingest_genpipes(project_id: str):
     return: The Operation object and Jobs associated
     """
 
-    # Is this if required?
-    if request.method == 'GET':
-        return abort(
-            405,
-            "Use post method to ingest genpipes analysis"
-            )
-
     if request.method == 'POST':
         try:
             ingest_data = request.get_json(force=True)

From 8674a8a2cfb5b1d6e947075ba1c033d9e7fe67dc Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Wed, 29 Nov 2023 13:19:26 -0500
Subject: [PATCH 13/19] Changing error message

---
 project_tracking/api/project.py |  2 +-
 project_tracking/db_action.py   | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/project_tracking/api/project.py b/project_tracking/api/project.py
index 98d218f..4edd7b1 100644
--- a/project_tracking/api/project.py
+++ b/project_tracking/api/project.py
@@ -95,7 +95,7 @@ def patients(project_id: str, patient_id: str = None):
             Return: a subset of patient who have Tumor=False & Tumor=True samples
         (false, true):
             return: a subset of patient who only have Tumor=True samples
-        (false, true):
+        (false, false):
             return: a subset of patient who only have Tumor=false samples
     """
 
diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index 4c1ad22..d6af3c7 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -792,7 +792,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
             if patient:
                 patients.append(patient)
             else:
-                raise DidNotFindError(table="Patient", attribute="name", query=patient_name)
+                raise DidNotFindError(f"'Patient' with 'name' '{patient_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if vb.PATIENT_ID in digest_data.keys():
         for patient_id in digest_data[vb.PATIENT_ID]:
             # logger.debug(f"\n\n{patient_id}\n\n")
@@ -807,7 +807,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
             if patient:
                 patients.append(patient)
             else:
-                raise DidNotFindError(table="Patient", attribute="id", query=patient_id)
+                raise DidNotFindError(f"'Patient' with 'id' '{patient_id}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if patients:
         set(patients)
         for patient in patients:
@@ -827,7 +827,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
             if sample:
                 samples.append(sample)
             else:
-                raise DidNotFindError(table="Sample", attribute="name", query=sample_name)
+                raise DidNotFindError(f"'Sample' with 'name' '{sample_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if vb.SAMPLE_ID in digest_data.keys():
         for sample_id in digest_data[vb.SAMPLE_ID]:
             # logger.debug(f"\n\n{sample_id}\n\n")
@@ -841,7 +841,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
             if sample:
                 samples.append(sample)
             else:
-                raise DidNotFindError(table="Sample", attribute="id", query=sample_id)
+                raise DidNotFindError(f"'Sample' with 'id' '{sample_id}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if samples:
         set(samples)
         for sample in samples:
@@ -859,7 +859,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
             if readset:
                 readsets.append(readset)
             else:
-                raise DidNotFindError(table="Readset", attribute="name", query=readset_name)
+                raise DidNotFindError(f"'Readset' with 'name' '{readset_name}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if vb.READSET_ID in digest_data.keys():
         for readset_id in digest_data[vb.READSET_ID]:
             readset = session.scalars(
@@ -871,7 +871,7 @@ def digest_readset_file(project_id: str, digest_data, session=None):
             if readset:
                 readsets.append(readset)
             else:
-                raise DidNotFindError(table="Readset", attribute="id", query=readset_id)
+                raise DidNotFindError(f"'Readset' with 'id' '{readset_id}' AND 'nucleic_acid_type' '{nucleic_acid_type}' doesn't exist on database")
     if readsets:
         set(readsets)
         for readset in readsets:

From 0f80cf8162db7fb5e31a6626c9cfb500d8a42be6 Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Wed, 29 Nov 2023 15:21:07 -0500
Subject: [PATCH 14/19] Accepting metric without flag set

---
 project_tracking/db_action.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index d6af3c7..1d49502 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -651,10 +651,14 @@ def ingest_run_processing(project_id: str, ingest_data, session=None):
                         metric_deliverable = metric_json[vb.METRIC_DELIVERABLE]
                     else:
                         metric_deliverable = False
+                    if vb.METRIC_FLAG in metric_json:
+                        metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG])
+                    else:
+                        metric_flag = None
                     Metric(
                         name=metric_json[vb.METRIC_NAME],
                         value=metric_json[vb.METRIC_VALUE],
-                        flag=FlagEnum(metric_json[vb.METRIC_FLAG]),
+                        flag=metric_flag,
                         deliverable=metric_deliverable,
                         job=job,
                         readsets=[readset]
@@ -1150,10 +1154,14 @@ def ingest_genpipes(project_id: str, ingest_data, session=None):
                             metric_deliverable = metric_json[vb.METRIC_DELIVERABLE]
                         else:
                             metric_deliverable = False
+                        if vb.METRIC_FLAG in metric_json:
+                            metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG])
+                        else:
+                            metric_flag = None
                         Metric(
                             name=metric_json[vb.METRIC_NAME],
                             value=metric_json[vb.METRIC_VALUE],
-                            flag=FlagEnum(metric_json[vb.METRIC_FLAG]),
+                            flag=metric_flag,
                             deliverable=metric_deliverable,
                             job=job,
                             readsets=[readset]

From 24319cbb5508ca62c1da7c3bda1787dce7156176 Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Wed, 29 Nov 2023 15:47:42 -0500
Subject: [PATCH 15/19] Allowing job_status being null if job hasn't been
 submitted

---
 project_tracking/db_action.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index 1d49502..575f345 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -1113,9 +1113,13 @@ def ingest_genpipes(project_id: str, ingest_data, session=None):
                     job_stop = datetime.strptime(job_json[vb.JOB_STOP], vb.DATE_LONG_FMT)
                 except TypeError:
                     job_stop = None
+                if job_json[vb.JOB_STATUS]:
+                    job_status = StatusEnum(job_json[vb.JOB_STATUS])
+                else:
+                    job_status = None
                 job = Job(
                     name=job_json[vb.JOB_NAME],
-                    status=StatusEnum(job_json[vb.JOB_STATUS]),
+                    status=job_status,
                     start=job_start,
                     stop=job_stop,
                     operation=operation

From 2375fdd7935daa3c8c119e1cf5a27a9a2cbdc96b Mon Sep 17 00:00:00 2001
From: Paul Stretenowich <paul.stretenowich@mcgill.ca>
Date: Wed, 29 Nov 2023 16:11:50 -0500
Subject: [PATCH 16/19] Skipping null job during ingesting genpipes as we don't
 want files not generated

---
 project_tracking/db_action.py | 109 +++++++++++++++++-----------------
 1 file changed, 55 insertions(+), 54 deletions(-)

diff --git a/project_tracking/db_action.py b/project_tracking/db_action.py
index 575f345..261494e 100644
--- a/project_tracking/db_action.py
+++ b/project_tracking/db_action.py
@@ -1113,63 +1113,64 @@ def ingest_genpipes(project_id: str, ingest_data, session=None):
                     job_stop = datetime.strptime(job_json[vb.JOB_STOP], vb.DATE_LONG_FMT)
                 except TypeError:
                     job_stop = None
+                # Check if job_status exists otherwise skip it
                 if job_json[vb.JOB_STATUS]:
-                    job_status = StatusEnum(job_json[vb.JOB_STATUS])
-                else:
-                    job_status = None
-                job = Job(
-                    name=job_json[vb.JOB_NAME],
-                    status=job_status,
-                    start=job_start,
-                    stop=job_stop,
-                    operation=operation
-                    )
-                for file_json in job_json[vb.FILE]:
-                    suffixes = Path(file_json[vb.FILE_NAME]).suffixes
-                    file_type = os.path.splitext(file_json[vb.FILE_NAME])[-1][1:]
-                    if ".gz" in suffixes:
-                        file_type = "".join(suffixes[-2:])
-                    if vb.FILE_DELIVERABLE in file_json:
-                        file_deliverable = file_json[vb.FILE_DELIVERABLE]
-                    else:
-                        file_deliverable = False
-                    # Need to have an the following otherwise assigning extra_metadata to None converts null into json in the db
-                    if vb.FILE_EXTRA_METADATA in file_json.keys():
-                        file = File(
-                            name=file_json[vb.FILE_NAME],
-                            type=file_type,
-                            extra_metadata=file_json[vb.FILE_EXTRA_METADATA],
-                            deliverable=file_deliverable,
-                            readsets=[readset],
-                            jobs=[job]
-                            )
-                    else:
-                        file = File(
-                            name=file_json[vb.FILE_NAME],
-                            type=file_type,
-                            deliverable=file_deliverable,
-                            readsets=[readset],
-                            jobs=[job]
-                            )
-                    location = Location.from_uri(uri=file_json[vb.LOCATION_URI], file=file, session=session)
-                if vb.METRIC in job_json.keys():
-                    for metric_json in job_json[vb.METRIC]:
-                        if vb.METRIC_DELIVERABLE in metric_json:
-                            metric_deliverable = metric_json[vb.METRIC_DELIVERABLE]
+                    job = Job(
+                        name=job_json[vb.JOB_NAME],
+                        status=StatusEnum(job_json[vb.JOB_STATUS]),
+                        start=job_start,
+                        stop=job_stop,
+                        operation=operation
+                        )
+                    for file_json in job_json[vb.FILE]:
+                        suffixes = Path(file_json[vb.FILE_NAME]).suffixes
+                        file_type = os.path.splitext(file_json[vb.FILE_NAME])[-1][1:]
+                        if ".gz" in suffixes:
+                            file_type = "".join(suffixes[-2:])
+                        if vb.FILE_DELIVERABLE in file_json:
+                            file_deliverable = file_json[vb.FILE_DELIVERABLE]
                         else:
-                            metric_deliverable = False
-                        if vb.METRIC_FLAG in metric_json:
-                            metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG])
+                            file_deliverable = False
+                        # Need to have the following otherwise assigning extra_metadata to None converts null into json in the db
+                        if vb.FILE_EXTRA_METADATA in file_json.keys():
+                            file = File(
+                                name=file_json[vb.FILE_NAME],
+                                type=file_type,
+                                extra_metadata=file_json[vb.FILE_EXTRA_METADATA],
+                                deliverable=file_deliverable,
+                                readsets=[readset],
+                                jobs=[job]
+                                )
                         else:
-                            metric_flag = None
-                        Metric(
-                            name=metric_json[vb.METRIC_NAME],
-                            value=metric_json[vb.METRIC_VALUE],
-                            flag=metric_flag,
-                            deliverable=metric_deliverable,
-                            job=job,
-                            readsets=[readset]
-                            )
+                            file = File(
+                                name=file_json[vb.FILE_NAME],
+                                type=file_type,
+                                deliverable=file_deliverable,
+                                readsets=[readset],
+                                jobs=[job]
+                                )
+                        location = Location.from_uri(uri=file_json[vb.LOCATION_URI], file=file, session=session)
+                    if vb.METRIC in job_json.keys():
+                        for metric_json in job_json[vb.METRIC]:
+                            if vb.METRIC_DELIVERABLE in metric_json:
+                                metric_deliverable = metric_json[vb.METRIC_DELIVERABLE]
+                            else:
+                                metric_deliverable = False
+                            if vb.METRIC_FLAG in metric_json:
+                                metric_flag = FlagEnum(metric_json[vb.METRIC_FLAG])
+                            else:
+                                metric_flag = None
+                            Metric(
+                                name=metric_json[vb.METRIC_NAME],
+                                value=metric_json[vb.METRIC_VALUE],
+                                flag=metric_flag,
+                                deliverable=metric_deliverable,
+                                job=job,
+                                readsets=[readset]
+                                )
+                # If job status is null then skip it as we don't want to ingest data not generated
+                else:
+                    pass
 
                 session.add(job)
                 session.flush()

From 3a126312281290a48a5fbb1b8944c213f37bd55b Mon Sep 17 00:00:00 2001
From: P-O Quirion <pioliqui@gmail.com>
Date: Tue, 19 Dec 2023 16:04:18 -0500
Subject: [PATCH 17/19] Build and push Image. Dockerfile to Containerfile

---
 .github/workflows/run_test.yml | 38 ++++++++++++++++++++++++++++++++--
 Dockerfile => Containerfile    |  3 ++-
 2 files changed, 38 insertions(+), 3 deletions(-)
 rename Dockerfile => Containerfile (85%)

diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml
index 0222484..44235a0 100644
--- a/.github/workflows/run_test.yml
+++ b/.github/workflows/run_test.yml
@@ -11,9 +11,17 @@ on:
       - 'main'
       - 'dev'
 
-jobs:
-  build:
+    tags:
+      - '[0-9]+.[0-9]+.[0-9]+'
+
+env:
+  REGISTRY_USER: c3genomics+github_pusher
+  IMAGE_REGISTRY: quay.io
+  REGISTRY_PASSWORD: ${{ secrets.QUAY_ROBOT_TOKEN }}
+  IMAGE: c3genomics/project_tracking
 
+jobs:
+  test:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
@@ -41,3 +49,29 @@ jobs:
       - name: Test with pytest
         run: |
           pytest -v
+  build:
+    needs: test
+    if: startsWith(github.ref, 'refs/tags')
+    name: Build image
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - name: set tag
+      run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV
+    - name: Buildah Action
+      uses: redhat-actions/buildah-build@v2
+      with:
+        image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }}
+        tag: ${{ env.RELEASE_VERSION }} latest_release
+        containerfiles: ./Containerfile
+    - name: Push to repo
+      uses: redhat-actions/push-to-registry@v2
+      with:
+          username: ${{ env.REGISTRY_USER }}
+          password: ${{ env.REGISTRY_PASSWORD }}
+          registry: ${{ env.IMAGE_REGISTRY }}
+          image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }}
+          tag: ${{ env.RELEASE_VERSION }} latest_release
+    - name: Print image url
+      run: echo "Image pushed to ${{ steps.push-to-repo.outputs.registry-paths }}"
+      
diff --git a/Dockerfile b/Containerfile
similarity index 85%
rename from Dockerfile
rename to Containerfile
index 5a421ca..55e7aaf 100644
--- a/Dockerfile
+++ b/Containerfile
@@ -1,4 +1,5 @@
-FROM fedora:36
+FROM fedora:39
+MAINTAINER P-O Quirion po.quirion@mcgill.ca 
 ENV APP=project_tracking
 
 RUN mkdir /app /sqlite

From 476ae6efdf842dc902c413aa9697392fa69e74e7 Mon Sep 17 00:00:00 2001
From: P-O Quirion <pioliqui@gmail.com>
Date: Tue, 19 Dec 2023 16:16:49 -0500
Subject: [PATCH 18/19] typo in tag

---
 .github/workflows/run_test.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml
index 44235a0..572295d 100644
--- a/.github/workflows/run_test.yml
+++ b/.github/workflows/run_test.yml
@@ -62,7 +62,7 @@ jobs:
       uses: redhat-actions/buildah-build@v2
       with:
         image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }}
-        tag: ${{ env.RELEASE_VERSION }} latest_release
+        tags: ${{ env.RELEASE_VERSION }} latest_release
         containerfiles: ./Containerfile
     - name: Push to repo
       uses: redhat-actions/push-to-registry@v2
@@ -71,7 +71,7 @@ jobs:
           password: ${{ env.REGISTRY_PASSWORD }}
           registry: ${{ env.IMAGE_REGISTRY }}
           image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }}
-          tag: ${{ env.RELEASE_VERSION }} latest_release
+          tags: ${{ env.RELEASE_VERSION }} latest_release
     - name: Print image url
       run: echo "Image pushed to ${{ steps.push-to-repo.outputs.registry-paths }}"
       

From 556206ed6e8a3250d85b84260081859e83cd4fd9 Mon Sep 17 00:00:00 2001
From: P-O Quirion <pioliqui@gmail.com>
Date: Tue, 19 Dec 2023 16:35:47 -0500
Subject: [PATCH 19/19] typo in action

---
 .github/workflows/run_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/run_test.yml b/.github/workflows/run_test.yml
index 572295d..14ad43c 100644
--- a/.github/workflows/run_test.yml
+++ b/.github/workflows/run_test.yml
@@ -70,7 +70,7 @@ jobs:
           username: ${{ env.REGISTRY_USER }}
           password: ${{ env.REGISTRY_PASSWORD }}
           registry: ${{ env.IMAGE_REGISTRY }}
-          image: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE }}
+          image: ${{ env.IMAGE }}
           tags: ${{ env.RELEASE_VERSION }} latest_release
     - name: Print image url
       run: echo "Image pushed to ${{ steps.push-to-repo.outputs.registry-paths }}"