forked from mpi2/impc-etl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathluigi-dev.cfg
144 lines (108 loc) · 4 KB
/
luigi-dev.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
[DEFAULT]
# Development/test settings shared by the task sections further down.
# Assuming this file is loaded through Luigi's configparser-based reader,
# keys in [DEFAULT] act as fallbacks for every other section.
# Style used in this file: key=value (no spaces around '='), '#' full-line
# comments only.
# Base output path for all the tasks
output_path=tests/data/output/
# DCC Input Files
dcc_experiment_xml_path=tests/data/xml/
dcc_specimen_xml_path=tests/data/xml/
# GenTar reports
gentar_colonies_tsv_path=tests/data/tracking/phenotyping_colonies_2022-01-20T02_20_58.632294.tsv
gentar_gene_status_path=tests/data/tracking/gene_interest_2022-01-28T09_26_58.512980.tsv
product_report_tsv_path=tests/data/tracking/product-2022-01-06_18-52-56.tsv
# IMPReSS crawling base URL
impress_api_url=https://api.mousephenotype.org/impress/
# Proxy URL to access Internet from the cluster
# (explicitly empty in this development configuration)
http_proxy=
# MGI Reports
mgi_strain_report_path=tests/data/mgi/MGI_Strain.rpt
mgi_gene_pheno_report_path=tests/data/mgi/MGI_GenePheno.rpt
mgi_homology_report_path=tests/data/mgi/HGNC_AllianceHomology.rpt
mgi_phenotypic_allele_report_path=tests/data/mgi/MGI_PhenotypicAllele.rpt
mgi_markers_report_path=tests/data/mgi/MRK_List1.rpt
# OBO ontologies directory
obo_ontology_input_path=tests/data/ontologies/
# IMPC ontology pipeline output directory (see https://github.com/mpi2/impc-ontology-pipeline)
ontology_metadata_input_path=tests/data/ontologies/
emap_emapa_csv_path=tests/data/ontologies/EMAP-EMAPA.txt
emapa_metadata_csv_path=tests/data/ontologies/emapa_metadata_table.csv
ma_metadata_csv_path=tests/data/ontologies/ma_metadata_table.csv
# Statistical analysis output directory
# NOTE(review): unlike the other directory settings this one has no trailing
# slash -- confirm the consuming task joins paths rather than concatenating.
stats_analysis_out_path=tests/data/statspackets
raw_data_in_out=include
extract_windowed_data=true
# GenTar reference Database connection information
# Only the first '=' on a line separates key from value; the ':' characters
# in the JDBC URL are part of the value.
# NOTE(review): the user name and password are committed in plain text;
# prefer supplying them from the environment or an untracked local override.
ref_db_jdbc_connection_str=jdbc:postgresql://hx-rke-wp-webadmin-20-worker-1.caas.ebi.ac.uk:32370/refdata
ref_database_user=ref_admin
ref_database_password=ref_admin
[spark]
# Settings for submitting Spark jobs from a developer workstation.
# Comma-separated Maven coordinates (group:artifact:version): spark-xml plus
# the MySQL and PostgreSQL JDBC drivers.
packages=com.databricks:spark-xml_2.12:0.12.0,mysql:mysql-connector-java:8.0.20,org.postgresql:postgresql:42.2.12
driver_memory=10g
executor_memory=10g
# Spark local mode, using all available cores
master=local[*]
# Machine-specific path to a local Spark 3.1.1 install; adjust per workstation
spark_submit=/Applications/spark-3.1.1-bin-hadoop2.7/bin/spark-submit
# The same derived-parameters jar is listed for both jars and the driver class path
jars=lib/phenodcc-derived-parameters-2022.03.16.jar
driver_class_path=lib/phenodcc-derived-parameters-2022.03.16.jar
# Only the first '=' is the delimiter: the value is the Spark property
# assignment "spark.sql.session.timeZone=UTC"
conf=spark.sql.session.timeZone=UTC
[ImpcConfig]
# NOTE(review): presumably selects how the ETL's Spark jobs are deployed
# ("local" here, matching master=local[*] in [spark]) -- confirm against the
# ImpcConfig class that reads this section.
deploy_mode=local
# Per-task sections. Every section below is intentionally empty: it defines no
# keys of its own, so (assuming the configparser-based Luigi loader) each task
# sees only the values inherited from [DEFAULT]. Add a key under a header to
# override a default for that task alone.
# XML extraction jobs
[SpecimenLevelExperimentExtractor]
[LineLevelExperimentExtractor]
[MouseSpecimenExtractor]
[EmbryoSpecimenExtractor]
# GenTar extraction jobs
[ColonyTrackingExtractor]
[ProductReportExtractor]
[GeneProductionStatusExtractor]
[ExtractAlleleRef]
# IMPReSS extraction
[ImpressExtractor]
# MGI Extraction jobs
[MGIGenePhenoReportExtractor]
[MGIHomologyReportExtractor]
[MGIPhenotypicAlleleExtractor]
[MGIMarkerListReportExtractor]
[MGIStrainReportExtractor]
# Ontology data tasks
[OntologyTermHierarchyExtractor]
[OntologyMetadataExtractor]
# Data cleaning tasks
[ColonyCleaner]
[SpecimenLevelExperimentCleaner]
[LineLevelExperimentCleaner]
[MouseSpecimenCleaner]
[EmbryoSpecimenCleaner]
# Cross-reference tasks
[MouseSpecimenCrossRef]
[EmbryoSpecimenCrossRef]
[SpecimenLevelExperimentCrossRef]
[LineLevelExperimentCrossRef]
# Additional data calculation tasks
[SpecimenLevelExperimentParameterDerivator]
[LineLevelExperimentParameterDerivator]
[ExperimentBWAgeCalculator]
# Task to generate the Observation level representation of the data
[ExperimentToObservationMapper]
# Tasks to generate the Statistical Analysis inputs
[MPChooserGenerator]
[StatsPipelineInputMapper]
# Task to generate the Images pipeline input
[ImagesPipelineInputGenerator]
# Task to group the pre-statistical analysis phase of the ETL
[ImpcPreStatisticalAnalysis]
# Task to load the statistical analysis output
[StatisticalAnalysisOutputMapper]
# Solr cores generator tasks
[PipelineCoreLoader]
[GenotypePhenotypeCoreLoader]
[ImpcImagesCoreLoader]
# Task for the mapping of the impress crawler output to parameter centered view
[ImpressToParameterMapper]
# Task for the mapping and gathering of all stats results
[StatsResultsMapper]
# Task for the post-statistical analysis phase of the ETL
[ImpcPostStatisticalAnalysis]
# EOSC Life API datasource tasks
[ImpcGeneBundleMapper]
# Temporary imits task
[ExtractGeneRef]
# NOTE(review): [ExtractAlleleRef] is also declared under the GenTar
# extraction jobs; confirm whether both section names are still in use.
[AlleleRefExtractor]