-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathomic_config.yaml
104 lines (81 loc) · 4.55 KB
/
omic_config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
##############################################################################
# Genome and project-specific files that will change from project-to-project
##############################################################################
# Each active value below is tagged with its genome build (mm10); the
# commented-out line directly beneath it is the hg38 alternative. Switch all
# entries together when changing organism.
#
# GTF file for the genome build you would like to use
# NOTE(review): the active file name contains "GRCm39" while its directory and
# trailing comment say mm10 (mm10 corresponds to GRCm38). Confirm that this
# annotation really matches the STAR index and the other mm10 files below.
gtf_file: /home/groups/MaxsonLab/indices/mm10/STAR_index/Mus_musculus.GRCm39.104.gtf # mm10
# /home/groups/MaxsonLab/indices/GRch38/STAR_index/Homo_sapiens.GRCh38.104.gtf # hg38
# Bed file for rseqc function read_distribution
bed_file: data/mm10_GENCODE_vm25.bed # mm10
# data/hg38_GENCODE.v38.bed # hg38
# Pre-built STAR index
star_index: /home/groups/MaxsonLab/indices/mm10/STAR_index # mm10
# /home/groups/MaxsonLab/indices/GRch38/STAR_index/ # hg38
# Gene annotation table (.Rdata); the path suggests a biomaRt export.
# Presumably used to annotate/filter genes - verify against the pipeline.
filter_anno: anno/biomaRt/mm10.Ens_96.biomaRt.geneAnno.Rdata # mm10
# anno/biomaRt/hg38.Ens_94.biomaRt.geneAnno.Rdata # hg38
# Text file of gene names for the chosen build.
# Presumably an ID-to-name lookup - verify against the pipeline.
gene_names: data/mm10_Ens102_genes.txt # mm10
# data/hg38_Ens102_genes.txt # hg38
####################################################################
# Tool-specific files that will not change from project-to-project
####################################################################
# Configuration file listing the pre-built genome indexes for various
# organisms that reads are screened against to look for contamination.
conf: data/fastq_screen.conf
##########################################
# Important project-specific paths/files
##########################################
# Metadata needed for differential expression analysis.
# Must be a tab-delimited file.
omic_meta_data: data/metadata.tsv
##############################################################################################
# Project-specific specifications you will need to make prior to submission of the pipeline
##############################################################################################
# Biotypes you would like to include in your gene counts table.
# Available examples: protein_coding | lincRNA | sRNA | rRNA | snRNA | snoRNA
# If you want to keep all biotypes, use ""
biotypes: protein_coding
# Identifier for this project.
# Presumably used to label pipeline outputs - verify against the pipeline.
project_id: "my_project"
# Genome assembly used for GO analysis. The format must be the genome assembly
# first and the Ensembl version second, separated by a period (unless there is
# only one Ensembl version, in which case write just the assembly).
assembly: mm10 # [hg38, mm10]
# Remove mitochondrial genes (1/0).
# Presumably 1 = remove and 0 = keep - confirm against the pipeline.
mito: 1
# Option to print the GO term tree (1/0).
# Presumably 1 = print and 0 = skip - confirm against the pipeline.
printTree: 1
# Fold change cutoff (not log2 transformed) for GO analysis and volcano plot
FC: 2
# FDR adjusted p-value cutoff for GO analysis and volcano plot
adjp: 0.01
# The column name of the characteristic you would like to do DE on.
# Example: Diagnosis, genotype, etc. This must be a column in your
# omic_meta_data. Contrasts are generated from this column.
linear_model: Condition
# The column name in which your sampleIDs are defined in omic_meta_data.
# These should match the sampleIDs in your fastq files. The column must list
# unique sample ids and needs to be the first column in the meta data file.
sample_id: SampleID
# The column names in your omic_meta_data file which you would like to
# annotate your heatmaps by. This is used as a QC to look for batch effects.
# Enter columns which could potentially introduce a batch effect in your data.
# Each entry is written as "column: column", one per line, nested under the key.
meta_columns_to_plot:
  Condition: Condition
pca:
  labels:
    # Column name in meta data file to generate group files and comparisons
    - Condition
# Define contrasts as target-vs-baseline, such as mutant-vs-wildtype.
# This format is completely different from the original scheme written by JEstabrook.
# This format will have better compatibility with the maxsonBraunLab/atac_seq
# pipeline to facilitate easier data interpretation.
diffexp:
  # Contrasts to generate group files and perform comparisons.
  # With no list items this key parses as null; uncomment and edit the
  # example line below (one "- target-vs-baseline" item per contrast).
  contrasts:
    # - target-vs-baseline
  # This option is specified if you would like to run DESeq2 on a subset of
  # your samples (more than a pairwise comparison).
  LRT:
    - condition1
    - condition2
    - baseline
# If you would like to colour your QC plots by an RColorBrewer palette, list
# that palette under the rcolorbrewer parameter; otherwise, write "NA".
# If you would like to colour your QC plots by specific colours, list them
# under the discrete category, with one colour for each bullet point.
## The discrete colours will be assigned to your Conditions (i.e. linear_model)
## in the order that they show up in your metadata table, when it is ordered
## alphabetically by sample ID.
## There must be the same number of colours listed as there are Conditions.
# If no options are specified, the default ggplot2 colours are used.
colors:
  rcolorbrewer:
    - NA
  discrete:
    - NA