-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
executable file
·57 lines (43 loc) · 2.59 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# This file should contain everything to configure the workflow on a global scale.
# In case of sample-based data, it should be complemented by a samples.tsv file that contains
# one row per sample. It can be parsed easily via pandas.
#
# FASTA file of adapter sequences used so that read trimming can take place.
# The adapters are specific to the type of sequencing that was performed
# (the file currently referenced is named for Nextera adapters).
# Trimming is not necessary if you change the aligner to Bowtie 2, which performs soft clipping.
adapters_file: "/home/groups/MaxsonLab/callahro/adapters/nextera_adapters.fasta"
# The reference genome that everything will be aligned to.
# This is the most important value to check before running the workflow.
# Alignment reference for the mouse genome (mm10):
alignment_reference: "/home/groups/MaxsonLab/indices/mm10/mm10.fa"
# Alignment reference for the human genome (GRCh38). To use it, uncomment the line
# below and comment out the mm10 line above so the key is defined only once:
#alignment_reference: "/home/groups/MaxsonLab/indices/GRch38/GCA_000001405.15_GRCh38_no_alt_analysis_set.fasta"
# The folder where the raw sample files can be found.
raw_sample_folder: "/home/groups/MaxsonLab/input-data/MOLM_C57A_ATAC_merged_fastq/"
# The size of the genome, used for genome-size effect cancellation.
# NOTE(review): judging by the key name, this is presumably passed to MACS as its
# genome-size option — confirm in the workflow rules.
# The value is quoted, so YAML loads it as a string rather than a number.
# Default size for mouse:
macs_genome_size: "1.87e9" #mouse default size
# Size used for human genomes (uncomment this and comment out the mouse line to switch):
#macs_genome_size: "2.7e9" #human default genome size
# Location of the blacklist file used to filter out regions that overlap a blacklist.
# Default blacklist for the mouse genome (mm10):
blacklist_file: "/home/groups/MaxsonLab/indices/mm10/mm10.blacklist.v2.bed"
# Default blacklist for the human genome (hg38); uncomment this and comment out
# the mm10 line above to switch:
#blacklist_file: "/home/groups/MaxsonLab/indices/GRch38/hg38.blacklist.v2.bed"
# The name of the genome you are using (here mm10, matching the mm10 reference
# and blacklist paths selected elsewhere in this file).
genome_name: "mm10"
# Prefix that allows you to pick out the files that you care about.
# NOTE(review): presumably matched against sample file names — confirm against the workflow code.
prefix: "C57"
# NOTE(review): presumably the file-name endings of the forward and reverse read
# files — confirm. The trailing comments preserve previously used values.
readforward_postfix: "_merged_1.fq.gz" #"_CKDL190141440-1a_H53YYBBXX_L2_1.fq.gz"
readreverse_postfix: "_merged_2.fq.gz" #"_CKDL190141440-1a_H53YYBBXX_L2_2.fq.gz"
# NOTE(review): presumably the directory the workflow works in for these samples —
# confirm. The trailing comments preserve previously used values.
sample_work_path: "../atac_c57" #_all_samples" #"../samples"
# LANES is no longer needed unless you have multiple lanes that you wish to merge.
# If you do, you can add a lane prefix to this config file and change the code in
# filter_shift_align.smk, and the pipeline will merge the different lanes together.
# It is currently set to two empty-string entries.
LANES:
- ""
- ""
# NOTE(review): presumably the sample metadata file read by the workflow — confirm
# the expected format against the workflow code.
metadata_file: "./C57_ATAC_metadata"
# If there are samples that you want to include or exclude that match a certain
# regular expression, uncomment the relevant line below and write that regular
# expression there; the pipeline will then include or exclude them.
#include_samples_regex: ".*24.*"
#exclude_samples_regex: ".*24Q2.*"