-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathsettings_sample_yaml
202 lines (183 loc) · 6.61 KB
/
settings_sample_yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
##########################
# CONFIGURATION FILE FOR VARIABLES
# USED ACROSS THE PROJECT
##########################
########################## PIPELINE ##########################
# Pipeline flags: which processes to perform on the data.
# Three stages are configured: what to read (`in`), what to do with it
# (`trans`) and what to do with the outcome (`out`).
pipeline:
  # What to read
  in:
    # Source of the corpus.
    # 'mongo', 'file' and 'delete' values are currently supported.
    source: mongo
    # Type of the input to be read. The corresponding fields in this
    # settings file will be used.
    # 'text', 'edges' and 'med_rec' are currently supported.
    type: edges
    # Do we want to do it all in a streaming fashion?
    stream: false
    # Do we want to perform parallel processing?
    # Currently used for semrep extraction and neo4j saving.
    parallel: true
  # What to do with it
  trans:
    # Extract entities using metamap?
    metamap: false
    # Extract relations using reverb?
    reverb: false
    # Extract entities + relations through semrep?
    semrep: true
    # Transform edges, fetching concepts and other info?
    get_concepts_from_edges: false
  # What to do with the outcome
  out:
    # Create json output?
    json: false
    # Create .csv files for neo4j?
    csv: false
    # Create/update the neo4j db?
    neo4j: true
    # Create/update sentences in mongo?
    mongo_sentences: false
    # Create/update the collection of documents/edges in mongo?
    mongo: false
########################## END PIPELINE ##########################
########################## INPUT ##########################
# Variables regarding loading (input side of the pipeline).
# NOTE(review): all filesystem paths below are machine-specific absolute
# paths; adjust them for the target environment.
load:
  # Paths to binaries/files
  path:
    # Path to metamap
    metamap: /media/kostas/DATA/LLD/Metamap/public_mm/bin/metamap16
    # Path to reverb
    reverb: /media/kostas/DATA/GIT/reverb/core/
    # Path to semrep
    semrep: /media/kostas/DATA/LLD/SEMREP/public_semrep/bin/
    # Path to the toAscii binary
    toAscii: /media/kostas/DATA/LLD/Papers/BioASQ/MARIOS_PROJECT/lvg2017/bin
    # Path to the input file
    file_path: /media/kostas/DATA/LLD/Papers/BioASQ/MARIOS_PROJECT/enriched_doid.json
  # Mongo input variables
  mongo:
    # DB full uri. If user/pass is required, pass it here like
    # mongodb://user:pass@host:port
    # Uses localhost, matching the other mongo URIs in this file; 0.0.0.0 is
    # a listen/bind address and is not portable as a client destination.
    uri: mongodb://localhost:27017
    # DB name
    db: iasis_20190620_KGupdate
    # Collection name
    collection: LC_20190620_pubmed_MeSH_ENRICHED
    # Cache collection name
    cache_collection: cache
    # FOR PRINTING PURPOSES ONLY! Change uri/db/collection above, not this.
    file_path: mongodb://localhost:27017/iasis_20190620_KGupdate|LC_20190620_pubmed_MeSH_ENRICHED
  # For medical records
  med_rec:
    # Name of the medical-record column to read text from
    textfield: PrimaryDiagnosisDescription
    # Field separator.
    # NOTE(review): this is the two-character string backslash + t, not a
    # real tab character. Use the double-quoted form "\t" if the reader
    # needs an actual tab - confirm against the consumer.
    sep: '\t'
    # Name of the id field
    idfield: AdmissionID
  # For article-document type of input
  text:
    # Outer field name for the documents (probably residing in a json)
    itemfield: documents
    # Name of the field to read text from (alternative noted in the
    # original file: body_Filtered)
    textfield: abstractText
    # Id field for each document
    idfield: pmid
    # Label field for each article
    labelfield: title
    # Sentence prefix (is this abstract or fullText)
    sent_prefix: abstract
  # For relation-edge type of input
  edges:
    # Name of the field where edges are stored
    itemfield: relations
    # Type of the subject.
    # Currently [Entity, Article and any new one]
    sub_type: Article
    # Type of the object
    obj_type: Entity
    # Id field of the subject.
    # Currently [UMLS for cuis, PMC or TEXT for articles, None for
    # precomputed and whatever else for DRUGBANK, MSH etc].
    # The value is the literal string 'None' (YAML does not read None as
    # null); quoted here to make that explicit.
    sub_source: 'None'
    # Id field of the object (same accepted values as sub_source)
    obj_source: 'None'
########################## END INPUT ##########################
########################## API KEYS ##########################
# Variables regarding external APIs.
# The values below are masked placeholders: replace them with real keys
# locally and do not commit real keys to version control.
# They must stay quoted - an unquoted value starting with `*` is parsed
# by YAML as an alias reference and fails to load.
apis:
  # Biontology key, for mapping cuis to uris
  biont: '********-****-****-****-************'
  # UMLS ticket service key
  umls: '********-****-****-****-************'
########################## END API KEYS ##########################
########################## NEO4j ##########################
# Neo4j connection variables
neo4j:
  # DB host name
  host: localhost
  # DB port
  port: 7474
  # User name
  user: neo4j
  # Password
  # NOTE(review): sample credential - replace for any real deployment.
  password: admin
  # Name of the resource to be inputted in neo4j
  resource: text
########################## END NEO4j ##########################
########################## MONGODB FOR SENTENCES ##########################
# Mongo variables for writing sentences
mongo_sentences:
  # DB full uri. If user/pass is required, pass it here like
  # mongodb://user:pass@host:port
  uri: mongodb://localhost:27017
  # DB name
  db: Marios
  # Collection name
  collection: Articles2
########################## END MONGODB FOR SENTENCES ##########################
########################## CACHE ############################
# Path of the cache file (a .json file, per its extension).
# NOTE(review): machine-specific absolute path - adjust per environment.
cache_path: /media/kostas/DATA/LLD/Papers/BioASQ/MARIOS_PROJECT/cache.json
########################## END CACHE ############################
########################## LOG ############################
# Path of the log file.
# NOTE(review): machine-specific absolute path - adjust per environment.
log_path: /media/kostas/DATA/LLD/Papers/BioASQ/MARIOS_PROJECT/medknow.log
########################## END LOG ############################
########################## PARALLEL ############################
# Number of cores to use in parallel schemes. If none, it will be
# equal to the cpu count, i.e. all available cores are used.
num_cores: 4
# Number of items to be processed per core. This creates a batch of
# total size = num_cores * batch_per_core (4 * 100 = 400 with the values
# in this file). It defaults to 100.
batch_per_core: 100
########################## END PARALLEL ############################
########################## OUTPUT ##########################
# Output variables: one sub-section per output target.
# NOTE(review): filesystem paths below are machine-specific absolute
# paths; adjust them for the target environment.
out:
  # Resulting .json file before neo4j
  json:
    # Path of the output file
    out_path: /media/kostas/DATA/LLD/Papers/BioASQ/MARIOS_PROJECT/MARIOS_ERRORS/small2.json
    # Output json outer field name for the documents
    itemfield: relations
    # Output json text field for each document
    json_text_field: text
    # Output id field
    json_id_field: id
    # Output label field
    json_label_field: title
  # Resulting .csv files
  csv:
    # Output path
    out_path: /media/kostas/DATA/LLD/Papers/BioASQ/MARIOS_PROJECT/out
  # Neo4j output
  neo4j:
    # Just for printing! Change the Neo4j field variables, not this!
    out_path: 'localhost:7474'
  # Mongo output
  mongo:
    # DB full uri. If user/pass is required, pass it here like
    # mongodb://user:pass@host:port
    uri: mongodb://localhost:27017
    # DB name
    db: iasis_20190620_KGupdate
    # Collection name
    collection: LC_20190620_pubmed_MeSH_ENRICHED
    # Just for printing! Change the mongo field variables, not this!
    out_path: 'localhost:27017/iasis_20190620_KGupdate|LC_20190620_pubmed_MeSH_ENRICHED'
########################## END OUTPUT ##########################