code for the concatenation experiments #19

Open · wants to merge 1 commit into base: master
14 changes: 13 additions & 1 deletion allennlp_config/ner.jsonnet
@@ -15,6 +15,12 @@
         "do_lowercase": std.extVar("is_lowercase"),
         "use_starting_offsets": true
       },
+      "bert2": {
+        "type": "bert-pretrained",
+        "pretrained_model": std.extVar("BERT_VOCAB2"),
+        "do_lowercase": std.extVar("is_lowercase2"),
+        "use_starting_offsets": true
+      },
       "token_characters": {
         "type": "characters",
         "min_padding_length": 3
@@ -36,13 +42,19 @@
     "allow_unmatched_keys": true,
     "embedder_to_indexer_map": {
       "bert": ["bert", "bert-offsets"],
+      "bert2": ["bert2", "bert2-offsets"],
       "token_characters": ["token_characters"],
     },
     "token_embedders": {
       "bert": {
         "type": "bert-pretrained",
         "pretrained_model": std.extVar("BERT_WEIGHTS")
       },
+      "bert2": {
+        "type": "bert-pretrained",
+        "pretrained_model": std.extVar("BERT_WEIGHTS2"),
+        "requires_grad": false
+      },
       "token_characters": {
         "type": "character_encoding",
         "embedding": {
@@ -60,7 +72,7 @@
     },
     "encoder": {
       "type": "lstm",
-      "input_size": 768 + 128,
+      "input_size": 768 + 768 + 128,
       "hidden_size": 200,
       "num_layers": 2,
       "dropout": 0.5,
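The bert2 token indexer and embedder mirror the existing bert entries, so each token is now represented by two 768-d BERT vectors plus the 128-d character encoding, concatenated; that is why the encoder's input_size grows from 768 + 128 to 768 + 768 + 128 = 1664. A minimal PyTorch sketch of the concatenation the text field embedder performs (shapes and variable names are illustrative, not from the PR):

import torch

# Hypothetical per-token outputs for a batch of 2 sentences of 7 tokens each.
bert_out = torch.randn(2, 7, 768)   # first BERT embedder ("bert")
bert2_out = torch.randn(2, 7, 768)  # second BERT embedder ("bert2")
char_out = torch.randn(2, 7, 128)   # character encoder ("token_characters")

# The text field embedder concatenates per-embedder outputs along the last
# dimension; the result must match the LSTM encoder's input_size.
token_repr = torch.cat([bert_out, bert2_out, char_out], dim=-1)
assert token_repr.shape == (2, 7, 768 + 768 + 128)  # (2, 7, 1664)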
19 changes: 16 additions & 3 deletions allennlp_config/text_classification.jsonnet
@@ -11,6 +11,12 @@
         "do_lowercase": std.extVar("is_lowercase"),
         "use_starting_offsets": true
       },
+      "bert2": {
+        "type": "bert-pretrained",
+        "pretrained_model": std.extVar("BERT_VOCAB2"),
+        "do_lowercase": std.extVar("is_lowercase2"),
+        "use_starting_offsets": true
+      },
       "token_characters": {
         "type": "characters",
         "min_padding_length": 3
@@ -23,17 +29,24 @@
   "evaluate_on_test": true,
   "model": {
     "type": "text_classifier",
-    "verbose_metrics": true,
+    "verbose_metrics": false,
     "text_field_embedder": {
       "allow_unmatched_keys": true,
       "embedder_to_indexer_map": {
         "bert": ["bert", "bert-offsets"],
+        "bert2": ["bert2", "bert2-offsets"],
         "token_characters": ["token_characters"],
       },
       "token_embedders": {
         "bert": {
           "type": "bert-pretrained",
-          "pretrained_model": std.extVar("BERT_WEIGHTS")
+          "pretrained_model": std.extVar("BERT_WEIGHTS"),
+          "requires_grad": false
         },
+        "bert2": {
+          "type": "bert-pretrained",
+          "pretrained_model": std.extVar("BERT_WEIGHTS2"),
+          "requires_grad": false
+        },
         "token_characters": {
           "type": "character_encoding",
@@ -53,7 +66,7 @@
     "text_encoder": {
       "type": "lstm",
       "bidirectional": true,
-      "input_size": 768 + 128,
+      "input_size": 768 + 768 + 128,
       "hidden_size": 200,
       "num_layers": 2,
       "dropout": 0.5
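Besides wiring in the second embedder, this config flips verbose_metrics to false and sets "requires_grad": false on both bert-pretrained embedders, so the two transformers stay frozen and only the character encoder, LSTM, and classifier head are updated. A rough PyTorch equivalent of that flag, using placeholder modules rather than the real embedders:

import torch

def freeze(module: torch.nn.Module) -> None:
    # Same effect as "requires_grad": false in the config: the module is
    # still used in the forward pass but excluded from gradient updates.
    for param in module.parameters():
        param.requires_grad = False

bert = torch.nn.Linear(768, 768)   # stand-in for the first BERT embedder
bert2 = torch.nn.Linear(768, 768)  # stand-in for the second BERT embedder
freeze(bert)
freeze(bert2)
assert not any(p.requires_grad for p in bert2.parameters())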
49 changes: 37 additions & 12 deletions scripts/exp.sh
@@ -4,14 +4,19 @@
 bertvocab="ds_dpsaxi4ltpw9:/bert_vocab/"
 bertweights="ds_jda1d19zqy6z:/bert_weights/"

-for task in text_classification
+for dataset in NCBI-disease bc5cdr JNLPBA sciie chemprot citation_intent mag rct-20k sciie-relation-extraction # pico
 do
-for dataset in chemprot
+for SEED in 13370 13570 14680
 do
-for SEED in 13370 13570 14680
-do
-for model in bertbase_basevocab_cased biobert_pmc_basevocab_cased biobert_pubmed_pmc_basevocab_cased s2bert_basevocab_uncased_512 s2bert_s2vocab_uncased_512 bertbase_basevocab_uncased biobert_pubmed_basevocab_cased s2bert_basevocab_cased_512 s2bert_s2vocab_cased_512
-do
+# for model in s2bert_basevocab_cased_512 s2bert_s2vocab_cased_512 # bertbase_basevocab_cased biobert_pmc_basevocab_cased biobert_pubmed_pmc_basevocab_cased s2bert_basevocab_uncased_512 s2bert_s2vocab_uncased_512 bertbase_basevocab_uncased biobert_pubmed_basevocab_cased s2bert_basevocab_cased_512 s2bert_s2vocab_cased_512
+# do
+
+if [[ 'NCBI-diseasebc5cdrJNLPBAsciie' =~ $dataset ]];
+then
+task='ner'
+else
+task='text_classification'
+fi

 PYTORCH_SEED=`expr $SEED / 10`
 NUMPY_SEED=`expr $PYTORCH_SEED / 10`
@@ -39,8 +44,28 @@ fi

 config_file=allennlp_config/"$task".jsonnet

-export BERT_VOCAB=/bert_vocab/"$vocab_file".vocab
-export BERT_WEIGHTS=/bert_weights/"$model".tar.gz
+# vocab='basevocab'
+# model='bertbase'
+# export BERT_VOCAB=/bert_vocab/"$vocab"_uncased.vocab
+# export BERT_WEIGHTS=/bert_weights/"$model"_"$vocab"_uncased.tar.gz
+# export is_lowercase=true
+# export BERT_VOCAB2=/bert_vocab/"$vocab"_cased.vocab
+# export BERT_WEIGHTS2=/bert_weights/"$model"_"$vocab"_cased.tar.gz
+# export is_lowercase2=false
+
+
+export BERT_VOCAB=/bert_vocab/basevocab_cased.vocab
+export BERT_WEIGHTS=/bert_weights/bertbase_basevocab_cased.tar.gz
+export is_lowercase=false
+
+export BERT_VOCAB2=/bert_vocab/s2vocab_cased.vocab
+export BERT_WEIGHTS2=/bert_weights/s2bert_s2vocab_cased_512.tar.gz
+export is_lowercase2=false
+
+
+
+# export BERT_VOCAB=/bert_vocab/"$vocab_file".vocab
+# export BERT_WEIGHTS=/bert_weights/"$model".tar.gz
 export TRAIN_PATH=data/$task/$dataset/train.txt
 export DEV_PATH=data/$task/$dataset/dev.txt
 export TEST_PATH=data/$task/$dataset/test.txt
@@ -51,11 +76,11 @@ echo "$BERT_VOCAB", "$BERT_WEIGHTS", "$is_lowercase", "$TRAIN_PATH", "$config_fi
 # remember to change the desc below
 python scripts/run_with_beaker.py $config_file --source $bertvocab --source $bertweights \
     --desc 's2-bert' \
-    --env "BERT_VOCAB=$BERT_VOCAB" --env "BERT_WEIGHTS=$BERT_WEIGHTS" \
+    --env "BERT_VOCAB=$BERT_VOCAB" --env "BERT_WEIGHTS=$BERT_WEIGHTS" --env "is_lowercase=$is_lowercase" \
+    --env "BERT_VOCAB2=$BERT_VOCAB2" --env "BERT_WEIGHTS2=$BERT_WEIGHTS2" --env "is_lowercase2=$is_lowercase2" \
     --env "TRAIN_PATH=$TRAIN_PATH" --env "DEV_PATH=$DEV_PATH" --env "TEST_PATH=$TEST_PATH" \
-    --env "is_lowercase=$is_lowercase" \
     --env "SEED=$SEED" --env "PYTORCH_SEED=$PYTORCH_SEED" --env "NUMPY_SEED=$NUMPY_SEED"
-done
-done
+# done
+# done
 done
 done
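The new if block routes each dataset to the matching config: the bash test uses $dataset as a pattern matched against the concatenated NER dataset names, so NCBI-disease, bc5cdr, JNLPBA, and sciie pick up the ner config, while every other dataset (including sciie-relation-extraction, which does not occur in that string) falls through to text_classification. A small Python analogue of that dispatch, using the dataset names from the script:

NER_DATASETS = {"NCBI-disease", "bc5cdr", "JNLPBA", "sciie"}

def task_for(dataset: str) -> str:
    # Mirrors the bash substring test in exp.sh: NER datasets use the
    # ner config, all others use text_classification.
    return "ner" if dataset in NER_DATASETS else "text_classification"

assert task_for("sciie") == "ner"
assert task_for("chemprot") == "text_classification"
assert task_for("sciie-relation-extraction") == "text_classification"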
7 changes: 5 additions & 2 deletions scripts/train.sh
@@ -1,7 +1,7 @@
 # Run allennlp training locally

-dataset='chemprot'
-task='text_classification'
+dataset='sciie'
+task='ner'
 config_file=allennlp_config/"$task".jsonnet

 SEED=13270
@@ -15,6 +15,9 @@ export NUMPY_SEED=$NUMPY_SEED
 export BERT_VOCAB=vocab/s2vocab_cased.vocab
 export BERT_WEIGHTS=pytorch_models/s2bert_s2vocab_cased_512.tar.gz
 export is_lowercase=false
+export BERT_VOCAB2=vocab/s2vocab_uncased.vocab
+export BERT_WEIGHTS2=pytorch_models/s2bert_s2vocab_uncased_512.tar.gz
+export is_lowercase2=true
 export TRAIN_PATH=data/$task/$dataset/train.txt
 export DEV_PATH=data/$task/$dataset/dev.txt
 export TEST_PATH=data/$task/$dataset/test.txt
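For local runs, train.sh pairs a cased primary model (s2vocab_cased) with an uncased secondary one (s2vocab_uncased), which is why is_lowercase and is_lowercase2 differ here. The exported variables reach the configs through std.extVar, which AllenNLP binds from the environment when it evaluates the Jsonnet file. A minimal sketch of that lookup using the jsonnet Python bindings (illustrative, not part of this repo):

import _jsonnet  # pip install jsonnet

# std.extVar("is_lowercase2") resolves to whatever value is bound as an
# external variable, mirroring how the exported shell variables are read.
snippet = '{ do_lowercase: std.extVar("is_lowercase2") }'
print(_jsonnet.evaluate_snippet("example", snippet,
                                ext_vars={"is_lowercase2": "true"}))
# Prints the evaluated JSON: {"do_lowercase": "true"}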