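#!/bin/bash
# run-maps-alpaca.sh
# Run the MAPS pipeline (knowledge mining, knowledge integration, knowledge
# selection) with Alpaca-7B on the WMT22 test sets.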
set -e
set -u
BASEDIR=$(realpath "$(dirname "$0")")
WS=$BASEDIR
DATA=$WS/data
FORMAT=$DATA/format
RAW=$DATA/raw
OUTPUT=$WS/output
SCRIPT=$WS/scripts
MODEL=$WS/model
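# Placeholder: set this to the local path of your Alpaca-7B checkpoint.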
ALPACA_CKPT=YOUR_ALPACA_CKPT
all_lang_pairs=(en-zh zh-en en-de de-en en-ja ja-en de-fr fr-de)
test_name=wmt22
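# BS: decoding batch size for translation; KS_BS: batch size for knowledge-selection scoring.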
BS=6
KS_BS=400
# >>>>>>> Step1: Knowledge Mining >>>>>>>
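# Build prompts that ask the model for three kinds of knowledge about each
# source sentence: keywords (kw), topic, and demonstration examples (demo).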
for lp in ${all_lang_pairs[@]}
do
echo $lp
src=${lp%%-*}
tgt=${lp##*-}
python3 $DATA/format_ask_kw.py \
-w $WS \
-tn wmt22 \
--seed 0 \
-s $src \
-t $tgt
python3 $DATA/format_ask_topic.py \
-w $WS \
-tn wmt22 \
--seed 0 \
-s $src \
-t $tgt
python3 $DATA/format_ask_demo.py \
-w $WS \
-tn wmt22 \
--seed 0 \
-s $src \
-t $tgt
done
# alpaca-7b
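# Answer the knowledge-asking prompts with Alpaca (beam search, temperature 0),
# then post-process the raw generations with alpaca-post-process.py.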
model_name=alpaca-7b
mkdir -p $OUTPUT/$model_name
for lp in ${all_lang_pairs[@]}
do
echo $model_name $lp
src=${lp%%-*}
tgt=${lp##*-}
for know in kw topic demo
do
python3 $WS/model/alpaca/translate.py \
--model-name-or-path $ALPACA_CKPT \
-i $FORMAT/$test_name.$lp.$src.ask-$know \
-o $OUTPUT/$model_name/$test_name.$lp.$src.ask-$know \
--search-algorithm beam \
--batch $BS \
--temperature 0
cat $OUTPUT/$model_name/$test_name.$lp.$src.ask-$know | python3 $SCRIPT/alpaca-post-process.py > $OUTPUT/$model_name/$test_name.$lp.$src.ask-$know.trans
done
done
# <<<<<<< Step1: Knowledge Mining <<<<<<<
# >>>>>>> Step2: Knowledge Integration >>>>>>>
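# Combine each knowledge type mined in Step 1 with the source sentence into a
# new translation prompt (format-kw.py / format-topic.py / format-demo.py).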
for lp in ${all_lang_pairs[@]}
do
echo $lp
src=${lp%%-*}
tgt=${lp##*-}
for know in kw topic demo
do
python3 $DATA/format-$know.py \
-w $WS \
-tn wmt22 \
-m alpaca-7b \
--seed 0 \
-s $src \
-t $tgt
done
done
# alpaca-7b
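# Translate with each knowledge-augmented prompt, producing one candidate
# translation per knowledge type.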
model_name=alpaca-7b
mkdir -p $OUTPUT/$model_name
for lp in ${all_lang_pairs[@]}
do
echo $model_name $lp
src=${lp%%-*}
tgt=${lp##*-}
for know in kw topic demo
do
python3 $WS/model/alpaca/translate.py \
--model-name-or-path $ALPACA_CKPT \
-i $FORMAT/with-knowledge/$model_name/$test_name.$lp.$src.$know.0-seed \
-o $OUTPUT/$model_name/$test_name.$lp.$tgt.$know.0-seed.trans \
--search-algorithm beam \
--batch $BS \
--temperature 0
done
done
# <<<<<<< Step2: Knowledge Integration <<<<<<<
# >>>>>>> Step3: Knowledge Selection >>>>>>>
# alpaca-7b
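# Rerank the 0-shot baseline and the three knowledge-based candidates per
# sentence. The first run selects with reference-free COMET-QE (output: maps);
# the second selects with reference-based COMET as an upper bound (output:
# maps_bound). The 0-shot translations (wmt22.$lp.$tgt.0-shot.trans) are not
# produced by this script and are assumed to come from a separate baseline run.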
for lp in ${all_lang_pairs[@]}
do
src=${lp%%-*}
tgt=${lp##*-}
python3 $SCRIPT/knowledge-selection.py \
--sys $OUTPUT/alpaca-7b/wmt22.$lp.$tgt.0-shot.trans \
$OUTPUT/alpaca-7b/wmt22.$lp.$tgt.kw.0-seed.trans \
$OUTPUT/alpaca-7b/wmt22.$lp.$tgt.topic.0-seed.trans \
$OUTPUT/alpaca-7b/wmt22.$lp.$tgt.demo.0-seed.trans \
--src $RAW/wmt22.$lp.$src \
--ref $RAW/wmt22.$lp.$tgt \
--out $OUTPUT/alpaca-7b/wmt22.$lp.$tgt.maps.0-seed.trans \
--src-lang $src --tgt-lang $tgt \
-bs $KS_BS \
--metric comet_qe \
--comet-qe-model-name wmt21-comet-qe-da
python3 $SCRIPT/knowledge-selection.py \
--sys $OUTPUT/alpaca-7b/wmt22.$lp.$tgt.0-shot.trans \
$OUTPUT/alpaca-7b/wmt22.$lp.$tgt.kw.0-seed.trans \
$OUTPUT/alpaca-7b/wmt22.$lp.$tgt.topic.0-seed.trans \
$OUTPUT/alpaca-7b/wmt22.$lp.$tgt.demo.0-seed.trans \
--src $RAW/wmt22.$lp.$src \
--ref $RAW/wmt22.$lp.$tgt \
--out $OUTPUT/alpaca-7b/wmt22.$lp.$tgt.maps_bound.0-seed.trans \
--src-lang $src --tgt-lang $tgt \
-bs $KS_BS \
--metric comet \
--comet-model-name Unbabel/wmt22-comet-da
done
# <<<<<<< Step3: Knowledge Selection <<<<<<<