forked from Helsinki-NLP/Tatoeba-Challenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathopus-2021-02-23.yml
47 lines (47 loc) · 1.32 KB
/
opus-2021-02-23.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Release metadata: archive path, release date, dataset name and model type.
# NOTE(review): release-date is an unquoted ISO date, so YAML 1.1 loaders
# resolve it to a date/timestamp value rather than a string — confirm that
# consumers expect this before relying on string comparison.
release: hbs-ukr/opus-2021-02-23.zip
release-date: 2021-02-23
dataset-name: opus
modeltype: transformer
# Free-text summary of the text preparation; the (spm32k,spm32k) pair repeats
# the source/target values listed under `subwords`.
pre-processing: normalization + SentencePiece (spm32k,spm32k)
# Subword segmentation identifier for each side of the model.
# Each list item is a single-key mapping (source / target).
# presumably "spm32k" denotes a SentencePiece model with a 32k vocabulary —
# TODO confirm.
subwords:
- source: spm32k
- target: spm32k
# File names of the subword model for each side, as single-key mappings.
# presumably these files are shipped inside the release archive — confirm.
subword-models:
- source: source.spm
- target: target.spm
# Language codes on the source side of the model.
# presumably ISO 639-3 codes, with hbs being the macrolanguage that groups
# bos, hrv and srp — confirm.
source-languages:
- bos
- hbs
- hrv
- srp
# Language codes on the target side of the model (a single entry here).
target-languages:
- ukr
# Training corpora, one single-key mapping per source-variant/target pair.
# Script-specific variants (_Cyrl / _Latn suffix) are listed separately.
# Each value is a plain string: corpus name followed by a number in
# parentheses.
# presumably that number is the count of training sentence pairs — confirm.
training-data:
- bos_Cyrl-ukr: Tatoeba-train (2)
- bos_Latn-ukr: Tatoeba-train (204582)
- hbs_Cyrl-ukr: Tatoeba-train (11)
- hrv-ukr: Tatoeba-train (1398103)
- srp_Cyrl-ukr: Tatoeba-train (152997)
- srp_Latn-ukr: Tatoeba-train (381969)
# Validation (dev) data. The sequence is heterogeneous: the first five items
# are single-key mappings, while the last item is a plain string note, so
# consumers must not assume every element is a mapping.
# Per-pair values are strings of the form "<corpus>, <number>".
# NOTE(review): the four per-pair numbers below add up to 999, whereas the
# stated total is 997 — confirm which figure is authoritative.
validation-data:
- bos_Latn-ukr: Tatoeba-dev, 95
- hrv-ukr: Tatoeba-dev, 628
- srp_Cyrl-ukr: Tatoeba-dev, 74
- srp_Latn-ukr: Tatoeba-dev, 202
- total size of shuffled dev data: 997
# Free-text note (a string, not a mapping) describing how the devset was cut.
- devset = top 997 lines of Tatoeba-dev.src.shuffled!
# Test sets, one single-key mapping per set. Values are plain strings of the
# form "<a>/<b>".
# presumably <a> is the sentence count and <b> the word count — confirm.
# The hbs-ukr first figure (941) equals the sum of the hrv, srp_Cyrl and
# srp_Latn first figures (389 + 204 + 348); the second figures sum to 4926,
# not 4959.
test-data:
- Tatoeba-test.hrv-ukr: 389/2217
- Tatoeba-test.hbs-ukr: 941/4959
- Tatoeba-test.srp_Cyrl-ukr: 204/1051
- Tatoeba-test.srp_Latn-ukr: 348/1658
# BLEU score per test set; keys are the same test-set names used under
# `test-data`. Values are unquoted and therefore load as floats.
BLEU-scores:
- Tatoeba-test.hrv-ukr: 41.4
- Tatoeba-test.hbs-ukr: 46.4
- Tatoeba-test.srp_Cyrl-ukr: 48.8
- Tatoeba-test.srp_Latn-ukr: 50.8
# chr-F score per test set; keys are the same test-set names used under
# `test-data`. Values are written as fractions below 1 (not percentages) and
# load as floats.
chr-F-scores:
- Tatoeba-test.hrv-ukr: 0.624
- Tatoeba-test.hbs-ukr: 0.653
- Tatoeba-test.srp_Cyrl-ukr: 0.652
- Tatoeba-test.srp_Latn-ukr: 0.691