Skip to content

Commit

Permalink
3.1.0 (#812)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe authored Jun 2, 2024
1 parent 4499f28 commit 76c46a1
Show file tree
Hide file tree
Showing 30 changed files with 1,554 additions and 332 deletions.
18 changes: 18 additions & 0 deletions docs/source/changelog/changelog_3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,24 @@
3.0 Changelog
*************

3.1.0
-----

- Fixed a bug where cutoffs were not properly modelled
- Added an additional filter during subset creation so that utterances containing cutoffs are excluded from smaller subsets
- Added the ability to specify custom HMM topologies for phones
- Fixed issues caused by validators not cleaning up temporary files and databases
- Added support for default and nonnative dictionaries generated from other dictionaries
- Restricted initial training rounds to exclude default and nonnative dictionaries
- Changed clustering of phones to not mix silence and non-silence phones
- Optimized TextGrid export
- Added better memory management for collecting alignments

3.0.8
-----

- Fixed a compatibility issue with models trained under version 1.0 and earlier

3.0.7
-----

Expand Down
5 changes: 3 additions & 2 deletions montreal_forced_aligner/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def db_engine(self) -> sqlalchemy.engine.Engine:
self._db_engine = self.construct_engine()
return self._db_engine

def get_next_primary_key(self, database_table: MfaSqlBase):
def get_next_primary_key(self, database_table):
with self.session() as session:
pk = session.query(sqlalchemy.func.max(database_table.id)).scalar()
if not pk:
Expand Down Expand Up @@ -634,7 +634,8 @@ def parse_args(
unknown_dict[name] = val
for name, param_type in param_types.items():
if (name.endswith("_directory") and name != "audio_directory") or (
name.endswith("_path") and name not in {"rules_path", "phone_groups_path"}
name.endswith("_path")
and name not in {"rules_path", "phone_groups_path", "topology_path"}
):
continue
if args is not None and name in args and args[name] is not None:
Expand Down
4 changes: 4 additions & 0 deletions montreal_forced_aligner/acoustic_modeling/lda.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,8 @@ def _run(self):
]
for dict_id in job.dictionary_ids:
ali_path = job.construct_path(self.working_directory, "ali", "ark", dict_id)
if not ali_path.exists():
continue
lda_logger.debug(f"Processing {ali_path}")
feat_path = job.construct_path(
job.corpus.current_subset_directory, "feats", "scp", dictionary_id=dict_id
Expand Down Expand Up @@ -164,6 +166,8 @@ def _run(self) -> typing.Generator[int]:
]
for dict_id in job.dictionary_ids:
ali_path = job.construct_path(self.working_directory, "ali", "ark", dict_id)
if not ali_path.exists():
continue
lda_logger.debug(f"Processing {ali_path}")
feature_archive = job.construct_feature_archive(self.working_directory, dict_id)
alignment_archive = AlignmentArchive(ali_path)
Expand Down
20 changes: 14 additions & 6 deletions montreal_forced_aligner/acoustic_modeling/monophone.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _run(self):
num_error = 0
tot_like = 0.0
tot_t = 0.0
for d in job.dictionaries:
for d in job.training_dictionaries:
dict_id = d.id
train_logger.debug(f"Aligning for dictionary {d.name} ({d.id})")
train_logger.debug(f"Aligning with model: {self.model_path}")
Expand Down Expand Up @@ -302,14 +302,22 @@ def _trainer_initialization(self) -> None:
tree_path = self.working_directory.joinpath("tree")
init_log_path = self.working_log_directory.joinpath("init.log")
job = self.jobs[0]
dict_id = job.dictionary_ids[0]
feature_archive = job.construct_feature_archive(self.working_directory, dict_id)
feats = []
with kalpy_logger("kalpy.train", init_log_path) as train_logger:
for i, (_, mat) in enumerate(feature_archive):
if i > 10:
dict_index = 0
while len(feats) < 10:
try:
dict_id = job.dictionary_ids[dict_index]
except IndexError:
break
feats.append(mat)
feature_archive = job.construct_feature_archive(self.working_directory, dict_id)
for i, (_, mat) in enumerate(feature_archive):
if i > 10:
break
feats.append(mat)
dict_index += 1
if not feats:
raise Exception("Could not initialize monophone model due to lack of features")
shared_phones = self.worker.shared_phones_set_symbols()
topo = read_topology(self.worker.topo_path)
gmm_init_mono(topo, feats, shared_phones, str(self.model_path), str(tree_path))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,12 @@ def setup(self):
previous_directory = self.previous_aligner.working_directory
for j in self.jobs:
for p in j.construct_path_dictionary(previous_directory, "ali", "ark").values():
if not p.exists():
continue
shutil.copy(p, wf.working_directory.joinpath(p.name))
for p in j.construct_path_dictionary(previous_directory, "words", "ark").values():
if not p.exists():
continue
shutil.copy(p, wf.working_directory.joinpath(p.name))
for f in ["final.mdl", "final.alimdl", "lda.mat", "tree"]:
p = previous_directory.joinpath(f)
Expand Down Expand Up @@ -384,6 +388,12 @@ def train_pronunciation_probabilities(self) -> None:
)
with mfa_open(silence_info_path, "r") as f:
data = json.load(f)
for k, v in data.items():
if v is None:
if "correction" in k:
data[k] = 1.0
else:
data[k] = 0.5
if self.silence_probabilities:
d.silence_probability = data["silence_probability"]
d.initial_silence_probability = data["initial_silence_probability"]
Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/acoustic_modeling/sat.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,15 @@ def _run(self):
.filter(Job.id == self.job_name)
.first()
)
for d in job.dictionaries:
for d in job.training_dictionaries:
train_logger.debug(f"Accumulating stats for dictionary {d.name} ({d.id})")
train_logger.debug(f"Accumulating stats for model: {self.model_path}")
dict_id = d.id
accumulator = TwoFeatsStatsAccumulator(self.model_path)

ali_path = job.construct_path(self.working_directory, "ali", "ark", dict_id)
if not ali_path.exists():
continue
fmllr_path = job.construct_path(
job.corpus.current_subset_directory, "trans", "scp", dict_id
)
Expand Down
28 changes: 18 additions & 10 deletions montreal_forced_aligner/acoustic_modeling/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ def _run(self) -> typing.Generator[typing.Tuple[int, str]]:
transition_model, acoustic_model = read_gmm_model(self.model_path)
for dict_id in job.dictionary_ids:
ali_path = job.construct_path(self.working_directory, "ali", "ark", dict_id)
if not ali_path.exists():
continue
transition_accs = DoubleVector(transition_model.NumTransitionIds() + 1)
alignment_archive = AlignmentArchive(ali_path)
for alignment in alignment_archive:
Expand Down Expand Up @@ -523,6 +525,8 @@ def quality_check_subset(self):
self.working_directory, "temp_ali", "ark"
)
for dict_id, ali_path in ali_paths.items():
if not ali_path.exists():
continue
new_path = temp_ali_paths[dict_id]
write_specifier = generate_write_specifier(new_path)
writer = Int32VectorWriter(write_specifier)
Expand Down Expand Up @@ -577,15 +581,20 @@ def train(self) -> None:
self.current_acoustic_model = AcousticModel(
previous.exported_model_path, self.working_directory
)
self.align()
with self.session() as session:
session.query(WordInterval).delete()
session.query(PhoneInterval).delete()
session.commit()
self.collect_alignments()
self.analyze_alignments()
if self.current_subset != 0:
self.quality_check_subset()
if (
not self.current_workflow.done
or not self.current_workflow.working_directory.exists()
):
logger.debug(f"Skipping {self.current_aligner.identifier} alignments")
self.align()
with self.session() as session:
session.query(WordInterval).delete()
session.query(PhoneInterval).delete()
session.commit()
self.collect_alignments()
self.analyze_alignments()
if self.current_subset != 0:
self.quality_check_subset()

self.set_current_workflow(trainer.identifier)
if trainer.identifier.startswith("pronunciation_probabilities"):
Expand Down Expand Up @@ -721,7 +730,6 @@ def align_options(self) -> MetaDict:
options = self.current_aligner.align_options
else:
options = super().align_options
options["boost_silence"] = max(1.25, options["boost_silence"])
return options

def align(self) -> None:
Expand Down
33 changes: 29 additions & 4 deletions montreal_forced_aligner/acoustic_modeling/triphone.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,12 @@ def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
train_logger.debug(f"Previous model path: {self.align_model_path}")
train_logger.debug(f"Model path: {self.model_path}")
train_logger.debug(f"Tree path: {self.tree_path}")
for d in job.dictionaries:
for d in job.training_dictionaries:
dict_id = d.id
train_logger.debug(f"Converting alignments for {d.name}")
ali_path = self.ali_paths[dict_id]
if not ali_path.exists():
continue
new_ali_path = self.new_ali_paths[dict_id]
train_logger.debug(f"Old alignments: {ali_path}")
train_logger.debug(f"New alignments: {new_ali_path}")
Expand Down Expand Up @@ -159,12 +161,14 @@ def _run(self):
.filter(Phone.phone_type.in_([PhoneType.silence, PhoneType.oov]))
.order_by(Phone.mapping_id)
]
for d in job.dictionaries:
for d in job.training_dictionaries:
train_logger.debug(f"Accumulating stats for dictionary {d.name} ({d.id})")
train_logger.debug(f"Accumulating stats for model: {self.model_path}")
dict_id = d.id
feature_archive = job.construct_feature_archive(self.working_directory, dict_id)
ali_path = job.construct_path(self.working_directory, "ali", "ark", dict_id)
if not ali_path.exists():
continue
train_logger.debug("Feature Archive information:")
train_logger.debug(f"File: {feature_archive.file_name}")
train_logger.debug(f"CMVN: {feature_archive.cmvn_read_specifier}")
Expand Down Expand Up @@ -397,8 +401,29 @@ def _setup_tree(self, init_from_previous=False, initial_mix_up=True) -> None:
train_logger.debug(f"Phone sets: {phone_sets}")
questions = automatically_obtain_questions(tree_stats, phone_sets, [1], 1)
train_logger.debug(f"Automatically obtained {len(questions)} questions")
for v in self.worker.extra_questions_mapping.values():
questions.append(sorted([self.phone_mapping[x] for x in v]))
train_logger.debug("Automatic questions:")
for q_set in questions:
train_logger.debug(", ".join([self.reversed_phone_mapping[x] for x in q_set]))

# Remove questions containing silence and other phones
train_logger.debug("Filtering the following sets for containing silence phone:")
silence_phone_id = self.phone_mapping[self.optional_silence_phone]
silence_sets = [
x for x in questions if silence_phone_id in x and x != [silence_phone_id]
]
for q_set in silence_sets:
train_logger.debug(", ".join([self.reversed_phone_mapping[x] for x in q_set]))
questions = [
x for x in questions if silence_phone_id not in x or x == [silence_phone_id]
]

extra_questions = self.worker.extra_questions_mapping
if extra_questions:
train_logger.debug(f"Adding {len(extra_questions)} questions")
train_logger.debug("Extra questions:")
for v in self.worker.extra_questions_mapping.values():
questions.append(sorted([self.phone_mapping[x] for x in v]))
train_logger.debug(", ".join(v))
train_logger.debug(f"{len(questions)} total questions")

build_tree(
Expand Down
Loading

0 comments on commit 76c46a1

Please sign in to comment.