Merge pull request #2 from hungdinhxuan/challenges/cyberaicup2024

Challenges/cyberaicup2024
hungdinhxuan · Oct 3, 2024 · f3666ce · f3666ce
2 parents 0c42734 + 5d4afa6
commit f3666ce
Show file tree

Hide file tree

Showing 69 changed files with 8,418 additions and 685 deletions.
diff --git a/.gitignore b/.gitignore
@@ -152,3 +152,6 @@ configs/local/default.yaml
 
 # Aim logging
 .aim
+
+# Neptune logging
+.neptune
diff --git a/configs/callbacks/default.yaml b/configs/callbacks/default.yaml
@@ -1,21 +1,26 @@
 defaults:
   - model_checkpoint
   - early_stopping
-  - model_summary
+#  - model_summary
   - rich_progress_bar
+  - learning_rate_logger
   - _self_
 
+learning_rate_logger:
+  logging_interval: 'epoch'
+
 model_checkpoint:
   dirpath: ${paths.output_dir}/checkpoints
   filename: "epoch_{epoch:03d}"
   monitor: "val/acc"
   mode: "max"
   save_last: True
   auto_insert_metric_name: False
+  save_top_k: 5 # save k best models (determined by above metric)
 
 early_stopping:
   monitor: "val/acc"
-  patience: 100
+  patience: 20
   mode: "max"
 
 model_summary:

diff --git a/configs/callbacks/learning_rate_logger.yaml b/configs/callbacks/learning_rate_logger.yaml
@@ -0,0 +1,3 @@
+learning_rate_logger:
+  _target_: lightning.pytorch.callbacks.LearningRateMonitor
+  logging_interval: 'step'
diff --git a/configs/data/asvspoof.yaml b/configs/data/asvspoof.yaml
@@ -1,5 +1,11 @@
-_target_: src.data.asvspoof_aasistssl_reproduce_datamodule.ASVSpoofDataModule
+_target_: src.data.asvspoof_datamodule.ASVSpoofDataModule
 data_dir: ${oc.env:ASVSPOOF_PATH}
 batch_size: 16 # Needs to be divisible by the number of devices (e.g., if in a distributed setup)
 num_workers: 4
 pin_memory: False
+args:
+  # The sampling rate of the audio files
+  sampling_rate: 16000
+  cut: 64000
+  padding_type: zero
+  random_start: True
diff --git a/configs/data/asvspoof_reproduce.yaml b/configs/data/asvspoof_reproduce.yaml
@@ -0,0 +1,5 @@
+_target_: src.data.asvspoof_aasistssl_reproduce_datamodule.ASVSpoofDataModule
+data_dir: ${oc.env:ASVSPOOF_PATH}
+batch_size: 16 # Needs to be divisible by the number of devices (e.g., if in a distributed setup)
+num_workers: 4
+pin_memory: False
diff --git a/configs/data/cyberaicup_track2.yaml b/configs/data/cyberaicup_track2.yaml
@@ -0,0 +1,35 @@
+_target_: src.data.normal_datamodule.NormalDataModule
+data_dir: ${oc.env:CYBERCUP2_PATH}
+batch_size: 16 # Needs to be divisible by the number of devices (e.g., if in a distributed setup)
+num_workers: 4
+pin_memory: False
+args:
+  # Data portion and signal-augmentation parameters (names look like RawBoost options — confirm)
+  portion: 0.2 # 20% of the data
+  nBands: 5
+  minF: 20
+  maxF: 8000
+  minBW: 100
+  maxBW: 1000
+  minCoeff: 10
+  maxCoeff: 100
+  minG: 0
+  maxG: 0
+  minBiasLinNonLin: 5
+  maxBiasLinNonLin: 20
+  N_f: 5
+  P: 10
+  g_sd: 2
+  SNRmin: 10
+  SNRmax: 40
+
+  data:
+    augmentation_methods: [] 
+    trim_length: 100000 # 6.25s
+    wav_samp_rate: 16000
+    online_aug: true
+    aug_dir: ${oc.env:LARGE_CORPUS_FOR_ASVSPOOF5}/aug
+    noise_path: ${oc.env:NOISE_PATH}
+    rir_path: ${oc.env:RIR_PATH}
+    repeat_pad: false #  If true, repeat the audio to the trim_length
+    random_start: false # If true, randomly pick a start point for the audio
diff --git a/configs/data/cyberaicup_track2_mixed.yaml b/configs/data/cyberaicup_track2_mixed.yaml
@@ -0,0 +1,35 @@
+_target_: src.data.cyber2_mixed_normal_datamodule.NormalDataModule
+data_dir: ${oc.env:CYBERCUP2_PATH}
+batch_size: 16 # Needs to be divisible by the number of devices (e.g., if in a distributed setup)
+num_workers: 4
+pin_memory: False
+args:
+  # Data portion and signal-augmentation parameters (names look like RawBoost options — confirm)
+  portion: 0.2 # 20% of the data
+  nBands: 5
+  minF: 20
+  maxF: 8000
+  minBW: 100
+  maxBW: 1000
+  minCoeff: 10
+  maxCoeff: 100
+  minG: 0
+  maxG: 0
+  minBiasLinNonLin: 5
+  maxBiasLinNonLin: 20
+  N_f: 5
+  P: 10
+  g_sd: 2
+  SNRmin: 10
+  SNRmax: 40
+
+  data:
+    augmentation_methods: [] 
+    trim_length: 100000 # 6.25s
+    wav_samp_rate: 16000
+    online_aug: true
+    aug_dir: ${oc.env:LARGE_CORPUS_FOR_ASVSPOOF5}/aug
+    noise_path: ${oc.env:NOISE_PATH}
+    rir_path: ${oc.env:RIR_PATH}
+    repeat_pad: false #  If true, repeat the audio to the trim_length
+    random_start: false # If true, randomly pick a start point for the audio
diff --git a/configs/data/normal_largecorpus_for_asvspoof5.yaml b/configs/data/normal_largecorpus_for_asvspoof5.yaml
@@ -0,0 +1,37 @@
+_target_: src.data.normal_datamodule.NormalDataModule
+data_dir: ${oc.env:LARGE_CORPUS_FOR_ASVSPOOF5}
+batch_size: 2 # NOTE(review): value and rationale copied from the SCL config (mini-batch re-organization); this file targets NormalDataModule — confirm
+num_workers: 4
+pin_memory: False
+args:
+  # Data portion and signal-augmentation parameters (names look like RawBoost options — confirm)
+  portion: 0.2 # 20% of the data
+  nBands: 5
+  minF: 20
+  maxF: 8000
+  minBW: 100
+  maxBW: 1000
+  minCoeff: 10
+  maxCoeff: 100
+  minG: 0
+  maxG: 0
+  minBiasLinNonLin: 5
+  maxBiasLinNonLin: 20
+  N_f: 5
+  P: 10
+  g_sd: 2
+  SNRmin: 10
+  SNRmax: 40
+
+  data:
+
+    augmentation_methods:
+      ["RawBoost12", "background_noise_5_15", "reverb_1", "telephone_g722"]
+    trim_length: 100000 # 6.25s
+    wav_samp_rate: 16000
+    online_aug: true
+    aug_dir: ${oc.env:LARGE_CORPUS_FOR_ASVSPOOF5}/aug
+    noise_path: ${oc.env:NOISE_PATH}
+    rir_path: ${oc.env:RIR_PATH}
+    repeat_pad: false #  If true, repeat the audio to the trim_length
+    random_start: false # If true, randomly pick a start point for the audio
diff --git a/configs/data/scl_normal_largecorpus_for_asvspoof5.yaml b/configs/data/scl_normal_largecorpus_for_asvspoof5.yaml
@@ -0,0 +1,39 @@
+_target_: src.data.scl_datamodule.SclNormalDataModule
+data_dir: ${oc.env:LARGE_CORPUS_FOR_ASVSPOOF5}
+batch_size: 2 # Kept small because the SCL datamodule re-organizes the mini-batch size
+num_workers: 4
+pin_memory: False
+args:
+  # Data portion and signal-augmentation parameters (names look like RawBoost options — confirm)
+  portion: 0.2 # 20% of the data
+  nBands: 5
+  minF: 20
+  maxF: 8000
+  minBW: 100
+  maxBW: 1000
+  minCoeff: 10
+  maxCoeff: 100
+  minG: 0
+  maxG: 0
+  minBiasLinNonLin: 5
+  maxBiasLinNonLin: 20
+  N_f: 5
+  P: 10
+  g_sd: 2
+  SNRmin: 10
+  SNRmax: 40
+
+  data:
+    vocoders: ["hifigan", "hn-sinc-nsf-hifi", "waveglow"]
+    augmentation_methods:
+      ["RawBoost12", "background_noise_5_15", "reverb_1", "telephone_g722"]
+    num_additional_real: 2
+    num_additional_spoof: 3
+    trim_length: 100000 # 6.25s at wav_samp_rate 16000
+    wav_samp_rate: 16000
+    online_aug: true
+    aug_dir: ${oc.env:LARGE_CORPUS_FOR_ASVSPOOF5}/aug # same env root as data_dir; replaces a machine-specific absolute path
+    noise_path: ${oc.env:NOISE_PATH,'/data/Datasets/musan/asvspoof5'} # env override; previous hard-coded path kept as fallback
+    rir_path: ${oc.env:RIR_PATH,'/data/Datasets/RIRS_NOISES'} # env override; previous hard-coded path kept as fallback
+    repeat_pad: false # If true, repeat the audio to the trim_length
+    random_start: false # If true, randomly pick a start point for the audio
diff --git a/configs/eval_cyberaicup2.yaml b/configs/eval_cyberaicup2.yaml
@@ -0,0 +1,77 @@
+# @package _global_
+
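+# evaluation-only config: training is skipped and only the test stage runs (see train/test below)
+# NOTE(review): the template hint "python train.py experiment=example" does not match this file — confirm the real command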
+
+defaults:
+  - override /data: cyberaicup_track2_mixed
+  - override /model: xlsr_conformertcm
+  - override /callbacks: none
+  - override /trainer: default
+
+# all parameters below will be merged with parameters from default configurations set above
+# this allows you to overwrite only specified parameters
+
+#tags: ["cyberaicup_track2_mixed", "xlsr_conformertcm"]
+
+seed: 12345
+
+trainer:
+  min_epochs: 30
+  max_epochs: 100
+  gradient_clip_val: 0.0 # 0.0 means don't clip
+  accelerator: cuda
+
+model:
+  optimizer:
+    lr: 0.00001
+  args:
+    loss_type: 4
+  net: null
+  compile: false
+
+  scheduler:
+    _target_: torch.optim.lr_scheduler.CyclicLR
+    _partial_: true
+    cycle_momentum: false
+    base_lr: 0.000001
+    max_lr: 0.00001
+    mode: "exp_range"
+    gamma: 0.85
+
+  score_save_path: logs/eval/cyberaicup_track2_mixed_xlsr_conformertcm_epoch_25.txt
+
+
+data:
+  batch_size: 10
+  num_workers: 8
+  args:
+    portion: 1 # 100% of the data
+    data:
+      trim_length: 160000 # 10s
+      repeat_pad: false #  If true, repeat the audio to the trim_length
+      random_start: true # If true, randomly pick a start point for the audio
+      augmentation_methods: []
+
+logger: null
+
+
+# task name, determines output directory path
+task_name: "eval"
+
+# tags to help you identify your experiments
+# you can overwrite this in experiment configs
+# overwrite from command line with `python train.py tags="[first_tag, second_tag]"`
+# appending lists from command line is currently not supported :(
+# https://github.com/facebookresearch/hydra/issues/1547
+tags: ["dev"]
+
+# set False to skip model training
+train: False
+
+# evaluate on test set, using best model weights achieved during training
+# lightning chooses best weights based on the metric specified in checkpoint callback
+test: True
+
+# checkpoint to load; training is disabled above (train: False), so this is presumably the checkpoint that gets evaluated — confirm
+ckpt_path: logs/train/checkpoints/last-v3.ckpt
diff --git a/configs/experiment/test.yaml b/configs/experiment/test.yaml
diff --git a/configs/experiment/test_aasist.yaml b/configs/experiment/test_aasist.yaml
@@ -4,15 +4,15 @@
 # python train.py experiment=example
 
 defaults:
-  - override /data: asvspoof
+  - override /data: asvspoof_reproduce
   - override /model: aasist
   - override /callbacks: default
   - override /trainer: default
 
 # all parameters below will be merged with parameters from default configurations set above
 # this allows you to overwrite only specified parameters
 
-tags: ["asvspoof", "aasist"]
+tags: ["asvspoof_reproduce", "aasist"]
 
 seed: 12345
 

diff --git a/configs/experiment/test_ssl.yaml b/configs/experiment/test_ssl.yaml
@@ -4,15 +4,15 @@
 # python train.py experiment=example
 
 defaults:
-  - override /data: asvspoof
+  - override /data: asvspoof_reproduce
   - override /model: xlsr_aasist
   - override /callbacks: default
   - override /trainer: default
 
 # all parameters below will be merged with parameters from default configurations set above
 # this allows you to overwrite only specified parameters
 
-tags: ["asvspoof", "xlsr_aasist"]
+tags: ["asvspoof_reproduce", "xlsr_aasist"]
 
 seed: 12345