Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2025-04-28-better-scores-worse-generation #178

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
267 changes: 267 additions & 0 deletions _posts/2025-04-28-better-scores-worse-generation.md

Large diffs are not rendered by default.

113 changes: 113 additions & 0 deletions assets/bibliography/2025-04-28-better-scores-worse-generation.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
@article{shi2024espnet,
  author  = {Shi, Jiatong and Tian, Jinchuan and Wu, Yihan and Jung, Jee-weon and Yip, Jia Qi and Masuyama, Yoshiki and Chen, William and Wu, Yuning and Tang, Yuxun and Baali, Massa and Alharhi, Dareen and Zhang, Dong and Deng, Ruifan and Srivastava, Tejes and Wu, Haibin and Liu, Alexander H. and Raj, Bhiksha and Jin, Qin and Song, Ruihua and Watanabe, Shinji},
  title   = {ESPnet-Codec: Comprehensive Training and Evaluation of Neural Codecs for Audio, Music, and Speech},
  journal = {arXiv preprint arXiv:2409.15897},
  year    = {2024},
}

@inproceedings{lee2023bigvgan,
  author    = {Lee, Sang-gil and Ping, Wei and Ginsburg, Boris and Catanzaro, Bryan and Yoon, Sungroh},
  title     = {BigVGAN: A universal neural vocoder with large-scale training},
  booktitle = {ICLR},
  year      = {2023},
}

@article{soundstream,
  author  = {Zeghidour, Neil and Luebs, Alejandro and Omran, Ahmed and Skoglund, Jan and Tagliasacchi, Marco},
  title   = {SoundStream: An End-to-End Neural Audio Codec},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume  = {30},
  pages   = {495--507},
  year    = {2022},
}

@article{defossez2022highfi,
  author  = {Défossez, Alexandre and Copet, Jade and Synnaeve, Gabriel and Adi, Yossi},
  title   = {High Fidelity Neural Audio Compression},
  journal = {arXiv preprint arXiv:2210.13438},
  year    = {2022},
}

@inproceedings{dac,
  author    = {Kumar, Rithesh and Seetharaman, Prem and Luebs, Alejandro and Kumar, Ishaan and Kumar, Kundan},
  title     = {High-fidelity audio compression with improved RVQGAN},
  booktitle = {NeurIPS},
  year      = {2023},
}

@article{zielinski2008bias,
  author  = {Zieliński, Sławomir and Rumsey, Francis and Bech, Søren},
  title   = {On Some Biases Encountered in Modern Audio Quality Listening Tests—A Review},
  journal = {Journal of the Audio Engineering Society},
  volume  = {56},
  number  = {6},
  pages   = {427--451},
  month   = jun,
  year    = {2008},
}

@article{medin1993,
  author  = {Medin, Douglas L. and Goldstone, Robert L. and Gentner, Dedre},
  title   = {Respects for Similarity},
  journal = {Psychological Review},
  volume  = {100},
  number  = {2},
  pages   = {254--278},
  month   = apr,
  year    = {1993},
}

@article{fsd50k,
  author  = {Fonseca, Eduardo and Favory, Xavier and Pons, Jordi and Font, Frederic and Serra, Xavier},
  title   = {FSD50K: An Open Dataset of Human-Labeled Sound Events},
  journal = {IEEE/ACM Transactions on Audio, Speech, and Language Processing},
  volume  = {30},
  pages   = {829--852},
  month   = dec,
  year    = {2021},
}

@article{suied2014,
  author  = {Suied, Clara and Agus, Trevor R. and Thorpe, Simon J. and Mesgarani, Nima and Pressnitzer, Daniel},
  title   = {Auditory gist: Recognition of very short sounds from timbre cues},
  journal = {Journal of the Acoustical Society of America},
  volume  = {135},
  pages   = {1380--1391},
  year    = {2014},
}

@article{visqol,
  author  = {Hines, Andrew and Skoglund, Jan and Kokaram, Anil and Harte, Naomi},
  title   = {ViSQOL: an objective speech quality model},
  journal = {EURASIP Journal on Audio, Speech, and Music Processing},
  volume  = {2015},
  number  = {13},
  pages   = {1--18},
  year    = {2015},
}

@inproceedings{dpam,
  author    = {Manocha, Pranay and Finkelstein, Adam and Zhang, Richard and Bryan, Nicholas J. and Mysore, Gautham J. and Jin, Zeyu},
  title     = {A Differentiable Perceptual Audio Metric Learned from Just Noticeable Differences},
  booktitle = {Interspeech},
  year      = {2020},
}

@inproceedings{zhang2018,
  author    = {Zhang, Richard and Isola, Phillip and Efros, Alexei A. and Shechtman, Eli and Wang, Oliver},
  title     = {The Unreasonable Effectiveness of Deep Features as a Perceptual Metric},
  booktitle = {2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2018},
}

@inproceedings{sundaram2024,
  author    = {Sundaram, Shobhita and Fu, Stephanie and Muttenthaler, Lukas and Tamir, Netanel Y. and Chai, Lucy and Kornblith, Simon and Darrell, Trevor and Isola, Phillip},
  title     = {When Does Perceptual Alignment Benefit Vision Representations?},
  booktitle = {NeurIPS},
  year      = {2024},
}

@inproceedings{patrini2017,
  author    = {Patrini, Giorgio and Rozza, Alessandro and Menon, Aditya Krishna and Nock, Richard and Qu, Lizhen},
  title     = {Making Deep Neural Networks Robust to Label Noise: A Loss Correction Approach},
  booktitle = {2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2017},
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading