AudioLabs - Publications

Publications

Maman, Ben, Zalkow, Frank, Berendes, Hans-Ulrich, Sani, Paolo, Dittmar, Christian, and Müller, Meinard
Adapting a Diffusion-based Music Synthesis Model to Human Voice Conversion
https://benadar293.github.io/voice-conversion, 2025.

% Online demo/project page; the entry carries no DOI, so the URL in
% howpublished is its only locator. The key follows the lastnameYYYYword
% scheme used by the other hand-written entries in this file.
@misc{maman2025adapting,
  author       = {Maman, Ben and Zalkow, Frank and Berendes, Hans-Ulrich and Sani, Paolo and Dittmar, Christian and M{\"u}ller, Meinard},
  title        = {Adapting a Diffusion-based Music Synthesis Model to Human Voice Conversion},
  howpublished = {\url{https://benadar293.github.io/voice-conversion}},
  year         = {2025}
}

Maman, Ben, Zeitler, Johannes, Müller, Meinard, and Bermano, Amit H.
Multi-Aspect Conditioning for Diffusion-Based Music Synthesis: Enhancing Realism and Acoustic Control
IEEE Transactions on Audio, Speech and Language Processing, 33: 68–81, 2025. DOI

% Journal article; the page range uses the BibTeX en-dash form (--) and
% the accented surname uses the brace-wrapped escape so it sorts correctly.
@article{10771710,
  author   = {Maman, Ben and Zeitler, Johannes and M{\"u}ller, Meinard and Bermano, Amit H.},
  title    = {Multi-Aspect Conditioning for Diffusion-Based Music Synthesis: Enhancing Realism and Acoustic Control},
  journal  = {IEEE Transactions on Audio, Speech and Language Processing},
  year     = {2025},
  volume   = {33},
  pages    = {68--81},
  keywords = {Instruments;Acoustics;Recording;Timbre;Synthesizers;Diffusion models;Transformers;Training;Speech processing;Multiple signal classification;Multi-Instrument synthesis;diffusion},
  doi      = {10.1109/TASLP.2024.3507553}
}

Yaffe, Jonathan, Maman, Ben, Müller, Meinard, and Bermano, Amit
Count The Notes: Histogram-Based Supervision for Automatic Music Transcription
In Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), Daejeon, South Korea, 2025.

@inproceedings{yaffe2025count,
  author    = {Yaffe, Jonathan and Maman, Ben and M{\"u}ller, Meinard and Bermano, Amit},
  title     = {Count The Notes: Histogram-Based Supervision for Automatic Music Transcription},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), Daejeon, South Korea},
  year      = {2025}
}

Berendes, Hans-Ulrich, Maman, Ben, and Müller, Meinard
Tuning Matters: Analyzing Musical Tuning Bias in Neural Vocoders
In Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), Daejeon, South Korea, 2025.

@inproceedings{berendes2025tuning,
  author    = {Berendes, Hans-Ulrich and Maman, Ben and M{\"u}ller, Meinard},
  title     = {Tuning Matters: Analyzing Musical Tuning Bias in Neural Vocoders},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), Daejeon, South Korea},
  year      = {2025}
}

Zeitler, Johannes, Maman, Ben, and Müller, Meinard
Robust and Accurate Audio Synchronization using Raw Features from Transcription Models
In Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), San Francisco, USA, 2024.

@inproceedings{zeitler2024robust,
  author    = {Zeitler, Johannes and Maman, Ben and M{\"u}ller, Meinard},
  title     = {Robust and Accurate Audio Synchronization using Raw Features from Transcription Models},
  booktitle = {Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), San Francisco, USA},
  year      = {2024}
}

Maman, Ben, Zeitler, Johannes, Müller, Meinard, and Bermano, Amit H.
Performance Conditioning for Diffusion-Based Multi-Instrument Music Synthesis
In ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP): 5045–5049, 2024. DOI

% Conference paper; only populated fields are kept (empty fields trigger
% BibTeX warnings) and the page range uses the en-dash form (--).
@inproceedings{10445979,
  author    = {Maman, Ben and Zeitler, Johannes and M{\"u}ller, Meinard and Bermano, Amit H.},
  title     = {Performance Conditioning for Diffusion-Based Multi-Instrument Music Synthesis},
  booktitle = {ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  year      = {2024},
  pages     = {5045--5049},
  keywords  = {Instruments;Prototypes;Process control;Recording;Timbre;Task analysis;Speech processing;Multi-Instrument Synthesis;Diffusion},
  doi       = {10.1109/ICASSP48485.2024.10445979}
}

Maman, Ben and Bermano, Amit H.
Unaligned Supervision for Automatic Music Transcription in the Wild
In International Conference on Machine Learning (ICML), Baltimore, Maryland, USA: 14918–14934, 2022.

@inproceedings{maman2022unaligned,
  author       = {Maman, Ben and Bermano, Amit H.},
  title        = {Unaligned Supervision for Automatic Music Transcription in the Wild},
  booktitle    = {International Conference on Machine Learning ({ICML}), Baltimore, Maryland, {USA}},
  year         = {2022},
  pages        = {14918--14934},
  organization = {PMLR}
}

Maman, Ben and Bermano, Amit H.
TypeNet: Towards Camera Enabled Touch Typing on Flat Surfaces through Self-Refinement
In IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), Waikoloa, HI, USA: 567–576, 2022. DOI

% DBLP export. {TypeNet} is brace-protected so styles that sentence-case
% titles keep its internal capital instead of rendering "Typenet".
@inproceedings{DBLP:conf/wacv/MamanB22,
  author       = {Maman, Ben and Bermano, Amit H.},
  title        = {{TypeNet}: Towards Camera Enabled Touch Typing on Flat Surfaces through Self-Refinement},
  booktitle    = {{IEEE/CVF} Winter Conference on Applications of Computer Vision ({WACV}), Waikoloa, HI, USA},
  pages        = {567--576},
  publisher    = {{IEEE}},
  year         = {2022},
  url          = {https://doi.org/10.1109/WACV51458.2022.00064},
  doi          = {10.1109/WACV51458.2022.00064},
  timestamp    = {Sat, 30 Sep 2023 09:58:43 +0200},
  biburl       = {https://dblp.org/rec/conf/wacv/MamanB22.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

International Audio Laboratories Erlangen

Publications