BibTeX records: Haohan Guo

download as .bib file

% Conference paper, ICASSP 2024 (record exported from dblp).
@inproceedings{DBLP:conf/icassp/LuWGL0M24,
  author     = {Lu, Hui and
                Wu, Xixin and
                Guo, Haohan and
                Liu, Songxiang and
                Wu, Zhiyong and
                Meng, Helen},
  title      = {Unifying One-Shot Voice Conversion and Cloning with Disentangled Speech
                Representations},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages      = {11141--11145},
  publisher  = {{IEEE}},
  year       = {2024},
  doi        = {10.1109/ICASSP48485.2024.10446296},
  url        = {https://doi.org/10.1109/ICASSP48485.2024.10446296},
  timestamp  = {Wed, 07 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/LuWGL0M24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICASSP 2024. Shares its title and author list with the
% CoRR record DBLP:journals/corr/abs-2401-04152 in this file.
@inproceedings{DBLP:conf/icassp/0002MCGWLM24,
  author     = {Kang, Jiawen and
                Meng, Lingwei and
                Cui, Mingyu and
                Guo, Haohan and
                Wu, Xixin and
                Liu, Xunying and
                Meng, Helen},
  title      = {Cross-Speaker Encoding Network for Multi-Talker Speech Recognition},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages      = {11986--11990},
  publisher  = {{IEEE}},
  year       = {2024},
  doi        = {10.1109/ICASSP48485.2024.10446249},
  url        = {https://doi.org/10.1109/ICASSP48485.2024.10446249},
  timestamp  = {Wed, 07 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/0002MCGWLM24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICML 2024 (OpenReview). The system name in the title is
% brace-protected so sentence-casing styles keep its internal capital letter.
@inproceedings{DBLP:conf/icml/YangT0HLGCSZ0ZW24,
  author     = {Yang, Dongchao and
                Tian, Jinchuan and
                Tan, Xu and
                Huang, Rongjie and
                Liu, Songxiang and
                Guo, Haohan and
                Chang, Xuankai and
                Shi, Jiatong and
                Zhao, Sheng and
                Bian, Jiang and
                Zhao, Zhou and
                Wu, Xixin and
                Meng, Helen M.},
  title      = {{UniAudio}: Towards Universal Audio Generation with Large Language Models},
  booktitle  = {Forty-first International Conference on Machine Learning, {ICML} 2024,
                Vienna, Austria, July 21-27, 2024},
  publisher  = {OpenReview.net},
  year       = {2024},
  url        = {https://openreview.net/forum?id=SRmZw7nEGW},
  timestamp  = {Mon, 02 Sep 2024 16:45:29 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/YangT0HLGCSZ0ZW24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Shares its title and author list with the ICASSP 2024
% record DBLP:conf/icassp/0002MCGWLM24 in this file.
@article{DBLP:journals/corr/abs-2401-04152,
  author     = {Kang, Jiawen and
                Meng, Lingwei and
                Cui, Mingyu and
                Guo, Haohan and
                Wu, Xixin and
                Liu, Xunying and
                Meng, Helen},
  title      = {Cross-Speaker Encoding Network for Multi-Talker Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2401.04152},
  year       = {2024},
  doi        = {10.48550/ARXIV.2401.04152},
  url        = {https://doi.org/10.48550/arXiv.2401.04152},
  eprinttype = {arXiv},
  eprint     = {2401.04152},
  timestamp  = {Tue, 02 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-04152.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Names are in "von Last, First" form so particles and
% compound surnames parse unambiguously.
% NOTE(review): "Lopez Gambino" is entered as a two-word surname; confirm
% against the author's own byline.
% The colon after the acronym sits outside the brace group so BibTeX keeps the
% capital on the first word of the subtitle.
@article{DBLP:journals/corr/abs-2402-08093,
  author     = {Lajszczak, Mateusz and
                C{\'{a}}mbara, Guillermo and
                Li, Yang and
                Beyhan, Fatih and
                van Korlaar, Arent and
                Yang, Fan and
                Joly, Arnaud and
                Mart{\'{\i}}n{-}Cortinas, {\'{A}}lvaro and
                Abbas, Ammar and
                Michalski, Adam and
                Moinet, Alexis and
                Karlapati, Sri and
                Muszynska, Ewa and
                Guo, Haohan and
                Putrycz, Bartosz and
                L{\'{o}}pez Gambino, Soledad and
                Yoo, Kayeon and
                Sokolova, Elena and
                Drugman, Thomas},
  title      = {{BASE} {TTS}: Lessons from building a billion-parameter Text-to-Speech
                model on 100K hours of data},
  journal    = {CoRR},
  volume     = {abs/2402.08093},
  year       = {2024},
  doi        = {10.48550/ARXIV.2402.08093},
  url        = {https://doi.org/10.48550/arXiv.2402.08093},
  eprinttype = {arXiv},
  eprint     = {2402.08093},
  timestamp  = {Mon, 19 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-08093.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. The system name in the title is brace-protected so
% sentence-casing styles keep its internal capital letter.
@article{DBLP:journals/corr/abs-2406-02328,
  author     = {Yang, Dongchao and
                Wang, Dingdong and
                Guo, Haohan and
                Chen, Xueyuan and
                Wu, Xixin and
                Meng, Helen},
  title      = {{SimpleSpeech}: Towards Simple and Efficient Text-to-Speech with Scalar
                Latent Transformer Diffusion Models},
  journal    = {CoRR},
  volume     = {abs/2406.02328},
  year       = {2024},
  doi        = {10.48550/ARXIV.2406.02328},
  url        = {https://doi.org/10.48550/arXiv.2406.02328},
  eprinttype = {arXiv},
  eprint     = {2406.02328},
  timestamp  = {Fri, 05 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-02328.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record exported from dblp.
@article{DBLP:journals/corr/abs-2406-02940,
  author     = {Guo, Haohan and
                Xie, Fenglong and
                Yang, Dongchao and
                Lu, Hui and
                Wu, Xixin and
                Meng, Helen},
  title      = {Addressing Index Collapse of Large-Codebook Speech Tokenizer with
                Dual-Decoding Product-Quantized Variational Auto-Encoder},
  journal    = {CoRR},
  volume     = {abs/2406.02940},
  year       = {2024},
  doi        = {10.48550/ARXIV.2406.02940},
  url        = {https://doi.org/10.48550/arXiv.2406.02940},
  eprinttype = {arXiv},
  eprint     = {2406.02940},
  timestamp  = {Fri, 05 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-02940.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Only the system name is brace-protected in the title;
% the article "A" is left unbraced so styles may recase it.
@article{DBLP:journals/corr/abs-2406-10056,
  author     = {Yang, Dongchao and
                Guo, Haohan and
                Wang, Yuanyuan and
                Huang, Rongjie and
                Li, Xiang and
                Tan, Xu and
                Wu, Xixin and
                Meng, Helen},
  title      = {{UniAudio} 1.5: Large Language Model-driven Audio Codec is A Few-shot
                Audio Task Learner},
  journal    = {CoRR},
  volume     = {abs/2406.10056},
  year       = {2024},
  doi        = {10.48550/ARXIV.2406.10056},
  url        = {https://doi.org/10.48550/arXiv.2406.10056},
  eprinttype = {arXiv},
  eprint     = {2406.10056},
  timestamp  = {Thu, 15 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-10056.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article, volume 31 (2023). The journal is stored under its full name
% rather than an abbreviation, and the title colon sits outside the brace group
% so BibTeX keeps the capital on the first word of the subtitle.
@article{DBLP:journals/taslp/GuoXWSM23,
  author     = {Guo, Haohan and
                Xie, Fenglong and
                Wu, Xixin and
                Soong, Frank K. and
                Meng, Helen},
  title      = {{MSMC-TTS}: Multi-Stage Multi-Codebook {VQ-VAE} Based Neural {TTS}},
  journal    = {{IEEE/ACM} Transactions on Audio, Speech, and Language Processing},
  volume     = {31},
  pages      = {1811--1824},
  year       = {2023},
  doi        = {10.1109/TASLP.2023.3272470},
  url        = {https://doi.org/10.1109/TASLP.2023.3272470},
  timestamp  = {Fri, 02 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/taslp/GuoXWSM23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. The title colon sits outside the brace group so BibTeX
% keeps the capital on the first word of the subtitle, matching the other
% "Name: Subtitle" titles in this file.
@article{DBLP:journals/corr/abs-2309-00126,
  author     = {Guo, Haohan and
                Xie, Fenglong and
                Kang, Jiawen and
                Xiao, Yujia and
                Wu, Xixin and
                Meng, Helen},
  title      = {{QS-TTS}: Towards Semi-Supervised Text-to-Speech Synthesis via Vector-Quantized
                Self-Supervised Speech Representation Learning},
  journal    = {CoRR},
  volume     = {abs/2309.00126},
  year       = {2023},
  doi        = {10.48550/ARXIV.2309.00126},
  url        = {https://doi.org/10.48550/arXiv.2309.00126},
  eprinttype = {arXiv},
  eprint     = {2309.00126},
  timestamp  = {Tue, 02 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-00126.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, ICASSP 2022. Same authors as the CoRR record
% DBLP:journals/corr/abs-2201-10130; the titles differ only in the
% capitalisation of "Based".
@inproceedings{DBLP:conf/icassp/GuoZML22,
  author     = {Guo, Haohan and
                Zhou, Zhiping and
                Meng, Fanbo and
                Liu, Kai},
  title      = {Improving Adversarial Waveform Generation Based Singing Voice Conversion
                with Harmonic Signals},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {6657--6661},
  publisher  = {{IEEE}},
  year       = {2022},
  doi        = {10.1109/ICASSP43922.2022.9746709},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9746709},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/GuoZML22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, Interspeech 2022. Same authors and title wording as the
% CoRR record DBLP:journals/corr/abs-2203-01080 in this file.
% The acronym GAN is brace-protected so sentence-casing styles do not
% lower-case it.
@inproceedings{DBLP:conf/interspeech/GuoLWM22,
  author     = {Guo, Haohan and
                Lu, Hui and
                Wu, Xixin and
                Meng, Helen},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {A Multi-Scale Time-Frequency Spectrogram Discriminator for {GAN}-based
                Non-Autoregressive {TTS}},
  booktitle  = {23rd Annual Conference of the International Speech Communication Association,
                Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages      = {1566--1570},
  publisher  = {{ISCA}},
  year       = {2022},
  doi        = {10.21437/INTERSPEECH.2022-52},
  url        = {https://doi.org/10.21437/Interspeech.2022-52},
  timestamp  = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/GuoLWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, Interspeech 2022. Shares its title and author list with the
% CoRR record DBLP:journals/corr/abs-2209-10887 in this file.
% NOTE(review): the second author is spelled "Feng-Long Xie" here but
% "Fenglong Xie" in other entries of this file; confirm whether to normalise.
@inproceedings{DBLP:conf/interspeech/GuoXSWM22,
  author     = {Guo, Haohan and
                Xie, Feng{-}Long and
                Soong, Frank K. and
                Wu, Xixin and
                Meng, Helen},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {A Multi-Stage Multi-Codebook {VQ-VAE} Approach to High-Performance
                Neural {TTS}},
  booktitle  = {23rd Annual Conference of the International Speech Communication Association,
                Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages      = {1611--1615},
  publisher  = {{ISCA}},
  year       = {2022},
  doi        = {10.21437/INTERSPEECH.2022-952},
  url        = {https://doi.org/10.21437/Interspeech.2022-952},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/GuoXSWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Same authors as the ICASSP 2022 record
% DBLP:conf/icassp/GuoZML22; the titles differ only in the capitalisation
% of "based".
@article{DBLP:journals/corr/abs-2201-10130,
  author     = {Guo, Haohan and
                Zhou, Zhiping and
                Meng, Fanbo and
                Liu, Kai},
  title      = {Improving Adversarial Waveform Generation based Singing Voice Conversion
                with Harmonic Signals},
  journal    = {CoRR},
  volume     = {abs/2201.10130},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.10130},
  eprinttype = {arXiv},
  eprint     = {2201.10130},
  timestamp  = {Tue, 01 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-10130.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Same authors and title wording as the Interspeech 2022
% record DBLP:conf/interspeech/GuoLWM22 in this file.
% The acronym GAN is brace-protected so sentence-casing styles do not
% lower-case it.
@article{DBLP:journals/corr/abs-2203-01080,
  author     = {Guo, Haohan and
                Lu, Hui and
                Wu, Xixin and
                Meng, Helen},
  title      = {A Multi-Scale Time-Frequency Spectrogram Discriminator for {GAN}-based
                Non-Autoregressive {TTS}},
  journal    = {CoRR},
  volume     = {abs/2203.01080},
  year       = {2022},
  doi        = {10.48550/ARXIV.2203.01080},
  url        = {https://doi.org/10.48550/arXiv.2203.01080},
  eprinttype = {arXiv},
  eprint     = {2203.01080},
  timestamp  = {Wed, 16 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-01080.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Shares its title and author list with the Interspeech
% 2022 record DBLP:conf/interspeech/GuoXSWM22 in this file.
% NOTE(review): the second author is spelled "Feng-Long Xie" here but
% "Fenglong Xie" in other entries of this file; confirm whether to normalise.
@article{DBLP:journals/corr/abs-2209-10887,
  author     = {Guo, Haohan and
                Xie, Feng{-}Long and
                Soong, Frank K. and
                Wu, Xixin and
                Meng, Helen},
  title      = {A Multi-Stage Multi-Codebook {VQ-VAE} Approach to High-Performance
                Neural {TTS}},
  journal    = {CoRR},
  volume     = {abs/2209.10887},
  year       = {2022},
  doi        = {10.48550/ARXIV.2209.10887},
  url        = {https://doi.org/10.48550/arXiv.2209.10887},
  eprinttype = {arXiv},
  eprint     = {2209.10887},
  timestamp  = {Wed, 28 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-10887.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record exported from dblp.
@article{DBLP:journals/corr/abs-2210-15131,
  author     = {Guo, Haohan and
                Xie, Fenglong and
                Wu, Xixin and
                Lu, Hui and
                Meng, Helen},
  title      = {Towards High-Quality Neural {TTS} for Low-Resource Languages by Learning
                Compact Speech Representations},
  journal    = {CoRR},
  volume     = {abs/2210.15131},
  year       = {2022},
  doi        = {10.48550/ARXIV.2210.15131},
  url        = {https://doi.org/10.48550/arXiv.2210.15131},
  eprinttype = {arXiv},
  eprint     = {2210.15131},
  timestamp  = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-15131.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Workshop paper, SLT 2021. Same authors as the CoRR record
% DBLP:journals/corr/abs-2005-10438, whose title ends in "Voice Agent"
% (singular).
@inproceedings{DBLP:conf/slt/GuoZSHX21,
  author     = {Guo, Haohan and
                Zhang, Shaofei and
                Soong, Frank K. and
                He, Lei and
                Xie, Lei},
  title      = {Conversational End-to-End {TTS} for Voice Agents},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen,
                China, January 19-22, 2021},
  pages      = {403--409},
  publisher  = {{IEEE}},
  year       = {2021},
  doi        = {10.1109/SLT48900.2021.9383460},
  url        = {https://doi.org/10.1109/SLT48900.2021.9383460},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/slt/GuoZSHX21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Same authors as the SLT 2021 record
% DBLP:conf/slt/GuoZSHX21, whose title ends in "Voice Agents" (plural).
@article{DBLP:journals/corr/abs-2005-10438,
  author     = {Guo, Haohan and
                Zhang, Shaofei and
                Soong, Frank K. and
                He, Lei and
                Xie, Lei},
  title      = {Conversational End-to-End {TTS} for Voice Agent},
  journal    = {CoRR},
  volume     = {abs/2005.10438},
  year       = {2020},
  url        = {https://arxiv.org/abs/2005.10438},
  eprinttype = {arXiv},
  eprint     = {2005.10438},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-10438.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record exported from dblp.
@article{DBLP:journals/corr/abs-2012-01837,
  author     = {Guo, Haohan and
                Lu, Heng and
                Hu, Na and
                Zhang, Chunlei and
                Yang, Shan and
                Xie, Lei and
                Su, Dan and
                Yu, Dong},
  title      = {Phonetic Posteriorgrams based Many-to-Many Singing Voice Conversion
                via Adversarial Training},
  journal    = {CoRR},
  volume     = {abs/2012.01837},
  year       = {2020},
  url        = {https://arxiv.org/abs/2012.01837},
  eprinttype = {arXiv},
  eprint     = {2012.01837},
  timestamp  = {Fri, 05 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2012-01837.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, Interspeech 2019. Same authors as the CoRR record
% DBLP:journals/corr/abs-1904-04775; the titles differ only in the
% capitalisation of "Based".
% The acronym GAN is brace-protected so sentence-casing styles do not
% lower-case it.
@inproceedings{DBLP:conf/interspeech/GuoSHX19,
  author     = {Guo, Haohan and
                Soong, Frank K. and
                He, Lei and
                Xie, Lei},
  editor     = {Kubin, Gernot and
                Kacic, Zdravko},
  title      = {A New {GAN}-Based End-to-End {TTS} Training Algorithm},
  booktitle  = {20th Annual Conference of the International Speech Communication Association,
                Interspeech 2019, Graz, Austria, September 15-19, 2019},
  pages      = {1288--1292},
  publisher  = {{ISCA}},
  year       = {2019},
  doi        = {10.21437/INTERSPEECH.2019-2176},
  url        = {https://doi.org/10.21437/Interspeech.2019-2176},
  timestamp  = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/GuoSHX19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Conference paper, Interspeech 2019. Shares its title and author list with the
% CoRR record DBLP:journals/corr/abs-1904-04764 in this file.
@inproceedings{DBLP:conf/interspeech/GuoSHX19a,
  author     = {Guo, Haohan and
                Soong, Frank K. and
                He, Lei and
                Xie, Lei},
  editor     = {Kubin, Gernot and
                Kacic, Zdravko},
  title      = {Exploiting Syntactic Features in a Parsed Tree to Improve End-to-End
                {TTS}},
  booktitle  = {20th Annual Conference of the International Speech Communication Association,
                Interspeech 2019, Graz, Austria, September 15-19, 2019},
  pages      = {4460--4464},
  publisher  = {{ISCA}},
  year       = {2019},
  doi        = {10.21437/INTERSPEECH.2019-2167},
  url        = {https://doi.org/10.21437/Interspeech.2019-2167},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/GuoSHX19a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. The URL uses the https scheme, matching the other
% arxiv.org links in this file.
@article{DBLP:journals/corr/abs-1901-00707,
  author     = {Ming, Huaiping and
                He, Lei and
                Guo, Haohan and
                Soong, Frank K.},
  title      = {Feature reinforcement with word embedding and parsing information
                in neural {TTS}},
  journal    = {CoRR},
  volume     = {abs/1901.00707},
  year       = {2019},
  url        = {https://arxiv.org/abs/1901.00707},
  eprinttype = {arXiv},
  eprint     = {1901.00707},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-00707.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Shares its title and author list with the Interspeech
% 2019 record DBLP:conf/interspeech/GuoSHX19a in this file.
% The URL uses the https scheme, matching the other arxiv.org links here.
@article{DBLP:journals/corr/abs-1904-04764,
  author     = {Guo, Haohan and
                Soong, Frank K. and
                He, Lei and
                Xie, Lei},
  title      = {Exploiting Syntactic Features in a Parsed Tree to Improve End-to-End
                {TTS}},
  journal    = {CoRR},
  volume     = {abs/1904.04764},
  year       = {2019},
  url        = {https://arxiv.org/abs/1904.04764},
  eprinttype = {arXiv},
  eprint     = {1904.04764},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-04764.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record. Same authors as the Interspeech 2019 record
% DBLP:conf/interspeech/GuoSHX19; the titles differ only in the
% capitalisation of "based".
% The acronym GAN is brace-protected so sentence-casing styles do not
% lower-case it, and the URL uses the https scheme like the other arxiv.org
% links in this file.
@article{DBLP:journals/corr/abs-1904-04775,
  author     = {Guo, Haohan and
                Soong, Frank K. and
                He, Lei and
                Xie, Lei},
  title      = {A New {GAN}-based End-to-End {TTS} Training Algorithm},
  journal    = {CoRR},
  volume     = {abs/1904.04775},
  year       = {2019},
  url        = {https://arxiv.org/abs/1904.04775},
  eprinttype = {arXiv},
  eprint     = {1904.04775},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-04775.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}