default search action
BibTeX records: Haohan Guo
% ICASSP 2024 conference paper (record exported from dblp).
@inproceedings{DBLP:conf/icassp/LuWGL0M24,
  author     = {Hui Lu and Xixin Wu and Haohan Guo and Songxiang Liu and Zhiyong Wu and Helen Meng},
  title      = {Unifying One-Shot Voice Conversion and Cloning with Disentangled Speech Representations},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages      = {11141--11145},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/ICASSP48485.2024.10446296},
  doi        = {10.1109/ICASSP48485.2024.10446296},
  timestamp  = {Wed, 07 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/LuWGL0M24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICASSP 2024 conference paper (record exported from dblp).
@inproceedings{DBLP:conf/icassp/0002MCGWLM24,
  author     = {Jiawen Kang and Lingwei Meng and Mingyu Cui and Haohan Guo and Xixin Wu and Xunying Liu and Helen Meng},
  title      = {Cross-Speaker Encoding Network for Multi-Talker Speech Recognition},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2024, Seoul, Republic of Korea, April 14-19, 2024},
  pages      = {11986--11990},
  publisher  = {{IEEE}},
  year       = {2024},
  url        = {https://doi.org/10.1109/ICASSP48485.2024.10446249},
  doi        = {10.1109/ICASSP48485.2024.10446249},
  timestamp  = {Wed, 07 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/0002MCGWLM24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICML 2024 conference paper (record exported from dblp).
% The camelCase system name in the title is brace-protected so that
% sentence-casing bibliography styles do not render it as "Uniaudio".
@inproceedings{DBLP:conf/icml/YangT0HLGCSZ0ZW24,
  author     = {Dongchao Yang and Jinchuan Tian and Xu Tan and Rongjie Huang and Songxiang Liu and Haohan Guo and Xuankai Chang and Jiatong Shi and Sheng Zhao and Jiang Bian and Zhou Zhao and Xixin Wu and Helen M. Meng},
  title      = {{UniAudio}: Towards Universal Audio Generation with Large Language Models},
  booktitle  = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024},
  publisher  = {OpenReview.net},
  year       = {2024},
  url        = {https://openreview.net/forum?id=SRmZw7nEGW},
  timestamp  = {Mon, 02 Sep 2024 16:45:29 +0200},
  biburl     = {https://dblp.org/rec/conf/icml/YangT0HLGCSZ0ZW24.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2401.04152, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2401-04152,
  author     = {Jiawen Kang and Lingwei Meng and Mingyu Cui and Haohan Guo and Xixin Wu and Xunying Liu and Helen Meng},
  title      = {Cross-Speaker Encoding Network for Multi-Talker Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2401.04152},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2401.04152},
  doi        = {10.48550/ARXIV.2401.04152},
  eprinttype = {arXiv},
  eprint     = {2401.04152},
  timestamp  = {Tue, 02 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-04152.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2402.08093, listed by dblp under CoRR.
% The title colon is kept outside the brace group so BibTeX sees it as a
% subtitle separator and keeps the capital of the following word; "100K" is
% brace-protected so sentence-casing styles do not lowercase the K.
@article{DBLP:journals/corr/abs-2402-08093,
  author     = {Mateusz Lajszczak and Guillermo C{\'{a}}mbara and Yang Li and Fatih Beyhan and Arent van Korlaar and Fan Yang and Arnaud Joly and {\'{A}}lvaro Mart{\'{\i}}n{-}Cortinas and Ammar Abbas and Adam Michalski and Alexis Moinet and Sri Karlapati and Ewa Muszynska and Haohan Guo and Bartosz Putrycz and Soledad L{\'{o}}pez Gambino and Kayeon Yoo and Elena Sokolova and Thomas Drugman},
  title      = {{BASE} {TTS}: Lessons from building a billion-parameter Text-to-Speech model on {100K} hours of data},
  journal    = {CoRR},
  volume     = {abs/2402.08093},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.08093},
  doi        = {10.48550/ARXIV.2402.08093},
  eprinttype = {arXiv},
  eprint     = {2402.08093},
  timestamp  = {Mon, 19 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-08093.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2406.02328, listed by dblp under CoRR.
% The camelCase system name in the title is brace-protected so that
% sentence-casing bibliography styles do not render it as "Simplespeech".
@article{DBLP:journals/corr/abs-2406-02328,
  author     = {Dongchao Yang and Dingdong Wang and Haohan Guo and Xueyuan Chen and Xixin Wu and Helen Meng},
  title      = {{SimpleSpeech}: Towards Simple and Efficient Text-to-Speech with Scalar Latent Transformer Diffusion Models},
  journal    = {CoRR},
  volume     = {abs/2406.02328},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2406.02328},
  doi        = {10.48550/ARXIV.2406.02328},
  eprinttype = {arXiv},
  eprint     = {2406.02328},
  timestamp  = {Fri, 05 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-02328.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2406.02940, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2406-02940,
  author     = {Haohan Guo and Fenglong Xie and Dongchao Yang and Hui Lu and Xixin Wu and Helen Meng},
  title      = {Addressing Index Collapse of Large-Codebook Speech Tokenizer with Dual-Decoding Product-Quantized Variational Auto-Encoder},
  journal    = {CoRR},
  volume     = {abs/2406.02940},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2406.02940},
  doi        = {10.48550/ARXIV.2406.02940},
  eprinttype = {arXiv},
  eprint     = {2406.02940},
  timestamp  = {Fri, 05 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-02940.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2406.10056, listed by dblp under CoRR.
% The camelCase system name is brace-protected against sentence casing; the
% indefinite article "A" is left unbraced so that styles may lowercase it.
@article{DBLP:journals/corr/abs-2406-10056,
  author     = {Dongchao Yang and Haohan Guo and Yuanyuan Wang and Rongjie Huang and Xiang Li and Xu Tan and Xixin Wu and Helen Meng},
  title      = {{UniAudio} 1.5: Large Language Model-driven Audio Codec is A Few-shot Audio Task Learner},
  journal    = {CoRR},
  volume     = {abs/2406.10056},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2406.10056},
  doi        = {10.48550/ARXIV.2406.10056},
  eprinttype = {arXiv},
  eprint     = {2406.10056},
  timestamp  = {Thu, 15 Aug 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2406-10056.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Journal article, IEEE/ACM TASLP volume 31 (2023).
% The title colon is kept outside the brace group so BibTeX sees it as a
% subtitle separator and keeps the capital of the following word.
@article{DBLP:journals/taslp/GuoXWSM23,
  author     = {Haohan Guo and Fenglong Xie and Xixin Wu and Frank K. Soong and Helen Meng},
  title      = {{MSMC-TTS}: Multi-Stage Multi-Codebook {VQ-VAE} Based Neural {TTS}},
  journal    = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume     = {31},
  pages      = {1811--1824},
  year       = {2023},
  url        = {https://doi.org/10.1109/TASLP.2023.3272470},
  doi        = {10.1109/TASLP.2023.3272470},
  timestamp  = {Fri, 02 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/taslp/GuoXWSM23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2309.00126, listed by dblp under CoRR.
% The title colon is kept outside the brace group so BibTeX sees it as a
% subtitle separator and keeps the capital of the following word.
@article{DBLP:journals/corr/abs-2309-00126,
  author     = {Haohan Guo and Fenglong Xie and Jiawen Kang and Yujia Xiao and Xixin Wu and Helen Meng},
  title      = {{QS-TTS}: Towards Semi-Supervised Text-to-Speech Synthesis via Vector-Quantized Self-Supervised Speech Representation Learning},
  journal    = {CoRR},
  volume     = {abs/2309.00126},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.00126},
  doi        = {10.48550/ARXIV.2309.00126},
  eprinttype = {arXiv},
  eprint     = {2309.00126},
  timestamp  = {Tue, 02 Jul 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-00126.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% ICASSP 2022 conference paper (record exported from dblp).
@inproceedings{DBLP:conf/icassp/GuoZML22,
  author     = {Haohan Guo and Zhiping Zhou and Fanbo Meng and Kai Liu},
  title      = {Improving Adversarial Waveform Generation Based Singing Voice Conversion with Harmonic Signals},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {6657--6661},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9746709},
  doi        = {10.1109/ICASSP43922.2022.9746709},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/GuoZML22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Interspeech 2022 conference paper (record exported from dblp).
% The acronym GAN in the title is brace-protected so that sentence-casing
% bibliography styles do not lowercase it.
@inproceedings{DBLP:conf/interspeech/GuoLWM22,
  author     = {Haohan Guo and Hui Lu and Xixin Wu and Helen Meng},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {A Multi-Scale Time-Frequency Spectrogram Discriminator for {GAN}-based Non-Autoregressive {TTS}},
  booktitle  = {23rd Annual Conference of the International Speech Communication Association, Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages      = {1566--1570},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-52},
  doi        = {10.21437/INTERSPEECH.2022-52},
  timestamp  = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/GuoLWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Interspeech 2022 conference paper (record exported from dblp).
@inproceedings{DBLP:conf/interspeech/GuoXSWM22,
  author     = {Haohan Guo and Feng{-}Long Xie and Frank K. Soong and Xixin Wu and Helen Meng},
  editor     = {Hanseok Ko and John H. L. Hansen},
  title      = {A Multi-Stage Multi-Codebook {VQ-VAE} Approach to High-Performance Neural {TTS}},
  booktitle  = {23rd Annual Conference of the International Speech Communication Association, Interspeech 2022, Incheon, Korea, September 18-22, 2022},
  pages      = {1611--1615},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-952},
  doi        = {10.21437/INTERSPEECH.2022-952},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/GuoXSWM22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2201.10130, listed by dblp under CoRR (no DOI in this record).
@article{DBLP:journals/corr/abs-2201-10130,
  author     = {Haohan Guo and Zhiping Zhou and Fanbo Meng and Kai Liu},
  title      = {Improving Adversarial Waveform Generation based Singing Voice Conversion with Harmonic Signals},
  journal    = {CoRR},
  volume     = {abs/2201.10130},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.10130},
  eprinttype = {arXiv},
  eprint     = {2201.10130},
  timestamp  = {Tue, 01 Feb 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-10130.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2203.01080, listed by dblp under CoRR.
% The acronym GAN in the title is brace-protected so that sentence-casing
% bibliography styles do not lowercase it.
@article{DBLP:journals/corr/abs-2203-01080,
  author     = {Haohan Guo and Hui Lu and Xixin Wu and Helen Meng},
  title      = {A Multi-Scale Time-Frequency Spectrogram Discriminator for {GAN}-based Non-Autoregressive {TTS}},
  journal    = {CoRR},
  volume     = {abs/2203.01080},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.01080},
  doi        = {10.48550/ARXIV.2203.01080},
  eprinttype = {arXiv},
  eprint     = {2203.01080},
  timestamp  = {Wed, 16 Mar 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-01080.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2209.10887, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2209-10887,
  author     = {Haohan Guo and Feng{-}Long Xie and Frank K. Soong and Xixin Wu and Helen Meng},
  title      = {A Multi-Stage Multi-Codebook {VQ-VAE} Approach to High-Performance Neural {TTS}},
  journal    = {CoRR},
  volume     = {abs/2209.10887},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.10887},
  doi        = {10.48550/ARXIV.2209.10887},
  eprinttype = {arXiv},
  eprint     = {2209.10887},
  timestamp  = {Wed, 28 Sep 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-10887.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2210.15131, listed by dblp under CoRR.
@article{DBLP:journals/corr/abs-2210-15131,
  author     = {Haohan Guo and Fenglong Xie and Xixin Wu and Hui Lu and Helen Meng},
  title      = {Towards High-Quality Neural {TTS} for Low-Resource Languages by Learning Compact Speech Representations},
  journal    = {CoRR},
  volume     = {abs/2210.15131},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2210.15131},
  doi        = {10.48550/ARXIV.2210.15131},
  eprinttype = {arXiv},
  eprint     = {2210.15131},
  timestamp  = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2210-15131.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% IEEE SLT 2021 workshop paper (record exported from dblp).
@inproceedings{DBLP:conf/slt/GuoZSHX21,
  author     = {Haohan Guo and Shaofei Zhang and Frank K. Soong and Lei He and Lei Xie},
  title      = {Conversational End-to-End {TTS} for Voice Agents},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages      = {403--409},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/SLT48900.2021.9383460},
  doi        = {10.1109/SLT48900.2021.9383460},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/slt/GuoZSHX21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2005.10438, listed by dblp under CoRR (no DOI in this record).
@article{DBLP:journals/corr/abs-2005-10438,
  author     = {Haohan Guo and Shaofei Zhang and Frank K. Soong and Lei He and Lei Xie},
  title      = {Conversational End-to-End {TTS} for Voice Agent},
  journal    = {CoRR},
  volume     = {abs/2005.10438},
  year       = {2020},
  url        = {https://arxiv.org/abs/2005.10438},
  eprinttype = {arXiv},
  eprint     = {2005.10438},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2005-10438.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 2012.01837, listed by dblp under CoRR (no DOI in this record).
@article{DBLP:journals/corr/abs-2012-01837,
  author     = {Haohan Guo and Heng Lu and Na Hu and Chunlei Zhang and Shan Yang and Lei Xie and Dan Su and Dong Yu},
  title      = {Phonetic Posteriorgrams based Many-to-Many Singing Voice Conversion via Adversarial Training},
  journal    = {CoRR},
  volume     = {abs/2012.01837},
  year       = {2020},
  url        = {https://arxiv.org/abs/2012.01837},
  eprinttype = {arXiv},
  eprint     = {2012.01837},
  timestamp  = {Fri, 05 Nov 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2012-01837.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Interspeech 2019 conference paper (record exported from dblp).
% The acronym GAN in the title is brace-protected so that sentence-casing
% bibliography styles do not lowercase it.
@inproceedings{DBLP:conf/interspeech/GuoSHX19,
  author     = {Haohan Guo and Frank K. Soong and Lei He and Lei Xie},
  editor     = {Gernot Kubin and Zdravko Kacic},
  title      = {A New {GAN}-Based End-to-End {TTS} Training Algorithm},
  booktitle  = {20th Annual Conference of the International Speech Communication Association, Interspeech 2019, Graz, Austria, September 15-19, 2019},
  pages      = {1288--1292},
  publisher  = {{ISCA}},
  year       = {2019},
  url        = {https://doi.org/10.21437/Interspeech.2019-2176},
  doi        = {10.21437/INTERSPEECH.2019-2176},
  timestamp  = {Tue, 11 Jun 2024 16:45:43 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/GuoSHX19.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% Interspeech 2019 conference paper (record exported from dblp).
@inproceedings{DBLP:conf/interspeech/GuoSHX19a,
  author     = {Haohan Guo and Frank K. Soong and Lei He and Lei Xie},
  editor     = {Gernot Kubin and Zdravko Kacic},
  title      = {Exploiting Syntactic Features in a Parsed Tree to Improve End-to-End {TTS}},
  booktitle  = {20th Annual Conference of the International Speech Communication Association, Interspeech 2019, Graz, Austria, September 15-19, 2019},
  pages      = {4460--4464},
  publisher  = {{ISCA}},
  year       = {2019},
  url        = {https://doi.org/10.21437/Interspeech.2019-2167},
  doi        = {10.21437/INTERSPEECH.2019-2167},
  timestamp  = {Sun, 02 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/GuoSHX19a.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1901.00707, listed by dblp under CoRR (no DOI in this record).
% The arXiv URL uses https, matching the other arXiv abstract links in this file.
@article{DBLP:journals/corr/abs-1901-00707,
  author     = {Huaiping Ming and Lei He and Haohan Guo and Frank K. Soong},
  title      = {Feature reinforcement with word embedding and parsing information in neural {TTS}},
  journal    = {CoRR},
  volume     = {abs/1901.00707},
  year       = {2019},
  url        = {https://arxiv.org/abs/1901.00707},
  eprinttype = {arXiv},
  eprint     = {1901.00707},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1901-00707.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1904.04764, listed by dblp under CoRR (no DOI in this record).
% The arXiv URL uses https, matching the other arXiv abstract links in this file.
@article{DBLP:journals/corr/abs-1904-04764,
  author     = {Haohan Guo and Frank K. Soong and Lei He and Lei Xie},
  title      = {Exploiting Syntactic Features in a Parsed Tree to Improve End-to-End {TTS}},
  journal    = {CoRR},
  volume     = {abs/1904.04764},
  year       = {2019},
  url        = {https://arxiv.org/abs/1904.04764},
  eprinttype = {arXiv},
  eprint     = {1904.04764},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-04764.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv preprint 1904.04775, listed by dblp under CoRR (no DOI in this record).
% The acronym GAN in the title is brace-protected so that sentence-casing
% bibliography styles do not lowercase it. The arXiv URL uses https, matching
% the other arXiv abstract links in this file.
@article{DBLP:journals/corr/abs-1904-04775,
  author     = {Haohan Guo and Frank K. Soong and Lei He and Lei Xie},
  title      = {A New {GAN}-based End-to-End {TTS} Training Algorithm},
  journal    = {CoRR},
  volume     = {abs/1904.04775},
  year       = {2019},
  url        = {https://arxiv.org/abs/1904.04775},
  eprinttype = {arXiv},
  eprint     = {1904.04775},
  timestamp  = {Wed, 20 Jul 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-1904-04775.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.