BibTeX records: Michael L. Seltzer

download as .bib file

% ICASSP 2023 paper. Shares title and author list with DBLP:journals/corr/abs-2211-00896.
@inproceedings{DBLP:conf/icassp/LeSWLSKS23,
  author     = {Le, Duc and
                Seide, Frank and
                Wang, Yuhao and
                Li, Yang and
                Schubert, Kjell and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {Factorized Blank Thresholding for Improved Runtime Efficiency of Neural Transducers},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10095845},
  doi        = {10.1109/ICASSP49357.2023.10095845},
  timestamp  = {Fri, 01 Dec 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/LeSWLSKS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper.
@inproceedings{DBLP:conf/icassp/LiMGSKKSL23,
  author     = {Li, Ke and
                Mahadeokar, Jay and
                Guo, Jinxi and
                Shi, Yangyang and
                Keren, Gil and
                Kalinli, Ozlem and
                Seltzer, Michael L. and
                Le, Duc},
  title      = {Improving fast-slow Encoder based Transducer with Streaming Deliberation},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10095651},
  doi        = {10.1109/ICASSP49357.2023.10095651},
  timestamp  = {Sun, 05 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/LiMGSKKSL23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2023 paper. Shares title and author list with DBLP:journals/corr/abs-2211-05756.
@inproceedings{DBLP:conf/icassp/TjandraSZKMLS23,
  author     = {Tjandra, Andros and
                Singhal, Nayan and
                Zhang, David and
                Kalinli, Ozlem and
                Mohamed, Abdelrahman and
                Le, Duc and
                Seltzer, Michael L.},
  title      = {Massively Multilingual {ASR} on 70 Languages: Tokenization, Architecture, and Generalization Capabilities},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing {ICASSP} 2023, Rhodes Island, Greece, June 4-10, 2023},
  pages      = {1--5},
  publisher  = {{IEEE}},
  year       = {2023},
  url        = {https://doi.org/10.1109/ICASSP49357.2023.10094667},
  doi        = {10.1109/ICASSP49357.2023.10094667},
  timestamp  = {Sun, 05 Nov 2023 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/icassp/TjandraSZKMLS23.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2307.12134.
@article{DBLP:journals/corr/abs-2307-12134,
  author     = {Kim, Suyoun and
                Shrivastava, Akshat and
                Le, Duc and
                Lin, Ju and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {Modality Confidence Aware Training for Robust End-to-End Spoken Language Understanding},
  journal    = {CoRR},
  volume     = {abs/2307.12134},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2307.12134},
  doi        = {10.48550/ARXIV.2307.12134},
  eprinttype = {arXiv},
  eprint     = {2307.12134},
  timestamp  = {Tue, 01 Aug 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2307-12134.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2309.09390.
@article{DBLP:journals/corr/abs-2309-09390,
  author     = {Sharma, Roshan and
                Kim, Suyoun and
                Lazar, Daniel and
                Le, Trang and
                Shrivastava, Akshat and
                Ahn, Kwanghoon and
                Kansal, Piyush and
                Sari, Leda and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {Augmenting text for spoken language understanding with Large Language Models},
  journal    = {CoRR},
  volume     = {abs/2309.09390},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.09390},
  doi        = {10.48550/ARXIV.2309.09390},
  eprinttype = {arXiv},
  eprint     = {2309.09390},
  timestamp  = {Fri, 22 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-09390.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2309.10917.
@article{DBLP:journals/corr/abs-2309-10917,
  author     = {Lakomkin, Egor and
                Wu, Chunyang and
                Fathullah, Yassir and
                Kalinli, Ozlem and
                Seltzer, Michael L. and
                Fuegen, Christian},
  title      = {End-to-End Speech Recognition Contextualization with Large Language Models},
  journal    = {CoRR},
  volume     = {abs/2309.10917},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2309.10917},
  doi        = {10.48550/ARXIV.2309.10917},
  eprinttype = {arXiv},
  eprint     = {2309.10917},
  timestamp  = {Tue, 26 Sep 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2309-10917.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2022 paper. Same author list as DBLP:journals/corr/abs-2201-11867.
% Title: Neural-FST is braced so sentence-casing styles keep the acronym's capitals.
@inproceedings{DBLP:conf/icassp/BruguierLPLLWCP22,
  author     = {Bruguier, Antoine and
                Le, Duc and
                Prabhavalkar, Rohit and
                Li, Dangna and
                Liu, Zhe and
                Wang, Bo and
                Chang, Eun and
                Peng, Fuchun and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {{Neural-FST} Class Language Model for End-to-End Speech Recognition},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2022, Virtual and Singapore, 23-27 May 2022},
  pages      = {6107--6111},
  publisher  = {{IEEE}},
  year       = {2022},
  url        = {https://doi.org/10.1109/ICASSP43922.2022.9747573},
  doi        = {10.1109/ICASSP43922.2022.9747573},
  timestamp  = {Wed, 05 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/BruguierLPLLWCP22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2022 paper. Same author list as DBLP:journals/corr/abs-2203-15773.
@inproceedings{DBLP:conf/interspeech/MahadeokarSLLZC22,
  author     = {Mahadeokar, Jay and
                Shi, Yangyang and
                Li, Ke and
                Le, Duc and
                Zhu, Jiedan and
                Chandra, Vikas and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Streaming parallel transducer beam search with fast slow cascaded encoders},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {2083--2087},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-10551},
  doi        = {10.21437/INTERSPEECH.2022-10551},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/MahadeokarSLLZC22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2022 paper. Same title as DBLP:journals/corr/abs-2204-01893.
@inproceedings{DBLP:conf/interspeech/LeSTKLKS22,
  author     = {Le, Duc and
                Shrivastava, Akshat and
                Tomasello, Paden D. and
                Kim, Suyoun and
                Livshits, Aleksandr and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Deliberation Model for On-Device Spoken Language Understanding},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {3468--3472},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-10800},
  doi        = {10.21437/INTERSPEECH.2022-10800},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/LeSTKLKS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2022 paper.
@inproceedings{DBLP:conf/interspeech/KimLZSAZFKS22,
  author     = {Kim, Suyoun and
                Le, Duc and
                Zheng, Weiyi and
                Singh, Tarun and
                Arora, Abhinav and
                Zhai, Xiaoyu and
                Fuegen, Christian and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  editor     = {Ko, Hanseok and
                Hansen, John H. L.},
  title      = {Evaluating User Perception of Speech Recognition System Quality with Semantic Distance Metric},
  booktitle  = {Interspeech 2022, 23rd Annual Conference of the International Speech Communication Association, Incheon, Korea, 18-22 September 2022},
  pages      = {3978--3982},
  publisher  = {{ISCA}},
  year       = {2022},
  url        = {https://doi.org/10.21437/Interspeech.2022-11144},
  doi        = {10.21437/INTERSPEECH.2022-11144},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/KimLZSAZFKS22.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2201.11867. Same author list as DBLP:conf/icassp/BruguierLPLLWCP22.
% Title: Neural-FST is braced so sentence-casing styles keep the acronym's capitals.
@article{DBLP:journals/corr/abs-2201-11867,
  author     = {Bruguier, Antoine and
                Le, Duc and
                Prabhavalkar, Rohit and
                Li, Dangna and
                Liu, Zhe and
                Wang, Bo and
                Chang, Eun and
                Peng, Fuchun and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {{Neural-FST} Class Language Model for End-to-End Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2201.11867},
  year       = {2022},
  url        = {https://arxiv.org/abs/2201.11867},
  eprinttype = {arXiv},
  eprint     = {2201.11867},
  timestamp  = {Wed, 05 Oct 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2201-11867.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2203.15773. Same author list as DBLP:conf/interspeech/MahadeokarSLLZC22.
@article{DBLP:journals/corr/abs-2203-15773,
  author     = {Mahadeokar, Jay and
                Shi, Yangyang and
                Li, Ke and
                Le, Duc and
                Zhu, Jiedan and
                Chandra, Vikas and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {Streaming parallel transducer beam search with fast-slow cascaded encoders},
  journal    = {CoRR},
  volume     = {abs/2203.15773},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2203.15773},
  doi        = {10.48550/ARXIV.2203.15773},
  eprinttype = {arXiv},
  eprint     = {2203.15773},
  timestamp  = {Mon, 04 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2203-15773.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2204.01893. Same title as DBLP:conf/interspeech/LeSTKLKS22.
@article{DBLP:journals/corr/abs-2204-01893,
  author     = {Le, Duc and
                Shrivastava, Akshat and
                Tomasello, Paden and
                Kim, Suyoun and
                Livshits, Aleksandr and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {Deliberation Model for On-Device Spoken Language Understanding},
  journal    = {CoRR},
  volume     = {abs/2204.01893},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2204.01893},
  doi        = {10.48550/ARXIV.2204.01893},
  eprinttype = {arXiv},
  eprint     = {2204.01893},
  timestamp  = {Wed, 06 Apr 2022 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2204-01893.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2211.00896. Shares title and author list with DBLP:conf/icassp/LeSWLSKS23.
@article{DBLP:journals/corr/abs-2211-00896,
  author     = {Le, Duc and
                Seide, Frank and
                Wang, Yuhao and
                Li, Yang and
                Schubert, Kjell and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {Factorized Blank Thresholding for Improved Runtime Efficiency of Neural Transducers},
  journal    = {CoRR},
  volume     = {abs/2211.00896},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2211.00896},
  doi        = {10.48550/ARXIV.2211.00896},
  eprinttype = {arXiv},
  eprint     = {2211.00896},
  timestamp  = {Tue, 04 Jul 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2211-00896.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2211.05756. Shares title and author list with DBLP:conf/icassp/TjandraSZKMLS23.
@article{DBLP:journals/corr/abs-2211-05756,
  author     = {Tjandra, Andros and
                Singhal, Nayan and
                Zhang, David and
                Kalinli, Ozlem and
                Mohamed, Abdelrahman and
                Le, Duc and
                Seltzer, Michael L.},
  title      = {Massively Multilingual {ASR} on 70 Languages: Tokenization, Architecture, and Generalization Capabilities},
  journal    = {CoRR},
  volume     = {abs/2211.05756},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2211.05756},
  doi        = {10.48550/ARXIV.2211.05756},
  eprinttype = {arXiv},
  eprint     = {2211.05756},
  timestamp  = {Tue, 15 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2211-05756.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2021 paper.
@inproceedings{DBLP:conf/icassp/KimSMBFSL21,
  author     = {Kim, Suyoun and
                Shangguan, Yuan and
                Mahadeokar, Jay and
                Bruguier, Antoine and
                Fuegen, Christian and
                Seltzer, Michael L. and
                Le, Duc},
  title      = {Improved Neural Language Model Fusion for Streaming Recurrent Neural Network Transducer},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages      = {7333--7337},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/ICASSP39728.2021.9414784},
  doi        = {10.1109/ICASSP39728.2021.9414784},
  timestamp  = {Fri, 09 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/KimSMBFSL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% ICASSP 2021 paper. Same author list as DBLP:journals/corr/abs-2102-11531.
@inproceedings{DBLP:conf/icassp/VenkateshVMSFSC21,
  author     = {Venkatesh, Ganesh and
                Valliappan, Alagappan and
                Mahadeokar, Jay and
                Shangguan, Yuan and
                Fuegen, Christian and
                Seltzer, Michael L. and
                Chandra, Vikas},
  title      = {Memory-Efficient Speech Recognition on Smart Devices},
  booktitle  = {{IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2021, Toronto, ON, Canada, June 6-11, 2021},
  pages      = {8368--8372},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/ICASSP39728.2021.9414502},
  doi        = {10.1109/ICASSP39728.2021.9414502},
  timestamp  = {Fri, 09 Jul 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/icassp/VenkateshVMSFSC21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper. Shares title and author list with DBLP:journals/corr/abs-2104-02194.
% Editors: the Czech names carry their carons via \v accents.
@inproceedings{DBLP:conf/interspeech/LeJKKSMCSFKSS21,
  author     = {Le, Duc and
                Jain, Mahaveer and
                Keren, Gil and
                Kim, Suyoun and
                Shi, Yangyang and
                Mahadeokar, Jay and
                Chan, Julian and
                Shangguan, Yuan and
                Fuegen, Christian and
                Kalinli, Ozlem and
                Saraf, Yatharth and
                Seltzer, Michael L.},
  editor     = {Hermansky, Hynek and
                {\v{C}}ernock{\'{y}}, Honza and
                Burget, Luk{\'{a}}{\v{s}} and
                Lamel, Lori and
                Scharenborg, Odette and
                Motl{\'{\i}}{\v{c}}ek, Petr},
  title      = {Contextualized Streaming End-to-End Speech Recognition with Trie-Based Deep Biasing and Shallow Fusion},
  booktitle  = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages      = {1772--1776},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Interspeech.2021-1566},
  doi        = {10.21437/INTERSPEECH.2021-1566},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/LeJKKSMCSFKSS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper. Shares title and author list with DBLP:journals/corr/abs-2104-02138.
% Editors: the Czech names carry their carons via \v accents.
@inproceedings{DBLP:conf/interspeech/KimALYFKS21,
  author     = {Kim, Suyoun and
                Arora, Abhinav and
                Le, Duc and
                Yeh, Ching{-}Feng and
                Fuegen, Christian and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  editor     = {Hermansky, Hynek and
                {\v{C}}ernock{\'{y}}, Honza and
                Burget, Luk{\'{a}}{\v{s}} and
                Lamel, Lori and
                Scharenborg, Odette and
                Motl{\'{\i}}{\v{c}}ek, Petr},
  title      = {Semantic Distance: {A} New Metric for {ASR} Performance Analysis Towards Spoken Language Understanding},
  booktitle  = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages      = {1977--1981},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Interspeech.2021-1929},
  doi        = {10.21437/INTERSPEECH.2021-1929},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/KimALYFKS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper. Same author list as DBLP:journals/corr/abs-2104-02176.
% Editors: the Czech names carry their carons via \v accents.
@inproceedings{DBLP:conf/interspeech/ShiNWMLPXYCFKS21,
  author     = {Shi, Yangyang and
                Nagaraja, Varun and
                Wu, Chunyang and
                Mahadeokar, Jay and
                Le, Duc and
                Prabhavalkar, Rohit and
                Xiao, Alex and
                Yeh, Ching{-}Feng and
                Chan, Julian and
                Fuegen, Christian and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  editor     = {Hermansky, Hynek and
                {\v{C}}ernock{\'{y}}, Honza and
                Burget, Luk{\'{a}}{\v{s}} and
                Lamel, Lori and
                Scharenborg, Odette and
                Motl{\'{\i}}{\v{c}}ek, Petr},
  title      = {Dynamic Encoder Transducer: {A} Flexible Solution for Trading Off Accuracy for Latency},
  booktitle  = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages      = {2042--2046},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Interspeech.2021-1272},
  doi        = {10.21437/INTERSPEECH.2021-1272},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ShiNWMLPXYCFKS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper. Same author list as DBLP:journals/corr/abs-2104-02232.
% Editors: the Czech names carry their carons via \v accents.
@inproceedings{DBLP:conf/interspeech/MahadeokarSSWXS21,
  author     = {Mahadeokar, Jay and
                Shi, Yangyang and
                Shangguan, Yuan and
                Wu, Chunyang and
                Xiao, Alex and
                Su, Hang and
                Le, Duc and
                Kalinli, Ozlem and
                Fuegen, Christian and
                Seltzer, Michael L.},
  editor     = {Hermansky, Hynek and
                {\v{C}}ernock{\'{y}}, Honza and
                Burget, Luk{\'{a}}{\v{s}} and
                Lamel, Lori and
                Scharenborg, Odette and
                Motl{\'{\i}}{\v{c}}ek, Petr},
  title      = {Flexi-Transducer: Optimizing Latency, Accuracy and Compute for Multi-Domain On-Device Scenarios},
  booktitle  = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages      = {2107--2111},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Interspeech.2021-1921},
  doi        = {10.21437/INTERSPEECH.2021-1921},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/MahadeokarSSWXS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper. Shares title and author list with DBLP:journals/corr/abs-2104-02207.
% Editors: the Czech names carry their carons via \v accents.
@inproceedings{DBLP:conf/interspeech/ShangguanPSMSZW21,
  author     = {Shangguan, Yuan and
                Prabhavalkar, Rohit and
                Su, Hang and
                Mahadeokar, Jay and
                Shi, Yangyang and
                Zhou, Jiatong and
                Wu, Chunyang and
                Le, Duc and
                Kalinli, Ozlem and
                Fuegen, Christian and
                Seltzer, Michael L.},
  editor     = {Hermansky, Hynek and
                {\v{C}}ernock{\'{y}}, Honza and
                Burget, Luk{\'{a}}{\v{s}} and
                Lamel, Lori and
                Scharenborg, Odette and
                Motl{\'{\i}}{\v{c}}ek, Petr},
  title      = {Dissecting User-Perceived Latency of On-Device {E2E} Speech Recognition},
  booktitle  = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages      = {4553--4557},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Interspeech.2021-1887},
  doi        = {10.21437/INTERSPEECH.2021-1887},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/ShangguanPSMSZW21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Interspeech 2021 paper. Shares title and author list with DBLP:journals/corr/abs-2106-08960.
% Editors: the Czech names carry their carons via \v accents.
@inproceedings{DBLP:conf/interspeech/NagarajaSVKSC21,
  author     = {Nagaraja, Varun and
                Shi, Yangyang and
                Venkatesh, Ganesh and
                Kalinli, Ozlem and
                Seltzer, Michael L. and
                Chandra, Vikas},
  editor     = {Hermansky, Hynek and
                {\v{C}}ernock{\'{y}}, Honza and
                Burget, Luk{\'{a}}{\v{s}} and
                Lamel, Lori and
                Scharenborg, Odette and
                Motl{\'{\i}}{\v{c}}ek, Petr},
  title      = {Collaborative Training of Acoustic Encoders for Speech Recognition},
  booktitle  = {Interspeech 2021, 22nd Annual Conference of the International Speech Communication Association, Brno, Czechia, 30 August - 3 September 2021},
  pages      = {4573--4577},
  publisher  = {{ISCA}},
  year       = {2021},
  url        = {https://doi.org/10.21437/Interspeech.2021-354},
  doi        = {10.21437/INTERSPEECH.2021-354},
  timestamp  = {Wed, 21 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/interspeech/NagarajaSVKSC21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% SLT 2021 workshop paper.
@inproceedings{DBLP:conf/slt/YehWSW0CS21,
  author     = {Yeh, Ching{-}Feng and
                Wang, Yongqiang and
                Shi, Yangyang and
                Wu, Chunyang and
                Zhang, Frank and
                Chan, Julian and
                Seltzer, Michael L.},
  title      = {Streaming Attention-Based Models with Augmented Memory for End-To-End Speech Recognition},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages      = {8--14},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/SLT48900.2021.9383504},
  doi        = {10.1109/SLT48900.2021.9383504},
  timestamp  = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/conf/slt/YehWSW0CS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% SLT 2021 workshop paper.
@inproceedings{DBLP:conf/slt/MahadeokarSLKSL21,
  author     = {Mahadeokar, Jay and
                Shangguan, Yuan and
                Le, Duc and
                Keren, Gil and
                Su, Hang and
                Le, Thong and
                Yeh, Ching{-}Feng and
                Fuegen, Christian and
                Seltzer, Michael L.},
  title      = {Alignment Restricted Streaming Recurrent Neural Network Transducer},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages      = {52--59},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/SLT48900.2021.9383606},
  doi        = {10.1109/SLT48900.2021.9383606},
  timestamp  = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/slt/MahadeokarSLKSL21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% SLT 2021 workshop paper.
@inproceedings{DBLP:conf/slt/LeKCMFS21,
  author     = {Le, Duc and
                Keren, Gil and
                Chan, Julian and
                Mahadeokar, Jay and
                Fuegen, Christian and
                Seltzer, Michael L.},
  title      = {Deep Shallow Fusion for {RNN-T} Personalization},
  booktitle  = {{IEEE} Spoken Language Technology Workshop, {SLT} 2021, Shenzhen, China, January 19-22, 2021},
  pages      = {251--257},
  publisher  = {{IEEE}},
  year       = {2021},
  url        = {https://doi.org/10.1109/SLT48900.2021.9383560},
  doi        = {10.1109/SLT48900.2021.9383560},
  timestamp  = {Thu, 08 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/conf/slt/LeKCMFS21.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2102.11531. Same author list as DBLP:conf/icassp/VenkateshVMSFSC21.
@article{DBLP:journals/corr/abs-2102-11531,
  author     = {Venkatesh, Ganesh and
                Valliappan, Alagappan and
                Mahadeokar, Jay and
                Shangguan, Yuan and
                Fuegen, Christian and
                Seltzer, Michael L. and
                Chandra, Vikas},
  title      = {Memory-efficient Speech Recognition on Smart Devices},
  journal    = {CoRR},
  volume     = {abs/2102.11531},
  year       = {2021},
  url        = {https://arxiv.org/abs/2102.11531},
  eprinttype = {arXiv},
  eprint     = {2102.11531},
  timestamp  = {Wed, 24 Feb 2021 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2102-11531.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2104.02138. Shares title and author list with DBLP:conf/interspeech/KimALYFKS21.
@article{DBLP:journals/corr/abs-2104-02138,
  author     = {Kim, Suyoun and
                Arora, Abhinav and
                Le, Duc and
                Yeh, Ching{-}Feng and
                Fuegen, Christian and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {Semantic Distance: {A} New Metric for {ASR} Performance Analysis Towards Spoken Language Understanding},
  journal    = {CoRR},
  volume     = {abs/2104.02138},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.02138},
  eprinttype = {arXiv},
  eprint     = {2104.02138},
  timestamp  = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-02138.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2104.02176. Same author list as DBLP:conf/interspeech/ShiNWMLPXYCFKS21.
@article{DBLP:journals/corr/abs-2104-02176,
  author     = {Shi, Yangyang and
                Nagaraja, Varun and
                Wu, Chunyang and
                Mahadeokar, Jay and
                Le, Duc and
                Prabhavalkar, Rohit and
                Xiao, Alex and
                Yeh, Ching{-}Feng and
                Chan, Julian and
                Fuegen, Christian and
                Kalinli, Ozlem and
                Seltzer, Michael L.},
  title      = {Dynamic Encoder Transducer: {A} Flexible Solution For Trading Off Accuracy For Latency},
  journal    = {CoRR},
  volume     = {abs/2104.02176},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.02176},
  eprinttype = {arXiv},
  eprint     = {2104.02176},
  timestamp  = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-02176.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2104.02194. Shares title and author list with DBLP:conf/interspeech/LeJKKSMCSFKSS21.
@article{DBLP:journals/corr/abs-2104-02194,
  author     = {Le, Duc and
                Jain, Mahaveer and
                Keren, Gil and
                Kim, Suyoun and
                Shi, Yangyang and
                Mahadeokar, Jay and
                Chan, Julian and
                Shangguan, Yuan and
                Fuegen, Christian and
                Kalinli, Ozlem and
                Saraf, Yatharth and
                Seltzer, Michael L.},
  title      = {Contextualized Streaming End-to-End Speech Recognition with Trie-Based Deep Biasing and Shallow Fusion},
  journal    = {CoRR},
  volume     = {abs/2104.02194},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.02194},
  eprinttype = {arXiv},
  eprint     = {2104.02194},
  timestamp  = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-02194.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% CoRR (arXiv) record 2104.02207. Shares title and author list with DBLP:conf/interspeech/ShangguanPSMSZW21.
@article{DBLP:journals/corr/abs-2104-02207,
  author     = {Shangguan, Yuan and
                Prabhavalkar, Rohit and
                Su, Hang and
                Mahadeokar, Jay and
                Shi, Yangyang and
                Zhou, Jiatong and
                Wu, Chunyang and
                Le, Duc and
                Kalinli, Ozlem and
                Fuegen, Christian and
                Seltzer, Michael L.},
  title      = {Dissecting User-Perceived Latency of On-Device {E2E} Speech Recognition},
  journal    = {CoRR},
  volume     = {abs/2104.02207},
  year       = {2021},
  url        = {https://arxiv.org/abs/2104.02207},
  eprinttype = {arXiv},
  eprint     = {2104.02207},
  timestamp  = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2104-02207.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2104-02232,
  author       = {Jay Mahadeokar and
                  Yangyang Shi and
                  Yuan Shangguan and
                  Chunyang Wu and
                  Alex Xiao and
                  Hang Su and
                  Duc Le and
                  Ozlem Kalinli and
                  Christian Fuegen and
                  Michael L. Seltzer},
  title        = {Flexi-Transducer: Optimizing Latency, Accuracy and Compute for Multi-Domain
                  On-Device Scenarios},
  journal      = {CoRR},
  volume       = {abs/2104.02232},
  year         = {2021},
  url          = {https://arxiv.org/abs/2104.02232},
  eprinttype   = {arXiv},
  eprint       = {2104.02232},
  timestamp    = {Mon, 12 Apr 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2104-02232.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2106-08960,
  author       = {Varun Nagaraja and
                  Yangyang Shi and
                  Ganesh Venkatesh and
                  Ozlem Kalinli and
                  Michael L. Seltzer and
                  Vikas Chandra},
  title        = {Collaborative Training of Acoustic Encoders for Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2106.08960},
  year         = {2021},
  url          = {https://arxiv.org/abs/2106.08960},
  eprinttype   = {arXiv},
  eprint       = {2106.08960},
  timestamp    = {Tue, 29 Jun 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2106-08960.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2110-05376,
  author       = {Suyoun Kim and
                  Duc Le and
                  Weiyi Zheng and
                  Tarun Singh and
                  Abhinav Arora and
                  Xiaoyu Zhai and
                  Christian Fuegen and
                  Ozlem Kalinli and
                  Michael L. Seltzer},
  title        = {Evaluating User Perception of Speech Recognition System Quality with
                  Semantic Distance Metric},
  journal      = {CoRR},
  volume       = {abs/2110.05376},
  year         = {2021},
  url          = {https://arxiv.org/abs/2110.05376},
  eprinttype   = {arXiv},
  eprint       = {2110.05376},
  timestamp    = {Thu, 21 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2110-05376.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/LeKFS20,
  author       = {Duc Le and
                  Thilo K{\"{o}}hler and
                  Christian Fuegen and
                  Michael L. Seltzer},
  title        = {{G2G:} {TTS}-Driven Pronunciation Learning for Graphemic Hybrid {ASR}},
  booktitle    = {2020 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages        = {6869--6873},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/ICASSP40776.2020.9054257},
  doi          = {10.1109/ICASSP40776.2020.9054257},
  timestamp    = {Wed, 10 Feb 2021 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/LeKFS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/WangMLLXMHTZZFZ20,
  author       = {Wang, Yongqiang and
                  Mohamed, Abdelrahman and
                  Le, Duc and
                  Liu, Chunxi and
                  Xiao, Alex and
                  Mahadeokar, Jay and
                  Huang, Hongzhao and
                  Tjandra, Andros and
                  Zhang, Xiaohui and
                  Zhang, Frank and
                  Fuegen, Christian and
                  Zweig, Geoffrey and
                  Seltzer, Michael L.},
  title        = {Transformer-Based Acoustic Modeling for
                  Hybrid Speech Recognition},
  booktitle    = {2020 {IEEE} International Conference on Acoustics, Speech and
                  Signal Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages        = {6874--6878},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/ICASSP40776.2020.9054345},
  doi          = {10.1109/ICASSP40776.2020.9054345},
  timestamp    = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/WangMLLXMHTZZFZ20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ChenYYJS20,
  author       = {Yi{-}Chen Chen and
                  Zhaojun Yang and
                  Ching{-}Feng Yeh and
                  Mahaveer Jain and
                  Michael L. Seltzer},
  title        = {{AIPNet}: Generative Adversarial Pre-Training of Accent-Invariant Networks
                  for End-To-End Speech Recognition},
  booktitle    = {2020 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2020, Barcelona, Spain, May 4-8, 2020},
  pages        = {6979--6983},
  publisher    = {{IEEE}},
  year         = {2020},
  url          = {https://doi.org/10.1109/ICASSP40776.2020.9053098},
  doi          = {10.1109/ICASSP40776.2020.9053098},
  timestamp    = {Thu, 23 Jul 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ChenYYJS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ShiWWFZLYS20,
  author       = {Yangyang Shi and
                  Yongqiang Wang and
                  Chunyang Wu and
                  Christian Fuegen and
                  Frank Zhang and
                  Duc Le and
                  Ching{-}Feng Yeh and
                  Michael L. Seltzer},
  editor       = {Helen Meng and
                  Bo Xu and
                  Thomas Fang Zheng},
  title        = {Weak-Attention Suppression for Transformer Based Speech Recognition},
  booktitle    = {Interspeech 2020, 21st Annual Conference of the International Speech
                  Communication Association, Virtual Event, Shanghai, China, 25-29 October
                  2020},
  pages        = {4996--5000},
  publisher    = {{ISCA}},
  year         = {2020},
  url          = {https://doi.org/10.21437/Interspeech.2020-1363},
  doi          = {10.21437/Interspeech.2020-1363},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ShiWWFZLYS20.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2005-09137,
  author       = {Yangyang Shi and
                  Yongqiang Wang and
                  Chunyang Wu and
                  Christian Fuegen and
                  Frank Zhang and
                  Duc Le and
                  Ching{-}Feng Yeh and
                  Michael L. Seltzer},
  title        = {Weak-Attention Suppression For Transformer Based Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2005.09137},
  year         = {2020},
  url          = {https://arxiv.org/abs/2005.09137},
  eprinttype   = {arXiv},
  eprint       = {2005.09137},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2005-09137.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-10759,
  author       = {Yangyang Shi and
                  Yongqiang Wang and
                  Chunyang Wu and
                  Ching{-}Feng Yeh and
                  Julian Chan and
                  Frank Zhang and
                  Duc Le and
                  Michael L. Seltzer},
  title        = {Emformer: Efficient Memory Transformer Based Acoustic Model For Low
                  Latency Streaming Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2010.10759},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.10759},
  eprinttype   = {arXiv},
  eprint       = {2010.10759},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-10759.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2010-13878,
  author       = {Suyoun Kim and
                  Yuan Shangguan and
                  Jay Mahadeokar and
                  Antoine Bruguier and
                  Christian Fuegen and
                  Michael L. Seltzer and
                  Duc Le},
  title        = {Improved Neural Language Model Fusion for Streaming Recurrent Neural
                  Network Transducer},
  journal      = {CoRR},
  volume       = {abs/2010.13878},
  year         = {2020},
  url          = {https://arxiv.org/abs/2010.13878},
  eprinttype   = {arXiv},
  eprint       = {2010.13878},
  timestamp    = {Mon, 02 Nov 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2010-13878.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-03072,
  author       = {Jay Mahadeokar and
                  Yuan Shangguan and
                  Duc Le and
                  Gil Keren and
                  Hang Su and
                  Thong Le and
                  Ching{-}Feng Yeh and
                  Christian Fuegen and
                  Michael L. Seltzer},
  title        = {Alignment Restricted Streaming Recurrent Neural Network Transducer},
  journal      = {CoRR},
  volume       = {abs/2011.03072},
  year         = {2020},
  url          = {https://arxiv.org/abs/2011.03072},
  eprinttype   = {arXiv},
  eprint       = {2011.03072},
  timestamp    = {Thu, 12 Nov 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2011-03072.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-07120,
  author       = {Ching{-}Feng Yeh and
                  Yongqiang Wang and
                  Yangyang Shi and
                  Chunyang Wu and
                  Frank Zhang and
                  Julian Chan and
                  Michael L. Seltzer},
  title        = {Streaming Attention-Based Models with Augmented Memory for End-to-End
                  Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/2011.07120},
  year         = {2020},
  url          = {https://arxiv.org/abs/2011.07120},
  eprinttype   = {arXiv},
  eprint       = {2011.07120},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2011-07120.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2011-07754,
  author       = {Duc Le and
                  Gil Keren and
                  Julian Chan and
                  Jay Mahadeokar and
                  Christian Fuegen and
                  Michael L. Seltzer},
  title        = {Deep Shallow Fusion for {RNN-T} Personalization},
  journal      = {CoRR},
  volume       = {abs/2011.07754},
  year         = {2020},
  url          = {https://arxiv.org/abs/2011.07754},
  eprinttype   = {arXiv},
  eprint       = {2011.07754},
  timestamp    = {Wed, 18 Nov 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2011-07754.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/jstsp/WatanabeABHS19,
  author       = {Watanabe, Shinji and
                  Araki, Shoko and
                  Bacchiani, Michiel and
                  Haeb{-}Umbach, Reinhold and
                  Seltzer, Michael L.},
  title        = {Introduction to the Issue on Far-Field Speech Processing
                  in the Era of Deep Learning:
                  Speech Enhancement, Separation, and Recognition},
  journal      = {{IEEE} J. Sel. Top. Signal Process.},
  volume       = {13},
  number       = {4},
  pages        = {785--786},
  year         = {2019},
  url          = {https://doi.org/10.1109/JSTSP.2019.2925640},
  doi          = {10.1109/JSTSP.2019.2925640},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jstsp/WatanabeABHS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/spm/Haeb-UmbachWNBH19,
  author       = {Haeb{-}Umbach, Reinhold and
                  Watanabe, Shinji and
                  Nakatani, Tomohiro and
                  Bacchiani, Michiel and
                  Hoffmeister, Bj{\"{o}}rn and
                  Seltzer, Michael L. and
                  Zen, Heiga and
                  Souden, Mehrez},
  title        = {Speech Processing for Digital Home Assistants:
                  Combining signal processing with deep-learning techniques},
  journal      = {{IEEE} Signal Process. Mag.},
  volume       = {36},
  number       = {6},
  pages        = {111--124},
  year         = {2019},
  url          = {https://doi.org/10.1109/MSP.2019.2918706},
  doi          = {10.1109/MSP.2019.2918706},
  timestamp    = {Fri, 26 Jun 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/spm/Haeb-UmbachWNBH19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/asru/LeZZFZS19,
  author       = {Le, Duc and
                  Zhang, Xiaohui and
                  Zheng, Weiyi and
                  F{\"{u}}gen, Christian and
                  Zweig, Geoffrey and
                  Seltzer, Michael L.},
  title        = {From Senones to Chenones:
                  Tied Context-Dependent Graphemes for Hybrid Speech Recognition},
  booktitle    = {{IEEE} Automatic Speech Recognition and Understanding Workshop,
                  {ASRU} 2019, Singapore, December 14-18, 2019},
  pages        = {457--464},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ASRU46091.2019.9003972},
  doi          = {10.1109/ASRU46091.2019.9003972},
  timestamp    = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/asru/LeZZFZS19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ChenJWSF19,
  author       = {Chen, Zhehuai and
                  Jain, Mahaveer and
                  Wang, Yongqiang and
                  Seltzer, Michael L. and
                  Fuegen, Christian},
  title        = {End-to-end Contextual Speech Recognition Using
                  Class Language Models and a Token Passing Decoder},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2019, Brighton, United Kingdom, May 12-17, 2019},
  pages        = {6186--6190},
  publisher    = {{IEEE}},
  year         = {2019},
  url          = {https://doi.org/10.1109/ICASSP.2019.8683573},
  doi          = {10.1109/ICASSP.2019.8683573},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/ChenJWSF19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/ChenJWSF19,
  author       = {Zhehuai Chen and
                  Mahaveer Jain and
                  Yongqiang Wang and
                  Michael L. Seltzer and
                  Christian Fuegen},
  editor       = {Gernot Kubin and
                  Zdravko Kacic},
  title        = {Joint Grapheme and Phoneme Embeddings for Contextual End-to-End {ASR}},
  booktitle    = {Interspeech 2019, 20th Annual Conference of the International Speech
                  Communication Association, Graz, Austria, 15-19 September 2019},
  pages        = {3490--3494},
  publisher    = {{ISCA}},
  year         = {2019},
  url          = {https://doi.org/10.21437/Interspeech.2019-1434},
  doi          = {10.21437/Interspeech.2019-1434},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/ChenJWSF19.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-01493,
  author       = {Duc Le and
                  Xiaohui Zhang and
                  Weiyi Zheng and
                  Christian F{\"{u}}gen and
                  Geoffrey Zweig and
                  Michael L. Seltzer},
  title        = {From Senones to Chenones: Tied Context-Dependent Graphemes for Hybrid
                  Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1910.01493},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.01493},
  eprinttype   = {arXiv},
  eprint       = {1910.01493},
  timestamp    = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-01493.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-09799,
  author       = {Yongqiang Wang and
                  Abdelrahman Mohamed and
                  Duc Le and
                  Chunxi Liu and
                  Alex Xiao and
                  Jay Mahadeokar and
                  Hongzhao Huang and
                  Andros Tjandra and
                  Xiaohui Zhang and
                  Frank Zhang and
                  Christian Fuegen and
                  Geoffrey Zweig and
                  Michael L. Seltzer},
  title        = {Transformer-based Acoustic Modeling for Hybrid Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1910.09799},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.09799},
  eprinttype   = {arXiv},
  eprint       = {1910.09799},
  timestamp    = {Thu, 04 Apr 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-09799.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-12612,
  author       = {Duc Le and
                  Thilo K{\"{o}}hler and
                  Christian Fuegen and
                  Michael L. Seltzer},
  title        = {{G2G:} {TTS}-Driven Pronunciation Learning for Graphemic Hybrid {ASR}},
  journal      = {CoRR},
  volume       = {abs/1910.12612},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.12612},
  eprinttype   = {arXiv},
  eprint       = {1910.12612},
  timestamp    = {Thu, 31 Oct 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-12612.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1910-12977,
  author       = {Ching{-}Feng Yeh and
                  Jay Mahadeokar and
                  Kaustubh Kalgaonkar and
                  Yongqiang Wang and
                  Duc Le and
                  Mahaveer Jain and
                  Kjell Schubert and
                  Christian Fuegen and
                  Michael L. Seltzer},
  title        = {Transformer-Transducer: End-to-End Speech Recognition with Self-Attention},
  journal      = {CoRR},
  volume       = {abs/1910.12977},
  year         = {2019},
  url          = {https://arxiv.org/abs/1910.12977},
  eprinttype   = {arXiv},
  eprint       = {1910.12977},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1910-12977.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-01629,
  author       = {Mahaveer Jain and
                  Kjell Schubert and
                  Jay Mahadeokar and
                  Ching{-}Feng Yeh and
                  Kaustubh Kalgaonkar and
                  Anuroop Sriram and
                  Christian Fuegen and
                  Michael L. Seltzer},
  title        = {{RNN-T} For Latency Controlled {ASR} With Improved Beam Search},
  journal      = {CoRR},
  volume       = {abs/1911.01629},
  year         = {2019},
  url          = {https://arxiv.org/abs/1911.01629},
  eprinttype   = {arXiv},
  eprint       = {1911.01629},
  timestamp    = {Mon, 11 Nov 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-01629.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1911-11935,
  author       = {Yi{-}Chen Chen and
                  Zhaojun Yang and
                  Ching{-}Feng Yeh and
                  Mahaveer Jain and
                  Michael L. Seltzer},
  title        = {{AIPNet}: Generative Adversarial Pre-training of Accent-invariant Networks
                  for End-to-end Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1911.11935},
  year         = {2019},
  url          = {https://arxiv.org/abs/1911.11935},
  eprinttype   = {arXiv},
  eprint       = {1911.11935},
  timestamp    = {Tue, 03 Dec 2019 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1911-11935.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/KimS18a,
  author       = {Kim, Suyoun and
                  Seltzer, Michael L.},
  title        = {Towards Language-Universal End-to-End
                  Speech Recognition},
  booktitle    = {2018 {IEEE} International Conference on Acoustics, Speech and
                  Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages        = {4914--4918},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICASSP.2018.8462201},
  doi          = {10.1109/ICASSP.2018.8462201},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/KimS18a.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ChenYXLSG18,
  author       = {Chen, Zhuo and
                  Yoshioka, Takuya and
                  Xiao, Xiong and
                  Li, Linyu and
                  Seltzer, Michael L. and
                  Gong, Yifan},
  title        = {Efficient Integration of Fixed Beamformers and Speech Separation
                  Networks for Multi-Channel Far-Field Speech Separation},
  booktitle    = {2018 {IEEE} International Conference on Acoustics, Speech and
                  Signal Processing, {ICASSP} 2018, Calgary, AB, Canada, April 15-20, 2018},
  pages        = {5384--5388},
  publisher    = {{IEEE}},
  year         = {2018},
  url          = {https://doi.org/10.1109/ICASSP.2018.8461930},
  doi          = {10.1109/ICASSP.2018.8461930},
  timestamp    = {Tue, 04 May 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ChenYXLSG18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/KimSLZ18,
  author       = {Suyoun Kim and
                  Michael L. Seltzer and
                  Jinyu Li and
                  Rui Zhao},
  editor       = {B. Yegnanarayana},
  title        = {Improved Training for Online End-to-end Speech Recognition Systems},
  booktitle    = {Interspeech 2018, 19th Annual Conference of the International Speech
                  Communication Association, Hyderabad, India, 2-6 September 2018},
  pages        = {2913--2917},
  publisher    = {{ISCA}},
  year         = {2018},
  url          = {https://doi.org/10.21437/Interspeech.2018-2517},
  doi          = {10.21437/Interspeech.2018-2517},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/interspeech/KimSLZ18.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1812-02142,
  author       = {Zhehuai Chen and
                  Mahaveer Jain and
                  Yongqiang Wang and
                  Michael L. Seltzer and
                  Christian Fuegen},
  title        = {End-to-end contextual speech recognition using class language models
                  and a token passing decoder},
  journal      = {CoRR},
  volume       = {abs/1812.02142},
  year         = {2018},
  url          = {https://arxiv.org/abs/1812.02142},
  eprinttype   = {arXiv},
  eprint       = {1812.02142},
  timestamp    = {Wed, 02 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1812-02142.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/taslp/XiongDHSSSYZ17,
  author       = {Xiong, Wayne and
                  Droppo, Jasha and
                  Huang, Xuedong and
                  Seide, Frank and
                  Seltzer, Michael L. and
                  Stolcke, Andreas and
                  Yu, Dong and
                  Zweig, Geoffrey},
  title        = {Toward Human Parity in
                  Conversational Speech Recognition},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {25},
  number       = {12},
  pages        = {2410--2423},
  year         = {2017},
  url          = {https://doi.org/10.1109/TASLP.2017.2756440},
  doi          = {10.1109/TASLP.2017.2756440},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/taslp/XiongDHSSSYZ17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/eacl/WongPZSJ17,
  author       = {Baolin Peng and
                  Michael L. Seltzer and
                  Y. C. Ju and
                  Geoffrey Zweig and
                  Kam{-}Fai Wong},
  editor       = {Mirella Lapata and
                  Phil Blunsom and
                  Alexander Koller},
  title        = {May {I} take your order? {A} Neural Model for Extracting Structured
                  Information from Conversations},
  booktitle    = {Proceedings of the 15th Conference of the European Chapter of the
                  Association for Computational Linguistics, {EACL} 2017, Valencia,
                  Spain, April 3-7, 2017, Volume 1: Long Papers},
  pages        = {450--459},
  publisher    = {Association for Computational Linguistics},
  year         = {2017},
  url          = {https://doi.org/10.18653/v1/e17-1043},
  doi          = {10.18653/v1/e17-1043},
  timestamp    = {Thu, 14 Oct 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/eacl/WongPZSJ17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/KoPPSK17,
  author       = {Ko, Tom and
                  Peddinti, Vijayaditya and
                  Povey, Daniel and
                  Seltzer, Michael L. and
                  Khudanpur, Sanjeev},
  title        = {A study on data augmentation of reverberant speech
                  for robust speech recognition},
  booktitle    = {2017 {IEEE} International Conference on Acoustics, Speech and
                  Signal Processing, {ICASSP} 2017, New Orleans, LA, USA, March 5-9, 2017},
  pages        = {5220--5224},
  publisher    = {{IEEE}},
  year         = {2017},
  url          = {https://doi.org/10.1109/ICASSP.2017.7953152},
  doi          = {10.1109/ICASSP.2017.7953152},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/KoPPSK17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/LiSWZG17,
  author       = {Jinyu Li and
                  Michael L. Seltzer and
                  Xi Wang and
                  Rui Zhao and
                  Yifan Gong},
  editor       = {Francisco Lacerda},
  title        = {Large-Scale Domain Adaptation via Teacher-Student Learning},
  booktitle    = {Interspeech 2017, 18th Annual Conference of the International Speech
                  Communication Association, Stockholm, Sweden, August 20-24, 2017},
  pages        = {2386--2390},
  publisher    = {{ISCA}},
  year         = {2017},
  url          = {https://doi.org/10.21437/Interspeech.2017-519},
  doi          = {10.21437/Interspeech.2017-519},
  timestamp    = {Sun, 06 Aug 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/LiSWZG17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:books/sp/17/XiaoWEMLHSCZY17,
  author       = {Xiong Xiao and
                  Shinji Watanabe and
                  Hakan Erdogan and
                  Michael I. Mandel and
                  Liang Lu and
                  John R. Hershey and
                  Michael L. Seltzer and
                  Guoguo Chen and
                  Yu Zhang and
                  Dong Yu},
  editor       = {Shinji Watanabe and
                  Marc Delcroix and
                  Florian Metze and
                  John R. Hershey},
  title        = {Discriminative Beamforming with Phase-Aware Neural Networks for Speech
                  Enhancement and Recognition},
  booktitle    = {New Era for Robust Speech Recognition, Exploiting Deep Learning},
  pages        = {79--104},
  publisher    = {Springer},
  year         = {2017},
  url          = {https://doi.org/10.1007/978-3-319-64680-0_4},
  doi          = {10.1007/978-3-319-64680-0_4},
  timestamp    = {Sat, 31 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/books/sp/17/XiaoWEMLHSCZY17.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1708-05466,
  author       = {Jinyu Li and
                  Michael L. Seltzer and
                  Xi Wang and
                  Rui Zhao and
                  Yifan Gong},
  title        = {Large-Scale Domain Adaptation via Teacher-Student Learning},
  journal      = {CoRR},
  volume       = {abs/1708.05466},
  year         = {2017},
  url          = {https://arxiv.org/abs/1708.05466},
  eprinttype   = {arXiv},
  eprint       = {1708.05466},
  timestamp    = {Fri, 07 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1708-05466.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1711-02207,
  author       = {Suyoun Kim and
                  Michael L. Seltzer},
  title        = {Towards Language-Universal End-to-End Speech Recognition},
  journal      = {CoRR},
  volume       = {abs/1711.02207},
  year         = {2017},
  url          = {https://arxiv.org/abs/1711.02207},
  eprinttype   = {arXiv},
  eprint       = {1711.02207},
  timestamp    = {Mon, 13 Aug 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1711-02207.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1711-02212,
  author       = {Suyoun Kim and
                  Michael L. Seltzer and
                  Jinyu Li and
                  Rui Zhao},
  title        = {Improved training for online end-to-end speech recognition systems},
  journal      = {CoRR},
  volume       = {abs/1711.02212},
  year         = {2017},
  url          = {https://arxiv.org/abs/1711.02212},
  eprinttype   = {arXiv},
  eprint       = {1711.02212},
  timestamp    = {Fri, 07 Oct 2022 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1711-02212.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/GhahremaniDS16,
  author       = {Ghahremani, Pegah and
                  Droppo, Jasha and
                  Seltzer, Michael L.},
  title        = {Linearly augmented
                  deep neural network},
  booktitle    = {2016 {IEEE} International Conference on Acoustics, Speech and
                  Signal Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  pages        = {5085--5089},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/ICASSP.2016.7472646},
  doi          = {10.1109/ICASSP.2016.7472646},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/GhahremaniDS16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/XiaoWELHSCZMY16,
  author       = {Xiao, Xiong and
                  Watanabe, Shinji and
                  Erdogan, Hakan and
                  Lu, Liang and
                  Hershey, John R. and
                  Seltzer, Michael L. and
                  Chen, Guoguo and
                  Zhang, Yu and
                  Mandel, Michael I. and
                  Yu, Dong},
  title        = {Deep beamforming networks for multi-channel speech recognition},
  booktitle    = {2016 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2016, Shanghai, China, March 20-25, 2016},
  pages        = {5745--5749},
  publisher    = {{IEEE}},
  year         = {2016},
  url          = {https://doi.org/10.1109/ICASSP.2016.7472778},
  doi          = {10.1109/ICASSP.2016.7472778},
  timestamp    = {Sat, 31 Jul 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/XiaoWELHSCZMY16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/NagamineSM16,
  author       = {Nagamine, Tasha and
                  Seltzer, Michael L. and
                  Mesgarani, Nima},
  editor       = {Morgan, Nelson},
  title        = {On the Role of Nonlinear Transformations in Deep Neural Network Acoustic
                  Models},
  booktitle    = {Interspeech 2016, 17th Annual Conference of the International Speech
                  Communication Association, San Francisco, CA, USA, September 8-12,
                  2016},
  pages        = {803--807},
  publisher    = {{ISCA}},
  year         = {2016},
  url          = {https://doi.org/10.21437/Interspeech.2016-1406},
  doi          = {10.21437/INTERSPEECH.2016-1406},
  timestamp    = {Mon, 26 Jun 2023 16:43:56 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/NagamineSM16.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/taslp/WengYSD15,
  author       = {Weng, Chao and
                  Yu, Dong and
                  Seltzer, Michael L. and
                  Droppo, Jasha},
  title        = {Deep Neural Networks for Single-Channel Multi-Talker Speech Recognition},
  journal      = {{IEEE} {ACM} Trans. Audio Speech Lang. Process.},
  volume       = {23},
  number       = {10},
  pages        = {1670--1679},
  year         = {2015},
  url          = {https://doi.org/10.1109/TASLP.2015.2444659},
  doi          = {10.1109/TASLP.2015.2444659},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/taslp/WengYSD15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ZhangYSD15,
  author       = {Zhang, Yu and
                  Yu, Dong and
                  Seltzer, Michael L. and
                  Droppo, Jasha},
  title        = {Speech recognition with prediction-adaptation-correction recurrent
                  neural networks},
  booktitle    = {2015 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2015, South Brisbane, Queensland, Australia,
                  April 19-24, 2015},
  pages        = {5004--5008},
  publisher    = {{IEEE}},
  year         = {2015},
  url          = {https://doi.org/10.1109/ICASSP.2015.7178923},
  doi          = {10.1109/ICASSP.2015.7178923},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ZhangYSD15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/GiriSDY15,
  author       = {Giri, Ritwik and
                  Seltzer, Michael L. and
                  Droppo, Jasha and
                  Yu, Dong},
  title        = {Improving speech recognition in reverberation using a room-aware deep
                  neural network and multi-task learning},
  booktitle    = {2015 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2015, South Brisbane, Queensland, Australia,
                  April 19-24, 2015},
  pages        = {5014--5018},
  publisher    = {{IEEE}},
  year         = {2015},
  url          = {https://doi.org/10.1109/ICASSP.2015.7178925},
  doi          = {10.1109/ICASSP.2015.7178925},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/GiriSDY15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/NagamineSM15,
  author       = {Nagamine, Tasha and
                  Seltzer, Michael L. and
                  Mesgarani, Nima},
  title        = {Exploring how deep neural networks form phonemic categories},
  booktitle    = {{INTERSPEECH} 2015, 16th Annual Conference of the International Speech
                  Communication Association, Dresden, Germany, September 6-10, 2015},
  pages        = {1912--1916},
  publisher    = {{ISCA}},
  year         = {2015},
  url          = {https://doi.org/10.21437/Interspeech.2015-422},
  doi          = {10.21437/INTERSPEECH.2015-422},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/NagamineSM15.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeoKS14,
  author       = {Seo, Hyunson and
                  Kang, Hong{-}Goo and
                  Seltzer, Michael L.},
  title        = {Factored adaptation of speaker and environment using orthogonal subspace
                  transforms},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2014, Florence, Italy, May 4-9, 2014},
  pages        = {3251--3255},
  publisher    = {{IEEE}},
  year         = {2014},
  url          = {https://doi.org/10.1109/ICASSP.2014.6854201},
  doi          = {10.1109/ICASSP.2014.6854201},
  timestamp    = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/SeoKS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/WengYSD14,
  author       = {Weng, Chao and
                  Yu, Dong and
                  Seltzer, Michael L. and
                  Droppo, Jasha},
  title        = {Single-channel mixed speech recognition using deep neural networks},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2014, Florence, Italy, May 4-9, 2014},
  pages        = {5632--5636},
  publisher    = {{IEEE}},
  year         = {2014},
  url          = {https://doi.org/10.1109/ICASSP.2014.6854681},
  doi          = {10.1109/ICASSP.2014.6854681},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/WengYSD14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/HuangSSG14,
  author       = {Huang, Yan and
                  Slaney, Malcolm and
                  Seltzer, Michael L. and
                  Gong, Yifan},
  editor       = {Li, Haizhou and
                  Meng, Helen M. and
                  Ma, Bin and
                  Chng, Engsiong and
                  Xie, Lei},
  title        = {Towards better performance with heterogeneous training data in acoustic
                  modeling using deep neural networks},
  booktitle    = {{INTERSPEECH} 2014, 15th Annual Conference of the International Speech
                  Communication Association, Singapore, September 14-18, 2014},
  pages        = {845--849},
  publisher    = {{ISCA}},
  year         = {2014},
  url          = {https://doi.org/10.21437/Interspeech.2014-214},
  doi          = {10.21437/INTERSPEECH.2014-214},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/HuangSSG14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SlaneyS14,
  author       = {Slaney, Malcolm and
                  Seltzer, Michael L.},
  editor       = {Li, Haizhou and
                  Meng, Helen M. and
                  Ma, Bin and
                  Chng, Engsiong and
                  Xie, Lei},
  title        = {The influence of pitch and noise on the discriminability of filterbank
                  features},
  booktitle    = {{INTERSPEECH} 2014, 15th Annual Conference of the International Speech
                  Communication Association, Singapore, September 14-18, 2014},
  pages        = {2263--2267},
  publisher    = {{ISCA}},
  year         = {2014},
  url          = {https://doi.org/10.21437/Interspeech.2014-251},
  doi          = {10.21437/INTERSPEECH.2014-251},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/SlaneyS14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/YuESYGKSWDHZRC14,
  author       = {Yu, Dong and
                  Eversole, Adam and
                  Seltzer, Michael L. and
                  Yao, Kaisheng and
                  Guenter, Brian and
                  Kuchaiev, Oleksii and
                  Seide, Frank and
                  Wang, Huaming and
                  Droppo, Jasha and
                  Huang, Zhiheng and
                  Zweig, Geoffrey and
                  Rossbach, Christopher J. and
                  Currey, Jon},
  editor       = {Li, Haizhou and
                  Meng, Helen M. and
                  Ma, Bin and
                  Chng, Engsiong and
                  Xie, Lei},
  title        = {An introduction to computational networks and the computational network
                  toolkit (invited talk)},
  booktitle    = {{INTERSPEECH} 2014, 15th Annual Conference of the International Speech
                  Communication Association, Singapore, September 14-18, 2014},
  publisher    = {{ISCA}},
  year         = {2014},
  url          = {https://www.isca-speech.org/archive/interspeech\_2014/yu14d\_interspeech.html},
  timestamp    = {Tue, 27 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/YuESYGKSWDHZRC14.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/ThomasSCH13,
  author       = {Thomas, Samuel and
                  Seltzer, Michael L. and
                  Church, Kenneth and
                  Hermansky, Hynek},
  title        = {Deep neural network features and semi-supervised training for low
                  resource speech recognition},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2013, Vancouver, BC, Canada, May 26-31, 2013},
  pages        = {6704--6708},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/ICASSP.2013.6638959},
  doi          = {10.1109/ICASSP.2013.6638959},
  timestamp    = {Mon, 26 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/ThomasSCH13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeltzerD13,
  author       = {Seltzer, Michael L. and
                  Droppo, Jasha},
  title        = {Multi-task learning in deep neural networks for improved phoneme recognition},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2013, Vancouver, BC, Canada, May 26-31, 2013},
  pages        = {6965--6969},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/ICASSP.2013.6639012},
  doi          = {10.1109/ICASSP.2013.6639012},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/SeltzerD13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeltzerYW13,
  author       = {Seltzer, Michael L. and
                  Yu, Dong and
                  Wang, Yongqiang},
  title        = {An investigation of deep neural networks for noise robust speech recognition},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2013, Vancouver, BC, Canada, May 26-31, 2013},
  pages        = {7398--7402},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/ICASSP.2013.6639100},
  doi          = {10.1109/ICASSP.2013.6639100},
  timestamp    = {Thu, 03 Nov 2022 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/SeltzerYW13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/DengLHYYSSZHWGA13,
  author       = {Deng, Li and
                  Li, Jinyu and
                  Huang, Jui{-}Ting and
                  Yao, Kaisheng and
                  Yu, Dong and
                  Seide, Frank and
                  Seltzer, Michael L. and
                  Zweig, Geoffrey and
                  He, Xiaodong and
                  Williams, Jason D. and
                  Gong, Yifan and
                  Acero, Alex},
  title        = {Recent advances in deep learning for speech research at {Microsoft}},
  booktitle    = {{IEEE} International Conference on Acoustics, Speech and Signal Processing,
                  {ICASSP} 2013, Vancouver, BC, Canada, May 26-31, 2013},
  pages        = {8604--8608},
  publisher    = {{IEEE}},
  year         = {2013},
  url          = {https://doi.org/10.1109/ICASSP.2013.6639345},
  doi          = {10.1109/ICASSP.2013.6639345},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/DengLHYYSSZHWGA13.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:journals/corr/abs-1301-3605,
  author       = {Yu, Dong and
                  Seltzer, Michael L. and
                  Li, Jinyu and
                  Huang, Jui{-}Ting and
                  Seide, Frank},
  editor       = {Bengio, Yoshua and
                  LeCun, Yann},
  title        = {Feature Learning in Deep Neural Networks - {A} Study on Speech Recognition
                  Tasks},
  booktitle    = {1st International Conference on Learning Representations, {ICLR} 2013,
                  Scottsdale, Arizona, USA, May 2-4, 2013, Conference Track Proceedings},
  year         = {2013},
  url          = {http://arxiv.org/abs/1301.3605},
  timestamp    = {Mon, 03 May 2021 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-1301-3605.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/LiSG12,
  author       = {Li, Jinyu and
                  Seltzer, Michael L. and
                  Gong, Yifan},
  title        = {Improvements to {VTS} feature enhancement},
  booktitle    = {2012 {IEEE} International Conference on Acoustics, Speech and Signal
                  Processing, {ICASSP} 2012, Kyoto, Japan, March 25-30, 2012},
  pages        = {4677--4680},
  publisher    = {{IEEE}},
  year         = {2012},
  url          = {https://doi.org/10.1109/ICASSP.2012.6288962},
  doi          = {10.1109/ICASSP.2012.6288962},
  timestamp    = {Tue, 21 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/LiSG12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SeltzerA12,
  author       = {Seltzer, Michael L. and
                  Acero, Alex},
  title        = {Factored adaptation using a combination of feature-space and model-space
                  transforms},
  booktitle    = {{INTERSPEECH} 2012, 13th Annual Conference of the International Speech
                  Communication Association, Portland, Oregon, USA, September 9-13,
                  2012},
  pages        = {1792--1795},
  publisher    = {{ISCA}},
  year         = {2012},
  url          = {https://doi.org/10.21437/Interspeech.2012-488},
  doi          = {10.21437/INTERSPEECH.2012-488},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/SeltzerA12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/LiSG12,
  author       = {Li, Jinyu and
                  Seltzer, Michael L. and
                  Gong, Yifan},
  title        = {Efficient {VTS} Adaptation Using {Jacobian} Approximation},
  booktitle    = {{INTERSPEECH} 2012, 13th Annual Conference of the International Speech
                  Communication Association, Portland, Oregon, USA, September 9-13,
                  2012},
  pages        = {1906--1909},
  publisher    = {{ISCA}},
  year         = {2012},
  url          = {https://doi.org/10.21437/Interspeech.2012-517},
  doi          = {10.21437/INTERSPEECH.2012-517},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/LiSG12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:books/wi/12/Seltzer12,
  author       = {Seltzer, Michael L.},
  editor       = {Virtanen, Tuomas and
                  Singh, Rita and
                  Raj, Bhiksha},
  title        = {Acoustic Model Training for Robust Speech Recognition},
  booktitle    = {Techniques for Noise Robustness in Automatic Speech Recognition},
  pages        = {347--368},
  publisher    = {Wiley},
  year         = {2012},
  url          = {https://doi.org/10.1002/9781118392683.ch13},
  doi          = {10.1002/9781118392683.CH13},
  timestamp    = {Thu, 27 Jun 2019 13:28:41 +0200},
  biburl       = {https://dblp.org/rec/books/wi/12/Seltzer12.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/spm/SeltzerJTWY11,
  author       = {Seltzer, Michael L. and
                  Ju, Yun{-}Cheng and
                  Tashev, Ivan and
                  Wang, Ye{-}Yi and
                  Yu, Dong},
  title        = {In-Car Media Search},
  journal      = {{IEEE} Signal Process. Mag.},
  volume       = {28},
  number       = {4},
  pages        = {50--60},
  year         = {2011},
  url          = {https://doi.org/10.1109/MSP.2011.941065},
  doi          = {10.1109/MSP.2011.941065},
  timestamp    = {Thu, 12 Jul 2018 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/spm/SeltzerJTWY11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/asru/SeltzerA11,
  author       = {Seltzer, Michael L. and
                  Acero, Alex},
  editor       = {Nahamoo, David and
                  Picheny, Michael},
  title        = {Factored adaptation for separable compensation of speaker and environmental
                  variability},
  booktitle    = {2011 {IEEE} Workshop on Automatic Speech Recognition {\&} Understanding,
                  {ASRU} 2011, Waikoloa, HI, USA, December 11-15, 2011},
  pages        = {146--151},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/ASRU.2011.6163921},
  doi          = {10.1109/ASRU.2011.6163921},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/asru/SeltzerA11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/RibeiroFZS11,
  author       = {Ribeiro, Flavio P. and
                  Flor{\^{e}}ncio, Dinei A. F. and
                  Zhang, Cha and
                  Seltzer, Michael L.},
  title        = {{CROWDMOS}: An approach for crowdsourcing mean opinion score studies},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2011, May 22-27, 2011, Prague Congress
                  Center, Prague, Czech Republic},
  pages        = {2416--2419},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/ICASSP.2011.5946971},
  doi          = {10.1109/ICASSP.2011.5946971},
  timestamp    = {Fri, 19 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/RibeiroFZS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/FanSDMA11,
  author       = {Fan, Xing and
                  Seltzer, Michael L. and
                  Droppo, Jasha and
                  Malvar, Henrique S. and
                  Acero, Alex},
  title        = {Joint encoding of the waveform and speech recognition features using
                  a transform codec},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2011, May 22-27, 2011, Prague Congress
                  Center, Prague, Czech Republic},
  pages        = {5148--5151},
  publisher    = {{IEEE}},
  year         = {2011},
  url          = {https://doi.org/10.1109/ICASSP.2011.5947516},
  doi          = {10.1109/ICASSP.2011.5947516},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/FanSDMA11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/YuS11,
  author       = {Yu, Dong and
                  Seltzer, Michael L.},
  title        = {Improved Bottleneck Features Using Pretrained Deep Neural Networks},
  booktitle    = {{INTERSPEECH} 2011, 12th Annual Conference of the International Speech
                  Communication Association, Florence, Italy, August 27-31, 2011},
  pages        = {237--240},
  publisher    = {{ISCA}},
  year         = {2011},
  url          = {https://doi.org/10.21437/Interspeech.2011-91},
  doi          = {10.21437/INTERSPEECH.2011-91},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/YuS11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SeltzerA11,
  author       = {Seltzer, Michael L. and
                  Acero, Alex},
  title        = {Separating Speaker and Environmental Variability Using Factored Transforms},
  booktitle    = {{INTERSPEECH} 2011, 12th Annual Conference of the International Speech
                  Communication Association, Florence, Italy, August 27-31, 2011},
  pages        = {1097--1100},
  publisher    = {{ISCA}},
  year         = {2011},
  url          = {https://doi.org/10.21437/Interspeech.2011-415},
  doi          = {10.21437/INTERSPEECH.2011-415},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/SeltzerA11.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/taslp/KalinliSDA10,
  author       = {Kalinli, Ozlem and
                  Seltzer, Michael L. and
                  Droppo, Jasha and
                  Acero, Alex},
  title        = {Noise Adaptive Training for Robust Automatic Speech Recognition},
  journal      = {{IEEE} Trans. Audio Speech Lang. Process.},
  volume       = {18},
  number       = {8},
  pages        = {1889--1901},
  year         = {2010},
  url          = {https://doi.org/10.1109/TASL.2010.2040522},
  doi          = {10.1109/TASL.2010.2040522},
  timestamp    = {Mon, 26 Oct 2020 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/taslp/KalinliSDA10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeltzerAK10,
  author       = {Seltzer, Michael L. and
                  Acero, Alex and
                  Kalgaonkar, Kaustubh},
  title        = {Acoustic model adaptation via Linear Spline Interpolation for robust
                  speech recognition},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2010, 14-19 March 2010, Sheraton Dallas
                  Hotel, Dallas, Texas, {USA}},
  pages        = {4550--4553},
  publisher    = {{IEEE}},
  year         = {2010},
  url          = {https://doi.org/10.1109/ICASSP.2010.5495581},
  doi          = {10.1109/ICASSP.2010.5495581},
  timestamp    = {Fri, 19 May 2017 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/SeltzerAK10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SeltzerA10,
  author       = {Seltzer, Michael L. and
                  Acero, Alex},
  editor       = {Kobayashi, Takao and
                  Hirose, Keikichi and
                  Nakamura, Satoshi},
  title        = {{HMM} adaptation using linear spline interpolation with integrated
                  spline parameter training for robust speech recognition},
  booktitle    = {{INTERSPEECH} 2010, 11th Annual Conference of the International Speech
                  Communication Association, Makuhari, Chiba, Japan, September 26-30,
                  2010},
  pages        = {1664--1667},
  publisher    = {{ISCA}},
  year         = {2010},
  url          = {https://doi.org/10.21437/Interspeech.2010-480},
  doi          = {10.21437/INTERSPEECH.2010-480},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/SeltzerA10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/DengSYAMH10,
  author       = {Deng, Li and
                  Seltzer, Michael L. and
                  Yu, Dong and
                  Acero, Alex and
                  Mohamed, Abdel{-}rahman and
                  Hinton, Geoffrey E.},
  editor       = {Kobayashi, Takao and
                  Hirose, Keikichi and
                  Nakamura, Satoshi},
  title        = {Binary coding of speech spectrograms using a deep auto-encoder},
  booktitle    = {{INTERSPEECH} 2010, 11th Annual Conference of the International Speech
                  Communication Association, Makuhari, Chiba, Japan, September 26-30,
                  2010},
  pages        = {1692--1695},
  publisher    = {{ISCA}},
  year         = {2010},
  url          = {https://doi.org/10.21437/Interspeech.2010-487},
  doi          = {10.21437/INTERSPEECH.2010-487},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/DengSYAMH10.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/asru/KalgaonkarSA09,
  author       = {Kalgaonkar, Kaustubh and
                  Seltzer, Michael L. and
                  Acero, Alex},
  title        = {Noise robust model adaptation using linear spline interpolation},
  booktitle    = {2009 {IEEE} Workshop on Automatic Speech Recognition {\&} Understanding,
                  {ASRU} 2009, Merano/Meran, Italy, December 13-17, 2009},
  pages        = {199--204},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ASRU.2009.5373430},
  doi          = {10.1109/ASRU.2009.5373430},
  timestamp    = {Wed, 16 Oct 2019 14:14:51 +0200},
  biburl       = {https://dblp.org/rec/conf/asru/KalgaonkarSA09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeltzerZ09,
  author       = {Seltzer, Michael L. and
                  Zhang, Lei},
  title        = {The data deluge: Challenges and opportunities of unlimited data in
                  statistical signal processing},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2009, 19-24 April 2009, Taipei, Taiwan},
  pages        = {3701--3704},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ICASSP.2009.4960430},
  doi          = {10.1109/ICASSP.2009.4960430},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/SeltzerZ09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/KalinliSA09,
  author       = {Kalinli, Ozlem and
                  Seltzer, Michael L. and
                  Acero, Alex},
  title        = {Noise adaptive training using a vector {Taylor} series approach for
                  noise robust automatic speech recognition},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2009, 19-24 April 2009, Taipei, Taiwan},
  pages        = {3825--3828},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ICASSP.2009.4960461},
  doi          = {10.1109/ICASSP.2009.4960461},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/KalinliSA09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SongWJSTA09,
  author       = {Song, Young{-}In and
                  Wang, Ye{-}Yi and
                  Ju, Yun{-}Cheng and
                  Seltzer, Michael L. and
                  Tashev, Ivan and
                  Acero, Alex},
  title        = {Voice search of structured media data},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2009, 19-24 April 2009, Taipei, Taiwan},
  pages        = {3941--3944},
  publisher    = {{IEEE}},
  year         = {2009},
  url          = {https://doi.org/10.1109/ICASSP.2009.4960490},
  doi          = {10.1109/ICASSP.2009.4960490},
  timestamp    = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/icassp/SongWJSTA09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/JuST09,
  author       = {Ju, Yun{-}Cheng and
                  Seltzer, Michael L. and
                  Tashev, Ivan},
  title        = {Improving perceived accuracy for in-car media search},
  booktitle    = {{INTERSPEECH} 2009, 10th Annual Conference of the International Speech
                  Communication Association, Brighton, United Kingdom, September 6-10,
                  2009},
  pages        = {979--982},
  publisher    = {{ISCA}},
  year         = {2009},
  url          = {https://doi.org/10.21437/Interspeech.2009-291},
  doi          = {10.21437/INTERSPEECH.2009-291},
  timestamp    = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/interspeech/JuST09.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/TashevDSA08,
  author       = {Tashev, Ivan and
                  Droppo, Jasha and
                  Seltzer, Michael L. and
                  Acero, Alex},
  title        = {Robust design of wideband loudspeaker arrays},
  booktitle    = {Proceedings of the {IEEE} International Conference on Acoustics, Speech,
                  and Signal Processing, {ICASSP} 2008, March 30 - April 4, 2008, Caesars
                  Palace, Las Vegas, Nevada, {USA}},
  pages        = {381--384},
  publisher    = {{IEEE}},
  year         = {2008},
  url          = {https://doi.org/10.1109/ICASSP.2008.4517626},
  doi          = {10.1109/ICASSP.2008.4517626},
  timestamp    = {Sun, 25 Oct 2020 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/icassp/TashevDSA08.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/TaylorSA08,
  author    = {Taylor, Graham W. and
               Seltzer, Michael L. and
               Acero, Alex},
  title     = {Maximum a posteriori {ICA:} Applying prior knowledge to the separation of acoustic sources},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2008, March 30 - April 4, 2008, Caesars Palace, Las Vegas, Nevada, {USA}},
  pages     = {1821--1824},
  publisher = {{IEEE}},
  year      = {2008},
  doi       = {10.1109/ICASSP.2008.4517986},
  url       = {https://doi.org/10.1109/ICASSP.2008.4517986},
  timestamp = {Fri, 19 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/TaylorSA08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/DroppoSAC08,
  author    = {Droppo, Jasha and
               Seltzer, Michael L. and
               Acero, Alex and
               Chiu, Yu{-}Hsiang Bosco},
  title     = {Towards a non-parametric acoustic model: an acoustic decision tree for observation probability calculation},
  booktitle = {{INTERSPEECH} 2008, 9th Annual Conference of the International Speech Communication Association, Brisbane, Australia, September 22-26, 2008},
  pages     = {289--292},
  publisher = {{ISCA}},
  year      = {2008},
  doi       = {10.21437/INTERSPEECH.2008-105},
  url       = {https://doi.org/10.21437/Interspeech.2008-105},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/DroppoSAC08.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/spl/SubramanyaSA07,
  author    = {Subramanya, Amarnag and
               Seltzer, Michael L. and
               Acero, Alejandro},
  title     = {Automatic Removal of Typed Keystrokes From Speech Signals},
  journal   = {{IEEE} Signal Process. Lett.},
  volume    = {14},
  number    = {5},
  pages     = {363--366},
  year      = {2007},
  doi       = {10.1109/LSP.2006.888091},
  url       = {https://doi.org/10.1109/LSP.2006.888091},
  timestamp = {Sun, 28 May 2017 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/spl/SubramanyaSA07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal name corrected: the TASL DOI prefix identifies IEEE Transactions on
% Audio, Speech, and Language Processing (the title in use from vol. 14, 2006),
% not the earlier "Trans. Speech Audio Process." (TSA DOI prefix).
@article{DBLP:journals/taslp/SeltzerA07,
  author    = {Seltzer, Michael L. and
               Acero, Alex},
  title     = {Training Wideband Acoustic Models Using Mixed-Bandwidth Training Data for Speech Recognition},
  journal   = {{IEEE} Trans. Audio Speech Lang. Process.},
  volume    = {15},
  number    = {1},
  pages     = {235--245},
  year      = {2007},
  doi       = {10.1109/TASL.2006.876774},
  url       = {https://doi.org/10.1109/TASL.2006.876774},
  timestamp = {Sun, 17 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/taslp/SeltzerA07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% "Bayes" is brace-protected so sentence-casing styles keep the proper noun
% capitalised.
@inproceedings{DBLP:conf/icassp/SeltzerTA07,
  author    = {Seltzer, Michael L. and
               Tashev, Ivan and
               Acero, Alex},
  title     = {Microphone Array Post-Filter using Incremental {Bayes} Learning to Track the Spatial Distributions of Speech and Noise},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2007, Honolulu, Hawaii, USA, April 15-20, 2007},
  pages     = {29--32},
  publisher = {{IEEE}},
  year      = {2007},
  doi       = {10.1109/ICASSP.2007.366608},
  url       = {https://doi.org/10.1109/ICASSP.2007.366608},
  timestamp = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/SeltzerTA07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SeltzerJTA07,
  author    = {Seltzer, Michael L. and
               Ju, Yun{-}Cheng and
               Tashev, Ivan and
               Acero, Alex},
  title     = {Robust location understanding in spoken dialog systems using intersections},
  booktitle = {{INTERSPEECH} 2007, 8th Annual Conference of the International Speech Communication Association, Antwerp, Belgium, August 27-31, 2007},
  pages     = {2813--2816},
  publisher = {{ISCA}},
  year      = {2007},
  doi       = {10.21437/INTERSPEECH.2007-725},
  url       = {https://doi.org/10.21437/Interspeech.2007-725},
  timestamp = {Fri, 23 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/SeltzerJTA07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/sigdial/TashevSJYA07,
  author    = {Tashev, Ivan and
               Seltzer, Michael L. and
               Ju, Yun{-}Cheng and
               Yu, Dong and
               Acero, Alex},
  editor    = {Bunt, Harry and
               Keizer, Simon and
               Paek, Tim},
  title     = {Commute {UX:} Telephone Dialog System for Location-based Services},
  booktitle = {Proceedings of the 8th SIGdial Workshop on Discourse and Dialogue, SIGdial 2007, Antwerp, Belgium, September 1-2, 2007},
  pages     = {87--94},
  publisher = {Association for Computational Linguistics},
  year      = {2007},
  url       = {https://aclanthology.org/2007.sigdial-1.18/},
  timestamp = {Fri, 06 Aug 2021 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/sigdial/TashevSJYA07.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal name corrected: the TASL DOI prefix identifies IEEE Transactions on
% Audio, Speech, and Language Processing (the title in use from vol. 14, 2006),
% not the earlier "Trans. Speech Audio Process." (TSA DOI prefix).
@article{DBLP:journals/taslp/SeltzerS06,
  author    = {Seltzer, Michael L. and
               Stern, Richard M.},
  title     = {Subband Likelihood-Maximizing Beamforming for Speech Recognition in Reverberant Environments},
  journal   = {{IEEE} Trans. Audio Speech Lang. Process.},
  volume    = {14},
  number    = {6},
  pages     = {2109--2121},
  year      = {2006},
  doi       = {10.1109/TASL.2006.872614},
  url       = {https://doi.org/10.1109/TASL.2006.872614},
  timestamp = {Sun, 17 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/taslp/SeltzerS06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SubramanyaSA06,
  author    = {Subramanya, Amarnag and
               Seltzer, Michael L. and
               Acero, Alex},
  title     = {Automatic removal of typed keystrokes from speech signals},
  booktitle = {{INTERSPEECH} 2006 - ICSLP, Ninth International Conference on Spoken Language Processing, Pittsburgh, PA, USA, September 17-21, 2006},
  publisher = {{ISCA}},
  year      = {2006},
  doi       = {10.21437/INTERSPEECH.2006-72},
  url       = {https://doi.org/10.21437/Interspeech.2006-72},
  timestamp = {Thu, 22 Jun 2023 16:42:16 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/SubramanyaSA06.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeltzerA05,
  author    = {Seltzer, Michael L. and
               Acero, Alex},
  title     = {Training Wideband Acoustic Models using Mixed-Bandwidth Training Data via Feature Bandwidth Extension},
  booktitle = {2005 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} '05, Philadelphia, Pennsylvania, USA, March 18-23, 2005},
  pages     = {921--924},
  publisher = {{IEEE}},
  year      = {2005},
  doi       = {10.1109/ICASSP.2005.1415265},
  url       = {https://doi.org/10.1109/ICASSP.2005.1415265},
  timestamp = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/SeltzerA05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SeltzerAD05,
  author    = {Seltzer, Michael L. and
               Acero, Alex and
               Droppo, Jasha},
  title     = {Robust bandwidth extension of noise-corrupted narrowband speech},
  booktitle = {{INTERSPEECH} 2005 - Eurospeech, 9th European Conference on Speech Communication and Technology, Lisbon, Portugal, September 4-8, 2005},
  pages     = {1509--1512},
  publisher = {{ISCA}},
  year      = {2005},
  doi       = {10.21437/INTERSPEECH.2005-529},
  url       = {https://doi.org/10.21437/Interspeech.2005-529},
  timestamp = {Thu, 22 Jun 2023 16:42:16 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/SeltzerAD05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@incollection{DBLP:books/sp/05/RajSR05,
  author    = {Raj, Bhiksha and
               Seltzer, Michael L. and
               Reyes{-}Gomez, Manuel Jesus},
  editor    = {Divenyi, Pierre L.},
  title     = {Speech Recognizer Based Maximum Likelihood Beamforming},
  booktitle = {Speech Separation by Humans and Machines},
  pages     = {65--82},
  publisher = {Springer},
  year      = {2005},
  doi       = {10.1007/0-387-22794-6\_6},
  url       = {https://doi.org/10.1007/0-387-22794-6\_6},
  timestamp = {Thu, 25 Jul 2019 16:54:17 +0200},
  biburl    = {https://dblp.org/rec/books/sp/05/RajSR05.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/speech/RajSS04,
  author    = {Raj, Bhiksha and
               Seltzer, Michael L. and
               Stern, Richard M.},
  title     = {Reconstruction of missing features for robust speech recognition},
  journal   = {Speech Commun.},
  volume    = {43},
  number    = {4},
  pages     = {275--296},
  year      = {2004},
  doi       = {10.1016/J.SPECOM.2004.03.007},
  url       = {https://doi.org/10.1016/j.specom.2004.03.007},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/speech/RajSS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% "Bayesian" is brace-protected so sentence-casing styles do not render it as
% "bayesian".
@article{DBLP:journals/speech/SeltzerRS04,
  author    = {Seltzer, Michael L. and
               Raj, Bhiksha and
               Stern, Richard M.},
  title     = {A {Bayesian} classifier for spectrographic mask estimation for missing feature speech recognition},
  journal   = {Speech Commun.},
  volume    = {43},
  number    = {4},
  pages     = {379--393},
  year      = {2004},
  doi       = {10.1016/J.SPECOM.2004.03.006},
  url       = {https://doi.org/10.1016/j.specom.2004.03.006},
  timestamp = {Sat, 22 Feb 2020 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/speech/SeltzerRS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/taslp/SeltzerRS04,
  author    = {Seltzer, Michael L. and
               Raj, Bhiksha and
               Stern, Richard M.},
  title     = {Likelihood-maximizing beamforming for robust hands-free speech recognition},
  journal   = {{IEEE} Trans. Speech Audio Process.},
  volume    = {12},
  number    = {5},
  pages     = {489--498},
  year      = {2004},
  doi       = {10.1109/TSA.2004.832988},
  url       = {https://doi.org/10.1109/TSA.2004.832988},
  timestamp = {Sun, 17 May 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/journals/taslp/SeltzerRS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeltzerS04,
  author    = {Seltzer, Michael L. and
               Stern, Richard M.},
  title     = {Parameter sharing in subband likelihood-maximizing beamforming for speech recognition using microphone arrays},
  booktitle = {2004 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2004, Montreal, Quebec, Canada, May 17-21, 2004},
  pages     = {881--884},
  publisher = {{IEEE}},
  year      = {2004},
  doi       = {10.1109/ICASSP.2004.1326127},
  url       = {https://doi.org/10.1109/ICASSP.2004.1326127},
  timestamp = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/SeltzerS04.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/spl/SeltzerR03,
  author    = {Seltzer, Michael L. and
               Raj, Bhiksha},
  title     = {Speech-recognizer-based filter optimization for microphone array processing},
  journal   = {{IEEE} Signal Process. Lett.},
  volume    = {10},
  number    = {3},
  pages     = {69--71},
  year      = {2003},
  doi       = {10.1109/LSP.2002.807877},
  url       = {https://doi.org/10.1109/LSP.2002.807877},
  timestamp = {Mon, 01 Mar 2021 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/journals/spl/SeltzerR03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeltzerS03,
  author    = {Seltzer, Michael L. and
               Stern, Richard M.},
  title     = {Subband parameter optimization of microphone arrays for speech recognition in reverberant environments},
  booktitle = {2003 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} '03, Hong Kong, April 6-10, 2003},
  pages     = {408--411},
  publisher = {{IEEE}},
  year      = {2003},
  doi       = {10.1109/ICASSP.2003.1198804},
  url       = {https://doi.org/10.1109/ICASSP.2003.1198804},
  timestamp = {Mon, 22 Jun 2020 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/SeltzerS03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SeltzerDA03,
  author    = {Seltzer, Michael L. and
               Droppo, Jasha and
               Acero, Alex},
  title     = {A harmonic-model-based front end for robust speech recognition},
  booktitle = {8th European Conference on Speech Communication and Technology, {EUROSPEECH} 2003 - {INTERSPEECH} 2003, Geneva, Switzerland, September 1-4, 2003},
  pages     = {1277--1280},
  publisher = {{ISCA}},
  year      = {2003},
  doi       = {10.21437/EUROSPEECH.2003-406},
  url       = {https://doi.org/10.21437/Eurospeech.2003-406},
  timestamp = {Thu, 22 Jun 2023 16:42:17 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/SeltzerDA03.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SeltzerRS02,
  author    = {Seltzer, Michael L. and
               Raj, Bhiksha and
               Stern, Richard M.},
  title     = {Speech recognizer-based microphone array processing for robust hands-free speech recognition},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2002, May 13-17 2002, Orlando, Florida, {USA}},
  pages     = {897--900},
  publisher = {{IEEE}},
  year      = {2002},
  doi       = {10.1109/ICASSP.2002.5743884},
  url       = {https://doi.org/10.1109/ICASSP.2002.5743884},
  timestamp = {Wed, 16 Oct 2019 14:14:52 +0200},
  biburl    = {https://dblp.org/rec/conf/icassp/SeltzerRS02.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/icassp/SinghSRS01,
  author    = {Singh, Rita and
               Seltzer, Michael L. and
               Raj, Bhiksha and
               Stern, Richard M.},
  title     = {Speech in Noisy Environments: robust automatic segmentation, feature extraction, and hypothesis combination},
  booktitle = {{IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2001, 7-11 May, 2001, Salt Palace Convention Center, Salt Lake City, Utah, USA, Proceedings},
  pages     = {273--276},
  publisher = {{IEEE}},
  year      = {2001},
  doi       = {10.1109/ICASSP.2001.940820},
  url       = {https://doi.org/10.1109/ICASSP.2001.940820},
  timestamp = {Thu, 23 Mar 2023 00:00:00 +0100},
  biburl    = {https://dblp.org/rec/conf/icassp/SinghSRS01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SeltzerR01,
  author    = {Seltzer, Michael L. and
               Raj, Bhiksha},
  editor    = {Dalsgaard, Paul and
               Lindberg, B{\o}rge and
               Benner, Henrik and
               Tan, Zheng{-}Hua},
  title     = {Calibration of microphone arrays for improved speech recognition},
  booktitle = {{EUROSPEECH} 2001 Scandinavia, 7th European Conference on Speech Communication and Technology, 2nd {INTERSPEECH} Event, Aalborg, Denmark, September 3-7, 2001},
  pages     = {1005--1008},
  publisher = {{ISCA}},
  year      = {2001},
  doi       = {10.21437/EUROSPEECH.2001-289},
  url       = {https://doi.org/10.21437/Eurospeech.2001-289},
  timestamp = {Thu, 22 Jun 2023 16:42:18 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/SeltzerR01.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/RajSS00,
  author    = {Raj, Bhiksha and
               Seltzer, Michael L. and
               Stern, Richard M.},
  title     = {Reconstruction of damaged spectrographic features for robust speech recognition},
  booktitle = {Sixth International Conference on Spoken Language Processing, {ICSLP} 2000 / {INTERSPEECH} 2000, Beijing, China, October 16-20, 2000},
  pages     = {357--360},
  publisher = {{ISCA}},
  year      = {2000},
  doi       = {10.21437/ICSLP.2000-89},
  url       = {https://doi.org/10.21437/ICSLP.2000-89},
  timestamp = {Thu, 22 Jun 2023 16:42:19 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/RajSS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/interspeech/SeltzerRS00,
  author    = {Seltzer, Michael L. and
               Raj, Bhiksha and
               Stern, Richard M.},
  title     = {Classifier-based mask estimation for missing feature methods of robust speech recognition},
  booktitle = {Sixth International Conference on Spoken Language Processing, {ICSLP} 2000 / {INTERSPEECH} 2000, Beijing, China, October 16-20, 2000},
  pages     = {538--541},
  publisher = {{ISCA}},
  year      = {2000},
  doi       = {10.21437/ICSLP.2000-591},
  url       = {https://doi.org/10.21437/ICSLP.2000-591},
  timestamp = {Thu, 22 Jun 2023 01:00:00 +0200},
  biburl    = {https://dblp.org/rec/conf/interspeech/SeltzerRS00.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
a service of Schloss Dagstuhl - Leibniz Center for Informatics