Skip to content

Commit

Permalink
Formatting changes for new docs system (#8525)
Browse files Browse the repository at this point in the history
Signed-off-by: Neal Vaidya <[email protected]>
  • Loading branch information
nealvaidya authored Feb 28, 2024
1 parent cccd80d commit 0796199
Show file tree
Hide file tree
Showing 10 changed files with 271 additions and 274 deletions.
2 changes: 0 additions & 2 deletions docs/source/asr/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -326,5 +326,3 @@ Adapter Strategies
:member-order: bysource
:undoc-members: adapter_module_names

-----

108 changes: 50 additions & 58 deletions docs/source/asr/asr_all.bib
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,17 @@ @article{luong17
year = {2017},
}

@INPROCEEDINGS{LaurentSeqWiseBN,
author={C. {Laurent} and G. {Pereyra} and P. {Brakel} and Y. {Zhang} and Y. {Bengio}},
booktitle={2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
title={Batch normalized recurrent neural networks},
year={2016},
volume={},
number={},
pages={2657-2661},
keywords={feedforward neural nets;learning (artificial intelligence);recurrent neural nets;speech recognition;batch normalized recurrent neural networks;RNN;sequential data;long-term dependency learning;convergence rate improvement;intermediate representation normalization;feedforward neural networks;speech recognition task;language modeling;training criterion;Training;Recurrent neural networks;Convergence;Speech recognition;Computer architecture;Speech;batch normalization;RNN;LSTM;optimization},
doi={10.1109/ICASSP.2016.7472159},
ISSN={2379-190X},
@inproceedings{LaurentSeqWiseBN,
  author    = {C. {Laurent} and G. {Pereyra} and P. {Brakel} and Y. {Zhang} and Y. {Bengio}},
  booktitle = {2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {Batch normalized recurrent neural networks},
  year      = {2016},
  pages     = {2657--2661},
  keywords  = {feedforward neural nets;learning (artificial intelligence);recurrent neural nets;speech recognition;batch normalized recurrent neural networks;RNN;sequential data;long-term dependency learning;convergence rate improvement;intermediate representation normalization;feedforward neural networks;speech recognition task;language modeling;training criterion;Training;Recurrent neural networks;Convergence;Speech recognition;Computer architecture;Speech;batch normalization;RNN;LSTM;optimization},
  doi       = {10.1109/ICASSP.2016.7472159},
  issn      = {2379-190X},
  month     = mar,
}

@article{graves2005,
Expand Down Expand Up @@ -112,7 +112,7 @@ @article{NVTuring

@misc{Rygaard2015,
  title        = {Using Synthesized Speech to Improve Speech Recognition for Low-Resource Languages},
  author       = {Rygaard, Luise Valentin},
  howpublished = {\url{https://parasol.tamu.edu/dreu2015/Rygaard/report.pdf}},
  year         = {2015},
}
Expand Down Expand Up @@ -156,7 +156,7 @@ @book{Bengio1996
Title = {Neural Networks for Speech and Sequence Recognition},
Year = {1996}
}

@article{Bengio1992,
title={Global optimization of a neural network-hidden Markov model hybrid},
author={Bengio, Y., and De Mori, R., and Flammia, G., and Kompe, R. },
Expand Down Expand Up @@ -359,7 +359,7 @@ @inproceedings{DeepSpeech2
url = {http://dl.acm.org/citation.cfm?id=3045390.3045410},
acmid = {3045410},
publisher = {JMLR.org},
}
}

@inproceedings{prabhavalkar2017comparison,
title={A comparison of sequence-to-sequence models for speech recognition},
Expand Down Expand Up @@ -572,7 +572,7 @@ @incollection{Salimans2016WeightNorm
title = {Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks},
author = {Salimans, Tim and Kingma, Durk P},
booktitle = {Advances in Neural Information Processing Systems 29},
editor = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett},
editor = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett},
pages = {901--909},
year = {2016},
publisher = {Curran Associates, Inc.},
Expand All @@ -581,7 +581,7 @@ @incollection{Salimans2016WeightNorm

@article{wu2016google,
title={Google's neural machine translation system: Bridging the gap between human and machine translation},
author={Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V and Norouzi, Mohammad and Macherey, Zolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and others},
author={Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V and Norouzi, Mohammad and Macherey, Zolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and others},
journal={arXiv preprint arXiv:1609.08144},
year={2016}
}
Expand Down Expand Up @@ -638,7 +638,7 @@ @inproceedings{Dauphin2017GLU
url = {http://dl.acm.org/citation.cfm?id=3305381.3305478},
acmid = {3305478},
publisher = {JMLR.org},
}
}

@incollection{Oord2016PixelCNN,
title = {Conditional Image Generation with PixelCNN Decoders},
Expand Down Expand Up @@ -698,17 +698,17 @@ @inproceedings{Saon+2016
pages={7--11}
}

@INPROCEEDINGS{Sercu-2016,
author={T. {Sercu} and C. {Puhrsch} and B. {Kingsbury} and Y. {LeCun}},
booktitle={2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
title={Very deep multilingual convolutional neural networks for LVCSR},
year={2016},
volume={},
number={},
pages={4955-4959},
keywords={natural language processing;neural nets;speech recognition;very deep multilingual convolutional neural networks;LVCSR;CNN;large vocabulary continuous speech recognition systems;word error rate;Training;Context;Hidden Markov models;Neural networks;Computer architecture;Kernel;Training data;Convolutional Networks;Multilingual;Acoustic Modeling;Speech Recognition;Neural Networks},
doi={10.1109/ICASSP.2016.7472620},
ISSN={2379-190X},
@inproceedings{Sercu-2016,
  author    = {T. {Sercu} and C. {Puhrsch} and B. {Kingsbury} and Y. {LeCun}},
  booktitle = {2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {Very deep multilingual convolutional neural networks for LVCSR},
  year      = {2016},
  pages     = {4955--4959},
  keywords  = {natural language processing;neural nets;speech recognition;very deep multilingual convolutional neural networks;LVCSR;CNN;large vocabulary continuous speech recognition systems;word error rate;Training;Context;Hidden Markov models;Neural networks;Computer architecture;Kernel;Training data;Convolutional Networks;Multilingual;Acoustic Modeling;Speech Recognition;Neural Networks},
  doi       = {10.1109/ICASSP.2016.7472620},
  issn      = {2379-190X},
  month     = mar,
}


Expand All @@ -722,17 +722,17 @@ @inproceedings{Sercu+2016
pages={3429--3433}
}

@INPROCEEDINGS{Xiong-2018,
author={W. {Xiong} and L. {Wu} and F. {Alleva} and J. {Droppo} and X. {Huang} and A. {Stolcke}},
booktitle={2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
title={The Microsoft 2017 Conversational Speech Recognition System},
year={2018},
volume={},
number={},
pages={5934-5938},
keywords={convolution;feedforward neural nets;natural language processing;speaker recognition;speech processing;language model rescoring step;senone level;switchboard domains;character-based LSTM language models;NIST 2000 switchboard test set;frame level;word-level voting;acoustic model posteriors;dialog session aware LSTM language models;CNN-BLSTM acoustic model;Microsoft 2017 conversational speech recognition system;Acoustics;Error analysis;Training;Speech recognition;Switches;Computational modeling;Context modeling;Conversational speech recognition;CNN;LACE;BLSTM;LSTM-LM;system combination;human parity},
doi={10.1109/ICASSP.2018.8461870},
ISSN={2379-190X},
@inproceedings{Xiong-2018,
  author    = {W. {Xiong} and L. {Wu} and F. {Alleva} and J. {Droppo} and X. {Huang} and A. {Stolcke}},
  booktitle = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {The Microsoft 2017 Conversational Speech Recognition System},
  year      = {2018},
  pages     = {5934--5938},
  keywords  = {convolution;feedforward neural nets;natural language processing;speaker recognition;speech processing;language model rescoring step;senone level;switchboard domains;character-based LSTM language models;NIST 2000 switchboard test set;frame level;word-level voting;acoustic model posteriors;dialog session aware LSTM language models;CNN-BLSTM acoustic model;Microsoft 2017 conversational speech recognition system;Acoustics;Error analysis;Training;Speech recognition;Switches;Computational modeling;Context modeling;Conversational speech recognition;CNN;LACE;BLSTM;LSTM-LM;system combination;human parity},
  doi       = {10.1109/ICASSP.2018.8461870},
  issn      = {2379-190X},
  month     = apr,
}

@inproceedings{zeyer2018improved,
Expand Down Expand Up @@ -862,17 +862,17 @@ @inproceedings{Weng2018
url={http://dx.doi.org/10.21437/Interspeech.2018-1030}
}

@INPROCEEDINGS{Battenberg2017,
author={E. {Battenberg} and J. {Chen} and R. {Child} and A. {Coates} and Y. G. Y. {Li} and H. {Liu} and S. {Satheesh} and A. {Sriram} and Z. {Zhu}},
booktitle={2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
title={Exploring neural transducers for end-to-end speech recognition},
year={2017},
volume={},
number={},
pages={206-213},
keywords={recurrent neural nets;speech recognition;Hub500 benchmark;CTC models;speech recognition pipeline;RNN-Transducer models;language model;Seq2Seq models;end-to-end speech recognition;neural transducers;Decoding;Hidden Markov models;Transducers;Task analysis;Speech;Mathematical model;Neural networks},
doi={10.1109/ASRU.2017.8268937},
ISSN={},
@inproceedings{Battenberg2017,
  author    = {E. {Battenberg} and J. {Chen} and R. {Child} and A. {Coates} and Y. G. Y. {Li} and H. {Liu} and S. {Satheesh} and A. {Sriram} and Z. {Zhu}},
  booktitle = {2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
  title     = {Exploring neural transducers for end-to-end speech recognition},
  year      = {2017},
  pages     = {206--213},
  keywords  = {recurrent neural nets;speech recognition;Hub500 benchmark;CTC models;speech recognition pipeline;RNN-Transducer models;language model;Seq2Seq models;end-to-end speech recognition;neural transducers;Decoding;Hidden Markov models;Transducers;Task analysis;Speech;Mathematical model;Neural networks},
  doi       = {10.1109/ASRU.2017.8268937},
  month     = dec,
}

Expand Down Expand Up @@ -973,14 +973,6 @@ @article{Dawalatabad_2021
month={Aug}
}

@article{park2022multi,
  title   = {Multi-scale Speaker Diarization with Dynamic Scale Weighting},
  author  = {Park, Tae Jin and Koluguri, Nithin Rao and Balam, Jagadeesh and Ginsburg, Boris},
  journal = {arXiv preprint arXiv:2203.15974},
  year    = {2022}
}


@inproceedings{he2019streaming,
title={Streaming end-to-end speech recognition for mobile devices},
author={He, Yanzhang and Sainath, Tara N and Prabhavalkar, Rohit and McGraw, Ian and Alvarez, Raziel and Zhao, Ding and Rybach, David and Kannan, Anjuli and Wu, Yonghui and Pang, Ruoming and others},
Expand Down
Loading

0 comments on commit 0796199

Please sign in to comment.