Skip to content

Commit

Permalink
Formatting changes for new docs system (#8525)
Browse files Browse the repository at this point in the history
Signed-off-by: Neal Vaidya <[email protected]>
  • Loading branch information
nealvaidya authored Feb 28, 2024
1 parent cccd80d commit 0796199
Show file tree
Hide file tree
Showing 10 changed files with 271 additions and 274 deletions.
2 changes: 0 additions & 2 deletions docs/source/asr/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -326,5 +326,3 @@ Adapter Strategies
:member-order: bysource
:undoc-members: adapter_module_names

-----

108 changes: 50 additions & 58 deletions docs/source/asr/asr_all.bib
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,17 @@ @article{luong17
year = {2017},
}

@INPROCEEDINGS{LaurentSeqWiseBN,
author={C. {Laurent} and G. {Pereyra} and P. {Brakel} and Y. {Zhang} and Y. {Bengio}},
booktitle={2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
title={Batch normalized recurrent neural networks},
year={2016},
volume={},
number={},
pages={2657-2661},
keywords={feedforward neural nets;learning (artificial intelligence);recurrent neural nets;speech recognition;batch normalized recurrent neural networks;RNN;sequential data;long-term dependency learning;convergence rate improvement;intermediate representation normalization;feedforward neural networks;speech recognition task;language modeling;training criterion;Training;Recurrent neural networks;Convergence;Speech recognition;Computer architecture;Speech;batch normalization;RNN;LSTM;optimization},
doi={10.1109/ICASSP.2016.7472159},
ISSN={2379-190X},
@inproceedings{LaurentSeqWiseBN,
  author    = {C. {Laurent} and G. {Pereyra} and P. {Brakel} and Y. {Zhang} and Y. {Bengio}},
  booktitle = {2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {Batch normalized recurrent neural networks},
  year      = {2016},
  pages     = {2657--2661},
  keywords  = {feedforward neural nets;learning (artificial intelligence);recurrent neural nets;speech recognition;batch normalized recurrent neural networks;RNN;sequential data;long-term dependency learning;convergence rate improvement;intermediate representation normalization;feedforward neural networks;speech recognition task;language modeling;training criterion;Training;Recurrent neural networks;Convergence;Speech recognition;Computer architecture;Speech;batch normalization;RNN;LSTM;optimization},
  doi       = {10.1109/ICASSP.2016.7472159},
  issn      = {2379-190X},
  month     = mar,
}

@article{graves2005,
Expand Down Expand Up @@ -112,7 +112,7 @@ @article{NVTuring

@misc{Rygaard2015,
  title        = {Using Synthesized Speech to Improve Speech Recognition for Low-Resource Languages},
  author       = {Rygaard, Luise Valentin},
  howpublished = {\url{https://parasol.tamu.edu/dreu2015/Rygaard/report.pdf}},
  year         = {2015},
}
Expand Down Expand Up @@ -156,7 +156,7 @@ @book{Bengio1996
Title = {Neural Networks for Speech and Sequence Recognition},
Year = {1996}
}

@article{Bengio1992,
title={Global optimization of a neural network-hidden Markov model hybrid},
author={Bengio, Y., and De Mori, R., and Flammia, G., and Kompe, R. },
Expand Down Expand Up @@ -359,7 +359,7 @@ @inproceedings{DeepSpeech2
url = {http://dl.acm.org/citation.cfm?id=3045390.3045410},
acmid = {3045410},
publisher = {JMLR.org},
}
}

@inproceedings{prabhavalkar2017comparison,
title={A comparison of sequence-to-sequence models for speech recognition},
Expand Down Expand Up @@ -572,7 +572,7 @@ @incollection{Salimans2016WeightNorm
title = {Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks},
author = {Salimans, Tim and Kingma, Durk P},
booktitle = {Advances in Neural Information Processing Systems 29},
editor = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett},
editor = {D. D. Lee and M. Sugiyama and U. V. Luxburg and I. Guyon and R. Garnett},
pages = {901--909},
year = {2016},
publisher = {Curran Associates, Inc.},
Expand All @@ -581,7 +581,7 @@ @incollection{Salimans2016WeightNorm

@article{wu2016google,
title={Google's neural machine translation system: Bridging the gap between human and machine translation},
author={Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V and Norouzi, Mohammad and Macherey, Zolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and others},
author={Wu, Yonghui and Schuster, Mike and Chen, Zhifeng and Le, Quoc V and Norouzi, Mohammad and Macherey, Zolfgang and Krikun, Maxim and Cao, Yuan and Gao, Qin and Macherey, Klaus and others},
journal={arXiv preprint arXiv:1609.08144},
year={2016}
}
Expand Down Expand Up @@ -638,7 +638,7 @@ @inproceedings{Dauphin2017GLU
url = {http://dl.acm.org/citation.cfm?id=3305381.3305478},
acmid = {3305478},
publisher = {JMLR.org},
}
}

@incollection{Oord2016PixelCNN,
title = {Conditional Image Generation with PixelCNN Decoders},
Expand Down Expand Up @@ -698,17 +698,17 @@ @inproceedings{Saon+2016
pages={7--11}
}

@INPROCEEDINGS{Sercu-2016,
author={T. {Sercu} and C. {Puhrsch} and B. {Kingsbury} and Y. {LeCun}},
booktitle={2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
title={Very deep multilingual convolutional neural networks for LVCSR},
year={2016},
volume={},
number={},
pages={4955-4959},
keywords={natural language processing;neural nets;speech recognition;very deep multilingual convolutional neural networks;LVCSR;CNN;large vocabulary continuous speech recognition systems;word error rate;Training;Context;Hidden Markov models;Neural networks;Computer architecture;Kernel;Training data;Convolutional Networks;Multilingual;Acoustic Modeling;Speech Recognition;Neural Networks},
doi={10.1109/ICASSP.2016.7472620},
ISSN={2379-190X},
@inproceedings{Sercu-2016,
  author    = {T. {Sercu} and C. {Puhrsch} and B. {Kingsbury} and Y. {LeCun}},
  booktitle = {2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {Very deep multilingual convolutional neural networks for LVCSR},
  year      = {2016},
  pages     = {4955--4959},
  keywords  = {natural language processing;neural nets;speech recognition;very deep multilingual convolutional neural networks;LVCSR;CNN;large vocabulary continuous speech recognition systems;word error rate;Training;Context;Hidden Markov models;Neural networks;Computer architecture;Kernel;Training data;Convolutional Networks;Multilingual;Acoustic Modeling;Speech Recognition;Neural Networks},
  doi       = {10.1109/ICASSP.2016.7472620},
  issn      = {2379-190X},
  month     = mar,
}


Expand All @@ -722,17 +722,17 @@ @inproceedings{Sercu+2016
pages={3429--3433}
}

@INPROCEEDINGS{Xiong-2018,
author={W. {Xiong} and L. {Wu} and F. {Alleva} and J. {Droppo} and X. {Huang} and A. {Stolcke}},
booktitle={2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
title={The Microsoft 2017 Conversational Speech Recognition System},
year={2018},
volume={},
number={},
pages={5934-5938},
keywords={convolution;feedforward neural nets;natural language processing;speaker recognition;speech processing;language model rescoring step;senone level;switchboard domains;character-based LSTM language models;NIST 2000 switchboard test set;frame level;word-level voting;acoustic model posteriors;dialog session aware LSTM language models;CNN-BLSTM acoustic model;Microsoft 2017 conversational speech recognition system;Acoustics;Error analysis;Training;Speech recognition;Switches;Computational modeling;Context modeling;Conversational speech recognition;CNN;LACE;BLSTM;LSTM-LM;system combination;human parity},
doi={10.1109/ICASSP.2018.8461870},
ISSN={2379-190X},
@inproceedings{Xiong-2018,
  author    = {W. {Xiong} and L. {Wu} and F. {Alleva} and J. {Droppo} and X. {Huang} and A. {Stolcke}},
  booktitle = {2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  title     = {The Microsoft 2017 Conversational Speech Recognition System},
  year      = {2018},
  pages     = {5934--5938},
  keywords  = {convolution;feedforward neural nets;natural language processing;speaker recognition;speech processing;language model rescoring step;senone level;switchboard domains;character-based LSTM language models;NIST 2000 switchboard test set;frame level;word-level voting;acoustic model posteriors;dialog session aware LSTM language models;CNN-BLSTM acoustic model;Microsoft 2017 conversational speech recognition system;Acoustics;Error analysis;Training;Speech recognition;Switches;Computational modeling;Context modeling;Conversational speech recognition;CNN;LACE;BLSTM;LSTM-LM;system combination;human parity},
  doi       = {10.1109/ICASSP.2018.8461870},
  issn      = {2379-190X},
  month     = apr,
}

@inproceedings{zeyer2018improved,
Expand Down Expand Up @@ -862,17 +862,17 @@ @inproceedings{Weng2018
url={http://dx.doi.org/10.21437/Interspeech.2018-1030}
}

@INPROCEEDINGS{Battenberg2017,
author={E. {Battenberg} and J. {Chen} and R. {Child} and A. {Coates} and Y. G. Y. {Li} and H. {Liu} and S. {Satheesh} and A. {Sriram} and Z. {Zhu}},
booktitle={2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
title={Exploring neural transducers for end-to-end speech recognition},
year={2017},
volume={},
number={},
pages={206-213},
keywords={recurrent neural nets;speech recognition;Hub500 benchmark;CTC models;speech recognition pipeline;RNN-Transducer models;language model;Seq2Seq models;end-to-end speech recognition;neural transducers;Decoding;Hidden Markov models;Transducers;Task analysis;Speech;Mathematical model;Neural networks},
doi={10.1109/ASRU.2017.8268937},
ISSN={},
@inproceedings{Battenberg2017,
  author    = {E. {Battenberg} and J. {Chen} and R. {Child} and A. {Coates} and Y. G. Y. {Li} and H. {Liu} and S. {Satheesh} and A. {Sriram} and Z. {Zhu}},
  booktitle = {2017 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)},
  title     = {Exploring neural transducers for end-to-end speech recognition},
  year      = {2017},
  pages     = {206--213},
  keywords  = {recurrent neural nets;speech recognition;Hub500 benchmark;CTC models;speech recognition pipeline;RNN-Transducer models;language model;Seq2Seq models;end-to-end speech recognition;neural transducers;Decoding;Hidden Markov models;Transducers;Task analysis;Speech;Mathematical model;Neural networks},
  doi       = {10.1109/ASRU.2017.8268937},
  month     = dec,
}

Expand Down Expand Up @@ -973,14 +973,6 @@ @article{Dawalatabad_2021
month={Aug}
}

@article{park2022multi,
  title   = {Multi-scale Speaker Diarization with Dynamic Scale Weighting},
  author  = {Park, Tae Jin and Koluguri, Nithin Rao and Balam, Jagadeesh and Ginsburg, Boris},
  journal = {arXiv preprint arXiv:2203.15974},
  year    = {2022}
}


@inproceedings{he2019streaming,
title={Streaming end-to-end speech recognition for mobile devices},
author={He, Yanzhang and Sainath, Tara N and Prabhavalkar, Rohit and McGraw, Ian and Alvarez, Raziel and Zhao, Ding and Rybach, David and Kannan, Anjuli and Wu, Yonghui and Pang, Ruoming and others},
Expand Down
Loading

0 comments on commit 0796199

Please sign in to comment.