Bibliography

50 papers spanning 1948–2024

Sort:
No papers match the current filters.
Shannon, Claude E.
A Mathematical Theory of Communication
The Bell System Technical Journal, 27(3), 379–423 (1948)
Core Ch 1 Ch 2
BibTeX
@article{shannon1948,
  author = {Shannon, Claude E.},
  title = {A Mathematical Theory of Communication},
  journal = {The Bell System Technical Journal},
  year = {1948},
  volume = {27},
  number = {3},
  pages = {379--423},
}
Jelinek, Frederick and Mercer, Robert L.
Interpolated Estimation of Markov Source Parameters from Sparse Data
Proceedings of the Workshop on Pattern Recognition in Practice, 381–397 (1980)
BibTeX
@inproceedings{jelinek1980interpolated,
  author = {Jelinek, Frederick and Mercer, Robert L.},
  title = {Interpolated Estimation of {Markov} Source Parameters from Sparse Data},
  booktitle = {Proceedings of the Workshop on Pattern Recognition in Practice},
  year = {1980},
  pages = {381--397},
}
Elman, Jeffrey L.
Finding Structure in Time
Cognitive Science, 14(2), 179–211 (1990)
BibTeX
@article{elman1990finding,
  author = {Elman, Jeffrey L.},
  title = {Finding Structure in Time},
  journal = {Cognitive Science},
  year = {1990},
  volume = {14},
  number = {2},
  pages = {179--211},
}
Hochreiter, Sepp and Schmidhuber, Jürgen
Long Short-Term Memory
Neural Computation, 9(8), 1735–1780 (1997)
BibTeX
@article{hochreiter1997long,
  author = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title = {Long Short-Term Memory},
  journal = {Neural Computation},
  year = {1997},
  volume = {9},
  number = {8},
  pages = {1735--1780},
}
Chen, Stanley F. and Goodman, Joshua
An Empirical Study of Smoothing Techniques for Language Modeling
Computer Speech & Language, 13(4), 359–394 (1999)
BibTeX
@article{chen1999empirical,
  author = {Chen, Stanley F. and Goodman, Joshua},
  title = {An Empirical Study of Smoothing Techniques for Language Modeling},
  journal = {Computer Speech \& Language},
  year = {1999},
  volume = {13},
  number = {4},
  pages = {359--394},
}
Bengio, Yoshua and Ducharme, Réjean and Vincent, Pascal and Jauvin, Christian
A Neural Probabilistic Language Model
Journal of Machine Learning Research, 3, 1137–1155 (2003)
Core Ch 1 Ch 4 Ch 5
BibTeX
@article{bengio2003,
  author = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal and Jauvin, Christian},
  title = {A Neural Probabilistic Language Model},
  journal = {Journal of Machine Learning Research},
  year = {2003},
  volume = {3},
  pages = {1137--1155},
}
Mikolov, Tomas and Karafiát, Martin and Burget, Lukáš and Černocký, Jan and Khudanpur, Sanjeev
Recurrent Neural Network based Language Model
Proceedings of INTERSPEECH, 1045–1048 (2010)
BibTeX
@inproceedings{mikolov2010recurrent,
  author    = {Mikolov, Tomas and Karafi{\'a}t, Martin and Burget, Luk{\'a}{\v{s}} and {\v{C}}ernock{\'y}, Jan and Khudanpur, Sanjeev},
  title     = {Recurrent Neural Network based Language Model},
  booktitle = {Proceedings of {INTERSPEECH}},
  pages     = {1045--1048},
  year      = {2010}
}
Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey
Efficient Estimation of Word Representations in Vector Space
Proceedings of ICLR Workshop (2013)
Core Ch 1 Ch 4
BibTeX
@inproceedings{mikolov2013efficient,
  author    = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
  title     = {Efficient Estimation of Word Representations in Vector Space},
  booktitle = {Proceedings of {ICLR} Workshop},
  year      = {2013}
}
Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S. and Dean, Jeffrey
Distributed Representations of Words and Phrases and Their Compositionality
Advances in Neural Information Processing Systems (NeurIPS), 26, 3111–3119 (2013)
Core Ch 4
BibTeX
@inproceedings{mikolov2013distributed,
  author    = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S. and Dean, Jeffrey},
  title     = {Distributed Representations of Words and Phrases and Their Compositionality},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {26},
  pages     = {3111--3119},
  year      = {2013}
}
Cho, Kyunghyun and van Merriënboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and others
Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation
Proceedings of EMNLP, 1724–1734 (2014)
BibTeX
@inproceedings{cho2014learning,
  author    = {Cho, Kyunghyun and van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and others},
  title     = {Learning Phrase Representations using {RNN} Encoder-Decoder for Statistical Machine Translation},
  booktitle = {Proceedings of {EMNLP}},
  pages     = {1724--1734},
  year      = {2014}
}
Pennington, Jeffrey and Socher, Richard and Manning, Christopher D.
GloVe: Global Vectors for Word Representation
Proceedings of EMNLP, 1532–1543 (2014)
Core Ch 4
BibTeX
@inproceedings{pennington2014,
  author    = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D.},
  title     = {{GloVe}: Global Vectors for Word Representation},
  booktitle = {Proceedings of {EMNLP}},
  pages     = {1532--1543},
  year      = {2014}
}
Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V.
Sequence to Sequence Learning with Neural Networks
Advances in Neural Information Processing Systems (NeurIPS), 27, 3104–3112 (2014)
BibTeX
@inproceedings{sutskever2014sequence,
  author    = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V.},
  title     = {Sequence to Sequence Learning with Neural Networks},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {27},
  pages     = {3104--3112},
  year      = {2014}
}
Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua
Neural Machine Translation by Jointly Learning to Align and Translate
Proceedings of ICLR (2015)
BibTeX
@inproceedings{bahdanau2015,
  author    = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  booktitle = {Proceedings of {ICLR}},
  year      = {2015}
}
Luong, Minh-Thang and Pham, Hieu and Manning, Christopher D.
Effective Approaches to Attention-based Neural Machine Translation
Proceedings of EMNLP, 1412–1421 (2015)
Core Ch 6 Ch 8
BibTeX
@inproceedings{luong2015,
  author    = {Luong, Minh-Thang and Pham, Hieu and Manning, Christopher D.},
  title     = {Effective Approaches to Attention-based Neural Machine Translation},
  booktitle = {Proceedings of {EMNLP}},
  pages     = {1412--1421},
  year      = {2015}
}
Sennrich, Rico and Haddow, Barry and Birch, Alexandra
Neural Machine Translation of Rare Words with Subword Units
Proceedings of ACL, 1715–1725 (2016)
Core Ch 10
BibTeX
@inproceedings{sennrich2016,
  author    = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
  title     = {Neural Machine Translation of Rare Words with Subword Units},
  booktitle = {Proceedings of {ACL}},
  pages     = {1715--1725},
  year      = {2016}
}
Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas
Enriching Word Vectors with Subword Information
Transactions of the Association for Computational Linguistics (TACL), 5, 135–146 (2017)
BibTeX
@article{bojanowski2017enriching,
  author    = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
  title     = {Enriching Word Vectors with Subword Information},
  journal   = {Transactions of the Association for Computational Linguistics ({TACL})},
  volume    = {5},
  pages     = {135--146},
  year      = {2017}
}
Shazeer, Noam and Mirhoseini, Azalia and Maziarz, Krzysztof and Davis, Andy and Le, Quoc and Hinton, Geoffrey and Dean, Jeff
Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer
Proceedings of ICLR (2017)
BibTeX
@inproceedings{shazeer2017outrageously,
  author    = {Shazeer, Noam and Mirhoseini, Azalia and Maziarz, Krzysztof and Davis, Andy and Le, Quoc and Hinton, Geoffrey and Dean, Jeff},
  title     = {Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer},
  booktitle = {Proceedings of {ICLR}},
  year      = {2017}
}
Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, Łukasz and Polosukhin, Illia
Attention Is All You Need
Advances in Neural Information Processing Systems (NeurIPS), 30, 5998–6008 (2017)
Core Ch 1 Ch 6 Ch 8
BibTeX
@inproceedings{vaswani2017,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention Is All You Need},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {30},
  pages     = {5998--6008},
  year      = {2017}
}
Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya
Improving Language Understanding by Generative Pre-Training
OpenAI Technical Report (2018)
Core Ch 1 Ch 9
BibTeX
@techreport{radford2018,
  author      = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
  title       = {Improving Language Understanding by Generative Pre-Training},
  institution = {OpenAI},
  type        = {Technical Report},
  year        = {2018}
}
Clark, Kevin and Khandelwal, Urvashi and Levy, Omer and Manning, Christopher D.
What Does BERT Look At? An Analysis of BERT's Attention
Proceedings of ACL Workshop BlackboxNLP, 276–286 (2019)
BibTeX
@inproceedings{clark2019what,
  author    = {Clark, Kevin and Khandelwal, Urvashi and Levy, Omer and Manning, Christopher D.},
  title     = {What Does {BERT} Look At? {An} Analysis of {BERT}'s Attention},
  booktitle = {Proceedings of {ACL} Workshop {BlackboxNLP}},
  pages     = {276--286},
  year      = {2019}
}
Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina
BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
Proceedings of NAACL-HLT, 4171–4186 (2019)
Core Ch 1 Ch 9
BibTeX
@inproceedings{devlin2019,
  author    = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
  title     = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
  booktitle = {Proceedings of {NAACL-HLT}},
  pages     = {4171--4186},
  year      = {2019}
}
Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya
Language Models are Unsupervised Multitask Learners
OpenAI Technical Report (2019)
Core Ch 9 Ch 10
BibTeX
@techreport{radford2019,
  author      = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title       = {Language Models are Unsupervised Multitask Learners},
  institution = {OpenAI},
  type        = {Technical Report},
  year        = {2019}
}
Strubell, Emma and Ganesh, Ananya and McCallum, Andrew
Energy and Policy Considerations for Deep Learning in NLP
Proceedings of ACL, 3645–3650 (2019)
BibTeX
@inproceedings{strubell2019energy,
  author    = {Strubell, Emma and Ganesh, Ananya and McCallum, Andrew},
  title     = {Energy and Policy Considerations for Deep Learning in {NLP}},
  booktitle = {Proceedings of {ACL}},
  pages     = {3645--3650},
  year      = {2019}
}
Brown, Tom B. and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D. and others
Language Models are Few-Shot Learners
Advances in Neural Information Processing Systems (NeurIPS), 33, 1877–1901 (2020)
Core Ch 1 Ch 9 Ch 13
BibTeX
@inproceedings{brown2020,
  author    = {Brown, Tom B. and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D. and others},
  title     = {Language Models are Few-Shot Learners},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {33},
  pages     = {1877--1901},
  year      = {2020}
}
Holtzman, Ari and Buys, Jan and Du, Li and Forbes, Maxwell and Choi, Yejin
The Curious Case of Neural Text Degeneration
Proceedings of ICLR (2020)
BibTeX
@inproceedings{holtzman2020curious,
  author    = {Holtzman, Ari and Buys, Jan and Du, Li and Forbes, Maxwell and Choi, Yejin},
  title     = {The Curious Case of Neural Text Degeneration},
  booktitle = {Proceedings of {ICLR}},
  year      = {2020}
}
Kaplan, Jared and McCandlish, Sam and Henighan, Tom and Brown, Tom B. and Chess, Benjamin and others
Scaling Laws for Neural Language Models
arXiv preprint arXiv:2001.08361 (2020)
Core Ch 11
BibTeX
@misc{kaplan2020,
  author        = {Kaplan, Jared and McCandlish, Sam and Henighan, Tom and Brown, Tom B. and Chess, Benjamin and others},
  title         = {Scaling Laws for Neural Language Models},
  year          = {2020},
  eprint        = {2001.08361},
  archiveprefix = {arXiv}
}
Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and others
Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks
Advances in Neural Information Processing Systems (NeurIPS), 33, 9459–9474 (2020)
BibTeX
@inproceedings{lewis2020retrieval,
  author    = {Lewis, Patrick and Perez, Ethan and Piktus, Aleksandra and Petroni, Fabio and others},
  title     = {Retrieval-Augmented Generation for Knowledge-Intensive {NLP} Tasks},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {33},
  pages     = {9459--9474},
  year      = {2020}
}
Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and others
Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer
Journal of Machine Learning Research (JMLR), 21(140), 1–67 (2020)
BibTeX
@article{raffel2020exploring,
  author    = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and others},
  title     = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  journal   = {Journal of Machine Learning Research ({JMLR})},
  volume    = {21},
  number    = {140},
  pages     = {1--67},
  year      = {2020}
}
Askell, Amanda and Bai, Yuntao and Chen, Anna and Drain, Dawn and others
A General Language Assistant as a Laboratory for Alignment
arXiv preprint arXiv:2112.00861 (2021)
BibTeX
@misc{askell2021general,
  author        = {Askell, Amanda and Bai, Yuntao and Chen, Anna and Drain, Dawn and others},
  title         = {A General Language Assistant as a Laboratory for Alignment},
  year          = {2021},
  eprint        = {2112.00861},
  archiveprefix = {arXiv}
}
Carlini, Nicholas and Tramer, Florian and Wallace, Eric and Jagielski, Matthew and others
Extracting Training Data from Large Language Models
Proceedings of the 30th USENIX Security Symposium, 2633–2650 (2021)
BibTeX
@inproceedings{carlini2021extracting,
  author    = {Carlini, Nicholas and Tramer, Florian and Wallace, Eric and Jagielski, Matthew and others},
  title     = {Extracting Training Data from Large Language Models},
  booktitle = {Proceedings of the 30th {USENIX} Security Symposium},
  pages     = {2633--2650},
  year      = {2021}
}
Patterson, David and Gonzalez, Joseph and Le, Quoc and Liang, Chen and others
Carbon Emissions and Large Neural Network Training
arXiv preprint arXiv:2104.10350 (2021)
BibTeX
@misc{patterson2021carbon,
  author        = {Patterson, David and Gonzalez, Joseph and Le, Quoc and Liang, Chen and others},
  title         = {Carbon Emissions and Large Neural Network Training},
  year          = {2021},
  eprint        = {2104.10350},
  archiveprefix = {arXiv}
}
Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and others
Learning Transferable Visual Models From Natural Language Supervision
Proceedings of ICML, 8748–8763 (2021)
BibTeX
@inproceedings{radford2021learning,
  author    = {Radford, Alec and Kim, Jong Wook and Hallacy, Chris and Ramesh, Aditya and others},
  title     = {Learning Transferable Visual Models From Natural Language Supervision},
  booktitle = {Proceedings of {ICML}},
  pages     = {8748--8763},
  year      = {2021}
}
Su, Jianlin and Lu, Yu and Pan, Shengfeng and Murtadha, Ahmed and Wen, Bo and Liu, Yunfeng
RoFormer: Enhanced Transformer with Rotary Position Embedding
arXiv preprint arXiv:2104.09864 (2021)
BibTeX
@misc{su2021roformer,
  author        = {Su, Jianlin and Lu, Yu and Pan, Shengfeng and Murtadha, Ahmed and Wen, Bo and Liu, Yunfeng},
  title         = {{RoFormer}: Enhanced Transformer with Rotary Position Embedding},
  year          = {2021},
  eprint        = {2104.09864},
  archiveprefix = {arXiv}
}
Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and others
Constitutional AI: Harmlessness from AI Feedback
arXiv preprint arXiv:2212.08073 (2022)
BibTeX
@misc{bai2022constitutional,
  author        = {Bai, Yuntao and Kadavath, Saurav and Kundu, Sandipan and Askell, Amanda and others},
  title         = {Constitutional {AI}: Harmlessness from {AI} Feedback},
  year          = {2022},
  eprint        = {2212.08073},
  archiveprefix = {arXiv}
}
Fedus, William and Zoph, Barret and Shazeer, Noam
Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity
Journal of Machine Learning Research (JMLR), 23(120), 1–39 (2022)
BibTeX
@article{fedus2022switch,
  author    = {Fedus, William and Zoph, Barret and Shazeer, Noam},
  title     = {Switch Transformers: Scaling to Trillion Parameter Models with Simple and Efficient Sparsity},
  journal   = {Journal of Machine Learning Research ({JMLR})},
  volume    = {23},
  number    = {120},
  pages     = {1--39},
  year      = {2022}
}
Hoffmann, Jordan and Borgeaud, Sebastian and Mensch, Arthur and Buchatskaya, Elena and others
Training Compute-Optimal Large Language Models
Advances in Neural Information Processing Systems (NeurIPS), 35, 30016–30030 (2022)
Core Ch 11
BibTeX
@inproceedings{hoffmann2022,
  author    = {Hoffmann, Jordan and Borgeaud, Sebastian and Mensch, Arthur and Buchatskaya, Elena and others},
  title     = {Training Compute-Optimal Large Language Models},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {35},
  pages     = {30016--30030},
  year      = {2022}
}
Hu, Edward J. and Shen, Yelong and Wallis, Phillip and Allen-Zhu, Zeyuan and Li, Yuanzhi and Wang, Shean and Wang, Lu and Chen, Weizhu
LoRA: Low-Rank Adaptation of Large Language Models
Proceedings of ICLR (2022)
Core Ch 14
BibTeX
@inproceedings{hu2021,
  author    = {Hu, Edward J. and Shen, Yelong and Wallis, Phillip and Allen-Zhu, Zeyuan and Li, Yuanzhi and Wang, Shean and Wang, Lu and Chen, Weizhu},
  title     = {{LoRA}: Low-Rank Adaptation of Large Language Models},
  booktitle = {Proceedings of {ICLR}},
  year      = {2022}
}
Ouyang, Long and Wu, Jeffrey and Jiang, Xu and Almeida, Diogo and others
Training Language Models to Follow Instructions with Human Feedback
Advances in Neural Information Processing Systems (NeurIPS), 35, 27730–27744 (2022)
Core Ch 12
BibTeX
@inproceedings{ouyang2022,
  author    = {Ouyang, Long and Wu, Jeffrey and Jiang, Xu and Almeida, Diogo and others},
  title     = {Training Language Models to Follow Instructions with Human Feedback},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {35},
  pages     = {27730--27744},
  year      = {2022}
}
Perez, Ethan and Ringer, Sam and others
Red Teaming Language Models with Language Models
Proceedings of EMNLP, 3419–3448 (2022)
BibTeX
@inproceedings{perez2022red,
  author    = {Perez, Ethan and Ringer, Sam and others},
  title     = {Red Teaming Language Models with Language Models},
  booktitle = {Proceedings of {EMNLP}},
  pages     = {3419--3448},
  year      = {2022}
}
Press, Ofir and Smith, Noah A. and Lewis, Mike
Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation
Proceedings of ICLR (2022)
BibTeX
@inproceedings{press2022train,
  author    = {Press, Ofir and Smith, Noah A. and Lewis, Mike},
  title     = {Train Short, Test Long: Attention with Linear Biases Enables Input Length Extrapolation},
  booktitle = {Proceedings of {ICLR}},
  year      = {2022}
}
Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and others
Chain-of-Thought Prompting Elicits Reasoning in Large Language Models
Advances in Neural Information Processing Systems (NeurIPS), 35, 24824–24837 (2022)
Core Ch 13
BibTeX
@inproceedings{wei2022,
  author    = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and others},
  title     = {Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {35},
  pages     = {24824--24837},
  year      = {2022}
}
Wei, Jason and Tay, Yi and Bommasani, Rishi and Raffel, Colin and others
Emergent Abilities of Large Language Models
Transactions on Machine Learning Research (TMLR) (2022)
BibTeX
@article{wei2022emergent,
  author    = {Wei, Jason and Tay, Yi and Bommasani, Rishi and Raffel, Colin and others},
  title     = {Emergent Abilities of Large Language Models},
  journal   = {Transactions on Machine Learning Research ({TMLR})},
  year      = {2022}
}
Akyürek, Ekin and Schuurmans, Dale and Andreas, Jacob and Ma, Tengyu and Zhou, Denny
What Learning Algorithm Is In-Context Learning? Investigations with Linear Models
Proceedings of ICLR (2023)
BibTeX
@inproceedings{akyurek2023what,
  author    = {Aky{\"u}rek, Ekin and Schuurmans, Dale and Andreas, Jacob and Ma, Tengyu and Zhou, Denny},
  title     = {What Learning Algorithm Is In-Context Learning? {Investigations} with Linear Models},
  booktitle = {Proceedings of {ICLR}},
  year      = {2023}
}
Liu, Haotian and Li, Chunyuan and Wu, Qingyang and Lee, Yong Jae
Visual Instruction Tuning
Advances in Neural Information Processing Systems (NeurIPS), 36 (2023)
BibTeX
@inproceedings{liu2023visual,
  author    = {Liu, Haotian and Li, Chunyuan and Wu, Qingyang and Lee, Yong Jae},
  title     = {Visual Instruction Tuning},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {36},
  year      = {2023}
}
Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Ermon, Stefano and Manning, Christopher D. and Finn, Chelsea
Direct Preference Optimization: Your Language Model Is Secretly a Reward Model
Advances in Neural Information Processing Systems (NeurIPS), 36 (2023)
Core Ch 12
BibTeX
@inproceedings{rafailov2023,
  author    = {Rafailov, Rafael and Sharma, Archit and Mitchell, Eric and Ermon, Stefano and Manning, Christopher D. and Finn, Chelsea},
  title     = {Direct Preference Optimization: Your Language Model Is Secretly a Reward Model},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {36},
  year      = {2023}
}
Schaeffer, Rylan and Miranda, Brando and Koyejo, Sanmi
Are Emergent Abilities of Large Language Models a Mirage?
Advances in Neural Information Processing Systems (NeurIPS), 36 (2023)
BibTeX
@inproceedings{schaeffer2023emergent,
  author    = {Schaeffer, Rylan and Miranda, Brando and Koyejo, Sanmi},
  title     = {Are Emergent Abilities of Large Language Models a Mirage?},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  volume    = {36},
  year      = {2023}
}
Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and others
LLaMA: Open and Efficient Foundation Language Models
arXiv preprint arXiv:2302.13971 (2023)
Core Ch 11
BibTeX
@misc{touvron2023,
  author        = {Touvron, Hugo and Lavril, Thibaut and Izacard, Gautier and Martinet, Xavier and others},
  title         = {{LLaMA}: Open and Efficient Foundation Language Models},
  year          = {2023},
  eprint        = {2302.13971},
  archiveprefix = {arXiv}
}
Wang, Xuezhi and Wei, Jason and Schuurmans, Dale and Le, Quoc and others
Self-Consistency Improves Chain of Thought Reasoning in Language Models
Proceedings of ICLR (2023)
BibTeX
@inproceedings{wang2023selfconsistency,
  author    = {Wang, Xuezhi and Wei, Jason and Schuurmans, Dale and Le, Quoc and others},
  title     = {Self-Consistency Improves Chain of Thought Reasoning in Language Models},
  booktitle = {Proceedings of {ICLR}},
  year      = {2023}
}
Yao, Shunyu and Zhao, Jeffrey and Yu, Dian and Du, Nan and others
ReAct: Synergizing Reasoning and Acting in Language Models
Proceedings of ICLR (2023)
BibTeX
@inproceedings{yao2023react,
  author    = {Yao, Shunyu and Zhao, Jeffrey and Yu, Dian and Du, Nan and others},
  title     = {{ReAct}: Synergizing Reasoning and Acting in Language Models},
  booktitle = {Proceedings of {ICLR}},
  year      = {2023}
}
Jiang, Albert Q. and Sablayrolles, Alexandre and Roux, Antoine and Mensch, Arthur and others
Mixtral of Experts
arXiv preprint arXiv:2401.04088 (2024)
BibTeX
@misc{jiang2024mixtral,
  author        = {Jiang, Albert Q. and Sablayrolles, Alexandre and Roux, Antoine and Mensch, Arthur and others},
  title         = {Mixtral of Experts},
  year          = {2024},
  eprint        = {2401.04088},
  archiveprefix = {arXiv}
}

Publication Timeline

Dot size reflects number of papers per year.