% Bahdanau, Cho and Bengio (2014): arXiv preprint introducing the attention
% mechanism for neural machine translation. Stored as a misc entry with
% structured eprint fields instead of packing the arXiv number into a journal
% field, so eprint-aware styles can format and link it themselves.
% NOTE(review): a peer-reviewed version reportedly appeared at ICLR 2015;
% confirm whether the target venue wants that version cited instead.
@misc{bahdanau2014neural,
  author        = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
  title         = {Neural Machine Translation by Jointly Learning to Align and Translate},
  year          = {2014},
  eprint        = {1409.0473},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/1409.0473},
}
@comment{Pedro Domingos on this paper: "Attention was invented at U. Montreal by DBahdanau, kchonyc (Twitter handles) and Yoshua Bengio. Transformers were just an extension. This is the paper that really 'invented modern AI': https://arxiv.org/abs/1409.0473"}