Documents

  • 2023

    G. Raposo, L. Coheur, and B. Martins, “Prompting, Retrieval, Training: An exploration of different approaches for task-oriented dialogue generation,” in Proceedings of the 24th Meeting of the Special Interest Group on Discourse and Dialogue, S. Stoyanchev, S. Joty, D. Schlangen, O. Dusek, C. Kennington, and M. Alikhani, Eds., Prague, Czechia: Association for Computational Linguistics, Sep. 2023, pp. 400–412. [Online]. Available: https://aclanthology.org/2023.sigdial-1.37
    PDF
    Cite

    @InProceedings{Raposo2023,
    author = {Raposo, Gon{\c{c}}alo and Coheur, Luisa and Martins, Bruno},
    booktitle = {Proceedings of the 24th Meeting of the Special Interest Group on Discourse and Dialogue},
    title = {Prompting, Retrieval, Training: An exploration of different approaches for task-oriented dialogue generation},
    year = {2023},
    address = {Prague, Czechia},
    editor = {Stoyanchev, Svetlana and Joty, Shafiq and Schlangen, David and Dusek, Ondrej and Kennington, Casey and Alikhani, Malihe},
    month = sep,
    pages = {400--412},
    publisher = {Association for Computational Linguistics},
    abstract = {Task-oriented dialogue systems need to generate appropriate responses to help fulfill users{'} requests. This paper explores different strategies, namely prompting, retrieval, and fine-tuning, for task-oriented dialogue generation. Through a systematic evaluation, we aim to provide valuable insights and guidelines for researchers and practitioners working on developing efficient and effective dialogue systems for real-world applications. Evaluation is performed on the MultiWOZ and Taskmaster-2 datasets, and we test various versions of FLAN-T5, GPT-3.5, and GPT-4 models. Costs associated with running these models are analyzed, and dialogue evaluation is briefly discussed. Our findings suggest that when testing data differs from the training data, fine-tuning may decrease performance, favoring a combination of a more general language model and a prompting mechanism based on retrieved examples.},
    url = {https://aclanthology.org/2023.sigdial-1.37},
    }
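
    The findings above favor pairing a general language model with a prompting mechanism based on retrieved examples. The sketch below (Python) illustrates that idea at its simplest: rank stored (request, response) pairs by similarity to the current request and prepend the best matches as in-context demonstrations. The bag-of-words similarity and the prompt layout are illustrative assumptions, not the paper's implementation.

    from collections import Counter
    import math

    # Minimal sketch of retrieval-based prompting for task-oriented dialogue:
    # retrieve the stored examples most similar to the current user request
    # and prepend them to the prompt as in-context demonstrations.

    def bow_cosine(a: str, b: str) -> float:
        """Cosine similarity between bag-of-words vectors of two strings."""
        va, vb = Counter(a.lower().split()), Counter(b.lower().split())
        dot = sum(va[w] * vb[w] for w in va)
        norm = lambda v: math.sqrt(sum(c * c for c in v.values()))
        return dot / (norm(va) * norm(vb)) if va and vb else 0.0

    def build_prompt(request: str, pool: list[tuple[str, str]], k: int = 2) -> str:
        """Rank (request, response) pairs by similarity and build a few-shot prompt."""
        ranked = sorted(pool, key=lambda ex: bow_cosine(request, ex[0]), reverse=True)
        shots = "\n\n".join(f"User: {c}\nSystem: {r}" for c, r in ranked[:k])
        return f"{shots}\n\nUser: {request}\nSystem:"

    pool = [
        ("i need a cheap hotel in the centre", "Alexander B&B is a cheap option in the centre."),
        ("book a table for two at 19:00", "Your table for two at 19:00 is booked."),
    ]
    # The resulting prompt would be sent to a general model such as FLAN-T5 or GPT-3.5.
    print(build_prompt("find me an inexpensive hotel downtown", pool, k=1))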


  • 2022

    G. Raposo, A. Raposo, and A. S. Carmo, “Document-Level Abstractive Summarization,” unpublished, Deep Structured Learning 2021/2022 course, Instituto Superior Técnico, 2022. [Online]. Available: https://arxiv.org/abs/2212.03013
    PDF
    Note

    This paper was written for an assignment in the Deep Structured Learning 2021/2022 course at Instituto Superior Técnico.

    Cite

    @Unpublished{Raposo2022a,
    author = {Gon{\c{c}}alo Raposo and Afonso Raposo and Ana Sofia Carmo},
    note = {This paper was written for an assignment in the Deep Structured Learning 2021/2022 course at Instituto Superior Técnico.},
    title = {Document-Level Abstractive Summarization},
    year = {2022},
    abstract = {The task of automatic text summarization produces a concise and fluent text summary while preserving key information and overall meaning. Approaches to document-level summarization have seen significant improvements in recent years by using models based on the Transformer architecture. However, the quadratic memory and time complexities with respect to the sequence length make them very expensive to use, especially with long sequences, as required by document-level summarization. Our work addresses the problem of document-level summarization by studying how efficient Transformer techniques can be used to improve the automatic summarization of very long texts. In particular, we use the arXiv dataset, consisting of several scientific papers and the corresponding abstracts, as the baselines for this work. Then, we propose a novel retrieval-enhanced approach based on this architecture, which reduces the cost of generating a summary of the entire document by processing smaller chunks. The results were below the baselines but suggest a more efficient memory consumption and truthfulness.},
    eprint = {2212.03013},
    keywords = {Document-level summarization; abstractive summarization; efficient transformers; information retrieval},
    url = {https://arxiv.org/abs/2212.03013},
    }
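
    The abstract above turns on one idea: avoid attending over the whole document at once by summarizing smaller chunks and then combining the partial summaries. Below is a rough sketch of that control flow, assuming a fixed chunk size and a placeholder truncating summarizer in place of the paper's retrieval-enhanced model.

    # Sketch of chunked document summarization: no single model call sees the
    # full document, so the quadratic cost of attention applies only to each
    # chunk. The chunk size and the placeholder summarizer are assumptions.

    def chunks(tokens: list[str], size: int) -> list[list[str]]:
        """Split a token list into consecutive chunks of at most `size` tokens."""
        return [tokens[i:i + size] for i in range(0, len(tokens), size)]

    def summarize(text: str, limit: int = 30) -> str:
        """Placeholder for a seq2seq summarizer; it merely truncates its input."""
        return " ".join(text.split()[:limit])

    def summarize_document(document: str, chunk_size: int = 512) -> str:
        """Summarize each chunk independently, then summarize the concatenation."""
        partial = [summarize(" ".join(c)) for c in chunks(document.split(), chunk_size)]
        return summarize(" ".join(partial))

    long_paper = "token " * 2000   # stands in for a long scientific article
    print(summarize_document(long_paper))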


    G. Raposo, R. Ribeiro, B. Martins, and L. Coheur, “Question Rewriting? Assessing Its Importance for Conversational Question Answering,” in Advances in Information Retrieval (44th European Conference on Information Retrieval), Cham: Springer International Publishing, Apr. 2022, pp. 199–206. doi: 10.1007/978-3-030-99739-7_23
    PDF
    Note

    This version of the contribution has been accepted for publication, after peer review but is not the Version of Record and does not reflect post-acceptance improvements, or any corrections. The Version of Record is available online at: http://dx.doi.org/10.1007/978-3-030-99739-7_23. Use of this Accepted Version is subject to the publisher’s Accepted Manuscript terms of use https://www.springernature.com/gp/open-research/policies/accepted-manuscript-terms

    Cite

    @InProceedings{Raposo2022,
    author = {Raposo, Gon{\c{c}}alo and Ribeiro, Rui and Martins, Bruno and Coheur, Lu{\'i}sa},
    booktitle = {Advances in Information Retrieval},
    title = {Question Rewriting? Assessing Its Importance for Conversational Question Answering},
    year = {2022},
    address = {Cham},
    editor = {Hagen, Matthias and Verberne, Suzan and Macdonald, Craig and Seifert, Christin and Balog, Krisztian and N{\o}rv{\aa}g, Kjetil and Setty, Vinay},
    month = apr,
    pages = {199--206},
    publisher = {Springer International Publishing},
    abstract = {In conversational question answering, systems must correctly interpret the interconnected interactions and generate knowledgeable answers, which may require the retrieval of relevant information from a background repository. Recent approaches to this problem leverage neural language models, although different alternatives can be considered in terms of modules for (a) representing user questions in context, (b) retrieving the relevant background information, and (c) generating the answer. This work presents a conversational question answering system designed specifically for the Search-Oriented Conversational AI (SCAI) shared task, and reports on a detailed analysis of its question rewriting module. In particular, we considered different variations of the question rewriting module to evaluate the influence on the subsequent components, and performed a careful analysis of the results obtained with the best system configuration. Our system achieved the best performance in the shared task and our analysis emphasizes the importance of the conversation context representation for the overall system performance.},
    keywords = {conversational question answering, conversational search, question rewriting, transformer-based neural language models},
    doi = {10.1007/978-3-030-99739-7_23},
    eprint = {2201.09146},
    eprintclass = {cs.CL},
    eprinttype = {arXiv},
    isbn = {978-3-030-99739-7},
    note = {This version of the contribution has been accepted for publication, after peer review but is not the Version of Record and does not reflect post-acceptance improvements, or any corrections. The Version of Record is available online at: http://dx.doi.org/10.1007/978-3-030-99739-7_23. Use of this Accepted Version is subject to the publisher’s Accepted Manuscript terms of use \url{https://www.springernature.com/gp/open-research/policies/accepted-manuscript-terms}},
    }
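
    The system described above chains three modules: question rewriting, retrieval, and answer generation. The schematic below shows how a rewritten, self-contained question feeds the downstream stages; every function body here is a naive placeholder for what is a transformer-based model in the actual system.

    # Schematic of the three-stage conversational QA pipeline analyzed in the
    # paper: (a) rewrite the question in context, (b) retrieve background
    # information, (c) generate the answer. All bodies are toy placeholders.

    def rewrite(question: str, history: list[str]) -> str:
        """Stage (a): make the question self-contained. Toy heuristic: replace
        'it' with the last noun phrase of the previous turn."""
        topic = " ".join(history[-1].rstrip(".?!").split()[-2:]) if history else ""
        return question.replace(" it", " " + topic)

    def retrieve(query: str, corpus: list[str], k: int = 1) -> list[str]:
        """Stage (b): toy lexical retrieval ranked by word overlap."""
        overlap = lambda doc: len(set(query.lower().split()) & set(doc.lower().split()))
        return sorted(corpus, key=overlap, reverse=True)[:k]

    def answer(question: str, passages: list[str]) -> str:
        """Stage (c): toy generator conditioned on the question and passages."""
        return f"(answer to {question!r} grounded in {passages[0]!r})"

    history = ["Tell me about the MultiWOZ dataset."]
    corpus = ["MultiWOZ is a multi-domain task-oriented dialogue dataset.",
              "Posits are an alternative to IEEE floating-point numbers."]
    q = rewrite("Who created it?", history)   # -> "Who created MultiWOZ dataset?"
    print(answer(q, retrieve(q, corpus)))

    As the paper's ablation suggests, swapping the rewrite stage for a better or worse variant changes what the retriever sees and hence the end-to-end answer quality.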


  • 2021

    G. Raposo, P. Tomás, and N. Roma, “PositNN: Training Deep Neural Networks with Mixed Low-Precision Posit,” in ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Jun. 2021, pp. 7908–7912. doi: 10.1109/ICASSP39728.2021.9413919. [Online]. Available: https://ieeexplore.ieee.org/document/9413919
    PDF
    Cite

    @InProceedings{Raposo2021,
    author = {Gon{\c{c}}alo Raposo and Pedro Tom{\'{a}}s and Nuno Roma},
    booktitle = {{ICASSP} 2021 - 2021 {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
    title = {{PositNN: Training Deep Neural Networks with Mixed Low-Precision Posit}},
    year = {2021},
    month = jun,
    pages = {7908--7912},
    publisher = {{IEEE}},
    abstract = {Low-precision formats have proven to be an efficient way to reduce not only the memory footprint but also the hardware resources and power consumption of deep learning computations. Under this premise, the posit numerical format appears to be a highly viable substitute for the IEEE floating-point, but its application to neural networks training still requires further research. Some preliminary results have shown that 8-bit (and even smaller) posits may be used for inference and 16-bit for training, while maintaining the model accuracy. The presented research aims to evaluate the feasibility to train deep convolutional neural networks using posits. For such purpose, a software framework was developed to use simulated posits and quires in end-to-end training and inference. This implementation allows using any bit size, configuration, and even mixed precision, suitable for different precision requirements in various stages. The obtained results suggest that 8-bit posits can substitute 32-bit floats during training with no negative impact on the resulting loss and accuracy.},
    doi = {10.1109/ICASSP39728.2021.9413919},
    eprint = {2105.00053},
    eprintclass = {cs.LG},
    eprinttype = {arXiv},
    keywords = {posit numerical format, low-precision arithmetic, deep neural networks, training, inference},
    url = {https://ieeexplore.ieee.org/document/9413919},
    }
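
    Because the paper's subject is the numeric format itself, a small worked decoder helps make the encoding concrete. The sketch below follows the standard posit layout (sign, regime, exponent, fraction), whose value is (-1)^s * useed^k * 2^e * (1 + f) with useed = 2^(2^es); it is a simplified illustration that assumes truncated exponent bits are zero-padded and skips quires and rounding, which the paper's framework also simulates.

    # Simplified decoder for an n-bit posit with es exponent bits. Handles the
    # two special encodings (zero and NaR) and two's-complement negation, but
    # omits quires and rounding.

    def decode_posit(bits: int, n: int = 8, es: int = 0) -> float:
        if bits == 0:
            return 0.0
        if bits == 1 << (n - 1):
            return float("nan")                     # NaR ("not a real")
        sign = (bits >> (n - 1)) & 1
        if sign:                                    # negative posits are two's complements
            bits = (-bits) & ((1 << n) - 1)
        body = bits & ((1 << (n - 1)) - 1)          # everything after the sign bit
        first = (body >> (n - 2)) & 1
        run = 0                                     # regime: run of identical bits
        for i in range(n - 2, -1, -1):
            if (body >> i) & 1 == first:
                run += 1
            else:
                break
        k = run - 1 if first else -run              # regime value
        rest_len = max(n - 1 - run - 1, 0)          # bits left after regime + terminator
        rest = body & ((1 << rest_len) - 1)
        e_len = min(es, rest_len)
        exp = (rest >> (rest_len - e_len)) if e_len else 0
        exp <<= es - e_len                          # assumed zero-padding if truncated
        f_len = rest_len - e_len
        frac = (rest & ((1 << f_len) - 1)) / (1 << f_len) if f_len else 0.0
        useed = 2 ** (2 ** es)
        value = useed ** k * 2 ** exp * (1.0 + frac)
        return -value if sign else value

    print(decode_posit(0b01000000))   # 1.0
    print(decode_posit(0b01100000))   # 2.0
    print(decode_posit(0b11000000))   # -1.0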