@inproceedings{garg-etal-2022-keyphrase,
title = "Keyphrase Generation Beyond the Boundaries of Title and Abstract",
author = "Garg, Krishna and
Ray Chowdhury, Jishnu and
Caragea, Cornelia",
editor = "Goldberg, Yoav and
Kozareva, Zornitsa and
Zhang, Yue",
booktitle = "Findings of the Association for Computational Linguistics: EMNLP 2022",
month = dec,
year = "2022",
address = "Abu Dhabi, United Arab Emirates",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2022.findings-emnlp.427/",
doi = "10.18653/v1/2022.findings-emnlp.427",
pages = "5809--5821",
abstract = "Keyphrase generation aims at generating important phrases (keyphrases) that best describe a given document. In scholarly domains, current approaches have largely used only the title and abstract of the articles to generate keyphrases. In this paper, we comprehensively explore whether the integration of additional information from the full text of a given article or from semantically similar articles can be helpful for a neural keyphrase generation model or not. We discover that adding sentences from the full text, particularly in the form of the extractive summary of the article can significantly improve the generation of both types of keyphrases that are either present or absent from the text. Experimental results with three widely used models for keyphrase generation along with one of the latest transformer models suitable for longer documents, Longformer Encoder-Decoder (LED) validate the observation. We also present a new large-scale scholarly dataset FullTextKP for keyphrase generation. Unlike prior large-scale datasets, FullTextKP includes the full text of the articles along with the title and abstract. We release the source code at https://github.com/kgarg8/FullTextKP."
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="garg-etal-2022-keyphrase">
<titleInfo>
<title>Keyphrase Generation Beyond the Boundaries of Title and Abstract</title>
</titleInfo>
<name type="personal">
<namePart type="given">Krishna</namePart>
<namePart type="family">Garg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jishnu</namePart>
<namePart type="family">Ray Chowdhury</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Cornelia</namePart>
<namePart type="family">Caragea</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-12</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<relatedItem type="host">
<titleInfo>
<title>Findings of the Association for Computational Linguistics: EMNLP 2022</title>
</titleInfo>
<name type="personal">
<namePart type="given">Yoav</namePart>
<namePart type="family">Goldberg</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Zornitsa</namePart>
<namePart type="family">Kozareva</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Yue</namePart>
<namePart type="family">Zhang</namePart>
<role>
<roleTerm authority="marcrelator" type="text">editor</roleTerm>
</role>
</name>
<originInfo>
<publisher>Association for Computational Linguistics</publisher>
<place>
<placeTerm type="text">Abu Dhabi, United Arab Emirates</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">conference publication</genre>
</relatedItem>
<abstract>Keyphrase generation aims at generating important phrases (keyphrases) that best describe a given document. In scholarly domains, current approaches have largely used only the title and abstract of the articles to generate keyphrases. In this paper, we comprehensively explore whether the integration of additional information from the full text of a given article or from semantically similar articles can be helpful for a neural keyphrase generation model or not. We discover that adding sentences from the full text, particularly in the form of the extractive summary of the article can significantly improve the generation of both types of keyphrases that are either present or absent from the text. Experimental results with three widely used models for keyphrase generation along with one of the latest transformer models suitable for longer documents, Longformer Encoder-Decoder (LED) validate the observation. We also present a new large-scale scholarly dataset FullTextKP for keyphrase generation. Unlike prior large-scale datasets, FullTextKP includes the full text of the articles along with the title and abstract. We release the source code at https://github.com/kgarg8/FullTextKP.</abstract>
<identifier type="citekey">garg-etal-2022-keyphrase</identifier>
<identifier type="doi">10.18653/v1/2022.findings-emnlp.427</identifier>
<location>
<url>https://aclanthology.org/2022.findings-emnlp.427/</url>
</location>
<part>
<date>2022-12</date>
<extent unit="page">
<start>5809</start>
<end>5821</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Conference Proceedings
%T Keyphrase Generation Beyond the Boundaries of Title and Abstract
%A Garg, Krishna
%A Ray Chowdhury, Jishnu
%A Caragea, Cornelia
%Y Goldberg, Yoav
%Y Kozareva, Zornitsa
%Y Zhang, Yue
%S Findings of the Association for Computational Linguistics: EMNLP 2022
%D 2022
%8 December
%I Association for Computational Linguistics
%C Abu Dhabi, United Arab Emirates
%F garg-etal-2022-keyphrase
%X Keyphrase generation aims at generating important phrases (keyphrases) that best describe a given document. In scholarly domains, current approaches have largely used only the title and abstract of the articles to generate keyphrases. In this paper, we comprehensively explore whether the integration of additional information from the full text of a given article or from semantically similar articles can be helpful for a neural keyphrase generation model or not. We discover that adding sentences from the full text, particularly in the form of the extractive summary of the article can significantly improve the generation of both types of keyphrases that are either present or absent from the text. Experimental results with three widely used models for keyphrase generation along with one of the latest transformer models suitable for longer documents, Longformer Encoder-Decoder (LED) validate the observation. We also present a new large-scale scholarly dataset FullTextKP for keyphrase generation. Unlike prior large-scale datasets, FullTextKP includes the full text of the articles along with the title and abstract. We release the source code at https://github.com/kgarg8/FullTextKP.
%R 10.18653/v1/2022.findings-emnlp.427
%U https://aclanthology.org/2022.findings-emnlp.427/
%U https://doi.org/10.18653/v1/2022.findings-emnlp.427
%P 5809-5821
Markdown (Informal)
[Keyphrase Generation Beyond the Boundaries of Title and Abstract](https://aclanthology.org/2022.findings-emnlp.427/) (Garg et al., Findings 2022)
ACL