The task of Toxic Spans Detection was introduced as a SemEval task in 2021 (Task 5). The first version of the dataset is in the folder SemEval2021 of this repository. An extended version of the dataset, along with more baselines and experimental results, was presented at ACL 2022. This second and final version of the dataset is in the folder ACL2022 of this repository.
We refer the interested reader to the following articles, which we kindly ask you to cite if you use the data or findings of this work:
@inproceedings{pavlopoulos-etal-2022-acl,
  author    = {Pavlopoulos, John and Laugier, L{\'e}o and Xenos, Alexandros and Sorensen, Jeffrey and Androutsopoulos, Ion},
  title     = {From the Detection of Toxic Spans in Online Discussions to the Analysis of Toxic-to-Civil Transfer},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (ACL 2022)},
  month     = may,
  year      = {2022},
  address   = {Dublin, Ireland},
  publisher = {Association for Computational Linguistics},
}
@inproceedings{pavlopoulos-etal-2021-semeval,
  author    = {Pavlopoulos, John and Sorensen, Jeffrey and Laugier, L{\'e}o and Androutsopoulos, Ion},
  title     = {{S}em{E}val-2021 Task 5: Toxic Spans Detection},
  booktitle = {Proceedings of the 15th International Workshop on Semantic Evaluation (SemEval-2021)},
  pages     = {59--69},
  publisher = {Association for Computational Linguistics},
  address   = {Online},
  month     = aug,
  year      = {2021},
  doi       = {10.18653/v1/2021.semeval-1.6},
  url       = {https://aclanthology.org/2021.semeval-1.6},
}