From dc8ebe35c28c694be7ad0cf65f0df67736f511fa Mon Sep 17 00:00:00 2001 From: anthology-assist Date: Mon, 16 Sep 2024 17:39:00 -0500 Subject: [PATCH 1/9] ingested konvens main. --- data/xml/2024.konvens.xml | 361 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 data/xml/2024.konvens.xml diff --git a/data/xml/2024.konvens.xml b/data/xml/2024.konvens.xml new file mode 100644 index 0000000000..c72fa60683 --- /dev/null +++ b/data/xml/2024.konvens.xml @@ -0,0 +1,361 @@ + + + + + Proceedings of the 20th Conference on Natural Language Processing (KONVENS 2024) + Pedro Henrique{Luz de Araujo} + AndreasBaumann + DagmarGromann + BrigitteKrenn + BenjaminRoth + MichaelWiegand + Association for Computational Linguistics +
Vienna, Austria
+ September + 2024 + konvens + + + 2024.konvens-main.0 + konvens-2024-main + + + Large Language Models as Evaluators for Scientific Synthesis + JuliaEvans + JenniferD’Souza + SörenAuer + 1–22 + 2024.konvens-main.1 + evans-etal-2024-large + + + A Crosslingual Approach to Dependency Parsing for <fixed-case>M</fixed-case>iddle <fixed-case>H</fixed-case>igh <fixed-case>G</fixed-case>erman + CoraHaiber + 23–31 + 2024.konvens-main.2 + haiber-2024-crosslingual + + + Complexity of <fixed-case>G</fixed-case>erman Texts Written by Primary School Children + JammilaLaâguidi + DanaNeumann + RonjaLaarmann-Quante + StefanieDipper + MihailChifligarov + 32–39 + 2024.konvens-main.3 + laaguidi-etal-2024-complexity + + + Exploring Automatic Text Simplification for <fixed-case>L</fixed-case>ithuanian + JustinaMandravickaitė + EgleRimkiene + DanguolėKalinauskaitė + Danguolė KotrynaKapkan + 40–49 + 2024.konvens-main.4 + mandravickaite-etal-2024-exploring + + + Word alignment in Discourse Representation Structure parsing + ChristianObereder + GaborRecski + 50–56 + 2024.konvens-main.5 + obereder-recski-2024-word + + + Evaluating and Fine-Tuning Retrieval-Augmented Language Models to Generate Text with Accurate Citations + VinzentPenzkofer + TimoBaumann + 57–64 + 2024.konvens-main.6 + penzkofer-baumann-2024-evaluating + + + Discourse Parsing for <fixed-case>G</fixed-case>erman with new <fixed-case>RST</fixed-case> Corpora + SaraShahmohammadi + ManfredStede + 65–74 + 2024.konvens-main.7 + shahmohammadi-stede-2024-discourse + + + Revisiting the Phenomenon of Syntactic Complexity Convergence on <fixed-case>G</fixed-case>erman Dialogue Data + YuWang + HendrikBuschmeier + 75–80 + 2024.konvens-main.8 + wang-buschmeier-2024-revisiting + + + Estimating Word Concreteness from Contextualized Embeddings + ChristianWartena + 81–88 + 2024.konvens-main.9 + wartena-2024-estimating + + + Using <fixed-case>G</fixed-case>erma<fixed-case>N</fixed-case>et for the Generation of Crossword Puzzles + ClausZinn + 
MarieHinrichs + ErhardHinrichs + 89–97 + 2024.konvens-main.10 + zinn-etal-2024-using + + + Leveraging Cross-Lingual Transfer Learning in Spoken Named Entity Recognition Systems + MoncefBenaicha + DavidThulke + Mehmet Ali TuğtekinTuran + 98–105 + 2024.konvens-main.11 + benaicha-etal-2024-leveraging + + + Exploring Data Acquisition Strategies for the Domain Adaptation of <fixed-case>QA</fixed-case> Models + MauriceFalk + AdrianUlges + DirkKrechel + 106–116 + 2024.konvens-main.12 + falk-etal-2024-exploring + + + <fixed-case>CO</fixed-case>-Fun: A <fixed-case>G</fixed-case>erman Dataset on Company Outsourcing in Fund Prospectuses for Named Entity Recognition and Relation Extraction + NedaForoutan + MarkusSchröder + AndreasDengel + 117–122 + 2024.konvens-main.13 + foroutan-etal-2024-co + + + <fixed-case>GER</fixed-case>estaurant: A <fixed-case>G</fixed-case>erman Dataset of Annotated Restaurant Reviews for Aspect-Based Sentiment Analysis + Nils ConstantinHellwig + JakobFehle + MarkusBink + ChristianWolff + 123–133 + 2024.konvens-main.14 + hellwig-etal-2024-gerestaurant + + + How to Translate <fixed-case>SQ</fixed-case>u<fixed-case>AD</fixed-case> to <fixed-case>G</fixed-case>erman? 
A Comparative Study of Answer Span Retrieval Methods for Question Answering Dataset Creation + JensKaiser + AgnieszkaFalenska + 134–140 + 2024.konvens-main.15 + kaiser-falenska-2024-translate + + + Few-Shot Prompting for Subject Indexing of <fixed-case>G</fixed-case>erman Medical Book Titles + LisaKluge + MaximilianKähler + 141–148 + 2024.konvens-main.16 + kluge-kahler-2024-shot + + + Binary indexes for optimising corpus queries + PeterLjunglöf + NicholasSmallbone + MijoThoresson + VictorSalomonsson + 149–158 + 2024.konvens-main.17 + ljunglof-etal-2024-binary + + + An Improved Method for Class-specific Keyword Extraction: A Case Study in the <fixed-case>G</fixed-case>erman Business Registry + StephenMeisenbacher + TimSchopf + WeixinYan + PatrickHoll + FlorianMatthes + 159–165 + 2024.konvens-main.18 + meisenbacher-etal-2024-improved + + + Tabular <fixed-case>JSON</fixed-case>: A Proposal for a Pragmatic Linguistic Data Format + AdamRoussel + 166–172 + 2024.konvens-main.19 + roussel-2024-tabular + + + Semiautomatic Data Generation for Academic Named Entity Recognition in <fixed-case>G</fixed-case>erman Text Corpora + PiaSchwarz + 173–181 + 2024.konvens-main.20 + schwarz-2024-semiautomatic + + + Redundancy Aware Multiple Reference Based Gainwise Evaluation of Extractive Summarization + MousumiAkter + SantuKarmaker + 182–195 + 2024.konvens-main.21 + akter-karmaker-2024-redundancy + + + Fine-grained quotation detection and attribution in <fixed-case>G</fixed-case>erman news articles + FynnPetersen-Frey + ChrisBiemann + 196–208 + 2024.konvens-main.22 + petersen-frey-biemann-2024-fine + + + Decoding 16th-Century Letters: From Topic Models to <fixed-case>GPT</fixed-case>-Based Keyword Mapping + Phillip BenjaminStröbel + StefanAderhold + RamonaRoller + 209–221 + 2024.konvens-main.23 + strobel-etal-2024-decoding + + + Analysing Effects of Inducing Gender Bias in Language Models + StephanieGross + BrigitteKrenn + CraigLincoln + LenaHolzwarth + 222–230 + 2024.konvens-main.24 + 
gross-etal-2024-analysing + + + <fixed-case>OM</fixed-case>o<fixed-case>S</fixed-case>-<fixed-case>QA</fixed-case>: A Dataset for Cross-Lingual Extractive Question Answering in a <fixed-case>G</fixed-case>erman Migration Context + SteffenKleinle + JakobPrange + AnnemarieFriedrich + 231–248 + 2024.konvens-main.25 + kleinle-etal-2024-omos + + + Role-Playing <fixed-case>LLM</fixed-case>s in Professional Communication Training: The Case of Investigative Interviews with Children + DonTuggener + TeresaSchneider + ArianaHuwiler + TobiasKreienbühl + SimonHischier + Piusvon Däniken + SusannaNiehaus + 249–263 + 2024.konvens-main.26 + tuggener-etal-2024-role + + + Features and Detectability of <fixed-case>G</fixed-case>erman Texts Generated with Large Language Models + VerenaIrrgang + VeronikaSolopova + SteffenZeiler + Robert M.Nickel + DorotheaKolossa + 264–280 + 2024.konvens-main.27 + irrgang-etal-2024-features + + + <fixed-case>L</fixed-case>ex2<fixed-case>S</fixed-case>ent: A bagging approach to unsupervised sentiment analysis + Kai-RobinLange + JonasRieger + JonasRieger + 281–291 + 2024.konvens-main.28 + lange-etal-2024-lex2sent + + + Discourse-Level Features in Spoken and Written Communication + Hannah J.Seemann + SaraShahmohammadi + ManfredStede + TatjanaScheffler + 292–302 + 2024.konvens-main.29 + seemann-etal-2024-discourse + + + Version Control for Speech Corpora + VladDumitru + MatthiasBoehm + MartinHagmüller + BarbaraSchuppler + 303–308 + 2024.konvens-main.30 + dumitru-etal-2024-version + + + Querying Repetitions in Spoken Language Corpora + ElenaFrick + HenrikeHelmer + DoloresLemmenmeier-Batinić + 309–316 + 2024.konvens-main.31 + frick-etal-2024-querying + + + Exploring Phonetic Features in Language Embeddings for Unseen Language Varieties of <fixed-case>A</fixed-case>ustrian <fixed-case>G</fixed-case>erman + LorenzGutscher + MichaelPucher + 317–325 + 2024.konvens-main.32 + gutscher-pucher-2024-exploring + + + A Multilingual Dataset of Adversarial Attacks to 
Automatic Content Scoring Systems + RonjaLaarmann-Quante + ChristopherChandler + NoemiIncirkus + VitaliiaRuban + AlonaSolopov + LucaSteen + 326–338 + 2024.konvens-main.33 + laarmann-quante-etal-2024-multilingual + + + Towards Improving <fixed-case>ASR</fixed-case> Outputs of Spontaneous Speech with <fixed-case>LLM</fixed-case>s + KarnerManuel + JulianLinke + MarkKröll + BarbaraSchuppler + Bernhard C.Geiger + 339–348 + 2024.konvens-main.34 + manuel-etal-2024-towards + + + <fixed-case>O</fixed-case>ne<fixed-case>L</fixed-case>ove beyond the field - A few-shot pipeline for topic and sentiment analysis during the <fixed-case>FIFA</fixed-case> World Cup in <fixed-case>Q</fixed-case>atar + ChristophRauchegger + Sonja MeiWang + PieterDelobelle + 349–357 + 2024.konvens-main.35 + rauchegger-etal-2024-onelove + + + Linguistic and extralinguistic factors in automatic speech recognition of <fixed-case>G</fixed-case>erman atypical speech + EugeniaRykova + MathiasWalther + 358–367 + 2024.konvens-main.36 + rykova-walther-2024-linguistic + + + <fixed-case>LLM</fixed-case>-based Translation Across 500 Years. The Case for Early <fixed-case>N</fixed-case>ew <fixed-case>H</fixed-case>igh <fixed-case>G</fixed-case>erman + MartinVolk + Dominic P.Fischer + PatriciaScheurer + RaphaelSchwitter + Phillip B.Ströbel + 368–375 + 2024.konvens-main.37 + volk-etal-2024-llm-based + +
+
From 69b86b7b5590383705e803fec3c6a3e428015c19 Mon Sep 17 00:00:00 2001 From: anthology-assist Date: Mon, 16 Sep 2024 17:42:02 -0500 Subject: [PATCH 2/9] ingested workshop germsdetect. --- data/xml/2024.germsdetect.xml | 72 +++++++++++++++++++++++++++++++ data/yaml/venues/germsdetect.yaml | 3 ++ 2 files changed, 75 insertions(+) create mode 100644 data/xml/2024.germsdetect.xml create mode 100644 data/yaml/venues/germsdetect.yaml diff --git a/data/xml/2024.germsdetect.xml b/data/xml/2024.germsdetect.xml new file mode 100644 index 0000000000..91e4c927ad --- /dev/null +++ b/data/xml/2024.germsdetect.xml @@ -0,0 +1,72 @@ + + + + + Proceedings of GermEval 2024 Task 1 GerMS-Detect Workshop on Sexism Detection in German Online News Fora (GerMS-Detect 2024) + BrigitteKrenn + JohannPetrak + StephanieGross + Association for Computational Lingustics +
Vienna, Austria
+ September + 2024 + germsdetect + + + 2024.germsdetect-1.0 + germsdetect-2024-1 + + + <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 Shared Task: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect – Sexism Detection in <fixed-case>G</fixed-case>erman Online News Fora + StephanieGross + JohannPetrak + LouisaVenhoff + BrigitteKrenn + 1–9 + 2024.germsdetect-1.1 + gross-etal-2024-germeval2024 + + + <fixed-case>THA</fixed-case>ugs at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 (Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect): Predicting the Severity of Misogyny/Sexism in Forum Comments with <fixed-case>BERT</fixed-case> Models (Subtask 1, Closed Track and Additional Experiments) + CorsinGeiss + AlessandraZarcone + 10–20 + 2024.germsdetect-1.2 + geiss-zarcone-2024-thaugs + + + <fixed-case>FICODE</fixed-case> at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect closed <fixed-case>ST</fixed-case>1 & <fixed-case>ST</fixed-case>2: Ensemble- and Transformer-Based Detection of Sexism and Misogyny in <fixed-case>G</fixed-case>erman Texts + MaoroFalk + MichaelaGeierhos + 21–25 + 2024.germsdetect-1.3 + falk-geierhos-2024-ficode + + + Team Quabynar at the <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 Shared Task 1 <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect (Subtasks 1 and 2) on Sexism Detection + Kwabena OdameAkomeah + UdoKruschwitz + BerndLudwig + 26–32 + 2024.germsdetect-1.4 + akomeah-etal-2024-team + + + Detecting Sexism in <fixed-case>G</fixed-case>erman Online Newspaper Comments with Open-Source Text Embeddings (Team <fixed-case>GDA</fixed-case>, <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect, Subtasks 1 and 2, Closed Track) + FlorianBremm + Patrick GustavBlaneck + TobiasBornheim + 
NiklasGrieger + 33–38 + 2024.germsdetect-1.5 + bremm-etal-2024-detecting + + + pd2904 at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 (Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect): Exploring the Effectiveness of Multi-Task Transformers vs. Traditional Models for Sexism Detection (Closed Tracks of Subtasks 1 and 2) + PiaDonabauer + 39–47 + 2024.germsdetect-1.6 + donabauer-2024-pd2904 + +
+
diff --git a/data/yaml/venues/germsdetect.yaml b/data/yaml/venues/germsdetect.yaml new file mode 100644 index 0000000000..3813cffa5f --- /dev/null +++ b/data/yaml/venues/germsdetect.yaml @@ -0,0 +1,3 @@ +acronym: GerMSDetect +name: GermEval 2024 Task 1 GerMS-Detect Workshop on Sexism Detection in German Online + News Fora From 1f1cfe59a9f898f4ba1da619e0e7f5030b15a242 Mon Sep 17 00:00:00 2001 From: anthology-assist Date: Mon, 16 Sep 2024 17:44:07 -0500 Subject: [PATCH 3/9] ingested workshop germeval. --- data/xml/2024.germeval.xml | 50 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 data/xml/2024.germeval.xml diff --git a/data/xml/2024.germeval.xml b/data/xml/2024.germeval.xml new file mode 100644 index 0000000000..c8ea6f3613 --- /dev/null +++ b/data/xml/2024.germeval.xml @@ -0,0 +1,50 @@ + + + + + Proceedings of GermEval 2024 Shared Task on Statement Segmentation in German Easy Language (StaGE) + ThorbenSchomacker + MiriamAnsch{\"u}tz + ReginaStodden + Association for Computational Lingustics +
Vienna, Austria
+ September + 2024 + germeval + + + 2024.germeval-1.0 + germeval-2024-1 + + + Overview of the <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 Shared Task on Statement Segmentation in <fixed-case>G</fixed-case>erman Easy Language (<fixed-case>S</fixed-case>ta<fixed-case>GE</fixed-case>) + ThorbenSchomacker + MiriamAnschütz + ReginaStodden + GeorgGroh + MarinaTropmann-Frick + 1–14 + 2024.germeval-1.1 + schomacker-etal-2024-overview + + + <fixed-case>K</fixed-case>lar<fixed-case>T</fixed-case>ext<fixed-case>C</fixed-case>oders at <fixed-case>S</fixed-case>ta<fixed-case>GE</fixed-case>: Automatic Statement Annotations for <fixed-case>G</fixed-case>erman Easy Language + Akhilesh KakoluRamarao + WiebkePetersen + Anna SophiaStein + EmmaStein + HanxinXia + 15–27 + 2024.germeval-1.2 + ramarao-etal-2024-klartextcoders + + + Statement Segmentation for <fixed-case>G</fixed-case>erman Easy Language Using <fixed-case>BERT</fixed-case> and Dependency Parsing + AndreasSäuberli + NiclasBodenmann + 28–32 + 2024.germeval-1.3 + sauberli-bodenmann-2024-statement + +
+
From 071749b372b2fea5f6f0311c9e1d8c4094bbb1f9 Mon Sep 17 00:00:00 2001 From: anthology-assist Date: Wed, 18 Sep 2024 14:29:13 -0500 Subject: [PATCH 4/9] ingested workshop cpss, closes #3062. --- data/xml/2024.cpss.xml | 140 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 data/xml/2024.cpss.xml diff --git a/data/xml/2024.cpss.xml b/data/xml/2024.cpss.xml new file mode 100644 index 0000000000..b11e51f429 --- /dev/null +++ b/data/xml/2024.cpss.xml @@ -0,0 +1,140 @@ + + + + + Proceedings of the 4th Workshop on Computational Linguistics for the Political and Social Sciences: Long and short papers + ChristopherKlamm + GabriellaLapesa + Simone PaoloPonzetto + InesRehbein + IndiraSen + Association for Computational Linguistics +
Vienna, Austria
+ Sep + 2024 + 2024.cpss-1 + cpss + + + 2024.cpss-1.0 + cpss-2024-1 + + + Detecting Calls to Action in Multimodal Content: Analysis of the 2021 <fixed-case>G</fixed-case>erman Federal Election Campaign on <fixed-case>I</fixed-case>nstagram + MichaelAchmann-Denkler + JakobFehle + MarioHaim + ChristianWolff + 1–13 + This study investigates the automated classification of Calls to Action (CTAs) within the 2021 German Instagram election campaign to advance the understanding of mobilization in social media contexts. We analyzed over 2,208 Instagram stories and 712 posts using fine-tuned BERT models and OpenAI’s GPT-4 models. The fine-tuned BERT model incorporating synthetic training data achieved a macro F1 score of 0.93, demonstrating a robust classification performance. Our analysis revealed that 49.58% of Instagram posts and 10.64% of stories contained CTAs, highlighting significant differences in mobilization strategies between these content types. Additionally, we found that FDP and the Greens had the highest prevalence of CTAs in posts, whereas CDU and CSU led in story CTAs. + 2024.cpss-1.1 + achmann-denkler-etal-2024-detecting + + + Multilingual Bot Accusations: How Different Linguistic Contexts Shape Perceptions of Social Bots + LeonFröhling + XiaofeiLi + DennisAssenmacher + 14–32 + Recent research indicates that the online use of the term “bot” has evolved over time. In the past, people used the term to accuse others of displaying automated behavior. However, it has gradually transformed into a linguistic tool to dehumanize the conversation partner, particularly on polarizing topics. Although this trend has been observed in English-speaking contexts, it is still unclear whether it holds true in other socio-linguistic environments. In this work we extend existing work on bot accusations and explore the phenomenon in a multilingual setting. We identify three distinct accusation patterns that characterize the different languages. 
+ 2024.cpss-1.2 + frohling-etal-2024-multilingual + + + Operationalising the Hermeneutic Grouping Process in Corpus-assisted Discourse Studies + PhilippHeinrich + StephanieEvert + 33–44 + We propose a framework for quantitative-qualitative research in corpus-assisted discourse studies (CADS), which operationalises the central process of manually forming groups of related words and phrases in terms of “discoursemes” and their constellations. We introduce an open-source implementation of this framework in the form of a REST API based on Corpus Workbench. Going through the workflow of a collocation analysis for fleeing and related terms in the German Federal Parliament, the paper gives details about the underlying algorithms, with available parameters and further possible choices. We also address multi-word units (which are often disregarded by CADS tools), a semantic map visualisation of collocations, and how to compute associations between discoursemes. + 2024.cpss-1.3 + heinrich-evert-2024-operationalising + + + A Few Hypocrites: Few-Shot Learning and Subtype Definitions for Detecting Hypocrisy Accusations in Online Climate Change Debates + Paulina GarciaCorral + AvishaiGreen + HendrikMeyer + AnkeStoll + XiaoyueYan + MyrtheReuver + 45–60 + The climate crisis is a salient issue in online discussions, and hypocrisy accusations are a central rhetorical element in these debates. However, for large-scale text analysis, hypocrisy accusation detection is an understudied tool, most often defined as a smaller subtask of fallacious argument detection. In this paper, we define hypocrisy accusation detection as an independent task in NLP, and identify different relevant subtypes of hypocrisy accusations. Our Climate Hypocrisy Accusation Corpus (CHAC) consists of 420 Reddit climate debate comments, expert-annotated into two different types of hypocrisy accusations: personal versus political hypocrisy. 
We evaluate few-shot in-context learning with 6 shots and 3 instruction-tuned Large Language Models (LLMs) for detecting hypocrisy accusations in this dataset. Results indicate that the GPT-4o and Llama-3 models in particular show promise in detecting hypocrisy accusations (F1 reaching 0.68, while previous work shows F1 of 0.44). However, context matters for a complex semantic concept such as hypocrisy accusations, and we find models struggle especially at identifying political hypocrisy accusations compared to personal moral hypocrisy. Our study contributes new insights in hypocrisy detection and climate change discourse, and is a stepping stone for large-scale analysis of hypocrisy accusation in online climate debates. + 2024.cpss-1.4 + corral-etal-2024-hypocrites + + + Language Complexity in Populist Rhetoric + Sergio E.Zanotto + DiegoFrassinelli + MiriamButt + 61–80 + Research suggests that politicians labeled as populists tend to use simpler language than their mainstream opponents. Yet, the metrics traditionally employed to assess the complexity of their language do not show consistent and generalizable results across different datasets and languages. These inconsistencies raise questions about the claimed simplicity of populist discourse, suggesting that the issue may be more nuanced than it initially seemed. To address this topic, we analyze the linguistic profile of IMPAQTS, a dataset of transcribed Italian political speeches, to identify linguistic features differentiating populist and non-populist parties. Our methodology ensures comparability of political texts and combines various statistical analyses to reliably identify key linguistic characteristics to test our case study. Results show that the “simplistic” language features previously described in the literature are not robust predictors of populism. 
This suggests that the characteristics defining populist statements are highly dependent on the specific dataset and the language being analysed, thus limiting the conclusions drawn in previous research. In our study, various linguistic features statistically differentiate between populist and mainstream parties, indicating that populists tend to employ specific well-known rhetorical strategies more frequently; however, none of them strongly indicate that populist parties use simpler language. + 2024.cpss-1.5 + zanotto-etal-2024-language + + + <fixed-case>C</fixed-case>hat<fixed-case>GPT</fixed-case> as Your n-th Annotator: Experiments in Leveraging Large Language Models for Social Science Text Annotation in <fixed-case>S</fixed-case>lovak Language + EndreHamerlik + MarekŠuppa + MiroslavBlšták + JozefKubík + MartinTakáč + MariánŠimko + AndrejFindor + 81–89 + Large Language Models (LLMs) are increasingly influential in Computational Social Science, offering new methods for processing and analyzing data, particularly in lower-resource language contexts. This study explores the use of OpenAI’s GPT-3.5 Turbo and GPT-4 for automating annotations for a unique news media dataset in a lower resourced language, focusing on stance classification tasks. Our results reveal that prompting in the native language, explanation generation, and advanced prompting strategies like Retrieval Augmented Generation and Chain of Thought prompting enhance LLM performance, particularly noting GPT-4’s superiority in predicting stance. Further evaluation indicates that LLMs can serve as a useful tool for social science text annotation in lower resourced languages, notably in identifying inconsistencies in annotation guidelines and annotated datasets. 
+ 2024.cpss-1.6 + hamerlik-etal-2024-chatgpt + + + Detecting emotional polarity in <fixed-case>F</fixed-case>innish parliamentary proceedings + SuviLehtosalo + JohnNerbonne + 90–100 + Few studies have focused on detecting emotion in parliamentary corpora, and none have done this for the Finnish parliament. In this paper, this gap is addressed by applying the polarity lexicon–based methodology of a study by Rheault et al. (2016) on speeches in the British Parliament to a Finnish corpus. The findings show an increase in positive sentiment over time. Additionally, the findings indicate that politicians’ emotional states may be impacted by the state of the economy and other major events, such as the Covid-19 pandemic and the Russian invasion of Ukraine. + 2024.cpss-1.7 + lehtosalo-nerbonne-2024-detecting + + + Topic-specific social science theory in stance detection: a proposal and interdisciplinary pilot study on sustainability initiatives + MyrtheReuver + AlessandraPolimeno + AntskeFokkens + Ana IsabelLopes + 101–111 + Topic-specificity is often seen as a limitation of stance detection models and datasets, especially for analyzing political and societal debates. However, stances contain topic-specific aspects that are crucial for an in-depth understanding of these debates. Our interdisciplinary approach identifies social science theories on specific debate topics as an opportunity for further defining stance detection research and analyzing online debate. This paper explores sustainability as debate topic, and connects stance to the sustainability-related Value-Belief-Norm (VBN) theory. VBN theory states that arguments in favor or against sustainability initiatives contain the dimensions of feeling power to change the issue with the initiative, and thinking whether or not the initiative tackles an urgent threat to the environment. 
In a pilot study with our Reddit European Sustainability Initiatives corpus, we develop an annotation procedure for these complex concepts. We then compare crowd-workers with Natural Language Processing experts’ annotation proficiency. Both crowd-workers and NLP experts find the tasks difficult, but experts reach more agreement on some difficult examples. This pilot study shows that complex theories about debate topics are feasible and worthwhile as annotation tasks for stance detection. + 2024.cpss-1.8 + reuver-etal-2024-topic + + + The Echoes of the ‘<fixed-case>I</fixed-case>’: Tracing Identity with Demographically Enhanced Word Embeddings + IvanSmirnov + 112–118 + Identity is one of the most commonly studied constructs in social science. However, despite extensive theoretical work on identity, there remains a need for additional empirical data to validate and refine existing theories. This paper introduces a novel approach to studying identity by enhancing word embeddings with socio-demographic information. As a proof of concept, we demonstrate that our approach successfully reproduces and extends established findings regarding gendered self-views. Our methodology can be applied in a wide variety of settings, allowing researchers to tap into a vast pool of naturally occurring data, such as social media posts. Unlike similar methods already introduced in computer science, our approach allows for the study of differences between social groups. This could be particularly appealing to social scientists and may encourage the faster adoption of computational methods in the field. + 2024.cpss-1.9 + smirnov-2024-echoes + + + <fixed-case>TPPMI</fixed-case> - a Temporal Positive Pointwise Mutual Information Embedding of Words + PaulSchmitt + ZsófiaRakovics + MártonRakovics + GáborRecski + 119–125 + We present Temporal Positive Pointwise Mutual Information (TPPMI) embeddings as a robust and data-efficient alternative for modeling temporal semantic change. 
Based on the assumption that the semantics of the most frequent words in a corpus are relatively stable over time, our model represents words as vectors of their PPMI similarities with a predefined set of such context words. We evaluate our method on the temporal word analogy benchmark of Yao et al. (2018) and compare it to the TWEC model (Di Carlo et al., 2019), demonstrating the competitiveness of the approach. While the performance of TPPMI stays below that of the state-of-the-art TWEC model, it offers a higher degree of interpretability and is applicable in scenarios where only a limited amount of data is available. + 2024.cpss-1.10 + schmitt-etal-2024-tppmi + + + Augmented Political Leaning Detection: Leveraging Parliamentary Speeches for Classifying News Articles + CharlottJakob + PiaWenzel + SalarMohtaj + VeraSchmitt + 126–133 + In an era where political discourse infiltrates online platforms and news media, identifying opinion is increasingly critical, especially in news articles, where objectivity is expected. Readers frequently encounter authors’ inherent political viewpoints, challenging them to discern facts from opinions. Classifying text on a spectrum from left to right is a key task for uncovering these viewpoints. Previous approaches rely on outdated datasets to classify current articles, neglecting that political opinions on certain subjects change over time. This paper explores a novel methodology for detecting political leaning in news articles by augmenting them with political speeches specific to the topic and publication time. We evaluated the impact of the augmentation using BERT and Mistral models. The results show that the BERT model’s F1 score improved from a baseline of 0.82 to 0.85, while the Mistral model’s F1 score increased from 0.30 to 0.31. + 2024.cpss-1.11 + jakob-etal-2024-augmented + +
+
From 2c363b935e0b316be8f2a51c8f699d6f31615f3b Mon Sep 17 00:00:00 2001 From: anthology-assist Date: Thu, 19 Sep 2024 13:33:14 -0500 Subject: [PATCH 5/9] minor updates. --- data/xml/2024.cpss.xml | 1 + data/xml/2024.germeval.xml | 1 + data/xml/2024.germsdetect.xml | 1 + data/xml/2024.konvens.xml | 7 +++++++ 4 files changed, 10 insertions(+) diff --git a/data/xml/2024.cpss.xml b/data/xml/2024.cpss.xml index b11e51f429..2a4320f354 100644 --- a/data/xml/2024.cpss.xml +++ b/data/xml/2024.cpss.xml @@ -14,6 +14,7 @@ 2024 2024.cpss-1 cpss + ws 2024.cpss-1.0 diff --git a/data/xml/2024.germeval.xml b/data/xml/2024.germeval.xml index c8ea6f3613..83e4275bc9 100644 --- a/data/xml/2024.germeval.xml +++ b/data/xml/2024.germeval.xml @@ -11,6 +11,7 @@ September 2024 germeval + ws 2024.germeval-1.0 diff --git a/data/xml/2024.germsdetect.xml b/data/xml/2024.germsdetect.xml index 91e4c927ad..b5815a9fd1 100644 --- a/data/xml/2024.germsdetect.xml +++ b/data/xml/2024.germsdetect.xml @@ -11,6 +11,7 @@ September 2024 germsdetect + ws 2024.germsdetect-1.0 diff --git a/data/xml/2024.konvens.xml b/data/xml/2024.konvens.xml index c72fa60683..93f6efac3e 100644 --- a/data/xml/2024.konvens.xml +++ b/data/xml/2024.konvens.xml @@ -358,4 +358,11 @@ volk-etal-2024-llm-based + + + 2024.cpss-1 + 2024.germeval-1 + 2024.germsdetect-1 + + From e9919308172481ba69a07480f34e1652f15771b7 Mon Sep 17 00:00:00 2001 From: anthology-assist Date: Sun, 22 Sep 2024 19:57:57 -0500 Subject: [PATCH 6/9] minor update. 
--- data/xml/2024.konvens.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/xml/2024.konvens.xml b/data/xml/2024.konvens.xml index 93f6efac3e..8c6782b9ab 100644 --- a/data/xml/2024.konvens.xml +++ b/data/xml/2024.konvens.xml @@ -3,7 +3,7 @@ Proceedings of the 20th Conference on Natural Language Processing (KONVENS 2024) - Pedro Henrique{Luz de Araujo} + Pedro HenriqueLuz de Araujo AndreasBaumann DagmarGromann BrigitteKrenn From 9ccc31d69741395e0da07bd6de8c4adddf622df5 Mon Sep 17 00:00:00 2001 From: anthology-assist Date: Sun, 22 Sep 2024 20:03:48 -0500 Subject: [PATCH 7/9] address comments, reingested germsdetect as volume 2. --- data/xml/2024.germeval.xml | 71 +++++++++++++++++++++++++++++++++- data/xml/2024.germsdetect.xml | 73 ----------------------------------- 2 files changed, 70 insertions(+), 74 deletions(-) delete mode 100644 data/xml/2024.germsdetect.xml diff --git a/data/xml/2024.germeval.xml b/data/xml/2024.germeval.xml index 83e4275bc9..5e3df84fa3 100644 --- a/data/xml/2024.germeval.xml +++ b/data/xml/2024.germeval.xml @@ -4,7 +4,7 @@ Proceedings of GermEval 2024 Shared Task on Statement Segmentation in German Easy Language (StaGE) ThorbenSchomacker - MiriamAnsch{\"u}tz + MiriamAnschütz ReginaStodden Association for Computational Lingustics
Vienna, Austria
@@ -48,4 +48,73 @@ sauberli-bodenmann-2024-statement
+ + + Proceedings of GermEval 2024 Task 1 GerMS-Detect Workshop on Sexism Detection in German Online News Fora (GerMS-Detect 2024) + BrigitteKrenn + JohannPetrak + StephanieGross + Association for Computational Linguistics +
Vienna, Austria
+ September + 2024 + germeval + + + 2024.germeval-2.0 + germeval-2024-2 + + + <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 Shared Task: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect – Sexism Detection in <fixed-case>G</fixed-case>erman Online News Fora + StephanieGross + JohannPetrak + LouisaVenhoff + BrigitteKrenn + 1–9 + 2024.germeval-2.1 + gross-etal-2024-germeval2024-shared + + + <fixed-case>THA</fixed-case>ugs at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 (Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect): Predicting the Severity of Misogyny/Sexism in Forum Comments with <fixed-case>BERT</fixed-case> Models (Subtask 1, Closed Track and Additional Experiments) + CorsinGeiss + AlessandraZarcone + 10–20 + 2024.germeval-2.2 + geiss-zarcone-2024-thaugs-germeval + + + <fixed-case>FICODE</fixed-case> at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect closed <fixed-case>ST</fixed-case>1 & <fixed-case>ST</fixed-case>2: Ensemble- and Transformer-Based Detection of Sexism and Misogyny in <fixed-case>G</fixed-case>erman Texts + MaoroFalk + MichaelaGeierhos + 21–25 + 2024.germeval-2.3 + falk-geierhos-2024-ficode-germeval + + + Team Quabynar at the <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 Shared Task 1 <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect (Subtasks 1 and 2) on Sexism Detection + Kwabena OdameAkomeah + UdoKruschwitz + BerndLudwig + 26–32 + 2024.germeval-2.4 + akomeah-etal-2024-team-quabynar + + + Detecting Sexism in <fixed-case>G</fixed-case>erman Online Newspaper Comments with Open-Source Text Embeddings (Team <fixed-case>GDA</fixed-case>, <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect, Subtasks 1 and 2, Closed Track) + FlorianBremm + Patrick GustavBlaneck + TobiasBornheim 
+ NiklasGrieger + 33–38 + 2024.germeval-2.5 + bremm-etal-2024-detecting-sexism + + + pd2904 at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 (Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect): Exploring the Effectiveness of Multi-Task Transformers vs. Traditional Models for Sexism Detection (Closed Tracks of Subtasks 1 and 2) + PiaDonabauer + 39–47 + 2024.germeval-2.6 + donabauer-2024-pd2904-germeval2024 + +
diff --git a/data/xml/2024.germsdetect.xml b/data/xml/2024.germsdetect.xml deleted file mode 100644 index b5815a9fd1..0000000000 --- a/data/xml/2024.germsdetect.xml +++ /dev/null @@ -1,73 +0,0 @@ - - - - - Proceedings of GermEval 2024 Task 1 GerMS-Detect Workshop on Sexism Detection in German Online News Fora (GerMS-Detect 2024) - BrigitteKrenn - JohannPetrak - StephanieGross - Association for Computational Lingustics -
Vienna, Austria
- September - 2024 - germsdetect - ws - - - 2024.germsdetect-1.0 - germsdetect-2024-1 - - - <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 Shared Task: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect – Sexism Detection in <fixed-case>G</fixed-case>erman Online News Fora - StephanieGross - JohannPetrak - LouisaVenhoff - BrigitteKrenn - 1–9 - 2024.germsdetect-1.1 - gross-etal-2024-germeval2024 - - - <fixed-case>THA</fixed-case>ugs at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 (Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect): Predicting the Severity of Misogyny/Sexism in Forum Comments with <fixed-case>BERT</fixed-case> Models (Subtask 1, Closed Track and Additional Experiments) - CorsinGeiss - AlessandraZarcone - 10–20 - 2024.germsdetect-1.2 - geiss-zarcone-2024-thaugs - - - <fixed-case>FICODE</fixed-case> at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect closed <fixed-case>ST</fixed-case>1 & <fixed-case>ST</fixed-case>2: Ensemble- and Transformer-Based Detection of Sexism and Misogyny in <fixed-case>G</fixed-case>erman Texts - MaoroFalk - MichaelaGeierhos - 21–25 - 2024.germsdetect-1.3 - falk-geierhos-2024-ficode - - - Team Quabynar at the <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val 2024 Shared Task 1 <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect (Subtasks 1 and 2) on Sexism Detection - Kwabena OdameAkomeah - UdoKruschwitz - BerndLudwig - 26–32 - 2024.germsdetect-1.4 - akomeah-etal-2024-team - - - Detecting Sexism in <fixed-case>G</fixed-case>erman Online Newspaper Comments with Open-Source Text Embeddings (Team <fixed-case>GDA</fixed-case>, <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect, Subtasks 1 and 2, Closed Track) - FlorianBremm - Patrick GustavBlaneck - TobiasBornheim - 
NiklasGrieger - 33–38 - 2024.germsdetect-1.5 - bremm-etal-2024-detecting - - - pd2904 at <fixed-case>G</fixed-case>erm<fixed-case>E</fixed-case>val2024 (Shared Task 1: <fixed-case>G</fixed-case>er<fixed-case>MS</fixed-case>-Detect): Exploring the Effectiveness of Multi-Task Transformers vs. Traditional Models for Sexism Detection (Closed Tracks of Subtasks 1 and 2) - PiaDonabauer - 39–47 - 2024.germsdetect-1.6 - donabauer-2024-pd2904 - -
-
From 4fded7016ccee9dfc047362011f9a90021616e7c Mon Sep 17 00:00:00 2001 From: anthology-assist Date: Mon, 23 Sep 2024 17:05:59 -0500 Subject: [PATCH 8/9] updated yaml/ --- data/yaml/venues/germsdetect.yaml | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 data/yaml/venues/germsdetect.yaml diff --git a/data/yaml/venues/germsdetect.yaml b/data/yaml/venues/germsdetect.yaml deleted file mode 100644 index 3813cffa5f..0000000000 --- a/data/yaml/venues/germsdetect.yaml +++ /dev/null @@ -1,3 +0,0 @@ -acronym: GerMSDetect -name: GermEval 2024 Task 1 GerMS-Detect Workshop on Sexism Detection in German Online - News Fora From cd36fba88f949ab0ae00b5f33054d7b75aa5d70c Mon Sep 17 00:00:00 2001 From: Matt Post Date: Mon, 23 Sep 2024 18:20:00 -0400 Subject: [PATCH 9/9] Update workshop volume --- data/xml/2024.konvens.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/xml/2024.konvens.xml b/data/xml/2024.konvens.xml index 8c6782b9ab..7e6fc2cab0 100644 --- a/data/xml/2024.konvens.xml +++ b/data/xml/2024.konvens.xml @@ -362,7 +362,7 @@ 2024.cpss-1 2024.germeval-1 - 2024.germsdetect-1 + 2024.germeval-2