From b0ffb998467216c8a8b3e01fdcb5724043a9e579 Mon Sep 17 00:00:00 2001 From: heinpa Date: Tue, 20 Aug 2024 00:53:38 +0200 Subject: [PATCH 1/6] add common functionality for language components --- qanary_helpers/language_queries.py | 77 ++++++++++++++++++++++++++++++ qanary_helpers/qanary_queries.py | 27 ++++++----- setup.py | 2 +- 3 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 qanary_helpers/language_queries.py diff --git a/qanary_helpers/language_queries.py b/qanary_helpers/language_queries.py new file mode 100644 index 0000000..c9a5346 --- /dev/null +++ b/qanary_helpers/language_queries.py @@ -0,0 +1,77 @@ +from qanary_helpers.qanary_queries import select_from_triplestore + + +class question_text_with_language: + + def __init__(self, uri: str, text: str, lang: str): + self.uri = uri + self.text = text + self.lang = lang + + def get_uri(self): + return self.uri + + def get_text(self): + return self.text + + def get_language(self): + return self.lang + + +def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]: + source_texts = list() + sparql_find_ld = """ + PREFIX qa: + PREFIX oa: + PREFIX xsd: + + SELECT * + FROM <{graph}> + WHERE {{ + ?annotationId a qa:AnnotationOfQuestionLanguage . + ?annotationId oa:hasTarget ?hasTarget ; + oa:hasBody ?hasBody ; + oa:annotatedBy ?annotatedBy ; + oa:annotatedAt ?annotatedAt . + FILTER(STR(?hasBody) = {lang}) + }} + """.format( + graph = graph_uri, + lang=lang + ) + results = select_from_triplestore(triplestore_endpoint, sparql_find_ld) + for result in results["results"]["bindings"]: + question_uri = result["hasTarget"]["value"] + question_text = get_question_text_from_uri(question_uri, triplestore_endpoint) + source_texts.append(question_text_with_language(uri=question_uri, text=question_text, lang=lang)) + + return source_texts + + +def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]: + source_texts = list() + sparql_find_ld = """ + PREFIX qa: + PREFIX oa: + + SELECT * + FROM <{graph}> + WHERE {{ + ?annotationId a qa:AnnotationOfQuestionTranslation . + ?annotationId oa:hasTarget ?hasTarget ; + oa:hasBody ?hasBody ; + oa:annotatedBy ?annotatedBy ; + oa:annotatedAt ?annotatedAt . + FILTER(lang(?hasBody) = {lang}). + }} + """.format( + graph = graph_uri, + lang=lang + ) + results = select_from_triplestore(triplestore_endpoint, sparql_find_ld) + for result in results["results"]["bindings"]: + question_uri = result["hasTarget"]["value"] + question_text = result["hasBody"]["value"] + source_texts.append(question_text_with_language(question_uri, question_text, lang)) + + return source_texts diff --git a/qanary_helpers/qanary_queries.py b/qanary_helpers/qanary_queries.py index 5ceac7b..686cf75 100644 --- a/qanary_helpers/qanary_queries.py +++ b/qanary_helpers/qanary_queries.py @@ -5,6 +5,18 @@ import re +def get_text_question_from_uri(question_uri, triplestore_endpoint): + question_raw = question_uri + "/raw" + logging.info("found: questionURI={0} questionURIraw={1}".format( + question_uri, + question_raw + )) + question_text = requests.get(question_raw.replace( + "localhost", urlparse(triplestore_endpoint).hostname) + ) + return question_text.text + + def get_text_question_in_graph(triplestore_endpoint, graph): """ Retrieves the questions from the triplestore returns an array @@ -17,7 +29,7 @@ def get_text_question_in_graph(triplestore_endpoint, graph): query = """ PREFIX rdf: SELECT DISTINCT ?questionURI - FROM <{uri}> + FROM <{uri}> WHERE {{ ?questionURI rdf:type . }} @@ -26,16 +38,9 @@ def get_text_question_in_graph(triplestore_endpoint, graph): results = select_from_triplestore(triplestore_endpoint, query) for result in results["results"]["bindings"]: question_uri = result['questionURI']['value'] - question_raw = question_uri + "/raw" - logging.info("found: questionURI={0} questionURIraw={1}".format( - question_uri, - question_raw - )) - question_text = requests.get(question_raw.replace( - "localhost", urlparse(triplestore_endpoint).hostname) - ) - logging.info("found question: \"{0}\"".format(question_text.text)) - questions.append({"uri": question_uri, "text": question_text.text}) + question_text = get_text_question_from_uri(question_uri, triplestore_endpoint) + logging.info("found question: \"{0}\"".format(question_text)) + questions.append({"uri": question_uri, "text": question_text}) return questions diff --git a/setup.py b/setup.py index 8cfefe6..a813e63 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def read_requirements(): setuptools.setup( name="qanary-helpers", - version="0.2.2", + version="0.3.0", author="Andreas Both, Aleksandr Perevalov", author_email="andreas.both@htwk-leipzig.de, aleksandr.perevalov@hs-anhalt.de", description="A package that helps to build Python components for the Qanary Question Answering framework", From c1f9570d181c037c1fc82ba9da938c43a0fcd3de Mon Sep 17 00:00:00 2001 From: heinpa Date: Tue, 20 Aug 2024 01:35:45 +0200 Subject: [PATCH 2/6] add functions for generating common insert queries --- qanary_helpers/language_queries.py | 67 ++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/qanary_helpers/language_queries.py b/qanary_helpers/language_queries.py index c9a5346..1f91a7f 100644 --- a/qanary_helpers/language_queries.py +++ b/qanary_helpers/language_queries.py @@ -1,8 +1,8 @@ -from qanary_helpers.qanary_queries import select_from_triplestore +from qanary_helpers.qanary_queries import select_from_triplestore, get_text_question_from_uri +import logging class question_text_with_language: - def __init__(self, uri: str, text: str, lang: str): self.uri = uri self.text = text @@ -42,7 +42,7 @@ def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, g results = select_from_triplestore(triplestore_endpoint, sparql_find_ld) for result in results["results"]["bindings"]: question_uri = result["hasTarget"]["value"] - question_text = get_question_text_from_uri(question_uri, triplestore_endpoint) + question_text = get_text_question_from_uri(question_uri, triplestore_endpoint) source_texts.append(question_text_with_language(uri=question_uri, text=question_text, lang=lang)) return source_texts @@ -75,3 +75,64 @@ def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: st source_texts.append(question_text_with_language(question_uri, question_text, lang)) return source_texts + + +def create_annotation_of_question_translation(graph_uri: str, question_uri: str, translation: str, translation_language: str, app_name: str) -> str: + SPARQLqueryAnnotationOfQuestionTranslation = """ + PREFIX qa: + PREFIX oa: + PREFIX xsd: + + INSERT {{ + GRAPH <{uuid}> {{ + ?a a qa:AnnotationOfQuestionTranslation ; + oa:hasTarget <{qanary_question_uri}> ; + oa:hasBody "{translation_result}"@{target_lang} ; + oa:annotatedBy ; + oa:annotatedAt ?time . + + }} + }} + WHERE {{ + BIND (IRI(str(RAND())) AS ?a) . + BIND (now() as ?time) + }} + """.format( + uuid=graph_uri, + qanary_question_uri=question_uri, + translation_result=translation, + target_lang=translation_language, + app_name=app_name + ) + logging.info(f'SPARQL: {SPARQLqueryAnnotationOfQuestionTranslation}') + return SPARQLqueryAnnotationOfQuestionTranslation + + +def create_annotation_of_question_language(graph_uri: str, question_uri: str, language: str, app_name: str) -> str: + SPARQLqueryAnnotationOfQuestionLanguage = """ + PREFIX qa: + PREFIX oa: + PREFIX xsd: + + INSERT {{ + GRAPH <{uuid}> {{ + ?b a qa:AnnotationOfQuestionLanguage ; + oa:hasTarget <{qanary_question_uri}> ; + oa:hasBody "{src_lang}"^^xsd:string ; + oa:annotatedBy ; + oa:annotatedAt ?time . + }} + }} + WHERE {{ + BIND (IRI(str(RAND())) AS ?b) . + BIND (now() as ?time) + }} + """.format( + uuid=graph_uri, + qanary_question_uri=question_uri, + src_lang=language, + app_name=app_name + ) + + logging.info(f'SPARQL: {SPARQLqueryAnnotationOfQuestionLanguage}') + return SPARQLqueryAnnotationOfQuestionLanguage From 14dcd262c7a0cacd6d02e2b745fad12cb4be258b Mon Sep 17 00:00:00 2001 From: Andreas Both Date: Tue, 20 Aug 2024 16:30:24 +0200 Subject: [PATCH 3/6] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a813e63..e2323f4 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ def read_requirements(): name="qanary-helpers", version="0.3.0", author="Andreas Both, Aleksandr Perevalov", - author_email="andreas.both@htwk-leipzig.de, aleksandr.perevalov@hs-anhalt.de", + author_email="andreas.both@htwk-leipzig.de, aleksandr.perevalov@htwk-leipzig.de", description="A package that helps to build Python components for the Qanary Question Answering framework", long_description=long_description, long_description_content_type="text/markdown", From 15074f38bb61d9cc0d3306fd8b1b0a4f4f5d1673 Mon Sep 17 00:00:00 2001 From: heinpa Date: Sun, 25 Aug 2024 19:08:40 +0200 Subject: [PATCH 4/6] extend documentation --- qanary_helpers/language_queries.py | 64 +++++++++++++++++++++++++++--- qanary_helpers/qanary_queries.py | 22 +++++++--- 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/qanary_helpers/language_queries.py b/qanary_helpers/language_queries.py index 1f91a7f..ea0e166 100644 --- a/qanary_helpers/language_queries.py +++ b/qanary_helpers/language_queries.py @@ -2,8 +2,17 @@ import logging -class question_text_with_language: +class QuestionTextWithLanguage: + """Holds data of question texts in the triplestore that have an associated language, either through previous translation or language recognition.""" + def __init__(self, uri: str, text: str, lang: str): + """Inits QuestionTextWithLanguage with question URI, question text and question language. + + Keyword arguments: + uri (str) -- URI of the question inside of the triplestore + text (str) -- Textual representation of the question + lang (str) -- Language of the question text + """ self.uri = uri self.text = text self.lang = lang @@ -18,7 +27,17 @@ def get_language(self): return self.lang -def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]: +def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[QuestionTextWithLanguage]: + """Retrieves question texts from the triplestore for which a specific language has been detected. + + Keyword arguments: + triplestore_endpoint (str) -- URL of the triplestore endpoint + graph_uri (str) -- URI of the graph to query inside of the triplestore + lang (str) -- Expected detected language + + Returns: + list -- A list of appropriate question_text_with_language objects with information from the triplestore. + """ source_texts = list() sparql_find_ld = """ PREFIX qa: @@ -43,12 +62,22 @@ def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, g for result in results["results"]["bindings"]: question_uri = result["hasTarget"]["value"] question_text = get_text_question_from_uri(question_uri, triplestore_endpoint) - source_texts.append(question_text_with_language(uri=question_uri, text=question_text, lang=lang)) + source_texts.append(QuestionTextWithLanguage(uri=question_uri, text=question_text, lang=lang)) return source_texts -def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]: +def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[QuestionTextWithLanguage]: + """Retrieves question texts from the triplestore that were translated into a specific language. + + Keyword arguments: + triplestore_endpoint (str) -- URL of the triplestore endpoint + graph_uri (str) -- URI of the graph to query inside of the triplestore + lang (str) -- Target language of the translation + + Returns: + list -- A list of appropriate question_text_with_language objects with information from the triplestore. + """ source_texts = list() sparql_find_ld = """ PREFIX qa: @@ -72,12 +101,25 @@ def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: st for result in results["results"]["bindings"]: question_uri = result["hasTarget"]["value"] question_text = result["hasBody"]["value"] - source_texts.append(question_text_with_language(question_uri, question_text, lang)) + source_texts.append(QuestionTextWithLanguage(question_uri, question_text, lang)) return source_texts def create_annotation_of_question_translation(graph_uri: str, question_uri: str, translation: str, translation_language: str, app_name: str) -> str: + """Creates an INSERT SPARQL query to annotate the question translation in the triplestore. + + Keyword Arguments: + graph_uri (str) -- URI of the graph to query inside of the triplestore + question_uri (str) -- URI of the question inside of the triplestore + translation (str) -- Translation of the question text + translation_language (str) -- Target language of the translation + app_name (str) -- Name of the component making the annotation + + Returns: + str -- The generated INSERT query + """ + SPARQLqueryAnnotationOfQuestionTranslation = """ PREFIX qa: PREFIX oa: @@ -109,6 +151,18 @@ def create_annotation_of_question_translation(graph_uri: str, question_uri: str, def create_annotation_of_question_language(graph_uri: str, question_uri: str, language: str, app_name: str) -> str: + """Creates an INSERT SPARQL query to annotate the language of a question in the triplestore. + + Keyword Arguments: + graph_uri (str) -- URI of the graph to query inside of the triplestore + question_uri (str) -- URI of the question inside of the triplestore + language (str) -- Determined language of the question + app_name (str) -- Name of the component making the annotation + + Returns: + str -- The generated INSERT query + """ + SPARQLqueryAnnotationOfQuestionLanguage = """ PREFIX qa: PREFIX oa: diff --git a/qanary_helpers/qanary_queries.py b/qanary_helpers/qanary_queries.py index 686cf75..1d6fdfb 100644 --- a/qanary_helpers/qanary_queries.py +++ b/qanary_helpers/qanary_queries.py @@ -5,15 +5,27 @@ import re -def get_text_question_from_uri(question_uri, triplestore_endpoint): +def get_text_question_from_uri(triplestore_endpoint: str, question_uri: str) -> str: + """Retrieves the textual representation for a question identified by a URI + + Keyword arguments: + triplestore_endpoint (str) -- URL of the triplestore endpoint + question_uri (str) -- URI of the question + + Returns: + str -- The question text + + """ question_raw = question_uri + "/raw" logging.info("found: questionURI={0} questionURIraw={1}".format( question_uri, question_raw )) - question_text = requests.get(question_raw.replace( - "localhost", urlparse(triplestore_endpoint).hostname) - ) + hostname = urlparse(triplestore_endpoint).hostname + if hostname == None: + raise ValueError("No valid host name could be extracted from the supplied triplestore_endpoint: {0}" + .format(triplestore_endpoint)) + question_text = requests.get(question_raw.replace("localhost", hostname)) return question_text.text @@ -38,7 +50,7 @@ def get_text_question_in_graph(triplestore_endpoint, graph): results = select_from_triplestore(triplestore_endpoint, query) for result in results["results"]["bindings"]: question_uri = result['questionURI']['value'] - question_text = get_text_question_from_uri(question_uri, triplestore_endpoint) + question_text = get_text_question_from_uri(triplestore_endpoint, question_uri) logging.info("found question: \"{0}\"".format(question_text)) questions.append({"uri": question_uri, "text": question_text}) From 43866e714749592fae2f9da12d4c19c3cbe84ef4 Mon Sep 17 00:00:00 2001 From: heinpa Date: Mon, 26 Aug 2024 04:38:59 +0200 Subject: [PATCH 5/6] update repository url --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index e2323f4..0d940d8 100644 --- a/setup.py +++ b/setup.py @@ -21,13 +21,13 @@ def read_requirements(): setuptools.setup( name="qanary-helpers", - version="0.3.0", + version="0.3.2", author="Andreas Both, Aleksandr Perevalov", author_email="andreas.both@htwk-leipzig.de, aleksandr.perevalov@htwk-leipzig.de", description="A package that helps to build Python components for the Qanary Question Answering framework", long_description=long_description, long_description_content_type="text/markdown", - url="https://github.com/Perevalov/qanary_helpers", + url="https://github.com/WSE-research/qanary_helpers", packages=setuptools.find_packages(), classifiers=[ "Programming Language :: Python", From aa8416a96a4c81f9ef875c3b49a4eaf5b5f4c0fb Mon Sep 17 00:00:00 2001 From: heinpa Date: Mon, 26 Aug 2024 04:39:26 +0200 Subject: [PATCH 6/6] fix filter for language tag --- qanary_helpers/language_queries.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/qanary_helpers/language_queries.py b/qanary_helpers/language_queries.py index ea0e166..a8062bb 100644 --- a/qanary_helpers/language_queries.py +++ b/qanary_helpers/language_queries.py @@ -36,7 +36,7 @@ def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, g lang (str) -- Expected detected language Returns: - list -- A list of appropriate question_text_with_language objects with information from the triplestore. + list -- A list of appropriate QuestionTextWithLanguage objects with information from the triplestore. """ source_texts = list() sparql_find_ld = """ @@ -52,7 +52,7 @@ def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, g oa:hasBody ?hasBody ; oa:annotatedBy ?annotatedBy ; oa:annotatedAt ?annotatedAt . - FILTER(STR(?hasBody) = {lang}) + FILTER(STR(?hasBody) = \"{lang}\") }} """.format( graph = graph_uri, @@ -61,7 +61,7 @@ def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, g results = select_from_triplestore(triplestore_endpoint, sparql_find_ld) for result in results["results"]["bindings"]: question_uri = result["hasTarget"]["value"] - question_text = get_text_question_from_uri(question_uri, triplestore_endpoint) + question_text = get_text_question_from_uri(triplestore_endpoint=triplestore_endpoint, question_uri=question_uri) source_texts.append(QuestionTextWithLanguage(uri=question_uri, text=question_text, lang=lang)) return source_texts @@ -76,7 +76,7 @@ def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: st lang (str) -- Target language of the translation Returns: - list -- A list of appropriate question_text_with_language objects with information from the triplestore. + list -- A list of appropriate QuestionTextWithLanguage objects with information from the triplestore. """ source_texts = list() sparql_find_ld = """ @@ -91,7 +91,7 @@ def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: st oa:hasBody ?hasBody ; oa:annotatedBy ?annotatedBy ; oa:annotatedAt ?annotatedAt . - FILTER(lang(?hasBody) = {lang}). + FILTER(lang(?hasBody) = \"{lang}\"). }} """.format( graph = graph_uri, @@ -132,7 +132,6 @@ def create_annotation_of_question_translation(graph_uri: str, question_uri: str, oa:hasBody "{translation_result}"@{target_lang} ; oa:annotatedBy ; oa:annotatedAt ?time . - }} }} WHERE {{