Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add language specific queries #17

Merged
merged 6 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 191 additions & 0 deletions qanary_helpers/language_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
from qanary_helpers.qanary_queries import select_from_triplestore, get_text_question_from_uri
import logging


class QuestionTextWithLanguage:
    """Holds data of a question text in the triplestore that has an associated language.

    The language is associated either through a previous translation or
    through language recognition.
    """

    def __init__(self, uri: str, text: str, lang: str):
        """Inits QuestionTextWithLanguage with question URI, question text and question language.

        Keyword arguments:
        uri (str) -- URI of the question inside of the triplestore
        text (str) -- Textual representation of the question
        lang (str) -- Language of the question text
        """
        self.uri = uri
        self.text = text
        self.lang = lang

    def __repr__(self) -> str:
        """Returns a debug representation listing URI, text and language."""
        return (
            f"{type(self).__name__}"
            f"(uri={self.uri!r}, text={self.text!r}, lang={self.lang!r})"
        )

    # The getters mirror the public attributes; they are kept so that existing
    # callers using the accessor style continue to work.
    def get_uri(self) -> str:
        """Returns the URI of the question."""
        return self.uri

    def get_text(self) -> str:
        """Returns the textual representation of the question."""
        return self.text

    def get_language(self) -> str:
        """Returns the language of the question text."""
        return self.lang


def get_texts_with_detected_language_in_triplestore(
    triplestore_endpoint: str, graph_uri: str, lang: str
) -> list[QuestionTextWithLanguage]:
    """Retrieves question texts from the triplestore for which a specific language has been detected.

    Selects all qa:AnnotationOfQuestionLanguage annotations in the graph whose
    body equals `lang` and fetches the text of each annotated question via
    get_text_question_from_uri.

    Keyword arguments:
    triplestore_endpoint (str) -- URL of the triplestore endpoint
    graph_uri (str) -- URI of the graph to query inside of the triplestore
    lang (str) -- Expected detected language

    Returns:
    list -- A list of appropriate QuestionTextWithLanguage objects with information from the triplestore.
    """
    # Escape the characters that are not allowed verbatim inside a SPARQL
    # double-quoted literal, so `lang` cannot terminate the literal early.
    lang_literal = (
        lang.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    sparql_find_ld = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        SELECT *
        FROM <{graph}>
        WHERE {{
            ?annotationId a qa:AnnotationOfQuestionLanguage .
            ?annotationId oa:hasTarget ?hasTarget ;
                oa:hasBody ?hasBody ;
                oa:annotatedBy ?annotatedBy ;
                oa:annotatedAt ?annotatedAt .
            FILTER(STR(?hasBody) = "{lang}")
        }}
    """.format(
        graph=graph_uri,
        lang=lang_literal,
    )
    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)

    source_texts = []
    for result in results["results"]["bindings"]:
        # The annotation target is the question URI; the annotation itself
        # does not carry the text, so it is fetched separately.
        question_uri = result["hasTarget"]["value"]
        question_text = get_text_question_from_uri(
            triplestore_endpoint=triplestore_endpoint,
            question_uri=question_uri,
        )
        source_texts.append(
            QuestionTextWithLanguage(uri=question_uri, text=question_text, lang=lang)
        )

    return source_texts


def get_translated_texts_in_triplestore(
    triplestore_endpoint: str, graph_uri: str, lang: str
) -> list[QuestionTextWithLanguage]:
    """Retrieves question texts from the triplestore that were translated into a specific language.

    Selects all qa:AnnotationOfQuestionTranslation annotations in the graph
    whose body carries the language tag `lang`; the annotation body is used
    as the question text.

    Keyword arguments:
    triplestore_endpoint (str) -- URL of the triplestore endpoint
    graph_uri (str) -- URI of the graph to query inside of the triplestore
    lang (str) -- Target language of the translation

    Returns:
    list -- A list of appropriate QuestionTextWithLanguage objects with information from the triplestore.
    """
    # Escape the characters that are not allowed verbatim inside a SPARQL
    # double-quoted literal, so `lang` cannot terminate the literal early.
    lang_literal = (
        lang.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    sparql_find_ld = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>

        SELECT *
        FROM <{graph}>
        WHERE {{
            ?annotationId a qa:AnnotationOfQuestionTranslation .
            ?annotationId oa:hasTarget ?hasTarget ;
                oa:hasBody ?hasBody ;
                oa:annotatedBy ?annotatedBy ;
                oa:annotatedAt ?annotatedAt .
            FILTER(lang(?hasBody) = "{lang}").
        }}
    """.format(
        graph=graph_uri,
        lang=lang_literal,
    )
    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)

    # Target of the annotation is the question URI, its body the translated text.
    return [
        QuestionTextWithLanguage(
            uri=result["hasTarget"]["value"],
            text=result["hasBody"]["value"],
            lang=lang,
        )
        for result in results["results"]["bindings"]
    ]


def create_annotation_of_question_translation(
    graph_uri: str,
    question_uri: str,
    translation: str,
    translation_language: str,
    app_name: str,
) -> str:
    """Creates an INSERT SPARQL query to annotate the question translation in the triplestore.

    The query is only generated (and logged at INFO level); it is not executed.

    Keyword Arguments:
    graph_uri (str) -- URI of the graph to query inside of the triplestore
    question_uri (str) -- URI of the question inside of the triplestore
    translation (str) -- Translation of the question text
    translation_language (str) -- Target language of the translation
    app_name (str) -- Name of the component making the annotation

    Returns:
    str -- The generated INSERT query
    """
    # The translation is free text: backslashes, double quotes and line breaks
    # must be escaped, otherwise they end the SPARQL literal prematurely and
    # produce an invalid query. Backslashes go first so that the backslashes
    # added by the later replacements are not doubled.
    translation_literal = (
        translation.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    query = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        INSERT {{
        GRAPH <{uuid}> {{
            ?a a qa:AnnotationOfQuestionTranslation ;
                oa:hasTarget <{qanary_question_uri}> ;
                oa:hasBody "{translation_result}"@{target_lang} ;
                oa:annotatedBy <urn:qanary:{app_name}> ;
                oa:annotatedAt ?time .
            }}
        }}
        WHERE {{
            BIND (IRI(str(RAND())) AS ?a) .
            BIND (now() as ?time)
        }}
    """.format(
        uuid=graph_uri,
        qanary_question_uri=question_uri,
        translation_result=translation_literal,
        target_lang=translation_language,
        app_name=app_name,
    )
    logging.info("SPARQL: %s", query)
    return query


def create_annotation_of_question_language(
    graph_uri: str, question_uri: str, language: str, app_name: str
) -> str:
    """Creates an INSERT SPARQL query to annotate the language of a question in the triplestore.

    The query is only generated (and logged at INFO level); it is not executed.

    Keyword Arguments:
    graph_uri (str) -- URI of the graph to query inside of the triplestore
    question_uri (str) -- URI of the question inside of the triplestore
    language (str) -- Determined language of the question
    app_name (str) -- Name of the component making the annotation

    Returns:
    str -- The generated INSERT query
    """
    # Escape the characters that are not allowed verbatim inside a SPARQL
    # double-quoted literal, so the value cannot break out of the literal.
    language_literal = (
        language.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    query = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        INSERT {{
        GRAPH <{uuid}> {{
            ?b a qa:AnnotationOfQuestionLanguage ;
                oa:hasTarget <{qanary_question_uri}> ;
                oa:hasBody "{src_lang}"^^xsd:string ;
                oa:annotatedBy <urn:qanary:{app_name}> ;
                oa:annotatedAt ?time .
            }}
        }}
        WHERE {{
            BIND (IRI(str(RAND())) AS ?b) .
            BIND (now() as ?time)
        }}
    """.format(
        uuid=graph_uri,
        qanary_question_uri=question_uri,
        src_lang=language_literal,
        app_name=app_name,
    )

    logging.info("SPARQL: %s", query)
    return query
39 changes: 28 additions & 11 deletions qanary_helpers/qanary_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,30 @@
import re


def get_text_question_from_uri(triplestore_endpoint: str, question_uri: str) -> str:
    """Retrieves the textual representation for a question identified by a URI

    The text is fetched with an HTTP GET from "<question_uri>/raw". Before the
    request, every occurrence of "localhost" in that URL is replaced by the
    host name of the triplestore endpoint.

    Keyword arguments:
    triplestore_endpoint (str) -- URL of the triplestore endpoint
    question_uri (str) -- URI of the question

    Returns:
    str -- The question text

    Raises:
    ValueError -- If no host name can be extracted from triplestore_endpoint

    """
    question_raw = question_uri + "/raw"
    logging.info("found: questionURI={0} questionURIraw={1}".format(
        question_uri,
        question_raw
    ))
    hostname = urlparse(triplestore_endpoint).hostname
    # urlparse yields hostname None when the endpoint has no network location
    # (e.g. a missing scheme); fail early instead of passing None to replace().
    if hostname is None:
        raise ValueError("No valid host name could be extracted from the supplied triplestore_endpoint: {0}"
                         .format(triplestore_endpoint))
    response = requests.get(question_raw.replace("localhost", hostname))
    return response.text


def get_text_question_in_graph(triplestore_endpoint, graph):
"""
Retrieves the questions from the triplestore returns an array
Expand All @@ -17,7 +41,7 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT DISTINCT ?questionURI
FROM <{uri}>
FROM <{uri}>
WHERE {{
?questionURI rdf:type <http://www.wdaqua.eu/qa#Question> .
}}
Expand All @@ -26,16 +50,9 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
results = select_from_triplestore(triplestore_endpoint, query)
for result in results["results"]["bindings"]:
question_uri = result['questionURI']['value']
question_raw = question_uri + "/raw"
logging.info("found: questionURI={0} questionURIraw={1}".format(
question_uri,
question_raw
))
question_text = requests.get(question_raw.replace(
"localhost", urlparse(triplestore_endpoint).hostname)
)
logging.info("found question: \"{0}\"".format(question_text.text))
questions.append({"uri": question_uri, "text": question_text.text})
question_text = get_text_question_from_uri(triplestore_endpoint, question_uri)
logging.info("found question: \"{0}\"".format(question_text))
questions.append({"uri": question_uri, "text": question_text})

return questions

Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ def read_requirements():

setuptools.setup(
name="qanary-helpers",
version="0.2.2",
version="0.3.2",
author="Andreas Both, Aleksandr Perevalov",
author_email="[email protected], aleksandr.perevalov@hs-anhalt.de",
author_email="[email protected], aleksandr.perevalov@htwk-leipzig.de",
description="A package that helps to build Python components for the Qanary Question Answering framework",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/Perevalov/qanary_helpers",
url="https://github.com/WSE-research/qanary_helpers",
packages=setuptools.find_packages(),
classifiers=[
"Programming Language :: Python",
Expand Down
Loading