Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add language specific queries #17

Merged
merged 6 commits into from
Sep 2, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions qanary_helpers/language_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
from qanary_helpers.qanary_queries import select_from_triplestore, get_text_question_from_uri
import logging


class question_text_with_language:
    """A question text together with the URI it belongs to and its language.

    The three constructor arguments are stored unchanged as the public
    attributes ``uri``, ``text`` and ``lang``; the ``get_*`` methods return
    those same attributes.
    """

    def __init__(self, uri: str, text: str, lang: str):
        self.uri = uri
        self.text = text
        self.lang = lang

    def __repr__(self) -> str:
        # Readable representation so instances are useful in logs/debuggers.
        return (
            f"{type(self).__name__}("
            f"uri={self.uri!r}, text={self.text!r}, lang={self.lang!r})"
        )

    def get_uri(self) -> str:
        """Return the URI passed to the constructor."""
        return self.uri

    def get_text(self) -> str:
        """Return the text passed to the constructor."""
        return self.text

    def get_language(self) -> str:
        """Return the language passed to the constructor."""
        return self.lang


def get_texts_with_detected_language_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]:
    """Return the questions in a graph whose language annotation equals *lang*.

    Selects every ``qa:AnnotationOfQuestionLanguage`` in ``graph_uri`` whose
    body, taken as a string, equals ``lang``. For each match the annotation
    target is used as the question URI and its text is obtained through
    ``get_text_question_from_uri``.

    Args:
        triplestore_endpoint: Endpoint passed to ``select_from_triplestore``
            and ``get_text_question_from_uri``.
        graph_uri: URI of the graph to query (used in the ``FROM`` clause).
        lang: Plain language value to match, e.g. ``en`` (without quotes).

    Returns:
        One ``question_text_with_language`` per matching annotation, in the
        order the bindings are returned.
    """
    # The value is embedded as a quoted SPARQL string literal; a bare token
    # (``= en``) is not a valid expression. Backslashes and double quotes are
    # escaped so the value cannot terminate the literal early.
    lang_literal = lang.replace("\\", "\\\\").replace('"', '\\"')
    sparql_find_ld = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        SELECT *
        FROM <{graph}>
        WHERE {{
            ?annotationId a qa:AnnotationOfQuestionLanguage .
            ?annotationId oa:hasTarget ?hasTarget ;
                oa:hasBody ?hasBody ;
                oa:annotatedBy ?annotatedBy ;
                oa:annotatedAt ?annotatedAt .
            FILTER(STR(?hasBody) = "{lang}")
        }}
    """.format(
        graph=graph_uri,
        lang=lang_literal,
    )
    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)

    source_texts = []
    for result in results["results"]["bindings"]:
        question_uri = result["hasTarget"]["value"]
        # The annotation only references the question; its text is fetched
        # separately.
        question_text = get_text_question_from_uri(question_uri, triplestore_endpoint)
        source_texts.append(question_text_with_language(uri=question_uri, text=question_text, lang=lang))

    return source_texts


def get_translated_texts_in_triplestore(triplestore_endpoint: str, graph_uri: str, lang: str) -> list[question_text_with_language]:
    """Return the translations stored in a graph whose language tag is *lang*.

    Selects every ``qa:AnnotationOfQuestionTranslation`` in ``graph_uri``
    whose body literal carries the language tag ``lang``. The annotation
    target is used as the question URI and the body value as the text.

    Args:
        triplestore_endpoint: Endpoint passed to ``select_from_triplestore``.
        graph_uri: URI of the graph to query (used in the ``FROM`` clause).
        lang: Plain language tag to match, e.g. ``en`` (without quotes).

    Returns:
        One ``question_text_with_language`` per matching annotation, in the
        order the bindings are returned.
    """
    # ``lang(?x)`` yields a string, so it has to be compared with a quoted
    # string literal; a bare token (``= en``) is not a valid expression.
    # Backslashes and double quotes are escaped to keep the literal intact.
    lang_literal = lang.replace("\\", "\\\\").replace('"', '\\"')
    sparql_find_ld = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>

        SELECT *
        FROM <{graph}>
        WHERE {{
            ?annotationId a qa:AnnotationOfQuestionTranslation .
            ?annotationId oa:hasTarget ?hasTarget ;
                oa:hasBody ?hasBody ;
                oa:annotatedBy ?annotatedBy ;
                oa:annotatedAt ?annotatedAt .
            FILTER(lang(?hasBody) = "{lang}").
        }}
    """.format(
        graph=graph_uri,
        lang=lang_literal,
    )
    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)

    return [
        question_text_with_language(
            result["hasTarget"]["value"],
            result["hasBody"]["value"],
            lang,
        )
        for result in results["results"]["bindings"]
    ]


def create_annotation_of_question_translation(graph_uri: str, question_uri: str, translation: str, translation_language: str, app_name: str) -> str:
    """Build the SPARQL INSERT that stores a translation of a question.

    The generated update inserts a ``qa:AnnotationOfQuestionTranslation``
    into ``graph_uri`` with ``question_uri`` as target, the translation as a
    language-tagged body, ``urn:qanary:<app_name>`` as annotator and the
    store's ``now()`` as timestamp. The query is logged at INFO level and
    returned; it is not executed here.

    Args:
        graph_uri: URI of the graph to insert into.
        question_uri: URI of the question the translation belongs to.
        translation: Raw translated text (unescaped).
        translation_language: Language tag attached to the body literal.
        app_name: Name appended to ``urn:qanary:`` as the annotator.

    Returns:
        The SPARQL update query as a string.
    """
    # The text ends up inside a double-quoted SPARQL literal, where raw
    # backslashes, double quotes and line breaks are not allowed. Escape them
    # (backslash first, so the escapes themselves are not re-escaped).
    escaped_translation = (
        translation.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    SPARQLqueryAnnotationOfQuestionTranslation = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        INSERT {{
        GRAPH <{uuid}> {{
            ?a a qa:AnnotationOfQuestionTranslation ;
                oa:hasTarget <{qanary_question_uri}> ;
                oa:hasBody "{translation_result}"@{target_lang} ;
                oa:annotatedBy <urn:qanary:{app_name}> ;
                oa:annotatedAt ?time .

            }}
        }}
        WHERE {{
            BIND (IRI(str(RAND())) AS ?a) .
            BIND (now() as ?time)
        }}
    """.format(
        uuid=graph_uri,
        qanary_question_uri=question_uri,
        translation_result=escaped_translation,
        target_lang=translation_language,
        app_name=app_name,
    )
    logging.info("SPARQL: %s", SPARQLqueryAnnotationOfQuestionTranslation)
    return SPARQLqueryAnnotationOfQuestionTranslation


def create_annotation_of_question_language(graph_uri: str, question_uri: str, language: str, app_name: str) -> str:
    """Build the SPARQL INSERT that records the language of a question.

    The generated update inserts a ``qa:AnnotationOfQuestionLanguage`` into
    ``graph_uri`` with ``question_uri`` as target, ``language`` as an
    ``xsd:string`` body, ``urn:qanary:<app_name>`` as annotator and the
    store's ``now()`` as timestamp. The query is logged at INFO level and
    returned; it is not executed here.
    """
    placeholders = {
        "uuid": graph_uri,
        "qanary_question_uri": question_uri,
        "src_lang": language,
        "app_name": app_name,
    }
    query_template = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        INSERT {{
        GRAPH <{uuid}> {{
            ?b a qa:AnnotationOfQuestionLanguage ;
                oa:hasTarget <{qanary_question_uri}> ;
                oa:hasBody "{src_lang}"^^xsd:string ;
                oa:annotatedBy <urn:qanary:{app_name}> ;
                oa:annotatedAt ?time .
            }}
        }}
        WHERE {{
            BIND (IRI(str(RAND())) AS ?b) .
            BIND (now() as ?time)
        }}
    """
    insert_query = query_template.format(**placeholders)

    logging.info(f'SPARQL: {insert_query}')
    return insert_query
27 changes: 16 additions & 11 deletions qanary_helpers/qanary_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@
import re


def get_text_question_from_uri(question_uri, triplestore_endpoint):
    """Fetch the raw text of a question via HTTP GET.

    The text is requested from ``<question_uri>/raw``. Before the request,
    every occurrence of ``localhost`` in that URL is replaced by the host
    name parsed from ``triplestore_endpoint``.
    NOTE(review): presumably question URIs are minted with ``localhost`` and
    must be redirected to the actual host — confirm.

    :param question_uri: URI of the question resource
    :param triplestore_endpoint: endpoint URL whose host name replaces
        ``localhost`` in the request URL
    :return: body of the HTTP response as text
    """
    raw_url = question_uri + "/raw"
    logging.info(
        "found: questionURI={0} questionURIraw={1}".format(question_uri, raw_url)
    )
    endpoint_host = urlparse(triplestore_endpoint).hostname
    reachable_url = raw_url.replace("localhost", endpoint_host)
    response = requests.get(reachable_url)
    return response.text


def get_text_question_in_graph(triplestore_endpoint, graph):
"""
Retrieves the questions from the triplestore returns an array
Expand All @@ -17,7 +29,7 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT DISTINCT ?questionURI
FROM <{uri}>
FROM <{uri}>
WHERE {{
?questionURI rdf:type <http://www.wdaqua.eu/qa#Question> .
}}
Expand All @@ -26,16 +38,9 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
results = select_from_triplestore(triplestore_endpoint, query)
for result in results["results"]["bindings"]:
question_uri = result['questionURI']['value']
question_raw = question_uri + "/raw"
logging.info("found: questionURI={0} questionURIraw={1}".format(
question_uri,
question_raw
))
question_text = requests.get(question_raw.replace(
"localhost", urlparse(triplestore_endpoint).hostname)
)
logging.info("found question: \"{0}\"".format(question_text.text))
questions.append({"uri": question_uri, "text": question_text.text})
question_text = get_text_question_from_uri(question_uri, triplestore_endpoint)
logging.info("found question: \"{0}\"".format(question_text))
questions.append({"uri": question_uri, "text": question_text})

return questions

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ def read_requirements():

setuptools.setup(
name="qanary-helpers",
version="0.2.2",
version="0.3.0",
author="Andreas Both, Aleksandr Perevalov",
author_email="[email protected], aleksandr.perevalov@hs-anhalt.de",
author_email="[email protected], aleksandr.perevalov@htwk-leipzig.de",
description="A package that helps to build Python components for the Qanary Question Answering framework",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down
Loading