Skip to content

Commit

Permalink
Merge pull request #319 from dkpro/refactoring/318-Address-linter-issues
Browse files Browse the repository at this point in the history
#318 - Address linter issues
  • Loading branch information
reckart authored May 7, 2024
2 parents 12f7c22 + d138484 commit a76b4e1
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 35 deletions.
4 changes: 2 additions & 2 deletions cassis/cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

from cassis.typesystem import (
FEATURE_BASE_NAME_HEAD,
FEATURE_BASE_NAME_LANGUAGE,
TYPE_NAME_DOCUMENT_ANNOTATION,
TYPE_NAME_FS_ARRAY,
TYPE_NAME_FS_LIST,
TYPE_NAME_SOFA,
Expand All @@ -21,8 +23,6 @@
TypeCheckError,
TypeSystem,
TypeSystemMode,
TYPE_NAME_DOCUMENT_ANNOTATION,
FEATURE_BASE_NAME_LANGUAGE,
)

_validator_optional_string = validators.optional(validators.instance_of(str))
Expand Down
34 changes: 32 additions & 2 deletions cassis/json.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,42 @@
import base64
import json
import math
from collections import OrderedDict
from collections import OrderedDict, defaultdict
from io import TextIOBase, TextIOWrapper
from math import isnan
from typing import Union, IO, Optional, Dict, List
from toposort import toposort_flatten

from cassis.cas import NAME_DEFAULT_SOFA, Cas, IdGenerator, Sofa, View
from cassis.typesystem import *
from cassis.typesystem import (
TYPE_NAME_ANNOTATION,
TypeSystem,
is_predefined,
merge_typesystems,
TYPE_NAME_SOFA,
FEATURE_BASE_NAME_SOFAARRAY,
array_type_name_for_type,
FEATURE_BASE_NAME_SOFASTRING,
FEATURE_BASE_NAME_SOFAID,
FEATURE_BASE_NAME_SOFAMIME,
FEATURE_BASE_NAME_SOFANUM,
FEATURE_BASE_NAME_SOFAURI,
TYPE_NAME_FS_ARRAY,
TYPE_NAME_BYTE_ARRAY,
TYPE_NAME_FLOAT_ARRAY,
TYPE_NAME_DOUBLE_ARRAY,
TypeSystemMode,
TYPE_NAME_DOCUMENT_ANNOTATION,
Type,
Feature,
TYPE_NAME_TOP,
is_primitive_array,
TYPE_NAME_FLOAT,
TYPE_NAME_DOUBLE,
element_type_name_for_array_type,
is_primitive,
is_array,
)

RESERVED_FIELD_PREFIX = "%"
REF_FEATURE_PREFIX = "@"
Expand Down
4 changes: 2 additions & 2 deletions cassis/xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from cassis.typesystem import (
_LIST_TYPES,
_PRIMITIVE_ARRAY_TYPES,
_PRIMITIVE_LIST_TYPES,
FEATURE_BASE_NAME_BEGIN,
FEATURE_BASE_NAME_END,
FEATURE_BASE_NAME_HEAD,
Expand Down Expand Up @@ -295,7 +294,8 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b
else:
view = cas.create_view(sofa.sofaID, xmiID=sofa.xmiID, sofaNum=sofa.sofaNum)

# Directly set the sofaString and offsetConverter for the sofa to avoid recomputing the offset convertion (slow!) when using the setter
# Directly set the sofaString and offsetConverter for the sofa to avoid recomputing the offset conversion
# (slow!) when using the setter
view.get_sofa()._sofaString = sofa.sofaString
view.get_sofa()._offset_converter = sofa._offset_converter
view.sofa_mime = sofa.mimeType
Expand Down
8 changes: 4 additions & 4 deletions tests/test_cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@ def test_default_typesystem_is_not_shared():
cas1 = Cas()
cas2 = Cas()

t1 = cas1.typesystem.create_type(name="test.Type")
t2 = cas2.typesystem.create_type(name="test.Type")
cas1.typesystem.create_type(name="test.Type")
cas2.typesystem.create_type(name="test.Type")


def test_default_typesystem_is_not_shared_load_from_xmi(empty_cas_xmi):
# https://github.com/dkpro/dkpro-cassis/issues/67
cas1 = load_cas_from_xmi(empty_cas_xmi)
cas2 = load_cas_from_xmi(empty_cas_xmi)

t1 = cas1.typesystem.create_type(name="test.Type")
t2 = cas2.typesystem.create_type(name="test.Type")
cas1.typesystem.create_type(name="test.Type")
cas2.typesystem.create_type(name="test.Type")


# View
Expand Down
2 changes: 1 addition & 1 deletion tests/test_json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from cassis.typesystem import TYPE_NAME_ANNOTATION, TypeSystemMode, TYPE_NAME_DOCUMENT_ANNOTATION
from cassis.typesystem import TYPE_NAME_ANNOTATION, TYPE_NAME_DOCUMENT_ANNOTATION, TypeSystemMode
from tests.fixtures import *
from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
from tests.util import assert_json_equal
Expand Down
11 changes: 5 additions & 6 deletions tests/test_typesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
TYPE_NAME_ANNOTATION_BASE,
TYPE_NAME_ARRAY_BASE,
TYPE_NAME_BOOLEAN,
TYPE_NAME_DOCUMENT_ANNOTATION,
TYPE_NAME_INTEGER,
TYPE_NAME_SOFA,
TYPE_NAME_STRING,
TYPE_NAME_STRING_ARRAY,
TYPE_NAME_TOP,
TypeCheckError,
is_predefined,
TYPE_NAME_DOCUMENT_ANNOTATION,
)
from tests.fixtures import *
from tests.util import assert_xml_equal
Expand Down Expand Up @@ -315,7 +315,6 @@ def test_type_inherits_from_annotation():
],
)
def test_is_predefined(type_name: str, expected: bool):

assert is_predefined(type_name) == expected


Expand Down Expand Up @@ -498,7 +497,7 @@ def test_is_primitive_collection(type_name: str, expected: bool):
("uima.cas.DoubleArray", True),
],
)
def test_is_primitive_collection(type_name: str, expected: bool):
def test_is_primitive_array(type_name: str, expected: bool):
typesystem = TypeSystem()

assert typesystem.is_primitive_array(type_name) == expected
Expand Down Expand Up @@ -883,9 +882,9 @@ def test_that_merging_types_creates_self_contained_type_system():
def test_that_dkpro_core_typeystem_can_be_loaded():
ts = load_dkpro_core_typesystem()

POS = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS")
NamedEntity = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity")
CoreferenceLink = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink")
assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS") is not None
assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity") is not None
assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink") is not None


# Type checking
Expand Down
2 changes: 0 additions & 2 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from pathlib import Path

from cassis.typesystem import TYPE_NAME_ANNOTATION, TYPE_NAME_FS_ARRAY
from tests.fixtures import *
from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
Expand Down
33 changes: 17 additions & 16 deletions tests/test_xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_deserializing_from_string(small_typesystem_xml):
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="mySofa" mimeType="text/plain"
sofaString="Joe waited for the train . The train was late ."/>
<cas:View sofa="1" members="8 79 84"/>
</xmi:XMI>
</xmi:XMI>
"""
load_cas_from_xmi(cas_xmi, typesystem=typesystem)

Expand Down Expand Up @@ -102,7 +102,7 @@ def test_views_are_parsed(small_xmi, small_typesystem_xml):
<cas:Sofa xmi:id="2" sofaNum="2" sofaID="sofa2" mimeType="text/plain"
sofaString="The train was late ."/>
<cas:View sofa="2" members="84"/>
</xmi:XMI>
</xmi:XMI>
"""
cas = load_cas_from_xmi(cas_xmi, typesystem=typesystem)

Expand Down Expand Up @@ -302,11 +302,11 @@ def test_offsets_are_recomputed_when_sofa_string_changes(cas_with_smileys_xmi, d

def test_offsets_work_for_empty_sofastring():
xmi = """<?xml version="1.0" encoding="UTF-8"?>
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore"
xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0" />
<tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="0" language="en" />
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="" />
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore"
xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0" />
<tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="0" language="en" />
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="" />
<cas:View sofa="1" members="2" />
</xmi:XMI>"""

Expand All @@ -316,14 +316,15 @@ def test_offsets_work_for_empty_sofastring():

def test_that_invalid_offsets_remain_unmapped_on_import():
xmi = """<?xml version="1.0" encoding="UTF-8"?>
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore"
xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0" />
<tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="4" language="en" />
<tcas:Annotation xmi:id="3" sofa="1" begin="100" end="200" />
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Test" />
<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore"
xmlns:cas="http:///uima/cas.ecore" xmi:version="2.0">
<cas:NULL xmi:id="0" />
<tcas:DocumentAnnotation xmi:id="2" sofa="1" begin="0" end="4" language="en" />
<tcas:Annotation xmi:id="3" sofa="1" begin="100" end="200" />
<cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Test" />
<cas:View sofa="1" members="2 3" />
</xmi:XMI>"""
</xmi:XMI>
"""

# assert no exception
with warnings.catch_warnings(record=True) as ws:
Expand Down Expand Up @@ -361,14 +362,14 @@ def test_leniency_type_not_in_typesystem_lenient(cas_with_leniency_xmi, small_ty
typesystem = load_typesystem(small_typesystem_xml)

with pytest.warns(UserWarning):
cas = load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=True)
load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=True)


def test_leniency_type_not_in_typesystem_not_lenient(cas_with_leniency_xmi, small_typesystem_xml):
typesystem = load_typesystem(small_typesystem_xml)

with pytest.raises(TypeNotFoundError):
cas = load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=False)
load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=False)


def test_multiple_references_allowed_true():
Expand Down

0 comments on commit a76b4e1

Please sign in to comment.