diff --git a/cassis/cas.py b/cassis/cas.py index a6206ff..1125abc 100644 --- a/cassis/cas.py +++ b/cassis/cas.py @@ -13,6 +13,8 @@ from cassis.typesystem import ( FEATURE_BASE_NAME_HEAD, + FEATURE_BASE_NAME_LANGUAGE, + TYPE_NAME_DOCUMENT_ANNOTATION, TYPE_NAME_FS_ARRAY, TYPE_NAME_FS_LIST, TYPE_NAME_SOFA, @@ -21,8 +23,6 @@ TypeCheckError, TypeSystem, TypeSystemMode, - TYPE_NAME_DOCUMENT_ANNOTATION, - FEATURE_BASE_NAME_LANGUAGE, ) _validator_optional_string = validators.optional(validators.instance_of(str)) diff --git a/cassis/json.py b/cassis/json.py index 23cb42e..6c39384 100644 --- a/cassis/json.py +++ b/cassis/json.py @@ -1,12 +1,42 @@ import base64 import json import math -from collections import OrderedDict +from collections import OrderedDict, defaultdict from io import TextIOBase, TextIOWrapper from math import isnan +from typing import Union, IO, Optional, Dict, List +from toposort import toposort_flatten from cassis.cas import NAME_DEFAULT_SOFA, Cas, IdGenerator, Sofa, View -from cassis.typesystem import * +from cassis.typesystem import ( + TYPE_NAME_ANNOTATION, + TypeSystem, + is_predefined, + merge_typesystems, + TYPE_NAME_SOFA, + FEATURE_BASE_NAME_SOFAARRAY, + array_type_name_for_type, + FEATURE_BASE_NAME_SOFASTRING, + FEATURE_BASE_NAME_SOFAID, + FEATURE_BASE_NAME_SOFAMIME, + FEATURE_BASE_NAME_SOFANUM, + FEATURE_BASE_NAME_SOFAURI, + TYPE_NAME_FS_ARRAY, + TYPE_NAME_BYTE_ARRAY, + TYPE_NAME_FLOAT_ARRAY, + TYPE_NAME_DOUBLE_ARRAY, + TypeSystemMode, + TYPE_NAME_DOCUMENT_ANNOTATION, + Type, + Feature, + TYPE_NAME_TOP, + is_primitive_array, + TYPE_NAME_FLOAT, + TYPE_NAME_DOUBLE, + element_type_name_for_array_type, + is_primitive, + is_array, +) RESERVED_FIELD_PREFIX = "%" REF_FEATURE_PREFIX = "@" diff --git a/cassis/xmi.py b/cassis/xmi.py index d9d7b35..2e1bfe2 100644 --- a/cassis/xmi.py +++ b/cassis/xmi.py @@ -12,7 +12,6 @@ from cassis.typesystem import ( _LIST_TYPES, _PRIMITIVE_ARRAY_TYPES, - _PRIMITIVE_LIST_TYPES, FEATURE_BASE_NAME_BEGIN, FEATURE_BASE_NAME_END, FEATURE_BASE_NAME_HEAD, @@ -295,7 +294,8 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b else: view = cas.create_view(sofa.sofaID, xmiID=sofa.xmiID, sofaNum=sofa.sofaNum) - # Directly set the sofaString and offsetConverter for the sofa to avoid recomputing the offset convertion (slow!) when using the setter + # Directly set the sofaString and offsetConverter for the sofa to avoid recomputing the offset convertion + # (slow!) when using the setter view.get_sofa()._sofaString = sofa.sofaString view.get_sofa()._offset_converter = sofa._offset_converter view.sofa_mime = sofa.mimeType diff --git a/tests/test_cas.py b/tests/test_cas.py index f09f06d..670db07 100644 --- a/tests/test_cas.py +++ b/tests/test_cas.py @@ -20,8 +20,8 @@ def test_default_typesystem_is_not_shared(): cas1 = Cas() cas2 = Cas() - t1 = cas1.typesystem.create_type(name="test.Type") - t2 = cas2.typesystem.create_type(name="test.Type") + cas1.typesystem.create_type(name="test.Type") + cas2.typesystem.create_type(name="test.Type") def test_default_typesystem_is_not_shared_load_from_xmi(empty_cas_xmi): @@ -29,8 +29,8 @@ def test_default_typesystem_is_not_shared_load_from_xmi(empty_cas_xmi): cas1 = load_cas_from_xmi(empty_cas_xmi) cas2 = load_cas_from_xmi(empty_cas_xmi) - t1 = cas1.typesystem.create_type(name="test.Type") - t2 = cas2.typesystem.create_type(name="test.Type") + cas1.typesystem.create_type(name="test.Type") + cas2.typesystem.create_type(name="test.Type") # View diff --git a/tests/test_json.py b/tests/test_json.py index 1509a5c..810e421 100644 --- a/tests/test_json.py +++ b/tests/test_json.py @@ -1,6 +1,6 @@ import json -from cassis.typesystem import TYPE_NAME_ANNOTATION, TypeSystemMode, TYPE_NAME_DOCUMENT_ANNOTATION +from cassis.typesystem import TYPE_NAME_ANNOTATION, TYPE_NAME_DOCUMENT_ANNOTATION, TypeSystemMode from tests.fixtures import * from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator from tests.util import assert_json_equal diff --git a/tests/test_typesystem.py b/tests/test_typesystem.py index d5a84c8..1f0ecbf 100644 --- a/tests/test_typesystem.py +++ b/tests/test_typesystem.py @@ -10,6 +10,7 @@ TYPE_NAME_ANNOTATION_BASE, TYPE_NAME_ARRAY_BASE, TYPE_NAME_BOOLEAN, + TYPE_NAME_DOCUMENT_ANNOTATION, TYPE_NAME_INTEGER, TYPE_NAME_SOFA, TYPE_NAME_STRING, @@ -17,7 +18,6 @@ TYPE_NAME_TOP, TypeCheckError, is_predefined, - TYPE_NAME_DOCUMENT_ANNOTATION, ) from tests.fixtures import * from tests.util import assert_xml_equal @@ -315,7 +315,6 @@ def test_type_inherits_from_annotation(): ], ) def test_is_predefined(type_name: str, expected: bool): - assert is_predefined(type_name) == expected @@ -498,7 +497,7 @@ def test_is_primitive_collection(type_name: str, expected: bool): ("uima.cas.DoubleArray", True), ], ) -def test_is_primitive_collection(type_name: str, expected: bool): +def test_is_primitive_array(type_name: str, expected: bool): typesystem = TypeSystem() assert typesystem.is_primitive_array(type_name) == expected @@ -883,9 +882,9 @@ def test_that_merging_types_creates_self_contained_type_system(): def test_that_dkpro_core_typeystem_can_be_loaded(): ts = load_dkpro_core_typesystem() - POS = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS") - NamedEntity = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity") - CoreferenceLink = ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink") + assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS") is not None + assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity") is not None + assert ts.get_type("de.tudarmstadt.ukp.dkpro.core.api.coref.type.CoreferenceLink") is not None # Type checking diff --git a/tests/test_util.py b/tests/test_util.py index 8a2fbd0..3a4a54b 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,5 +1,3 @@ -from pathlib import Path - from cassis.typesystem import TYPE_NAME_ANNOTATION, TYPE_NAME_FS_ARRAY from tests.fixtures import * from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator diff --git a/tests/test_xmi.py b/tests/test_xmi.py index f6bb02b..aaf0d9b 100644 --- a/tests/test_xmi.py +++ b/tests/test_xmi.py @@ -65,7 +65,7 @@ def test_deserializing_from_string(small_typesystem_xml): - + """ load_cas_from_xmi(cas_xmi, typesystem=typesystem) @@ -102,7 +102,7 @@ def test_views_are_parsed(small_xmi, small_typesystem_xml): - + """ cas = load_cas_from_xmi(cas_xmi, typesystem=typesystem) @@ -302,11 +302,11 @@ def test_offsets_are_recomputed_when_sofa_string_changes(cas_with_smileys_xmi, d def test_offsets_work_for_empty_sofastring(): xmi = """ - - - - + + + + """ @@ -316,14 +316,15 @@ def test_offsets_work_for_empty_sofastring(): def test_that_invalid_offsets_remain_unmapped_on_import(): xmi = """ - - - - - + + + + + - """ + + """ # assert no exception with warnings.catch_warnings(record=True) as ws: @@ -361,14 +362,14 @@ def test_leniency_type_not_in_typesystem_lenient(cas_with_leniency_xmi, small_ty typesystem = load_typesystem(small_typesystem_xml) with pytest.warns(UserWarning): - cas = load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=True) + load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=True) def test_leniency_type_not_in_typesystem_not_lenient(cas_with_leniency_xmi, small_typesystem_xml): typesystem = load_typesystem(small_typesystem_xml) with pytest.raises(TypeNotFoundError): - cas = load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=False) + load_cas_from_xmi(cas_with_leniency_xmi, typesystem=typesystem, lenient=False) def test_multiple_references_allowed_true():