Skip to content

Commit

Permalink
Merge pull request #848 from Aiven-Open/eliax1996/create-protobuf-sch…
Browse files Browse the repository at this point in the history
…ema-normalisation

normalization: add normalization of the options
  • Loading branch information
aiven-anton authored Apr 12, 2024
2 parents 0b339df + 7293fc7 commit aafc087
Show file tree
Hide file tree
Showing 19 changed files with 963 additions and 35 deletions.
16 changes: 16 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,22 @@ The REST proxy process manages a set of producer and consumer clients, which are

Before a client refreshes its OAuth2 JWT token, it is expected to remove currently running consumers (eg. after committing their offsets) and producers using the current token.

Schema Normalization
--------------------

When the ``normalize=true`` query parameter is specified on the POST ``/subjects/{subject}/versions?normalize=true`` endpoint or the POST ``/subjects/{subject}?normalize=true`` endpoint,
Karapace uses a schema normalization algorithm to ensure that the schema is stored in a canonical form.

This normalization process is done so that semantically equivalent schemas are stored in the same way and are considered equal.

Normalization is currently only supported for Protobuf schemas. Karapace does not support all normalization features implemented by Confluent Schema Registry.
Currently the normalization process is done only for the ordering of the optional fields in the schema.
Use the feature with the assumption that it will be extended in the future, which means that two semantically equivalent schemas could be considered
different by the normalization process of one Karapace version and equal by that of a later version.
When using a normalization process, the safe outcome is always to treat two semantically equivalent schemas as different; the real problem would be treating two semantically different schemas as equivalent.
From that point of view, a future extension of the normalization process is not considered a breaking change but rather a refinement of the existing behaviour.


Uninstall
=========

Expand Down
1 change: 0 additions & 1 deletion karapace/kafka_rest_apis/consumer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,6 @@ async def fetch(self, internal_name: Tuple[str, str], content_type: str, formats
)
# we get to be more in line with the confluent proxy by doing a bunch of fetches each time and
# respecting the max fetch request size
# pylint: disable=protected-access
max_bytes = (
int(query_params["max_bytes"])
if "max_bytes" in query_params
Expand Down
6 changes: 3 additions & 3 deletions karapace/protobuf/compare_type_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
from karapace.protobuf.exception import IllegalStateException
from karapace.protobuf.message_element import MessageElement
from karapace.protobuf.type_element import TypeElement
from typing import List
from typing import Sequence


def compare_type_lists(
self_types_list: List[TypeElement],
other_types_list: List[TypeElement],
self_types_list: Sequence[TypeElement],
other_types_list: Sequence[TypeElement],
result: CompareResult,
compare_types: CompareTypes,
) -> CompareResult:
Expand Down
5 changes: 3 additions & 2 deletions karapace/protobuf/enum_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from karapace.protobuf.option_element import OptionElement
from karapace.protobuf.type_element import TypeElement
from karapace.protobuf.utils import append_documentation, append_indented
from typing import Sequence


class EnumElement(TypeElement):
Expand All @@ -22,8 +23,8 @@ def __init__(
location: Location,
name: str,
documentation: str = "",
options: list[OptionElement] | None = None,
constants: list[EnumConstantElement] | None = None,
options: Sequence[OptionElement] | None = None,
constants: Sequence[EnumConstantElement] | None = None,
) -> None:
# Enums do not allow nested type declarations.
super().__init__(location, name, documentation, options or [], [])
Expand Down
3 changes: 2 additions & 1 deletion karapace/protobuf/extend_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
from karapace.protobuf.field_element import FieldElement
from karapace.protobuf.location import Location
from karapace.protobuf.utils import append_documentation, append_indented
from typing import Sequence


@dataclass
class ExtendElement:
location: Location
name: str
documentation: str = ""
fields: list[FieldElement] | None = None
fields: Sequence[FieldElement] | None = None

def to_schema(self) -> str:
result: list[str] = []
Expand Down
3 changes: 2 additions & 1 deletion karapace/protobuf/group_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from karapace.protobuf.field_element import FieldElement
from karapace.protobuf.location import Location
from karapace.protobuf.utils import append_documentation, append_indented
from typing import Sequence


@dataclass
Expand All @@ -20,7 +21,7 @@ class GroupElement:
name: str
tag: int
documentation: str = ""
fields: list[FieldElement] | None = None
fields: Sequence[FieldElement] | None = None

def to_schema(self) -> str:
result: list[str] = []
Expand Down
20 changes: 13 additions & 7 deletions karapace/protobuf/message_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,27 @@
from karapace.protobuf.reserved_element import ReservedElement
from karapace.protobuf.type_element import TypeElement
from karapace.protobuf.utils import append_documentation, append_indented
from typing import Sequence


class MessageElement(TypeElement):
nested_types: Sequence[TypeElement]
fields: Sequence[FieldElement]
one_ofs: Sequence[OneOfElement]
groups: Sequence[GroupElement]

def __init__(
self,
location: Location,
name: str,
documentation: str = "",
nested_types: list[TypeElement] | None = None,
options: list[OptionElement] | None = None,
reserveds: list[ReservedElement] | None = None,
fields: list[FieldElement] | None = None,
one_ofs: list[OneOfElement] | None = None,
extensions: list[ExtensionsElement] | None = None,
groups: list[GroupElement] | None = None,
nested_types: Sequence[TypeElement] | None = None,
options: Sequence[OptionElement] | None = None,
reserveds: Sequence[ReservedElement] | None = None,
fields: Sequence[FieldElement] | None = None,
one_ofs: Sequence[OneOfElement] | None = None,
extensions: Sequence[ExtensionsElement] | None = None,
groups: Sequence[GroupElement] | None = None,
) -> None:
super().__init__(location, name, documentation, options or [], nested_types or [])
self.reserveds = reserveds or []
Expand Down
7 changes: 4 additions & 3 deletions karapace/protobuf/one_of_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,17 @@
from karapace.protobuf.group_element import GroupElement
from karapace.protobuf.option_element import OptionElement
from karapace.protobuf.utils import append_documentation, append_indented
from typing import Sequence


class OneOfElement:
def __init__(
self,
name: str,
documentation: str = "",
fields: list[FieldElement] | None = None,
groups: list[GroupElement] | None = None,
options: list[OptionElement] | None = None,
fields: Sequence[FieldElement] | None = None,
groups: Sequence[GroupElement] | None = None,
options: Sequence[OptionElement] | None = None,
) -> None:
self.name = name
self.documentation = documentation
Expand Down
2 changes: 2 additions & 0 deletions karapace/protobuf/option_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@


class OptionElement:
name: str

class Kind(Enum):
STRING = 1
BOOLEAN = 2
Expand Down
18 changes: 11 additions & 7 deletions karapace/protobuf/proto_file_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from karapace.protobuf.service_element import ServiceElement
from karapace.protobuf.syntax import Syntax
from karapace.protobuf.type_element import TypeElement
from typing import Dict, List, NewType, Optional
from typing import Dict, List, NewType, Optional, Sequence


def _collect_dependencies_types(compare_types: CompareTypes, dependencies: Optional[Dict[str, Dependency]], is_self: bool):
Expand All @@ -37,17 +37,21 @@ def _collect_dependencies_types(compare_types: CompareTypes, dependencies: Optio


class ProtoFileElement:
types: Sequence[TypeElement]
services: Sequence[ServiceElement]
extend_declarations: Sequence[ExtendElement]

def __init__(
self,
location: Location,
package_name: Optional[PackageName] = None,
syntax: Optional[Syntax] = None,
imports: Optional[List[TypeName]] = None,
public_imports: Optional[List[TypeName]] = None,
types: Optional[List[TypeElement]] = None,
services: Optional[List[ServiceElement]] = None,
extend_declarations: Optional[List[ExtendElement]] = None,
options: Optional[List[OptionElement]] = None,
imports: Optional[Sequence[TypeName]] = None,
public_imports: Optional[Sequence[TypeName]] = None,
types: Optional[Sequence[TypeElement]] = None,
services: Optional[Sequence[ServiceElement]] = None,
extend_declarations: Optional[Sequence[ExtendElement]] = None,
options: Optional[Sequence[OptionElement]] = None,
) -> None:
if types is None:
types = list()
Expand Down
Loading

0 comments on commit aafc087

Please sign in to comment.