From a38a6fae2a840051eb2bfba9863c6af99359644f Mon Sep 17 00:00:00 2001 From: Elia Migliore Date: Mon, 16 Oct 2023 14:58:44 +0200 Subject: [PATCH] wip --- karapace/protobuf/dependency.py | 81 ++++++++++ karapace/protobuf/schema.py | 160 ++++++++++++++++---- tests/unit/protobuf/test_protobuf_schema.py | 4 +- 3 files changed, 214 insertions(+), 31 deletions(-) create mode 100644 karapace/protobuf/dependency.py diff --git a/karapace/protobuf/dependency.py b/karapace/protobuf/dependency.py new file mode 100644 index 000000000..5bb543aed --- /dev/null +++ b/karapace/protobuf/dependency.py @@ -0,0 +1,81 @@ +""" +karapace - dependency + +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" +from typing import List, Optional, Set + +from karapace.dependency import DependencyVerifierResult +from karapace.protobuf import UsedType +from karapace.protobuf.known_dependency import DependenciesHardcoded, KnownDependency +from karapace.protobuf.one_of_element import OneOfElement + + +class ProtobufDependencyVerifier: + def __init__(self) -> None: + self.declared_types: List[str] = [] + self.used_types: List[str] = [] + self.import_path: List[str] = [] + + def add_declared_type(self, full_name: str) -> None: + self.declared_types.append(full_name) + + + def add_used_type(self, parent: str, element_type: str) -> None: + if element_type.find("map<") == 0: + end = element_type.find(">") + virgule = element_type.find(",") + key = element_type[4:virgule] + value = element_type[virgule + 1: end] + value = value.strip() + self.used_types.append(parent + ";" + key) + self.used_types.append(parent + ";" + value) + else: + self.used_types.append(parent + ";" + element_type) + + def add_import(self, import_name: str) -> None: + self.import_path.append(import_name) + + def is_type_declared( + self, + used_type: str, + declared_index: Set[str], + father_child_type: Optional[str], + used_type_with_scope: Optional[str], + ) -> bool: + return ( + used_type in declared_index + or (used_type_with_scope is not None and used_type_with_scope in declared_index) + or (father_child_type is not None and father_child_type in declared_index) + or "." + used_type in declared_index + ) + + def verify(self) -> DependencyVerifierResult: + declared_index = set(self.declared_types) + for used_type in self.used_types: + delimiter = used_type.rfind(";") + father_child_type = None + used_type_with_scope = None + if delimiter != -1: + used_type_with_scope = used_type[:delimiter] + "." + used_type[delimiter + 1:] + father_delimiter = used_type[:delimiter].find(".") + if father_delimiter != -1: + father_child_type = used_type[:father_delimiter] + "." + used_type[delimiter + 1:] + used_type = used_type[delimiter + 1:] + + if used_type in DependenciesHardcoded.index: + continue + + known_pkg = KnownDependency.index_simple.get(used_type) or KnownDependency.index.get(used_type) + if known_pkg is not None and known_pkg in self.import_path: + continue + + if self.is_type_declared(used_type, declared_index, father_child_type, used_type_with_scope): + continue + + return DependencyVerifierResult(False, f'type "{used_type}" is not defined') + + return DependencyVerifierResult(True) + + diff --git a/karapace/protobuf/schema.py b/karapace/protobuf/schema.py index c3198946b..d20a9542a 100644 --- a/karapace/protobuf/schema.py +++ b/karapace/protobuf/schema.py @@ -2,6 +2,9 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ +import itertools + +from karapace.dataclasses import default_dataclass # Ported from square/wire: # wire-library/wire-schema/src/commonMain/kotlin/com/squareup/wire/schema/Schema.kt # Ported partially for required functionality. @@ -109,47 +112,106 @@ def option_element_string(option: OptionElement) -> str: return f"option {result};\n" -@dataclasses.dataclass(frozen=True) +@default_dataclass class UsedType: parent_object_qualified_name: str used_attribute_type: str -@dataclasses.dataclass(frozen=True) +@default_dataclass +class SourceFileReference: + reference: str + import_order: int + + +@default_dataclass class TypeTree: token: str children: List["TypeTree"] + original_file: Optional[SourceFileReference] + + def expand(self) -> Optional[SourceFileReference]: + expanded_files = itertools.chain( + [] if self.original_file is None else [self.original_file], + (child.expand() for child in self.children) + ) + + # this is not empty by definition, you must have an import for each leaf at least + only_files = filter( + lambda x: x is None, + expanded_files + ), + + return min( + only_files, + key=lambda x: x.imported_order + ) @property def is_fully_qualified_type(self) -> bool: + return self.original_file is not None + + @property + def is_leaf(self) -> bool: return len(self.children) == 0 def represent(self, level=0) -> str: spacing = " " * 3 * level - if self.is_full_path_type: - return f"{spacing}{self.token}" + token = self.token + ">" if not self.is_fully_qualified_type else self.token + if self.is_leaf: + return f"{spacing}{token}" child_repr = "\n".join(child.represent(level=level + 1) for child in self.children) - return f"{spacing}{self.token} -> \n{child_repr}" + return f"{spacing}{token} -> \n{child_repr}" def __repr__(self) -> str: return self.represent() -def add_new_type_r(parent_tree: TypeTree, remaining_tokens: List[str]) -> None: +def _add_new_type( + parent_tree: TypeTree, + remaining_tokens: List[str], + file: str, + inserted_elements: int, +) -> None: if remaining_tokens: token = remaining_tokens.pop() for child in parent_tree.children: if child.token == token: - return add_new_type_r(child, remaining_tokens) # add a reference from which object/file was coming from - new_leaf = TypeTree(token, []) + return _add_new_type( + child, + remaining_tokens, + file, + inserted_elements + ) # add a reference from which object/file was coming from + + new_leaf = TypeTree( + token=token, + children=[], + original_file=( + SourceFileReference( + reference=file, + import_order=inserted_elements + ) if remaining_tokens + else None + ), + ) parent_tree.children.append(new_leaf) - return add_new_type_r(new_leaf, remaining_tokens) - # required by pylint + return _add_new_type(new_leaf, remaining_tokens, file, inserted_elements) return None -def add_new_type(root_tree: TypeTree, full_path_type: str) -> None: - add_new_type_r(root_tree, full_path_type.split(".")) +def add_new_type( + root_tree: TypeTree, + full_path_type: str, + file: str, + inserted_elements: int, +) -> None: + _add_new_type( + root_tree, + full_path_type.split("."), + file, + inserted_elements, + ) # one more it's added after that instruction class ProtobufSchema: @@ -253,17 +315,29 @@ def verify_schema_dependencies(self) -> DependencyVerifierResult: return DependencyVerifierResult(True) return DependencyVerifierResult(False, f'type "{maybe_wrong_declaration}" is not defined') - def nested_type_tree(self, root_tree: TypeTree, parent_name: str, nested_type: TypeElement): + def nested_type_tree( + self, + root_tree: TypeTree, + parent_name: str, + nested_type: TypeElement, + filename: str, + inserted_types: int, + ) -> int: nested_component_full_path_name = parent_name + "." + nested_type.name - add_new_type(root_tree, nested_component_full_path_name) + add_new_type(root_tree, nested_component_full_path_name, filename, inserted_types) + inserted_types += 1 for child in nested_type.nested_types: - self.nested_type_tree(root_tree, nested_component_full_path_name, child) + self.nested_type_tree(root_tree, nested_component_full_path_name, child, filename, inserted_types) + inserted_types += 1 + + return inserted_types - def types_tree_r(self, root_tree: TypeTree): + def types_tree_r(self, root_tree: TypeTree, inserted_types: int, filename: str) -> tuple[TypeTree, int]: # verify that the import it's the same as the order of importing if self.dependencies: - for key in self.dependencies: - self.dependencies[key].get_schema().schema.types_tree_r(root_tree) + for dependency in self.dependencies: + dependency_tree, inserted_types = self.dependencies[dependency].get_schema().schema \ + .types_tree_r(root_tree, inserted_types, dependency) # we can add an incremental number and a reference to the file # to get back which is the file who a certain declaration it's referring to @@ -276,16 +350,21 @@ def types_tree_r(self, root_tree: TypeTree): for element_type in self.proto_file_element.types: type_name = element_type.name full_name = package_name + "." + type_name - add_new_type(root_tree, full_name) + add_new_type(root_tree, full_name, filename, inserted_types) + inserted_types += 1 for nested_type in element_type.nested_types: - self.nested_type_tree(root_tree, full_name, nested_type) + inserted_types = self.nested_type_tree(root_tree, full_name, nested_type, filename, inserted_types) - return root_tree + return root_tree, inserted_types def types_tree(self) -> TypeTree: - root_tree = TypeTree(".", []) - self.types_tree_r(root_tree) + root_tree = TypeTree( + token=".", + children=[], + original_file=None, + ) + self.types_tree_r(root_tree, 0, str(self.proto_file_element.location)) return root_tree @staticmethod @@ -294,17 +373,35 @@ def used_type(parent: str, element_type: str) -> List[UsedType]: end = element_type.find(">") virgule = element_type.find(",") key_element_type = element_type[4:virgule] - value_element_type = element_type[virgule + 1 : end] + value_element_type = element_type[virgule + 1: end] value_element_type = value_element_type.strip() - return [UsedType(parent, key_element_type), UsedType(parent, value_element_type)] - return [UsedType(parent, element_type)] + return [ + UsedType( + parent_object_qualified_name=parent, + used_attribute_type=key_element_type + ), UsedType( + parent_object_qualified_name=parent, + used_attribute_type=value_element_type + ) + ] + return [UsedType( + parent_object_qualified_name=parent, + used_attribute_type=element_type + )] @staticmethod - def dependencies_one_of(package_name: str, parent_name: str, one_of: OneOfElement) -> List[UsedType]: + def dependencies_one_of( + package_name: str, + parent_name: str, + one_of: OneOfElement, + ) -> List[UsedType]: parent = package_name + "." + parent_name dependencies = [] for field in one_of.fields: - dependencies.append(UsedType(parent, field.element_type)) + dependencies.append(UsedType( + parent_object_qualified_name=parent, + used_attribute_type=field.element_type, + )) return dependencies def used_types(self) -> List[UsedType]: @@ -333,7 +430,12 @@ def used_types(self) -> List[UsedType]: return used_types - def nested_used_type(self, package_name: str, parent_name, element_type: TypeElement) -> List[str]: + def nested_used_type( + self, + package_name: str, + parent_name: str, + element_type: TypeElement, + ) -> List[str]: used_types = [] if isinstance(element_type, MessageElement): diff --git a/tests/unit/protobuf/test_protobuf_schema.py b/tests/unit/protobuf/test_protobuf_schema.py index 623d73d88..38783b975 100644 --- a/tests/unit/protobuf/test_protobuf_schema.py +++ b/tests/unit/protobuf/test_protobuf_schema.py @@ -406,6 +406,6 @@ def test_partial_path_in_protobuf() -> None: """ no_ref_schema = ValidatedTypedSchema.parse(SchemaType.PROTOBUF, plan) - dep = Dependency("CustomerPlan.proto", Subject("foo"), 1, no_ref_schema) - ref_schema = ValidatedTypedSchema.parse(SchemaType.PROTOBUF, customer_plan_event, None, {"CustomerPlan.proto": dep}) + dep = Dependency("ggwp", Subject("bar"), 1, no_ref_schema) + ref_schema = ValidatedTypedSchema.parse(SchemaType.PROTOBUF, customer_plan_event, None, {"foobar": dep}) print(ref_schema)