Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to Collection and TokenMetadata: Derive attributes #48

Merged
merged 7 commits into from
Sep 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 104 additions & 43 deletions open_rarity/models/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,53 @@ class Collection:
"""

attributes_frequency_counts: dict[AttributeName, dict[AttributeValue, int]]
name: str | None = ""
name: str

def __init__(
self,
tokens: list[Token],
attributes_frequency_counts: dict[
AttributeName, dict[AttributeValue, int]
],
tokens: list[Token],
]
| None = None,
name: str | None = "",
):
self._tokens = tokens
self.attributes_frequency_counts = (
self._normalize_attributes_frequency_counts(
"""
Parameters
----------
tokens : list[Token]
    list of all tokens that belong to the collection. Must have metadata
properly set if attributes_frequency_counts is not provided.
attributes_frequency_counts:
dict[AttributeName, dict[AttributeValue, int]] | None, optional
dictionary of attributes to the number of tokens in this collection
that has a specific value for every possible value for the given
attribute, by default None.
If not provided, the attributes distribution will be derived from the
attributes on the tokens provided.

Example:
{"hair": {"brown": 500, "blonde": 100}
which means 500 tokens has hair=brown, 100 token has hair=blonde
Note: All trait names and string values should be lowercased and stripped
    of leading and trailing whitespace.
Note 2: We currently only support string attributes in
attributes_frequency_counts
name : str | None, optional
A reference string only used for debugging or identification, by default ""
"""
self._tokens = tokens
self.name = name or ""
if attributes_frequency_counts:
self.attributes_frequency_counts = (
self._normalize_attributes_frequency_counts(
attributes_frequency_counts
)
)
else:
self.attributes_frequency_counts = (
self._derive_normalized_attributes_frequency_counts()
)
)
self.name = name

@property
def tokens(self) -> list[Token]:
Expand Down Expand Up @@ -112,41 +142,6 @@ def token_standards(self) -> list[TokenStandard]:
token_standards.add(token.token_standard)
return list(token_standards)

def _normalize_attributes_frequency_counts(
    self,
    attributes_frequency_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ],
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """We normalize all collection attributes to ensure that neither casing nor
    leading/trailing spaces produce different attributes:
    (e.g. 'Hat' == 'hat' == 'hat ')
    If a collection has the following in their attributes frequency counts:
    ('Hat', 'beanie') 5 tokens and
    ('hat', 'beanie') 10 tokens
    this would produce: ('hat', 'beanie') 15 tokens

    Parameters
    ----------
    attributes_frequency_counts : dict[AttributeName, dict[AttributeValue, int]]
        Raw mapping of attribute name -> (attribute value -> token count).

    Returns
    -------
    dict[AttributeName, dict[AttributeValue, int]]
        The same counts re-keyed by normalized names/values, with counts of
        entries that collapse to the same normalized key summed together.
    """
    normalized: dict[AttributeName, dict[AttributeValue, int]] = {}
    for (
        attr_name,
        attr_value_to_count,
    ) in attributes_frequency_counts.items():
        normalized_name = normalize_attribute_string(attr_name)
        if normalized_name not in normalized:
            normalized[normalized_name] = {}
        for attr_value, attr_count in attr_value_to_count.items():
            # Only string values are normalized; non-string values
            # (if any) are used as-is.
            normalized_value = (
                normalize_attribute_string(attr_value)
                if isinstance(attr_value, str)
                else attr_value
            )
            # Merge counts when distinct raw values collapse to the
            # same normalized value.
            if normalized_value not in normalized[normalized_name]:
                normalized[normalized_name][normalized_value] = attr_count
            else:
                normalized[normalized_name][normalized_value] += attr_count

    return normalized

def total_tokens_with_attribute(self, attribute: StringAttribute) -> int:
"""Returns the numbers of tokens in this collection with the attribute
based on the attributes frequency counts.
Expand Down Expand Up @@ -231,5 +226,71 @@ def extract_collection_attributes(

return collection_traits

def _normalize_attributes_frequency_counts(
    self,
    attributes_frequency_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ],
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """Collapse attribute names/values that differ only by casing or by
    leading/trailing whitespace into a single canonical entry
    (e.g. 'Hat' == 'hat' == 'hat ').

    For example, if the incoming counts contain
    ('Hat', 'beanie'): 5 tokens and ('hat', 'beanie'): 10 tokens,
    the result maps ('hat', 'beanie') to 15 tokens.
    """
    merged: dict[AttributeName, dict[AttributeValue, int]] = {}
    for raw_name, value_counts in attributes_frequency_counts.items():
        canonical_name = normalize_attribute_string(raw_name)
        target = merged.setdefault(canonical_name, {})
        for raw_value, count in value_counts.items():
            # Only string values get normalized; other value types
            # pass through unchanged.
            canonical_value = (
                normalize_attribute_string(raw_value)
                if isinstance(raw_value, str)
                else raw_value
            )
            # Sum counts of raw entries that collapse to the same key.
            target[canonical_value] = target.get(canonical_value, 0) + count
    return merged

def _derive_normalized_attributes_frequency_counts(
    self,
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """Derives and constructs attributes_frequency_counts based on
    string attributes on tokens. Numeric or date attributes currently not
    supported.

    Returns
    -------
    dict[AttributeName, dict[AttributeValue, int]]
        dictionary of attributes to the number of tokens in this collection
        that has a specific value for every possible value for the given
        attribute.
    """
    attrs_freq_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ] = defaultdict(dict)

    for token in self._tokens:
        for (
            attr_name,
            str_attr,
        ) in token.metadata.string_attributes.items():
            normalized_name = normalize_attribute_string(attr_name)
            # Bug fix: the membership check previously indexed by the raw
            # `attr_name` while writes were keyed by `normalized_name`.
            # When they differed (e.g. "Hat" vs "hat"), counts were reset
            # to 1 and the defaultdict grew spurious empty raw-name keys.
            value_counts = attrs_freq_counts[normalized_name]
            value_counts[str_attr.value] = (
                value_counts.get(str_attr.value, 0) + 1
            )

    return dict(attrs_freq_counts)

def __str__(self) -> str:
return f"Collection[{self.name}]"
71 changes: 36 additions & 35 deletions open_rarity/models/token.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from dataclasses import dataclass
from typing import Any

from open_rarity.models.token_identifier import TokenIdentifier
from open_rarity.models.token_metadata import TokenMetadata
from open_rarity.models.token_identifier import (
EVMContractTokenIdentifier,
TokenIdentifier,
)
from open_rarity.models.token_metadata import AttributeName, TokenMetadata
from open_rarity.models.token_standard import TokenStandard
from open_rarity.models.utils.attribute_utils import normalize_attribute_string


@dataclass
Expand All @@ -26,46 +29,44 @@ class Token:
token_standard: TokenStandard
metadata: TokenMetadata

def __post_init__(self):
self.metadata = self._normalize_metadata(self.metadata)

def _normalize_metadata(self, metadata: TokenMetadata) -> TokenMetadata:
"""Normalizes token metadata to ensure the attribute names are lower cased
and whitespace stripped to ensure equality consistency.
@classmethod
def from_erc721(
cls,
contract_address: str,
token_id: int,
metadata_dict: dict[AttributeName, Any],
):
"""Creates a Token class representing an ERC721 evm token given the following
parameters.

Parameters
----------
metadata : TokenMetadata
The original token metadata
contract_address : str
Contract address of the token
token_id : int
Token ID number of the token
metadata_dict : dict
Dictionary of attribute name to attribute value for the given token.
The type of the value determines whether the attribute is a string,
numeric or date attribute.

class attribute type
------------ -------------
string string attribute
int | float numeric_attribute
datetime date_attribute (stored as timestamp, seconds from epoch)

Returns
-------
TokenMetadata
A new normalized token metadata
Token
A Token instance with EVMContractTokenIdentifier and ERC721 standard set.
"""

def normalize_and_reset(attributes_dict: dict):
"""Helper function that takes in an attributes dictionary
and normalizes both attribute name in the dictionary as the key
and the repeated field inside the <Type>Attribute class
"""
normalized_attributes_dict = {}

for attribute_name, attr in attributes_dict.items():
normalized_attr_name = normalize_attribute_string(
attribute_name
)
normalized_attributes_dict[normalized_attr_name] = attr
if attr.name != normalized_attr_name:
attr.name = normalized_attr_name
return normalized_attributes_dict

return TokenMetadata(
string_attributes=normalize_and_reset(metadata.string_attributes),
numeric_attributes=normalize_and_reset(
metadata.numeric_attributes
return cls(
token_identifier=EVMContractTokenIdentifier(
contract_address=contract_address, token_id=token_id
),
date_attributes=normalize_and_reset(metadata.date_attributes),
token_standard=TokenStandard.ERC721,
metadata=TokenMetadata.from_attributes(metadata_dict),
)

def __str__(self):
Expand Down
93 changes: 93 additions & 0 deletions open_rarity/models/token_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from dataclasses import dataclass, field
import datetime
from typing import Any

from open_rarity.models.utils.attribute_utils import normalize_attribute_string

Expand Down Expand Up @@ -43,6 +45,12 @@ class NumericAttribute:
name: AttributeName
value: float | int

def __init__(self, name: AttributeName, value: float | int):
    """Create a numeric attribute, canonicalizing the name.

    Names are lowercased and whitespace-stripped so differently-cased
    or padded spellings of the same trait compare equal.
    """
    self.value = value
    self.name = normalize_attribute_string(name)


@dataclass
class DateAttribute:
Expand All @@ -59,6 +67,12 @@ class DateAttribute:
name: AttributeName
value: int

def __init__(self, name: AttributeName, value: int):
    """Create a date attribute, canonicalizing the name.

    Names are lowercased and whitespace-stripped so differently-cased
    or padded spellings of the same trait compare equal.
    """
    self.value = value
    self.name = normalize_attribute_string(name)


@dataclass
class TokenMetadata:
Expand All @@ -72,6 +86,10 @@ class TokenMetadata:
    mapping of attribute name to list of numeric attribute values
date_attributes : dict
mapping of attribute name to list of date attribute values


All attributes names are normalized and all string attribute values are
normalized in the same way - lowercased and leading/trailing whitespace stripped.
"""

string_attributes: dict[AttributeName, StringAttribute] = field(
Expand All @@ -83,3 +101,78 @@ class TokenMetadata:
date_attributes: dict[AttributeName, DateAttribute] = field(
default_factory=dict
)

def __post_init__(self):
    """Normalize the keys of every attributes dictionary so lookups are
    insensitive to casing and leading/trailing whitespace.
    """
    for attrs_field in (
        "string_attributes",
        "numeric_attributes",
        "date_attributes",
    ):
        normalized = self._normalize_attributes_dict(
            getattr(self, attrs_field)
        )
        setattr(self, attrs_field, normalized)

def _normalize_attributes_dict(self, attributes_dict: dict) -> dict:
    """Return a copy of ``attributes_dict`` keyed by normalized attribute
    names (lowercased, leading/trailing whitespace stripped).

    Each attribute object's ``name`` field is also updated in place so it
    stays consistent with its dictionary key.
    """
    result: dict = {}
    for raw_name, attribute in attributes_dict.items():
        canonical = normalize_attribute_string(raw_name)
        result[canonical] = attribute
        # Keep the attribute object's own name in sync with its key.
        if attribute.name != canonical:
            attribute.name = canonical
    return result

@classmethod
def from_attributes(cls, attributes: dict[AttributeName, Any]):
    """Constructs TokenMetadata class based on an attributes dictionary

    Parameters
    ----------
    attributes : dict[AttributeName, Any]
        Dictionary of attribute name to attribute value for the given token.
        The type of the value determines whether the attribute is a string,
        numeric or date attribute.

        class           attribute type
        ------------    -------------
        string          string attribute
        int | float     numeric_attribute
        datetime        date_attribute (stored as timestamp, seconds from epoch)

    Returns
    -------
    TokenMetadata
        token metadata from input
    """
    strings: dict = {}
    numerics: dict = {}
    dates: dict = {}
    for name, value in attributes.items():
        # Dispatch on the python type of the value; note bools are
        # instances of int and therefore become numeric attributes.
        if isinstance(value, str):
            strings[name] = StringAttribute(name=name, value=value)
        elif isinstance(value, (float, int)):
            numerics[name] = NumericAttribute(name=name, value=value)
        elif isinstance(value, datetime.datetime):
            # Dates are stored as integer seconds since the epoch.
            dates[name] = DateAttribute(
                name=name, value=int(value.timestamp())
            )
        else:
            raise TypeError(
                f"Provided attribute value has invalid type: {type(value)}. "
                "Must be either str, float, int or datetime."
            )

    return cls(
        string_attributes=strings,
        numeric_attributes=numerics,
        date_attributes=dates,
    )
Loading