Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into grantli/token-id-hash
Browse files Browse the repository at this point in the history
  • Loading branch information
vickygos committed Sep 14, 2022
2 parents 6ca2158 + ceb2f0c commit 5a89987
Show file tree
Hide file tree
Showing 7 changed files with 397 additions and 122 deletions.
147 changes: 104 additions & 43 deletions open_rarity/models/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,53 @@ class Collection:
"""

attributes_frequency_counts: dict[AttributeName, dict[AttributeValue, int]]
name: str | None = ""
name: str

def __init__(
    self,
    tokens: list[Token],
    attributes_frequency_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ]
    | None = None,
    name: str | None = "",
):
    """
    Parameters
    ----------
    tokens : list[Token]
        list of all tokens that belong to the collection. Must have metadata
        properly set if attributes_frequency_counts is not provided.
    attributes_frequency_counts:
        dict[AttributeName, dict[AttributeValue, int]] | None, optional
        dictionary of attributes to the number of tokens in this collection
        that has a specific value for every possible value for the given
        attribute, by default None.
        If not provided, the attributes distribution will be derived from the
        attributes on the tokens provided.
        Example:
            {"hair": {"brown": 500, "blonde": 100}}
        which means 500 tokens have hair=brown and 100 tokens have
        hair=blonde.
        Note: All trait names and string values should be lowercased and
        stripped of leading and trailing whitespace.
        Note 2: We currently only support string attributes in
        attributes_frequency_counts.
    name : str | None, optional
        A reference string only used for debugging or identification,
        by default ""
    """
    self._tokens = tokens
    # Normalize a None name to "" so downstream string formatting is safe.
    self.name = name or ""
    if attributes_frequency_counts:
        # Caller supplied the distribution: normalize names/values so that
        # casing and whitespace variants collapse into a single attribute.
        self.attributes_frequency_counts = (
            self._normalize_attributes_frequency_counts(
                attributes_frequency_counts
            )
        )
    else:
        # No distribution supplied: derive it from the tokens' metadata.
        self.attributes_frequency_counts = (
            self._derive_normalized_attributes_frequency_counts()
        )

@property
def tokens(self) -> list[Token]:
Expand Down Expand Up @@ -112,41 +142,6 @@ def token_standards(self) -> list[TokenStandard]:
token_standards.add(token.token_standard)
return list(token_standards)

def _normalize_attributes_frequency_counts(
    self,
    attributes_frequency_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ],
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """Collapse attribute entries that differ only by casing or
    leading/trailing whitespace into a single normalized entry
    (e.g. 'Hat' == 'hat' == 'hat ').

    If a collection has the following in their attributes frequency counts:
        ('Hat', 'beanie') 5 tokens and
        ('hat', 'beanie') 10 tokens
    this would produce: ('hat', 'beanie') 15 tokens
    """
    result: dict[AttributeName, dict[AttributeValue, int]] = {}
    for raw_name, value_counts in attributes_frequency_counts.items():
        clean_name = normalize_attribute_string(raw_name)
        # All raw spellings of the same name share one bucket.
        bucket = result.setdefault(clean_name, {})
        for raw_value, count in value_counts.items():
            # Only string values are normalized; other types pass through.
            clean_value = (
                normalize_attribute_string(raw_value)
                if isinstance(raw_value, str)
                else raw_value
            )
            bucket[clean_value] = bucket.get(clean_value, 0) + count
    return result

def total_tokens_with_attribute(self, attribute: StringAttribute) -> int:
"""Returns the numbers of tokens in this collection with the attribute
based on the attributes frequency counts.
Expand Down Expand Up @@ -231,5 +226,71 @@ def extract_collection_attributes(

return collection_traits

def _normalize_attributes_frequency_counts(
    self,
    attributes_frequency_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ],
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """Merge attribute spellings that differ only in casing or surrounding
    whitespace so they count as one attribute (e.g. 'Hat' == 'hat' == 'hat ').

    If a collection has the following in their attributes frequency counts:
        ('Hat', 'beanie') 5 tokens and
        ('hat', 'beanie') 10 tokens
    this would produce: ('hat', 'beanie') 15 tokens
    """
    merged: dict[AttributeName, dict[AttributeValue, int]] = {}
    for name, counts_by_value in attributes_frequency_counts.items():
        canonical_name = normalize_attribute_string(name)
        if canonical_name not in merged:
            merged[canonical_name] = {}
        values = merged[canonical_name]
        for value, count in counts_by_value.items():
            # Non-string values (if any) are used as-is as dict keys.
            if isinstance(value, str):
                value = normalize_attribute_string(value)
            if value in values:
                values[value] += count
            else:
                values[value] = count
    return merged

def _derive_normalized_attributes_frequency_counts(
    self,
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """Derives and constructs attributes_frequency_counts based on
    string attributes on tokens. Numeric or date attributes currently not
    supported.

    Returns
    -------
    dict[AttributeName, dict[AttributeValue, int]]
        dictionary of attributes to the number of tokens in this collection
        that has a specific value for every possible value for the given
        attribute.
    """
    attrs_freq_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ] = defaultdict(dict)

    for token in self._tokens:
        for (
            attr_name,
            str_attr,
        ) in token.metadata.string_attributes.items():
            normalized_name = normalize_attribute_string(attr_name)
            # Bug fix: the previous membership test read
            # attrs_freq_counts[attr_name] (the raw name) while writes went
            # to attrs_freq_counts[normalized_name]. When the raw name was
            # not already normalized this reset counts to 1 on every token
            # AND, because this is a defaultdict, inserted a spurious empty
            # dict under the raw name into the returned mapping. Read and
            # write through the same normalized key.
            value_counts = attrs_freq_counts[normalized_name]
            value_counts[str_attr.value] = (
                value_counts.get(str_attr.value, 0) + 1
            )

    # Convert back to a plain dict so missing-key lookups raise as usual.
    return dict(attrs_freq_counts)

def __str__(self) -> str:
    # Human-readable identifier used in logs and debugging output.
    return "Collection[{}]".format(self.name)
71 changes: 36 additions & 35 deletions open_rarity/models/token.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from dataclasses import dataclass
from typing import Any

from open_rarity.models.token_identifier import TokenIdentifier
from open_rarity.models.token_metadata import TokenMetadata
from open_rarity.models.token_identifier import (
EVMContractTokenIdentifier,
TokenIdentifier,
)
from open_rarity.models.token_metadata import AttributeName, TokenMetadata
from open_rarity.models.token_standard import TokenStandard
from open_rarity.models.utils.attribute_utils import normalize_attribute_string


@dataclass
Expand All @@ -26,46 +29,44 @@ class Token:
token_standard: TokenStandard
metadata: TokenMetadata

@classmethod
def from_erc721(
    cls,
    contract_address: str,
    token_id: int,
    metadata_dict: dict[AttributeName, Any],
):
    """Creates a Token class representing an ERC721 evm token given the
    following parameters.

    Parameters
    ----------
    contract_address : str
        Contract address of the token
    token_id : int
        Token ID number of the token
    metadata_dict : dict
        Dictionary of attribute name to attribute value for the given token.
        The type of the value determines whether the attribute is a string,
        numeric or date attribute.

        class           attribute type
        ------------    -------------
        string          string attribute
        int | float     numeric_attribute
        datetime        date_attribute (stored as timestamp, seconds from epoch)

    Returns
    -------
    Token
        A Token instance with EVMContractTokenIdentifier and ERC721 standard set.
    """
    return cls(
        token_identifier=EVMContractTokenIdentifier(
            contract_address=contract_address, token_id=token_id
        ),
        token_standard=TokenStandard.ERC721,
        # Attribute typing/normalization is delegated to TokenMetadata.
        metadata=TokenMetadata.from_attributes(metadata_dict),
    )

def __str__(self):
Expand Down
93 changes: 93 additions & 0 deletions open_rarity/models/token_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from dataclasses import dataclass, field
import datetime
from typing import Any

from open_rarity.models.utils.attribute_utils import normalize_attribute_string

Expand Down Expand Up @@ -43,6 +45,12 @@ class NumericAttribute:
name: AttributeName
value: float | int

def __init__(self, name: AttributeName, value: float | int):
    # Attribute names are treated case- and whitespace-insensitively, so
    # normalize the name up front; the numeric value is stored unchanged.
    self.value = value
    self.name = normalize_attribute_string(name)


@dataclass
class DateAttribute:
Expand All @@ -59,6 +67,12 @@ class DateAttribute:
name: AttributeName
value: int

def __init__(self, name: AttributeName, value: int):
    # Attribute names are treated case- and whitespace-insensitively, so
    # normalize the name up front; the timestamp value is stored unchanged.
    self.value = value
    self.name = normalize_attribute_string(name)


@dataclass
class TokenMetadata:
Expand All @@ -72,6 +86,10 @@ class TokenMetadata:
mapping of attribute name to list of numeric attribute values
date_attributes : dict
mapping of attribute name to list of date attribute values
All attributes names are normalized and all string attribute values are
normalized in the same way - lowercased and leading/trailing whitespace stripped.
"""

string_attributes: dict[AttributeName, StringAttribute] = field(
Expand All @@ -83,3 +101,78 @@ class TokenMetadata:
date_attributes: dict[AttributeName, DateAttribute] = field(
default_factory=dict
)

def __post_init__(self):
    # Re-key every attribute dict through the shared normalizer so that
    # lookups become casing/whitespace-insensitive.
    for attr_field in (
        "string_attributes",
        "numeric_attributes",
        "date_attributes",
    ):
        setattr(
            self,
            attr_field,
            self._normalize_attributes_dict(getattr(self, attr_field)),
        )

def _normalize_attributes_dict(self, attributes_dict: dict) -> dict:
    """Return a copy of *attributes_dict* re-keyed by normalized attribute
    name (lowercased, leading/trailing whitespace stripped).

    The attribute objects themselves are reused; each one's ``name`` field
    is updated in place when it differs from the normalized key.
    """
    result = {}
    for raw_name, attribute in attributes_dict.items():
        clean_name = normalize_attribute_string(raw_name)
        # Keep the key and the object's own name field in sync.
        if attribute.name != clean_name:
            attribute.name = clean_name
        result[clean_name] = attribute
    return result

@classmethod
def from_attributes(cls, attributes: dict[AttributeName, Any]):
    """Constructs TokenMetadata class based on an attributes dictionary

    Parameters
    ----------
    attributes : dict[AttributeName, Any]
        Dictionary of attribute name to attribute value for the given token.
        The type of the value determines whether the attribute is a string,
        numeric or date attribute.

        class           attribute type
        ------------    -------------
        string          string attribute
        int | float     numeric_attribute
        datetime        date_attribute (stored as timestamp, seconds from epoch)

    Returns
    -------
    TokenMetadata
        token metadata from input
    """
    strings: dict = {}
    numerics: dict = {}
    dates: dict = {}
    for name, value in attributes.items():
        # Dispatch on the runtime type of the value. Note that a bool is an
        # instance of int, so booleans land in the numeric bucket.
        if isinstance(value, str):
            strings[name] = StringAttribute(name=name, value=value)
        elif isinstance(value, (float, int)):
            numerics[name] = NumericAttribute(name=name, value=value)
        elif isinstance(value, datetime.datetime):
            dates[name] = DateAttribute(
                name=name, value=int(value.timestamp())
            )
        else:
            raise TypeError(
                f"Provided attribute value has invalid type: {type(value)}. "
                "Must be either str, float, int or datetime."
            )

    return cls(
        string_attributes=strings,
        numeric_attributes=numerics,
        date_attributes=dates,
    )
Loading

0 comments on commit 5a89987

Please sign in to comment.