Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to Collection and TokenMetadata: Derive attributes #48

Merged
merged 7 commits into from
Sep 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 104 additions & 43 deletions open_rarity/models/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,53 @@ class Collection:
"""

attributes_frequency_counts: dict[AttributeName, dict[AttributeValue, int]]
name: str | None = ""
name: str

def __init__(
self,
tokens: list[Token],
attributes_frequency_counts: dict[
AttributeName, dict[AttributeValue, int]
],
tokens: list[Token],
]
| None = None,
name: str | None = "",
):
self._tokens = tokens
self.attributes_frequency_counts = (
self._normalize_attributes_frequency_counts(
"""
Parameters
----------
tokens : list[Token]
    list of all tokens that belong to the collection. Must have metadata
properly set if attributes_frequency_counts is not provided.
attributes_frequency_counts:
dict[AttributeName, dict[AttributeValue, int]] | None, optional
dictionary of attributes to the number of tokens in this collection
that has a specific value for every possible value for the given
attribute, by default None.
If not provided, the attributes distribution will be derived from the
attributes on the tokens provided.

Example:
{"hair": {"brown": 500, "blonde": 100}
which means 500 tokens has hair=brown, 100 token has hair=blonde
Note: All trait names and string values should be lowercased and stripped
    of leading and trailing whitespace.
Note 2: We currently only support string attributes in
attributes_frequency_counts
name : str | None, optional
A reference string only used for debugging or identification, by default ""
"""
self._tokens = tokens
self.name = name or ""
if attributes_frequency_counts:
self.attributes_frequency_counts = (
self._normalize_attributes_frequency_counts(
attributes_frequency_counts
)
)
else:
self.attributes_frequency_counts = (
self._derive_normalized_attributes_frequency_counts()
)
)
self.name = name

@property
def tokens(self) -> list[Token]:
Expand Down Expand Up @@ -112,41 +142,6 @@ def token_standards(self) -> list[TokenStandard]:
token_standards.add(token.token_standard)
return list(token_standards)

def _normalize_attributes_frequency_counts(
    self,
    attributes_frequency_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ],
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """We normalize all collection attributes to ensure that neither casing nor
    leading/trailing spaces produce different attributes:
    (e.g. 'Hat' == 'hat' == 'hat ')
    If a collection has the following in their attributes frequency counts:
    ('Hat', 'beanie') 5 tokens and
    ('hat', 'beanie') 10 tokens
    this would produce: ('hat', 'beanie') 15 tokens

    Parameters
    ----------
    attributes_frequency_counts : dict[AttributeName, dict[AttributeValue, int]]
        Raw mapping of attribute name -> (attribute value -> token count).

    Returns
    -------
    dict[AttributeName, dict[AttributeValue, int]]
        The same counts re-keyed by normalized names/values, with counts of
        entries that collapse to the same normalized key summed together.
    """
    normalized: dict[AttributeName, dict[AttributeValue, int]] = {}
    for (
        attr_name,
        attr_value_to_count,
    ) in attributes_frequency_counts.items():
        normalized_name = normalize_attribute_string(attr_name)
        if normalized_name not in normalized:
            normalized[normalized_name] = {}
        for attr_value, attr_count in attr_value_to_count.items():
            # Only string values are normalized; non-string values
            # (if any) are used as-is.
            normalized_value = (
                normalize_attribute_string(attr_value)
                if isinstance(attr_value, str)
                else attr_value
            )
            # Merge counts when distinct raw values collapse to the
            # same normalized value.
            if normalized_value not in normalized[normalized_name]:
                normalized[normalized_name][normalized_value] = attr_count
            else:
                normalized[normalized_name][normalized_value] += attr_count

    return normalized

def total_tokens_with_attribute(self, attribute: StringAttribute) -> int:
"""Returns the numbers of tokens in this collection with the attribute
based on the attributes frequency counts.
Expand Down Expand Up @@ -231,5 +226,71 @@ def extract_collection_attributes(

return collection_traits

def _normalize_attributes_frequency_counts(
    self,
    attributes_frequency_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ],
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """Collapse attribute names/values that differ only by casing or by
    leading/trailing whitespace into a single canonical entry
    (e.g. 'Hat' == 'hat' == 'hat ').

    For example, if the incoming counts contain
    ('Hat', 'beanie'): 5 tokens and ('hat', 'beanie'): 10 tokens,
    the result maps ('hat', 'beanie') to 15 tokens.
    """
    merged: dict[AttributeName, dict[AttributeValue, int]] = {}
    for raw_name, value_counts in attributes_frequency_counts.items():
        canonical_name = normalize_attribute_string(raw_name)
        target = merged.setdefault(canonical_name, {})
        for raw_value, count in value_counts.items():
            # Only string values get normalized; other value types
            # pass through unchanged.
            canonical_value = (
                normalize_attribute_string(raw_value)
                if isinstance(raw_value, str)
                else raw_value
            )
            # Sum counts of raw entries that collapse to the same key.
            target[canonical_value] = target.get(canonical_value, 0) + count
    return merged

def _derive_normalized_attributes_frequency_counts(
    self,
) -> dict[AttributeName, dict[AttributeValue, int]]:
    """Derives and constructs attributes_frequency_counts based on
    string attributes on tokens. Numeric or date attributes currently not
    supported.

    Returns
    -------
    dict[AttributeName, dict[AttributeValue, int]]
        dictionary of attributes to the number of tokens in this collection
        that has a specific value for every possible value for the given
        attribute.
    """
    attrs_freq_counts: dict[
        AttributeName, dict[AttributeValue, int]
    ] = defaultdict(dict)

    for token in self._tokens:
        for (
            attr_name,
            str_attr,
        ) in token.metadata.string_attributes.items():
            normalized_name = normalize_attribute_string(attr_name)
            # Bug fix: the membership check previously indexed by the raw
            # `attr_name` while writes were keyed by `normalized_name`.
            # When they differed (e.g. "Hat" vs "hat"), counts were reset
            # to 1 and the defaultdict grew spurious empty raw-name keys.
            value_counts = attrs_freq_counts[normalized_name]
            value_counts[str_attr.value] = (
                value_counts.get(str_attr.value, 0) + 1
            )

    return dict(attrs_freq_counts)

def __str__(self) -> str:
return f"Collection[{self.name}]"
71 changes: 36 additions & 35 deletions open_rarity/models/token.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from dataclasses import dataclass
from typing import Any

from open_rarity.models.token_identifier import TokenIdentifier
from open_rarity.models.token_metadata import TokenMetadata
from open_rarity.models.token_identifier import (
EVMContractTokenIdentifier,
TokenIdentifier,
)
from open_rarity.models.token_metadata import AttributeName, TokenMetadata
from open_rarity.models.token_standard import TokenStandard
from open_rarity.models.utils.attribute_utils import normalize_attribute_string


@dataclass
Expand All @@ -26,46 +29,44 @@ class Token:
token_standard: TokenStandard
metadata: TokenMetadata

def __post_init__(self):
self.metadata = self._normalize_metadata(self.metadata)

def _normalize_metadata(self, metadata: TokenMetadata) -> TokenMetadata:
"""Normalizes token metadata to ensure the attribute names are lower cased
and whitespace stripped to ensure equality consistency.
@classmethod
def from_erc721(
cls,
contract_address: str,
token_id: int,
metadata_dict: dict[AttributeName, Any],
):
"""Creates a Token class representing an ERC721 evm token given the following
parameters.

Parameters
----------
metadata : TokenMetadata
The original token metadata
contract_address : str
Contract address of the token
token_id : int
Token ID number of the token
metadata_dict : dict
Dictionary of attribute name to attribute value for the given token.
The type of the value determines whether the attribute is a string,
numeric or date attribute.

class attribute type
------------ -------------
string string attribute
int | float numeric_attribute
datetime date_attribute (stored as timestamp, seconds from epoch)

Returns
-------
TokenMetadata
A new normalized token metadata
Token
A Token instance with EVMContractTokenIdentifier and ERC721 standard set.
"""

def normalize_and_reset(attributes_dict: dict):
"""Helper function that takes in an attributes dictionary
and normalizes both attribute name in the dictionary as the key
and the repeated field inside the <Type>Attribute class
"""
normalized_attributes_dict = {}

for attribute_name, attr in attributes_dict.items():
normalized_attr_name = normalize_attribute_string(
attribute_name
)
normalized_attributes_dict[normalized_attr_name] = attr
if attr.name != normalized_attr_name:
attr.name = normalized_attr_name
return normalized_attributes_dict

return TokenMetadata(
string_attributes=normalize_and_reset(metadata.string_attributes),
numeric_attributes=normalize_and_reset(
metadata.numeric_attributes
return cls(
token_identifier=EVMContractTokenIdentifier(
contract_address=contract_address, token_id=token_id
),
date_attributes=normalize_and_reset(metadata.date_attributes),
token_standard=TokenStandard.ERC721,
metadata=TokenMetadata.from_attributes(metadata_dict),
)

def __str__(self):
Expand Down
93 changes: 93 additions & 0 deletions open_rarity/models/token_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from dataclasses import dataclass, field
import datetime
from typing import Any

from open_rarity.models.utils.attribute_utils import normalize_attribute_string

Expand Down Expand Up @@ -43,6 +45,12 @@ class NumericAttribute:
name: AttributeName
value: float | int

def __init__(self, name: AttributeName, value: float | int):
    """Create a numeric attribute, canonicalizing the name.

    Names are lowercased and whitespace-stripped so differently-cased
    or padded spellings of the same trait compare equal.
    """
    self.value = value
    self.name = normalize_attribute_string(name)


@dataclass
class DateAttribute:
Expand All @@ -59,6 +67,12 @@ class DateAttribute:
name: AttributeName
value: int

def __init__(self, name: AttributeName, value: int):
    """Create a date attribute, canonicalizing the name.

    Names are lowercased and whitespace-stripped so differently-cased
    or padded spellings of the same trait compare equal.
    """
    self.value = value
    self.name = normalize_attribute_string(name)


@dataclass
class TokenMetadata:
Expand All @@ -72,6 +86,10 @@ class TokenMetadata:
    mapping of attribute name to list of numeric attribute values
date_attributes : dict
mapping of attribute name to list of date attribute values


All attributes names are normalized and all string attribute values are
normalized in the same way - lowercased and leading/trailing whitespace stripped.
"""

string_attributes: dict[AttributeName, StringAttribute] = field(
Expand All @@ -83,3 +101,78 @@ class TokenMetadata:
date_attributes: dict[AttributeName, DateAttribute] = field(
default_factory=dict
)

def __post_init__(self):
    """Normalize the keys of every attributes dictionary so lookups are
    insensitive to casing and leading/trailing whitespace.
    """
    for attrs_field in (
        "string_attributes",
        "numeric_attributes",
        "date_attributes",
    ):
        normalized = self._normalize_attributes_dict(
            getattr(self, attrs_field)
        )
        setattr(self, attrs_field, normalized)

def _normalize_attributes_dict(self, attributes_dict: dict) -> dict:
    """Return a copy of ``attributes_dict`` keyed by normalized attribute
    names (lowercased, leading/trailing whitespace stripped).

    Each attribute object's ``name`` field is also updated in place so it
    stays consistent with its dictionary key.
    """
    result: dict = {}
    for raw_name, attribute in attributes_dict.items():
        canonical = normalize_attribute_string(raw_name)
        result[canonical] = attribute
        # Keep the attribute object's own name in sync with its key.
        if attribute.name != canonical:
            attribute.name = canonical
    return result

@classmethod
def from_attributes(cls, attributes: dict[AttributeName, Any]):
    """Constructs TokenMetadata class based on an attributes dictionary

    Parameters
    ----------
    attributes : dict[AttributeName, Any]
        Dictionary of attribute name to attribute value for the given token.
        The type of the value determines whether the attribute is a string,
        numeric or date attribute.

        class           attribute type
        ------------    -------------
        string          string attribute
        int | float     numeric_attribute
        datetime        date_attribute (stored as timestamp, seconds from epoch)

    Returns
    -------
    TokenMetadata
        token metadata from input
    """
    strings: dict = {}
    numerics: dict = {}
    dates: dict = {}
    for name, value in attributes.items():
        # Dispatch on the python type of the value; note bools are
        # instances of int and therefore become numeric attributes.
        if isinstance(value, str):
            strings[name] = StringAttribute(name=name, value=value)
        elif isinstance(value, (float, int)):
            numerics[name] = NumericAttribute(name=name, value=value)
        elif isinstance(value, datetime.datetime):
            # Dates are stored as integer seconds since the epoch.
            dates[name] = DateAttribute(
                name=name, value=int(value.timestamp())
            )
        else:
            raise TypeError(
                f"Provided attribute value has invalid type: {type(value)}. "
                "Must be either str, float, int or datetime."
            )

    return cls(
        string_attributes=strings,
        numeric_attributes=numerics,
        date_attributes=dates,
    )
Loading