Skip to content

Commit

Permalink
Alternative u256 representations (#53)
Browse files Browse the repository at this point in the history
* initial u256 conversions commit

* basic u256 input parsing

* display u256 conversions in print_schema()

* add raw data conversions from u256

* update readme to reflect possible u256 conversions

* add comment to produce diff on schemas.rs

* fix table_schema tests

* form table_schema tests
  • Loading branch information
sslivkoff authored Aug 31, 2023
1 parent cabe7b0 commit 519e3d7
Show file tree
Hide file tree
Showing 15 changed files with 231 additions and 32 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ An attempt is made to ensure that the dataset schemas conform to a common set of
- By default, rows should contain enough information to be order-able
- Columns should be named by their JSON-RPC or ethers.rs defaults, except in cases where a much more explicit name is available
- To make joins across tables easier, a given piece of information should use the same datatype and column name across tables when possible
- Large ints such as `u256` should allow multiple conversions. A `value` column of type `u256` should allow: `value_binary`, `value_string`, `value_f64`, `value_decimal128`, `value_u64_high`, and `value_u64_low`
- Large ints such as `u256` should allow multiple conversions. A `value` column of type `u256` should allow: `value_binary`, `value_string`, `value_f32`, `value_f64`, `value_u32`, `value_u64`, and `value_d128`
- By default, columns related to non-identifying cryptographic signatures are omitted, for example the `state_root` of a block or the `v`/`r`/`s` of a transaction
- Integer values that can never be negative should be stored as unsigned integers

Expand Down
5 changes: 5 additions & 0 deletions crates/cli/src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ pub struct Args {
#[arg(long, value_name="COLS", num_args(0..), verbatim_doc_comment, help_heading="Content Options")]
pub columns: Option<Vec<String>>,

/// Set output datatype(s) of U256 integers
/// [default: binary, string, f64]
#[arg(long, num_args(1..), help_heading = "Content Options", verbatim_doc_comment)]
pub u256_types: Option<Vec<String>>,

/// Use hex string encoding for binary columns
#[arg(long, help_heading = "Content Options")]
pub hex: bool,
Expand Down
34 changes: 33 additions & 1 deletion crates/cli/src/parse/query.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use std::{collections::HashMap, sync::Arc};
use std::{
collections::{HashMap, HashSet},
sync::Arc,
};

use ethers::prelude::*;
use hex::FromHex;
Expand All @@ -7,6 +10,7 @@ use cryo_freeze::{ColumnEncoding, Datatype, FileFormat, MultiQuery, ParseError,

use super::{blocks, file_output, transactions};
use crate::args::Args;
use cryo_freeze::U256Type;

pub(crate) async fn parse_query(
args: &Args,
Expand Down Expand Up @@ -81,6 +85,33 @@ fn parse_datatypes(raw_inputs: &Vec<String>) -> Result<Vec<Datatype>, ParseError
fn parse_schemas(args: &Args) -> Result<HashMap<Datatype, Table>, ParseError> {
let datatypes = parse_datatypes(&args.datatype)?;
let output_format = file_output::parse_output_format(args)?;

let u256_types = if let Some(raw_u256_types) = &args.u256_types {
let mut u256_types: HashSet<U256Type> = HashSet::new();
for raw in raw_u256_types.iter() {
let u256_type = match raw.to_lowercase() {
raw if raw == "binary" => U256Type::Binary,
raw if raw == "string" => U256Type::String,
raw if raw == "str" => U256Type::String,
raw if raw == "f32" => U256Type::F32,
raw if raw == "float32" => U256Type::F32,
raw if raw == "f64" => U256Type::F64,
raw if raw == "float64" => U256Type::F64,
raw if raw == "float" => U256Type::F64,
raw if raw == "u32" => U256Type::U32,
raw if raw == "uint32" => U256Type::U32,
raw if raw == "u64" => U256Type::U64,
raw if raw == "uint64" => U256Type::U64,
raw if raw == "decimal128" => U256Type::Decimal128,
raw if raw == "d128" => U256Type::Decimal128,
_ => return Err(ParseError::ParseError("bad u256 type".to_string())),
};
u256_types.insert(u256_type);
}
u256_types
} else {
HashSet::from_iter(vec![U256Type::Binary, U256Type::String, U256Type::F64])
};
let binary_column_format = match args.hex | (output_format != FileFormat::Parquet) {
true => ColumnEncoding::Hex,
false => ColumnEncoding::Binary,
Expand All @@ -92,6 +123,7 @@ fn parse_schemas(args: &Args) -> Result<HashMap<Datatype, Table>, ParseError> {
.map(|datatype| {
datatype
.table_schema(
&u256_types,
&binary_column_format,
&args.include_columns,
&args.exclude_columns,
Expand Down
15 changes: 12 additions & 3 deletions crates/cli/src/summaries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use std::time::SystemTime;
use thousands::Separable;

use cryo_freeze::{
BlockChunk, Chunk, ChunkData, Datatype, FileOutput, FreezeSummary, MultiQuery, Source, Table,
TransactionChunk,
BlockChunk, Chunk, ChunkData, ColumnType, Datatype, FileOutput, FreezeSummary, MultiQuery,
Source, Table, TransactionChunk,
};

const TITLE_R: u8 = 0;
Expand Down Expand Up @@ -123,7 +123,16 @@ fn print_schema(name: &Datatype, schema: &Table) {
print_header("schema for ".to_string() + name.dataset().name());
for column in schema.columns() {
if let Some(column_type) = schema.column_type(column) {
print_bullet(column, column_type.as_str());
if column_type == ColumnType::UInt256 {
for uint256_type in schema.u256_types.iter() {
print_bullet(
column.to_owned() + uint256_type.suffix().as_str(),
uint256_type.to_columntype().as_str(),
);
}
} else {
print_bullet(column, column_type.as_str());
}
}
}
println!();
Expand Down
4 changes: 2 additions & 2 deletions crates/freeze/src/datasets/balance_diffs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ impl Dataset for BalanceDiffs {
("transaction_index", ColumnType::Binary),
("transaction_hash", ColumnType::Binary),
("address", ColumnType::Binary),
("from_value", ColumnType::Binary),
("to_value", ColumnType::Binary),
("from_value", ColumnType::UInt256),
("to_value", ColumnType::UInt256),
("chain_id", ColumnType::UInt64),
])
}
Expand Down
10 changes: 5 additions & 5 deletions crates/freeze/src/datasets/blocks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ use crate::{
types::{
conversions::{ToVecHex, ToVecU8},
BlockChunk, Blocks, CollectError, ColumnType, Dataset, Datatype, RowFilter, Source, Table,
TransactionChunk,
TransactionChunk, U256Type,
},
with_series, with_series_binary,
with_series, with_series_binary, with_series_u256,
};

pub(crate) type BlockTxGasTuple<TX> = Result<(Block<TX>, Option<Vec<u32>>), CollectError>;
Expand Down Expand Up @@ -321,7 +321,7 @@ pub(crate) struct TransactionColumns {
nonce: Vec<u64>,
from_address: Vec<Vec<u8>>,
to_address: Vec<Option<Vec<u8>>>,
value: Vec<String>,
value: Vec<U256>,
input: Vec<Vec<u8>>,
gas_limit: Vec<u32>,
gas_used: Vec<u32>,
Expand Down Expand Up @@ -364,7 +364,7 @@ impl TransactionColumns {
with_series!(cols, "nonce", self.nonce, schema);
with_series_binary!(cols, "from_address", self.from_address, schema);
with_series_binary!(cols, "to_address", self.to_address, schema);
with_series!(cols, "value", self.value, schema);
with_series_u256!(cols, "value", self.value, schema);
with_series_binary!(cols, "input", self.input, schema);
with_series!(cols, "gas_limit", self.gas_limit, schema);
with_series!(cols, "gas_used", self.gas_used, schema);
Expand Down Expand Up @@ -471,7 +471,7 @@ fn process_transaction(
columns.nonce.push(tx.nonce.as_u64());
}
if schema.has_column("value") {
columns.value.push(tx.value.to_string());
columns.value.push(tx.value);
}
if schema.has_column("input") {
columns.input.push(tx.input.to_vec());
Expand Down
2 changes: 1 addition & 1 deletion crates/freeze/src/datasets/native_transfers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ impl Dataset for NativeTransfers {
("transaction_hash", ColumnType::Binary),
("from_address", ColumnType::Binary),
("to_address", ColumnType::Binary),
("value", ColumnType::Binary),
("value", ColumnType::UInt256),
("chain_id", ColumnType::UInt64),
])
}
Expand Down
2 changes: 1 addition & 1 deletion crates/freeze/src/datasets/traces.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl Dataset for Traces {
HashMap::from_iter(vec![
("action_from", ColumnType::Binary),
("action_to", ColumnType::Binary),
("action_value", ColumnType::String),
("action_value", ColumnType::UInt256),
("action_gas", ColumnType::UInt32),
("action_input", ColumnType::Binary),
("action_call_type", ColumnType::String),
Expand Down
4 changes: 1 addition & 3 deletions crates/freeze/src/datasets/transactions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@ impl Dataset for Transactions {
("nonce", ColumnType::Int32),
("from_address", ColumnType::Binary),
("to_address", ColumnType::Binary),
("value", ColumnType::Decimal128),
("value_str", ColumnType::String),
("value_float", ColumnType::Float64),
("value", ColumnType::UInt256),
("input", ColumnType::Binary),
("gas_limit", ColumnType::UInt32),
("gas_used", ColumnType::UInt32),
Expand Down
73 changes: 73 additions & 0 deletions crates/freeze/src/types/dataframes/creation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,76 @@ macro_rules! with_series_binary {
}
};
}

/// Append one `Series` per requested u256 representation of a column.
///
/// Arguments:
/// - `$all_series`: collection that receives each generated series via `push`
/// - `$name`: base column name; every series is named `$name` + the type's `suffix()`
/// - `$value`: the u256 values, traversed with `.iter()`
/// - `$schema`: supplies `has_column`, `column_type` and the `u256_types` selection
///
/// Nothing is produced unless `$schema.has_column($name)` is true. Series are
/// pushed in a fixed order: binary, string, f32, f64, u32, u64.
///
/// The expansion refers to `U256Type`, `ColumnType`, `Series` and the
/// `to_vec_u8` / `to_vec_hex` conversions by bare name, so these must be in
/// scope wherever the macro is invoked.
///
/// # Panics
///
/// Panics with `"DECIMAL128 not implemented"` when `U256Type::Decimal128` is
/// among the requested types (after the other series have been pushed).
#[macro_export]
macro_rules! with_series_u256 {
    ($all_series:expr, $name:expr, $value:expr, $schema:expr) => {
        if $schema.has_column($name) {
            // raw bytes, rendered as hex strings when the column type says so
            // NOTE(review): the lookup uses the base column name; confirm that
            // `column_type` can actually report `Hex` for a u256 column.
            if $schema.u256_types.contains(&U256Type::Binary) {
                let mut label = $name.to_string();
                label.push_str(U256Type::Binary.suffix().as_str());

                let raw: Vec<Vec<u8>> = $value.iter().map(|item| item.to_vec_u8()).collect();
                let series = if matches!($schema.column_type($name), Some(ColumnType::Hex)) {
                    Series::new(label.as_str(), raw.to_vec_hex())
                } else {
                    Series::new(label.as_str(), raw)
                };
                $all_series.push(series);
            }

            // textual form produced by `to_string()`
            if $schema.u256_types.contains(&U256Type::String) {
                let mut label = $name.to_string();
                label.push_str(U256Type::String.suffix().as_str());

                let texts: Vec<String> = $value.iter().map(|item| item.to_string()).collect();
                $all_series.push(Series::new(label.as_str(), texts));
            }

            // 32-bit float, obtained by re-parsing the textual form;
            // a value that fails to parse is stored as `None`
            if $schema.u256_types.contains(&U256Type::F32) {
                let mut label = $name.to_string();
                label.push_str(U256Type::F32.suffix().as_str());

                let floats: Vec<Option<f32>> = $value
                    .iter()
                    .map(|item| item.to_string())
                    .map(|text| text.parse::<f32>().ok())
                    .collect();
                $all_series.push(Series::new(label.as_str(), floats));
            }

            // 64-bit float, same string round-trip as the 32-bit variant
            if $schema.u256_types.contains(&U256Type::F64) {
                let mut label = $name.to_string();
                label.push_str(U256Type::F64.suffix().as_str());

                let floats: Vec<Option<f64>> = $value
                    .iter()
                    .map(|item| item.to_string())
                    .map(|text| text.parse::<f64>().ok())
                    .collect();
                $all_series.push(Series::new(label.as_str(), floats));
            }

            // 32-bit unsigned integer
            // NOTE(review): `as_u32` presumably panics when the value does not
            // fit in 32 bits -- confirm against the U256 implementation.
            if $schema.u256_types.contains(&U256Type::U32) {
                let mut label = $name.to_string();
                label.push_str(U256Type::U32.suffix().as_str());

                let ints: Vec<u32> = $value.iter().map(|item| item.as_u32()).collect();
                $all_series.push(Series::new(label.as_str(), ints));
            }

            // 64-bit unsigned integer
            // NOTE(review): same overflow question as for `as_u32` -- confirm.
            if $schema.u256_types.contains(&U256Type::U64) {
                let mut label = $name.to_string();
                label.push_str(U256Type::U64.suffix().as_str());

                let ints: Vec<u64> = $value.iter().map(|item| item.as_u64()).collect();
                $all_series.push(Series::new(label.as_str(), ints));
            }

            // decimal128 output has no conversion yet
            if $schema.u256_types.contains(&U256Type::Decimal128) {
                panic!("DECIMAL128 not implemented")
            }
        }
    };
}
2 changes: 1 addition & 1 deletion crates/freeze/src/types/files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ impl FileFormat {
}

/// Encoding for binary data in a column
#[derive(Clone, Eq, PartialEq)]
#[derive(Clone, Eq, PartialEq, Debug)]
pub enum ColumnEncoding {
/// Raw binary encoding
Binary,
Expand Down
2 changes: 1 addition & 1 deletion crates/freeze/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pub use conversions::{ToVecHex, ToVecU8};
pub use datatypes::*;
pub use files::{ColumnEncoding, FileFormat, FileOutput};
pub use queries::{MultiQuery, RowFilter, SingleQuery};
pub use schemas::{ColumnType, Table};
pub use schemas::{ColumnType, Table, U256Type};
pub use sources::{RateLimiter, Source};
pub(crate) use summaries::FreezeSummaryAgg;
pub use summaries::{FreezeChunkSummary, FreezeSummary};
Expand Down
Loading

0 comments on commit 519e3d7

Please sign in to comment.