From 10722f9660f6d8383804e07e6a1e730564e6122b Mon Sep 17 00:00:00 2001
From: Robert Miller
Date: Sat, 13 Jul 2024 06:59:06 -0400
Subject: [PATCH 1/2] feat: add csv data source

---
 Cargo.lock                                |   1 +
 crates/rbuilder/Cargo.toml                |   1 +
 crates/rbuilder/src/backtest/fetch/csv.rs | 147 ++++++++++++++++++++++
 crates/rbuilder/src/backtest/fetch/mod.rs |   1 +
 4 files changed, 150 insertions(+)
 create mode 100644 crates/rbuilder/src/backtest/fetch/csv.rs

diff --git a/Cargo.lock b/Cargo.lock
index a9232219..b4f869c1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -7680,6 +7680,7 @@ dependencies = [
  "flume",
  "futures",
  "governor",
+ "hex",
  "humantime",
  "hyper 1.3.1",
  "integer-encoding",
diff --git a/crates/rbuilder/Cargo.toml b/crates/rbuilder/Cargo.toml
index 96286383..20cea54b 100644
--- a/crates/rbuilder/Cargo.toml
+++ b/crates/rbuilder/Cargo.toml
@@ -106,6 +106,7 @@ derivative = "2.2.0"
 mockall = "0.12.1"
 shellexpand = "3.1.0"
 async-trait = "0.1.80"
+hex = "0.4.3"
 
 [build-dependencies]
 built = { version = "0.7.1", features = ["git2", "chrono"] }
diff --git a/crates/rbuilder/src/backtest/fetch/csv.rs b/crates/rbuilder/src/backtest/fetch/csv.rs
new file mode 100644
index 00000000..d1521171
--- /dev/null
+++ b/crates/rbuilder/src/backtest/fetch/csv.rs
@@ -0,0 +1,147 @@
+use crate::primitives::Order;
+use crate::{
+    backtest::{
+        fetch::datasource::{BlockRef, DataSource},
+        OrdersWithTimestamp,
+    },
+    primitives::{Bundle, TransactionSignedEcRecoveredWithBlobs},
+};
+use alloy_rlp::Decodable;
+use async_trait::async_trait;
+use csv::Reader;
+use eyre::Context;
+use reth::primitives::TransactionSignedEcRecovered;
+use std::{collections::HashMap, fs::File, path::PathBuf};
+use tracing::trace;
+use uuid::Uuid;
+
+/// Number of batches; CSV batch numbers and block numbers are both reduced modulo this value.
+const BATCH_COUNT: u64 = 10;
+
+/// Backtest data source serving transactions loaded from a CSV file.
+///
+/// Each record holds a batch number and a hex-encoded RLP transaction. Transactions are
+/// grouped by `batch_number % BATCH_COUNT` and returned for blocks with the same remainder.
+#[derive(Debug, Clone)]
+pub struct CSVDatasource {
+    batches: HashMap<u64, Vec<TransactionSignedEcRecovered>>,
+}
+
+impl CSVDatasource {
+    /// Loads and decodes every transaction from the CSV file at `filename`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be opened or a record cannot be parsed.
+    pub fn new(filename: impl Into<PathBuf>) -> eyre::Result<Self> {
+        let batches = Self::load_transactions_from_csv(filename.into())?;
+        Ok(Self { batches })
+    }
+
+    /// Reads the CSV file and groups the decoded transactions by batch.
+    fn load_transactions_from_csv(
+        filename: PathBuf,
+    ) -> eyre::Result<HashMap<u64, Vec<TransactionSignedEcRecovered>>> {
+        let file = File::open(&filename)
+            .wrap_err_with(|| format!("Failed to open file: {}", filename.display()))?;
+        // NOTE(review): with the csv crate defaults the first row is treated as a header and
+        // is not yielded as a record; confirm the input files start with a header row.
+        let mut reader = Reader::from_reader(file);
+        let mut batches: HashMap<u64, Vec<TransactionSignedEcRecovered>> = HashMap::new();
+
+        for (index, result) in reader.records().enumerate() {
+            let record = result?;
+            let (batch_number, tx) = Self::parse_record(&record)
+                .wrap_err_with(|| format!("Invalid CSV data row {}", index + 1))?;
+            let batch = batch_number % BATCH_COUNT;
+            batches.entry(batch).or_default().push(tx);
+        }
+
+        Ok(batches)
+    }
+
+    /// Parses one `batch_number,rlp_hex` record into the batch number and the transaction.
+    fn parse_record(
+        record: &csv::StringRecord,
+    ) -> eyre::Result<(u64, TransactionSignedEcRecovered)> {
+        if record.len() != 2 {
+            return Err(eyre::eyre!(
+                "Invalid CSV format: expected 2 fields per record, found {}",
+                record.len()
+            ));
+        }
+
+        let batch_number: u64 = record[0].trim().parse()?;
+        // Tolerate surrounding whitespace and an optional `0x` prefix on the hex payload.
+        let rlp_hex = record[1].trim();
+        let rlp_hex = rlp_hex.strip_prefix("0x").unwrap_or(rlp_hex);
+        let rlp_bytes = hex::decode(rlp_hex)?;
+        let tx = TransactionSignedEcRecovered::decode(&mut &rlp_bytes[..])?;
+
+        Ok((batch_number, tx))
+    }
+}
+
+#[async_trait]
+impl DataSource for CSVDatasource {
+    /// Returns the transactions of the batch selected by `block` as single-tx bundle orders.
+    async fn get_orders(&self, block: BlockRef) -> eyre::Result<Vec<OrdersWithTimestamp>> {
+        // The CSV holds synthetic batches rather than real blocks, so the block number only
+        // selects a batch: e.g. block 100 maps to batch 0, 101 to 1, 109 to 9.
+        let batch_number = block.block_number % BATCH_COUNT;
+        let transactions = self.batches.get(&batch_number).cloned().unwrap_or_default();
+
+        let orders: Vec<OrdersWithTimestamp> = transactions
+            .into_iter()
+            .enumerate()
+            .map(|(index, tx)| OrdersWithTimestamp {
+                // NOTE(review): `block_timestamp` is stored unchanged in `timestamp_ms`;
+                // confirm the units agree.
+                timestamp_ms: block.block_timestamp,
+                // The position within the batch is used as the bundle UUID value.
+                order: transaction_to_order(block.block_number, index as u128, tx),
+                sim_value: None,
+            })
+            .collect();
+
+        trace!(
+            "Fetched synthetic transactions from CSV for block {}, batch {}, count: {}",
+            block.block_number,
+            batch_number,
+            orders.len()
+        );
+
+        Ok(orders)
+    }
+
+    fn clone_box(&self) -> Box<dyn DataSource> {
+        Box::new(self.clone())
+    }
+}
+
+/// Wraps `tx` in a single-transaction [`Bundle`] order targeting `block`.
+///
+/// `uuid_num` is written big-endian into the bundle UUID.
+///
+/// # Panics
+///
+/// Panics if `TransactionSignedEcRecoveredWithBlobs::new_no_blobs` does not accept `tx`.
+fn transaction_to_order(block: u64, uuid_num: u128, tx: TransactionSignedEcRecovered) -> Order {
+    let tx_with_blobs = TransactionSignedEcRecoveredWithBlobs::new_no_blobs(tx)
+        .expect("new_no_blobs failed for a CSV transaction");
+    // Read the hash before the transaction is moved into `txs`, avoiding a clone.
+    let hash = tx_with_blobs.hash();
+    let bundle = Bundle {
+        txs: vec![tx_with_blobs],
+        hash,
+        reverting_tx_hashes: vec![],
+        block,
+        uuid: Uuid::from_bytes(uuid_num.to_be_bytes()),
+        min_timestamp: None,
+        max_timestamp: None,
+        replacement_data: None,
+        signer: None,
+        metadata: Default::default(),
+    };
+    Order::Bundle(bundle)
+}
diff --git a/crates/rbuilder/src/backtest/fetch/mod.rs b/crates/rbuilder/src/backtest/fetch/mod.rs
index 3d3bdbf3..7e0172ad 100644
--- a/crates/rbuilder/src/backtest/fetch/mod.rs
+++ b/crates/rbuilder/src/backtest/fetch/mod.rs
@@ -2,6 +2,7 @@ pub mod datasource;
 pub mod flashbots_db;
 pub mod mempool;
 pub mod mev_boost;
+pub mod csv;
 
 use crate::{
     backtest::{

From 6b219c85845d6d99c7e91391feb50f48973998b2 Mon Sep 17 00:00:00 2001
From: Robert Miller
Date: Sat, 13 Jul 2024 07:10:48 -0400
Subject: [PATCH 2/2] fix lint

---
 crates/rbuilder/src/backtest/fetch/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/rbuilder/src/backtest/fetch/mod.rs b/crates/rbuilder/src/backtest/fetch/mod.rs
index 7e0172ad..83944069 100644
--- a/crates/rbuilder/src/backtest/fetch/mod.rs
+++ b/crates/rbuilder/src/backtest/fetch/mod.rs
@@ -1,8 +1,8 @@
+pub mod csv;
 pub mod datasource;
 pub mod flashbots_db;
 pub mod mempool;
 pub mod mev_boost;
-pub mod csv;
 
 use crate::{
     backtest::{