From 8dd72723d1bf0417bfede6793516c6853cb4cbd3 Mon Sep 17 00:00:00 2001
From: Matthias Seitz
Date: Sat, 23 Apr 2022 11:20:46 +0200
Subject: [PATCH] feat: add minimal ast bindings (#1167)

* feat: add minimal ast bindings

* feat: add recursive nodes
---
 .../src/artifact_output/configurable.rs       |   8 +-
 ethers-solc/src/artifacts/ast.rs              | 223 ++++++++++++++++++
 ethers-solc/src/artifacts/mod.rs              |   6 +-
 ethers-solc/src/artifacts/serde_helpers.rs    |  63 ++++-
 ethers-solc/src/compile/project.rs            |   4 +-
 5 files changed, 294 insertions(+), 10 deletions(-)
 create mode 100644 ethers-solc/src/artifacts/ast.rs

diff --git a/ethers-solc/src/artifact_output/configurable.rs b/ethers-solc/src/artifact_output/configurable.rs
index 7acd788f0..e4602ec50 100644
--- a/ethers-solc/src/artifact_output/configurable.rs
+++ b/ethers-solc/src/artifact_output/configurable.rs
@@ -8,7 +8,7 @@ use crate::{
             BytecodeOutputSelection, ContractOutputSelection, EvmOutputSelection,
             EwasmOutputSelection,
         },
-        CompactContractBytecodeCow, DevDoc, Evm, Ewasm, FunctionDebugData, GasEstimates,
+        Ast, CompactContractBytecodeCow, DevDoc, Evm, Ewasm, FunctionDebugData, GasEstimates,
         LosslessAbi, Metadata, Offsets, Settings, StorageLayout, UserDoc,
     },
     ArtifactOutput, SolcConfig, SolcError, SourceFile,
@@ -52,8 +52,8 @@ pub struct ConfigurableContractArtifact {
     pub ir_optimized: Option<String>,
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub ewasm: Option<Ewasm>,
-    #[serde(default, skip_serializing_if = "serde_json::Value::is_null")]
-    pub ast: serde_json::Value,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub ast: Option<Ast>,
 }
 
 impl ConfigurableContractArtifact {
@@ -303,7 +303,7 @@ impl ArtifactOutput for ConfigurableArtifacts {
             ir: artifact_ir,
             ir_optimized: artifact_ir_optimized,
             ewasm: artifact_ewasm,
-            ast: source_file.map(|s| s.ast.clone()).unwrap_or_default(),
+            ast: source_file.and_then(|s| s.ast.clone()),
         }
     }
 }
diff --git a/ethers-solc/src/artifacts/ast.rs b/ethers-solc/src/artifacts/ast.rs
new file mode 100644
index 000000000..c7b217337
--- /dev/null
+++ b/ethers-solc/src/artifacts/ast.rs
@@ -0,0 +1,223 @@
+//! Bindings for solc's `ast` output field
+
+use crate::artifacts::serde_helpers;
+use serde::{Deserialize, Serialize};
+use std::{collections::BTreeMap, fmt, fmt::Write, str::FromStr};
+
+/// Represents the AST field in the solc output
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct Ast {
+    #[serde(rename = "absolutePath")]
+    pub absolute_path: String,
+    pub id: usize,
+    #[serde(default, rename = "exportedSymbols")]
+    pub exported_symbols: BTreeMap<String, Vec<usize>>,
+    #[serde(rename = "nodeType")]
+    pub node_type: NodeType,
+    #[serde(with = "serde_helpers::display_from_str")]
+    pub src: SourceLocation,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub nodes: Vec<Node>,
+    #[serde(flatten)]
+    pub other: BTreeMap<String, serde_json::Value>,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct Node {
+    pub id: usize,
+    #[serde(rename = "nodeType")]
+    pub node_type: NodeType,
+    #[serde(with = "serde_helpers::display_from_str")]
+    pub src: SourceLocation,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub nodes: Vec<Node>,
+    #[serde(flatten)]
+    pub other: BTreeMap<String, serde_json::Value>,
+}
+
+/// Represents the source location of a node : `<start byte>:<length>:<source index>`
+///
+/// The `length` and `index` can be -1 which is represented as `None`
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct SourceLocation {
+    pub start: usize,
+    pub length: Option<usize>,
+    pub index: Option<usize>,
+}
+
+impl FromStr for SourceLocation {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let invalid_location = move || format!("{} invalid source location", s);
+
+        let mut split = s.split(':');
+        let start = split
+            .next()
+            .ok_or_else(invalid_location)?
+            .parse::<usize>()
+            .map_err(|_| invalid_location())?;
+        let length = split
+            .next()
+            .ok_or_else(invalid_location)?
+            .parse::<i32>()
+            .map_err(|_| invalid_location())?;
+        let index = split
+            .next()
+            .ok_or_else(invalid_location)?
+            .parse::<i32>()
+            .map_err(|_| invalid_location())?;
+
+        let length = if length < 0 { None } else { Some(length as usize) };
+        let index = if index < 0 { None } else { Some(index as usize) };
+
+        Ok(Self { start, length, index })
+    }
+}
+
+impl fmt::Display for SourceLocation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.start.fmt(f)?;
+        f.write_char(':')?;
+        if let Some(length) = self.length {
+            length.fmt(f)?;
+        } else {
+            f.write_str("-1")?;
+        }
+        f.write_char(':')?;
+        if let Some(index) = self.index {
+            index.fmt(f)?;
+        } else {
+            f.write_str("-1")?;
+        }
+        Ok(())
+    }
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum NodeType {
+    YulAssignment,
+    YulBlock,
+    YulExpressionStatement,
+    YulForLoop,
+    YulIf,
+    YulVariableDeclaration,
+    YulFunctionDefinition,
+    SourceUnit,
+    PragmaDirective,
+    ContractDefinition,
+    EventDefinition,
+    ErrorDefinition,
+    Other(String),
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn can_parse_ast() {
+        let ast = r#"
+        {
+  "absolutePath": "input.sol",
+  "exportedSymbols":
+  {
+    "Ballot":
+    [
+      2
+    ],
+    "Ballot2":
+    [
+      3
+    ],
+    "Ballot3":
+    [
+      4
+    ]
+  },
+  "id": 5,
+  "nodeType": "SourceUnit",
+  "nodes":
+  [
+    {
+      "id": 1,
+      "literals":
+      [
+        "solidity",
+        ">=",
+        "0.4",
+        ".0"
+      ],
+      "nodeType": "PragmaDirective",
+      "src": "1:24:0"
+    },
+    {
+      "abstract": false,
+      "baseContracts": [],
+      "canonicalName": "Ballot",
+      "contractDependencies": [],
+      "contractKind": "contract",
+      "fullyImplemented": true,
+      "id": 2,
+      "linearizedBaseContracts":
+      [
+        2
+      ],
+      "name": "Ballot",
+      "nameLocation": "36:6:0",
+      "nodeType": "ContractDefinition",
+      "nodes": [],
+      "scope": 5,
+      "src": "27:20:0",
+      "usedErrors": []
+    },
+    {
+      "abstract": false,
+      "baseContracts": [],
+      "canonicalName": "Ballot2",
+      "contractDependencies": [],
+      "contractKind": "contract",
+      "fullyImplemented": true,
+      "id": 3,
+      "linearizedBaseContracts":
+      [
+        3
+      ],
+      "name": "Ballot2",
+      "nameLocation": "58:7:0",
+      "nodeType": "ContractDefinition",
+      "nodes": [],
+      "scope": 5,
+      "src": "49:21:0",
+      "usedErrors": []
+    },
+    {
+      "abstract": false,
+      "baseContracts": [],
+      "canonicalName": "Ballot3",
+      "contractDependencies": [],
+      "contractKind": "contract",
+      "fullyImplemented": true,
+      "id": 4,
+      "linearizedBaseContracts":
+      [
+        4
+      ],
+      "name": "Ballot3",
+      "nameLocation": "81:7:0",
+      "nodeType": "ContractDefinition",
+      "nodes": [],
+      "scope": 5,
+      "src": "72:21:0",
+      "usedErrors": []
+    }
+  ],
+  "src": "1:92:0"
+}
+        "#;
+        let ast: Ast = serde_json::from_str(ast).unwrap();
+
+        assert_eq!(ast.nodes.len(), 4);
+    }
+}
diff --git a/ethers-solc/src/artifacts/mod.rs b/ethers-solc/src/artifacts/mod.rs
index c15b78ce1..617e2ba23 100644
--- a/ethers-solc/src/artifacts/mod.rs
+++ b/ethers-solc/src/artifacts/mod.rs
@@ -15,6 +15,8 @@ use crate::{compile::*, error::SolcIoError, remappings::Remapping, utils};
 
 use serde::{de::Visitor, Deserialize, Deserializer, Serialize, Serializer};
 
+pub mod ast;
+pub use ast::*;
 pub mod bytecode;
 pub mod contract;
 pub mod output_selection;
@@ -1402,8 +1404,8 @@ pub struct SecondarySourceLocation {
 #[derive(Clone, Debug, Serialize, Deserialize, Eq, PartialEq)]
 pub struct SourceFile {
     pub id: u32,
-    #[serde(default)]
-    pub ast: serde_json::Value,
+    #[serde(default, with = "serde_helpers::empty_json_object_opt")]
+    pub ast: Option<Ast>,
 }
 
 /// A wrapper type for a list of source files
diff --git a/ethers-solc/src/artifacts/serde_helpers.rs b/ethers-solc/src/artifacts/serde_helpers.rs
index 77338ae71..a9bdc33c9 100644
--- a/ethers-solc/src/artifacts/serde_helpers.rs
+++ b/ethers-solc/src/artifacts/serde_helpers.rs
@@ -3,14 +3,14 @@
 use ethers_core::types::Bytes;
 use serde::{Deserialize, Deserializer};
 
-pub fn deserialize_bytes<'de, D>(d: D) -> std::result::Result<Bytes, D::Error>
+pub fn deserialize_bytes<'de, D>(d: D) -> Result<Bytes, D::Error>
 where
     D: Deserializer<'de>,
 {
     String::deserialize(d)?.parse::<Bytes>().map_err(|e| serde::de::Error::custom(e.to_string()))
 }
 
-pub fn deserialize_opt_bytes<'de, D>(d: D) -> std::result::Result<Option<Bytes>, D::Error>
+pub fn deserialize_opt_bytes<'de, D>(d: D) -> Result<Option<Bytes>, D::Error>
 where
     D: Deserializer<'de>,
 {
@@ -70,6 +70,43 @@ pub mod json_string_opt {
     }
 }
 
+/// (de)serializes `None` as the empty json object `{}`; `Some` values round-trip as objects
+pub mod empty_json_object_opt {
+    use serde::{
+        de::{self, DeserializeOwned},
+        ser, Deserialize, Deserializer, Serialize, Serializer,
+    };
+
+    pub fn serialize<T, S>(value: &Option<T>, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+        T: Serialize,
+    {
+        if let Some(value) = value {
+            let value = serde_json::to_value(value).map_err(ser::Error::custom)?;
+            value.serialize(serializer)
+        } else {
+            let empty = serde_json::Value::Object(Default::default());
+            serde_json::Value::serialize(&empty, serializer)
+        }
+    }
+
+    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<Option<T>, D::Error>
+    where
+        D: Deserializer<'de>,
+        T: DeserializeOwned,
+    {
+        let json = serde_json::Value::deserialize(deserializer)?;
+        if json.is_null() {
+            return Ok(None)
+        }
+        if json.as_object().map(|obj| obj.is_empty()).unwrap_or_default() {
+            return Ok(None)
+        }
+        serde_json::from_value(json).map_err(de::Error::custom).map(Some)
+    }
+}
+
 /// serde support for string
 pub mod string_bytes {
     use serde::{Deserialize, Deserializer, Serializer};
@@ -128,6 +165,28 @@ pub mod display_from_str_opt {
     }
 }
 
+pub mod display_from_str {
+    use serde::{de, Deserialize, Deserializer, Serializer};
+    use std::{fmt, str::FromStr};
+
+    pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        T: fmt::Display,
+        S: Serializer,
+    {
+        serializer.collect_str(value)
+    }
+
+    pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
+    where
+        D: Deserializer<'de>,
+        T: FromStr,
+        T::Err: fmt::Display,
+    {
+        String::deserialize(deserializer)?.parse().map_err(de::Error::custom)
+    }
+}
+
 /// (De)serialize vec of tuples as map
 pub mod tuple_vec_map {
     use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize, Serializer};
diff --git a/ethers-solc/src/compile/project.rs b/ethers-solc/src/compile/project.rs
index 49b5eb37a..545cfd2e5 100644
--- a/ethers-solc/src/compile/project.rs
+++ b/ethers-solc/src/compile/project.rs
@@ -692,9 +692,9 @@ mod tests {
         assert_eq!(state.output.sources.len(), 3);
         for (f, source) in &state.output.sources {
             if f.ends_with("A.sol") {
-                assert!(source.ast.is_object());
+                assert!(source.ast.is_some());
             } else {
-                assert!(source.ast.is_null());
+                assert!(source.ast.is_none());
             }
         }
 