Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Human-readable encoding 2: cleanups and syntax changes #158

Merged
merged 10 commits into from
Jul 18, 2023
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ bitcoin = { version = "0.29.2", optional = true }
bitcoin_hashes = "0.11"
byteorder = "1.3"
elements = { version = "0.21.1", optional = true }
elements-miniscript = { git = "https://github.com/ElementsProject/elements-miniscript", rev = "955f380" }
elements-miniscript = { git = "https://github.com/apoelstra/elements-miniscript", tag = "2023-07--rust-simplicity-patch" }
simplicity-sys = { version = "0.1.0", path = "./simplicity-sys" }
actual-serde = { package = "serde", version = "1.0.103", features = ["derive"], optional = true }

Expand Down
6 changes: 3 additions & 3 deletions jets-bench/benches/elements/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ fn bench(c: &mut Criterion) {
let (src_ty, tgt_ty) = jet_arrow(jet);
let env = env_sampler.env();

let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
let mut group = c.benchmark_group(&jet.to_string());
for i in 0..NUM_RANDOM_SAMPLES {
let params = JetParams::with_rand_aligns(InputSampling::Random);
let name = format!("{}", i);
Expand Down Expand Up @@ -531,7 +531,7 @@ fn bench(c: &mut Criterion) {
let (src_ty, tgt_ty) = jet_arrow(jet);
let env = EnvSampling::Null.env();

let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
let mut group = c.benchmark_group(&jet.to_string());
for i in 0..NUM_RANDOM_SAMPLES {
let params = JetParams::with_rand_aligns(InputSampling::Custom(inp_fn.clone()));
let name = format!("{}", i);
Expand Down Expand Up @@ -612,7 +612,7 @@ fn bench(c: &mut Criterion) {
for (jet, index, env_type) in arr {
let (src_ty, tgt_ty) = jet_arrow(jet);
let env = env_type.env();
let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
let mut group = c.benchmark_group(&jet.to_string());

for i in 0..NUM_RANDOM_SAMPLES {
// We always select the current input because this is where we
Expand Down
5 changes: 4 additions & 1 deletion src/analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@

use crate::jet::Jet;
use crate::Value;
use std::{cmp, fmt, io};
use std::{cmp, fmt};

#[cfg(feature = "elements")]
use std::io;

#[cfg(feature = "elements")]
use elements::encode::Encodable;
Expand Down
2 changes: 1 addition & 1 deletion src/bit_encoding/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ mod tests {
#[test]
fn root_unit_to_unit() {
// main = jet_eq_32 :: 2^64 -> 2 # 7387d279
let justjet = vec![0x6d, 0xb8, 0x80];
let justjet = [0x6d, 0xb8, 0x80];
// Should be able to decode this as an expression...
let mut iter = BitIter::from(&justjet[..]);
decode_expression::<_, Core>(&mut iter).unwrap();
Expand Down
6 changes: 3 additions & 3 deletions src/bit_machine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,23 +528,23 @@ mod tests {
prog.cmr().to_string(),
cmr_str,
"CMR mismatch (got {} expected {}) for program {}",
prog.cmr().to_string(),
prog.cmr(),
cmr_str,
prog_hex,
);
assert_eq!(
prog.imr().to_string(),
imr_str,
"IMR mismatch (got {} expected {}) for program {}",
prog.imr().to_string(),
prog.imr(),
imr_str,
prog_hex,
);
assert_eq!(
prog.amr().to_string(),
amr_str,
"AMR mismatch (got {} expected {}) for program {}",
prog.amr().to_string(),
prog.amr(),
amr_str,
prog_hex,
);
Expand Down
14 changes: 11 additions & 3 deletions src/human_encoding/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ and EXPRESSION is
* `unit`, `iden`, or `witness`;
* `injl`, `injr`, `take`, or `drop` followed by another EXPRESSION;
* `case`, `comp`, or `pair` followed by two EXPRESSIONs;
* `assertl` followed by an EXPRESSION, a literal `#`, and another EXPRESSION;
* `assertr` followed by a literal `#` and two EXPRESSIONs;
* `assertl` followed by an EXPRESSION and a CMR (defined below);
* `assertr` followed by a CMR and an EXPRESSION;
* a jet, which begins with `jet_` and must belong to the list of jets (FIXME define this list);
* `const` followed by a VALUE (defined below);
* `fail` followed by an ENTROPY (defined below); or
Expand All @@ -63,6 +63,14 @@ Note that while we allow parenthesis to help group parts of expressions for huma
understanding, they are never needed for disambiguation and are essentially
ignored by the parser.

A CMR is

* `#{` followed by an expression followed by `}`; or
* `#` followed by 64 hex digits (i.e. 32 bytes).

The first case indicates that an expression should be replaced by its commitment
Merkle root; the second case just directly encodes the Merkle root.

A HOLE is the literal `?` followed by a NAME. It indicates an expression that has
yet to be defined. Holes have a different namespace than other names.

Expand Down Expand Up @@ -151,7 +159,7 @@ Expressions may be
* one of the core combinators `unit`, `iden`, `comp`, `injl`, `injr`, `case`, `take`, `drop`, `pair`, followed by subexpression(s) as needed;
* the `disconnect` combinator followed by an expression and a hole;
* the `witness` combinator which currently allows no subexpressions;
* the assertions, `assertl` or `assertr`, which take two subexpressions, one of which will be hidden in the decoded program. The hidden subexpression should be prefixed by `#` which indicates to the parser to take the CMR of that expression, not the expression itself.
* the assertions, `assertl` or `assertr`, which take a subexpression and a CMR. The CMR is encoded as a full expression prefixed by `#{` and suffixed by `}`; but in the bit-encoding the expression does not appear, only its CMR;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

64c6ea4: Typo, and the definition of CMR was extended. I would leave out the latter part because it is already explained in an earlier section.

Suggested change
* the assertions, `assertl` or `assertr`, which take a subexpressions and a CMR. The CMR is encoded as a full expression prefixed by `#{` and suffixed by `}`; but in the bit-encoding the expression does not appear, only its CMR;
the assertions, `assertl` or `assertr`, which take a subexpression and a CMR;

* `fail` followed by a 128-to-512-bit entropy value, which should occur only in the pruned branch of an assertion, though this is not enforced;
* `const` followed by a value, which is a "constant-word jet" and is equivalent to constructing the given value by a tree of `pair`s whose leaves are `injl unit` (0) or `injr unit` (1);

Expand Down
217 changes: 217 additions & 0 deletions src/human_encoding/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
// Simplicity "Human-Readable" Language
//
// To the extent possible under law, the author(s) have dedicated all
// copyright and related and neighboring rights to this software to
// the public domain worldwide. This software is distributed without
// any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication
// along with this software.
// If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
//

//! Parsing Errors

use std::collections::BTreeMap;
use std::sync::{Arc, Mutex};
use std::{error, fmt, iter};

use crate::types;

use super::Position;

/// A set of errors found in a human-readable encoding of a Simplicity program.
///
/// Errors are grouped by the source position they were reported at; errors that
/// have no position are stored under the `None` key.
#[derive(Clone, Debug, Default)]
pub struct ErrorSet {
/// Source text that the positions refer to, once it has been attached with
/// `add_context`.
context: Option<Arc<str>>,
/// Cache filled in lazily by the `Display` impl: two leading zeros followed by
/// the byte offset of every `'\n'` in `context`. Because it sits behind an `Arc`,
/// clones of a set share the same cache.
line_map: Arc<Mutex<Vec<usize>>>,
/// The errors themselves, keyed (and therefore iterated) in position order.
errors: BTreeMap<Option<Position>, Vec<Error>>,
}

impl ErrorSet {
/// Constructs a new empty error set.
pub fn new() -> Self {
ErrorSet::default()
}

/// Returns the first (and presumably most important) error in the set, if it
/// is non-empty, along with its position.
pub fn first_error(&self) -> Option<(Option<Position>, &Error)> {
self.errors.iter().next().map(|(a, b)| (*a, &b[0]))
}

/// Constructs a new error set with a single error in it.
pub fn single<P: Into<Position>, E: Into<Error>>(position: P, err: E) -> Self {
let mut errors = BTreeMap::default();
errors.insert(Some(position.into()), vec![err.into()]);
ErrorSet {
context: None,
line_map: Arc::new(Mutex::new(vec![])),
errors,
}
}

/// Constructs a new error set with a single error in it.
pub fn single_no_position<E: Into<Error>>(err: E) -> Self {
let mut errors = BTreeMap::default();
errors.insert(None, vec![err.into()]);
ErrorSet {
context: None,
line_map: Arc::new(Mutex::new(vec![])),
errors,
}
}

/// Adds an error to the error set.
pub fn add<P: Into<Position>, E: Into<Error>>(&mut self, position: P, err: E) {
self.errors
.entry(Some(position.into()))
.or_insert(vec![])
.push(err.into());
}

/// Merges another set of errors into the current set.
///
/// # Panics
///
/// Panics if the two sets have different contexts attached.
pub fn merge(&mut self, other: &Self) {
match (self.context.as_ref(), other.context.as_ref()) {
(None, None) => {}
(Some(_), None) => {}
(None, Some(b)) => self.context = Some(Arc::clone(b)),
(Some(a), Some(b)) => {
assert_eq!(a, b, "cannot merge error sets for different source input");
}
};

for (pos, errs) in &other.errors {
self.errors
.entry(*pos)
.or_insert(vec![])
.extend(errs.iter().cloned());
}
}

/// Attaches the input code to the error set, so that error messages can include
/// line numbers etc.
///
/// # Panics
///
/// Panics if it is called twice on the same error set. You should call this once
/// with the complete input code.
pub fn add_context(&mut self, s: Arc<str>) {
if self.context.is_some() {
panic!("tried to add context to the same error context twice");
}
self.context = Some(s);
}

/// Returns a boolean indicating whether the set is empty.
pub fn is_empty(&self) -> bool {
self.errors.is_empty()
}

/// Returns the number of errors currently in the set.
pub fn len(&self) -> usize {
self.errors.len()
}

/// Converts the error set into a result.
///
/// If the set is empty, returns Ok with the given value. Otherwise
/// returns Err with itself.
pub fn into_result<T>(self, ok: T) -> Result<T, Self> {
if self.is_empty() {
Ok(ok)
} else {
Err(self)
}
}

/// Converts the error set into a result.
///
/// If the set is empty, returns Ok with the result of calling the given closure.
/// Otherwise returns Err with itself.
pub fn into_result_with<T, F: FnOnce() -> T>(self, okfn: F) -> Result<T, Self> {
if self.is_empty() {
Ok(okfn())
} else {
Err(self)
}
}
}

impl error::Error for ErrorSet {
fn cause(&self) -> Option<&(dyn error::Error + 'static)> {
match self.first_error()?.1 {
Error::TypeCheck(ref e) => Some(e),
}
}
}

impl fmt::Display for ErrorSet {
    /// Writes every error in position order.
    ///
    /// When source context is attached, each positioned error is shown with the
    /// offending source line, a caret under the reported column and the message.
    /// Without context it is shown as `line:column: message`. Errors that have no
    /// position are written as `Error: message`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The line map is only a cache, so it is still usable if a previous holder
        // of the lock panicked; recover it rather than panicking inside `Display`.
        let mut line_map = self.line_map.lock().unwrap_or_else(|e| e.into_inner());
        if line_map.is_empty() {
            if let Some(ref s) = self.context {
                // Layout: two leading zeros, then the byte offset of every '\n'.
                // For a line number n >= 2, entry n is the newline that ends line
                // n - 1 and entry n + 1 is the newline that ends line n.
                *line_map = iter::repeat(0)
                    .take(2)
                    .chain(
                        s.char_indices()
                            .filter(|&(_, ch)| ch == '\n')
                            .map(|(n, _)| n),
                    )
                    .collect();
            }
        }

        for (pos, errs) in &self.errors {
            if let Some(pos) = pos {
                for err in errs {
                    if let Some(ref s) = self.context {
                        // The first line starts at offset 0: there is no newline
                        // before it to skip. Later lines start just past the
                        // newline that ends the previous line.
                        let start = if pos.line <= 1 {
                            0
                        } else {
                            line_map.get(pos.line).map_or(s.len(), |&nl| nl + 1)
                        };
                        let end = line_map.get(pos.line + 1).copied().unwrap_or(s.len());
                        // A position outside the attached text yields an empty
                        // line instead of a slicing panic.
                        let line = s.get(start..end).unwrap_or("");
                        writeln!(f, "{:5} | {}", pos.line, line)?;
                        // Same gutter width as the line above, so the caret lands
                        // under the reported column.
                        writeln!(f, "      | {:>width$}", "^", width = pos.column)?;
                        writeln!(f, "      \\-- {}", err)?;
                        writeln!(f)?;
                    } else {
                        writeln!(f, "{:4}:{:2}: {}", pos.line, pos.column, err,)?;
                        writeln!(f)?;
                    }
                }
            } else {
                for err in errs {
                    writeln!(f, "Error: {}", err)?;
                }
            }
        }
        Ok(())
    }
}

/// An individual error.
///
/// Generally this type should not be used on its own, but only wrapped in an
/// [`ErrorSet`]. This is because when processing the human-readable encoding it is
/// usually possible to continue past individual errors, and the user would prefer
/// to see as many as possible at once.
#[derive(Clone, Debug)]
pub enum Error {
/// Simplicity type-checking error
TypeCheck(types::Error),
}

impl From<types::Error> for Error {
fn from(e: types::Error) -> Self {
Error::TypeCheck(e)
}
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::TypeCheck(ref e) => fmt::Display::fmt(e, f),
}
}
}
17 changes: 17 additions & 0 deletions src/human_encoding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
//! in a human-readable format.
//!

mod error;
mod named_node;
mod serialize;

Expand All @@ -31,8 +32,20 @@ use std::collections::HashMap;
use std::str;
use std::sync::Arc;

pub use self::error::{Error, ErrorSet};
pub use self::named_node::NamedCommitNode;

/// Line/column pair
///
/// There is a similar type provided by the `santiago` library but it does not implement
/// `Copy`, among many other traits, which makes it unergonomic to use. Santiago positions
/// can be converted using `.into()`.
///
/// Positions compare by line first and then by column, since `Ord` is derived over
/// the fields in declaration order.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Default, Hash)]
pub struct Position {
// NOTE(review): presumably 1-based, as is usual for parser positions — confirm.
line: usize,
// NOTE(review): presumably 1-based as well — confirm.
column: usize,
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Forest<J: Jet> {
roots: HashMap<Arc<str>, Arc<NamedCommitNode<J>>>,
Expand Down Expand Up @@ -80,6 +93,7 @@ impl<J: Jet> Forest<J> {
let node = data.node;
let name = node.name();
let mut expr_str = match node.inner() {
node::Inner::AssertR(cmr, _) => format!("{} := assertr #{}", name, cmr),
node::Inner::Fail(entropy) => format!("{} := fail {}", name, entropy),
node::Inner::Jet(ref j) => format!("{} := jet_{}", name, j),
node::Inner::Word(ref v) => {
Expand All @@ -94,6 +108,9 @@ impl<J: Jet> Forest<J> {
if let Some(child) = node.right_child() {
expr_str.push(' ');
expr_str.push_str(child.name());
} else if let node::Inner::AssertL(_, cmr) = node.inner() {
expr_str.push_str(" #");
expr_str.push_str(&cmr.to_string());
}

let arrow = node.arrow();
Expand Down
Loading