BlockstreamResearch · apoelstra · Jul 18, 2023 · Jul 13, 2023 · Jul 13, 2023 · Jul 7, 2023
diff --git a/Cargo.toml b/Cargo.toml
@@ -20,7 +20,7 @@ bitcoin = { version = "0.29.2", optional = true }
 bitcoin_hashes = "0.11"
 byteorder = "1.3"
 elements = { version = "0.21.1", optional = true }
-elements-miniscript = { git = "https://github.com/ElementsProject/elements-miniscript", rev = "955f380" }
+elements-miniscript = { git = "https://github.com/apoelstra/elements-miniscript", tag = "2023-07--rust-simplicity-patch" }
 simplicity-sys = { version = "0.1.0", path = "./simplicity-sys" }
 actual-serde = { package = "serde", version = "1.0.103", features = ["derive"], optional = true }
 

diff --git a/jets-bench/benches/elements/main.rs b/jets-bench/benches/elements/main.rs
@@ -471,7 +471,7 @@ fn bench(c: &mut Criterion) {
         let (src_ty, tgt_ty) = jet_arrow(jet);
         let env = env_sampler.env();
 
-        let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
+        let mut group = c.benchmark_group(&jet.to_string());
         for i in 0..NUM_RANDOM_SAMPLES {
             let params = JetParams::with_rand_aligns(InputSampling::Random);
             let name = format!("{}", i);
@@ -531,7 +531,7 @@ fn bench(c: &mut Criterion) {
         let (src_ty, tgt_ty) = jet_arrow(jet);
         let env = EnvSampling::Null.env();
 
-        let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
+        let mut group = c.benchmark_group(&jet.to_string());
         for i in 0..NUM_RANDOM_SAMPLES {
             let params = JetParams::with_rand_aligns(InputSampling::Custom(inp_fn.clone()));
             let name = format!("{}", i);
@@ -612,7 +612,7 @@ fn bench(c: &mut Criterion) {
     for (jet, index, env_type) in arr {
         let (src_ty, tgt_ty) = jet_arrow(jet);
         let env = env_type.env();
-        let mut group = c.benchmark_group(&format!("{}", jet.to_string()));
+        let mut group = c.benchmark_group(&jet.to_string());
 
         for i in 0..NUM_RANDOM_SAMPLES {
             // We always select the current input because this is where we

diff --git a/src/analysis.rs b/src/analysis.rs
@@ -14,7 +14,10 @@
 
 use crate::jet::Jet;
 use crate::Value;
-use std::{cmp, fmt, io};
+use std::{cmp, fmt};
+
+#[cfg(feature = "elements")]
+use std::io;
 
 #[cfg(feature = "elements")]
 use elements::encode::Encodable;

diff --git a/src/bit_encoding/decode.rs b/src/bit_encoding/decode.rs
@@ -410,7 +410,7 @@ mod tests {
     #[test]
     fn root_unit_to_unit() {
         // main = jet_eq_32 :: 2^64 -> 2 # 7387d279
-        let justjet = vec![0x6d, 0xb8, 0x80];
+        let justjet = [0x6d, 0xb8, 0x80];
         // Should be able to decode this as an expression...
         let mut iter = BitIter::from(&justjet[..]);
         decode_expression::<_, Core>(&mut iter).unwrap();

diff --git a/src/bit_machine/mod.rs b/src/bit_machine/mod.rs
@@ -528,23 +528,23 @@ mod tests {
             prog.cmr().to_string(),
             cmr_str,
             "CMR mismatch (got {} expected {}) for program {}",
-            prog.cmr().to_string(),
+            prog.cmr(),
             cmr_str,
             prog_hex,
         );
         assert_eq!(
             prog.imr().to_string(),
             imr_str,
             "IMR mismatch (got {} expected {}) for program {}",
-            prog.imr().to_string(),
+            prog.imr(),
             imr_str,
             prog_hex,
         );
         assert_eq!(
             prog.amr().to_string(),
             amr_str,
             "AMR mismatch (got {} expected {}) for program {}",
-            prog.amr().to_string(),
+            prog.amr(),
             amr_str,
             prog_hex,
         );

diff --git a/src/human_encoding/README.md b/src/human_encoding/README.md
@@ -52,8 +52,8 @@ and EXPRESSION is
 * `unit`, `iden`, or `witness`;
 * `injl`, `injr`, `take`, or `drop` followed by another EXPRESSION;
 * `case`, `comp`, or `pair` followed by two EXPRESSIONs;
-* `assertl` followed by an EXPRESSION, a literal `#`, and another EXPRESSION;
-* `assertr` followed by a literal `#` and two EXPRESSIONs;
+* `assertl` followed by an EXPRESSION and a CMR (defined below);
+* `assertr` followed by a CMR (defined below) and an EXPRESSION;
 * a jet, which begins with `jet_` and must belong to the list of jets (FIXME define this list);
 * `const` followed by a VALUE (defined below);
 * `fail` followed by an ENTROPY (defined below); or
@@ -63,6 +63,14 @@ Note that while we allow parenthesis to help group parts of expressions for huma
 understanding, they are never needed for disambiguation and are essentially
 ignored by the parser.
 
+A CMR is
+
+* `#{` followed by an EXPRESSION followed by `}`; or
+* `#` followed by 64 hex digits (i.e. 32 bytes).
+
+The first case indicates that an expression should be replaced by its commitment
+Merkle root; the second case just directly encodes the Merkle root.
+
 A HOLE is the literal `?` followed by a NAME. It indicates an expression that has
 yet to be defined. Holes have a different namespace than other names.
 
@@ -151,7 +159,7 @@ Expressions may be
 * one of the core combinators `unit`, `iden`, `comp`, `injl`, `injr`, `case`, `take`, `drop`, `pair`, followed by subexpression(s) as needed;
 * the `disconnect` combinator followed by an expression and a hole;
 * the `witness` combinator which currently allows no subexpressions;
-* the assertions, `assertl` or `assertr`, which take two subexpressions, one of which will be hidden in the decoded program. The hidden subexpression should be prefixed by `#` which indicates to the parser to take the CMR of that expression, not the expression itself.
+* the assertions, `assertl` or `assertr`, which take a subexpression and a CMR. The CMR is encoded as a full expression prefixed by `#{` and suffixed by `}`; but in the bit-encoding the expression does not appear, only its CMR;
-* the assertions, `assertl` or `assertr`, which take a subexpression and a CMR. The CMR is encoded as a full expression prefixed by `#{` and suffixed by `}`; but in the bit-encoding the expression does not appear, only its CMR;
+* the assertions, `assertl` or `assertr`, which take a subexpression and a CMR;
-* the assertions, `assertl` or `assertr`, which take a subexpression and a CMR. The CMR is encoded as a full expression prefixed by `#{` and suffixed by `}`; but in the bit-encoding the expression does not appear, only its CMR;
+* the assertions, `assertl` or `assertr`, which take a subexpression and a CMR;
 * `fail` followed by a 128-to-512-bit entropy value, which should occur only in the pruned branch of an assertion, though this is not enforced;
 * `const` followed by a value, which is a "constant-word jet" and is equivalent to constructing the given value by a tree of `pair`s whose leaves are `injl unit` (0) or `injr unit` (1);
 

diff --git a/src/human_encoding/error.rs b/src/human_encoding/error.rs
@@ -0,0 +1,217 @@
+// Simplicity "Human-Readable" Language
+//
+// To the extent possible under law, the author(s) have dedicated all
+// copyright and related and neighboring rights to this software to
+// the public domain worldwide. This software is distributed without
+// any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication
+// along with this software.
+// If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
+//
+
+//! Parsing Errors
+
+use std::collections::BTreeMap;
+use std::sync::{Arc, Mutex};
+use std::{error, fmt, iter};
+
+use crate::types;
+
+use super::Position;
+
+/// A set of errors found in a human-readable encoding of a Simplicity program.
+///
+/// Errors are grouped by the source position at which they occurred; errors
+/// with no known position are grouped under the `None` key.
+#[derive(Clone, Debug, Default)]
+pub struct ErrorSet {
+    /// The source text the errors refer to, if it has been attached with
+    /// `add_context`.
+    context: Option<Arc<str>>,
+    /// Cache of line offsets into `context`, filled in the first time the set
+    /// is displayed. It lives behind an `Arc`, so clones share the same cache.
+    line_map: Arc<Mutex<Vec<usize>>>,
+    /// The errors themselves, keyed (and therefore ordered) by position.
+    errors: BTreeMap<Option<Position>, Vec<Error>>,
+}
+
+impl ErrorSet {
+    /// Constructs a new empty error set.
+    pub fn new() -> Self {
+        ErrorSet::default()
+    }
+
+    /// Returns the first (and presumably most important) error in the set, if it
+    /// is non-empty, along with its position.
+    ///
+    /// Errors are ordered by position, with position-less errors sorting first.
+    pub fn first_error(&self) -> Option<(Option<Position>, &Error)> {
+        // Use `first()` rather than indexing so that an (unexpected) empty
+        // bucket is skipped instead of causing a panic.
+        self.errors
+            .iter()
+            .find_map(|(pos, errs)| errs.first().map(|err| (*pos, err)))
+    }
+
+    /// Constructs a new error set with a single error in it.
+    pub fn single<P: Into<Position>, E: Into<Error>>(position: P, err: E) -> Self {
+        let mut errors = BTreeMap::default();
+        errors.insert(Some(position.into()), vec![err.into()]);
+        ErrorSet {
+            context: None,
+            line_map: Arc::new(Mutex::new(vec![])),
+            errors,
+        }
+    }
+
+    /// Constructs a new error set with a single error in it, which has no
+    /// position attached.
+    pub fn single_no_position<E: Into<Error>>(err: E) -> Self {
+        let mut errors = BTreeMap::default();
+        errors.insert(None, vec![err.into()]);
+        ErrorSet {
+            context: None,
+            line_map: Arc::new(Mutex::new(vec![])),
+            errors,
+        }
+    }
+
+    /// Adds an error to the error set.
+    ///
+    /// Several errors may be recorded at the same position; they are kept in
+    /// the order in which they were added.
+    pub fn add<P: Into<Position>, E: Into<Error>>(&mut self, position: P, err: E) {
+        self.errors
+            .entry(Some(position.into()))
+            .or_default()
+            .push(err.into());
+    }
+
+    /// Merges another set of errors into the current set.
+    ///
+    /// If only `other` has a context attached, that context is adopted.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the two sets have different contexts attached.
+    pub fn merge(&mut self, other: &Self) {
+        match (self.context.as_ref(), other.context.as_ref()) {
+            (None, None) => {}
+            (Some(_), None) => {}
+            (None, Some(b)) => self.context = Some(Arc::clone(b)),
+            (Some(a), Some(b)) => {
+                assert_eq!(a, b, "cannot merge error sets for different source input");
+            }
+        };
+
+        for (pos, errs) in &other.errors {
+            self.errors
+                .entry(*pos)
+                .or_default()
+                .extend(errs.iter().cloned());
+        }
+    }
+
+    /// Attaches the input code to the error set, so that error messages can include
+    /// line numbers etc.
+    ///
+    /// # Panics
+    ///
+    /// Panics if it is called twice on the same error set. You should call this once
+    /// with the complete input code.
+    pub fn add_context(&mut self, s: Arc<str>) {
+        if self.context.is_some() {
+            panic!("tried to add context to the same error context twice");
+        }
+        self.context = Some(s);
+    }
+
+    /// Returns a boolean indicating whether the set is empty.
+    pub fn is_empty(&self) -> bool {
+        self.errors.is_empty()
+    }
+
+    /// Returns the number of errors currently in the set.
+    ///
+    /// Every individual error is counted, including several errors that
+    /// share one position.
+    pub fn len(&self) -> usize {
+        // The map is keyed by position, so its own `len()` would only count
+        // distinct positions rather than errors.
+        self.errors.values().map(Vec::len).sum()
+    }
+
+    /// Converts the error set into a result.
+    ///
+    /// If the set is empty, returns Ok with the given value. Otherwise
+    /// returns Err with itself.
+    pub fn into_result<T>(self, ok: T) -> Result<T, Self> {
+        if self.is_empty() {
+            Ok(ok)
+        } else {
+            Err(self)
+        }
+    }
+
+    /// Converts the error set into a result.
+    ///
+    /// If the set is empty, returns Ok with the result of calling the given closure.
+    /// Otherwise returns Err with itself.
+    pub fn into_result_with<T, F: FnOnce() -> T>(self, okfn: F) -> Result<T, Self> {
+        if self.is_empty() {
+            Ok(okfn())
+        } else {
+            Err(self)
+        }
+    }
+}
+
+impl error::Error for ErrorSet {
+    /// Returns the underlying cause of the first error in the set, or `None`
+    /// if the set is empty.
+    ///
+    /// This implements `source` rather than the deprecated `cause`; the
+    /// default `cause` forwards to `source`, so existing callers of either
+    /// method keep working.
+    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
+        match self.first_error()?.1 {
+            Error::TypeCheck(ref e) => Some(e),
+        }
+    }
+}
+
+impl fmt::Display for ErrorSet {
+    /// Writes every error in the set, in position order.
+    ///
+    /// When source context has been attached, each positioned error is shown
+    /// beneath the offending source line with a caret under the column.
+    /// Otherwise only `line:column: message` is printed. Errors without a
+    /// position are printed as `Error: message`.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // The map is a pure cache that is only ever replaced wholesale, so a
+        // poisoned lock still guards usable data; recover instead of panicking.
+        let mut line_map = self
+            .line_map
+            .lock()
+            .unwrap_or_else(|poisoned| poisoned.into_inner());
+        if line_map.is_empty() {
+            if let Some(ref s) = self.context {
+                // `line_map[n]` is the byte offset at which line `n` starts.
+                // Index 1 is the first line of the text (offset 0); index 0 is
+                // padding so that a line number of 0 resolves to an empty line.
+                *line_map = iter::repeat(0)
+                    .take(2)
+                    .chain(
+                        s.char_indices()
+                            .filter_map(|(n, ch)| if ch == '\n' { Some(n + 1) } else { None }),
+                    )
+                    .collect();
+            }
+        }
+
+        for (pos, errs) in &self.errors {
+            if let Some(pos) = pos {
+                for err in errs {
+                    if let Some(ref s) = self.context {
+                        // A line number past the end of the text yields an empty
+                        // line rather than an out-of-bounds panic.
+                        let start = line_map.get(pos.line).copied().unwrap_or(s.len());
+                        // The next line starts one byte past this line's newline;
+                        // the last line simply runs to the end of the text.
+                        let end = line_map
+                            .get(pos.line + 1)
+                            .map_or(s.len(), |next| next.saturating_sub(1));
+                        let line = s.get(start..end).unwrap_or("");
+                        writeln!(f, "{:5} | {}", pos.line, line)?;
+                        writeln!(f, "      | {:>width$}", "^", width = pos.column)?;
+                        writeln!(f, "      \\-- {}", err)?;
+                        writeln!(f)?;
+                    } else {
+                        writeln!(f, "{:4}:{:2}: {}", pos.line, pos.column, err)?;
+                        writeln!(f)?;
+                    }
+                }
+            } else {
+                for err in errs {
+                    writeln!(f, "Error: {}", err)?;
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+/// An individual error.
+///
+/// Generally this structure should not be used on its own, but only wrapped in an
+/// [`ErrorSet`]. This is because when processing the human-readable encoding it is
+/// usually possible to continue past individual errors, and the user would prefer
+/// to see as many as possible at once.
+#[derive(Clone, Debug)]
+pub enum Error {
+    /// Simplicity type-checking error
+    TypeCheck(types::Error),
+}
+
+impl From<types::Error> for Error {
+    /// Wraps a type-checking error in the [`Error::TypeCheck`] variant.
+    fn from(err: types::Error) -> Self {
+        Self::TypeCheck(err)
+    }
+}
+
+impl fmt::Display for Error {
+    /// Delegates to the wrapped error, handing the formatter through untouched
+    /// so that any formatting flags still apply.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Self::TypeCheck(inner) => fmt::Display::fmt(inner, f),
+        }
+    }
+}
diff --git a/src/human_encoding/mod.rs b/src/human_encoding/mod.rs
@@ -20,6 +20,7 @@
 //! in a human-readable format.
 //!
 
+mod error;
 mod named_node;
 mod serialize;
 
@@ -31,8 +32,20 @@ use std::collections::HashMap;
 use std::str;
 use std::sync::Arc;
 
+pub use self::error::{Error, ErrorSet};
 pub use self::named_node::NamedCommitNode;
 
+/// Line/column pair
+///
+/// There is a similar type provided by the `santiago` library but it does not implement
+/// `Copy`, among many other traits, which makes it unergonomic to use. Santiago positions
+/// can be converted using `.into()`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Default, Hash)]
+pub struct Position {
+    /// Line number within the source text.
+    // NOTE(review): the `ErrorSet` display code appears to treat lines as
+    // 1-based; confirm against the parser that produces these values.
+    line: usize,
+    /// Column number within the line; used as the caret offset when an error
+    /// is displayed with source context.
+    column: usize,
+}
+
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct Forest<J: Jet> {
     roots: HashMap<Arc<str>, Arc<NamedCommitNode<J>>>,
@@ -80,6 +93,7 @@ impl<J: Jet> Forest<J> {
                 let node = data.node;
                 let name = node.name();
                 let mut expr_str = match node.inner() {
+                    node::Inner::AssertR(cmr, _) => format!("{} := assertr #{}", name, cmr),
                     node::Inner::Fail(entropy) => format!("{} := fail {}", name, entropy),
                     node::Inner::Jet(ref j) => format!("{} := jet_{}", name, j),
                     node::Inner::Word(ref v) => {
@@ -94,6 +108,9 @@ impl<J: Jet> Forest<J> {
                 if let Some(child) = node.right_child() {
                     expr_str.push(' ');
                     expr_str.push_str(child.name());
+                } else if let node::Inner::AssertL(_, cmr) = node.inner() {
+                    expr_str.push_str(" #");
+                    expr_str.push_str(&cmr.to_string());
                 }
 
                 let arrow = node.arrow();