Model-based test driver for LightClient #550

Merged · 15 commits · Sep 11, 2020
174 changes: 174 additions & 0 deletions light-client/tests/model_based.rs
@@ -0,0 +1,174 @@
use serde::Deserialize;
use std::time::Duration;
use tendermint_light_client::components::verifier::Verdict;
use tendermint_light_client::{
    tests::{Trusted, *},
    types::{LightBlock, Time, TrustThreshold},
};
use tendermint_testgen::{apalache::*, jsonatr::*, Command, TestEnv, Tester};

#[derive(Deserialize, Clone, Debug, PartialEq)]
pub enum LiteTestKind {
    SingleStep,
    Bisection,
}

/// An abstraction of the LightClient verification verdict
#[derive(Deserialize, Clone, Debug, PartialEq)]
pub enum LiteVerdict {
    /// verified successfully
    #[serde(rename = "SUCCESS")]
    Success,
    /// outside of trusting period
    #[serde(rename = "FAILED_TRUSTING_PERIOD")]
    FailedTrustingPeriod,
    /// block verification based on the header and commit structure failed
    #[serde(rename = "INVALID")]
    Invalid,
    /// passed block verification, but the validator set is too different to verify it
    #[serde(rename = "NOT_ENOUGH_TRUST")]
    NotEnoughTrust,
}

/// A single-step test case is a test for the `Verifier::verify()` function.
/// It contains an initial trusted block, plus a sequence of input blocks,
/// each with the expected verdict.
/// The trusted state is to be updated only if the verdict is "Success".
#[derive(Deserialize, Clone, Debug)]
pub struct SingleStepTestCase {
    description: String,
    initial: Initial,
    input: Vec<BlockVerdict>,
}

/// A light block together with the time when it is being checked, and the expected verdict
#[derive(Deserialize, Clone, Debug)]
pub struct BlockVerdict {
    block: AnonLightBlock,
    now: Time,
    verdict: LiteVerdict,
}

fn single_step_test(
    tc: SingleStepTestCase,
    _env: &TestEnv,
    _root_env: &TestEnv,
    output_env: &TestEnv,
) {
    let mut latest_trusted = Trusted::new(
        tc.initial.signed_header.clone(),
        tc.initial.next_validator_set.clone(),
    );
    let clock_drift = Duration::from_secs(1);
    let trusting_period: Duration = tc.initial.trusting_period.into();
    for (i, input) in tc.input.iter().enumerate() {
        output_env.logln(&format!(" > step {}, expecting {:?}", i, input.verdict));
        let now = input.now;
        match verify_single(
            latest_trusted.clone(),
            input.block.clone().into(),
            TrustThreshold::default(),
            trusting_period,
            clock_drift,
            now,
        ) {
            Ok(new_state) => {
                assert_eq!(input.verdict, LiteVerdict::Success);
                let expected_state: LightBlock = input.block.clone().into();
                assert_eq!(new_state, expected_state);
                latest_trusted = Trusted::new(new_state.signed_header, new_state.next_validators);
            }
            Err(e) => {
                output_env.logln(&format!(" > lite: {:?}", e));
                match e {
                    Verdict::Invalid(_) => assert_eq!(input.verdict, LiteVerdict::Invalid),
                    Verdict::NotEnoughTrust(_) => {
                        assert_eq!(input.verdict, LiteVerdict::NotEnoughTrust)
                    }
                    Verdict::Success => {
                        panic!("verify_single() returned error with Verdict::Success")
                    }
                }
            }
        }
    }
}

fn model_based_test(
    test: ApalacheTestCase,
    env: &TestEnv,
    root_env: &TestEnv,
    output_env: &TestEnv,
) {
    println!(" Running model-based single-step test case: {}", test.test);
    let check_program = |program| {
        if !Command::exists_program(program) {
            output_env.logln(&format!(" > {} not found", program));
            return false;
        }
        true
    };
    if !check_program("tendermint-testgen")
        || !check_program("apalache-mc")
        || !check_program("jsonatr")
    {
        output_env.logln(" failed to find necessary programs; consider adding them to your PATH. skipping the test");
        return;
    }
    env.copy_file_from_env(root_env, "Lightclient_A_1.tla");
    env.copy_file_from_env(root_env, "Blockchain_A_1.tla");
    env.copy_file_from_env(root_env, "LightTests.tla");
    env.copy_file_from_env(root_env, &test.model);

    println!(" > running Apalache...");
    match run_apalache_test(env.current_dir(), test) {
        Ok(run) => match run {
            ApalacheRun::Counterexample(_) => (),
            run => panic!("{}", run.message()),
        },
        Err(e) => panic!("failed to run Apalache; reason: {}", e),
    }

    let transform_spec = root_env
        .full_canonical_path("_jsonatr-lib/apalache_to_lite_test.json")
        .unwrap();
    let transform = JsonatrTransform {
        input: "counterexample.json".to_string(),
        include: vec![transform_spec],
        output: "test.json".to_string(),
    };
    assert!(run_jsonatr_transform(env.current_dir(), transform).is_ok());
    output_env.copy_file_from_env(env, "test.json");

    let tc: SingleStepTestCase = env.parse_file("test.json").unwrap();
    println!(" > running auto-generated test...");
    single_step_test(tc, env, root_env, output_env);
    output_env.copy_file_from_env(env, "counterexample.tla");
    output_env.copy_file_from_env(env, "counterexample.json");
}

fn model_based_test_batch(batch: ApalacheTestBatch) -> Vec<(String, String)> {
    let mut res = Vec::new();
    for test in batch.tests {
        let tc = ApalacheTestCase {
            model: batch.model.clone(),
            test: test.clone(),
            length: batch.length,
            timeout: batch.timeout,
        };
        res.push((test.clone(), serde_json::to_string(&tc).unwrap()));
    }
    res
}

const TEST_DIR: &str = "./tests/support/model_based";

#[test]
fn run_model_based_single_step_tests() {
    let mut tester = Tester::new("single_step", TEST_DIR);
    tester.add_test_with_env("static model-based single-step test", single_step_test);
    tester.add_test_with_env("full model-based single-step test", model_based_test);
    tester.add_test_batch(model_based_test_batch);
    tester.run_foreach_in_dir("");
    tester.finalize();
}
50 changes: 50 additions & 0 deletions light-client/tests/support/model_based/Abstract.md
@@ -0,0 +1,50 @@
# Model-based testing with TLA+ and Apalache

In this talk we present our instantiation of model-based testing (MBT) using the TLA+ modeling language and the Apalache model checker. We have built infrastructure that makes model-based testing appealing and beneficial for software developers to apply in their everyday development practice -- much more so than the standard integration testing with manually written tests that we employed previously.

Our model-based testing procedure finds its rightful place in the verification-driven development (VDD) process we apply at Informal Systems. More concretely, we use it as an alternative approach to integration testing of the Rust implementation of the fault-tolerant distributed consensus protocol named Tendermint. In the VDD process, we start with a semi-formal English-language description of the protocol, which is then formally specified in TLA+ and verified by our Apalache model checker. The TLA+ specification is used as a reference model for clarifying misunderstandings both at the English-language level and at the level of the concrete Rust implementation. Model-based tests are expressed in pure TLA+, and represent simple assertions on the computation history the user wants to examine. Consider the model-based test below:

```tla+
Test2NoSuccess ==
    /\ state = "finishedSuccess"
    /\ \E s1, s2 \in DOMAIN history:
        /\ s1 /= s2
        /\ history[s1].verdict /= "SUCCESS"
        /\ history[s2].verdict /= "SUCCESS"

```

This is a model-based test of the Tendermint LightClient sub-protocol, which is used to establish trust in a blockchain block retrieved from an untrusted source. The test describes a computation history in which the protocol execution, despite passing through two distinct intermediate states with unsuccessful block verification, still finishes successfully. This model-based test is then used to automatically generate and execute a complete integration test for the Tendermint LightClient protocol: e.g., in a Tendermint network with 4 validators, these 6 lines of TLA+ are translated into more than 500 lines of JSON integration test with all the necessary details: private and public keys, signatures, timestamps, etc.
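To give a feel for the shape of the generated artifact, here is a minimal, heavily abbreviated sketch; all field values are hypothetical placeholders, and the exact encoding of `initial` is assumed rather than quoted. The field layout mirrors the `SingleStepTestCase` structure consumed by the test driver in `model_based.rs` above:

```rust
// Hypothetical, abbreviated fragment in the shape the driver's
// SingleStepTestCase expects; the real generated test.json is ~500 lines
// with complete headers, validator sets, timestamps, and signatures.
fn main() {
    let generated = r#"{
        "description": "auto-generated from Test2NoSuccess",
        "initial": {
            "signed_header": "<full signed header here>",
            "next_validator_set": "<full validator set here>",
            "trusting_period": 1400000000000
        },
        "input": [
            { "block": "<light block>", "now": "2020-01-01T00:10:00Z",
              "verdict": "NOT_ENOUGH_TRUST" },
            { "block": "<light block>", "now": "2020-01-01T00:20:00Z",
              "verdict": "SUCCESS" }
        ]
    }"#;
    // Parse generically here; the driver deserializes into SingleStepTestCase.
    let tc: serde_json::Value = serde_json::from_str(generated).expect("valid JSON");
    assert_eq!(tc["input"][0]["verdict"], "NOT_ENOUGH_TRUST");
}
```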

In our presentation we would like to convey three main messages on model-based testing, which we describe below.

## Model-based testing with TLA+ is easy to understand, and can be successfully applied in industry to provide correctness guarantees for real-world software.

In our initial experience of implementing and applying MBT, we find its simplicity for the end user, be it a researcher or a developer, quite appealing. Model-based tests are abstract and concise; they can precisely describe what behavior needs to be tested. At the same time, in sharp contrast to standard integration tests, they leave a lot of details unspecified. An important feature of MBT is that these missing details are instantiated in different ways with each run of the model-based test; moreover, this behavior can easily be employed to _exhaustively_ test all scenarios falling under the abstract test. In the above example, the intermediate results are only restricted to be unsuccessful, but the concrete verification errors are left open. The model checker will indeed instantiate a different combination of failures for each run: the block falling out of the trusting period, not enough trust in the validator set, signature validation errors, etc.

One of the main reasons why Tendermint modeling and model-based testing have been successfully implemented and accepted within our teams is the rich syntax and semantics of the TLA+ language. TLA+ allowed us to concisely model the complex aspects of the Tendermint protocol, as well as to express correctness properties and tests about it. In particular, even for developers without prior exposure to TLA+, following the models written by our researchers has helped to clarify the protocol abstractions before diving into the implementation, thus avoiding the painful experience and countless hours of correcting the abstractions after the implementation is already done.


## Substantial infrastructure is necessary for model-based testing, but the complexity is reasonable.

Some infrastructure naturally needs to be implemented in order to apply MBT. The good news is that a lot of that infrastructure is reusable: some for different components of the same product, some for different products or even different companies. Within our first MBT project we have implemented the following infrastructure components, highlighted in the figure below (a condensed sketch of how the test driver chains them follows this list):

* Apalache model checker extension for providing JSON input/output (reusable globally);
* JSON Artifact Translator (Jsonatr), which, with the help of a transformation specification, maps an abstract counterexample produced by the model checker into a concrete integration test (the tool is reusable globally; the transformation spec is specific to a product);
* Test generator (Testgen), which plays the role of a _concretization function_, mapping abstract model states into concrete implementation states (reusable for multiple products sharing the same data structures);
* Model-based test driver, which orchestrates the above infrastructure components to run them within the standard continuous integration (CI) framework; for Rust this is done via the standard `cargo test` command (used only for a specific product/component, but quite minimalistic).
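As a rough sketch of how the driver glues these components together, condensed from `model_based_test()` in `model_based.rs` in this PR (program checks, file copying, and logging elided; `SingleStepTestCase` and `single_step_test` are defined in that file):

```rust
use tendermint_testgen::{apalache::*, jsonatr::*, TestEnv};

// Condensed pipeline, mirroring model_based_test() in model_based.rs.
fn generate_and_run(env: &TestEnv, root_env: &TestEnv, test: ApalacheTestCase) {
    // 1. Model-check the TLA+ test; a counterexample *is* the abstract test.
    match run_apalache_test(env.current_dir(), test) {
        Ok(ApalacheRun::Counterexample(_)) => (),
        _ => panic!("expected Apalache to produce a counterexample"),
    }
    // 2. Jsonatr concretizes the counterexample into an integration test.
    let transform = JsonatrTransform {
        input: "counterexample.json".to_string(),
        include: vec![root_env
            .full_canonical_path("_jsonatr-lib/apalache_to_lite_test.json")
            .unwrap()],
        output: "test.json".to_string(),
    };
    assert!(run_jsonatr_transform(env.current_dir(), transform).is_ok());
    // 3. The generated test is parsed and executed like a hand-written one:
    // let tc: SingleStepTestCase = env.parse_file("test.json").unwrap();
    // single_step_test(tc, env, root_env, output_env);
}
```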

Implementation of the above infrastructure took us about 3 person-months of work, which is quite a reasonable cost taking into account the reusability of most of the components.

![Model-based testing of the Tendermint Light Client](LightClientMBT.png)


## MBT can serve as a "missing link" to synchronize formal specification and concrete implementation, and to connect academia and industry.


The original VDD process, while highly successful as acknowledged by both our research and development teams, still lacks some necessary feedback loops. We have observed that after some time the English specification, the TLA+ model, and the concrete implementation start to diverge. Our initial experience with MBT suggests that it can successfully play the role of this missing link, providing feedback loops between the three levels of abstraction. In particular, in this talk we will demonstrate with concrete examples how MBT has helped us to eliminate the growing divergence between the abstraction levels with the help of simple model-based tests.

Another aspect that can be observed in the figure is that MBT brings a research engineer into the process, as _some_ research experience is necessary for implementing the models and tests concisely and efficiently. What we have observed quite frequently is that while academic researchers often excel at devising new verification approaches, they struggle to find real-world applications for their methods. Developers in industry, on the other hand, desperately need _some_ level of formal reasoning to be applied to their products in order to provide reasonable correctness guarantees within time and budget bounds. Our experience demonstrates that the advantages of MBT can be made convincing enough to justify commercial companies hiring researchers as staff or contractors to apply MBT to their products, and thus to establish the long-needed dialog between academia and industry.



1 change: 1 addition & 0 deletions light-client/tests/support/model_based/Blockchain_A_1.tla
40 changes: 40 additions & 0 deletions light-client/tests/support/model_based/LightTests.tla
@@ -0,0 +1,40 @@
------------------------- MODULE LightTests ---------------------------

EXTENDS Lightclient_A_1

TestFailure ==
    /\ state = "finishedFailure"
    /\ Cardinality(DOMAIN fetchedLightBlocks) = TARGET_HEIGHT

TestSuccess ==
    /\ state = "finishedSuccess"
    /\ Cardinality(DOMAIN fetchedLightBlocks) = TARGET_HEIGHT

\* This test never produces a counterexample; so the model should be corrected
TestFailedTrustingPeriod ==
    \E s \in DOMAIN history :
        history[s].verdict = "FAILED_TRUSTING_PERIOD"

Test2NotEnoughTrustSuccess ==
    /\ state = "finishedSuccess"
    /\ \E s1, s2 \in DOMAIN history :
        /\ s1 /= s2
        /\ history[s1].verdict = "NOT_ENOUGH_TRUST"
        /\ history[s2].verdict = "NOT_ENOUGH_TRUST"

Test2NotEnoughTrustFailure ==
    /\ state = "finishedFailure"
    /\ \E s1, s2 \in DOMAIN history :
        /\ s1 /= s2
        /\ history[s1].verdict = "NOT_ENOUGH_TRUST"
        /\ history[s2].verdict = "NOT_ENOUGH_TRUST"

Test3NotEnoughTrustSuccess ==
    /\ state = "finishedSuccess"
    /\ \E s1, s2, s3 \in DOMAIN history :
        /\ s1 /= s2 /\ s2 /= s3 /\ s1 /= s3
        /\ history[s1].verdict = "NOT_ENOUGH_TRUST"
        /\ history[s2].verdict = "NOT_ENOUGH_TRUST"
        /\ history[s3].verdict = "NOT_ENOUGH_TRUST"

============================================================================
1 change: 1 addition & 0 deletions light-client/tests/support/model_based/Lightclient_A_1.tla
11 changes: 11 additions & 0 deletions light-client/tests/support/model_based/MC4_4_faulty.json
@@ -0,0 +1,11 @@
{
    "description": "Tests for 4 validators, target height 4, with faulty primary",
    "model": "MC4_4_faulty.tla",
    "timeout": 60,
    "tests": [
        "TestSuccess",
        "TestFailure",
        "Test2NotEnoughTrustSuccess",
        "Test2NotEnoughTrustFailure"
    ]
}
16 changes: 16 additions & 0 deletions light-client/tests/support/model_based/MC4_4_faulty.tla
@@ -0,0 +1,16 @@
------------------------- MODULE MC4_4_faulty ---------------------------

AllNodes == {"n1", "n2", "n3", "n4"}
TRUSTED_HEIGHT == 1
TARGET_HEIGHT == 4
TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-)
IS_PRIMARY_CORRECT == FALSE

VARIABLES
    state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified,
    history, nprobes,
    now, blockchain, Faulty

INSTANCE LightTests

============================================================================