diff --git a/light-client/tests/model_based.rs b/light-client/tests/model_based.rs
new file mode 100644
index 000000000..6bf80ed4f
--- /dev/null
+++ b/light-client/tests/model_based.rs
@@ -0,0 +1,174 @@
+use serde::Deserialize;
+use std::time::Duration;
+use tendermint_light_client::components::verifier::Verdict;
+use tendermint_light_client::{
+    tests::{Trusted, *},
+    types::{LightBlock, Time, TrustThreshold},
+};
+use tendermint_testgen::{apalache::*, jsonatr::*, Command, TestEnv, Tester};
+
+#[derive(Deserialize, Clone, Debug, PartialEq)]
+pub enum LiteTestKind {
+    SingleStep,
+    Bisection,
+}
+
+/// An abstraction of the LightClient verification verdict
+#[derive(Deserialize, Clone, Debug, PartialEq)]
+pub enum LiteVerdict {
+    /// verified successfully
+    #[serde(rename = "SUCCESS")]
+    Success,
+    /// outside of trusting period
+    #[serde(rename = "FAILED_TRUSTING_PERIOD")]
+    FailedTrustingPeriod,
+    /// block verification based on the header and commit structure failed
+    #[serde(rename = "INVALID")]
+    Invalid,
+    /// passed block verification, but the validator set is too different to verify it
+    #[serde(rename = "NOT_ENOUGH_TRUST")]
+    NotEnoughTrust,
+}
+
+/// A single-step test case is a test for the `Verifier::verify()` function.
+/// It contains an initial trusted block, plus a sequence of input blocks,
+/// each with the expected verdict.
+/// The trusted state is to be updated only if the verdict is "Success".
+#[derive(Deserialize, Clone, Debug)]
+pub struct SingleStepTestCase {
+    description: String,
+    initial: Initial,
+    input: Vec<BlockVerdict>,
+}
+
+/// A LiteBlock together with the time when it's being checked, and the expected verdict
+#[derive(Deserialize, Clone, Debug)]
+pub struct BlockVerdict {
+    block: AnonLightBlock,
+    now: Time,
+    verdict: LiteVerdict,
+}
+
+fn single_step_test(
+    tc: SingleStepTestCase,
+    _env: &TestEnv,
+    _root_env: &TestEnv,
+    output_env: &TestEnv,
+) {
+    let mut latest_trusted = Trusted::new(
+        tc.initial.signed_header.clone(),
+        tc.initial.next_validator_set.clone(),
+    );
+    let clock_drift = Duration::from_secs(1);
+    let trusting_period: Duration = tc.initial.trusting_period.into();
+    for (i, input) in tc.input.iter().enumerate() {
+        output_env.logln(&format!(" > step {}, expecting {:?}", i, input.verdict));
+        let now = input.now;
+        match verify_single(
+            latest_trusted.clone(),
+            input.block.clone().into(),
+            TrustThreshold::default(),
+            trusting_period,
+            clock_drift,
+            now,
+        ) {
+            Ok(new_state) => {
+                assert_eq!(input.verdict, LiteVerdict::Success);
+                let expected_state: LightBlock = input.block.clone().into();
+                assert_eq!(new_state, expected_state);
+                latest_trusted = Trusted::new(new_state.signed_header, new_state.next_validators);
+            }
+            Err(e) => {
+                output_env.logln(&format!(" > lite: {:?}", e));
+                match e {
+                    Verdict::Invalid(_) => assert_eq!(input.verdict, LiteVerdict::Invalid),
+                    Verdict::NotEnoughTrust(_) => {
+                        assert_eq!(input.verdict, LiteVerdict::NotEnoughTrust)
+                    }
+                    Verdict::Success => {
+                        panic!("verify_single() returned error with Verdict::Success")
+                    }
+                }
+            }
+        }
+    }
+}
+
+fn model_based_test(
+    test: ApalacheTestCase,
+    env: &TestEnv,
+    root_env: &TestEnv,
+    output_env: &TestEnv,
+) {
+    println!(" Running model-based single-step test case: {}", test.test);
+    let check_program = |program| {
+        if !Command::exists_program(program) {
+            output_env.logln(&format!(" > {} not found", program));
+            return false;
+        }
+        true
+    };
+    if !check_program("tendermint-testgen")
+        || !check_program("apalache-mc")
+        || !check_program("jsonatr")
+    {
output_env.logln(" failed to find necessary programs; consider adding them to your PATH. skipping the test"); + return; + } + env.copy_file_from_env(root_env, "Lightclient_A_1.tla"); + env.copy_file_from_env(root_env, "Blockchain_A_1.tla"); + env.copy_file_from_env(root_env, "LightTests.tla"); + env.copy_file_from_env(root_env, &test.model); + + println!(" > running Apalache..."); + match run_apalache_test(env.current_dir(), test) { + Ok(run) => match run { + ApalacheRun::Counterexample(_) => (), + run => panic!(run.message().to_string()), + }, + Err(e) => panic!("failed to run Apalache; reason: {}", e), + } + + let transform_spec = root_env + .full_canonical_path("_jsonatr-lib/apalache_to_lite_test.json") + .unwrap(); + let transform = JsonatrTransform { + input: "counterexample.json".to_string(), + include: vec![transform_spec], + output: "test.json".to_string(), + }; + assert!(run_jsonatr_transform(env.current_dir(), transform).is_ok()); + output_env.copy_file_from_env(env, "test.json"); + + let tc: SingleStepTestCase = env.parse_file("test.json").unwrap(); + println!(" > running auto-generated test..."); + single_step_test(tc, env, root_env, output_env); + output_env.copy_file_from_env(env, "counterexample.tla"); + output_env.copy_file_from_env(env, "counterexample.json"); +} + +fn model_based_test_batch(batch: ApalacheTestBatch) -> Vec<(String, String)> { + let mut res = Vec::new(); + for test in batch.tests { + let tc = ApalacheTestCase { + model: batch.model.clone(), + test: test.clone(), + length: batch.length, + timeout: batch.timeout, + }; + res.push((test.clone(), serde_json::to_string(&tc).unwrap())); + } + res +} + +const TEST_DIR: &str = "./tests/support/model_based"; + +#[test] +fn run_model_based_single_step_tests() { + let mut tester = Tester::new("single_step", TEST_DIR); + tester.add_test_with_env("static model-based single-step test", single_step_test); + tester.add_test_with_env("full model-based single-step test", model_based_test); + tester.add_test_batch(model_based_test_batch); + tester.run_foreach_in_dir(""); + tester.finalize(); +} diff --git a/light-client/tests/support/model_based/Abstract.md b/light-client/tests/support/model_based/Abstract.md new file mode 100644 index 000000000..5a1da00ea --- /dev/null +++ b/light-client/tests/support/model_based/Abstract.md @@ -0,0 +1,50 @@ +# Model-based testing with TLA+ and Apalache + +In this talk we present our instantiation of model-based testing (MBT) using the TLA+ modeling language and the Apalache model checker. In our work we have created the infrastructure that facilitates model-based testing, and makes it appealing and beneficial for software developers to apply in their everyday development practice -- much more so than the standard integration testing with manually written tests that we've employed previously. + +Our model-based testing procedure finds its rightful place in the verification-driven development process (VDD) we apply at Informal Systems. More concretely, we use it as an alternative approach to integration testing of the implementation in the Rust programming language of the fault-tolerant distributed consensus protocol named Tendermint. In the VDD process, we start with the semi-formal English language description of the protocol, which is then formally specified in TLA+, and verified by our Apalache model checker. The TLA+ specification is used as a reference model for clarifying misunderstanding both at the English language level, and at the level of the concrete implementation in Rust. 
+Model-based tests are expressed in pure TLA+, and represent simple assertions on the computation history the user wants to examine. Consider the model-based test below:
+
+```tla+
+Test2NoSuccess ==
+    /\ state = "finishedSuccess"
+    /\ \E s1, s2 \in DOMAIN history:
+        /\ s1 /= s2
+        /\ history[s1].verdict /= "SUCCESS"
+        /\ history[s2].verdict /= "SUCCESS"
+
+```
+
+This is a model-based test of the Tendermint LightClient sub-protocol, which is used to establish trust in a blockchain block retrieved from an untrusted source. The test describes a computation history in which the protocol execution, despite passing through two distinct intermediate states with unsuccessful block verification, still finishes successfully. This model-based test is then used to automatically generate and execute a complete integration test for the Tendermint LightClient protocol: e.g., in a Tendermint network with 4 validators, these 6 lines of TLA+ are translated into more than 500 lines of JSON integration test with all the necessary details: private and public keys, signatures, timestamps, etc.
+
+In our presentation we would like to convey three main messages on model-based testing, which we describe below.
+
+## Model-based testing with TLA+ is easy to understand, and can be successfully applied in industry to provide correctness guarantees for real-world software.
+
+In our initial experience of implementing and applying MBT, we have found its simplicity for the end user, be it a researcher or a developer, quite appealing. The model-based tests are quite abstract and concise; they can precisely describe what behavior needs to be tested. At the same time, in sharp contrast to standard integration tests, they leave a lot of details unspecified. An important feature of MBT is that these missing details will be instantiated in different ways with each run of the model-based test; moreover, this behavior can be easily employed to _exhaustively_ test all scenarios falling under the abstract test. In the above example, the intermediate results are only restricted to be unsuccessful, but the concrete verification errors are left open. The model checker will indeed instantiate different combinations of failures on each run: the blockchain block falling out of the trusting period, not enough trust in the validator set, signature validation errors, etc.
+
+One of the main reasons for the Tendermint modeling and model-based testing being successfully implemented and accepted within our teams is the rich syntax and semantics of the TLA+ language. TLA+ allowed us to concisely model the complex aspects of the Tendermint protocol, as well as to express correctness properties and tests about it. In particular, even for developers without prior exposure to TLA+, following the models written by our researchers has helped to clarify the protocol abstractions before diving into the implementation, thus avoiding the painful experience and countless hours of correcting them after the implementation is already done.
+
+
+## Substantial infrastructure is necessary for model-based testing, but the complexity is reasonable.
+
+Some infrastructure naturally needs to be implemented in order to apply MBT. The good news is that a lot of that infrastructure is reusable: some for different components of the same product, some for different products or even different companies.
+Within our first MBT project we have implemented the following infrastructure components, highlighted in the figure:
+
+* Apalache model checker extension for providing JSON input/output (reusable globally);
+* JSON Artifact Translator (Jsonatr), which, with the help of a transformation specification, maps an abstract counterexample produced by the model checker into a concrete integration test (the tool is reusable globally, the transformation spec is specific to a product);
+* Test generator (Testgen), which plays the role of a _concretization function_, mapping abstract model states into concrete implementation states (reusable for multiple products sharing the same data structures);
+* Model-based test driver, which orchestrates the above infrastructure components to run them within the standard continuous integration (CI) framework; for Rust this is done via the standard `cargo test` command (specific to a product/component, but quite minimalistic).
+
+Implementation of the above infrastructure took us about 3 person-months of work, which is quite a reasonable cost, taking into account the reusability of most of the components.
+
+![Model-based testing of the Tendermint Light Client](LightClientMBT.png)
+
+
+## MBT can serve as a "missing link" to synchronize formal specification and concrete implementation, and to connect academia and industry.
+
+
+The original VDD process, while highly successful, as acknowledged by both our research and development teams, still lacks some necessary feedback loops. We have observed that after some time the English specification, the TLA+ model, and the concrete implementation start to diverge. Our initial experience with MBT suggests that it can successfully play the role of the missing link, providing feedback loops between the three levels of abstraction. In particular, in this talk we will demonstrate with concrete examples how MBT has helped us to eliminate the growing divergence between the abstraction levels with the help of simple model-based tests.
+
+Another aspect that can be observed in the figure is that MBT brings a research engineer into the process, as _some_ research experience is necessary for implementing the models and tests concisely and efficiently. What we have observed quite frequently is that while academic researchers often excel in devising new verification approaches, they struggle to find real-world applications for their methods. Developers in industry, on the other hand, desperately need _some_ level of formal reasoning to be applied to their products, in order to provide reasonable correctness guarantees within time and budget bounds. Our experience demonstrates that the advantages of MBT can be made convincing enough to justify commercial companies hiring researchers as staff or contractors to apply MBT to their products, and thus to establish the long-needed dialogue between academia and industry.
+
+
+
diff --git a/light-client/tests/support/model_based/Blockchain_A_1.tla b/light-client/tests/support/model_based/Blockchain_A_1.tla
new file mode 120000
index 000000000..6b0a7e4b0
--- /dev/null
+++ b/light-client/tests/support/model_based/Blockchain_A_1.tla
@@ -0,0 +1 @@
+../../../../docs/spec/lightclient/verification/Blockchain_A_1.tla
\ No newline at end of file
diff --git a/light-client/tests/support/model_based/LightClientMBT.png b/light-client/tests/support/model_based/LightClientMBT.png
new file mode 100644
index 000000000..a5233ccb8
Binary files /dev/null and b/light-client/tests/support/model_based/LightClientMBT.png differ
diff --git a/light-client/tests/support/model_based/LightTests.tla b/light-client/tests/support/model_based/LightTests.tla
new file mode 100644
index 000000000..c761a39ba
--- /dev/null
+++ b/light-client/tests/support/model_based/LightTests.tla
@@ -0,0 +1,40 @@
+------------------------- MODULE LightTests ---------------------------
+
+EXTENDS Lightclient_A_1
+
+TestFailure ==
+    /\ state = "finishedFailure"
+    /\ Cardinality(DOMAIN fetchedLightBlocks) = TARGET_HEIGHT
+
+TestSuccess ==
+    /\ state = "finishedSuccess"
+    /\ Cardinality(DOMAIN fetchedLightBlocks) = TARGET_HEIGHT
+
+\* This test never produces a counterexample, so the model should be corrected
+TestFailedTrustingPeriod ==
+    \E s \in DOMAIN history :
+        history[s].verdict = "FAILED_TRUSTING_PERIOD"
+
+Test2NotEnoughTrustSuccess ==
+    /\ state = "finishedSuccess"
+    /\ \E s1, s2 \in DOMAIN history :
+        /\ s1 /= s2
+        /\ history[s1].verdict = "NOT_ENOUGH_TRUST"
+        /\ history[s2].verdict = "NOT_ENOUGH_TRUST"
+
+Test2NotEnoughTrustFailure ==
+    /\ state = "finishedFailure"
+    /\ \E s1, s2 \in DOMAIN history :
+        /\ s1 /= s2
+        /\ history[s1].verdict = "NOT_ENOUGH_TRUST"
+        /\ history[s2].verdict = "NOT_ENOUGH_TRUST"
+
+Test3NotEnoughTrustSuccess ==
+    /\ state = "finishedSuccess"
+    /\ \E s1, s2, s3 \in DOMAIN history :
+        /\ s1 /= s2 /\ s2 /= s3 /\ s1 /= s3
+        /\ history[s1].verdict = "NOT_ENOUGH_TRUST"
+        /\ history[s2].verdict = "NOT_ENOUGH_TRUST"
+        /\ history[s3].verdict = "NOT_ENOUGH_TRUST"
+
+============================================================================
diff --git a/light-client/tests/support/model_based/Lightclient_A_1.tla b/light-client/tests/support/model_based/Lightclient_A_1.tla
new file mode 120000
index 000000000..e2bdf7956
--- /dev/null
+++ b/light-client/tests/support/model_based/Lightclient_A_1.tla
@@ -0,0 +1 @@
+../../../../docs/spec/lightclient/verification/Lightclient_A_1.tla
\ No newline at end of file
diff --git a/light-client/tests/support/model_based/MC4_4_faulty.json b/light-client/tests/support/model_based/MC4_4_faulty.json
new file mode 100644
index 000000000..1d43ae8e4
--- /dev/null
+++ b/light-client/tests/support/model_based/MC4_4_faulty.json
@@ -0,0 +1,11 @@
+{
+    "description": "Tests for 4 validators, target height 4, with faulty primary",
+    "model": "MC4_4_faulty.tla",
+    "timeout": 60,
+    "tests": [
+        "TestSuccess",
+        "TestFailure",
+        "Test2NotEnoughTrustSuccess",
+        "Test2NotEnoughTrustFailure"
+    ]
+}
diff --git a/light-client/tests/support/model_based/MC4_4_faulty.tla b/light-client/tests/support/model_based/MC4_4_faulty.tla
new file mode 100644
index 000000000..a1542f5e9
--- /dev/null
+++ b/light-client/tests/support/model_based/MC4_4_faulty.tla
@@ -0,0 +1,16 @@
+------------------------- MODULE MC4_4_faulty ---------------------------
+
+AllNodes == {"n1", "n2", "n3", "n4"}
+TRUSTED_HEIGHT == 1
+TARGET_HEIGHT == 4
+TRUSTING_PERIOD == 1400 \* two weeks, one day is 100 time units :-)
+IS_PRIMARY_CORRECT == FALSE
+
+VARIABLES
+    state, nextHeight, fetchedLightBlocks, lightBlockStatus, latestVerified,
+    history, nprobes,
+    now, blockchain, Faulty
+
+INSTANCE LightTests
+
+============================================================================
diff --git a/light-client/tests/support/model_based/README.md b/light-client/tests/support/model_based/README.md
new file mode 100644
index 000000000..bf26cc512
--- /dev/null
+++ b/light-client/tests/support/model_based/README.md
@@ -0,0 +1,66 @@
+## Light Client model-based testing guide
+
+In this directory you will find the model-based tests for the light client.
+They are "model-based" because they are based on the formal `TLA+` model of the Light Client: see [Lightclient_A_1.tla](../../../../docs/spec/lightclient/verification/Lightclient_A_1.tla).
+The tests themselves are simple `TLA+` assertions that describe the desired shape of the Light Client execution;
+see [LightTests.tla](LightTests.tla) for some examples.
+To be able to specify test assertions, we have extended the Light Client model with a `history` variable,
+which records the whole execution history.
+So, by referring to `history`, you simply specify declaratively what execution history you want to see.
+
+After you have specified your `TLA+` test, list it among the tests of a _test batch_, such as [MC4_4_faulty.json](MC4_4_faulty.json).
+Such test batches are then executed automatically when you run `cargo test`.
+
+When you run `cargo test`, some machinery will run under the hood, namely:
+* [Apalache model checker](https://github.com/informalsystems/apalache)
+  will execute your `TLA+` test, and will try to find an execution you've described.
+* The model execution (or _counterexample_ in model checking terms)
+  will be translated into the format that is understood by the Light Client implementation.
+  For that translation two other components are necessary:
+  * [Jsonatr (JSON Artifact Translator)](https://github.com/informalsystems/jsonatr)
+    performs the translation by executing this [transformation spec](_jsonatr-lib/apalache_to_lite_test.json);
+  * [Tendermint Testgen](https://github.com/informalsystems/tendermint-rs/tree/master/testgen)
+    takes care of translating abstract values from the model into concrete implementation values.
+
+So, for the model-based tests to run, the programs `apalache-mc`, `jsonatr`, and `tendermint-testgen`
+should be present in your `PATH`. The easiest way to run Apalache is by [using a Docker image](https://github.com/informalsystems/apalache/blob/unstable/docs/manual.md#useDocker);
+to run the latter two, you need to clone the repositories locally and then,
+after building them, add their `target/debug` directories to your `PATH`.
+If any of the programs is not found, execution of a model-based test will be skipped.
+
+After you run your model-based tests with `cargo test`, the results will be printed to the screen,
+and also saved to disk in the [_single_step](_single_step) directory.
+Results include:
+* [Report](_single_step/_report) on which tests were run, and what their outcomes were
+* For each test, its relevant files are also saved in a subdirectory of [_single_step](_single_step).
+  E.g., for the test `TestSuccess`, which is referenced in [MC4_4_faulty.json](MC4_4_faulty.json),
+  the results of its execution will be saved into [_single_step/MC4_4_faulty.json/TestSuccess](_single_step/MC4_4_faulty.json/TestSuccess).
+  These results include, for a successful test:
+  * [log file](_single_step/MC4_4_faulty.json/TestSuccess/_log), where test logs are stored
+  * [counterexample.tla](_single_step/MC4_4_faulty.json/TestSuccess/counterexample.tla),
+    the counterexample produced by Apalache in `TLA+` format
+  * [counterexample.json](_single_step/MC4_4_faulty.json/TestSuccess/counterexample.json),
+    the counterexample produced by Apalache in `JSON` format
+  * [test.json](_single_step/MC4_4_faulty.json/TestSuccess/test.json),
+    the actual static Light Client test, generated by Jsonatr from the counterexample.
+
+One important feature of model-based testing is that each time you run a model-based test,
+a different static test may be produced. This happens because the test you describe
+is very abstract, and its requirements can be fulfilled by a multitude of concrete tests.
+To see what peculiarities a particular test has, please inspect the test report or logs.
+
+We suggest the following workflow for running model-based tests:
+
+1. Experiment with your model-based test by trying different scenarios.
+For that you might want to run your test in isolation,
+as counterexample generation by Apalache may take significant time.
+In order to run only your test, just create a new test batch with your test,
+and temporarily remove/rename the other test batches (files or directories starting with `_` are ignored).
+2. After running your model-based test, inspect its report/logs.
+3. If you find the test details interesting, simply copy the generated `test.json` file
+into the [single_step](single_step) directory, and rename it to reflect the model and model-based test you used to generate it.
+For example, [single_step/MC4_4_faulty_TestSuccess.json](single_step/MC4_4_faulty_TestSuccess.json)
+was auto-generated from the model [MC4_4_faulty.tla](MC4_4_faulty.tla) and the test `TestSuccess`.
+4. The next time you run `cargo test`, this static test will be picked up and executed automatically.
+
+
\ No newline at end of file
diff --git a/light-client/tests/support/model_based/_jsonatr-lib b/light-client/tests/support/model_based/_jsonatr-lib
new file mode 120000
index 000000000..f2ef6f0a5
--- /dev/null
+++ b/light-client/tests/support/model_based/_jsonatr-lib
@@ -0,0 +1 @@
+../../../../testgen/jsonatr-lib
\ No newline at end of file
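
As an illustration of step 1 of the README workflow above, a new test assertion would be written in the same style as the predicates in LightTests.tla. The predicate below is only a hypothetical sketch (its name and condition are made up for illustration and are not part of this change set), and it would still need to be listed in a test batch such as MC4_4_faulty.json to be picked up by `cargo test`:

```tla+
\* Hypothetical example: an execution that still finishes successfully
\* even though some intermediate verification attempt had insufficient trust
TestNotEnoughTrustThenSuccess ==
    /\ state = "finishedSuccess"
    /\ \E s \in DOMAIN history :
        history[s].verdict = "NOT_ENOUGH_TRUST"
```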