From 59b1d34e2383dfb70f7ece76ffafa9301e651b4e Mon Sep 17 00:00:00 2001
From: Andrew Brown <andrew.brown@intel.com>
Date: Tue, 15 Nov 2022 14:13:57 -0800
Subject: [PATCH 1/3] bench: benchmark several common WASI scenarios

In order to properly understand the impact of providing thread-safe
implementations of WASI contexts (#5235), we need benchmarks that measure
the current performance of WASI calls using Wiggle. This change adds
several common WASI scenarios as WAT files (see `benches/wasi/*.wat`)
and benchmarks them with `criterion`. Using `criterion`'s `iter_custom`,
the WAT file runs the desired number of benchmark iterations internally
and the total duration of the runs is divided to get the average time
for each loop iteration.

Why WAT? When compiling these benchmarks from Rust to `wasm32-wasi`, the
output files are large, contain other WASI imports than the desired
ones, and overall it is difficult to tell if we are measuring what we
expect. By hand-writing the WAT, it is (slightly) more clear what each
benchmark is doing.
---
 Cargo.toml                        |  4 ++
 benches/wasi.rs                   | 73 +++++++++++++++++++++++++++++++
 benches/wasi/get-current-time.wat | 22 ++++++++++
 benches/wasi/read-arguments.wat   | 42 ++++++++++++++++++
 benches/wasi/read-environment.wat | 45 +++++++++++++++++++
 5 files changed, 186 insertions(+)
 create mode 100644 benches/wasi.rs
 create mode 100644 benches/wasi/get-current-time.wat
 create mode 100644 benches/wasi/read-arguments.wat
 create mode 100644 benches/wasi/read-environment.wat
diff --git a/Cargo.toml b/Cargo.toml
index 235f312b3192..605695b9ced0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -225,3 +225,7 @@ harness = false
 [[bench]]
 name = "call"
 harness = false
+
+[[bench]]
+name = "wasi"
+harness = false
diff --git a/benches/wasi.rs b/benches/wasi.rs
new file mode 100644
index 000000000000..3af42b7b1f01
--- /dev/null
+++ b/benches/wasi.rs
@@ -0,0 +1,73 @@
+//! Measure some common WASI call scenarios.
+
+use criterion::{criterion_group, criterion_main, Criterion};
+use std::time::Instant;
+use wasmtime::{Engine, Linker, Module, Store, TypedFunc};
+use wasmtime_wasi::{sync::WasiCtxBuilder, WasiCtx};
+
+criterion_group!(benches, bench_wasi);
+criterion_main!(benches);
+
+fn bench_wasi(c: &mut Criterion) {
+    // Benchmark each `*.wat` file in the `wasi` directory.
+    for file in std::fs::read_dir("benches/wasi").unwrap() {
+        let path = file.unwrap().path();
+        if path.extension().map(|e| e == "wat").unwrap_or(false) {
+            let wat = std::fs::read(&path).unwrap();
+            let (mut store, run_fn) = instantiate(&wat);
+            let bench_name = format!("wasi/{}", path.file_name().unwrap().to_string_lossy());
+            // To avoid overhead, the module itself must iterate the expected
+            // number of times in a specially-crafted `run` function (see
+            // `instantiate` for deta\ils).
+            c.bench_function(&bench_name, move |b| {
+                b.iter_custom(|iters| {
+                    let start = Instant::now();
+                    let result = run_fn.call(&mut store, iters).unwrap();
+                    assert_eq!(iters, result);
+                    start.elapsed()
+                })
+            });
+        }
+    }
+}
+
+/// Compile and instantiate the Wasm module, returning the exported `run`
+/// function. This function expects `run` to:
+/// - have a single `u64` parameter indicating the number of loop iterations to
+///   execute
+/// - execute the body of the function for that number of loop iterations
+/// - return a single `u64` indicating how many loop iterations were executed
+///   (to double-check)
+fn instantiate(wat: &[u8]) -> (Store<WasiCtx>, TypedFunc<u64, u64>) {
+    let engine = Engine::default();
+    let wasi = wasi_context();
+    let mut store = Store::new(&engine, wasi);
+    let module = Module::new(&engine, wat).unwrap();
+    let mut linker = Linker::new(&engine);
+    wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap();
+    let instance = linker.instantiate(&mut store, &module).unwrap();
+    let run = instance
+        .get_typed_func::<_, _, _>(&mut store, "run")
+        .unwrap();
+    (store, run)
+}
+
+/// Build a WASI context with some actual data to retrieve.
+fn wasi_context() -> WasiCtx {
+    let wasi = WasiCtxBuilder::new();
+    wasi.envs(&[
+        ("a".to_string(), "b".to_string()),
+        ("b".to_string(), "c".to_string()),
+        ("c".to_string(), "d".to_string()),
+    ])
+    .unwrap()
+    .args(&[
+        "exe".to_string(),
+        "--flag1".to_string(),
+        "--flag2".to_string(),
+        "--flag3".to_string(),
+        "--flag4".to_string(),
+    ])
+    .unwrap()
+    .build()
+}
diff --git a/benches/wasi/get-current-time.wat b/benches/wasi/get-current-time.wat
new file mode 100644
index 000000000000..8ac6ceb0758e
--- /dev/null
+++ b/benches/wasi/get-current-time.wat
@@ -0,0 +1,22 @@
+(module
+    (import "wasi_snapshot_preview1" "clock_time_get"
+        (func $__wasi_clock_time_get (param i32 i64 i32) (result i32)))
+    (func (export "run") (param $iters i64) (result i64)
+        (local $i i64)
+        (local.set $i (i64.const 0))
+        (loop $cont
+            ;; Retrieve the current time with the following parameters:
+            ;; - $clockid: here we use the enum value 1, i.e., $monotonic
+            ;; - $precision: the maximum lag, which we set to 0 here
+            ;; - the address at which to write the u64 $timestamp
+            ;; Returns an error code.
+            (call $__wasi_clock_time_get (i32.const 1) (i64.const 0) (i32.const 0))
+            (drop)
+            ;; Continue looping until $i reaches $iters.
+            (local.set $i (i64.add (local.get $i) (i64.const 1)))
+            (br_if $cont (i64.lt_u (local.get $i) (local.get $iters)))
+        )
+        (local.get $i)
+    )
+    (memory (export "memory") 1)
+)
diff --git a/benches/wasi/read-arguments.wat b/benches/wasi/read-arguments.wat
new file mode 100644
index 000000000000..c6f1d4a33dbe
--- /dev/null
+++ b/benches/wasi/read-arguments.wat
@@ -0,0 +1,42 @@
+(module
+    (import "wasi_snapshot_preview1" "args_get"
+        (func $__wasi_args_get (param i32 i32) (result i32)))
+    (import "wasi_snapshot_preview1" "args_sizes_get"
+        (func $__wasi_args_sizes_get (param i32 i32) (result i32)))
+    (func (export "run") (param $iters i64) (result i64)
+        (local $i i64)
+        (local.set $i (i64.const 0))
+        (loop $cont
+            ;; Read the current argument list by:
+            ;;  1) retrieving the argument sizes and then
+            ;;  2) retrieving the argument data itself.
+
+            ;; Retrieve the sizes of the arguments with parameters:
+            ;; - the address at which to write the number of arguments
+            ;; - the address at which to write the size of the argument buffer
+            ;; Returns an error code.
+            (call $__wasi_args_sizes_get (i32.const 0) (i32.const 4))
+            (drop)
+
+            ;; Read the arguments with parameters:
+            ;; - the address at which to write the array of argument pointers
+            ;;   (i.e., one pointer per argument); here we overwrite the size
+            ;;   written at address 0
+            ;; - the address at which to write the buffer of argument strings
+            ;;   (pointed to by the items written to the first address); we
+            ;;   calculate where to start the buffer based on the size of the
+            ;;   pointer list (i.e., number of arguments * 4 bytes per pointer)
+            ;; Returns an error code.
+            (call $__wasi_args_get
+                (i32.const 0)
+                (i32.mul (i32.load (i32.const 0)) (i32.const 4)))
+            (drop)
+
+            ;; Continue looping until $i reaches $iters.
+            (local.set $i (i64.add (local.get $i) (i64.const 1)))
+            (br_if $cont (i64.lt_u (local.get $i) (local.get $iters)))
+        )
+        (local.get $i)
+    )
+    (memory (export "memory") 1)
+)
diff --git a/benches/wasi/read-environment.wat b/benches/wasi/read-environment.wat
new file mode 100644
index 000000000000..50f50b22751f
--- /dev/null
+++ b/benches/wasi/read-environment.wat
@@ -0,0 +1,45 @@
+(module
+    (import "wasi_snapshot_preview1" "environ_get"
+        (func $__wasi_environ_get (param i32 i32) (result i32)))
+    (import "wasi_snapshot_preview1" "environ_sizes_get"
+        (func $__wasi_environ_sizes_get (param i32 i32) (result i32)))
+    (func (export "run") (param $iters i64) (result i64)
+        (local $i i64)
+        (local.set $i (i64.const 0))
+        (loop $cont
+            ;; Read the current environment key-value pairs by:
+            ;;  1) retrieving the environment sizes and then
+            ;;  2) retrieving the environment data itself.
+
+            ;; Retrieve the sizes of the environment with parameters:
+            ;; - the address at which to write the number of environment
+            ;;   variables
+            ;; - the address at which to write the size of the environment
+            ;;   buffer
+            ;; Returns an error code.
+            (call $__wasi_environ_sizes_get (i32.const 0) (i32.const 4))
+            (drop)
+
+            ;; Read the environment with parameters:
+            ;; - the address at which to write the array of environment pointers
+            ;;   (i.e., one pointer per key-value pair); here we overwrite
+            ;;   the size written at address 0
+            ;; - the address at which to write the buffer of key-value pairs
+            ;;   (pointed to by the items written to the first address); we
+            ;;   calculate where to start the buffer based on the size of the
+            ;;   pointer list (i.e., number of key-value pairs * 4 bytes per
+            ;;   pointer)
+            ;; Returns an error code.
+            (call $__wasi_environ_get
+                (i32.const 0)
+                (i32.mul (i32.load (i32.const 0)) (i32.const 4)))
+            (drop)
+
+            ;; Continue looping until $i reaches $iters.
+            (local.set $i (i64.add (local.get $i) (i64.const 1)))
+            (br_if $cont (i64.lt_u (local.get $i) (local.get $iters)))
+        )
+        (local.get $i)
+    )
+    (memory (export "memory") 1)
+)

From fd26d9571f598658bb43c9f8efb9d4aed7fd3959 Mon Sep 17 00:00:00 2001
From: Andrew Brown <andrew.brown@intel.com>
Date: Tue, 15 Nov 2022 14:22:23 -0800
Subject: [PATCH 2/3] typo: fix documentation misspelling

---
 benches/thread_eager_init.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benches/thread_eager_init.rs b/benches/thread_eager_init.rs
index 9f6e4c5b9e53..c87cca9e5d15 100644
--- a/benches/thread_eager_init.rs
+++ b/benches/thread_eager_init.rs
@@ -4,7 +4,7 @@ use std::time::{Duration, Instant};
 use wasmtime::*;
 
 fn measure_execution_time(c: &mut Criterion) {
-    // Baseline performance: a single measurment covers both initializing
+    // Baseline performance: a single measurement covers both initializing
     // thread local resources and executing the first call.
     //
     // The other two bench functions should sum to this duration.

From b66730873823b8b9d6bae06cd2068c32a6cd9266 Mon Sep 17 00:00:00 2001
From: Andrew Brown <andrew.brown@intel.com>
Date: Tue, 15 Nov 2022 15:40:42 -0800
Subject: [PATCH 3/3] review: fix doc typo, remove unnecessary parameters

---
 benches/wasi.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/benches/wasi.rs b/benches/wasi.rs
index 3af42b7b1f01..3b8f28863abd 100644
--- a/benches/wasi.rs
+++ b/benches/wasi.rs
@@ -18,7 +18,7 @@ fn bench_wasi(c: &mut Criterion) {
             let bench_name = format!("wasi/{}", path.file_name().unwrap().to_string_lossy());
             // To avoid overhead, the module itself must iterate the expected
             // number of times in a specially-crafted `run` function (see
-            // `instantiate` for deta\ils).
+            // `instantiate` for details).
             c.bench_function(&bench_name, move |b| {
                 b.iter_custom(|iters| {
                     let start = Instant::now();
@@ -46,9 +46,7 @@ fn instantiate(wat: &[u8]) -> (Store<WasiCtx>, TypedFunc<u64, u64>) {
     let mut linker = Linker::new(&engine);
     wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap();
     let instance = linker.instantiate(&mut store, &module).unwrap();
-    let run = instance
-        .get_typed_func::<_, _, _>(&mut store, "run")
-        .unwrap();
+    let run = instance.get_typed_func(&mut store, "run").unwrap();
     (store, run)
 }