Merge pull request #3 from crytic/fix/missing-methods

fix: weight methods correctly to avoid skipping some
crytic · Jul 26, 2024 · fc0e2d7 · fc0e2d7
2 parents 924247c + d1d6344
commit fc0e2d7
Show file tree

Hide file tree

Showing 6 changed files with 247 additions and 17 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -87,7 +87,6 @@ jobs:
           inputs: ./medusa-*.tar.gz
 
       - name: Upload artifact
-        if: github.ref == 'refs/heads/master' || (github.event_name == 'push' && startsWith(github.ref, 'refs/tags/'))
         uses: actions/upload-artifact@v4
         with:
           name: medusa-${{ runner.os }}-${{ runner.arch }}

diff --git a/DEV.md b/DEV.md
@@ -0,0 +1,104 @@
+# Debugging and Development
+
+## Debugging
+
+The following scripts help Medusa developers debug changes to the fuzzer.
+
+### Corpus diff
+
+The corpus diff script compares two corpora and lists the methods that appear in one but not in the other. This is useful for spotting methods that a corpus should contain but does not.
+
+```shell
+python3 scripts/corpus_diff.py corpus1 corpus2
+```
+
+```shell
+Methods only in ~/corpus1:
+-  clampSplitWeight(uint32,uint32)
+
+Methods only in ~/corpus2:
+  <None>
+```
+
+### Corpus stats
+
+The corpus stats script generates statistics about a corpus: the number of sequences, the average sequence length, how often each method is called, and the number of unique methods.
+
+```shell
+python3 scripts/corpus_stats.py corpus
+```
+
+```shell
+Number of Sequences in ~/corpus: 130
+
+Average Length of Transactions List: 43
+
+Frequency of Methods Called:
+-  testReceiversReceivedSplit(uint8): 280
+-  setMaxEndHints(uint32,uint32): 174
+-  setStreamBalanceWithdrawAll(uint8): 139
+-  giveClampedAmount(uint8,uint8,uint128): 136
+-  receiveStreamsSplitAndCollectToSelf(uint8): 133
+-  testSqueezeViewVsActual(uint8,uint8): 128
+-  testSqueeze(uint8,uint8): 128
+-  testSetStreamBalance(uint8,int128): 128
+-  addStreamWithClamping(uint8,uint8,uint160,uint32,uint32,int128): 125
+-  removeAllSplits(uint8): 118
+-  testSplittableAfterSplit(uint8): 113
+-  testSqueezableVsReceived(uint8): 111
+-  testBalanceAtInFuture(uint8,uint8,uint160): 108
+-  testRemoveStreamShouldNotRevert(uint8,uint256): 103
+-  invariantWithdrawAllTokensShouldNotRevert(): 103
+-  collect(uint8,uint8): 101
+-  invariantAmtPerSecVsMinAmtPerSec(uint8,uint256): 98
+-  testSqueezableAmountCantBeWithdrawn(uint8,uint8): 97
+-  split(uint8): 97
+-  invariantWithdrawAllTokens(): 95
+-  testReceiveStreams(uint8,uint32): 93
+-  invariantAccountingVsTokenBalance(): 92
+-  testSqueezeWithFuzzedHistoryShouldNotRevert(uint8,uint8,uint256,bytes32): 91
+-  testSqueezableAmountCantBeUndone(uint8,uint8,uint160,uint32,uint32,int128): 87
+-  testCollect(uint8,uint8): 86
+-  testSetStreamBalanceWithdrawAllShouldNotRevert(uint8): 86
+-  testAddStreamShouldNotRevert(uint8,uint8,uint160,uint32,uint32,int128): 85
+-  testReceiveStreamsShouldNotRevert(uint8): 84
+-  addSplitsReceiver(uint8,uint8,uint32): 84
+-  setStreamBalanceWithClamping(uint8,int128): 82
+-  addSplitsReceiverWithClamping(uint8,uint8,uint32): 80
+-  testSetStreamBalanceShouldNotRevert(uint8,int128): 80
+-  testSplitShouldNotRevert(uint8): 80
+-  squeezeAllAndReceiveAndSplitAndCollectToSelf(uint8): 79
+-  addStreamImmediatelySqueezable(uint8,uint8,uint160): 79
+-  testSetSplitsShouldNotRevert(uint8,uint8,uint32): 78
+-  invariantSumAmtDeltaIsZero(uint8): 78
+-  testReceiveStreamsViewConsistency(uint8,uint32): 76
+-  squeezeToSelf(uint8): 74
+-  collectToSelf(uint8): 72
+-  setStreams(uint8,uint8,uint160,uint32,uint32,int128): 70
+-  receiveStreamsAllCycles(uint8): 69
+-  invariantWithdrawShouldAlwaysFail(uint256): 68
+-  addStream(uint8,uint8,uint160,uint32,uint32,int128): 68
+-  squeezeWithFuzzedHistory(uint8,uint8,uint256,bytes32): 67
+-  setStreamsWithClamping(uint8,uint8,uint160,uint32,uint32,int128): 67
+-  splitAndCollectToSelf(uint8): 67
+-  testSqueezeWithFullyHashedHistory(uint8,uint8): 65
+-  give(uint8,uint8,uint128): 65
+-  setSplits(uint8,uint8,uint32): 65
+-  testSqueezeTwice(uint8,uint8,uint256,bytes32): 65
+-  testSetStreamsShouldNotRevert(uint8,uint8,uint160,uint32,uint32,int128): 64
+-  squeezeAllSenders(uint8): 63
+-  removeStream(uint8,uint256): 62
+-  testCollectableAfterSplit(uint8): 58
+-  testCollectShouldNotRevert(uint8,uint8): 56
+-  testReceiveStreamsViewVsActual(uint8,uint32): 55
+-  receiveStreams(uint8,uint32): 55
+-  setSplitsWithClamping(uint8,uint8,uint32): 55
+-  testGiveShouldNotRevert(uint8,uint8,uint128): 47
+-  setStreamBalance(uint8,int128): 47
+-  squeezeWithDefaultHistory(uint8,uint8): 45
+-  testSplitViewVsActual(uint8): 45
+-  testAddSplitsShouldNotRevert(uint8,uint8,uint32): 30
+-  testSqueezeWithDefaultHistoryShouldNotRevert(uint8,uint8): 23
+
+Number of Unique Methods: 65
+```
diff --git a/fuzzing/fuzzer_worker.go b/fuzzing/fuzzer_worker.go
@@ -11,7 +11,6 @@ import (
 	"github.com/crytic/medusa/fuzzing/coverage"
 	"github.com/crytic/medusa/fuzzing/valuegeneration"
 	"github.com/crytic/medusa/utils"
-	"github.com/crytic/medusa/utils/randomutils"
 	"github.com/ethereum/go-ethereum/common"
 	"golang.org/x/exp/maps"
 )
@@ -44,9 +43,6 @@ type FuzzerWorker struct {
 	// pureMethods is a list of contract functions which are side-effect free with respect to the EVM (view and/or pure in terms of Solidity mutability).
 	pureMethods []fuzzerTypes.DeployedContractMethod
 
-	// methodChooser uses a weighted selection algorithm to choose a method to call, prioritizing state changing methods over pure ones.
-	methodChooser *randomutils.WeightedRandomChooser[fuzzerTypes.DeployedContractMethod]
-
 	// randomProvider provides random data as inputs to decisions throughout the worker.
 	randomProvider *rand.Rand
 	// sequenceGenerator creates entirely new or mutated call sequences based on corpus call sequences, for use in
@@ -94,7 +90,6 @@ func newFuzzerWorker(fuzzer *Fuzzer, workerIndex int, randomProvider *rand.Rand)
 		coverageTracer:       nil,
 		randomProvider:       randomProvider,
 		valueSet:             valueSet,
-		methodChooser:        randomutils.NewWeightedRandomChooser[fuzzerTypes.DeployedContractMethod](),
 	}
 	worker.sequenceGenerator = NewCallSequenceGenerator(worker, callSequenceGenConfig)
 	worker.shrinkingValueMutator = shrinkingValueMutator
@@ -242,13 +237,13 @@ func (fw *FuzzerWorker) updateMethods() {
 		// If we deployed the contract, also enumerate property tests and state changing methods.
 		for _, method := range contractDefinition.AssertionTestMethods {
 			// Any non-constant method should be tracked as a state changing method.
-			// We favor calling state changing methods over view/pure methods.
 			if method.IsConstant() {
-				fw.pureMethods = append(fw.pureMethods, fuzzerTypes.DeployedContractMethod{Address: contractAddress, Contract: contractDefinition, Method: method})
-				fw.methodChooser.AddChoices(randomutils.NewWeightedRandomChoice(fuzzerTypes.DeployedContractMethod{Address: contractAddress, Contract: contractDefinition, Method: method}, big.NewInt(1)))
+				// Only track the pure/view method if testing view methods is enabled
+				if fw.fuzzer.config.Fuzzing.Testing.AssertionTesting.TestViewMethods {
+					fw.pureMethods = append(fw.pureMethods, fuzzerTypes.DeployedContractMethod{Address: contractAddress, Contract: contractDefinition, Method: method})
+				}
 			} else {
 				fw.stateChangingMethods = append(fw.stateChangingMethods, fuzzerTypes.DeployedContractMethod{Address: contractAddress, Contract: contractDefinition, Method: method})
-				fw.methodChooser.AddChoices(randomutils.NewWeightedRandomChoice(fuzzerTypes.DeployedContractMethod{Address: contractAddress, Contract: contractDefinition, Method: method}, big.NewInt(100)))
 			}
 		}
 	}

diff --git a/fuzzing/fuzzer_worker_sequence_generator.go b/fuzzing/fuzzer_worker_sequence_generator.go
@@ -5,6 +5,7 @@ import (
 	"math/big"
 
 	"github.com/crytic/medusa/fuzzing/calls"
+	"github.com/crytic/medusa/fuzzing/contracts"
 	"github.com/crytic/medusa/fuzzing/valuegeneration"
 	"github.com/crytic/medusa/utils"
 	"github.com/crytic/medusa/utils/randomutils"
@@ -274,16 +275,27 @@ func (g *CallSequenceGenerator) PopSequenceElement() (*calls.CallSequenceElement
 // deployed to the CallSequenceGenerator's parent FuzzerWorker chain, with fuzzed call data.
 // Returns the call sequence element, or an error if one was encountered.
 func (g *CallSequenceGenerator) generateNewElement() (*calls.CallSequenceElement, error) {
-	// Verify we have state changing methods to call if we are not testing view/pure methods.
-	if len(g.worker.stateChangingMethods) == 0 && !g.worker.fuzzer.config.Fuzzing.Testing.AssertionTesting.TestViewMethods {
-		return nil, fmt.Errorf("cannot generate fuzzed tx as there are no state changing methods to call")
+	// Check to make sure that we have any functions to call
+	if len(g.worker.stateChangingMethods) == 0 && len(g.worker.pureMethods) == 0 {
+		return nil, fmt.Errorf("cannot generate fuzzed call as there are no methods to call")
 	}
-	// Select a random method and sender
-	selectedMethod, err := g.worker.methodChooser.Choose()
-	if err != nil {
-		return nil, err
+
+	// Only call view functions if there are no state-changing methods
+	var callOnlyPureFunctions bool
+	if len(g.worker.stateChangingMethods) == 0 && len(g.worker.pureMethods) > 0 {
+		callOnlyPureFunctions = true
+	}
+
+	// Select a random method
+	// A pure method is chosen with a 1/100 chance (when any exist), or always when only pure methods are callable
+	var selectedMethod *contracts.DeployedContractMethod
+	if (len(g.worker.pureMethods) > 0 && g.worker.randomProvider.Intn(100) == 0) || callOnlyPureFunctions {
+		selectedMethod = &g.worker.pureMethods[g.worker.randomProvider.Intn(len(g.worker.pureMethods))]
+	} else {
+		selectedMethod = &g.worker.stateChangingMethods[g.worker.randomProvider.Intn(len(g.worker.stateChangingMethods))]
 	}
 
+	// Select a random sender
 	selectedSender := g.worker.fuzzer.senders[g.worker.randomProvider.Intn(len(g.worker.fuzzer.senders))]
 
 	// Generate fuzzed parameters for the function call

diff --git a/scripts/corpus_diff.py b/scripts/corpus_diff.py
@@ -0,0 +1,63 @@
+import os
+import json
+import sys 
+
+def load_json_files_from_subdirectory(subdirectory):
+    json_data = []
+    for root, _, files in os.walk(subdirectory):
+        for file in files:
+            if file.endswith('.json'):
+                with open(os.path.join(root, file), 'r') as f:
+                    data = json.load(f)
+                    json_data.extend(data)
+    return json_data
+
+def extract_unique_methods(transactions):
+    unique_methods = set()
+    for tx in transactions:
+        call_data = tx.get('call', {})
+        data_abi_values = call_data.get('dataAbiValues', {})
+        method_signature = data_abi_values.get('methodSignature', '')
+        if method_signature:
+            unique_methods.add(method_signature)
+    return unique_methods
+
+def compare_methods(subdirectory1, subdirectory2):
+    transactions1 = load_json_files_from_subdirectory(subdirectory1)
+    transactions2 = load_json_files_from_subdirectory(subdirectory2)
+
+    unique_methods1 = extract_unique_methods(transactions1)
+    unique_methods2 = extract_unique_methods(transactions2)
+
+    only_in_subdir1 = unique_methods1 - unique_methods2
+    only_in_subdir2 = unique_methods2 - unique_methods1
+
+    return only_in_subdir1, only_in_subdir2
+
+def main(subdirectory1, subdirectory2):
+
+    only_in_subdir1, only_in_subdir2 = compare_methods(subdirectory1, subdirectory2)
+
+    print(f"Methods only in {subdirectory1}:")
+    if len(only_in_subdir1) == 0:
+        print("  <None>")
+    else:
+        for method in only_in_subdir1:
+            print(f"-  {method}")
+    print("\n")
+
+
+    print(f"Methods only in {subdirectory2}:")
+    if len(only_in_subdir2) == 0:
+        print("  <None>")
+    else:
+        for method in only_in_subdir2:
+            print(f"-  {method}")
+    print("\n")
+
+if __name__ == '__main__':
+    if len(sys.argv) != 3:
+        print("Usage: python3 unique.py <corpus1> <corpus2>")
+        print("Compares the unique methods in the two given corpora.")
+        sys.exit(1)
+    main(sys.argv[1], sys.argv[2])
diff --git a/scripts/corpus_stats.py b/scripts/corpus_stats.py
@@ -0,0 +1,57 @@
+import os
+import json
+from collections import Counter
+import sys
+
+def load_json_files_from_subdirectory(subdirectory):
+    json_data = []
+    for root, _, files in os.walk(subdirectory):
+        for file in files:
+            if file.endswith('.json'):
+                with open(os.path.join(root, file), 'r') as f:
+                    data = json.load(f)
+                    json_data.append(data)
+    return json_data
+
+
+def analyze_transactions(transactions, method_counter):
+
+    for tx in transactions:
+        call_data = tx.get('call', {})
+        data_abi_values = call_data.get('dataAbiValues', {})
+        method_signature = data_abi_values.get('methodSignature', '')
+
+        method_counter[method_signature] += 1
+
+
+
+def main(subdirectory):
+    transaction_seqs = load_json_files_from_subdirectory(subdirectory)
+
+    method_counter = Counter()
+    total_length = 0
+
+    for seq in transaction_seqs:
+        analyze_transactions(seq, method_counter)
+        total_length += len(seq)
+
+    average_length = total_length // len(transaction_seqs)
+
+    print(f"Number of Sequences in {subdirectory}: {len(transaction_seqs)}")
+    print("\n")
+
+    print(f"Average Length of Transactions List: {average_length}")
+    print("\n")
+    print("Frequency of Methods Called:")
+    for method, count in method_counter.most_common():
+        print(f"-  {method}: {count}")
+    print("\n")
+    print(f"Number of Unique Methods: {len(method_counter)}")
+    print("\n")
+
+if __name__ == '__main__':
+    if len(sys.argv) != 2:
+        print("Usage: python3 corpus_stats.py <corpus>")
+        print("Computes statistics on the transactions in the given corpus.")
+        sys.exit(1)
+    main(sys.argv[1])