Merge pull request #1128 from WebFuzzing/refactor-random-walk

Collecting data from random walk algorithm
WebFuzzing · Nov 13, 2024 · d3f9cef · d3f9cef
2 parents b34051b + 16823a9
commit d3f9cef
Show file tree

Hide file tree

Showing 6 changed files with 133 additions and 6 deletions.
diff --git a/core/src/main/kotlin/org/evomaster/core/EMConfig.kt b/core/src/main/kotlin/org/evomaster/core/EMConfig.kt
@@ -1397,9 +1397,26 @@ class EMConfig {
         /**
          * save covered targets with the specified target format and tests with the specified test format
          */
-        TARGET_TEST_IND
+        TARGET_TEST_IND,
+        /**
+         * save heuristic values for each target as csv file
+         */
+        TARGET_HEURISTIC
     }
 
+    // CSV destination used by SearchProcessMonitor for the TARGET_HEURISTIC process format.
+    // The monitor resolves this value with Paths.get(...).toAbsolutePath().
+    @Experimental
+    @Cfg("Where the target heuristic values file (if any) is going to be written (in CSV format). It is only used when processFormat is TARGET_HEURISTIC.")
+    @FilePath
+    var targetHeuristicsFile = "targets.csv"
+
+    // When false, SearchProcessMonitor deletes an existing heuristics file at step index 1
+    // before writing; when true, the existing file is kept and rows are appended to it.
+    @Experimental
+    @Cfg("Whether should add to an existing target heuristics file, instead of replacing it. It is only used when processFormat is TARGET_HEURISTIC.")
+    var appendToTargetHeuristicsFile = false
+
+    // Comma-separated list. SearchProcessMonitor trims each entry and records a target
+    // when its descriptive id starts with one of the entries.
+    @Experimental
+    @Cfg("Prefix specifying which targets to record. Each target can be separated by a comma, such as 'Branch,Line,Success, etc'. It is only used when processFormat is TARGET_HEURISTIC.")
+    var saveTargetHeuristicsPrefixes = "Branch"
+
     @Debug
     @Cfg("Specify a folder to save results when a search monitor is enabled")
     @Folder

diff --git a/core/src/main/kotlin/org/evomaster/core/search/algorithms/RandomWalkAlgorithm.kt b/core/src/main/kotlin/org/evomaster/core/search/algorithms/RandomWalkAlgorithm.kt
@@ -35,7 +35,6 @@ class RandomWalkAlgorithm<T> : SearchAlgorithm<T>() where T : Individual {
 
             Lazy.assert { individual.isInitialized() && individual.searchGlobalState != null }
 
-            //TODO here will be refactored to call method to collect full target info
             ff.calculateCoverage(individual, modifiedSpec = null)?.run {
                 archive.addIfNeeded(this)
                 latestEvaluatedIndividual = this
@@ -44,8 +43,10 @@ class RandomWalkAlgorithm<T> : SearchAlgorithm<T>() where T : Individual {
             return
         }
 
-        //TODO here will be refactored to call method to collect full target info
-        getMutatator().mutateAndSave(latestEvaluatedIndividual!!, archive).run {
+        val mutatedIndividual = getMutatator().mutate(latestEvaluatedIndividual as EvaluatedIndividual<T>)
+
+        ff.calculateCoverage(mutatedIndividual)?.run {
+            archive.addIfNeeded(this)
             latestEvaluatedIndividual = this
         }
 

diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/FitnessFunction.kt b/core/src/main/kotlin/org/evomaster/core/search/service/FitnessFunction.kt
@@ -138,12 +138,15 @@ abstract class FitnessFunction<T>  where T : Individual {
         actionsSize: Int
     ) : EvaluatedIndividual<T>?{
 
+        // By default, we optimize for performance in collecting coverage values, but for special cases, we want to collect full info
+        val allTargetsWithDescriptive = config.processFormat == EMConfig.ProcessDataFormat.TARGET_HEURISTIC
+
         val ei = SearchTimeController.measureTimeMillis(
                 { t, ind ->
                     time.reportExecutedIndividualTime(t, actionsSize)
                     ind?.executionTimeMs = t
                 },
-                {doCalculateCoverage(individual, targets, allTargets = false, fullyCovered = false, descriptiveIds = false)}
+                {doCalculateCoverage(individual, targets, allTargets = allTargetsWithDescriptive, fullyCovered = false, descriptiveIds = allTargetsWithDescriptive)}
         )
         // plugin execution info reporter here, to avoid the time spent by execution reporter
         handleExecutionInfo(ei)

diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/monitor/SearchProcessMonitor.kt b/core/src/main/kotlin/org/evomaster/core/search/service/monitor/SearchProcessMonitor.kt
@@ -151,6 +151,9 @@ class SearchProcessMonitor: SearchListener {
                     EMConfig.ProcessDataFormat.TEST_IND , EMConfig.ProcessDataFormat.TARGET_TEST_IND->{
                         saveStepAsTest(index = time.evaluatedIndividuals,evalInd = evalInd, doesIncludeTarget = config.processFormat == EMConfig.ProcessDataFormat.TARGET_TEST_IND)
                     }
+                    EMConfig.ProcessDataFormat.TARGET_HEURISTIC->{
+                        saveStepAsTargetHeuristic(index = time.evaluatedIndividuals, evalInd = evalInd)
+                    }
                 }
                 if(config.processInterval > 0.0) tb++
             }
@@ -197,6 +200,8 @@ class SearchProcessMonitor: SearchListener {
 
     /**
      * Builds the location of the file for step [index]: the step directory, the platform
      * file separator, then the file name derived from the step name.
      *
      * @param index step number used to build the step name
      * @param isTargetFile forwarded to both the step-name and the file-name helpers
      */
     fun getStepAsPath(index: Int, isTargetFile: Boolean=false): String {
         val directory = getStepDirAsPath()
         val fileName = getProcessFileName(getStepName(index, isTargetFile), isTargetFile)
         return directory + File.separator + fileName
     }
 
+    /**
+     * Returns the absolute form of the path configured in `config.targetHeuristicsFile`.
+     */
+    fun getTargetStepAsPath(): java.nio.file.Path {
+        val configuredFile = Paths.get(config.targetHeuristicsFile)
+        return configuredFile.toAbsolutePath()
+    }
+
     /**
      * Returns the directory for per-step files: `config.processFiles`, the platform
      * file separator, then `DATA_FOLDER`.
      */
     fun getStepDirAsPath(): String {
         return config.processFiles + File.separator + DATA_FOLDER
     }
 
     private fun saveStep(index:Int, v : StepOfSearchProcess<*>){
@@ -229,12 +234,35 @@ class SearchProcessMonitor: SearchListener {
         }
     }
 
+    /**
+     * Appends the heuristic score of every selected target of [evalInd] to the CSV file
+     * returned by [getTargetStepAsPath].
+     *
+     * Rows follow the header `problem,algorithm,seed,step,branch_identifier,fitness`.
+     * A target is selected when its descriptive id starts with one of the comma-separated
+     * entries of `config.saveTargetHeuristicsPrefixes`. Blank entries (e.g. the one produced
+     * by a trailing comma in `"Branch,"`) are ignored, because an empty prefix matches every
+     * id and would silently disable the filter. If the whole option is blank, all targets
+     * are recorded.
+     *
+     * @param index step number written to the `step` column; at step 1 an existing file is
+     * deleted first unless `config.appendToTargetHeuristicsFile` is set
+     * @param evalInd evaluated individual whose fitness data is exported
+     */
+    private fun <T:Individual> saveStepAsTargetHeuristic(index: Int, evalInd: EvaluatedIndividual<T>) {
+
+        val csvPath = getTargetStepAsPath()
+
+        if(!config.appendToTargetHeuristicsFile && index == 1){
+            Files.deleteIfExists(csvPath)
+        }
+
+        // Write the header whenever the file is missing, not only at step 1: if the first
+        // recorded step has another index, the rows would otherwise have no header line.
+        if(!Files.exists(csvPath)){
+            writeByChannel(csvPath,
+                "problem,algorithm,seed,step,branch_identifier,fitness\n")
+        }
+
+        val configuredPrefixes = config.saveTargetHeuristicsPrefixes.split(",").map { it.trim() }
+        val nonBlankPrefixes = configuredPrefixes.filter { it.isNotEmpty() }
+        // A completely blank option keeps its "match everything" meaning; otherwise stray
+        // empty tokens are dropped so they cannot widen the selection.
+        val prefixes = if (nonBlankPrefixes.isEmpty()) configuredPrefixes else nonBlankPrefixes
+
+        // Collect all rows of this step and write them with a single call.
+        val rows = StringBuilder()
+        evalInd.fitness.getViewOfData().forEach { entry ->
+            val descriptiveId = idMapper.getDescriptiveId(entry.key)
+            if(prefixes.any { prefix -> descriptiveId.startsWith(prefix) }){
+                rows.append("${config.statisticsColumnId},${config.algorithm},${config.seed},$index,$descriptiveId,${entry.value.score}\n")
+            }
+        }
+
+        if(rows.isNotEmpty()){
+            writeByChannel(csvPath, rows.toString(), doAppend = true)
+        }
+    }
+
    private fun getStepName(value: Int, isTargetFile: Boolean): String {
        val num = String.format("%0${config.maxEvaluations.toString().length}d", value)
        return when(config.processFormat){
            EMConfig.ProcessDataFormat.JSON_ALL -> "EM_${num}Json"
            EMConfig.ProcessDataFormat.TEST_IND-> "EM_${num}Test"
            EMConfig.ProcessDataFormat.TARGET_TEST_IND-> "EM_${num}${if (isTargetFile) "Target" else "Test"}"
+           else -> throw IllegalStateException("Unsupported process format")
        }
    }
 
@@ -249,6 +277,7 @@ class SearchProcessMonitor: SearchListener {
             if (isTargetFile) "${name}.txt"
             else TestSuiteFileName(name).getAsPath(config.outputFormat)
         }
+        else -> throw IllegalStateException("Unsupported process format")
     }
     private fun getGsonBuilder() : Gson? {
         if (config.enableProcessMonitor && config.processFormat == EMConfig.ProcessDataFormat.JSON_ALL)

diff --git a/core/src/test/kotlin/org/evomaster/core/search/service/ProcessMonitorTest.kt b/core/src/test/kotlin/org/evomaster/core/search/service/ProcessMonitorTest.kt
@@ -18,6 +18,9 @@ import org.evomaster.core.search.algorithms.onemax.OneMaxSampler
 import org.evomaster.core.search.service.monitor.SearchOverall
 import org.evomaster.core.search.service.monitor.SearchProcessMonitor
 import org.evomaster.core.search.service.monitor.StepOfSearchProcess
+import org.hamcrest.CoreMatchers.containsString
+import org.hamcrest.CoreMatchers.not
+import org.hamcrest.MatcherAssert.assertThat
 import org.junit.jupiter.api.Assertions.*
 import org.junit.jupiter.api.BeforeEach
 import org.junit.jupiter.api.Test
@@ -36,6 +39,7 @@ class ProcessMonitorTest{
     private lateinit var sampler: OneMaxSampler
     private lateinit var mio: MioAlgorithm<OneMaxIndividual>
     private lateinit var epc: ExecutionPhaseController
+    private lateinit var idMapper: IdMapper
 
     @BeforeEach
     fun init(){
@@ -53,6 +57,8 @@ class ProcessMonitorTest{
         mio = injector.getInstance(Key.get(
                 object : TypeLiteral<MioAlgorithm<OneMaxIndividual>>() {}))
         ff =  injector.getInstance(OneMaxFitness::class.java)
+        idMapper = injector.getInstance(IdMapper::class.java)
+
         config = injector.getInstance(EMConfig::class.java)
         config.stoppingCriterion = EMConfig.StoppingCriterion.ACTION_EVALUATIONS
         config.processFormat = EMConfig.ProcessDataFormat.JSON_ALL
@@ -251,4 +257,72 @@ class ProcessMonitorTest{
         assert(Files.exists(Paths.get(processMonitor.getStepDirAsPath())))
         assert(Files.exists(Paths.get(processMonitor.getStepAsPath(1))))
     }
+
+    @Test
+    fun testTargetHeuristicCollect(){
+        val targetData = collectTargetHeuristics(prefixes = "Branch")
+
+        assertThat(targetData, containsString("Branch"))
+        assertThat(targetData, not(containsString("Line")))
+
+        // 1 header + filtered number of objectives + 1 empty line
+        assertEquals(3, targetData.lines().size)
+    }
+
+    @Test
+    fun testTargetHeuristicCollectBranchLine(){
+        val targetData = collectTargetHeuristics(prefixes = "Branch,Line")
+
+        assertThat(targetData, containsString("Branch"))
+        assertThat(targetData, containsString("Line"))
+
+        // 1 header + filtered number of objectives + 1 empty line
+        assertEquals(4, targetData.lines().size)
+    }
+
+    /**
+     * Shared arrange/act part of the TARGET_HEURISTIC tests.
+     *
+     * Configures the monitor to write `target/target_heuristics.csv` (replacing any existing
+     * file), evaluates a two-target OneMax individual whose targets are mapped to
+     * `Branch_1` and `Line_1`, reports it to the process monitor and returns the content of
+     * the resulting file.
+     *
+     * @param prefixes value assigned to `config.saveTargetHeuristicsPrefixes`
+     * @return the full text of the written heuristics file
+     */
+    private fun collectTargetHeuristics(prefixes: String): String {
+        config.enableProcessMonitor = true
+        config.processFormat = EMConfig.ProcessDataFormat.TARGET_HEURISTIC
+        config.targetHeuristicsFile = "target/target_heuristics.csv"
+        config.appendToTargetHeuristicsFile = false
+        config.saveTargetHeuristicsPrefixes = prefixes
+
+        processMonitor.postConstruct()
+
+        val a = OneMaxIndividual(2)
+        TestUtils.doInitializeIndividualForTesting(a, randomness)
+
+        idMapper.addMapping(0, "Branch_1")
+        idMapper.addMapping(1, "Line_1")
+
+        val evalA = ff.calculateCoverage(a, modifiedSpec = null)!!
+        processMonitor.eval = evalA
+        processMonitor.newActionEvaluated()
+
+        // assertTrue instead of Kotlin's assert(): the latter only checks when the JVM
+        // runs with assertions enabled.
+        val addedA = archive.addIfNeeded(evalA)
+        assertTrue(addedA)
+
+        val heuristicsFile = Paths.get(config.targetHeuristicsFile)
+        assertTrue(Files.exists(heuristicsFile))
+
+        return String(Files.readAllBytes(heuristicsFile))
+    }
 }
diff --git a/docs/options.md b/docs/options.md
@@ -167,7 +167,7 @@ There are 3 types of options:
 |`probUseRestLinks`| __Double__. In REST, enable the supports of 'links' between resources defined in the OpenAPI schema, if any. When sampling a test case, if the last call has links, given this probability new calls are added for the link. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.|
 |`problemType`| __Enum__. The type of SUT we want to generate tests for, e.g., a RESTful API. If left to DEFAULT, the type will be inferred from the EM Driver. However, in case of ambiguities (e.g., the driver specifies more than one type), then this field must be set with a specific type. This is also the case for Black-Box testing where there is no EM Driver. In this latter case, the system defaults to handle REST APIs. *Valid values*: `DEFAULT, REST, GRAPHQL`. *Experimental values*: `RPC, WEBFRONTEND`. *Default value*: `DEFAULT`.|
 |`processFiles`| __String__. Specify a folder to save results when a search monitor is enabled. *DEBUG option*. *Default value*: `process_data`.|
-|`processFormat`| __Enum__. Specify a format to save the process data. *DEBUG option*. *Valid values*: `JSON_ALL, TEST_IND, TARGET_TEST_IND`. *Default value*: `JSON_ALL`.|
+|`processFormat`| __Enum__. Specify a format to save the process data. *DEBUG option*. *Valid values*: `JSON_ALL, TEST_IND, TARGET_TEST_IND, TARGET_HEURISTIC`. *Default value*: `JSON_ALL`.|
 |`processInterval`| __Double__. Specify how often to save results when a search monitor is enabled, and 0.0 presents to record all evaluated individual. *DEBUG option*. *Constraints*: `min=0.0, max=50.0`. *Default value*: `0.0`.|
 |`recordExceededTargets`| __Boolean__. Whether to record targets when the number is more than 100. *DEBUG option*. *Default value*: `false`.|
 |`recordExecutedMainActionInfo`| __Boolean__. Whether to record info of executed actions during search. *DEBUG option*. *Default value*: `false`.|
@@ -219,6 +219,7 @@ There are 3 types of options:
 |Options|Description|
 |---|---|
 |`abstractInitializationGeneToMutate`| __Boolean__. During mutation, whether to abstract genes for repeated SQL actions. *Default value*: `false`.|
+|`appendToTargetHeuristicsFile`| __Boolean__. Whether should add to an existing target heuristics file, instead of replacing it. It is only used when processFormat is TARGET_HEURISTIC. *Default value*: `false`.|
 |`bbProbabilityUseDataPool`| __Double__. Specify the probability of using the data pool when sampling test cases. This is for black-box (bb) mode. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.8`.|
 |`discoveredInfoRewardedInFitness`| __Boolean__. If there is new discovered information from a test execution, reward it in the fitness function. *Default value*: `false`.|
 |`dpcTargetTestSize`| __Int__. Specify a max size of a test to be targeted when either DPC_INCREASING or DPC_DECREASING is enabled. *Default value*: `1`.|
@@ -255,13 +256,15 @@ There are 3 types of options:
 |`probOfSmartInitStructureMutator`| __Double__. Specify a probability of applying a smart structure mutator for initialization of the individual. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.0`.|
 |`probabilityAllOptionalsAreOnOrOff`| __Double__. When sampling a new individual, probability that ALL optional choices are ON, or ALL are OFF. The choice between ON and OFF depends on probabilityOfOnVsOffInAllOptionals. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.0`.|
 |`saveMockedResponseAsSeparatedFile`| __Boolean__. Whether to save mocked responses as separated files. *Default value*: `false`.|
+|`saveTargetHeuristicsPrefixes`| __String__. Prefix specifying which targets to record. Each target can be separated by a comma, such as 'Branch,Line,Success, etc'. It is only used when processFormat is TARGET_HEURISTIC. *Default value*: `Branch`.|
 |`security`| __Boolean__. Apply a security testing phase after functional test cases have been generated. *Default value*: `false`.|
 |`seedTestCases`| __Boolean__. Whether to seed EvoMaster with some initial test cases. These test cases will be used and evolved throughout the search process. *Default value*: `false`.|
 |`seedTestCasesFormat`| __Enum__. Format of the test cases seeded to EvoMaster. *Valid values*: `POSTMAN`. *Default value*: `POSTMAN`.|
 |`seedTestCasesPath`| __String__. File path where the seeded test cases are located. *Default value*: `postman.postman_collection.json`.|
 |`structureMutationProFS`| __Double__. Specify a probability of applying structure mutator during the focused search. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.0`.|
 |`structureMutationProbStrategy`| __Enum__. Specify a strategy to handle a probability of applying structure mutator during the focused search. *Valid values*: `SPECIFIED, SPECIFIED_FS, DPC_TO_SPECIFIED_BEFORE_FS, DPC_TO_SPECIFIED_AFTER_FS, ADAPTIVE_WITH_IMPACT`. *Default value*: `SPECIFIED`.|
 |`taintForceSelectionOfGenesWithSpecialization`| __Boolean__. During mutation, force the mutation of genes that have newly discovered specialization from previous fitness evaluations, based on taint analysis. *Default value*: `false`.|
+|`targetHeuristicsFile`| __String__. Where the target heuristic values file (if any) is going to be written (in CSV format). It is only used when processFormat is TARGET_HEURISTIC. *Default value*: `targets.csv`.|
 |`testResourcePathToSaveMockedResponse`| __String__. Specify test resource path where to save mocked responses as separated files. *Default value*: `""`.|
 |`thresholdDistanceForDataPool`| __Int__. Threshold of Levenshtein Distance for key-matching in Data Pool. *Constraints*: `min=0.0`. *Default value*: `2`.|
 |`useGlobalTaintInfoProbability`| __Double__. When sampling new individual, check whether to use already existing info on tainted values. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.0`.|