Skip to content

Commit

Permalink
Merge pull request #1128 from WebFuzzing/refactor-random-walk
Browse files Browse the repository at this point in the history
Collecting data from random walk algorithm
  • Loading branch information
arcuri82 authored Nov 13, 2024
2 parents b34051b + 16823a9 commit d3f9cef
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 6 deletions.
19 changes: 18 additions & 1 deletion core/src/main/kotlin/org/evomaster/core/EMConfig.kt
Original file line number Diff line number Diff line change
Expand Up @@ -1397,9 +1397,26 @@ class EMConfig {
/**
* save covered targets with the specified target format and tests with the specified test format
*/
TARGET_TEST_IND
TARGET_TEST_IND,
/**
* save heuristic values for each target as csv file
*/
TARGET_HEURISTIC
}

/**
 * Location of the CSV file that receives the per-target heuristic values.
 *
 * Only consulted when [processFormat] is `TARGET_HEURISTIC`.
 * Defaults to `targets.csv`.
 */
@Experimental
@Cfg("Where the target heuristic values file (if any) is going to be written (in CSV format). It is only used when processFormat is TARGET_HEURISTIC.")
@FilePath
var targetHeuristicsFile = "targets.csv"

/**
 * Controls what happens to an already existing target heuristics file:
 * `false` (the default) replaces it, `true` adds the new rows to it.
 *
 * Only consulted when [processFormat] is `TARGET_HEURISTIC`.
 */
@Experimental
@Cfg("Whether should add to an existing target heuristics file, instead of replacing it. It is only used when processFormat is TARGET_HEURISTIC.")
var appendToTargetHeuristicsFile = false

/**
 * Comma-separated list of prefixes selecting which targets are recorded,
 * e.g. `Branch,Line`. Defaults to `Branch`.
 *
 * Only consulted when [processFormat] is `TARGET_HEURISTIC`.
 */
@Experimental
@Cfg("Prefix specifying which targets to record. Each target can be separated by a comma, such as 'Branch,Line,Success, etc'. It is only used when processFormat is TARGET_HEURISTIC.")
var saveTargetHeuristicsPrefixes = "Branch"

@Debug
@Cfg("Specify a folder to save results when a search monitor is enabled")
@Folder
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ class RandomWalkAlgorithm<T> : SearchAlgorithm<T>() where T : Individual {

Lazy.assert { individual.isInitialized() && individual.searchGlobalState != null }

//TODO here will be refactored to call method to collect full target info
ff.calculateCoverage(individual, modifiedSpec = null)?.run {
archive.addIfNeeded(this)
latestEvaluatedIndividual = this
Expand All @@ -44,8 +43,10 @@ class RandomWalkAlgorithm<T> : SearchAlgorithm<T>() where T : Individual {
return
}

//TODO here will be refactored to call method to collect full target info
getMutatator().mutateAndSave(latestEvaluatedIndividual!!, archive).run {
val mutatedIndividual = getMutatator().mutate(latestEvaluatedIndividual as EvaluatedIndividual<T>)

ff.calculateCoverage(mutatedIndividual)?.run {
archive.addIfNeeded(this)
latestEvaluatedIndividual = this
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,15 @@ abstract class FitnessFunction<T> where T : Individual {
actionsSize: Int
) : EvaluatedIndividual<T>?{

// By default, we optimize for performance in collecting coverage values, but for special cases, we want to collect full info
val allTargetsWithDescriptive = config.processFormat == EMConfig.ProcessDataFormat.TARGET_HEURISTIC

val ei = SearchTimeController.measureTimeMillis(
{ t, ind ->
time.reportExecutedIndividualTime(t, actionsSize)
ind?.executionTimeMs = t
},
{doCalculateCoverage(individual, targets, allTargets = false, fullyCovered = false, descriptiveIds = false)}
{doCalculateCoverage(individual, targets, allTargets = allTargetsWithDescriptive, fullyCovered = false, descriptiveIds = allTargetsWithDescriptive)}
)
// plugin execution info reporter here, to avoid the time spent by execution reporter
handleExecutionInfo(ei)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ class SearchProcessMonitor: SearchListener {
EMConfig.ProcessDataFormat.TEST_IND , EMConfig.ProcessDataFormat.TARGET_TEST_IND->{
saveStepAsTest(index = time.evaluatedIndividuals,evalInd = evalInd, doesIncludeTarget = config.processFormat == EMConfig.ProcessDataFormat.TARGET_TEST_IND)
}
EMConfig.ProcessDataFormat.TARGET_HEURISTIC->{
saveStepAsTargetHeuristic(index = time.evaluatedIndividuals, evalInd = evalInd)
}
}
if(config.processInterval > 0.0) tb++
}
Expand Down Expand Up @@ -197,6 +200,8 @@ class SearchProcessMonitor: SearchListener {

/**
 * Builds the path of the file used to store the step with the given [index].
 *
 * The result is the step directory, the platform file separator and the process
 * file name derived from the step name.
 *
 * @param index number of the step, forwarded to the step-name computation
 * @param isTargetFile forwarded unchanged to both the step-name and the file-name computation
 */
fun getStepAsPath(index: Int, isTargetFile: Boolean = false): String {
    val directory = getStepDirAsPath()
    val stepName = getStepName(index, isTargetFile)
    val fileName = getProcessFileName(stepName, isTargetFile)
    return "$directory${File.separator}$fileName"
}

/**
 * Resolves the configured target heuristics file (`config.targetHeuristicsFile`)
 * to an absolute path.
 */
fun getTargetStepAsPath() =
    config.targetHeuristicsFile
        .let { configured -> Paths.get(configured) }
        .toAbsolutePath()

/**
 * Path of the directory holding the recorded steps: the configured
 * `processFiles` folder followed by the `DATA_FOLDER` sub-folder,
 * joined with the platform file separator.
 */
fun getStepDirAsPath() =
    listOf(config.processFiles, DATA_FOLDER).joinToString(separator = File.separator)

private fun saveStep(index:Int, v : StepOfSearchProcess<*>){
Expand Down Expand Up @@ -229,12 +234,35 @@ class SearchProcessMonitor: SearchListener {
}
}

/**
 * Records, as CSV rows, the heuristic score of every target of [evalInd] whose
 * descriptive id starts with one of the prefixes in `config.saveTargetHeuristicsPrefixes`.
 *
 * Row layout: `problem,algorithm,seed,step,branch_identifier,fitness`.
 *
 * @param index number of the step being recorded; step `1` marks the start of a run and
 *              triggers the removal of a previous file unless appending is enabled
 * @param evalInd evaluated individual whose fitness data is exported
 */
private fun <T:Individual> saveStepAsTargetHeuristic(index: Int, evalInd: EvaluatedIndividual<T>) {

    // Resolve the output path once instead of once per row.
    val csvPath = getTargetStepAsPath()

    if (!config.appendToTargetHeuristicsFile && index == 1) {
        Files.deleteIfExists(csvPath)
    }

    // Create the file with its header whenever it is missing, not only at step 1:
    // the first recorded step can be a later one, and rows are only ever appended.
    if (!Files.exists(csvPath)) {
        writeByChannel(csvPath,
            "problem,algorithm,seed,step,branch_identifier,fitness\n")
    }

    // Drop empty entries (e.g. from a trailing comma in "Branch,"): an empty prefix
    // matches every id and would silently disable the filter.
    val prefixes = config.saveTargetHeuristicsPrefixes
        .split(",")
        .map { it.trim() }
        .filter { it.isNotEmpty() }
    // A completely blank setting keeps its previous meaning: record every target.
    val recordAll = prefixes.isEmpty()

    val rows = buildString {
        evalInd.fitness.getViewOfData().forEach { entry ->
            val descriptiveId = idMapper.getDescriptiveId(entry.key)
            if (recordAll || prefixes.any { prefix -> descriptiveId.startsWith(prefix) }) {
                append("${config.statisticsColumnId},${config.algorithm},${config.seed},$index,$descriptiveId,${entry.value.score}\n")
            }
        }
    }

    // Single append per step instead of one write per row.
    if (rows.isNotEmpty()) {
        writeByChannel(csvPath, rows, doAppend = true)
    }
}

/**
 * Builds the base name (without extension) of the file that stores a step.
 *
 * The step number is left-padded with zeros to the number of digits of
 * `config.maxEvaluations`.
 *
 * @param value step number, expected to be non-negative
 * @param isTargetFile only relevant for `TARGET_TEST_IND`: selects the `Target` suffix instead of `Test`
 * @throws IllegalStateException if the configured process format does not use per-step files
 */
private fun getStepName(value: Int, isTargetFile: Boolean): String {
    val width = config.maxEvaluations.toString().length
    // padStart is locale-independent, whereas String.format("%0Nd") localises digits
    // according to the default locale, which could put non-ASCII digits in file names.
    val num = value.toString().padStart(width, '0')
    return when(config.processFormat){
        EMConfig.ProcessDataFormat.JSON_ALL -> "EM_${num}Json"
        EMConfig.ProcessDataFormat.TEST_IND-> "EM_${num}Test"
        EMConfig.ProcessDataFormat.TARGET_TEST_IND-> "EM_${num}${if (isTargetFile) "Target" else "Test"}"
        else -> throw IllegalStateException("Unsupported process format")
    }
}

Expand All @@ -249,6 +277,7 @@ class SearchProcessMonitor: SearchListener {
if (isTargetFile) "${name}.txt"
else TestSuiteFileName(name).getAsPath(config.outputFormat)
}
else -> throw IllegalStateException("Unsupported process format")
}
private fun getGsonBuilder() : Gson? {
if (config.enableProcessMonitor && config.processFormat == EMConfig.ProcessDataFormat.JSON_ALL)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ import org.evomaster.core.search.algorithms.onemax.OneMaxSampler
import org.evomaster.core.search.service.monitor.SearchOverall
import org.evomaster.core.search.service.monitor.SearchProcessMonitor
import org.evomaster.core.search.service.monitor.StepOfSearchProcess
import org.hamcrest.CoreMatchers.containsString
import org.hamcrest.CoreMatchers.not
import org.hamcrest.MatcherAssert.assertThat
import org.junit.jupiter.api.Assertions.*
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
Expand All @@ -36,6 +39,7 @@ class ProcessMonitorTest{
private lateinit var sampler: OneMaxSampler
private lateinit var mio: MioAlgorithm<OneMaxIndividual>
private lateinit var epc: ExecutionPhaseController
private lateinit var idMapper: IdMapper

@BeforeEach
fun init(){
Expand All @@ -53,6 +57,8 @@ class ProcessMonitorTest{
mio = injector.getInstance(Key.get(
object : TypeLiteral<MioAlgorithm<OneMaxIndividual>>() {}))
ff = injector.getInstance(OneMaxFitness::class.java)
idMapper = injector.getInstance(IdMapper::class.java)

config = injector.getInstance(EMConfig::class.java)
config.stoppingCriterion = EMConfig.StoppingCriterion.ACTION_EVALUATIONS
config.processFormat = EMConfig.ProcessDataFormat.JSON_ALL
Expand Down Expand Up @@ -251,4 +257,72 @@ class ProcessMonitorTest{
assert(Files.exists(Paths.get(processMonitor.getStepDirAsPath())))
assert(Files.exists(Paths.get(processMonitor.getStepAsPath(1))))
}

/**
 * With only the `Branch` prefix configured, the heuristics CSV must mention the
 * `Branch` target and must not mention the `Line` target.
 */
@Test
fun testTargetHeuristicCollect(){
    config.apply {
        enableProcessMonitor = true
        processFormat = EMConfig.ProcessDataFormat.TARGET_HEURISTIC
        targetHeuristicsFile = "target/target_heuristics.csv"
        appendToTargetHeuristicsFile = false
        saveTargetHeuristicsPrefixes = "Branch"
    }

    processMonitor.postConstruct()

    val individual = OneMaxIndividual(2)
    TestUtils.doInitializeIndividualForTesting(individual, randomness)

    // Give the two targets descriptive ids with different prefixes.
    idMapper.addMapping(0, "Branch_1")
    idMapper.addMapping(1, "Line_1")

    val evaluated = ff.calculateCoverage(individual, modifiedSpec = null)!!
    processMonitor.eval = evaluated
    processMonitor.newActionEvaluated()

    val wasArchived = archive.addIfNeeded(evaluated)
    assert(wasArchived)

    val csvPath = Paths.get(config.targetHeuristicsFile)
    assertTrue(Files.exists(csvPath))

    val csvContent = String(Files.readAllBytes(csvPath))

    assertThat(csvContent, containsString("Branch"))
    assertThat(csvContent, not(containsString("Line")))

    // header row + rows of the targets passing the filter + empty string after the final newline
    assertEquals(3, csvContent.lines().size)
}

/**
 * With both the `Branch` and `Line` prefixes configured, the heuristics CSV must
 * mention both kinds of target.
 */
@Test
fun testTargetHeuristicCollectBranchLine(){
    config.apply {
        enableProcessMonitor = true
        processFormat = EMConfig.ProcessDataFormat.TARGET_HEURISTIC
        targetHeuristicsFile = "target/target_heuristics.csv"
        appendToTargetHeuristicsFile = false
        saveTargetHeuristicsPrefixes = "Branch,Line"
    }

    processMonitor.postConstruct()

    val individual = OneMaxIndividual(2)
    TestUtils.doInitializeIndividualForTesting(individual, randomness)

    // Give the two targets descriptive ids with different prefixes.
    idMapper.addMapping(0, "Branch_1")
    idMapper.addMapping(1, "Line_1")

    val evaluated = ff.calculateCoverage(individual, modifiedSpec = null)!!
    processMonitor.eval = evaluated
    processMonitor.newActionEvaluated()

    val wasArchived = archive.addIfNeeded(evaluated)
    assert(wasArchived)

    val csvPath = Paths.get(config.targetHeuristicsFile)
    assertTrue(Files.exists(csvPath))

    val csvContent = String(Files.readAllBytes(csvPath))

    assertThat(csvContent, containsString("Branch"))
    assertThat(csvContent, containsString("Line"))

    // header row + rows of the targets passing the filter + empty string after the final newline
    assertEquals(4, csvContent.lines().size)
}
}
5 changes: 4 additions & 1 deletion docs/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ There are 3 types of options:
|`probUseRestLinks`| __Double__. In REST, enable the supports of 'links' between resources defined in the OpenAPI schema, if any. When sampling a test case, if the last call has links, given this probability new calls are added for the link. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.|
|`problemType`| __Enum__. The type of SUT we want to generate tests for, e.g., a RESTful API. If left to DEFAULT, the type will be inferred from the EM Driver. However, in case of ambiguities (e.g., the driver specifies more than one type), then this field must be set with a specific type. This is also the case for Black-Box testing where there is no EM Driver. In this latter case, the system defaults to handle REST APIs. *Valid values*: `DEFAULT, REST, GRAPHQL`. *Experimental values*: `RPC, WEBFRONTEND`. *Default value*: `DEFAULT`.|
|`processFiles`| __String__. Specify a folder to save results when a search monitor is enabled. *DEBUG option*. *Default value*: `process_data`.|
|`processFormat`| __Enum__. Specify a format to save the process data. *DEBUG option*. *Valid values*: `JSON_ALL, TEST_IND, TARGET_TEST_IND`. *Default value*: `JSON_ALL`.|
|`processFormat`| __Enum__. Specify a format to save the process data. *DEBUG option*. *Valid values*: `JSON_ALL, TEST_IND, TARGET_TEST_IND, TARGET_HEURISTIC`. *Default value*: `JSON_ALL`.|
|`processInterval`| __Double__. Specify how often to save results when a search monitor is enabled, and 0.0 presents to record all evaluated individual. *DEBUG option*. *Constraints*: `min=0.0, max=50.0`. *Default value*: `0.0`.|
|`recordExceededTargets`| __Boolean__. Whether to record targets when the number is more than 100. *DEBUG option*. *Default value*: `false`.|
|`recordExecutedMainActionInfo`| __Boolean__. Whether to record info of executed actions during search. *DEBUG option*. *Default value*: `false`.|
Expand Down Expand Up @@ -219,6 +219,7 @@ There are 3 types of options:
|Options|Description|
|---|---|
|`abstractInitializationGeneToMutate`| __Boolean__. During mutation, whether to abstract genes for repeated SQL actions. *Default value*: `false`.|
|`appendToTargetHeuristicsFile`| __Boolean__. Whether should add to an existing target heuristics file, instead of replacing it. It is only used when processFormat is TARGET_HEURISTIC. *Default value*: `false`.|
|`bbProbabilityUseDataPool`| __Double__. Specify the probability of using the data pool when sampling test cases. This is for black-box (bb) mode. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.8`.|
|`discoveredInfoRewardedInFitness`| __Boolean__. If there is new discovered information from a test execution, reward it in the fitness function. *Default value*: `false`.|
|`dpcTargetTestSize`| __Int__. Specify a max size of a test to be targeted when either DPC_INCREASING or DPC_DECREASING is enabled. *Default value*: `1`.|
Expand Down Expand Up @@ -255,13 +256,15 @@ There are 3 types of options:
|`probOfSmartInitStructureMutator`| __Double__. Specify a probability of applying a smart structure mutator for initialization of the individual. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.0`.|
|`probabilityAllOptionalsAreOnOrOff`| __Double__. When sampling a new individual, probability that ALL optional choices are ON, or ALL are OFF. The choice between ON and OFF depends on probabilityOfOnVsOffInAllOptionals. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.0`.|
|`saveMockedResponseAsSeparatedFile`| __Boolean__. Whether to save mocked responses as separated files. *Default value*: `false`.|
|`saveTargetHeuristicsPrefixes`| __String__. Prefix specifying which targets to record. Each target can be separated by a comma, such as 'Branch,Line,Success, etc'. It is only used when processFormat is TARGET_HEURISTIC. *Default value*: `Branch`.|
|`security`| __Boolean__. Apply a security testing phase after functional test cases have been generated. *Default value*: `false`.|
|`seedTestCases`| __Boolean__. Whether to seed EvoMaster with some initial test cases. These test cases will be used and evolved throughout the search process. *Default value*: `false`.|
|`seedTestCasesFormat`| __Enum__. Format of the test cases seeded to EvoMaster. *Valid values*: `POSTMAN`. *Default value*: `POSTMAN`.|
|`seedTestCasesPath`| __String__. File path where the seeded test cases are located. *Default value*: `postman.postman_collection.json`.|
|`structureMutationProFS`| __Double__. Specify a probability of applying structure mutator during the focused search. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.0`.|
|`structureMutationProbStrategy`| __Enum__. Specify a strategy to handle a probability of applying structure mutator during the focused search. *Valid values*: `SPECIFIED, SPECIFIED_FS, DPC_TO_SPECIFIED_BEFORE_FS, DPC_TO_SPECIFIED_AFTER_FS, ADAPTIVE_WITH_IMPACT`. *Default value*: `SPECIFIED`.|
|`taintForceSelectionOfGenesWithSpecialization`| __Boolean__. During mutation, force the mutation of genes that have newly discovered specialization from previous fitness evaluations, based on taint analysis. *Default value*: `false`.|
|`targetHeuristicsFile`| __String__. Where the target heuristic values file (if any) is going to be written (in CSV format). It is only used when processFormat is TARGET_HEURISTIC. *Default value*: `targets.csv`.|
|`testResourcePathToSaveMockedResponse`| __String__. Specify test resource path where to save mocked responses as separated files. *Default value*: `""`.|
|`thresholdDistanceForDataPool`| __Int__. Threshold of Levenshtein Distance for key-matching in Data Pool. *Constraints*: `min=0.0`. *Default value*: `2`.|
|`useGlobalTaintInfoProbability`| __Double__. When sampling new individual, check whether to use already existing info on tainted values. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.0`.|
Expand Down

0 comments on commit d3f9cef

Please sign in to comment.