From 5e608d39b7899f004ddd26a2aa013168c04d3b34 Mon Sep 17 00:00:00 2001 From: "Deomid \"rojer\" Ryabkov" Date: Mon, 22 Feb 2021 13:16:49 +0000 Subject: [PATCH] New TestRunner implementation With inter-step state serialization This is take 2, incorporating a fix for #228 and comments from #219 --- pkg/api/api.go | 2 +- pkg/api/api_test.go | 6 +- pkg/cerrors/cerrors.go | 57 ++ pkg/event/testevent/test.go | 16 +- pkg/jobmanager/bundles.go | 3 +- pkg/jobmanager/jobmanager.go | 6 +- pkg/pluginregistry/bundles.go | 2 +- pkg/runner/job_runner.go | 12 +- pkg/runner/test_runner.go | 281 ------ pkg/runner/test_runner2.go | 842 ++++++++++++++++++ pkg/runner/test_runner_pipeline.go | 540 ----------- pkg/runner/test_runner_route.go | 292 ------ pkg/runner/test_runner_route_test.go | 400 --------- pkg/runner/test_runner_test.go | 597 +++++++++++++ pkg/runner/wait_for_first_target.go | 57 -- pkg/runner/wait_for_first_target_test.go | 142 --- pkg/storage/limits/limits_test.go | 2 +- pkg/storage/storage.go | 1 + .../listeners/httplistener/httplistener.go | 3 +- plugins/storage/memory/memory.go | 2 +- plugins/teststeps/echo/echo.go | 2 +- plugins/teststeps/example/example.go | 84 +- plugins/teststeps/randecho/randecho.go | 2 +- plugins/teststeps/sshcmd/sshcmd.go | 2 +- .../terminalexpect/terminalexpect.go | 2 +- plugins/teststeps/teststeps.go | 14 +- tests/common/get_events.go | 48 + .../goroutine_leak_check.go | 6 +- .../goroutine_leak_check_test.go | 6 +- tests/integ/events/frameworkevents/common.go | 20 +- tests/integ/events/testevents/common.go | 24 +- tests/integ/job/common.go | 44 +- tests/integ/jobmanager/common.go | 23 +- tests/integ/test/testrunner_test.go | 224 +---- .../teststeps/badtargets/badtargets.go | 111 +++ tests/plugins/teststeps/channels/channels.go | 6 +- tests/plugins/teststeps/fail/fail.go | 2 +- tests/plugins/teststeps/hanging/hanging.go | 2 +- tests/plugins/teststeps/noreturn/noreturn.go | 2 +- tests/plugins/teststeps/teststep/teststep.go | 160 ++++ 40 files changed, 1997 insertions(+), 2050 deletions(-) delete mode 100644 pkg/runner/test_runner.go create mode 100644 pkg/runner/test_runner2.go delete mode 100644 pkg/runner/test_runner_pipeline.go delete mode 100644 pkg/runner/test_runner_route.go delete mode 100644 pkg/runner/test_runner_route_test.go create mode 100644 pkg/runner/test_runner_test.go delete mode 100644 pkg/runner/wait_for_first_target.go delete mode 100644 pkg/runner/wait_for_first_target_test.go create mode 100644 tests/common/get_events.go rename tests/common/{ => goroutine_leak_check}/goroutine_leak_check.go (97%) rename tests/common/{ => goroutine_leak_check}/goroutine_leak_check_test.go (86%) create mode 100644 tests/plugins/teststeps/badtargets/badtargets.go create mode 100644 tests/plugins/teststeps/teststep/teststep.go diff --git a/pkg/api/api.go b/pkg/api/api.go index 0286d453..e85aa43a 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -46,7 +46,7 @@ type API struct { // New returns an initialized instance of an API struct with the specified // server ID generation function. -func New(ctx xcontext.Context, opts ...Option) (*API, error) { +func New(opts ...Option) (*API, error) { cfg := getConfig(opts...) serverID, err := obtainServerID(cfg.ServerIDFunc) if err != nil { diff --git a/pkg/api/api_test.go b/pkg/api/api_test.go index 6ed44a25..ab1d6e07 100644 --- a/pkg/api/api_test.go +++ b/pkg/api/api_test.go @@ -23,7 +23,7 @@ var ctx = logrusctx.NewContext(logger.LevelDebug) func TestOptions(t *testing.T) { eventTimeout := 3141592654 * time.Second serverID := "myUnitTestServerID" - api, err := New(ctx, + api, err := New( OptionEventTimeout(eventTimeout), OptionServerID(serverID), ) @@ -40,7 +40,7 @@ func (d dummyEventMsg) Requestor() EventRequestor { func TestEventTimeout(t *testing.T) { t.Run("timeout", func(t *testing.T) { - apiInstance, err := New(ctx, OptionServerID("unit-test"), OptionEventTimeout(time.Nanosecond)) + apiInstance, err := New(OptionServerID("unit-test"), OptionEventTimeout(time.Nanosecond)) require.NoError(t, err) t.Run("Status", func(t *testing.T) { startTime := time.Now() @@ -63,7 +63,7 @@ func TestEventTimeout(t *testing.T) { }) t.Run("noTimeout", func(t *testing.T) { - apiInstance, err := New(ctx, OptionServerID("unit-test")) + apiInstance, err := New(OptionServerID("unit-test")) require.NoError(t, err) respExpected := &EventResponse{ diff --git a/pkg/cerrors/cerrors.go b/pkg/cerrors/cerrors.go index 4fa3211e..e3c67e75 100644 --- a/pkg/cerrors/cerrors.go +++ b/pkg/cerrors/cerrors.go @@ -43,6 +43,17 @@ func (e *ErrTestStepsNeverReturned) Error() string { return fmt.Sprintf("test step [%s] did not return", strings.Join(e.StepNames, ", ")) } +// ErrTestTargetInjectionTimedOut indicates that test step did not ingest a target +// within allotted time. +type ErrTestTargetInjectionTimedOut struct { + StepName string +} + +// Error returns the error string associated with the error +func (e *ErrTestTargetInjectionTimedOut) Error() string { + return fmt.Sprintf("test step %v failed to ingest a target", e.StepName) +} + // ErrTestStepClosedChannels indicates that the test step returned after // closing its output channels, which constitutes an API violation type ErrTestStepClosedChannels struct { @@ -53,3 +64,49 @@ type ErrTestStepClosedChannels struct { func (e *ErrTestStepClosedChannels) Error() string { return fmt.Sprintf("test step %v closed output channels (api violation)", e.StepName) } + +// ErrTestStepPaniced indicates that a test step's method panicked. +type ErrTestStepPaniced struct { + StepName string + StackTrace string +} + +// Error returns the error string associated with the error +func (e *ErrTestStepPaniced) Error() string { + return fmt.Sprintf("test step %s paniced, trace: %q", e.StepName, e.StackTrace) +} + +// ErrTestStepReturnedDuplicateResult indicates that a test step returned result +// twice for the same target. +type ErrTestStepReturnedDuplicateResult struct { + StepName string + Target string +} + +// Error returns the error string associated with the error +func (e *ErrTestStepReturnedDuplicateResult) Error() string { + return fmt.Sprintf("test step %s returned duplicate result for %s", e.StepName, e.Target) +} + +// ErrTestStepReturnedUnexpectedResult indicates that a test step returned result +// for a target that was not given to it. +type ErrTestStepReturnedUnexpectedResult struct { + StepName string + Target string +} + +// Error returns the error string associated with the error +func (e *ErrTestStepReturnedUnexpectedResult) Error() string { + return fmt.Sprintf("test step %s returned unexpected result for %s", e.StepName, e.Target) +} + +// ErrTestStepLostTargets indicates that targets have been lost during test run. +type ErrTestStepLostTargets struct { + StepName string + Targets []string +} + +// Error returns the error string associated with the error +func (e *ErrTestStepLostTargets) Error() string { + return fmt.Sprintf("test step %s lost targets %v", e.StepName, e.Targets) +} diff --git a/pkg/event/testevent/test.go b/pkg/event/testevent/test.go index 95ed859d..ead40539 100644 --- a/pkg/event/testevent/test.go +++ b/pkg/event/testevent/test.go @@ -17,7 +17,7 @@ import ( "github.com/facebookincubator/contest/pkg/xcontext" ) -// Header models the header of a test event, which consists in metadatat hat defines the +// Header models the header of a test event, which consists in metadata that defines the // emitter of the events. The Header is under ConTest control and cannot be manipulated // by the TestStep type Header struct { @@ -29,8 +29,8 @@ type Header struct { // Data models the data of a test event. It is populated by the TestStep type Data struct { - EventName event.Name Target *target.Target + EventName event.Name Payload *json.RawMessage } @@ -153,3 +153,15 @@ type EmitterFetcher interface { Emitter Fetcher } + +func (h *Header) String() string { + return fmt.Sprintf("[%d %d %s %s]", h.JobID, h.RunID, h.TestName, h.TestStepLabel) +} + +func (d *Data) String() string { + ps := "" + if d.Payload != nil { + ps = fmt.Sprintf(" %q", d.Payload) //nolint SA5009 - works fine + } + return fmt.Sprintf("[%s %s%s]", d.Target, d.EventName, ps) +} diff --git a/pkg/jobmanager/bundles.go b/pkg/jobmanager/bundles.go index 4b1c0a5f..e5d77476 100644 --- a/pkg/jobmanager/bundles.go +++ b/pkg/jobmanager/bundles.go @@ -72,8 +72,7 @@ func newBundlesFromSteps(ctx xcontext.Context, descriptors []*test.TestStepDescr if err != nil { return nil, err } - // test step index is incremented by 1 so we can use 0 to signal an anomaly - tsb, err := registry.NewTestStepBundle(ctx, *descriptor, uint(idx)+1, tse) + tsb, err := registry.NewTestStepBundle(ctx, *descriptor, tse) if err != nil { return nil, fmt.Errorf("NewStepBundle for test step '%s' with index %d failed: %w", descriptor.Name, idx, err) } diff --git a/pkg/jobmanager/jobmanager.go b/pkg/jobmanager/jobmanager.go index 881de8e9..28915399 100644 --- a/pkg/jobmanager/jobmanager.go +++ b/pkg/jobmanager/jobmanager.go @@ -135,13 +135,15 @@ func (jm *JobManager) handleEvent(ev *api.Event) { // signals, propagating the signals downwards to all jobs. func (jm *JobManager) Start(ctx xcontext.Context, sigs chan os.Signal) error { log := ctx.Logger() + log.Debugf("Starting JobManager") - a, err := api.New(ctx, jm.config.apiOptions...) + a, err := api.New(jm.config.apiOptions...) if err != nil { return fmt.Errorf("Cannot start JobManager: %w", err) } apiCtx, apiCancel := xcontext.WithCancel(ctx) + apiCtx, apiPause := xcontext.WithNotify(apiCtx, xcontext.Paused) errCh := make(chan error, 1) go func() { lErr := jm.apiListener.Serve(apiCtx, a) @@ -176,7 +178,7 @@ loop: apiCancel() } else { log.Debugf("Interrupted by signal '%s': pause jobs and exit", sig) - apiCancel() + apiPause() jm.PauseJobs(ctx) } select { diff --git a/pkg/pluginregistry/bundles.go b/pkg/pluginregistry/bundles.go index 923e8a3a..116f30df 100644 --- a/pkg/pluginregistry/bundles.go +++ b/pkg/pluginregistry/bundles.go @@ -16,7 +16,7 @@ import ( ) // NewTestStepBundle creates a TestStepBundle from a TestStepDescriptor -func (r *PluginRegistry) NewTestStepBundle(ctx xcontext.Context, testStepDescriptor test.TestStepDescriptor, stepIndex uint, allowedEvents map[event.Name]bool) (*test.TestStepBundle, error) { +func (r *PluginRegistry) NewTestStepBundle(ctx xcontext.Context, testStepDescriptor test.TestStepDescriptor, allowedEvents map[event.Name]bool) (*test.TestStepBundle, error) { testStep, err := r.NewTestStep(testStepDescriptor.Name) if err != nil { return nil, fmt.Errorf("could not get the desired TestStep (%s): %v", testStepDescriptor.Name, err) diff --git a/pkg/runner/job_runner.go b/pkg/runner/job_runner.go index fc43273b..8ff92306 100644 --- a/pkg/runner/job_runner.go +++ b/pkg/runner/job_runner.go @@ -127,7 +127,7 @@ func (jr *JobRunner) Run(j *job.Job) ([][]*job.Report, []*job.Report, error) { targets = <-targetsCh if err != nil { err = fmt.Errorf("run #%d: cannot fetch targets for test '%s': %v", run+1, t.Name, err) - j.StateCtx.Logger().Errorf("%v", err.Error()) + j.StateCtx.Logger().Errorf(err.Error()) return nil, nil, err } // Associate the targets with the job for later retrievel @@ -181,7 +181,13 @@ func (jr *JobRunner) Run(j *job.Job) ([][]*job.Report, []*job.Report, error) { if runErr = jr.emitAcquiredTargets(j.StateCtx, testEventEmitter, targets); runErr == nil { j.StateCtx.Logger().Infof("Run #%d: running test #%d for job '%s' (job ID: %d) on %d targets", run+1, idx, j.Name, j.ID, len(targets)) testRunner := NewTestRunner() - runErr = testRunner.Run(j.StateCtx, t, targets, j.ID, types.RunID(run+1)) + resumeState, err := testRunner.Run(j.StateCtx, t, targets, j.ID, types.RunID(run+1), nil) + if err == xcontext.Paused { + j.StateCtx.Logger().Debugf("Runner paused, state: %s", string(resumeState)) + // TODO(rojer): Persist the state. + } else { + runErr = err + } } // Job is done, release all the targets @@ -232,7 +238,7 @@ func (jr *JobRunner) Run(j *job.Job) ([][]*job.Report, []*job.Report, error) { j.StateCtx.Logger().Warnf("Run reporter failed while calculating run results, proceeding anyway: %v", err) } else { if success { - j.StateCtx.Logger().Warnf("Run #%d of job %d considered successful according to %s", run+1, j.ID, bundle.Reporter.Name()) + j.StateCtx.Logger().Debugf("Run #%d of job %d considered successful according to %s", run+1, j.ID, bundle.Reporter.Name()) } else { j.StateCtx.Logger().Errorf("Run #%d of job %d considered failed according to %s", run+1, j.ID, bundle.Reporter.Name()) } diff --git a/pkg/runner/test_runner.go b/pkg/runner/test_runner.go deleted file mode 100644 index afc47edc..00000000 --- a/pkg/runner/test_runner.go +++ /dev/null @@ -1,281 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. - -package runner - -import ( - "context" - "fmt" - "time" - - "github.com/facebookincubator/contest/pkg/cerrors" - "github.com/facebookincubator/contest/pkg/config" - "github.com/facebookincubator/contest/pkg/target" - "github.com/facebookincubator/contest/pkg/test" - "github.com/facebookincubator/contest/pkg/types" - "github.com/facebookincubator/contest/pkg/xcontext" -) - -// TestRunnerTimeouts collects all the timeouts values that the test runner uses -type TestRunnerTimeouts struct { - StepInjectTimeout time.Duration - MessageTimeout time.Duration - ShutdownTimeout time.Duration - StepShutdownTimeout time.Duration -} - -// routingCh represents a set of unidirectional channels used by the routing subsystem. -// There is a routing block for each TestStep of the pipeline, which is responsible for -// the following actions: -// * Targets in egress from the previous routing block are injected into the -// current TestStep -// * Targets in egress from the current TestStep are injected into the next -// routing block -type routingCh struct { - // routeIn and routeOut connect the routing block to other routing blocks - routeIn <-chan *target.Target - routeOut chan<- *target.Target - // Channels that connect the routing block to the TestStep - stepIn chan<- *target.Target - stepOut <-chan *target.Target - stepErr <-chan cerrors.TargetError - // targetErr connects the routing block directly to the TestRunner. Failing - // targets are acquired by the TestRunner via this channel - targetErr chan<- cerrors.TargetError -} - -// stepCh represents a set of bidirectional channels that a TestStep and its associated -// routing block use to communicate. The TestRunner forces the direction of each -// channel when connecting the TestStep to the routing block. -type stepCh struct { - stepIn chan *target.Target - stepOut chan *target.Target - stepErr chan cerrors.TargetError -} - -type injectionCh struct { - stepIn chan<- *target.Target - resultCh chan<- injectionResult -} - -// injectionResult represents the result of an injection goroutine -type injectionResult struct { - target *target.Target - err error -} - -// routeResult represents the result of routing block, possibly carrying error information -type routeResult struct { - bundle test.TestStepBundle - err error -} - -// stepResult represents the result of a TestStep, possibly carrying error information -type stepResult struct { - jobID types.JobID - runID types.RunID - bundle test.TestStepBundle - err error -} - -// pipelineCtrlCh represents multiple result and control channels that the pipeline uses -// to collect results from routing blocks, steps and target completing the test and to -// signa cancellation to various pipeline subsystems -type pipelineCtrlCh struct { - routingResultCh <-chan routeResult - stepResultCh <-chan stepResult - targetOut <-chan *target.Target - targetErr <-chan cerrors.TargetError - - // ctx is a control context used to cancel/pause the steps of the pipeline - ctx xcontext.Context - pause func() - cancel func() -} - -// TestRunner is the main runner of TestSteps in ConTest. `results` collects -// the results of the run. It is not safe to access `results` concurrently. -type TestRunner struct { - timeouts TestRunnerTimeouts -} - -// targetWriter is a helper object which exposes methods to write targets into step channels -type targetWriter struct { - timeouts TestRunnerTimeouts -} - -func (w *targetWriter) writeTimeout(ctx xcontext.Context, ch chan<- *target.Target, target *target.Target, timeout time.Duration) error { - ctx.Logger().Debugf("writing target %+v, timeout %v", target, timeout) - start := time.Now() - select { - case <-ctx.Done(): - ctx.Logger().Debugf("terminate requested while writing target %+v", target) - case ch <- target: - case <-time.After(timeout): - return fmt.Errorf("timeout (%v) while writing target %+v", timeout, target) - } - ctx.Logger().Debugf("done writing target %+v, spent %v", target, time.Since(start)) - return nil -} - -// writeTargetWithResult attempts to deliver a Target on the input channel of a step, -// returning the result of the operation on the result channel wrapped in the -// injectionCh argument -func (w *targetWriter) writeTargetWithResult(ctx xcontext.Context, target *target.Target, ch injectionCh) { - err := w.writeTimeout(ctx, ch.stepIn, target, w.timeouts.StepInjectTimeout) - select { - case <-ctx.Done(): - case ch.resultCh <- injectionResult{target: target, err: err}: - case <-time.After(w.timeouts.MessageTimeout): - ctx.Logger().Panicf("timeout while writing result for target %+v after %v", target, w.timeouts.MessageTimeout) - } -} - -// writeTargetError writes a TargetError object to a TargetError channel with timeout -func (w *targetWriter) writeTargetError(ctx context.Context, ch chan<- cerrors.TargetError, targetError cerrors.TargetError, timeout time.Duration) error { - select { - case <-ctx.Done(): - case ch <- targetError: - case <-time.After(timeout): - return fmt.Errorf("timeout while writing targetError %+v", targetError) - } - return nil -} - -func newTargetWriter(timeouts TestRunnerTimeouts) *targetWriter { - return &targetWriter{timeouts: timeouts} -} - -// Run implements the main logic of the TestRunner, i.e. the instantiation and -// connection of the TestSteps, routing blocks and pipeline runner. -func (tr *TestRunner) Run(ctx xcontext.Context, test *test.Test, targets []*target.Target, jobID types.JobID, runID types.RunID) error { - - if len(test.TestStepsBundles) == 0 { - return fmt.Errorf("no steps to run for test") - } - - ctx = ctx.WithFields(xcontext.Fields{ - "job_id": jobID, - "run_id": runID, - }) - - testPipeline := newPipeline(test.TestStepsBundles, test, jobID, runID, tr.timeouts) - - ctx.Logger().Infof("setting up pipeline") - completedTargets := make(chan *target.Target, 1) - inCh := testPipeline.init(ctx) - - // inject targets in the step - terminateInjectionCtx, terminateInjection := xcontext.WithCancel(xcontext.Background()) - go func(ctx xcontext.Context, inputChannel chan<- *target.Target) { - defer close(inputChannel) - ctx = ctx.WithTag("step", "injection") - writer := newTargetWriter(tr.timeouts) - for _, t := range targets { - if err := writer.writeTimeout(ctx, inputChannel, t, tr.timeouts.MessageTimeout); err != nil { - ctx.Logger().Debugf("could not inject target %+v into first routing block: %+v", t, err) - } - } - }(terminateInjectionCtx.WithLogger(ctx.Logger()), inCh) - - errCh := make(chan error, 1) - go func() { - ctx.Logger().Infof("running pipeline") - errCh <- testPipeline.run(ctx, completedTargets) - }() - - defer terminateInjection() - // Receive targets from the completed channel controlled by the pipeline, while - // waiting for termination signals or fatal errors encountered while running - // the pipeline. - for { - select { - case err := <-errCh: - ctx.Logger().Debugf("test runner terminated, returning %v", err) - return err - case t := <-completedTargets: - ctx.Logger().Infof("test runner completed target: %v", t) - } - } -} - -// NewTestRunner initializes and returns a new TestRunner object. This test -// runner will use default timeout values -func NewTestRunner() TestRunner { - return TestRunner{ - timeouts: TestRunnerTimeouts{ - StepInjectTimeout: config.StepInjectTimeout, - MessageTimeout: config.TestRunnerMsgTimeout, - ShutdownTimeout: config.TestRunnerShutdownTimeout, - StepShutdownTimeout: config.TestRunnerStepShutdownTimeout, - }, - } -} - -// NewTestRunnerWithTimeouts initializes and returns a new TestRunner object with -// custom timeouts -func NewTestRunnerWithTimeouts(timeouts TestRunnerTimeouts) TestRunner { - return TestRunner{timeouts: timeouts} -} - -// State is a structure that models the current state of the test runner -type State struct { - completedSteps map[string]error - completedRouting map[string]error - completedTargets map[*target.Target]error -} - -// NewState initializes a State object. -func NewState() *State { - r := State{} - r.completedSteps = make(map[string]error) - r.completedRouting = make(map[string]error) - r.completedTargets = make(map[*target.Target]error) - return &r -} - -// CompletedTargets returns a map that associates each target with its returning error. -// If the target succeeded, the error will be nil -func (r *State) CompletedTargets() map[*target.Target]error { - return r.completedTargets -} - -// CompletedRouting returns a map that associates each routing block with its returning error. -// If the routing block succeeded, the error will be nil -func (r *State) CompletedRouting() map[string]error { - return r.completedRouting -} - -// CompletedSteps returns a map that associates each step with its returning error. -// If the step succeeded, the error will be nil -func (r *State) CompletedSteps() map[string]error { - return r.completedSteps -} - -// SetRouting sets the error associated with a routing block -func (r *State) SetRouting(testStepLabel string, err error) { - r.completedRouting[testStepLabel] = err -} - -// SetTarget sets the error associated with a target -func (r *State) SetTarget(target *target.Target, err error) { - r.completedTargets[target] = err -} - -// SetStep sets the error associated with a step -func (r *State) SetStep(testStepLabel string, err error) { - r.completedSteps[testStepLabel] = err -} - -// IncompleteSteps returns a slice of step names for which the result hasn't been set yet -func (r *State) IncompleteSteps(bundles []test.TestStepBundle) []string { - var incompleteSteps []string - for _, bundle := range bundles { - if _, ok := r.completedSteps[bundle.TestStepLabel]; !ok { - incompleteSteps = append(incompleteSteps, bundle.TestStepLabel) - } - } - return incompleteSteps -} diff --git a/pkg/runner/test_runner2.go b/pkg/runner/test_runner2.go new file mode 100644 index 00000000..8e63ef8e --- /dev/null +++ b/pkg/runner/test_runner2.go @@ -0,0 +1,842 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +package runner + +import ( + "encoding/json" + "fmt" + "runtime/debug" + "sync" + "time" + + "github.com/facebookincubator/contest/pkg/xcontext" + "github.com/insomniacslk/xjson" + + "github.com/facebookincubator/contest/pkg/cerrors" + "github.com/facebookincubator/contest/pkg/config" + "github.com/facebookincubator/contest/pkg/event" + "github.com/facebookincubator/contest/pkg/event/testevent" + "github.com/facebookincubator/contest/pkg/storage" + "github.com/facebookincubator/contest/pkg/target" + "github.com/facebookincubator/contest/pkg/test" + "github.com/facebookincubator/contest/pkg/types" +) + +// TestRunner is the state associated with a test run. +// Here's how a test run works: +// * Each target gets a targetState and a "target handler" - a goroutine that takes that particular +// target through each step of the pipeline in sequence. It injects the target, waits for the result, +// then moves on to the next step. +// * Each step of the pipeline gets a stepState and: +// - A "step runner" - a goroutine that is responsible for running the step's Run() method +// - A "step reader" - a goroutine that processes results and sends them on to target handlers that await them. +// * After starting all of the above, the main goroutine goes into "monitor" mode +// that checks on the pipeline's progress and is responsible for closing step input channels +// when all the targets have been injected. +// * Monitor loop finishes when all the targets have been injected into the last step +// or if a step has encountered an error. +// * We then wait for all the step runners and readers to shut down. +// * Once all the activity has died down, resulting state is examined and an error is returned, if any. +type TestRunner struct { + stepInjectTimeout time.Duration // Time to wait for steps to accept each target + shutdownTimeout time.Duration // Time to wait for steps runners to finish a the end of the run + + steps []*stepState // The pipeline, in order of execution + targets map[string]*targetState // Target state lookup map + targetsWg sync.WaitGroup // Tracks all the target handlers + + // One mutex to rule them all, used to serialize access to all the state above. + // Could probably be split into several if necessary. + mu sync.Mutex + cond *sync.Cond // Used to notify the monitor about changes +} + +// stepState contains state associated with one state of the pipeline: +type stepState struct { + ctx xcontext.Context + + stepIndex int // Index of this step in the pipeline. + sb test.TestStepBundle // The test bundle. + + // Channels used to communicate with the plugin. + inCh chan *target.Target + outCh chan *target.Target + errCh chan cerrors.TargetError + ev testevent.Emitter + + tgtDone map[*target.Target]bool // Targets for which results have been received. + + stepRunning bool // testStep.Run() is currently running. + readerRunning bool // Result reader is running. + runErr error // Runner error, returned from Run() or an error condition detected by the reader. +} + +// targetStepPhase denotes progression of a target through a step +type targetStepPhase int + +const ( + targetStepPhaseInvalid targetStepPhase = iota + targetStepPhaseInit // (1) Created + targetStepPhaseBegin // (2) Picked up for execution. + targetStepPhaseRun // (3) Injected into step. + targetStepPhaseResultPending // (4) Result posted to the handler. + targetStepPhaseEnd // (5) Finished running a step. +) + +// targetState contains state associated with one target progressing through the pipeline. +type targetState struct { + tgt *target.Target + + // This part of state gets serialized into JSON for resumption. + CurStep int `json:"cs"` // Current step number. + CurPhase targetStepPhase `json:"cp"` // Current phase of step execution. + Res *xjson.Error `json:"res,omitempty"` // Final result, if reached the end state. + + resCh chan error // Channel used to communicate result by the step runner. +} + +// resumeStateStruct is used to serialize runner state to be resumed in the future. +type resumeStateStruct struct { + Version int `json:"v"` + JobID types.JobID `json:"job_id"` + RunID types.RunID `json:"run_id"` + Targets map[string]*targetState `json:"targets"` +} + +// Resume state version we are compatible with. +// When imcompatible changes are made to the state format, bump this. +// Restoring incompatible state will abort the job. +const resumeStateStructVersion = 2 + +// Run is the main enty point of the runner. +func (tr *TestRunner) Run( + ctx xcontext.Context, + t *test.Test, targets []*target.Target, + jobID types.JobID, runID types.RunID, + resumeState json.RawMessage, +) (json.RawMessage, error) { + + ctx = ctx.WithFields(xcontext.Fields{ + "job_id": jobID, + "run_id": runID, + }) + + ctx.Logger().Debugf("== test runner starting job %d, run %d", jobID, runID) + resumeState, err := tr.run(ctx.WithTag("phase", "run"), t, targets, jobID, runID, resumeState) + ctx.Logger().Debugf("== test runner finished job %d, run %d, err: %v", jobID, runID, err) + return resumeState, err +} + +func (tr *TestRunner) run( + ctx xcontext.Context, + t *test.Test, targets []*target.Target, + jobID types.JobID, runID types.RunID, + resumeState json.RawMessage, +) (json.RawMessage, error) { + + // Peel off contexts used for steps and target handlers. + stepCtx, stepCancel := xcontext.WithCancel(ctx) + defer stepCancel() + targetCtx, targetCancel := xcontext.WithCancel(ctx) + defer targetCancel() + + // Set up the pipeline + for i, sb := range t.TestStepsBundles { + tr.steps = append(tr.steps, &stepState{ + ctx: stepCtx, + stepIndex: i, + sb: sb, + inCh: make(chan *target.Target), + outCh: make(chan *target.Target), + errCh: make(chan cerrors.TargetError), + ev: storage.NewTestEventEmitter(testevent.Header{ + JobID: jobID, + RunID: runID, + TestName: t.Name, + TestStepLabel: sb.TestStepLabel, + }), + tgtDone: make(map[*target.Target]bool), + }) + // Step handlers will be started from target handlers as targets reach them. + } + + // Set up the targets + tr.targets = make(map[string]*targetState) + // If we have target state to resume, do it now. + if len(resumeState) > 0 { + ctx.Logger().Debugf("Attempting to resume from state: %s", string(resumeState)) + var rs resumeStateStruct + if err := json.Unmarshal(resumeState, &rs); err != nil { + return nil, fmt.Errorf("invalid resume state: %w", err) + } + if rs.Version != resumeStateStructVersion { + return nil, fmt.Errorf("incompatible resume state version %d (want %d)", + rs.Version, resumeStateStructVersion) + } + if rs.JobID != jobID { + return nil, fmt.Errorf("wrong resume state, job id %d (want %d)", rs.JobID, jobID) + } + tr.targets = rs.Targets + } + // Initialize remaining fields of the target structures, + // build the map and kick off target processing. + for _, tgt := range targets { + tr.mu.Lock() + tgs := tr.targets[tgt.ID] + if tgs == nil { + tgs = &targetState{ + CurPhase: targetStepPhaseInit, + } + } + tgs.tgt = tgt + tgs.resCh = make(chan error) + tr.targets[tgt.ID] = tgs + tr.mu.Unlock() + tr.targetsWg.Add(1) + go func() { + tr.targetHandler(targetCtx, tgs) + tr.targetsWg.Done() + }() + } + + // Run until no more progress can be made. + runErr := tr.runMonitor(ctx) + if runErr != nil { + ctx.Logger().Errorf("monitor returned error: %q, canceling", runErr) + stepCancel() + } + + // Wait for step runners and readers to exit. + if err := tr.waitStepRunners(ctx); err != nil { + if runErr == nil { + runErr = err + } + } + + // There will be no more results, reel in all the target handlers (if any). + ctx.Logger().Debugf("waiting for target handlers to finish") + targetCancel() + tr.targetsWg.Wait() + + // Examine the resulting state. + ctx.Logger().Debugf("leaving, err %v, target states:", runErr) + tr.mu.Lock() + defer tr.mu.Unlock() + resumeOk := (runErr == nil) + numInFlightTargets := 0 + for i, tgt := range targets { + tgs := tr.targets[tgt.ID] + stepErr := tr.steps[tgs.CurStep].runErr + ctx.Logger().Debugf(" %d %s %v", i, tgs, stepErr) + if tgs.CurPhase == targetStepPhaseRun { + numInFlightTargets++ + } + if stepErr != nil && stepErr != xcontext.Paused { + resumeOk = false + } + } + ctx.Logger().Debugf("- %d in flight, ok to resume? %t", numInFlightTargets, resumeOk) + ctx.Logger().Debugf("step states:") + for i, ss := range tr.steps { + ctx.Logger().Debugf(" %d %s %t %t %v", i, ss, ss.stepRunning, ss.readerRunning, ss.runErr) + } + + // Is there a useful error to report? + if runErr != nil { + return nil, runErr + } + + // Has the run been canceled? + ctx.Logger().Debugf("ctx-cancel-is: %v; ctx-notifications-are: %v", ctx.Err(), ctx.Notifications()) + select { + case <-ctx.Done(): + return nil, xcontext.Canceled + default: + } + + // Have we been asked to pause? If yes, is it safe to do so? + select { + case <-ctx.WaitFor(xcontext.Paused): + if !resumeOk { + ctx.Logger().Warnf("paused but not ok to resume") + break + } + rs := &resumeStateStruct{ + Version: resumeStateStructVersion, + JobID: jobID, RunID: runID, + Targets: tr.targets, + } + resumeState, runErr = json.Marshal(rs) + if runErr != nil { + ctx.Logger().Errorf("unable to serialize the state: %s", runErr) + } else { + runErr = xcontext.Paused + } + default: + } + + return resumeState, runErr +} + +func (tr *TestRunner) waitStepRunners(ctx xcontext.Context) error { + ctx.Logger().Debugf("waiting for step runners to finish") + swch := make(chan struct{}) + go func() { + tr.mu.Lock() + defer tr.mu.Unlock() + for { + ok := true + for _, ss := range tr.steps { + // numRunning == 1 is also acceptable: we allow the Run() goroutine + // to continue in case of error, if the result processor decided + // to abandon its runner, there's nothing we can do. + switch { + case !ss.stepRunning && !ss.readerRunning: + // Done + case ss.stepRunning && ss.readerRunning: + // Still active + ok = false + case !ss.stepRunning && ss.readerRunning: + // Transient state, let it finish + ok = false + case ss.stepRunning && !ss.readerRunning: + // This is possible if plugin got stuck and result processor gave up on it. + // If so, it should have left an error. + if ss.runErr == nil { + ctx.Logger().Errorf("%s: result processor left runner with no error", ss) + // There's nothing we can do at this point, fall through. + } + } + } + if ok { + close(swch) + return + } + tr.cond.Wait() + } + }() + var err error + select { + case <-swch: + ctx.Logger().Debugf("step runners finished") + tr.mu.Lock() + defer tr.mu.Unlock() + err = tr.checkStepRunners() + case <-time.After(tr.shutdownTimeout): + ctx.Logger().Errorf("step runners failed to shut down correctly") + tr.mu.Lock() + defer tr.mu.Unlock() + // If there is a step with an error set, use that. + err = tr.checkStepRunners() + // If there isn't, enumerate ones that were still running at the time. + nrerr := &cerrors.ErrTestStepsNeverReturned{} + if err == nil { + err = nrerr + } + for _, ss := range tr.steps { + if ss.stepRunning { + nrerr.StepNames = append(nrerr.StepNames, ss.sb.TestStepLabel) + // We cannot make the step itself return but we can at least release the reader. + tr.safeCloseOutCh(ss) + } + } + } + // Emit step error events. + for _, ss := range tr.steps { + ctx.Logger().Debugf("%s %v", ss, ss.runErr) + if ss.runErr != nil && ss.runErr != xcontext.Paused && ss.runErr != xcontext.Canceled { + if err := ss.emitEvent(ctx, EventTestError, nil, ss.runErr.Error()); err != nil { + ctx.Logger().Errorf("failed to emit event: %s", err) + } + } + } + return err +} + +func (tr *TestRunner) injectTarget(ctx xcontext.Context, tgs *targetState, ss *stepState) error { + ctx.Logger().Debugf("%s: injecting into %s", tgs, ss) + select { + case ss.inCh <- tgs.tgt: + // Injected successfully. + err := ss.ev.Emit(ctx, testevent.Data{EventName: target.EventTargetIn, Target: tgs.tgt}) + tr.mu.Lock() + defer tr.mu.Unlock() + // By the time we get here the target could have been processed and result posted already, hence the check. + if tgs.CurPhase == targetStepPhaseBegin { + tgs.CurPhase = targetStepPhaseRun + } + if err != nil { + return fmt.Errorf("failed to report target injection: %w", err) + } + tr.cond.Signal() + case <-time.After(tr.stepInjectTimeout): + ctx.Logger().Errorf("timed out while injecting a target") + if err := ss.ev.Emit(ctx, testevent.Data{EventName: target.EventTargetInErr, Target: tgs.tgt}); err != nil { + ctx.Logger().Errorf("failed to emit event: %s", err) + } + return &cerrors.ErrTestTargetInjectionTimedOut{StepName: ss.sb.TestStepLabel} + case <-ctx.Done(): + return xcontext.Canceled + } + return nil +} + +func (tr *TestRunner) awaitTargetResult(ctx xcontext.Context, tgs *targetState, ss *stepState) error { + select { + case res, ok := <-tgs.resCh: + if !ok { + ctx.Logger().Debugf("%s: result channel closed", tgs) + return xcontext.Canceled + } + ctx.Logger().Debugf("%s: result recd for %s", tgs, ss) + var err error + if res == nil { + err = ss.emitEvent(ctx, target.EventTargetOut, tgs.tgt, nil) + } else { + err = ss.emitEvent(ctx, target.EventTargetErr, tgs.tgt, target.ErrPayload{Error: res.Error()}) + } + if err != nil { + ctx.Logger().Errorf("failed to emit event: %s", err) + } + tr.mu.Lock() + if res != nil { + tgs.Res = xjson.NewError(res) + } + tgs.CurPhase = targetStepPhaseEnd + tr.mu.Unlock() + tr.cond.Signal() + return err + // Check for cancellation. + // Notably we are not checking for the pause condition here: + // when paused, we want to let all the injected targets to finish + // and collect all the results they produce. If that doesn't happen, + // step runner will close resCh on its way out and unlock us. + case <-ctx.Done(): + ctx.Logger().Debugf("%s: canceled 2", tgs) + return xcontext.Canceled + } +} + +// targetHandler takes a single target through each step of the pipeline in sequence. +// It injects the target, waits for the result, then moves on to the next step. +func (tr *TestRunner) targetHandler(ctx xcontext.Context, tgs *targetState) { + ctx = ctx.WithField("target", tgs.tgt.ID) + ctx.Logger().Debugf("%s: target handler active", tgs) + // NB: CurStep may be non-zero on entry if resumed +loop: + for i := tgs.CurStep; i < len(tr.steps); { + // Early check for pause or cancelation. + select { + case <-ctx.WaitFor(xcontext.Paused): + ctx.Logger().Debugf("%s: paused 0", tgs) + break loop + case <-ctx.Done(): + ctx.Logger().Debugf("%s: canceled 0", tgs) + break loop + default: + } + tr.mu.Lock() + ss := tr.steps[i] + if tgs.CurPhase == targetStepPhaseEnd { + // This target already terminated. + // Can happen if resumed from terminal state. + tr.mu.Unlock() + break loop + } + tgs.CurPhase = targetStepPhaseBegin + tr.mu.Unlock() + // Make sure we have a step runner active. If not, start one. + tr.runStepIfNeeded(ss) + // Inject the target. + err := tr.injectTarget(ctx, tgs, ss) + // Await result. It will be communicated to us by the step runner + // and returned in tgs.res. + if err == nil { + err = tr.awaitTargetResult(ctx, tgs, ss) + } + if err != nil { + ss.ctx.Logger().Errorf("%s", err) + if err != xcontext.Canceled { + ss.setErr(&tr.mu, err) + } else { + ss.ctx.Logger().Debugf("%s: canceled 1", tgs) + } + break + } + tr.mu.Lock() + if tgs.Res != nil { + tr.mu.Unlock() + break + } + i++ + if i < len(tr.steps) { + tgs.CurStep = i + tgs.CurPhase = targetStepPhaseInit + } + tr.mu.Unlock() + } + ctx.Logger().Debugf("%s: target handler finished", tgs) + tr.mu.Lock() + tgs.resCh = nil + tr.cond.Signal() + tr.mu.Unlock() +} + +// runStepIfNeeded starts the step runner goroutine if not already running. +func (tr *TestRunner) runStepIfNeeded(ss *stepState) { + tr.mu.Lock() + defer tr.mu.Unlock() + if ss.stepRunning { + return + } + if ss.runErr != nil { + return + } + ss.stepRunning = true + ss.readerRunning = true + go tr.stepRunner(ss) + go tr.stepReader(ss) +} + +func (ss *stepState) setErr(mu sync.Locker, err error) { + mu.Lock() + defer mu.Unlock() + ss.setErrLocked(err) +} + +// setErrLocked sets step runner error unless already set. +func (ss *stepState) setErrLocked(err error) { + if err == nil || ss.runErr != nil { + return + } + ss.ctx.Logger().Errorf("err: %v", err) + ss.runErr = err +} + +// emitEvent emits the specified event with the specified JSON payload (if any). +func (ss *stepState) emitEvent(ctx xcontext.Context, name event.Name, tgt *target.Target, payload interface{}) error { + var payloadJSON *json.RawMessage + if payload != nil { + payloadBytes, jmErr := json.Marshal(payload) + if jmErr != nil { + return fmt.Errorf("failed to marshal event: %w", jmErr) + } + pj := json.RawMessage(payloadBytes) + payloadJSON = &pj + } + errEv := testevent.Data{ + EventName: name, + Target: tgt, + Payload: payloadJSON, + } + return ss.ev.Emit(ctx, errEv) +} + +// stepRunner runs a test pipeline's step (the Run() method). +func (tr *TestRunner) stepRunner(ss *stepState) { + ss.ctx.Logger().Debugf("%s: step runner active", ss) + defer func() { + if r := recover(); r != nil { + tr.mu.Lock() + ss.stepRunning = false + ss.setErrLocked(&cerrors.ErrTestStepPaniced{ + StepName: ss.sb.TestStepLabel, + StackTrace: fmt.Sprintf("%s / %s", r, debug.Stack()), + }) + tr.mu.Unlock() + tr.safeCloseOutCh(ss) + } + }() + chans := test.TestStepChannels{In: ss.inCh, Out: ss.outCh, Err: ss.errCh} + runErr := ss.sb.TestStep.Run(ss.ctx, chans, ss.sb.Parameters, ss.ev) + ss.ctx.Logger().Debugf("%s: step runner finished %v", ss, runErr) + tr.mu.Lock() + ss.stepRunning = false + ss.setErrLocked(runErr) + tr.mu.Unlock() + // Signal to the result processor that no more will be coming. + tr.safeCloseOutCh(ss) +} + +// reportTargetResult reports result of executing a step to the appropriate target handler. +func (tr *TestRunner) reportTargetResult(ss *stepState, tgt *target.Target, res error) error { + resCh, err := func() (chan error, error) { + tr.mu.Lock() + defer tr.mu.Unlock() + tgs := tr.targets[tgt.ID] + if tgs == nil { + return nil, &cerrors.ErrTestStepReturnedUnexpectedResult{ + StepName: ss.sb.TestStepLabel, + Target: tgt.ID, + } + } + if ss.tgtDone[tgt] { + return nil, &cerrors.ErrTestStepReturnedDuplicateResult{ + StepName: ss.sb.TestStepLabel, + Target: tgt.ID, + } + } + ss.tgtDone[tgt] = true + // Begin is also allowed here because it may happen that we get a result before target handler updates phase. + if tgs.CurStep != ss.stepIndex || + (tgs.CurPhase != targetStepPhaseBegin && tgs.CurPhase != targetStepPhaseRun) { + return nil, &cerrors.ErrTestStepReturnedUnexpectedResult{ + StepName: ss.sb.TestStepLabel, + Target: tgt.ID, + } + } + if tgs.resCh == nil { + select { + case <-ss.ctx.Done(): + // If canceled, target handler may have left early. We don't care though. + return nil, xcontext.Canceled + default: + // This should not happen, must be an internal error. + return nil, fmt.Errorf("%s: target handler %s is not there, dropping result on the floor", ss, tgs) + } + } + tgs.CurPhase = targetStepPhaseResultPending + ss.ctx.Logger().Debugf("%s: result for %s: %v", ss, tgs, res) + return tgs.resCh, nil + }() + if err != nil { + return err + } + select { + case resCh <- res: + break + case <-ss.ctx.Done(): + break + } + return nil +} + +func (tr *TestRunner) safeCloseOutCh(ss *stepState) { + defer func() { + if r := recover(); r != nil { + ss.setErr(&tr.mu, &cerrors.ErrTestStepClosedChannels{StepName: ss.sb.TestStepLabel}) + } + }() + close(ss.outCh) +} + +// safeCloseErrCh closes error channel safely, even if it has already been closed. +func (tr *TestRunner) safeCloseErrCh(ss *stepState) { + defer func() { + if r := recover(); r != nil { + ss.setErr(&tr.mu, &cerrors.ErrTestStepClosedChannels{StepName: ss.sb.TestStepLabel}) + } + }() + close(ss.errCh) +} + +// stepReader receives results from the step's output channel and forwards them to the appropriate target handlers. +func (tr *TestRunner) stepReader(ss *stepState) { + ss.ctx.Logger().Debugf("%s: step reader active", ss) + var err error + outCh := ss.outCh + cancelCh := ss.ctx.Done() + var shutdownTimeoutCh <-chan time.Time +loop: + for { + select { + case tgt, ok := <-outCh: + if !ok { + ss.ctx.Logger().Debugf("%s: out chan closed", ss) + // At this point we may still have an error to report, + // wait until error channel is emptied too. + outCh = nil + tr.safeCloseErrCh(ss) + continue loop + } + if err = tr.reportTargetResult(ss, tgt, nil); err != nil { + break loop + } + case res, ok := <-ss.errCh: + if !ok { + tr.mu.Lock() + if ss.stepRunning { + // Error channel is always closed after the output, + // which is only closed after runner goroutine has exited. + // If we got here, it means that the plugin closed the error channel. + ss.setErrLocked(&cerrors.ErrTestStepClosedChannels{StepName: ss.sb.TestStepLabel}) + } + tr.mu.Unlock() + ss.ctx.Logger().Debugf("%s: err chan closed", ss) + break loop + } + if err = tr.reportTargetResult(ss, res.Target, res.Err); err != nil { + break loop + } + case <-cancelCh: + ss.ctx.Logger().Debugf("%s: canceled 3, draining", ss) + // Allow some time to drain + cancelCh = nil + shutdownTimeoutCh = time.After(tr.shutdownTimeout) + case <-shutdownTimeoutCh: + ss.setErr(&tr.mu, &cerrors.ErrTestStepsNeverReturned{}) + } + } + tr.mu.Lock() + defer tr.mu.Unlock() + ss.setErrLocked(err) + if ss.stepRunning && ss.runErr == nil { + // This means that plugin closed its channels before leaving. + ss.setErrLocked(&cerrors.ErrTestStepClosedChannels{StepName: ss.sb.TestStepLabel}) + } + ss.readerRunning = false + ss.ctx.Logger().Debugf("%s: step reader finished, %t %t %v", ss, ss.stepRunning, ss.readerRunning, ss.runErr) + tr.cond.Signal() +} + +// checkStepRunnersLocked checks if any step runner has encountered an error. +func (tr *TestRunner) checkStepRunners() error { + for _, ss := range tr.steps { + if ss.runErr != nil { + return ss.runErr + } + } + return nil +} + +// runMonitor monitors progress of targets through the pipeline +// and closes input channels of the steps to indicate that no more are expected. +// It also monitors steps for critical errors and cancels the whole run. +// Note: input channels remain open when cancellation is requested, +// plugins are expected to handle it explicitly. +func (tr *TestRunner) runMonitor(ctx xcontext.Context) error { + ctx.Logger().Debugf("monitor: active") + tr.mu.Lock() + defer tr.mu.Unlock() + // First, compute the starting step of the pipeline (it may be non-zero + // if the pipeline was resumed). + minStep := len(tr.steps) + for _, tgs := range tr.targets { + if tgs.CurStep < minStep { + minStep = tgs.CurStep + } + } + if minStep < len(tr.steps) { + ctx.Logger().Debugf("monitor: starting at step %s", tr.steps[minStep]) + } + + // Run the main loop. + pass := 1 + var runErr error +stepLoop: + for step := minStep; step < len(tr.steps); pass++ { + ss := tr.steps[step] + ctx.Logger().Debugf("monitor pass %d: current step %s", pass, ss) + // Check if all the targets have either made it past the injection phase or terminated. + ok := true + for _, tgs := range tr.targets { + ctx.Logger().Debugf("monitor pass %d: %s: %s", pass, ss, tgs) + if tgs.resCh == nil { // Not running anymore + continue + } + if tgs.CurStep < step || tgs.CurPhase < targetStepPhaseRun { + ctx.Logger().Debugf("monitor pass %d: %s: not all targets injected yet (%s)", pass, ss, tgs) + ok = false + break + } + } + if runErr = tr.checkStepRunners(); runErr != nil { + break stepLoop + } + if !ok { + // Wait for notification: as progress is being made, we get notified. + tr.cond.Wait() + continue + } + // All targets ok, close the step's input channel. + ctx.Logger().Debugf("monitor pass %d: %s: no more targets, closing input channel", pass, ss) + close(ss.inCh) + step++ + } + // Wait for all the targets to finish. + ctx.Logger().Debugf("monitor: waiting for targets to finish") +tgtLoop: + for ; runErr == nil; pass++ { + ok := true + for _, tgs := range tr.targets { + ctx.Logger().Debugf("monitor pass %d: %s", pass, tgs) + if runErr = tr.checkStepRunners(); runErr != nil { + break tgtLoop + } + if tgs.resCh != nil && tgs.CurStep < len(tr.steps) { + ss := tr.steps[tgs.CurStep] + if tgs.CurPhase == targetStepPhaseRun && !ss.readerRunning { + // Target has been injected but step runner has exited, this target has been lost. + runErr = &cerrors.ErrTestStepLostTargets{ + StepName: ss.sb.TestStepLabel, + Targets: []string{tgs.tgt.ID}, + } + break tgtLoop + } + ok = false + break + } + } + if ok { + break + } + // Wait for notification: as progress is being made, we get notified. + tr.cond.Wait() + } + ctx.Logger().Debugf("monitor: finished, %v", runErr) + return runErr +} + +func NewTestRunnerWithTimeouts(stepInjectTimeout, shutdownTimeout time.Duration) *TestRunner { + tr := &TestRunner{ + stepInjectTimeout: stepInjectTimeout, + shutdownTimeout: shutdownTimeout, + } + tr.cond = sync.NewCond(&tr.mu) + return tr +} + +func NewTestRunner() *TestRunner { + return NewTestRunnerWithTimeouts(config.StepInjectTimeout, config.TestRunnerShutdownTimeout) +} + +func (tph targetStepPhase) String() string { + switch tph { + case targetStepPhaseInvalid: + return "INVALID" + case targetStepPhaseInit: + return "init" + case targetStepPhaseBegin: + return "begin" + case targetStepPhaseRun: + return "run" + case targetStepPhaseResultPending: + return "result_pending" + case targetStepPhaseEnd: + return "end" + } + return fmt.Sprintf("???(%d)", tph) +} + +func (ss *stepState) String() string { + return fmt.Sprintf("[#%d %s]", ss.stepIndex, ss.sb.TestStepLabel) +} + +func (tgs *targetState) String() string { + var resText string + if tgs.Res != nil { + resStr := fmt.Sprintf("%v", tgs.Res) + if len(resStr) > 20 { + resStr = resStr[:20] + "..." + } + resText = fmt.Sprintf("%q", resStr) + } else { + resText = "" + } + finished := tgs.resCh == nil + return fmt.Sprintf("[%s %d %s %t %s]", + tgs.tgt, tgs.CurStep, tgs.CurPhase, finished, resText) +} diff --git a/pkg/runner/test_runner_pipeline.go b/pkg/runner/test_runner_pipeline.go deleted file mode 100644 index 241dd932..00000000 --- a/pkg/runner/test_runner_pipeline.go +++ /dev/null @@ -1,540 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. - -package runner - -import ( - "encoding/json" - "fmt" - "runtime/debug" - "sync/atomic" - "time" - - "github.com/facebookincubator/contest/pkg/cerrors" - "github.com/facebookincubator/contest/pkg/event/testevent" - "github.com/facebookincubator/contest/pkg/storage" - "github.com/facebookincubator/contest/pkg/target" - "github.com/facebookincubator/contest/pkg/test" - "github.com/facebookincubator/contest/pkg/types" - "github.com/facebookincubator/contest/pkg/xcontext" -) - -// pipeline represents a sequence of steps through which targets flow. A pipeline could implement -// either a test sequence or a cleanup sequence -type pipeline struct { - bundles []test.TestStepBundle - - jobID types.JobID - runID types.RunID - - state *State - test *test.Test - - timeouts TestRunnerTimeouts - - // ctrlChannels represents a set of result and completion channels for this pipeline, - // used to collect the results of routing blocks, steps and targets completing - // the pipeline. It's available only after having initialized the pipeline - ctrlChannels *pipelineCtrlCh - - // numIngress represents the total number of targets that have been seen entering - // the pipeline. This number is set by the first routing block as soon as the injection - // terminates - numIngress uint64 -} - -// runStep runs synchronously a TestStep and peforms sanity checks on the status -// of the input/output channels on the defer control path. When the TestStep returns, -// the associated output channels are closed. This signals to the routing subsytem -// that this step of the pipeline will not be producing any more targets. The API -// dictates that the TestStep can only return if its input channel has been closed, -// which indicates that no more targets will be submitted to that step. If the -// TestStep does not comply with this rule, an error is sent downstream to the pipeline -// runner, which will in turn shutdown the whole pipeline and flag the TestStep -// as misbehaving. All attempts to send downstream a result after a TestStep complete -// are protected by a timeout and fail open. This because it's not guaranteed that -// the TestRunner will still be listening on the result channels. If the TestStep hangs -// indefinitely and does not respond to cancellation signals, the TestRunner will -// flag it as misbehaving and return. If the TestStep returns once the TestRunner -// has completed, it will timeout trying to write on the result channel. -func (p *pipeline) runStep(ctx xcontext.Context, jobID types.JobID, runID types.RunID, bundle test.TestStepBundle, stepCh stepCh, resultCh chan<- stepResult, ev testevent.EmitterFetcher) { - stepLabel := bundle.TestStepLabel - ctx = ctx.WithField("step", stepLabel).WithTag("phase", "runStep") - - ctx.Logger().Debugf("initializing step") - timeout := p.timeouts.MessageTimeout - defer func() { - if r := recover(); r != nil { - err := fmt.Errorf("step %s panicked (%v): %s", stepLabel, r, debug.Stack()) - select { - case resultCh <- stepResult{jobID: jobID, runID: runID, bundle: bundle, err: err}: - case <-time.After(p.timeouts.MessageTimeout): - ctx.Logger().Warnf("sending error back from test step runner timed out after %v: %v", timeout, err) - return - } - return - } - }() - - // We should not run a test step if there're no targets in stepCh.stepIn - // First we check if there's at least one incoming target, and only - // then we call `bundle.TestStep.Run()`. - // - // ITS: https://github.com/facebookincubator/contest/issues/101 - stepIn, onFirstTargetChan, onNoTargetsChan := waitForFirstTarget(ctx.WaitFor(), stepCh.stepIn) - - haveTargets := false - select { - case <-onFirstTargetChan: - ctx.Logger().Debugf("first target received, will start step") - haveTargets = true - case <-ctx.Done(): - ctx.Logger().Debugf("cancelled") - case <-ctx.WaitFor(xcontext.Paused): - ctx.Logger().Debugf("paused") - case <-onNoTargetsChan: - ctx.Logger().Debugf("no targets") - } - - var err error - if haveTargets { - // Run the TestStep and defer to the return path the handling of panic - // conditions. If multiple error conditions occur, send downstream only - // the first error encountered. - channels := test.TestStepChannels{ - In: stepIn, - Out: stepCh.stepOut, - Err: stepCh.stepErr, - } - err = bundle.TestStep.Run(ctx, channels, bundle.Parameters, ev) - } - - ctx.Logger().Debugf("step %s returned", bundle.TestStepLabel) - - // Check if we are shutting down. If so, do not perform sanity checks on the - // channels but return immediately as the TestStep itself probably returned - // because it honored the termination signal. - - cancellationAsserted := ctx.Err() != nil - pauseAsserted := ctx.IsSignaledWith(xcontext.Paused) - - if cancellationAsserted && err == nil { - err = fmt.Errorf("test step cancelled") - } - - if cancellationAsserted || pauseAsserted { - select { - case resultCh <- stepResult{jobID: jobID, runID: runID, bundle: bundle, err: err}: - case <-time.After(timeout): - ctx.Logger().Warnf("sending error back from TestStep runner timed out after %v: %v", timeout, err) - } - return - } - - // If the TestStep has crashed with an error, return immediately the result to the TestRunner - // which in turn will issue a cancellation signal to the pipeline - if err != nil { - select { - case resultCh <- stepResult{jobID: jobID, runID: runID, bundle: bundle, err: err}: - case <-time.After(timeout): - ctx.Logger().Warnf("sending error back from test step runner (%s) timed out after %v: %v", stepLabel, timeout, err) - } - return - } - - // Perform sanity checks on the status of the channels. The TestStep API - // mandates that output and error channels shall not be closed by the - // TestStep itself. If the TestStep does not comply with the API, it is - // flagged as misbehaving. - select { - case _, ok := <-stepCh.stepIn: - if !ok { - break - } - // stepCh.stepIn is not closed, but the TestStep returned, which is a violation - // of the API. Record the error if no other error condition has been seen. - err = fmt.Errorf("step %s returned, but input channel is not closed (api violation; case 0)", stepLabel) - - default: - // stepCh.stepIn is not closed, and a read operation would block. The TestStep - // does not comply with the API (see above). - err = fmt.Errorf("step %s returned, but input channel is not closed (api violation; case 1)", stepLabel) - } - - select { - case _, ok := <-stepCh.stepOut: - if !ok { - // stepOutCh has been closed. This is a violation of the API. Record the error - // if no other error condition has been seen. - if err == nil { - err = &cerrors.ErrTestStepClosedChannels{StepName: stepLabel} - } - } - default: - // stepCh.stepOut is open. Flag it for closure to signal to the routing subsystem that - // no more Targets will go through from this channel. - close(stepCh.stepOut) - } - - select { - case _, ok := <-stepCh.stepErr: - if !ok { - // stepErrCh has been closed. This is a violation of the API. Record the error - // if no other error condition has been seen. - if err == nil { - err = &cerrors.ErrTestStepClosedChannels{StepName: stepLabel} - } - } - default: - // stepCh.stepErr is open. Flag it for closure to signal to the routing subsystem that - // no more Targets will go through from this channel. - close(stepCh.stepErr) - } - - select { - case resultCh <- stepResult{jobID: jobID, runID: runID, bundle: bundle, err: err}: - case <-time.After(timeout): - ctx.Logger().Warnf("sending error back from step runner (%s) timed out after %v: %v", stepLabel, timeout, err) - return - } -} - -// waitTargets reads results coming from results channels until all Targets -// have completed or an error occurs. If all Targets complete successfully, it checks -// whether TestSteps and routing blocks have completed as well. If not, returns an -// error. Termination is signalled via terminate channel. -func (p *pipeline) waitTargets(ctx xcontext.Context, completedCh chan<- *target.Target) error { - ctx = ctx.WithTag("phase", "waitTargets") - - var ( - err error - completedTarget *target.Target - completedTargetError error - ) - - outChannel := p.ctrlChannels.targetOut - - writer := newTargetWriter(p.timeouts) - for { - select { - case t, ok := <-outChannel: - if !ok { - ctx.Logger().Debugf("pipeline output channel was closed, no more targets will come through") - outChannel = nil - break - } - completedTarget = t - case <-ctx.WaitFor(): - // When termination is signaled just stop wait. It is up - // to the caller to decide how to further handle pipeline termination. - ctx.Logger().Debugf("termination requested") - return nil - case res := <-p.ctrlChannels.routingResultCh: - err = res.err - p.state.SetRouting(res.bundle.TestStepLabel, res.err) - case res := <-p.ctrlChannels.stepResultCh: - err = res.err - if err != nil { - payload, jmErr := json.Marshal(err.Error()) - if jmErr != nil { - ctx.Logger().Warnf("failed to marshal error string to JSON: %v", jmErr) - continue - } - rm := json.RawMessage(payload) - header := testevent.Header{ - JobID: res.jobID, - RunID: res.runID, - TestName: p.test.Name, - TestStepLabel: res.bundle.TestStepLabel, - } - ev := storage.NewTestEventEmitterFetcher(header) - // this event is not associated to any target, e.g. a plugin has returned an error. - errEv := testevent.Data{EventName: EventTestError, Target: nil, Payload: &rm} - // emit test event containing the completion error - if err := ev.Emit(ctx, errEv); err != nil { - ctx.Logger().Warnf("could not emit completion error event %v", errEv) - } - } - p.state.SetStep(res.bundle.TestStepLabel, res.err) - - case targetErr := <-p.ctrlChannels.targetErr: - completedTarget = targetErr.Target - completedTargetError = targetErr.Err - } - - if err != nil { - return err - } - - if outChannel == nil { - ctx.Logger().Debugf("no more targets to wait, output channel is closed") - break - } - - if completedTarget != nil { - p.state.SetTarget(completedTarget, completedTargetError) - ctx.Logger().Debugf("writing target %+v on the completed channel", completedTarget) - if err := writer.writeTimeout(ctx, completedCh, completedTarget, p.timeouts.MessageTimeout); err != nil { - ctx.Logger().Panicf("could not write completed target: %v", err) - } - completedTarget = nil - completedTargetError = nil - } - - numIngress := atomic.LoadUint64(&p.numIngress) - numCompleted := uint64(len(p.state.CompletedTargets())) - ctx.Logger().Debugf("targets completed: %d, expected: %d", numCompleted, numIngress) - if numIngress != 0 && numCompleted == numIngress { - ctx.Logger().Debugf("no more targets to wait, all targets (%d) completed", numCompleted) - break - } - } - // The test run completed, we have collected all Targets. TestSteps might have already - // closed `ch.out`, in which case the pipeline terminated correctly (channels are closed - // in a "domino" sequence, so seeing the last channel closed indicates that the - // sequence of close operations has completed). If `ch.out` is still open, - // there are still TestSteps that might have not returned. Wait for all - // TestSteps to complete or `StepShutdownTimeout` to occur. - ctx.Logger().Infof("waiting for all steps to complete") - return p.waitSteps(ctx) -} - -// waitTermination reads results coming from result channels waiting -// for the pipeline to completely shutdown before `ShutdownTimeout` occurs. A -// "complete shutdown" means that all TestSteps and routing blocks have sent -// downstream their results. -func (p *pipeline) waitTermination(ctx xcontext.Context) error { - - if len(p.bundles) == 0 { - return fmt.Errorf("no bundles specified for waitTermination") - } - - ctx = ctx.WithTag("phase", "waitTermination") - ctx.Logger().Debugf("waiting for pipeline to terminate") - - for { - - ctx.Logger().Debugf("steps completed: %d", len(p.state.CompletedSteps())) - ctx.Logger().Debugf("routing completed: %d", len(p.state.CompletedRouting())) - stepsCompleted := len(p.state.CompletedSteps()) == len(p.bundles) - routingCompleted := len(p.state.CompletedRouting()) == len(p.bundles) - if stepsCompleted && routingCompleted { - return nil - } - - select { - case <-time.After(p.timeouts.ShutdownTimeout): - incompleteSteps := p.state.IncompleteSteps(p.bundles) - if len(incompleteSteps) > 0 { - return &cerrors.ErrTestStepsNeverReturned{StepNames: incompleteSteps} - } - return fmt.Errorf("pipeline did not return but all test steps completed") - case res := <-p.ctrlChannels.routingResultCh: - p.state.SetRouting(res.bundle.TestStepLabel, res.err) - case res := <-p.ctrlChannels.stepResultCh: - p.state.SetStep(res.bundle.TestStepLabel, res.err) - } - } -} - -// waitSteps reads results coming from result channels until `StepShutdownTimeout` -// occurs or an error is encountered. It then checks whether TestSteps and routing -// blocks have all returned correctly. If not, it returns an error. -func (p *pipeline) waitSteps(ctx xcontext.Context) error { - - if len(p.bundles) == 0 { - return fmt.Errorf("no bundles specified for waitSteps") - } - - ctx = ctx.WithTag("phase", "waitSteps") - - var err error - - ctx.Logger().Debugf("waiting for test steps to terminate") - for { - select { - case <-time.After(p.timeouts.StepShutdownTimeout): - ctx.Logger().Warnf("timed out waiting for steps to complete after %v", p.timeouts.StepShutdownTimeout) - incompleteSteps := p.state.IncompleteSteps(p.bundles) - if len(incompleteSteps) > 0 { - err = &cerrors.ErrTestStepsNeverReturned{StepNames: incompleteSteps} - break - } - if len(p.state.CompletedRouting()) != len(p.bundles) { - err = fmt.Errorf("not all routing completed: %d!=%d", len(p.state.CompletedRouting()), len(p.bundles)) - } - case res := <-p.ctrlChannels.routingResultCh: - ctx.Logger().Debugf("received routing block result for %s", res.bundle.TestStepLabel) - p.state.SetRouting(res.bundle.TestStepLabel, res.err) - err = res.err - case res := <-p.ctrlChannels.stepResultCh: - ctx.Logger().Debugf("received step result for %s", res.bundle.TestStepLabel) - p.state.SetStep(res.bundle.TestStepLabel, res.err) - err = res.err - } - if err != nil { - return err - } - ctx.Logger().Debugf("steps completed: %d, expected: %d", len(p.state.CompletedSteps()), len(p.bundles)) - ctx.Logger().Debugf("routing completed: %d, expected: %d", len(p.state.CompletedRouting()), len(p.bundles)) - stepsCompleted := len(p.state.CompletedSteps()) == len(p.bundles) - routingCompleted := len(p.state.CompletedRouting()) == len(p.bundles) - if stepsCompleted && routingCompleted { - break - } - } - - return nil -} - -// init initializes the pipeline by connecting steps and routing blocks. The result of pipeline -// initialization is a set of control/result channels assigned to the pipeline object. The pipeline -// input channel is returned. -func (p *pipeline) init(ctx xcontext.Context) (routeInFirst chan *target.Target) { - ctx.Logger().Debugf("starting") - - if p.ctrlChannels != nil { - ctx.Logger().Panicf("pipeline is already initialized, control channel are already configured") - } - - var ( - routeOut chan *target.Target - routeIn chan *target.Target - ) - - pipelineCtx, pause := xcontext.WithNotify(ctx, xcontext.Paused) - pipelineCtx, cancel := xcontext.WithCancel(pipelineCtx) - - // result channels used to communicate result information from the routing blocks - // and step executors - routingResultCh := make(chan routeResult) - stepResultCh := make(chan stepResult) - targetErrCh := make(chan cerrors.TargetError) - - routeIn = make(chan *target.Target) - for position, testStepBundle := range p.bundles { - - // Input and output channels for the TestStep - stepInCh := make(chan *target.Target) - stepOutCh := make(chan *target.Target) - stepErrCh := make(chan cerrors.TargetError) - - routeOut = make(chan *target.Target) - - // First step of the pipeline, keep track of the routeIn channel as this is - // going to be used to injects targets into the pipeline from outside. Also - // add an intermediate goroutine which keeps track of how many targets have - // been injected into the pipeline - if position == 0 { - routeInFirst = make(chan *target.Target) - routeInStep := routeIn - go func() { - defer close(routeInStep) - numIngress := uint64(0) - for t := range routeInFirst { - routeInStep <- t - numIngress++ - } - atomic.StoreUint64(&p.numIngress, numIngress) - }() - } - - stepChannels := stepCh{stepIn: stepInCh, stepErr: stepErrCh, stepOut: stepOutCh} - routingChannels := routingCh{ - routeIn: routeIn, - routeOut: routeOut, - stepIn: stepInCh, - stepErr: stepErrCh, - stepOut: stepOutCh, - targetErr: targetErrCh, - } - - // Build the Header that the the TestStep will be using for emitting events - Header := testevent.Header{ - JobID: p.jobID, - RunID: p.runID, - TestName: p.test.Name, - TestStepLabel: testStepBundle.TestStepLabel, - } - ev := storage.NewTestEventEmitterFetcherWithAllowedEvents(Header, &testStepBundle.AllowedEvents) - - stepCtx := pipelineCtx.WithFields(xcontext.Fields{ - "run_id": p.runID, - "step": testStepBundle.TestStepLabel, - }) - router := newStepRouter(testStepBundle, routingChannels, ev, p.timeouts) - go router.route(stepCtx, routingResultCh) - go p.runStep(stepCtx, p.jobID, p.runID, testStepBundle, stepChannels, stepResultCh, ev) - // The input of the next routing block is the output of the current routing block - routeIn = routeOut - } - - p.ctrlChannels = &pipelineCtrlCh{ - routingResultCh: routingResultCh, - stepResultCh: stepResultCh, - targetErr: targetErrCh, - targetOut: routeOut, - - ctx: pipelineCtx, - cancel: cancel, - pause: pause, - } - - return -} - -// run is a blocking method which executes the pipeline until successful or failed termination -func (p *pipeline) run(pipelineCtx xcontext.Context, completedTargetsCh chan<- *target.Target) error { - log := pipelineCtx.Logger() - log.Debugf("run") - if p.ctrlChannels == nil { - log.Panicf("pipeline is not initialized, control channels are not available") - } - channelsCtx := p.ctrlChannels.ctx - - // Wait for the pipeline to complete. If an error occurs, cancel all TestSteps - // and routing blocks and wait again for completion until shutdown timeout occurs. - log.Infof("waiting for pipeline to complete") - completionError := p.waitTargets(channelsCtx, completedTargetsCh) - - pauseAsserted := channelsCtx.IsSignaledWith(xcontext.Paused) - cancellationAsserted := channelsCtx.Err() != nil - - if completionError != nil { - log.Warnf("test failed to complete: %v. Forcing cancellation.", completionError) - p.ctrlChannels.cancel() // terminate routing and and propagate cancellation to the steps - } else if cancellationAsserted { - log.Infof("cancellation was asserted") - } else if pauseAsserted { - log.Warnf("received pause request") - } - - // If either cancellation or pause have been asserted, we need to wait for the - // pipeline to terminate - if cancellationAsserted || pauseAsserted || completionError != nil { - signal := "cancellation" - if pauseAsserted { - signal = "pause" - } - terminationError := p.waitTermination(channelsCtx) - if terminationError != nil { - log.Infof("test did not terminate correctly after %s signal: %v", signal, terminationError) - } else { - log.Infof("test terminated correctly after %s signal", signal) - } - if completionError != nil { - return completionError - } - return terminationError - } - log.Infof("completed") - return nil -} - -func newPipeline(bundles []test.TestStepBundle, test *test.Test, jobID types.JobID, runID types.RunID, timeouts TestRunnerTimeouts) *pipeline { - p := pipeline{bundles: bundles, jobID: jobID, runID: runID, test: test, timeouts: timeouts} - p.state = NewState() - return &p -} diff --git a/pkg/runner/test_runner_route.go b/pkg/runner/test_runner_route.go deleted file mode 100644 index 6f4492a6..00000000 --- a/pkg/runner/test_runner_route.go +++ /dev/null @@ -1,292 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. - -package runner - -import ( - "container/list" - "encoding/json" - "fmt" - "log" - "sync" - "time" - - "github.com/facebookincubator/contest/pkg/event/testevent" - "github.com/facebookincubator/contest/pkg/target" - "github.com/facebookincubator/contest/pkg/test" - "github.com/facebookincubator/contest/pkg/xcontext" -) - -// router implements the routing logic that injects targets into a test step and consumes -// targets in output from another test step -type stepRouter struct { - routingChannels routingCh - bundle test.TestStepBundle - ev testevent.EmitterFetcher - - timeouts TestRunnerTimeouts -} - -// routeIn is responsible for accepting a target from the previous routing block -// and injecting it into the associated test step. Returns the number of targets -// injected into the test step or an error upon failure -func (r *stepRouter) routeIn(ctx xcontext.Context) (int, error) { - stepLabel := r.bundle.TestStepLabel - ctx = ctx.WithTag("phase", "routeIn").WithField("step", stepLabel) - - var ( - err error - injectionWg sync.WaitGroup - routeInProgress bool - ) - - // terminateTargetWriter is a control channel used to signal termination to - // the writer object which injects a target into the test step - terminateTargetWriterCtx, terminateTargetWriter := xcontext.WithCancel(xcontext.ResetSignalers(ctx)) - defer terminateTargetWriter() // avoids possible goroutine deadlock in context.WithCancel implementation - - // `targets` is used to buffer targets coming from the previous routing blocks, - // queueing them for injection into the TestStep. The list is accessed - // synchronously by a single goroutine. - targets := list.New() - - // `ingressTarget` is used to keep track of ingress times of a target into a test step - ingressTarget := make(map[string]time.Time) - - // Channel that the injection goroutine uses to communicate back to `routeIn` the results - // of asynchronous injection - injectResultCh := make(chan injectionResult) - - // injectionChannels are used to inject targets into test step and return results to `routeIn` - injectionChannels := injectionCh{stepIn: r.routingChannels.stepIn, resultCh: injectResultCh} - - ctx.Logger().Debugf("initializing routeIn for %s", stepLabel) - targetWriter := newTargetWriter(r.timeouts) - - for { - select { - case <-ctx.WaitFor(): - err = fmt.Errorf("termination requested for routing into %s", stepLabel) - case injectionResult := <-injectResultCh: - ctx.Logger().Debugf("received injection result for %v", injectionResult.target) - routeInProgress = false - if injectionResult.err != nil { - err = fmt.Errorf("routing failed while injecting target %+v into %s", injectionResult.target, stepLabel) - targetInErrEv := testevent.Data{EventName: target.EventTargetInErr, Target: injectionResult.target} - if err := r.ev.Emit(ctx, targetInErrEv); err != nil { - ctx.Logger().Warnf("could not emit %v event for target %+v: %v", targetInErrEv, *injectionResult.target, err) - } - } else { - targetInEv := testevent.Data{EventName: target.EventTargetIn, Target: injectionResult.target} - if err := r.ev.Emit(ctx, targetInEv); err != nil { - ctx.Logger().Warnf("could not emit %v event for Target: %+v", targetInEv, *injectionResult.target) - } - } - case t, chanIsOpen := <-r.routingChannels.routeIn: - if !chanIsOpen { - ctx.Logger().Debugf("routing input channel closed") - r.routingChannels.routeIn = nil - } else { - ctx.Logger().Debugf("received target %v in input", t) - targets.PushFront(t) - } - } - - if err != nil { - break - } - - if routeInProgress { - continue - } - - // no targets currently being injected in the test step - if targets.Len() == 0 { - if r.routingChannels.routeIn == nil { - ctx.Logger().Debugf("input channel is closed and no more targets are available, closing step input channel") - close(r.routingChannels.stepIn) - break - } - continue - } - - t := targets.Back().Value.(*target.Target) - ingressTarget[t.ID] = time.Now() - targets.Remove(targets.Back()) - ctx.Logger().Debugf("writing target %v into test step", t) - routeInProgress = true - injectionWg.Add(1) - go func() { - defer injectionWg.Done() - targetWriter.writeTargetWithResult(terminateTargetWriterCtx, t, injectionChannels) - }() - } - // Signal termination to the injection routines regardless of the result of the - // routing. If the routing completed successfully, this is a no-op. If there is an - // injection goroutine running, wait for it to terminate, as we might have gotten - // here after a cancellation signal. - terminateTargetWriter() - injectionWg.Wait() - - if err != nil { - ctx.Logger().Debugf("routeIn failed: %v", err) - return 0, err - } - return len(ingressTarget), nil -} - -func (r *stepRouter) emitOutEvent(ctx xcontext.Context, t *target.Target, err error) error { - ctx = ctx.WithTag("phase", "emitOutEvent").WithField("step", r.bundle.TestStepLabel) - - if err != nil { - targetErrPayload := target.ErrPayload{Error: err.Error()} - payloadEncoded, err := json.Marshal(targetErrPayload) - if err != nil { - ctx.Logger().Warnf("could not encode target error ('%s'): %v", targetErrPayload, err) - } - rawPayload := json.RawMessage(payloadEncoded) - targetErrEv := testevent.Data{EventName: target.EventTargetErr, Target: t, Payload: &rawPayload} - if err := r.ev.Emit(ctx, targetErrEv); err != nil { - return err - } - } else { - targetOutEv := testevent.Data{EventName: target.EventTargetOut, Target: t} - if err := r.ev.Emit(ctx, targetOutEv); err != nil { - ctx.Logger().Warnf("could not emit %v event for target: %v", targetOutEv, *t) - } - } - return nil -} - -// routeOut is responsible for accepting a target from the associated test step -// and forward it to the next routing block. Returns the number of targets -// received from the test step or an error upon failure -func (r *stepRouter) routeOut(ctx xcontext.Context) (int, error) { - - stepLabel := r.bundle.TestStepLabel - ctx = ctx.WithTag("phase", "routeOut").WithField("step", stepLabel) - - targetWriter := newTargetWriter(r.timeouts) - - var err error - - ctx.Logger().Debugf("initializing routeOut for %s", stepLabel) - // `egressTarget` is used to keep track of egress times of a target from a test step - egressTarget := make(map[string]time.Time) - - for { - select { - case <-ctx.WaitFor(): - err = fmt.Errorf("termination requested for routing into %s", r.bundle.TestStepLabel) - case t, chanIsOpen := <-r.routingChannels.stepOut: - if !chanIsOpen { - ctx.Logger().Debugf("step output closed") - r.routingChannels.stepOut = nil - break - } - - if _, targetPresent := egressTarget[t.ID]; targetPresent { - err = fmt.Errorf("step %s returned target %+v multiple times", r.bundle.TestStepLabel, t) - break - } - // Emit an event signaling that the target has left the TestStep - if err := r.emitOutEvent(ctx, t, nil); err != nil { - ctx.Logger().Warnf("could not emit out event for target %v: %v", *t, err) - } - // Register egress time and forward target to the next routing block - egressTarget[t.ID] = time.Now() - if err := targetWriter.writeTimeout(ctx, r.routingChannels.routeOut, t, r.timeouts.MessageTimeout); err != nil { - ctx.Logger().Panicf("could not forward target to the test runner: %+v", err) - } - case targetError, chanIsOpen := <-r.routingChannels.stepErr: - if !chanIsOpen { - ctx.Logger().Debugf("step error closed") - r.routingChannels.stepErr = nil - break - } - - if _, targetPresent := egressTarget[targetError.Target.ID]; targetPresent { - err = fmt.Errorf("step %s returned target %+v multiple times", r.bundle.TestStepLabel, targetError.Target) - } else { - if err := r.emitOutEvent(ctx, targetError.Target, targetError.Err); err != nil { - ctx.Logger().Warnf("could not emit err event for target: %v", *targetError.Target) - } - egressTarget[targetError.Target.ID] = time.Now() - if err := targetWriter.writeTargetError(ctx, r.routingChannels.targetErr, targetError, r.timeouts.MessageTimeout); err != nil { - log.Panicf("could not forward target (%+v) to the test runner: %v", targetError.Target, err) - } - } - } - if err != nil { - break - } - if r.routingChannels.stepErr == nil && r.routingChannels.stepOut == nil { - ctx.Logger().Debugf("output and error channel from step are closed, routeOut should terminate") - close(r.routingChannels.routeOut) - break - } - } - - if err != nil { - ctx.Logger().Debugf("routeOut failed: %v", err) - return 0, err - } - return len(egressTarget), nil - -} - -// route implements the routing logic from the previous routing block to the test step -// and from the test step to the next routing block -func (r *stepRouter) route(ctx xcontext.Context, resultCh chan<- routeResult) { - - var ( - inTargets, outTargets int - errRouteIn, errRouteOut error - ) - - terminateInternalCtx, terminateInternal := xcontext.WithCancel(ctx) - defer terminateInternal() // avoids possible goroutine deadlock in context.WithCancel implementation - - wg := sync.WaitGroup{} - wg.Add(2) - go func() { - defer wg.Done() - inTargets, errRouteIn = r.routeIn(terminateInternalCtx) - if errRouteIn != nil { - terminateInternal() - } - }() - - go func() { - defer wg.Done() - outTargets, errRouteOut = r.routeOut(terminateInternalCtx) - if errRouteOut != nil { - terminateInternal() - } - }() - wg.Wait() - - routingErr := errRouteIn - if routingErr == nil { - routingErr = errRouteOut - } - if routingErr == nil && inTargets != outTargets { - routingErr = fmt.Errorf("step %s completed but did not return all injected Targets (%d!=%d)", r.bundle.TestStepLabel, inTargets, outTargets) - } - - // Send the result to the test runner, which is expected to be listening - // within `MessageTimeout`. If that's not the case, we hit an unrecovrable - // condition. - select { - case resultCh <- routeResult{bundle: r.bundle, err: routingErr}: - case <-time.After(r.timeouts.MessageTimeout): - log.Panicf("could not send routing block result") - } -} - -func newStepRouter(bundle test.TestStepBundle, routingChannels routingCh, ev testevent.EmitterFetcher, timeouts TestRunnerTimeouts) *stepRouter { - r := stepRouter{bundle: bundle, routingChannels: routingChannels, ev: ev, timeouts: timeouts} - return &r -} diff --git a/pkg/runner/test_runner_route_test.go b/pkg/runner/test_runner_route_test.go deleted file mode 100644 index 47bebe3d..00000000 --- a/pkg/runner/test_runner_route_test.go +++ /dev/null @@ -1,400 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. - -package runner - -import ( - "fmt" - "testing" - "time" - - "github.com/facebookincubator/contest/pkg/cerrors" - "github.com/facebookincubator/contest/pkg/event" - "github.com/facebookincubator/contest/pkg/event/testevent" - "github.com/facebookincubator/contest/pkg/pluginregistry" - "github.com/facebookincubator/contest/pkg/storage" - "github.com/facebookincubator/contest/pkg/target" - "github.com/facebookincubator/contest/pkg/test" - "github.com/facebookincubator/contest/pkg/xcontext/bundles/logrusctx" - "github.com/facebookincubator/contest/pkg/xcontext/logger" - "github.com/facebookincubator/contest/plugins/storage/memory" - "github.com/facebookincubator/contest/plugins/teststeps/example" - - "github.com/stretchr/testify/require" - "github.com/stretchr/testify/suite" -) - -var ctx = logrusctx.NewContext(logger.LevelDebug) - -var testSteps = map[string]test.TestStepFactory{ - example.Name: example.New, -} - -var testStepsEvents = map[string][]event.Name{ - example.Name: example.Events, -} - -type TestRunnerSuite struct { - suite.Suite - routingChannels routingCh - router *stepRouter - - // the following channels are the same channels wrapped within `routingChannels`, - // but used in the opposite direction compared to the routing block. When the - // routing block reads results, the corresponding channel of the test suite has - // write direction, when the routing block writes results, the corresponding channel - // of the test suite has read direction - routeInCh chan<- *target.Target - routeOutCh <-chan *target.Target - - stepInCh <-chan *target.Target - stepOutCh chan<- *target.Target - stepErrCh chan<- cerrors.TargetError - - targetErrCh <-chan cerrors.TargetError -} - -func (suite *TestRunnerSuite) SetupTest() { - - pluginRegistry := pluginregistry.NewPluginRegistry(ctx) - // Setup the PluginRegistry by registering TestSteps - for name, tsfactory := range testSteps { - if _, ok := testStepsEvents[name]; !ok { - suite.T().Errorf("test step %s does not define any associated event", name) - } - err := pluginRegistry.RegisterTestStep(name, tsfactory, testStepsEvents[name]) - require.NoError(suite.T(), err) - } - ts1, err := pluginRegistry.NewTestStep("Example") - require.NoError(suite.T(), err) - - params := make(test.TestStepParameters) - bundle := test.TestStepBundle{TestStep: ts1, TestStepLabel: "FirstStage", Parameters: params} - - routeInCh := make(chan *target.Target) - routeOutCh := make(chan *target.Target) - stepInCh := make(chan *target.Target) - stepOutCh := make(chan *target.Target) - stepErrCh := make(chan cerrors.TargetError) - targetErrCh := make(chan cerrors.TargetError) - - suite.routeInCh = routeInCh - suite.routeOutCh = routeOutCh - suite.stepOutCh = stepOutCh - suite.stepInCh = stepInCh - suite.stepErrCh = stepErrCh - suite.targetErrCh = targetErrCh - - suite.routingChannels = routingCh{ - routeIn: routeInCh, - routeOut: routeOutCh, - stepIn: stepInCh, - stepErr: stepErrCh, - stepOut: stepOutCh, - targetErr: targetErrCh, - } - - s, err := memory.New() - require.NoError(suite.T(), err) - err = storage.SetStorage(s) - require.NoError(suite.T(), err) - - header := testevent.Header{ - JobID: 1, - RunID: 1, - TestName: "TestRunnerSuite", - TestStepLabel: "TestRunnerSuite", - } - ev := storage.NewTestEventEmitterFetcher(header) - - timeouts := TestRunnerTimeouts{ - StepInjectTimeout: 30 * time.Second, - MessageTimeout: 25 * time.Second, - ShutdownTimeout: 5 * time.Second, - StepShutdownTimeout: 5 * time.Second, - } - - suite.router = newStepRouter(bundle, suite.routingChannels, ev, timeouts) -} - -func (suite *TestRunnerSuite) TestRouteInRoutesAllTargets() { - - // test that all targets are routed to step input channel without delay, in order - targets := []*target.Target{ - {ID: "001", FQDN: "host001.facebook.com"}, - {ID: "002", FQDN: "host002.facebook.com"}, - {ID: "003", FQDN: "host003.facebook.com"}, - } - - routeInResult := make(chan error) - stepInResult := make(chan error) - - go func() { - // start routing - _, _ = suite.router.routeIn(ctx) - }() - - // inject targets - go func() { - defer close(suite.routeInCh) - for _, target := range targets { - select { - case <-time.After(2 * time.Second): - routeInResult <- fmt.Errorf("target should be accepted by routeIn block within timeout") - return - case suite.routeInCh <- target: - } - } - routeInResult <- nil - }() - - // mimic the test step consuming targets in input - go func() { - numTargets := 0 - for { - select { - case t, ok := <-suite.stepInCh: - if !ok { - if numTargets != len(targets) { - stepInResult <- fmt.Errorf("not all targets received by teste step") - } else { - stepInResult <- nil - } - return - } - if numTargets+1 > len(targets) { - stepInResult <- fmt.Errorf("more targets returned than injected") - return - } - if t != targets[numTargets] { - stepInResult <- fmt.Errorf("targets returned in wrong order") - return - } - numTargets++ - case <-time.After(2 * time.Second): - stepInResult <- fmt.Errorf("expected target on channel within timeout") - } - } - }() - - for { - select { - case <-time.After(5 * time.Second): - suite.T().Errorf("test should return within timeout") - case err := <-stepInResult: - stepInResult = nil - if err != nil { - suite.T().Errorf("step in returned error: %v", err) - } - case err := <-routeInResult: - routeInResult = nil - if err != nil { - suite.T().Errorf("route in returned error: %v", err) - } - } - if stepInResult == nil && routeInResult == nil { - return - } - } -} - -func (suite *TestRunnerSuite) TestRouteOutRoutesAllSuccessfulTargets() { - - // test that all targets are routed in output from a test step are received by - // the routing logic and forwarded to the next routing block - targets := []*target.Target{ - {ID: "001", FQDN: "host001.facebook.com"}, - {ID: "002", FQDN: "host002.facebook.com"}, - {ID: "003", FQDN: "host003.facebook.com"}, - } - - go func() { - // start routing - _, _ = suite.router.routeOut(ctx) - }() - - stepResult := make(chan error) - routeResult := make(chan error) - - // mimic the test step returning targets on the output channel. This test exercise - // the path where all targets complete successfully - go func() { - // it's expected that the step closes both channels - defer close(suite.stepOutCh) - defer close(suite.stepErrCh) - for _, target := range targets { - select { - case <-time.After(2 * time.Second): - stepResult <- fmt.Errorf("target should be accepted by routing block within timeout") - return - case suite.stepOutCh <- target: - } - } - stepResult <- nil - }() - - // mimic the next routing block and the pipeline reading from the routeOut channel and the targetErr - // channel. We don't expect any target coming through targetErr, we expect all targets to be successful - go func() { - numTargets := 0 - for { - select { - case _, ok := <-suite.targetErrCh: - if !ok { - suite.targetErrCh = nil - } else { - routeResult <- fmt.Errorf("no targets expected on the error channel") - return - } - case t, ok := <-suite.routeOutCh: - if !ok { - if numTargets != len(targets) { - routeResult <- fmt.Errorf("not all targets have been returned") - } else { - routeResult <- nil - } - return - } - if numTargets+1 > len(targets) { - routeResult <- fmt.Errorf("more targets returned than injected") - return - } - if t.ID != targets[numTargets].ID { - routeResult <- fmt.Errorf("targets returned in wrong order") - return - } - numTargets++ - case <-time.After(2 * time.Second): - routeResult <- fmt.Errorf("expected target on channel within timeout") - return - } - } - }() - - for { - select { - case <-time.After(5 * time.Second): - suite.T().Errorf("test should return within timeout") - case err := <-stepResult: - stepResult = nil - if err != nil { - suite.T().Errorf("step output returned error: %v", err) - } - case err := <-routeResult: - routeResult = nil - if err != nil { - suite.T().Errorf("router returned error: %v", err) - } - } - if stepResult == nil && routeResult == nil { - return - } - } -} - -func (suite *TestRunnerSuite) TestRouteOutRoutesAllFailedTargets() { - - // test that all targets are routed in output from a test step are received by - // the routing logic and forwarded to the next routing block - targets := []*target.Target{ - {ID: "001", FQDN: "host001.facebook.com"}, - {ID: "002", FQDN: "host002.facebook.com"}, - {ID: "003", FQDN: "host003.facebook.com"}, - } - - go func() { - // start routing - _, _ = suite.router.routeOut(ctx) - }() - - stepResult := make(chan error) - routeResult := make(chan error) - - // mimic the test step returning targets on the output channel. This test exercise - // the path where all targets complete successfully - go func() { - // it's expected that the step closes both channels - defer close(suite.stepOutCh) - defer close(suite.stepErrCh) - for _, target := range targets { - targetErr := cerrors.TargetError{Target: target, Err: fmt.Errorf("test error")} - select { - case <-time.After(2 * time.Second): - stepResult <- fmt.Errorf("target should be accepted by routing block within timeout") - return - case suite.stepErrCh <- targetErr: - } - } - stepResult <- nil - }() - - // mimic the next routing block and the pipeline reading from the routeOut channel and the targetErr - // channel. We don't expect any target coming through targetErr, we expect all targets to be successful - go func() { - numTargets := 0 - for { - select { - case targetErr, ok := <-suite.targetErrCh: - if !ok { - routeResult <- fmt.Errorf("target error channel should not be closed by routing block") - return - } - if targetErr.Err == nil { - routeResult <- fmt.Errorf("expected error associated to the target") - return - } - if numTargets+1 > len(targets) { - routeResult <- fmt.Errorf("more targets returned than injected") - return - } - if targetErr.Target.ID != targets[numTargets].ID { - routeResult <- fmt.Errorf("targets returned in wrong order") - return - } - - numTargets++ - if numTargets == len(targets) { - routeResult <- nil - return - } - case _, ok := <-suite.routeOutCh: - if !ok { - suite.routeOutCh = nil - } else { - routeResult <- fmt.Errorf("no targets expected on the output channel") - return - } - case <-time.After(2 * time.Second): - routeResult <- fmt.Errorf("expected target on channel within timeout") - return - } - } - }() - - for { - select { - case <-time.After(5 * time.Second): - suite.T().Errorf("test should return within timeout") - case err := <-stepResult: - stepResult = nil - if err != nil { - suite.T().Errorf("step output routine returned error: %v", err) - } - case err := <-routeResult: - routeResult = nil - if err != nil { - suite.T().Errorf("router returned error: %v", err) - } - } - if stepResult == nil && routeResult == nil { - return - } - } -} - -func TestTestRunnerSuite(t *testing.T) { - TestRunnerSuite := &TestRunnerSuite{} - suite.Run(t, TestRunnerSuite) -} diff --git a/pkg/runner/test_runner_test.go b/pkg/runner/test_runner_test.go new file mode 100644 index 00000000..6a920cb5 --- /dev/null +++ b/pkg/runner/test_runner_test.go @@ -0,0 +1,597 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +package runner + +import ( + "flag" + "fmt" + "strings" + "testing" + "time" + + "github.com/facebookincubator/contest/pkg/xcontext" + "github.com/facebookincubator/contest/pkg/xcontext/bundles/logrusctx" + "github.com/facebookincubator/contest/pkg/xcontext/logger" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/facebookincubator/contest/pkg/cerrors" + "github.com/facebookincubator/contest/pkg/event" + "github.com/facebookincubator/contest/pkg/pluginregistry" + "github.com/facebookincubator/contest/pkg/storage" + "github.com/facebookincubator/contest/pkg/target" + "github.com/facebookincubator/contest/pkg/test" + "github.com/facebookincubator/contest/pkg/types" + "github.com/facebookincubator/contest/plugins/storage/memory" + "github.com/facebookincubator/contest/tests/common" + "github.com/facebookincubator/contest/tests/common/goroutine_leak_check" + "github.com/facebookincubator/contest/tests/plugins/teststeps/badtargets" + "github.com/facebookincubator/contest/tests/plugins/teststeps/channels" + "github.com/facebookincubator/contest/tests/plugins/teststeps/hanging" + "github.com/facebookincubator/contest/tests/plugins/teststeps/noreturn" + "github.com/facebookincubator/contest/tests/plugins/teststeps/panicstep" + "github.com/facebookincubator/contest/tests/plugins/teststeps/teststep" +) + +const ( + testName = "SimpleTest" + stepInjectTimeout = 3 * time.Second + shutdownTimeout = 3 * time.Second +) + +var ( + evs storage.ResettableStorage + pluginRegistry *pluginregistry.PluginRegistry +) + +var ( + ctx = logrusctx.NewContext(logger.LevelDebug) +) + +func TestMain(m *testing.M) { + flag.Parse() + if ms, err := memory.New(); err == nil { + evs = ms + if err := storage.SetStorage(ms); err != nil { + panic(err.Error()) + } + } else { + panic(fmt.Sprintf("could not initialize in-memory storage layer: %v", err)) + } + pluginRegistry = pluginregistry.NewPluginRegistry(ctx) + for _, e := range []struct { + name string + factory test.TestStepFactory + events []event.Name + }{ + {badtargets.Name, badtargets.New, badtargets.Events}, + {channels.Name, channels.New, channels.Events}, + {hanging.Name, hanging.New, hanging.Events}, + {noreturn.Name, noreturn.New, noreturn.Events}, + {panicstep.Name, panicstep.New, panicstep.Events}, + {teststep.Name, teststep.New, teststep.Events}, + } { + if err := pluginRegistry.RegisterTestStep(e.name, e.factory, e.events); err != nil { + panic(fmt.Sprintf("could not register TestStep: %v", err)) + } + } + flag.Parse() + goroutine_leak_check.LeakCheckingTestMain(m, + // We expect these to leak. + "github.com/facebookincubator/contest/tests/plugins/teststeps/hanging.(*hanging).Run", + "github.com/facebookincubator/contest/tests/plugins/teststeps/noreturn.(*noreturnStep).Run", + + // No leak in contexts checked with itsown unit-tests + "github.com/facebookincubator/contest/pkg/xcontext.(*ctxValue).cloneWithStdContext.func2", + ) +} + +func newTestRunner() *TestRunner { + return NewTestRunnerWithTimeouts(stepInjectTimeout, shutdownTimeout) +} + +func resetEventStorage() { + if err := evs.Reset(); err != nil { + panic(err.Error()) + } +} + +func tgt(id string) *target.Target { + return &target.Target{ID: id} +} + +func getStepEvents(stepLabel string) string { + return common.GetTestEventsAsString(ctx, evs, testName, nil, &stepLabel) +} + +func getTargetEvents(targetID string) string { + return common.GetTestEventsAsString(ctx, evs, testName, &targetID, nil) +} + +func newStep(label, name string, params *test.TestStepParameters) test.TestStepBundle { + td := test.TestStepDescriptor{ + Name: name, + Label: label, + } + if params != nil { + td.Parameters = *params + } + sb, err := pluginRegistry.NewTestStepBundle(ctx, td, nil) + if err != nil { + panic(fmt.Sprintf("failed to create test step bundle: %v", err)) + } + return *sb +} + +func newTestStep(label string, failPct int, failTargets string, delayTargets string) test.TestStepBundle { + return newStep(label, teststep.Name, &test.TestStepParameters{ + teststep.FailPctParam: []test.Param{*test.NewParam(fmt.Sprintf("%d", failPct))}, + teststep.FailTargetsParam: []test.Param{*test.NewParam(failTargets)}, + teststep.DelayTargetsParam: []test.Param{*test.NewParam(delayTargets)}, + }) +} + +type runRes struct { + res []byte + err error +} + +func runWithTimeout(t *testing.T, tr *TestRunner, ctx xcontext.Context, resumeState []byte, runID types.RunID, timeout time.Duration, targets []*target.Target, bundles []test.TestStepBundle) ([]byte, error) { + newCtx, cancel := xcontext.WithCancel(ctx) + test := &test.Test{ + Name: testName, + TestStepsBundles: bundles, + } + resCh := make(chan runRes) + go func() { + res, err := tr.Run(newCtx, test, targets, 1, runID, resumeState) + resCh <- runRes{res: res, err: err} + }() + var res runRes + select { + case res = <-resCh: + case <-time.After(timeout): + cancel() + assert.FailNow(t, "TestRunner should not time out") + } + return res.res, res.err +} + +// Simple case: one target, one step, success. +func Test1Step1Success(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("T1")}, + []test.TestStepBundle{ + newTestStep("Step 1", 0, "", ""), + }, + ) + require.NoError(t, err) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepRunningEvent]} +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestFinishedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetOut]} +`, getTargetEvents("T1")) +} + +// Simple case: one target, one step that blocks for a bit, success. +// We block for longer than the shutdown timeout of the test runner. +func Test1StepLongerThanShutdown1Success(t *testing.T) { + resetEventStorage() + tr := NewTestRunnerWithTimeouts(stepInjectTimeout, 100*time.Millisecond) + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("T1")}, + []test.TestStepBundle{ + newTestStep("Step 1", 0, "", "T1=500"), + }, + ) + require.NoError(t, err) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepRunningEvent]} +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestFinishedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetOut]} +`, getTargetEvents("T1")) +} + +// Simple case: one target, one step, failure. +func Test1Step1Fail(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("T1")}, + []test.TestStepBundle{ + newTestStep("Step 1", 100, "", ""), + }, + ) + require.NoError(t, err) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepRunningEvent]} +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("Step 1")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestFailedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetErr &"{\"Error\":\"target failed\"}"]} +`, getTargetEvents("T1")) +} + +// One step pipeline with two targets - one fails, one succeeds. +func Test1Step1Success1Fail(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("T1"), tgt("T2")}, + []test.TestStepBundle{ + newTestStep("Step 1", 0, "T1", "T2=100"), + }, + ) + require.NoError(t, err) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepRunningEvent]} +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestFailedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetErr &"{\"Error\":\"target failed\"}"]} +`, getTargetEvents("T1")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TestFinishedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TargetOut]} +`, getTargetEvents("T2")) +} + +// Three-step pipeline, two targets: T1 fails at step 1, T2 fails at step 2, +// step 3 is not reached and not even run. +func Test3StepsNotReachedStepNotRun(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("T1"), tgt("T2")}, + []test.TestStepBundle{ + newTestStep("Step 1", 0, "T1", ""), + newTestStep("Step 2", 0, "T2", ""), + newTestStep("Step 3", 0, "", ""), + }, + ) + require.NoError(t, err) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepRunningEvent]} +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("Step 1")) + require.Equal(t, ` +{[1 1 SimpleTest Step 2][(*Target)(nil) TestStepRunningEvent]} +{[1 1 SimpleTest Step 2][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("Step 2")) + require.Equal(t, "\n\n", getStepEvents("Step 3")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestFailedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetErr &"{\"Error\":\"target failed\"}"]} +`, getTargetEvents("T1")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TestFinishedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TargetOut]} +{[1 1 SimpleTest Step 2][Target{ID: "T2"} TargetIn]} +{[1 1 SimpleTest Step 2][Target{ID: "T2"} TestStartedEvent]} +{[1 1 SimpleTest Step 2][Target{ID: "T2"} TestFailedEvent]} +{[1 1 SimpleTest Step 2][Target{ID: "T2"} TargetErr &"{\"Error\":\"target failed\"}"]} +`, getTargetEvents("T2")) +} + +// A misbehaving step that fails to shut down properly after processing targets +// and does not return. +func TestNoReturnStepWithCorrectTargetForwarding(t *testing.T) { + resetEventStorage() + tr := NewTestRunnerWithTimeouts(100*time.Millisecond, 200*time.Millisecond) + ctx, cancel := xcontext.WithCancel(ctx) + defer cancel() + _, err := runWithTimeout(t, tr, ctx, nil, 1, 2*time.Second, + []*target.Target{tgt("T1")}, + []test.TestStepBundle{ + newStep("Step 1", noreturn.Name, nil), + }, + ) + require.Error(t, err) + require.IsType(t, &cerrors.ErrTestStepsNeverReturned{}, err) +} + +// A misbehaving step that does not process any targets. +func TestNoReturnStepWithoutTargetForwarding(t *testing.T) { + resetEventStorage() + tr := NewTestRunnerWithTimeouts(100*time.Millisecond, 200*time.Millisecond) + ctx, cancel := xcontext.WithCancel(ctx) + defer cancel() + _, err := runWithTimeout(t, tr, ctx, nil, 1, 2*time.Second, + []*target.Target{tgt("T1")}, + []test.TestStepBundle{ + newStep("Step 1", hanging.Name, nil), + }, + ) + require.Error(t, err) + require.IsType(t, &cerrors.ErrTestTargetInjectionTimedOut{}, err) +} + +// A misbehaving step that panics. +func TestStepPanics(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("T1")}, + []test.TestStepBundle{ + newStep("Step 1", panicstep.Name, nil), + }, + ) + require.Error(t, err) + require.IsType(t, &cerrors.ErrTestStepPaniced{}, err) + require.Equal(t, "\n\n", getTargetEvents("T1")) + require.Contains(t, getStepEvents("Step 1"), "step Step 1 paniced") +} + +// A misbehaving step that closes its output channel. +func TestStepClosesChannels(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("T1")}, + []test.TestStepBundle{ + newStep("Step 1", channels.Name, nil), + }, + ) + require.Error(t, err) + require.IsType(t, &cerrors.ErrTestStepClosedChannels{}, err) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetOut]} +`, getTargetEvents("T1")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][(*Target)(nil) TestError &"\"test step Step 1 closed output channels (api violation)\""]} +`, getStepEvents("Step 1")) +} + +// A misbehaving step that yields a result for a target that does not exist. +func TestStepYieldsResultForNonexistentTarget(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("TExtra")}, + []test.TestStepBundle{ + newStep("Step 1", badtargets.Name, nil), + }, + ) + require.Error(t, err) + require.IsType(t, &cerrors.ErrTestStepReturnedUnexpectedResult{}, err) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "TExtra"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "TExtra"} TargetOut]} +`, getTargetEvents("TExtra")) + require.Equal(t, "\n\n", getTargetEvents("TExtra2")) + require.Equal(t, ` +{[1 1 SimpleTest Step 1][(*Target)(nil) TestError &"\"test step Step 1 returned unexpected result for TExtra2\""]} +`, getStepEvents("Step 1")) +} + +// A misbehaving step that yields a duplicate result for a target. +func TestStepYieldsDuplicateResult(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("TGood"), tgt("TDup")}, + []test.TestStepBundle{ + // TGood makes it past here unscathed and gets delayed in Step 2, + // TDup also emerges fine at first but is then returned again, and that's bad. + newStep("Step 1", badtargets.Name, nil), + newTestStep("Step 2", 0, "", "TGood=100"), + }, + ) + require.Error(t, err) + require.IsType(t, &cerrors.ErrTestStepReturnedDuplicateResult{}, err) +} + +// A misbehaving step that loses targets. +func TestStepLosesTargets(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("TGood"), tgt("TDrop")}, + []test.TestStepBundle{ + newStep("Step 1", badtargets.Name, nil), + }, + ) + require.Error(t, err) + require.IsType(t, &cerrors.ErrTestStepLostTargets{}, err) + require.Contains(t, err.Error(), "TDrop") +} + +// A misbehaving step that yields a result for a target that does exist +// but is not currently waiting for it. +func TestStepYieldsResultForUnexpectedTarget(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + []*target.Target{tgt("TExtra"), tgt("TExtra2")}, + []test.TestStepBundle{ + // TExtra2 fails here. + newTestStep("Step 1", 0, "TExtra2", ""), + // Yet, a result for it is returned here, which we did not expect. + newStep("Step 2", badtargets.Name, nil), + }, + ) + require.Error(t, err) + require.IsType(t, &cerrors.ErrTestStepReturnedUnexpectedResult{}, err) +} + +// Larger, randomized test - a number of steps, some targets failing, some succeeding. +func TestRandomizedMultiStep(t *testing.T) { + resetEventStorage() + tr := newTestRunner() + var targets []*target.Target + for i := 1; i <= 100; i++ { + targets = append(targets, tgt(fmt.Sprintf("T%d", i))) + } + _, err := runWithTimeout(t, tr, nil, nil, 1, 2*time.Second, + targets, + []test.TestStepBundle{ + newTestStep("Step 1", 0, "", "*=10"), // All targets pass the first step, with a slight delay + newTestStep("Step 2", 25, "", ""), // 25% don't make it past the second step + newTestStep("Step 3", 25, "", "*=10"), // Another 25% fail at the third step + }, + ) + require.NoError(t, err) + // Every target mush have started and finished the first step. + numFinished := 0 + for _, tgt := range targets { + s1n := "Step 1" + require.Equal(t, fmt.Sprintf(` +{[1 1 SimpleTest Step 1][Target{ID: "%s"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "%s"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "%s"} TestFinishedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "%s"} TargetOut]} +`, tgt.ID, tgt.ID, tgt.ID, tgt.ID), + common.GetTestEventsAsString(ctx, evs, testName, &tgt.ID, &s1n)) + s3n := "Step 3" + if strings.Contains(common.GetTestEventsAsString(ctx, evs, testName, &tgt.ID, &s3n), "TestFinishedEvent") { + numFinished++ + } + } + // At least some must have finished. + require.Greater(t, numFinished, 0) +} + +// Test pausing/resuming a naive step that does not cooperate. +// In this case we drain input, wait for all targets to emerge and exit gracefully. +func TestPauseResumeSimple(t *testing.T) { + resetEventStorage() + var err error + var resumeState []byte + targets := []*target.Target{tgt("T1"), tgt("T2"), tgt("T3")} + steps := []test.TestStepBundle{ + newTestStep("Step 1", 0, "T1", ""), + // T2 and T3 will be paused here, the step will be given time to finish. + newTestStep("Step 2", 0, "", "T2=200,T3=200"), + newTestStep("Step 3", 0, "", ""), + } + { + tr1 := newTestRunner() + ctx1, pause := xcontext.WithNotify(ctx, xcontext.Paused) + ctx1, cancel := xcontext.WithCancel(ctx1) + defer cancel() + go func() { + time.Sleep(100 * time.Millisecond) + ctx.Logger().Infof("TestPauseResumeNaive: pausing") + pause() + }() + resumeState, err = runWithTimeout(t, tr1, ctx1, nil, 1, 2*time.Second, targets, steps) + require.Error(t, err) + require.IsType(t, xcontext.Paused, err) + require.NotNil(t, resumeState) + } + ctx.Logger().Debugf("Resume state: %s", string(resumeState)) + // Make sure that resume state is validated. + { + tr := newTestRunner() + ctx, cancel := xcontext.WithCancel(ctx) + defer cancel() + resumeState2, err := runWithTimeout( + t, tr, ctx, []byte("FOO"), 2, 2*time.Second, targets, steps) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid resume state") + require.Nil(t, resumeState2) + } + { + tr := newTestRunner() + ctx, cancel := xcontext.WithCancel(ctx) + defer cancel() + resumeState2 := strings.Replace(string(resumeState), `"v"`, `"Xv"`, 1) + _, err := runWithTimeout( + t, tr, ctx, []byte(resumeState2), 3, 2*time.Second, targets, steps) + require.Error(t, err) + require.Contains(t, err.Error(), "incompatible resume state") + } + { + tr := newTestRunner() + ctx, cancel := xcontext.WithCancel(ctx) + defer cancel() + resumeState2 := strings.Replace(string(resumeState), `"job_id":1`, `"job_id":2`, 1) + _, err := runWithTimeout( + t, tr, ctx, []byte(resumeState2), 4, 2*time.Second, targets, steps) + require.Error(t, err) + require.Contains(t, err.Error(), "wrong resume state") + } + // Finally, resume and finish the job. + { + tr2 := newTestRunner() + ctx2, cancel := xcontext.WithCancel(ctx) + defer cancel() + _, err := runWithTimeout(t, tr2, ctx2, resumeState, 5, 2*time.Second, + // Pass exactly the same targets and pipeline to resume properly. + // Don't use the same pointers ot make sure there is no reliance on that. + []*target.Target{tgt("T1"), tgt("T2"), tgt("T3")}, + []test.TestStepBundle{ + newTestStep("Step 1", 0, "T1", ""), + newTestStep("Step 2", 0, "", "T2=200,T3=200"), + newTestStep("Step 3", 0, "", ""), + }, + ) + require.NoError(t, err) + } + // Verify step events. + // Steps 1 and 2 are executed entirely within the first runner instance + // and never started in the second. + require.Equal(t, ` +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepRunningEvent]} +{[1 1 SimpleTest Step 1][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("Step 1")) + require.Equal(t, ` +{[1 1 SimpleTest Step 2][(*Target)(nil) TestStepRunningEvent]} +{[1 1 SimpleTest Step 2][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("Step 2")) + // Step 3 did not get to start in the first instance and ran in the second. + require.Equal(t, ` +{[1 5 SimpleTest Step 3][(*Target)(nil) TestStepRunningEvent]} +{[1 5 SimpleTest Step 3][(*Target)(nil) TestStepFinishedEvent]} +`, getStepEvents("Step 3")) + // T1 failed entirely within the first run. + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TestFailedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T1"} TargetErr &"{\"Error\":\"target failed\"}"]} +`, getTargetEvents("T1")) + // T2 and T3 ran in both. + require.Equal(t, ` +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TargetIn]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TestStartedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TestFinishedEvent]} +{[1 1 SimpleTest Step 1][Target{ID: "T2"} TargetOut]} +{[1 1 SimpleTest Step 2][Target{ID: "T2"} TargetIn]} +{[1 1 SimpleTest Step 2][Target{ID: "T2"} TestStartedEvent]} +{[1 1 SimpleTest Step 2][Target{ID: "T2"} TestFinishedEvent]} +{[1 1 SimpleTest Step 2][Target{ID: "T2"} TargetOut]} +{[1 5 SimpleTest Step 3][Target{ID: "T2"} TargetIn]} +{[1 5 SimpleTest Step 3][Target{ID: "T2"} TestStartedEvent]} +{[1 5 SimpleTest Step 3][Target{ID: "T2"} TestFinishedEvent]} +{[1 5 SimpleTest Step 3][Target{ID: "T2"} TargetOut]} +`, getTargetEvents("T2")) +} diff --git a/pkg/runner/wait_for_first_target.go b/pkg/runner/wait_for_first_target.go deleted file mode 100644 index 355953f0..00000000 --- a/pkg/runner/wait_for_first_target.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. - -package runner - -import ( - "github.com/facebookincubator/contest/pkg/target" -) - -// waitForFirstTarget copies targets from `in` to `out` and also -// signals on first target or if `in` is closed and there was no targets -// at all. -// -// To solve the initial problem there're two approaches: -// * Just wait for a target in `stepCh.stepIn` and then insert -// it back to the channel. -// * Create substitute `stepCh.stepIn` with a new channel and just -// copy all targets from original `stepCh.stepIn` to this new channel. -// And we may detect when we receive the first target in the original -// channel. -// -// The first approach is much simpler, but the second preserves -// the order of targets (which is handy and not misleading -// while reading logs). Here we implement the second one: -func waitForFirstTarget( - cancelCh <-chan struct{}, - in <-chan *target.Target, -) (out chan *target.Target, onFirstTarget, onNoTargets <-chan struct{}) { - onFirstTargetCh := make(chan struct{}) - onNoTargetsCh := make(chan struct{}) - onFirstTarget, onNoTargets = onFirstTargetCh, onNoTargetsCh - - out = make(chan *target.Target) - go func() { - select { - case t, ok := <-in: - if !ok { - close(out) - close(onNoTargetsCh) - return - } - close(onFirstTargetCh) - out <- t - case <-cancelCh: - return - } - - for t := range in { - out <- t - } - close(out) - }() - - return -} diff --git a/pkg/runner/wait_for_first_target_test.go b/pkg/runner/wait_for_first_target_test.go deleted file mode 100644 index 494a335a..00000000 --- a/pkg/runner/wait_for_first_target_test.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. -// -// This source code is licensed under the MIT license found in the -// LICENSE file in the root directory of this source tree. - -package runner - -import ( - "fmt" - "runtime" - "sync" - "testing" - - "github.com/stretchr/testify/require" - - "github.com/facebookincubator/contest/pkg/target" -) - -func TestWaitForFirstTarget(t *testing.T) { - t.Run("100targets", func(t *testing.T) { - ch0 := make(chan *target.Target) - ch1, onFirstTargetChan, onNoTargetsChan := waitForFirstTarget(nil, ch0) - - var wgBeforeFirstTarget, wgAfterSecondTarget sync.WaitGroup - wgBeforeFirstTarget.Add(1) - wgAfterSecondTarget.Add(1) - - go func() { - wgBeforeFirstTarget.Wait() - for i := 0; i < 100; i++ { - ch0 <- &target.Target{ - ID: fmt.Sprintf("%d", i), - } - if i == 1 { - // to avoid race conditions in this test we verify - // if there's a "onFirstTarget" signal after the second - // target - wgAfterSecondTarget.Done() - } - } - }() - - runtime.Gosched() - select { - case <-onFirstTargetChan: - t.Fatal("should not happen") - case <-onNoTargetsChan: - t.Fatal("should not happen") - default: - } - - wgBeforeFirstTarget.Done() - - var wgReader sync.WaitGroup - wgReader.Add(1) - go func() { - defer wgReader.Done() - // Checking quantity and order - for i := 0; i < 100; i++ { - _target := <-ch1 - require.Equal(t, &target.Target{ - ID: fmt.Sprintf("%d", i), - }, _target) - } - }() - - wgAfterSecondTarget.Wait() - - select { - case <-onFirstTargetChan: - case <-onNoTargetsChan: - t.Fatal("should not happen") - default: - t.Fatal("should not happen") // see wgAfterSecondTarget.Wait() above - } - - wgReader.Wait() - - runtime.Gosched() - require.Len(t, ch1, 0) - }) - - t.Run("no_target", func(t *testing.T) { - ch0 := make(chan *target.Target) - ch1, onFirstTargetChan, onNoTargetsChan := waitForFirstTarget(nil, ch0) - - runtime.Gosched() - select { - case <-onFirstTargetChan: - t.Fatal("should not happen") - case <-onNoTargetsChan: - t.Fatal("should not happen") - default: - } - - close(ch0) - - select { - case <-onFirstTargetChan: - t.Fatal("should not happen") - case <-onNoTargetsChan: - } - - runtime.Gosched() - require.Len(t, ch1, 0) - _, isOpened := <-ch1 - require.False(t, isOpened) - }) - - t.Run("cancel", func(t *testing.T) { - cancelCh := make(chan struct{}) - ch0 := make(chan *target.Target) - ch1, onFirstTargetChan, onNoTargetsChan := waitForFirstTarget(nil, ch0) - - runtime.Gosched() - select { - case <-onFirstTargetChan: - t.Fatal("should not happen") // see wgBeforeFirstTarget.Wait() above - case <-onNoTargetsChan: - t.Fatal("should not happen") - default: - } - - close(cancelCh) - - runtime.Gosched() - select { - case <-onFirstTargetChan: - t.Fatal("should not happen") - case <-onNoTargetsChan: - t.Fatal("should not happen") - default: - } - - require.Len(t, ch1, 0) - select { - case <-ch1: - t.Fatal("should not happen") - default: - } - }) -} diff --git a/pkg/storage/limits/limits_test.go b/pkg/storage/limits/limits_test.go index 50af64f6..b95be939 100644 --- a/pkg/storage/limits/limits_test.go +++ b/pkg/storage/limits/limits_test.go @@ -39,7 +39,7 @@ var ( ) func TestServerID(t *testing.T) { - _, err := api.New(ctx, api.OptionServerID(strings.Repeat("A", limits.MaxServerIDLen+1))) + _, err := api.New(api.OptionServerID(strings.Repeat("A", limits.MaxServerIDLen+1))) assertLenError(t, "Server ID", err) } diff --git a/pkg/storage/storage.go b/pkg/storage/storage.go index 1cafdf4f..642a39aa 100644 --- a/pkg/storage/storage.go +++ b/pkg/storage/storage.go @@ -36,6 +36,7 @@ type TransactionalStorage interface { // ResettableStorage is implemented by storage engines that support reset operation type ResettableStorage interface { + Storage Reset() error } diff --git a/plugins/listeners/httplistener/httplistener.go b/plugins/listeners/httplistener/httplistener.go index fab33b9f..b19157d3 100644 --- a/plugins/listeners/httplistener/httplistener.go +++ b/plugins/listeners/httplistener/httplistener.go @@ -221,7 +221,7 @@ func listenWithCancellation(ctx xcontext.Context, s *http.Server) error { select { case err := <-errCh: return err - case <-ctx.Done(): + case <-ctx.WaitFor(): ctx.Logger().Debugf("Received server shut down request") return s.Close() } @@ -239,6 +239,7 @@ func (h *HTTPListener) Serve(ctx xcontext.Context, a *api.API) error { ReadTimeout: 10 * time.Second, WriteTimeout: 10 * time.Second, } + ctx.Logger().Debugf("Serving a listener") if err := listenWithCancellation(ctx, &s); err != nil { return fmt.Errorf("HTTP listener failed: %v", err) } diff --git a/plugins/storage/memory/memory.go b/plugins/storage/memory/memory.go index fd6caabc..d71cab5b 100644 --- a/plugins/storage/memory/memory.go +++ b/plugins/storage/memory/memory.go @@ -296,7 +296,7 @@ func (m *Memory) Version() (uint64, error) { } // New create a new Memory events storage backend -func New() (storage.Storage, error) { +func New() (storage.ResettableStorage, error) { m := Memory{lock: &sync.Mutex{}} m.jobInfo = make(map[types.JobID]*jobInfo) m.jobIDCounter = 1 diff --git a/plugins/teststeps/echo/echo.go b/plugins/teststeps/echo/echo.go index 6df62322..35ea00ac 100644 --- a/plugins/teststeps/echo/echo.go +++ b/plugins/teststeps/echo/echo.go @@ -60,7 +60,7 @@ func (e Step) Run(ctx xcontext.Context, ch test.TestStepChannels, params test.Te } ctx.Logger().Infof("Running on target %s with text '%s'", target, params.GetOne("text")) ch.Out <- target - case <-ctx.WaitFor(): + case <-ctx.Done(): return nil } } diff --git a/plugins/teststeps/example/example.go b/plugins/teststeps/example/example.go index f37db84d..673f82e4 100644 --- a/plugins/teststeps/example/example.go +++ b/plugins/teststeps/example/example.go @@ -12,13 +12,21 @@ import ( "github.com/facebookincubator/contest/pkg/cerrors" "github.com/facebookincubator/contest/pkg/event" "github.com/facebookincubator/contest/pkg/event/testevent" + "github.com/facebookincubator/contest/pkg/target" "github.com/facebookincubator/contest/pkg/test" "github.com/facebookincubator/contest/pkg/xcontext" + "github.com/facebookincubator/contest/plugins/teststeps" ) // Name is the name used to look this plugin up. var Name = "Example" +// Params this step accepts. +const ( + // Fail this percentage of targets at random. + FailPctParam = "FailPct" +) + // events that we may emit during the plugin's lifecycle. This is used in Events below. // Note that you don't normally need to emit start/finish/cancellation events as // these are emitted automatically by the framework. @@ -33,10 +41,10 @@ const ( var Events = []event.Name{StartedEvent, FinishedEvent, FailedEvent} // Step is an example implementation of a TestStep which simply -// consumes Targets in input and pipes them to the output channel with intermediate -// buffering. It randomly decides if a Target has failed and forwards it on -// the err channel. +// consumes Targets in input and pipes them to the output or error channel +// with intermediate buffering. type Step struct { + failPct int64 } // Name returns the name of the Step @@ -44,50 +52,48 @@ func (ts Step) Name() string { return Name } +func (ts *Step) shouldFail(t *target.Target) bool { + if ts.failPct > 0 { + roll := rand.Int63n(101) + return (roll <= ts.failPct) + } + return false +} + // Run executes the example step. -func (ts *Step) Run(ctx xcontext.Context, ch test.TestStepChannels, _ test.TestStepParameters, ev testevent.Emitter) error { - for { - - r := rand.Intn(3) - select { - case target := <-ch.In: - if target == nil { - return nil - } - ctx.Logger().Infof("Executing on target %s", target) - // NOTE: you may want more robust error handling here, possibly just - // logging the error, or a retry mechanism. Returning an error - // here means failing the entire job. - if err := ev.Emit(ctx, testevent.Data{EventName: StartedEvent, Target: target, Payload: nil}); err != nil { - return fmt.Errorf("failed to emit start event: %v", err) +func (ts *Step) Run(ctx xcontext.Context, ch test.TestStepChannels, params test.TestStepParameters, ev testevent.Emitter) error { + f := func(ctx xcontext.Context, target *target.Target) error { + ctx.Logger().Infof("Executing on target %s", target) + // NOTE: you may want more robust error handling here, possibly just + // logging the error, or a retry mechanism. Returning an error + // here means failing the entire job. + if err := ev.Emit(ctx, testevent.Data{EventName: StartedEvent, Target: target, Payload: nil}); err != nil { + return fmt.Errorf("failed to emit start event: %v", err) + } + if ts.shouldFail(target) { + if err := ev.Emit(ctx, testevent.Data{EventName: FailedEvent, Target: target, Payload: nil}); err != nil { + return fmt.Errorf("failed to emit finished event: %v", err) } - if r == 1 { - select { - case <-ctx.WaitFor(): - return nil - case ch.Err <- cerrors.TargetError{Target: target, Err: fmt.Errorf("target failed")}: - if err := ev.Emit(ctx, testevent.Data{EventName: FinishedEvent, Target: target, Payload: nil}); err != nil { - return fmt.Errorf("failed to emit finished event: %v", err) - } - } - } else { - select { - case <-ctx.WaitFor(): - return nil - case ch.Out <- target: - if err := ev.Emit(ctx, testevent.Data{EventName: FailedEvent, Target: target, Payload: nil}); err != nil { - return fmt.Errorf("failed to emit failed event: %v", err) - } - } + return fmt.Errorf("target failed") + } else { + if err := ev.Emit(ctx, testevent.Data{EventName: FinishedEvent, Target: target, Payload: nil}); err != nil { + return fmt.Errorf("failed to emit failed event: %v", err) } - case <-ctx.WaitFor(): - return nil } + return nil } + return teststeps.ForEachTarget(Name, ctx, ch, f) } // ValidateParameters validates the parameters associated to the TestStep -func (ts *Step) ValidateParameters(_ xcontext.Context, _ test.TestStepParameters) error { +func (ts *Step) ValidateParameters(_ xcontext.Context, params test.TestStepParameters) error { + if params.GetOne(FailPctParam).String() != "" { + if pct, err := params.GetInt(FailPctParam); err == nil { + ts.failPct = pct + } else { + return fmt.Errorf("invalid FailPct: %w", err) + } + } return nil } diff --git a/plugins/teststeps/randecho/randecho.go b/plugins/teststeps/randecho/randecho.go index 5235e6e1..5e57dc20 100644 --- a/plugins/teststeps/randecho/randecho.go +++ b/plugins/teststeps/randecho/randecho.go @@ -80,7 +80,7 @@ func (e Step) Run(ctx xcontext.Context, ch test.TestStepChannels, params test.Te ctx.Logger().Infof("Run: target %s failed: %s", target, params.GetOne("text")) ch.Err <- cerrors.TargetError{Target: target, Err: fmt.Errorf("target randomly failed")} } - case <-ctx.WaitFor(): + case <-ctx.Done(): return nil } } diff --git a/plugins/teststeps/sshcmd/sshcmd.go b/plugins/teststeps/sshcmd/sshcmd.go index beb47919..ef95ca82 100644 --- a/plugins/teststeps/sshcmd/sshcmd.go +++ b/plugins/teststeps/sshcmd/sshcmd.go @@ -247,7 +247,7 @@ func (ts *SSHCmd) Run(ctx xcontext.Context, ch test.TestStepChannels, params tes ctx.Logger().Warnf("Stderr of command '%s' is '%s'", cmd, stderr.Bytes()) } return err - case <-ctx.WaitFor(): + case <-ctx.Done(): return session.Signal(ssh.SIGKILL) case <-time.After(250 * time.Millisecond): keepAliveCnt++ diff --git a/plugins/teststeps/terminalexpect/terminalexpect.go b/plugins/teststeps/terminalexpect/terminalexpect.go index f00040de..1ca08d80 100644 --- a/plugins/teststeps/terminalexpect/terminalexpect.go +++ b/plugins/teststeps/terminalexpect/terminalexpect.go @@ -79,7 +79,7 @@ func (ts *TerminalExpect) Run(ctx xcontext.Context, ch test.TestStepChannels, pa return err case <-time.After(ts.Timeout): return fmt.Errorf("timed out after %s", ts.Timeout) - case <-ctx.WaitFor(): + case <-ctx.Done(): return nil } } diff --git a/plugins/teststeps/teststeps.go b/plugins/teststeps/teststeps.go index c8de4f9d..83171963 100644 --- a/plugins/teststeps/teststeps.go +++ b/plugins/teststeps/teststeps.go @@ -32,15 +32,15 @@ func ForEachTarget(pluginName string, ctx xcontext.Context, ch test.TestStepChan ctx.Logger().Errorf("%s: ForEachTarget: failed to apply test step function on target %s: %v", pluginName, t, err) select { case ch.Err <- cerrors.TargetError{Target: t, Err: err}: - case <-ctx.WaitFor(): - ctx.Logger().Debugf("%s: ForEachTarget: received cancellation/pause signal while reporting error", pluginName) + case <-ctx.Done(): + ctx.Logger().Debugf("%s: ForEachTarget: received cancellation signal while reporting error", pluginName) } } else { ctx.Logger().Debugf("%s: ForEachTarget: target %s completed successfully", pluginName, t) select { case ch.Out <- t: - case <-ctx.WaitFor(): - ctx.Logger().Debugf("%s: ForEachTarget: received cancellation/pause signal while reporting success", pluginName) + case <-ctx.Done(): + ctx.Logger().Debugf("%s: ForEachTarget: received pause signal while reporting success", pluginName) } } } @@ -50,12 +50,11 @@ func ForEachTarget(pluginName string, ctx xcontext.Context, ch test.TestStepChan for { select { case tgt := <-ch.In: - ctx.Logger().Debugf("%s: ForEachTarget: received target %s", pluginName, tgt) if tgt == nil { ctx.Logger().Debugf("%s: ForEachTarget: all targets have been received", pluginName) return } - + ctx.Logger().Debugf("%s: ForEachTarget: received target %s", pluginName, tgt) wg.Add(1) go func() { defer wg.Done() @@ -66,9 +65,6 @@ func ForEachTarget(pluginName string, ctx xcontext.Context, ch test.TestStepChan case <-ctx.Done(): ctx.Logger().Debugf("%s: ForEachTarget: incoming loop canceled", pluginName) return - case <-ctx.WaitFor(xcontext.Paused): - ctx.Logger().Debugf("%s: ForEachTarget: incoming loop paused", pluginName) - return } } }() diff --git a/tests/common/get_events.go b/tests/common/get_events.go new file mode 100644 index 00000000..5c22813a --- /dev/null +++ b/tests/common/get_events.go @@ -0,0 +1,48 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +package common + +import ( + "fmt" + "strings" + + "github.com/facebookincubator/contest/pkg/event/testevent" + "github.com/facebookincubator/contest/pkg/storage" + "github.com/facebookincubator/contest/pkg/xcontext" +) + +func eventToStringNoTime(ev testevent.Event) string { + // Omit the timestamp to make output stable. + return fmt.Sprintf("{%s%s}", ev.Header, ev.Data) +} + +// GetTestEventsAsString queries storage for particular test's events, +// further filtering by target ID and/or step label. +func GetTestEventsAsString(ctx xcontext.Context, st storage.EventStorage, testName string, targetID, stepLabel *string) string { + q, _ := testevent.BuildQuery(testevent.QueryTestName(testName)) + results, _ := st.GetTestEvents(ctx, q) + var resultsForTarget []string + for _, r := range results { + if targetID != nil { + if r.Data.Target == nil { + continue + } + if *targetID != "" && r.Data.Target.ID != *targetID { + continue + } + } + if stepLabel != nil { + if *stepLabel != "" && r.Header.TestStepLabel != *stepLabel { + continue + } + if targetID == nil && r.Data.Target != nil { + continue + } + } + resultsForTarget = append(resultsForTarget, eventToStringNoTime(r)) + } + return "\n" + strings.Join(resultsForTarget, "\n") + "\n" +} diff --git a/tests/common/goroutine_leak_check.go b/tests/common/goroutine_leak_check/goroutine_leak_check.go similarity index 97% rename from tests/common/goroutine_leak_check.go rename to tests/common/goroutine_leak_check/goroutine_leak_check.go index 3bbbc955..9da703d4 100644 --- a/tests/common/goroutine_leak_check.go +++ b/tests/common/goroutine_leak_check/goroutine_leak_check.go @@ -3,7 +3,7 @@ // This source code is licensed under the MIT license found in the // LICENSE file in the root directory of this source tree. -package common +package goroutine_leak_check import ( "fmt" @@ -144,6 +144,10 @@ func LeakCheckingTestMain(m *testing.M, funcWhitelist ...string) { ret := m.Run() if ret == 0 { time.Sleep(20 * time.Millisecond) // Give stragglers some time to exit. + runtime.Gosched() + runtime.GC() + runtime.Gosched() + runtime.GC() if err := CheckLeakedGoRoutines(funcWhitelist...); err != nil { fmt.Fprintf(os.Stderr, "%s", err) ret = 1 diff --git a/tests/common/goroutine_leak_check_test.go b/tests/common/goroutine_leak_check/goroutine_leak_check_test.go similarity index 86% rename from tests/common/goroutine_leak_check_test.go rename to tests/common/goroutine_leak_check/goroutine_leak_check_test.go index 7cb1d0a5..e361a951 100644 --- a/tests/common/goroutine_leak_check_test.go +++ b/tests/common/goroutine_leak_check/goroutine_leak_check_test.go @@ -3,7 +3,7 @@ // This source code is licensed under the MIT license found in the // LICENSE file in the root directory of this source tree. -package common +package goroutine_leak_check import ( "sync" @@ -33,7 +33,7 @@ func TestLeaks(t *testing.T) { dump, err := checkLeakedGoRoutines() require.Errorf(t, err, "===\n%s\n===", dump) require.Contains(t, err.Error(), "goroutine_leak_check_test.go") - require.Contains(t, err.Error(), "common.Func1") + require.Contains(t, err.Error(), "goroutine_leak_check.Func1") c.Signal() // Give some time for the goroutine to exit // otherwise it causes flakes in case of multiple test runs (-count=N). @@ -47,7 +47,7 @@ func TestLeaksWhitelisted(t *testing.T) { c.Wait() c.L.Unlock() require.NoError(t, CheckLeakedGoRoutines( - "github.com/facebookincubator/contest/tests/common.Func1")) + "github.com/facebookincubator/contest/tests/common/goroutine_leak_check.Func1")) c.Signal() time.Sleep(10 * time.Millisecond) } diff --git a/tests/integ/events/frameworkevents/common.go b/tests/integ/events/frameworkevents/common.go index 0428f0dc..eef35e07 100644 --- a/tests/integ/events/frameworkevents/common.go +++ b/tests/integ/events/frameworkevents/common.go @@ -16,6 +16,8 @@ import ( "github.com/facebookincubator/contest/pkg/event/frameworkevent" "github.com/facebookincubator/contest/pkg/storage" "github.com/facebookincubator/contest/pkg/types" + "github.com/facebookincubator/contest/pkg/xcontext/bundles/logrusctx" + "github.com/facebookincubator/contest/pkg/xcontext/logger" "github.com/facebookincubator/contest/tests/integ/common" "github.com/stretchr/testify/assert" @@ -23,6 +25,10 @@ import ( "github.com/stretchr/testify/suite" ) +var ( + ctx = logrusctx.NewContext(logger.LevelDebug) +) + func mustBuildQuery(t require.TestingT, queryFields ...frameworkevent.QueryField) *frameworkevent.Query { eventQuery, err := frameworkevent.BuildQuery(queryFields...) require.NoError(t, err) @@ -37,11 +43,11 @@ func populateFrameworkEvents(backend storage.Storage, emitTime time.Time) error eventFirst := frameworkevent.Event{JobID: 1, EventName: "AFrameworkEvent", Payload: payload, EmitTime: emitTime} eventSecond := frameworkevent.Event{JobID: 1, EventName: "BFrameworkEvent", Payload: payload, EmitTime: emitTime} - err := backend.StoreFrameworkEvent(eventFirst) + err := backend.StoreFrameworkEvent(ctx, eventFirst) if err != nil { return err } - return backend.StoreFrameworkEvent(eventSecond) + return backend.StoreFrameworkEvent(ctx, eventSecond) } func assertFrameworkEvents(t *testing.T, ev []frameworkevent.Event, emitTime time.Time) { @@ -98,7 +104,7 @@ func (suite *FrameworkEventsSuite) TestRetrieveSingleFrameworkEvent() { require.NoError(suite.T(), err) eventQuery := mustBuildQuery(suite.T(), frameworkevent.QueryEventName("AFrameworkEvent")) - results, err := suite.txStorage.GetFrameworkEvent(eventQuery) + results, err := suite.txStorage.GetFrameworkEvent(ctx, eventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 1, len(results)) @@ -108,7 +114,7 @@ func (suite *FrameworkEventsSuite) TestRetrieveSingleFrameworkEvent() { func (suite *FrameworkEventsSuite) TestRetrieveMultipleFrameworkEvents() { eventQuery := mustBuildQuery(suite.T(), frameworkevent.QueryJobID(1)) - results, err := suite.txStorage.GetFrameworkEvent(eventQuery) + results, err := suite.txStorage.GetFrameworkEvent(ctx, eventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 0, len(results)) @@ -117,7 +123,7 @@ func (suite *FrameworkEventsSuite) TestRetrieveMultipleFrameworkEvents() { require.NoError(suite.T(), err) eventQuery = mustBuildQuery(suite.T(), frameworkevent.QueryJobID(1)) - results, err = suite.txStorage.GetFrameworkEvent(eventQuery) + results, err = suite.txStorage.GetFrameworkEvent(ctx, eventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 2, len(results)) @@ -136,14 +142,14 @@ func (suite *FrameworkEventsSuite) TestRetrieveSingleFrameworkEventByEmitTime() require.NoError(suite.T(), err) eventQuery := mustBuildQuery(suite.T(), frameworkevent.QueryEmittedStartTime(emitTime)) - results, err := suite.txStorage.GetFrameworkEvent(eventQuery) + results, err := suite.txStorage.GetFrameworkEvent(ctx, eventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 2, len(results)) assertFrameworkEvents(suite.T(), results, emitTime) eventQuery = mustBuildQuery(suite.T(), frameworkevent.QueryEmittedStartTime(emitTime.Add(-delta))) - results, err = suite.txStorage.GetFrameworkEvent(eventQuery) + results, err = suite.txStorage.GetFrameworkEvent(ctx, eventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 4, len(results)) diff --git a/tests/integ/events/testevents/common.go b/tests/integ/events/testevents/common.go index faa86b94..7a555004 100644 --- a/tests/integ/events/testevents/common.go +++ b/tests/integ/events/testevents/common.go @@ -17,12 +17,18 @@ import ( "github.com/facebookincubator/contest/pkg/storage" "github.com/facebookincubator/contest/pkg/target" "github.com/facebookincubator/contest/pkg/types" + "github.com/facebookincubator/contest/pkg/xcontext/bundles/logrusctx" + "github.com/facebookincubator/contest/pkg/xcontext/logger" "github.com/facebookincubator/contest/tests/integ/common" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" ) +var ( + ctx = logrusctx.NewContext(logger.LevelDebug) +) + func mustBuildQuery(t require.TestingT, queryFields ...testevent.QueryField) *testevent.Query { eventQuery, err := testevent.BuildQuery(queryFields...) require.NoError(t, err) @@ -45,11 +51,11 @@ func populateTestEvents(backend storage.Storage, emitTime time.Time) error { eventFirst := testevent.Event{Header: &hdrFirst, Data: &dataFirst, EmitTime: emitTime} eventSecond := testevent.Event{Header: &hdrSecond, Data: &dataSecond, EmitTime: emitTime} - err := backend.StoreTestEvent(eventFirst) + err := backend.StoreTestEvent(ctx, eventFirst) if err != nil { return err } - return backend.StoreTestEvent(eventSecond) + return backend.StoreTestEvent(ctx, eventSecond) } func assertTestEvents(t *testing.T, ev []testevent.Event, emitTime time.Time) { @@ -104,7 +110,7 @@ func (suite *TestEventsSuite) TestRetrieveSingleTestEvent() { require.NoError(suite.T(), err) testEventQuery := mustBuildQuery(suite.T(), testevent.QueryTestName("ATestName")) - results, err := suite.txStorage.GetTestEvents(testEventQuery) + results, err := suite.txStorage.GetTestEvents(ctx, testEventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 1, len(results)) @@ -119,7 +125,7 @@ func (suite *TestEventsSuite) TestRetrieveMultipleTestEvents() { require.NoError(suite.T(), err) testEventQuery := mustBuildQuery(suite.T(), testevent.QueryTestStepLabel("TestStepLabel")) - results, err := suite.txStorage.GetTestEvents(testEventQuery) + results, err := suite.txStorage.GetTestEvents(ctx, testEventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 2, len(results)) @@ -138,14 +144,14 @@ func (suite *TestEventsSuite) TestRetrievesSingleTestEventByEmitTime() { require.NoError(suite.T(), err) testEventQuery := mustBuildQuery(suite.T(), testevent.QueryEmittedStartTime(emitTime)) - results, err := suite.txStorage.GetTestEvents(testEventQuery) + results, err := suite.txStorage.GetTestEvents(ctx, testEventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 2, len(results)) assertTestEvents(suite.T(), results, emitTime) testEventQuery = mustBuildQuery(suite.T(), testevent.QueryEmittedStartTime(emitTime.Add(-delta))) - results, err = suite.txStorage.GetTestEvents(testEventQuery) + results, err = suite.txStorage.GetTestEvents(ctx, testEventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 4, len(results)) @@ -162,7 +168,7 @@ func (suite *TestEventsSuite) TestRetrievesMultipleTestEventsByName() { eventNames := []event.Name{event.Name("AEventName"), event.Name("BEventName")} testEventQuery := mustBuildQuery(suite.T(), testevent.QueryEventNames(eventNames)) - results, err := suite.txStorage.GetTestEvents(testEventQuery) + results, err := suite.txStorage.GetTestEvents(ctx, testEventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 2, len(results)) @@ -176,7 +182,7 @@ func (suite *TestEventsSuite) TestRetrieveSingleTestEventsByName() { require.NoError(suite.T(), err) testEventQuery := mustBuildQuery(suite.T(), testevent.QueryEventName(event.Name("AEventName"))) - results, err := suite.txStorage.GetTestEvents(testEventQuery) + results, err := suite.txStorage.GetTestEvents(ctx, testEventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 1, len(results)) @@ -193,7 +199,7 @@ func (suite *TestEventsSuite) TestRetrieveSingleTestEventsByNameAndJobID() { testevent.QueryEventName(event.Name("AEventName")), testevent.QueryJobID(1), ) - results, err := suite.txStorage.GetTestEvents(testEventQuery) + results, err := suite.txStorage.GetTestEvents(ctx, testEventQuery) require.NoError(suite.T(), err) assert.Equal(suite.T(), 1, len(results)) diff --git a/tests/integ/job/common.go b/tests/integ/job/common.go index 0b903da6..3f0b13a2 100644 --- a/tests/integ/job/common.go +++ b/tests/integ/job/common.go @@ -10,6 +10,8 @@ package test import ( "time" + "github.com/facebookincubator/contest/pkg/xcontext/bundles/logrusctx" + "github.com/facebookincubator/contest/pkg/xcontext/logger" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" @@ -49,6 +51,10 @@ var testDescs = ` ] ` +var ( + ctx = logrusctx.NewContext(logger.LevelDebug) +) + type JobSuite struct { suite.Suite @@ -78,7 +84,7 @@ func (suite *JobSuite) TestGetJobRequest() { RequestTime: time.Now(), JobDescriptor: jobDescriptorFirst, } - jobIDa, err := suite.txStorage.StoreJobRequest(&jobRequestFirst) + jobIDa, err := suite.txStorage.StoreJobRequest(ctx, &jobRequestFirst) require.NoError(t, err) jobRequestSecond := job.Request{ @@ -87,10 +93,10 @@ func (suite *JobSuite) TestGetJobRequest() { RequestTime: time.Now(), JobDescriptor: jobDescriptorSecond, } - jobIDb, err := suite.txStorage.StoreJobRequest(&jobRequestSecond) + jobIDb, err := suite.txStorage.StoreJobRequest(ctx, &jobRequestSecond) require.NoError(t, err) - request, err := suite.txStorage.GetJobRequest(jobIDa) + request, err := suite.txStorage.GetJobRequest(ctx, jobIDa) require.NoError(suite.T(), err) require.Equal(suite.T(), types.JobID(jobIDa), request.JobID) @@ -103,7 +109,7 @@ func (suite *JobSuite) TestGetJobRequest() { require.True(t, request.RequestTime.After(time.Now().Add(-2*time.Second))) require.True(t, request.RequestTime.Before(time.Now().Add(2*time.Second))) - request, err = suite.txStorage.GetJobRequest(jobIDb) + request, err = suite.txStorage.GetJobRequest(ctx, jobIDb) // Creation timestamp corresponds to the timestamp of the insertion into the // database. Assert that the timestamp retrieved from the database is within @@ -133,7 +139,7 @@ func (suite *JobSuite) TestListJobs() { RequestTime: time.Now(), JobDescriptor: jobDescriptorFirst, } - jobIDa, err := suite.txStorage.StoreJobRequest(&jobRequestFirst) + jobIDa, err := suite.txStorage.StoreJobRequest(ctx, &jobRequestFirst) require.NoError(t, err) jobRequestSecond := job.Request{ @@ -142,75 +148,75 @@ func (suite *JobSuite) TestListJobs() { RequestTime: time.Now(), JobDescriptor: jobDescriptorSecond, } - jobIDb, err := suite.txStorage.StoreJobRequest(&jobRequestSecond) + jobIDb, err := suite.txStorage.StoreJobRequest(ctx, &jobRequestSecond) require.NoError(t, err) var res []types.JobID // No match criteria - returns all jobs. - res, err = suite.txStorage.ListJobs(mustBuildQuery(t)) + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t)) require.NoError(t, err) require.Equal(t, []types.JobID{jobIDa, jobIDb}, res) // List by states - matching any state is enough. // Both jobs are currently in Unknown state. - res, err = suite.txStorage.ListJobs(mustBuildQuery(t, + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t, storage.QueryJobStates(job.JobStateCompleted, job.JobStateFailed), )) require.NoError(t, err) require.Empty(t, res) require.NotNil(t, res) - res, err = suite.txStorage.ListJobs(mustBuildQuery(t, + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t, storage.QueryJobStates(job.JobStateCompleted, job.JobStateFailed, job.JobStateUnknown), )) require.NoError(t, err) require.Equal(t, []types.JobID{jobIDa, jobIDb}, res) // Tag matching - single tag. - res, err = suite.txStorage.ListJobs(mustBuildQuery(t, + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t, storage.QueryJobTags("tests"))) require.NoError(t, err) require.Equal(t, []types.JobID{jobIDa}, res) // Tag matching - must match all tags. - res, err = suite.txStorage.ListJobs(mustBuildQuery(t, + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t, storage.QueryJobTags("integ", "tests"))) require.NoError(t, err) require.Equal(t, []types.JobID{jobIDa}, res) - res, err = suite.txStorage.ListJobs(mustBuildQuery(t, + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t, storage.QueryJobTags("integ", "tests", "no_such_tag"))) require.NoError(t, err) require.Empty(t, res) require.NotNil(t, res) // Match by state - without any events both jobs are in unknown state. - res, err = suite.txStorage.ListJobs(mustBuildQuery(t, + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t, storage.QueryJobStates(job.JobStateUnknown))) require.NoError(t, err) require.Equal(t, []types.JobID{jobIDa, jobIDb}, res) // Inject state events - require.NoError(t, suite.txStorage.StoreFrameworkEvent(frameworkevent.Event{ + require.NoError(t, suite.txStorage.StoreFrameworkEvent(ctx, frameworkevent.Event{ JobID: jobIDa, EventName: job.EventJobStarted, EmitTime: time.Unix(1, 0), })) - require.NoError(t, suite.txStorage.StoreFrameworkEvent(frameworkevent.Event{ + require.NoError(t, suite.txStorage.StoreFrameworkEvent(ctx, frameworkevent.Event{ JobID: jobIDb, EventName: job.EventJobStarted, EmitTime: time.Unix(2, 0), })) - require.NoError(t, suite.txStorage.StoreFrameworkEvent(frameworkevent.Event{ + require.NoError(t, suite.txStorage.StoreFrameworkEvent(ctx, frameworkevent.Event{ JobID: jobIDb, EventName: job.EventJobFailed, EmitTime: time.Unix(3, 0), })) - require.NoError(t, suite.txStorage.StoreFrameworkEvent(frameworkevent.Event{ + require.NoError(t, suite.txStorage.StoreFrameworkEvent(ctx, frameworkevent.Event{ JobID: jobIDa, EventName: job.EventJobCompleted, EmitTime: time.Unix(4, 0), })) // Multiple states - match any of them - res, err = suite.txStorage.ListJobs(mustBuildQuery(t, + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t, storage.QueryJobStates(job.JobStateCompleted, job.JobStateFailed, job.JobStatePaused))) require.NoError(t, err) require.Equal(t, []types.JobID{jobIDa, jobIDb}, res) // State and tag match - must match both - res, err = suite.txStorage.ListJobs(mustBuildQuery(t, + res, err = suite.txStorage.ListJobs(ctx, mustBuildQuery(t, storage.QueryJobStates(job.JobStateCompleted, job.JobStateFailed, job.JobStatePaused), storage.QueryJobTags("tests", "foo"), )) diff --git a/tests/integ/jobmanager/common.go b/tests/integ/jobmanager/common.go index 05f7488f..0d548037 100644 --- a/tests/integ/jobmanager/common.go +++ b/tests/integ/jobmanager/common.go @@ -83,6 +83,7 @@ type TestListener struct { // Serve implements the main logic of a dummy listener which talks to the API // layer to trigger actions in the JobManager func (tl *TestListener) Serve(ctx xcontext.Context, contestApi *api.API) error { + ctx.Logger().Debugf("Serving mock listener") for { select { case command := <-tl.commandCh: @@ -114,7 +115,7 @@ func (tl *TestListener) Serve(ctx xcontext.Context, contestApi *api.API) error { default: return nil } - case <-ctx.Done(): + case <-ctx.WaitFor(): return nil } } @@ -130,7 +131,7 @@ func pollForEvent(eventManager frameworkevent.EmitterFetcher, ev event.Name, job frameworkevent.QueryJobID(jobID), frameworkevent.QueryEventName(ev), } - ev, err := eventManager.Fetch(queryFields...) + ev, err := eventManager.Fetch(ctx, queryFields...) if err != nil { return nil, err } @@ -346,7 +347,7 @@ func (suite *TestJobManagerSuite) testExit( } // JobManager will emit a paused or cancelled event when the job completes - ev, err = suite.eventManager.Fetch( + ev, err = suite.eventManager.Fetch(ctx, frameworkevent.QueryJobID(jobID), frameworkevent.QueryEventName(expectedEvent), ) @@ -368,10 +369,10 @@ func (suite *TestJobManagerSuite) TestJobManagerJobStartSingle() { jobID, err := suite.startJob(jobDescriptorNoop) require.NoError(suite.T(), err) - _, err = suite.jsm.GetJobRequest(types.JobID(jobID)) + _, err = suite.jsm.GetJobRequest(ctx, types.JobID(jobID)) require.NoError(suite.T(), err) - r, err := suite.jsm.GetJobRequest(types.JobID(jobID + 1)) + r, err := suite.jsm.GetJobRequest(ctx, types.JobID(jobID + 1)) require.Error(suite.T(), err) require.NotEqual(suite.T(), nil, r) @@ -398,14 +399,14 @@ func (suite *TestJobManagerSuite) TestJobManagerJobReport() { require.Equal(suite.T(), 1, len(ev)) // A Report must be persisted for the Job - jobReport, err := suite.jsm.GetJobReport(types.JobID(jobID)) + jobReport, err := suite.jsm.GetJobReport(ctx, types.JobID(jobID)) require.NoError(suite.T(), err) require.Equal(suite.T(), 1, len(jobReport.RunReports)) require.Equal(suite.T(), 0, len(jobReport.FinalReports)) // Any other Job should not have a Job report, but fetching the // report should not error out - jobReport, err = suite.jsm.GetJobReport(types.JobID(2)) + jobReport, err = suite.jsm.GetJobReport(ctx, types.JobID(2)) require.NoError(suite.T(), err) require.Equal(suite.T(), &job.JobReport{JobID: 2}, jobReport) } @@ -453,7 +454,7 @@ func (suite *TestJobManagerSuite) TestJobManagerJobNotSuccessful() { require.NoError(suite.T(), err) require.Equal(suite.T(), 1, len(ev)) - jobReport, err := suite.jsm.GetJobReport(types.JobID(jobID)) + jobReport, err := suite.jsm.GetJobReport(ctx, types.JobID(jobID)) require.NoError(suite.T(), err) require.Equal(suite.T(), 1, len(jobReport.RunReports)) require.Equal(suite.T(), 0, len(jobReport.FinalReports)) @@ -471,7 +472,7 @@ func (suite *TestJobManagerSuite) TestJobManagerJobFailure() { require.NoError(suite.T(), err) require.Equal(suite.T(), 1, len(ev)) - jobReport, err := suite.jsm.GetJobReport(types.JobID(jobID)) + jobReport, err := suite.jsm.GetJobReport(ctx, types.JobID(jobID)) require.NoError(suite.T(), err) require.Equal(suite.T(), 1, len(jobReport.RunReports)) require.Equal(suite.T(), 0, len(jobReport.FinalReports)) @@ -489,7 +490,7 @@ func (suite *TestJobManagerSuite) TestJobManagerJobCrash() { require.NoError(suite.T(), err) require.Equal(suite.T(), 1, len(ev)) require.Contains(suite.T(), string(*ev[0].Payload), "TestStep crashed") - jobReport, err := suite.jsm.GetJobReport(types.JobID(jobID)) + jobReport, err := suite.jsm.GetJobReport(ctx, types.JobID(jobID)) require.NoError(suite.T(), err) // no reports are expected if the job crashes @@ -605,7 +606,7 @@ func (suite *TestJobManagerSuite) TestJobManagerDifferentInstances() { jobID, err = suite.startJob(jobDescriptorNoop) require.NoError(suite.T(), err) - _, err = suite.jsm.GetJobRequest(types.JobID(jobID)) + _, err = suite.jsm.GetJobRequest(ctx, types.JobID(jobID)) require.NoError(suite.T(), err) ev, err := pollForEvent(suite.eventManager, job.EventJobCompleted, jobID, 1*time.Second) diff --git a/tests/integ/test/testrunner_test.go b/tests/integ/test/testrunner_test.go index 36153341..78dde750 100644 --- a/tests/integ/test/testrunner_test.go +++ b/tests/integ/test/testrunner_test.go @@ -13,10 +13,11 @@ import ( "testing" "time" - "github.com/stretchr/testify/assert" + "github.com/facebookincubator/contest/pkg/xcontext" + "github.com/facebookincubator/contest/pkg/xcontext/bundles/logrusctx" + "github.com/facebookincubator/contest/pkg/xcontext/logger" "github.com/stretchr/testify/require" - "github.com/facebookincubator/contest/pkg/cerrors" "github.com/facebookincubator/contest/pkg/event" "github.com/facebookincubator/contest/pkg/pluginregistry" "github.com/facebookincubator/contest/pkg/runner" @@ -24,7 +25,6 @@ import ( "github.com/facebookincubator/contest/pkg/target" "github.com/facebookincubator/contest/pkg/test" "github.com/facebookincubator/contest/pkg/types" - "github.com/facebookincubator/contest/pkg/xcontext" "github.com/facebookincubator/contest/plugins/storage/memory" "github.com/facebookincubator/contest/plugins/teststeps/cmd" "github.com/facebookincubator/contest/plugins/teststeps/echo" @@ -43,6 +43,10 @@ var ( successTimeout = 5 * time.Second ) +var ( + ctx = logrusctx.NewContext(logger.LevelDebug) +) + var testSteps = map[string]test.TestStepFactory{ echo.Name: echo.New, example.Name: example.New, @@ -69,7 +73,7 @@ var testStepsEvents = map[string][]event.Name{ func TestMain(m *testing.M) { - pluginRegistry = pluginregistry.NewPluginRegistry() + pluginRegistry = pluginregistry.NewPluginRegistry(ctx) // Setup the PluginRegistry by registering TestSteps for name, tsfactory := range testSteps { if _, ok := testStepsEvents[name]; !ok { @@ -127,7 +131,7 @@ func TestSuccessfulCompletion(t *testing.T) { go func() { tr := runner.NewTestRunner() - err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID) + _, err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID, nil) errCh <- err }() select { @@ -138,180 +142,6 @@ func TestSuccessfulCompletion(t *testing.T) { } } -func TestPanicStep(t *testing.T) { - - jobID := types.JobID(1) - runID := types.RunID(1) - - ts1, err := pluginRegistry.NewTestStep("Panic") - require.NoError(t, err) - ts2, err := pluginRegistry.NewTestStep("Example") - require.NoError(t, err) - - params := make(test.TestStepParameters) - testSteps := []test.TestStepBundle{ - test.TestStepBundle{TestStep: ts1, TestStepLabel: "StageOne", Parameters: params}, - test.TestStepBundle{TestStep: ts2, TestStepLabel: "StageTwo", Parameters: params}, - } - - errCh := make(chan error, 1) - stateCtx := xcontext.Background() - - go func() { - tr := runner.NewTestRunner() - err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID) - errCh <- err - }() - select { - case err = <-errCh: - require.Error(t, err) - case <-time.After(successTimeout): - t.Errorf("test should return within timeout: %+v", successTimeout) - } -} - -func TestNoReturnStepWithCorrectTargetForwarding(t *testing.T) { - - jobID := types.JobID(1) - runID := types.RunID(1) - - ts1, err := pluginRegistry.NewTestStep("NoReturn") - require.NoError(t, err) - ts2, err := pluginRegistry.NewTestStep("Example") - require.NoError(t, err) - - params := make(test.TestStepParameters) - testSteps := []test.TestStepBundle{ - test.TestStepBundle{TestStep: ts1, Parameters: params, TestStepLabel: "NoReturn"}, - test.TestStepBundle{TestStep: ts2, Parameters: params, TestStepLabel: "Example"}, - } - - stateCtx := xcontext.Background() - errCh := make(chan error, 1) - - timeouts := runner.TestRunnerTimeouts{ - StepInjectTimeout: 30 * time.Second, - MessageTimeout: 5 * time.Second, - ShutdownTimeout: 1 * time.Second, - StepShutdownTimeout: 1 * time.Second, - } - go func() { - tr := runner.NewTestRunnerWithTimeouts(timeouts) - err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID) - errCh <- err - }() - select { - case err = <-errCh: - require.Error(t, err) - if _, ok := err.(*cerrors.ErrTestStepsNeverReturned); !ok { - errString := fmt.Sprintf("Error returned by TestRunner should be of type ErrTestStepsNeverReturned: %v", err) - assert.FailNow(t, errString) - } - assert.NotNil(t, err.(*cerrors.ErrTestStepsNeverReturned)) - case <-time.After(successTimeout): - t.Errorf("test should return within timeout: %+v", successTimeout) - } -} - -func TestNoReturnStepWithoutTargetForwarding(t *testing.T) { - - jobID := types.JobID(1) - runID := types.RunID(1) - - ts1, err := pluginRegistry.NewTestStep("Hanging") - require.NoError(t, err) - ts2, err := pluginRegistry.NewTestStep("Example") - require.NoError(t, err) - - params := make(test.TestStepParameters) - testSteps := []test.TestStepBundle{ - test.TestStepBundle{TestStep: ts1, TestStepLabel: "StageOne", Parameters: params}, - test.TestStepBundle{TestStep: ts2, TestStepLabel: "StageTwo", Parameters: params}, - } - - stateCtx, cancel := xcontext.WithCancel(nil) - errCh := make(chan error, 1) - - var ( - StepInjectTimeout = 30 * time.Second - MessageTimeout = 5 * time.Second - ShutdownTimeout = 1 * time.Second - StepShutdownTimeout = 1 * time.Second - ) - timeouts := runner.TestRunnerTimeouts{ - StepInjectTimeout: StepInjectTimeout, - MessageTimeout: MessageTimeout, - ShutdownTimeout: ShutdownTimeout, - StepShutdownTimeout: StepShutdownTimeout, - } - - go func() { - tr := runner.NewTestRunnerWithTimeouts(timeouts) - err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID) - errCh <- err - }() - - testTimeout := 2 * time.Second - testShutdownTimeout := ShutdownTimeout + StepShutdownTimeout + 2 - select { - case err = <-errCh: - // The TestRunner should never return and should instead time out - assert.FailNow(t, "TestRunner should not return, received an error instead: %v", err) - case <-time.After(testTimeout): - // Assert cancellation signal and expect the TestRunner to return within - // testShutdownTimeout - cancel() - select { - case err = <-errCh: - // The test timed out, which is an error from the perspective of the JobManager - require.Error(t, err) - if _, ok := err.(*cerrors.ErrTestStepsNeverReturned); !ok { - errString := fmt.Sprintf("Error returned by TestRunner should be of type ErrTestStepsNeverReturned: %v", err) - assert.FailNow(t, errString) - } - case <-time.After(testShutdownTimeout): - assert.FailNow(t, "TestRunner should return after cancellation before timeout") - } - } -} - -func TestStepClosesChannels(t *testing.T) { - - jobID := types.JobID(1) - runID := types.RunID(1) - - ts1, err := pluginRegistry.NewTestStep("Channels") - require.NoError(t, err) - ts2, err := pluginRegistry.NewTestStep("Example") - require.NoError(t, err) - - params := make(test.TestStepParameters) - testSteps := []test.TestStepBundle{ - test.TestStepBundle{TestStep: ts1, TestStepLabel: "StageOne", Parameters: params}, - test.TestStepBundle{TestStep: ts2, TestStepLabel: "StageTwo", Parameters: params}, - } - - stateCtx := xcontext.Background() - errCh := make(chan error, 1) - - go func() { - tr := runner.NewTestRunner() - err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID) - errCh <- err - }() - - testTimeout := 2 * time.Second - select { - case err = <-errCh: - if _, ok := err.(*cerrors.ErrTestStepClosedChannels); !ok { - errString := fmt.Sprintf("Error returned by TestRunner should be of type ErrTestStepClosedChannels, got %T(%v)", err, err) - assert.FailNow(t, errString) - } - case <-time.After(testTimeout): - assert.FailNow(t, "TestRunner should not time out") - } -} - func TestCmdPlugin(t *testing.T) { jobID := types.JobID(1) @@ -337,7 +167,7 @@ func TestCmdPlugin(t *testing.T) { go func() { tr := runner.NewTestRunner() - err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID) + _, err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID, nil) errCh <- err }() @@ -352,37 +182,3 @@ func TestCmdPlugin(t *testing.T) { t.Errorf("test should return within timeout: %+v", successTimeout) } } - -func TestNoRunTestStepIfNoTargets(t *testing.T) { - - jobID := types.JobID(1) - runID := types.RunID(1) - - ts1, err := pluginRegistry.NewTestStep("Fail") - require.NoError(t, err) - ts2, err := pluginRegistry.NewTestStep("Crash") - require.NoError(t, err) - - params := make(test.TestStepParameters) - testSteps := []test.TestStepBundle{ - {TestStep: ts1, TestStepLabel: "StageOne", Parameters: params}, - {TestStep: ts2, TestStepLabel: "StageTwo", Parameters: params}, - } - - stateCtx := xcontext.Background() - errCh := make(chan error, 1) - - go func() { - tr := runner.NewTestRunner() - err := tr.Run(stateCtx, &test.Test{TestStepsBundles: testSteps}, targets, jobID, runID) - errCh <- err - }() - - testTimeout := 2 * time.Second - select { - case err = <-errCh: - assert.Nil(t, err, "the Crash TestStep shouldn't be ran") - case <-time.After(testTimeout): - assert.FailNow(t, "TestRunner should not time out") - } -} diff --git a/tests/plugins/teststeps/badtargets/badtargets.go b/tests/plugins/teststeps/badtargets/badtargets.go new file mode 100644 index 00000000..e7fa2e93 --- /dev/null +++ b/tests/plugins/teststeps/badtargets/badtargets.go @@ -0,0 +1,111 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +package badtargets + +import ( + "fmt" + + "github.com/facebookincubator/contest/pkg/cerrors" + "github.com/facebookincubator/contest/pkg/event" + "github.com/facebookincubator/contest/pkg/event/testevent" + "github.com/facebookincubator/contest/pkg/target" + "github.com/facebookincubator/contest/pkg/test" + "github.com/facebookincubator/contest/pkg/xcontext" +) + +// Name is the name used to look this plugin up. +const Name = "BadTargets" + +// Events defines the events that a TestStep is allow to emit +var Events = []event.Name{} + +type badTargets struct { +} + +// Name returns the name of the Step +func (ts *badTargets) Name() string { + return Name +} + +// Run executes a step that messes up the flow of targets. +func (ts *badTargets) Run(ctx xcontext.Context, ch test.TestStepChannels, params test.TestStepParameters, ev testevent.Emitter) error { + for { + select { + case tgt, ok := <-ch.In: + if !ok { + return nil + } + switch tgt.ID { + case "TDrop": + // ... crickets ... + case "TGood": + // We should not depend on pointer matching, so emit a copy. + tgt2 := *tgt + select { + case ch.Out <- &tgt2: + case <-ctx.Done(): + return xcontext.Canceled + } + case "TDup": + select { + case ch.Out <- tgt: + case <-ctx.Done(): + return xcontext.Canceled + } + select { + case ch.Out <- tgt: + case <-ctx.Done(): + return xcontext.Canceled + } + case "TExtra": + tgt2 := &target.Target{ID: "TExtra2"} + select { + case ch.Out <- tgt: + case <-ctx.Done(): + return xcontext.Canceled + } + select { + case ch.Out <- tgt2: + case <-ctx.Done(): + return xcontext.Canceled + } + case "T1": + // Mangle the returned target name. + tgt2 := &target.Target{ID: tgt.ID + "XXX"} + select { + case ch.Out <- tgt2: + case <-ctx.Done(): + return xcontext.Canceled + } + default: + return fmt.Errorf("Unexpected target name: %q", tgt.ID) + } + case <-ctx.Done(): + return xcontext.Canceled + } + } +} + +// ValidateParameters validates the parameters associated to the TestStep +func (ts *badTargets) ValidateParameters(ctx xcontext.Context, params test.TestStepParameters) error { + return nil +} + +// Resume tries to resume a previously interrupted test step. ExampleTestStep +// cannot resume. +func (ts *badTargets) Resume(ctx xcontext.Context, ch test.TestStepChannels, params test.TestStepParameters, ev testevent.EmitterFetcher) error { + return &cerrors.ErrResumeNotSupported{StepName: Name} +} + +// CanResume tells whether this step is able to resume. +func (ts *badTargets) CanResume() bool { + return false +} + +// New creates a new badTargets step +func New() test.TestStep { + return &badTargets{} +} diff --git a/tests/plugins/teststeps/channels/channels.go b/tests/plugins/teststeps/channels/channels.go index 706e5bee..04879cc0 100644 --- a/tests/plugins/teststeps/channels/channels.go +++ b/tests/plugins/teststeps/channels/channels.go @@ -16,7 +16,7 @@ import ( // Name is the name used to look this plugin up. var Name = "Channels" -// Events defines the events that a TestStep is allow to emit +// Events defines the events that a TestStep is allowed to emit var Events = []event.Name{} type channels struct { @@ -27,12 +27,14 @@ func (ts *channels) Name() string { return Name } -// Run executes a step which does never return.s +// Run executes a step that runs fine but closes its output channels on exit. func (ts *channels) Run(ctx xcontext.Context, ch test.TestStepChannels, params test.TestStepParameters, ev testevent.Emitter) error { for target := range ch.In { ch.Out <- target } + // This is bad, do not do this. close(ch.Out) + close(ch.Err) return nil } diff --git a/tests/plugins/teststeps/fail/fail.go b/tests/plugins/teststeps/fail/fail.go index dcccefcc..544ed5f9 100644 --- a/tests/plugins/teststeps/fail/fail.go +++ b/tests/plugins/teststeps/fail/fail.go @@ -38,7 +38,7 @@ func (ts *fail) Run(ctx xcontext.Context, ch test.TestStepChannels, params test. return nil } ch.Err <- cerrors.TargetError{Target: target, Err: fmt.Errorf("Integration test failure for %v", target)} - case <-ctx.WaitFor(): + case <-ctx.Done(): return nil } } diff --git a/tests/plugins/teststeps/hanging/hanging.go b/tests/plugins/teststeps/hanging/hanging.go index 0c42e218..1b8ec769 100644 --- a/tests/plugins/teststeps/hanging/hanging.go +++ b/tests/plugins/teststeps/hanging/hanging.go @@ -27,7 +27,7 @@ func (ts *hanging) Name() string { return Name } -// Run executes a step which does never return. +// Run executes a step that does not process any targets and never returns. func (ts *hanging) Run(ctx xcontext.Context, ch test.TestStepChannels, params test.TestStepParameters, ev testevent.Emitter) error { channel := make(chan struct{}) <-channel diff --git a/tests/plugins/teststeps/noreturn/noreturn.go b/tests/plugins/teststeps/noreturn/noreturn.go index fb4b7bd8..af8d432c 100644 --- a/tests/plugins/teststeps/noreturn/noreturn.go +++ b/tests/plugins/teststeps/noreturn/noreturn.go @@ -27,7 +27,7 @@ func (ts *noreturnStep) Name() string { return Name } -// Run executes a step which does never return. +// Run executes a step that never returns. func (ts *noreturnStep) Run(ctx xcontext.Context, ch test.TestStepChannels, params test.TestStepParameters, ev testevent.Emitter) error { for target := range ch.In { ch.Out <- target diff --git a/tests/plugins/teststeps/teststep/teststep.go b/tests/plugins/teststeps/teststep/teststep.go new file mode 100644 index 00000000..fece1df4 --- /dev/null +++ b/tests/plugins/teststeps/teststep/teststep.go @@ -0,0 +1,160 @@ +// Copyright (c) Facebook, Inc. and its affiliates. +// +// This source code is licensed under the MIT license found in the +// LICENSE file in the root directory of this source tree. + +package teststep + +import ( + "fmt" + "math/rand" + "strconv" + "strings" + "time" + + "github.com/facebookincubator/contest/pkg/cerrors" + "github.com/facebookincubator/contest/pkg/event" + "github.com/facebookincubator/contest/pkg/event/testevent" + "github.com/facebookincubator/contest/pkg/target" + "github.com/facebookincubator/contest/pkg/test" + "github.com/facebookincubator/contest/pkg/xcontext" + "github.com/facebookincubator/contest/plugins/teststeps" +) + +var Name = "Test" + +const ( + // A comma-delimited list of target IDs to fail on. + FailTargetsParam = "FailTargets" + // Alternatively, fail this percentage of targets at random. + FailPctParam = "FailPct" + // A comma-delimited list of target IDs to delay and by how much, ID=delay_ms. + DelayTargetsParam = "DelayTargets" +) + +const ( + StartedEvent = event.Name("TestStartedEvent") + FinishedEvent = event.Name("TestFinishedEvent") + FailedEvent = event.Name("TestFailedEvent") + StepRunningEvent = event.Name("TestStepRunningEvent") + StepFinishedEvent = event.Name("TestStepFinishedEvent") +) + +var Events = []event.Name{StartedEvent, FinishedEvent, FailedEvent, StepRunningEvent, StepFinishedEvent} + +type Step struct { + failPct int64 + failTargets map[string]bool + delayTargets map[string]time.Duration +} + +// Name returns the name of the Step +func (ts Step) Name() string { + return Name +} + +func (ts *Step) shouldFail(t *target.Target, params test.TestStepParameters) bool { + if ts.failTargets[t.ID] { + return true + } + if ts.failPct > 0 { + roll := rand.Int63n(101) + return (roll <= ts.failPct) + } + return false +} + +// Run executes the example step. +func (ts *Step) Run(ctx xcontext.Context, ch test.TestStepChannels, params test.TestStepParameters, ev testevent.Emitter) error { + f := func(ctx xcontext.Context, target *target.Target) error { + // Sleep to ensure TargetIn fires first. This simplifies test assertions. + time.Sleep(20 * time.Millisecond) + if err := ev.Emit(ctx, testevent.Data{EventName: StartedEvent, Target: target, Payload: nil}); err != nil { + return fmt.Errorf("failed to emit start event: %v", err) + } + delay := ts.delayTargets[target.ID] + if delay == 0 { + delay = ts.delayTargets["*"] + } + select { + case <-time.After(delay): + case <-ctx.Done(): + return xcontext.Canceled + } + if ts.shouldFail(target, params) { + if err := ev.Emit(ctx, testevent.Data{EventName: FailedEvent, Target: target, Payload: nil}); err != nil { + return fmt.Errorf("failed to emit finished event: %v", err) + } + return fmt.Errorf("target failed") + } else { + if err := ev.Emit(ctx, testevent.Data{EventName: FinishedEvent, Target: target, Payload: nil}); err != nil { + return fmt.Errorf("failed to emit failed event: %v", err) + } + } + return nil + } + if err := ev.Emit(ctx, testevent.Data{EventName: StepRunningEvent}); err != nil { + return fmt.Errorf("failed to emit failed event: %v", err) + } + res := teststeps.ForEachTarget(Name, ctx, ch, f) + if err := ev.Emit(ctx, testevent.Data{EventName: StepFinishedEvent}); err != nil { + return fmt.Errorf("failed to emit failed event: %v", err) + } + return res +} + +// ValidateParameters validates the parameters associated to the TestStep +func (ts *Step) ValidateParameters(_ xcontext.Context, params test.TestStepParameters) error { + targetsToFail := params.GetOne(FailTargetsParam).String() + if len(targetsToFail) > 0 { + for _, t := range strings.Split(targetsToFail, ",") { + ts.failTargets[t] = true + } + } + targetsToDelay := params.GetOne(DelayTargetsParam).String() + if len(targetsToDelay) > 0 { + for _, e := range strings.Split(targetsToDelay, ",") { + kv := strings.Split(e, "=") + if len(kv) != 2 { + continue + } + v, err := strconv.Atoi(kv[1]) + if err != nil { + return fmt.Errorf("invalid FailTargets: %w", err) + } + ts.delayTargets[kv[0]] = time.Duration(v) * time.Millisecond + } + } + if params.GetOne(FailPctParam).String() != "" { + if pct, err := params.GetInt(FailPctParam); err == nil { + ts.failPct = pct + } else { + return fmt.Errorf("invalid FailPct: %w", err) + } + } + return nil +} + +// Resume tries to resume a previously interrupted test step. TestTestStep +// cannot resume. +func (ts *Step) Resume(ctx xcontext.Context, ch test.TestStepChannels, _ test.TestStepParameters, ev testevent.EmitterFetcher) error { + return &cerrors.ErrResumeNotSupported{StepName: Name} +} + +// CanResume tells whether this step is able to resume. +func (ts *Step) CanResume() bool { + return false +} + +// New initializes and returns a new TestStep. +func New() test.TestStep { + return &Step{ + failTargets: make(map[string]bool), + delayTargets: make(map[string]time.Duration), + } +} + +// Load returns the name, factory and events which are needed to register the step. +func Load() (string, test.TestStepFactory, []event.Name) { + return Name, New, Events +}