Skip to content

Commit

Permalink
testing framework: refactor interrupt logic
Browse files Browse the repository at this point in the history
  • Loading branch information
liamcervante committed Jul 17, 2023
1 parent 7d1e918 commit f1ff89e
Show file tree
Hide file tree
Showing 7 changed files with 825 additions and 88 deletions.
151 changes: 68 additions & 83 deletions internal/command/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"sort"
"strings"
"time"

"github.com/hashicorp/hcl/v2"

Expand Down Expand Up @@ -87,6 +88,14 @@ func (c *TestCommand) Run(rawArgs []string) int {
}(),
}

// We have two levels of interrupt here. A 'stop' and a 'cancel'. A 'stop'
// is a soft request to stop. We'll finish the current test, do the tidy up,
// but then skip all remaining tests and run blocks. A 'cancel' is a hard
// request to stop now. We'll cancel the current operation immediately
// even if it's a delete operation, and we won't clean up any infrastructure
// if we're halfway through a test. We'll print details explaining what was
// stopped so the user can do their best to recover from it.

runningCtx, done := context.WithCancel(context.Background())
stopCtx, stop := context.WithCancel(runningCtx)
cancelCtx, cancel := context.WithCancel(context.Background())
Expand All @@ -111,7 +120,7 @@ func (c *TestCommand) Run(rawArgs []string) int {

go func() {
defer logging.PanicHandler()
defer done() // We completed successfully.
defer done()
defer stop()
defer cancel()

Expand All @@ -136,10 +145,12 @@ func (c *TestCommand) Run(rawArgs []string) int {
runner.Cancelled = true
cancel()

// TODO(liamcervante): Should we add a timer here? That would mean
// after 5 seconds we just give up and don't even print out the
// lists of resources left behind?
<-runningCtx.Done() // Nothing left to do now but wait.
// We'll wait 5 seconds for this operation to finish now, regardless
// of whether it finishes successfully or not.
select {
case <-runningCtx.Done():
case <-time.After(5 * time.Second):
}

case <-runningCtx.Done():
// The application finished nicely after the request was stopped.
Expand Down Expand Up @@ -240,13 +251,13 @@ func (runner *TestRunner) ExecuteTestFile(file *moduletest.File) {
if run.Config.ConfigUnderTest != nil {
// Then we want to execute a different module under a kind of
// sandbox.
state := runner.ExecuteTestRun(run, file, states.NewState(), run.Config.ConfigUnderTest)
state := runner.ExecuteTestRun(mgr, run, file, states.NewState(), run.Config.ConfigUnderTest)
mgr.States = append(mgr.States, &TestModuleState{
State: state,
Run: run,
})
} else {
mgr.State = runner.ExecuteTestRun(run, file, mgr.State, runner.Config)
mgr.State = runner.ExecuteTestRun(mgr, run, file, mgr.State, runner.Config)
}
file.Status = file.Status.Merge(run.Status)
}
Expand All @@ -257,7 +268,7 @@ func (runner *TestRunner) ExecuteTestFile(file *moduletest.File) {
}
}

func (runner *TestRunner) ExecuteTestRun(run *moduletest.Run, file *moduletest.File, state *states.State, config *configs.Config) *states.State {
func (runner *TestRunner) ExecuteTestRun(mgr *TestStateManager, run *moduletest.Run, file *moduletest.File, state *states.State, config *configs.Config) *states.State {
if runner.Cancelled {
// Don't do anything, just give up and return immediately.
// The surrounding functions should stop this even being called, but in
Expand Down Expand Up @@ -285,7 +296,7 @@ func (runner *TestRunner) ExecuteTestRun(run *moduletest.Run, file *moduletest.F
return state
}

ctx, plan, state, diags := runner.execute(run, file, config, state, &terraform.PlanOpts{
ctx, plan, state, diags := runner.execute(mgr, run, file, config, state, &terraform.PlanOpts{
Mode: func() plans.Mode {
switch run.Config.Options.Mode {
case configs.RefreshOnlyTestMode:
Expand Down Expand Up @@ -339,7 +350,7 @@ func (runner *TestRunner) ExecuteTestRun(run *moduletest.Run, file *moduletest.F
//
// The command argument decides whether it executes only a plan or also applies
// the plan it creates during the planning.
func (runner *TestRunner) execute(run *moduletest.Run, file *moduletest.File, config *configs.Config, state *states.State, opts *terraform.PlanOpts, command configs.TestCommand) (*terraform.Context, *plans.Plan, *states.State, tfdiags.Diagnostics) {
func (runner *TestRunner) execute(mgr *TestStateManager, run *moduletest.Run, file *moduletest.File, config *configs.Config, state *states.State, opts *terraform.PlanOpts, command configs.TestCommand) (*terraform.Context, *plans.Plan, *states.State, tfdiags.Diagnostics) {
if opts.Mode == plans.DestroyMode && state.Empty() {
// Nothing to do!
return nil, nil, state, nil
Expand Down Expand Up @@ -397,7 +408,7 @@ func (runner *TestRunner) execute(run *moduletest.Run, file *moduletest.File, co
defer done()
plan, planDiags = tfCtx.Plan(config, state, opts)
}()
waitDiags, cancelled := runner.wait(tfCtx, runningCtx, opts, identifier)
waitDiags, cancelled := runner.wait(tfCtx, runningCtx, mgr, run, file, nil)
planDiags = planDiags.Append(waitDiags)

diags = diags.Append(planDiags)
Expand Down Expand Up @@ -430,77 +441,58 @@ func (runner *TestRunner) execute(run *moduletest.Run, file *moduletest.File, co
defer done()
updated, applyDiags = tfCtx.Apply(plan, config)
}()
waitDiags, _ = runner.wait(tfCtx, runningCtx, opts, identifier)
waitDiags, _ = runner.wait(tfCtx, runningCtx, mgr, run, file, plan)
applyDiags = applyDiags.Append(waitDiags)

diags = diags.Append(applyDiags)
return tfCtx, plan, updated, diags
}

func (runner *TestRunner) wait(ctx *terraform.Context, runningCtx context.Context, opts *terraform.PlanOpts, identifier string) (diags tfdiags.Diagnostics, cancelled bool) {
select {
case <-runner.StoppedCtx.Done():
func (runner *TestRunner) wait(ctx *terraform.Context, runningCtx context.Context, mgr *TestStateManager, run *moduletest.Run, file *moduletest.File, plan *plans.Plan) (diags tfdiags.Diagnostics, cancelled bool) {

if opts.Mode != plans.DestroyMode {
// It takes more impetus from the user to cancel the cleanup
// operations, so we only do this during the actual tests.
cancelled = true
go ctx.Stop()
}
// This function handles what happens when the user presses the second
// interrupt. This is a "hard cancel", we are going to stop doing whatever
// it is we're doing. This means even if we're halfway through creating or
// destroying infrastructure we just give up.
handleCancelled := func() {

select {
case <-runner.CancelledCtx.Done():
states := make(map[*moduletest.Run]*states.State)
states[nil] = mgr.State
for _, module := range mgr.States {
states[module.Run] = module.State
}
runner.View.FatalInterruptSummary(run, file, states, plan)

// If the user still really wants to cancel, then we'll oblige
// even during the destroy mode at this point.
if opts.Mode == plans.DestroyMode {
cancelled = true
go ctx.Stop()
}
cancelled = true
go ctx.Stop()

diags = diags.Append(tfdiags.Sourceless(
tfdiags.Error,
"Terraform Test Interrupted",
fmt.Sprintf("Terraform test was interrupted while executing %s. This means resources that were created during the test may have been left active, please monitor the rest of the output closely as any dangling resources will be listed.", identifier)))

// It is actually quite disastrous if we exit early at this
// point as it means we'll have created resources that we
// haven't tracked at all. So for now, we won't ever actually
// forcibly terminate the test. When cancelled, we make the
// clean up faster by not performing it but we should still
// always manage to give an accurate list of resources left
// alive.
// TODO(liamcervante): Consider adding a timer here, so that we
// exit early even if that means some resources are just lost
// forever.
<-runningCtx.Done() // Just wait for things to finish now.
// Just wait for things to finish now, the overall test execution will
// exit early if this takes too long.
<-runningCtx.Done()
}

// This function handles what happens when the user presses the first
// interrupt. This is essentially a "soft cancel", we're not going to do
// anything but just wait for things to finish safely. But, we do listen
// for the crucial second interrupt which will prompt a hard stop / cancel.
handleStopped := func() {
select {
case <-runner.CancelledCtx.Done():
// We've been asked again. This time we stop whatever we're doing
// and abandon all attempts to do anything reasonable.
handleCancelled()
case <-runningCtx.Done():
// The operation exited nicely when asked!
// Do nothing, we finished safely and skipping the remaining tests
// will be handled elsewhere.
}
case <-runner.CancelledCtx.Done():
// This shouldn't really happen, as we'd expect to see the StoppedCtx
// being triggered first. But, just in case.
cancelled = true
go ctx.Stop()

diags = diags.Append(tfdiags.Sourceless(
tfdiags.Error,
"Terraform Test Interrupted",
fmt.Sprintf("Terraform test was interrupted while executing %s. This means resources that were created during the test may have been left active, please monitor the rest of the output closely as any dangling resources will be listed.", identifier)))

// It is actually quite disastrous if we exit early at this
// point as it means we'll have created resources that we
// haven't tracked at all. So for now, we won't ever actually
// forcibly terminate the test. When cancelled, we make the
// clean up faster by not performing it but we should still
// always manage to give an accurate list of resources left
// alive.
// TODO(liamcervante): Consider adding a timer here, so that we
// exit early even if that means some resources are just lost
// forever.
<-runningCtx.Done() // Just wait for things to finish now.
}

select {
case <-runner.StoppedCtx.Done():
handleStopped()
case <-runner.CancelledCtx.Done():
handleCancelled()
case <-runningCtx.Done():
// The operation exited normally.
}
Expand Down Expand Up @@ -541,27 +533,21 @@ type TestModuleState struct {

func (manager *TestStateManager) cleanupStates(file *moduletest.File) {
if manager.runner.Cancelled {

// We are still going to print out the resources that we have left
// even though the user asked for an immediate exit.

var diags tfdiags.Diagnostics
diags = diags.Append(tfdiags.Sourceless(tfdiags.Error, "Test cleanup skipped due to immediate exit", "Terraform could not clean up the state left behind due to immediate interrupt."))
manager.runner.View.DestroySummary(diags, nil, file, manager.State)

for _, module := range manager.States {
manager.runner.View.DestroySummary(diags, module.Run, file, module.State)
}

// Don't try and clean anything up if the execution has been cancelled.
return
}

// First, we'll clean up the main state.
_, _, state, diags := manager.runner.execute(nil, file, manager.runner.Config, manager.State, &terraform.PlanOpts{
_, _, state, diags := manager.runner.execute(manager, nil, file, manager.runner.Config, manager.State, &terraform.PlanOpts{
Mode: plans.DestroyMode,
}, configs.ApplyTestCommand)
manager.runner.View.DestroySummary(diags, nil, file, state)

if manager.runner.Cancelled {
// In case things were cancelled during the last execution.
return
}

// Then we'll clean up the additional states for custom modules in reverse
// order.
for ix := len(manager.States); ix > 0; ix-- {
Expand All @@ -570,11 +556,10 @@ func (manager *TestStateManager) cleanupStates(file *moduletest.File) {
if manager.runner.Cancelled {
// In case the cancellation came while a previous state was being
// destroyed.
manager.runner.View.DestroySummary(diags, module.Run, file, module.State)
continue
return
}

_, _, state, diags := manager.runner.execute(module.Run, file, module.Run.Config.ConfigUnderTest, module.State, &terraform.PlanOpts{
_, _, state, diags := manager.runner.execute(manager, module.Run, file, module.Run.Config.ConfigUnderTest, module.State, &terraform.PlanOpts{
Mode: plans.DestroyMode,
}, configs.ApplyTestCommand)
manager.runner.View.DestroySummary(diags, module.Run, file, state)
Expand Down
13 changes: 13 additions & 0 deletions internal/command/test_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,19 @@ func TestTest_DoubleInterrupt(t *testing.T) {
t.Errorf("output didn't produce the right output:\n\n%s", output)
}

cleanupMessage := `Terraform was interrupted while executing main.tftest, and may not have performed the expected cleanup operations.
Terraform has already created the following resources from the module under test:
- test_resource.primary
- test_resource.secondary
- test_resource.tertiary`

// It's really important that the above message is printed, so we're testing
// for it specifically and making sure it contains all the resources.
if !strings.Contains(output, cleanupMessage) {
t.Errorf("output didn't produce the right output:\n\n%s", output)
}

// This time the test command shouldn't have cleaned up the resource because
// of the hard interrupt.
if provider.ResourceCount() != 3 {
Expand Down
6 changes: 6 additions & 0 deletions internal/command/testing/test_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"path"
"strings"
"time"

"github.com/hashicorp/go-uuid"
"github.com/zclconf/go-cty/cty"
Expand Down Expand Up @@ -217,6 +218,11 @@ func (provider *TestProvider) ApplyResourceChange(request providers.ApplyResourc
for ix := 0; ix < int(count); ix++ {
provider.Interrupt <- struct{}{}
}

// Wait for a second to make sure the interrupts are processed by
// Terraform before the provider finishes. This is an attempt to ensure
// the output of any tests that rely on this behaviour is deterministic.
time.Sleep(time.Second)
}

provider.Store.Put(provider.GetResourceKey(id.AsString()), resource)
Expand Down
11 changes: 6 additions & 5 deletions internal/command/views/json/message_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ const (
MessageRefreshComplete MessageType = "refresh_complete"

// Test messages
MessageTestAbstract MessageType = "test_abstract"
MessageTestFile MessageType = "test_file"
MessageTestRun MessageType = "test_run"
MessageTestSummary MessageType = "test_summary"
MessageTestCleanup MessageType = "test_cleanup"
MessageTestAbstract MessageType = "test_abstract"
MessageTestFile MessageType = "test_file"
MessageTestRun MessageType = "test_run"
MessageTestSummary MessageType = "test_summary"
MessageTestCleanup MessageType = "test_cleanup"
MessageTestInterrupt MessageType = "test_interrupt"
)
6 changes: 6 additions & 0 deletions internal/command/views/json/test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ type TestFailedResource struct {
DeposedKey string `json:"deposed_key,omitempty"`
}

// TestFatalInterrupt is a JSON-serialisable record describing a test that
// was interrupted. Every field is tagged omitempty, so empty fields are left
// out of the encoded output.
//
// NOTE(review): presumably this is the payload for the "test_interrupt"
// message emitted when a test run is hard-cancelled - confirm against the
// JSON view implementation.
type TestFatalInterrupt struct {
// State is a list of resources, encoded under the "state" key.
// NOTE(review): presumably the resources left in the main state of the
// module under test - confirm against the producer.
State []TestFailedResource `json:"state,omitempty"`
// States maps a string key to a list of resources, encoded under "states".
// NOTE(review): presumably keyed by the name of the run block that owns
// each additional module state - confirm against the producer.
States map[string][]TestFailedResource `json:"states,omitempty"`
// Planned is a list of strings, encoded under "planned".
// NOTE(review): presumably addresses of resources that were planned for
// creation when the interrupt arrived - confirm against the producer.
Planned []string `json:"planned,omitempty"`
}

// ToTestStatus converts a moduletest.Status into its JSON representation by
// lower-casing the status' string form.
func ToTestStatus(status moduletest.Status) TestStatus {
	lowered := strings.ToLower(status.String())
	return TestStatus(lowered)
}
Loading

0 comments on commit f1ff89e

Please sign in to comment.