Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

internal/monitor: mark controllers as unavailable #114

Merged
merged 1 commit into from
May 11, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/apitest/apitest.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ import (

external_jem "github.com/CanonicalLtd/jem"
"github.com/CanonicalLtd/jem/internal/idmtest"
"github.com/CanonicalLtd/jem/internal/jemserver"
"github.com/CanonicalLtd/jem/internal/jem"
"github.com/CanonicalLtd/jem/internal/jemserver"
"github.com/CanonicalLtd/jem/jemclient"
"github.com/CanonicalLtd/jem/params"
)
Expand Down
2 changes: 1 addition & 1 deletion internal/debugapi/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import (
"gopkg.in/errgo.v1"

"github.com/CanonicalLtd/jem/internal/jem"
"github.com/CanonicalLtd/jem/internal/jemserver"
"github.com/CanonicalLtd/jem/internal/jemerror"
"github.com/CanonicalLtd/jem/internal/jemserver"
"github.com/CanonicalLtd/jem/version"
)

Expand Down
55 changes: 51 additions & 4 deletions internal/jem/jem.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,9 +320,8 @@ func (j *JEM) SetModelManagedUser(modelName params.EntityPath, user string, info
// Note that this operation is not atomic.
func (j *JEM) DeleteController(path params.EntityPath) error {
// TODO (urosj) make this operation atomic.
logger.Infof("Deleting controller %s", path)
// Delete its models first.
_, err := j.DB.Models().RemoveAll(bson.D{{"controller", path}})
info, err := j.DB.Models().RemoveAll(bson.D{{"controller", path}})
if err != nil {
return errgo.Notef(err, "error deleting controller models")
}
Expand All @@ -332,8 +331,10 @@ func (j *JEM) DeleteController(path params.EntityPath) error {
return errgo.WithCausef(nil, params.ErrNotFound, "controller %q not found", path)
}
if err != nil {
logger.Errorf("deleted %d controller models for model but could not delete controller: %v", info.Removed, err)
return errgo.Notef(err, "cannot delete controller")
}
logger.Infof("deleted controller %v and %d associated models", path, info.Removed)
return nil
}

Expand Down Expand Up @@ -361,7 +362,6 @@ func (j *JEM) AddModel(m *mongodoc.Model) error {
func (j *JEM) DeleteModel(path params.EntityPath) error {
// TODO when we monitor model health, prohibit this method
// and delete the model automatically when it is destroyed.
logger.Infof("Deleting model %s", path)
// Check if model is also a controller.
var ctl mongodoc.Controller
err := j.DB.Controllers().FindId(path.String()).One(&ctl)
Expand All @@ -376,6 +376,7 @@ func (j *JEM) DeleteModel(path params.EntityPath) error {
if err != nil {
return errgo.Notef(err, "could not delete model")
}
logger.Infof("deleted model %s", path)
return nil
}

Expand Down Expand Up @@ -491,11 +492,11 @@ func (j *JEM) AddTemplate(tmpl *mongodoc.Template, canOverwrite bool) error {
// database. It returns an error with a params.ErrNotFound
// cause if the template was not found.
func (j *JEM) DeleteTemplate(path params.EntityPath) error {
logger.Infof("Deleting template %s", path)
err := j.DB.Templates().RemoveId(path.String())
if err != nil {
return errgo.WithCausef(nil, params.ErrNotFound, "template %q not found", path)
}
logger.Infof("deleted template %s", path)
return nil
}

Expand Down Expand Up @@ -560,6 +561,52 @@ func (j *JEM) SetControllerLocation(path params.EntityPath, location map[string]
return nil
}

// SetControllerAvailable marks the given controller as available.
// This method does not return an error when the controller doesn't exist.
func (j *JEM) SetControllerAvailable(ctlPath params.EntityPath) error {
if err := j.DB.Controllers().UpdateId(ctlPath.String(), bson.D{{
"$unset", bson.D{{"unavailablesince", nil}},
}}); err != nil {
if err == mgo.ErrNotFound {
// For symmetry with SetControllerUnavailableAt.
return nil
}
return errgo.Notef(err, "cannot update %v", ctlPath)
}
return nil
}

// SetControllerUnavailableAt marks the controller as having been unavailable
// since at least the given time. If the controller was already marked
// as unavailable, its time isn't changed.
// This method does not return an error when the controller doesn't exist.
func (j *JEM) SetControllerUnavailableAt(ctlPath params.EntityPath, t time.Time) error {
err := j.DB.Controllers().Update(
bson.D{
{"_id", ctlPath.String()},
{"unavailablesince", bson.D{{"$exists", false}}},
},
bson.D{
{"$set", bson.D{{"unavailablesince", t}}},
},
)
if err == nil {
return nil
}
if err == mgo.ErrNotFound {
// We don't know whether the not-found error is because there
// are no controllers with the given name (in which case we want
// to return a params.ErrNotFound error) or because there was
// one but it is already unavailable.
// We could fetch the controller to decide whether it's actually there
// or not, but because in practice we don't care if we're setting
// controller-unavailable on a non-existent controller, we'll
// save the round trip.
return nil
}
return errgo.Notef(err, "cannot update controller")
}

// ErrLeaseUnavailable is the error cause returned by AcquireMonitorLease
// when it cannot acquire the lease because it is unavailable.
var ErrLeaseUnavailable = errgo.Newf("cannot acquire lease")
Expand Down
51 changes: 51 additions & 0 deletions internal/jem/jem_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,57 @@ func (s *jemSuite) TestSetControllerLocation(c *gc.C) {
c.Assert(ctl.Location, gc.DeepEquals, map[string]string{"region": "us-east1"})
}

func (s *jemSuite) TestSetControllerAvailability(c *gc.C) {
ctlPath := params.EntityPath{"bob", "x"}
ctl := &mongodoc.Controller{
Path: ctlPath,
}
err := s.store.AddController(ctl, &mongodoc.Model{})

// Check that we can mark it as unavailable.
t0 := time.Now()
err = s.store.SetControllerUnavailableAt(ctlPath, t0)
c.Assert(err, gc.IsNil)

ctl, err = s.store.Controller(ctlPath)
c.Assert(err, gc.IsNil)
c.Assert(ctl.UnavailableSince.UTC(), jc.DeepEquals, mongodoc.Time(t0).UTC())

// Check that if we mark it unavailable again, it doesn't
// have any affect.
err = s.store.SetControllerUnavailableAt(ctlPath, t0.Add(time.Second))
c.Assert(err, gc.IsNil)

ctl, err = s.store.Controller(ctlPath)
c.Assert(err, gc.IsNil)
c.Assert(ctl.UnavailableSince.UTC(), jc.DeepEquals, mongodoc.Time(t0).UTC())

// Check that we can mark it as available again.
err = s.store.SetControllerAvailable(ctlPath)
c.Assert(err, gc.IsNil)

ctl, err = s.store.Controller(ctlPath)
c.Assert(err, gc.IsNil)
c.Assert(ctl.UnavailableSince, jc.Satisfies, time.Time.IsZero)

t1 := t0.Add(3 * time.Second)
// ... and that we can mark it as unavailable after that.
err = s.store.SetControllerUnavailableAt(ctlPath, t1)
c.Assert(err, gc.IsNil)

ctl, err = s.store.Controller(ctlPath)
c.Assert(err, gc.IsNil)
c.Assert(ctl.UnavailableSince.UTC(), jc.DeepEquals, mongodoc.Time(t1).UTC())
}

func (s *jemSuite) TestSetControllerAvailabilityWithNotFoundController(c *gc.C) {
ctlPath := params.EntityPath{"bob", "x"}
err := s.store.SetControllerUnavailableAt(ctlPath, time.Now())
c.Assert(err, gc.IsNil)
err = s.store.SetControllerAvailable(ctlPath)
c.Assert(err, gc.IsNil)
}

func (s *jemSuite) TestControllerLocationQuery(c *gc.C) {
for _, ctl := range []*mongodoc.Controller{{
Path: params.EntityPath{"bob", "aws-us-east-1"},
Expand Down
5 changes: 5 additions & 0 deletions internal/mongodoc/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ type Controller struct {

// Stats holds runtime information about the controller.
Stats ControllerStats

// UnavailableSince is empty when the controller is marked
// as available; otherwise it holds the time when it became
// unavailable.
UnavailableSince time.Time `bson:",omitempty"`
}

// ControllerStats holds statistics about a controller.
Expand Down
8 changes: 8 additions & 0 deletions internal/monitor/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,13 @@ func acquireLease(j jemInterface, ctlPath params.EntityPath, oldExpiry time.Time
func (m *controllerMonitor) watcher() error {
for {
logger.Debugf("monitor dialing controller %v", m.ctlPath)
dialStartTime := Clock.Now()
conn, err := m.dialAPI()
switch errgo.Cause(err) {
case nil:
if err := m.jem.SetControllerAvailable(m.ctlPath); err != nil {
return errgo.Notef(err, "cannot set controller availability")
}
err := m.watch(conn)
conn.Close()
if errgo.Cause(err) == tomb.ErrDying {
Expand All @@ -162,11 +166,15 @@ func (m *controllerMonitor) watcher() error {
// The controller has been removed or we've been explicitly stopped.
return tomb.ErrDying
case jem.ErrAPIConnection:
if err := m.jem.SetControllerUnavailableAt(m.ctlPath, dialStartTime); err != nil {
return errgo.Notef(err, "cannot set controller availability")
}
// We've failed to connect to the API. Log the error and
// try again.
// TODO update the controller doc with the error?
logger.Errorf("cannot connect to controller %v: %v", m.ctlPath, err)
// Sleep for a while so we don't batter the network.
// TODO exponentially backoff up to some limit.
select {
case <-m.tomb.Dying():
// The controllerMonitor is dying.
Expand Down
96 changes: 96 additions & 0 deletions internal/monitor/interface.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright 2016 Canonical Ltd.

package monitor

import (
"time"

"github.com/juju/juju/state/multiwatcher"

"github.com/CanonicalLtd/jem/internal/mongodoc"
"github.com/CanonicalLtd/jem/params"
)

// jemInterface holds the interface required by allMonitor to
// talk to the JEM database. It is defined as an interface so
// it can be faked for testing purposes.
type jemInterface interface {
// Close closes the JEM instance. This should be called when
// the JEM instance is finished with.
Close()

// Clone returns an independent copy of the receiver
// that uses a cloned database connection. The
// returned value must be closed after use.
Clone() jemInterface

// SetControllerStats sets the stats associated with the controller
// with the given path. It returns an error with a params.ErrNotFound
// cause if the controller does not exist.
SetControllerStats(ctlPath params.EntityPath, stats *mongodoc.ControllerStats) error

// SetControllerUnavailableAt marks the controller as having been unavailable
// since at least the given time. If the controller was already marked
// as unavailable, its time isn't changed.
// This method does not return an error when the controller doesn't exist.
SetControllerUnavailableAt(ctlPath params.EntityPath, t time.Time) error

// SetControllerAvailable marks the given controller as available.
// This method does not return an error when the controller doesn't exist.
SetControllerAvailable(ctlPath params.EntityPath) error

// SetModelLife sets the Life field of all models controlled
// by the given controller that have the given UUID.
// It does not return an error if there are no such models.
SetModelLife(ctlPath params.EntityPath, uuid string, life string) error

// AllControllers returns all the controllers in the system.
AllControllers() ([]*mongodoc.Controller, error)

// OpenAPI opens an API connection to the model with the given path
// and returns it along with the information used to connect.
// If the model does not exist, the error will have a cause
// of params.ErrNotFound.
//
// If the model API connection could not be made, the error
// will have a cause of jem.ErrAPIConnection.
//
// The returned connection must be closed when finished with.
OpenAPI(params.EntityPath) (jujuAPI, error)

// AcquireMonitorLease acquires or renews the lease on a controller.
// The lease will only be changed if the lease in the database
// has the given old expiry time and owner.
// When acquired, the lease will have the given new owner
// and expiration time.
//
// If newOwner is empty, the lease will be dropped, the
// returned time will be zero and newExpiry will be ignored.
//
// If the controller has been removed, an error with a params.ErrNotFound
// cause will be returned. If the lease has been obtained by someone else
// an error with a jem.ErrLeaseUnavailable cause will be returned.
AcquireMonitorLease(ctlPath params.EntityPath, oldExpiry time.Time, oldOwner string, newExpiry time.Time, newOwner string) (time.Time, error)
}

// jujuAPI represents an API connection to a Juju controller.
type jujuAPI interface {
// WatchAllModels returns an allWatcher, from which you can request
// the Next collection of Deltas (for all models).
WatchAllModels() (allWatcher, error)

// Close closes the API connection.
Close() error
}

// allWatcher represents a watcher of all events on a controller.
type allWatcher interface {
// Next returns a new set of deltas. It will block until there
// are deltas to return. The first calls to Next on a given watcher
// will return the entire state of the system without blocking.
Next() ([]multiwatcher.Delta, error)

// Stop stops the watcher and causes any outstanding Next calls
// to return an error.
Stop() error
}
34 changes: 32 additions & 2 deletions internal/monitor/internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ func (s *internalSuite) TestWatcher(c *gc.C) {
c.Assert(err, gc.IsNil)

// Start the watcher.
jshim := newJEMShimWithUpdateNotify(s.jem)
jshim := newJEMShimWithUpdateNotify(jemShim{s.jem})
m := &controllerMonitor{
ctlPath: ctlPath,
jem: jshim,
Expand Down Expand Up @@ -361,6 +361,11 @@ func (s *internalSuite) TestWatcherDialAPIError(c *gc.C) {
case <-time.After(jujutesting.ShortWait):
}

// Check that the controller is marked as unavailable.
ctl, err := s.jem.Controller(ctlPath)
c.Assert(err, gc.IsNil)
c.Assert(ctl.UnavailableSince.UTC(), gc.DeepEquals, s.clock.Now().UTC())

// Advance the time until past the retry time.
s.clock.Advance(apiConnectRetryDuration)

Expand All @@ -377,6 +382,31 @@ func (s *internalSuite) TestWatcherDialAPIError(c *gc.C) {
c.Assert(m.Wait(), gc.ErrorMatches, "cannot dial API for controller bob/foo: fatal error")
}

func (s *internalSuite) TestWatcherMarksControllerAvailable(c *gc.C) {
jshim := newJEMShimInMemory()
jshim1 := newJEMShimWithUpdateNotify(jemShimWithAPIOpener{
jemInterface: jshim,
openAPI: func(path params.EntityPath) (jujuAPI, error) {
return newJEMAPIShim(nil), nil
},
})
// Create a controller
ctlPath := params.EntityPath{"bob", "foo"}
addFakeController(jshim, ctlPath)
err := jshim1.SetControllerUnavailableAt(ctlPath, s.clock.Now())
c.Assert(err, gc.IsNil)

m := &controllerMonitor{
ctlPath: ctlPath,
jem: jshim,
ownerId: "jem1",
}
m.tomb.Go(m.watcher)
defer worker.Stop(m)

waitEvent(c, jshim1.controllerAvailabilitySet, "controller available")
}

// TestControllerMonitor tests that the controllerMonitor can be run with both the
// lease updater and the watcher in place.
func (s *internalSuite) TestControllerMonitor(c *gc.C) {
Expand All @@ -398,7 +428,7 @@ func (s *internalSuite) TestControllerMonitor(c *gc.C) {
expiry, err := acquireLease(jemShim{s.jem}, ctlPath, time.Time{}, "", "jem1")
c.Assert(err, gc.IsNil)

jshim := newJEMShimWithUpdateNotify(s.jem)
jshim := newJEMShimWithUpdateNotify(jemShim{s.jem})
m := newControllerMonitor(controllerMonitorParams{
ctlPath: ctlPath,
jem: jshim,
Expand Down
Loading