Skip to content

Commit

Permalink
Resolve deadlock if Docker is not fully initialized when it loads Tri…
Browse files Browse the repository at this point in the history
…dent

Closes: #160
  • Loading branch information
clintonk committed Sep 5, 2018
1 parent 4629d79 commit b3163c1
Show file tree
Hide file tree
Showing 10 changed files with 87 additions and 42 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

**Fixes:**
- Modified log messages about ONTAP media types not associated with performance classes (Issue [#158](https://github.com/NetApp/trident/issues/158)).
- **Docker:** Resolved issue where containers might not restart after restarting Docker (Issue [#160](https://github.com/NetApp/trident/issues/160)).

**Enhancements:**
- Added ability to set snapshotReserve in backend config files, volume creation options, and PVC annotations (Issue [#43](https://github.com/NetApp/trident/issues/43)).
Expand Down
71 changes: 56 additions & 15 deletions frontend/docker/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,23 @@ import (
"path/filepath"
"strings"
"sync"
"time"

"github.com/docker/go-plugins-helpers/volume"
log "github.com/sirupsen/logrus"

"github.com/cenkalti/backoff"
"github.com/netapp/trident/config"
"github.com/netapp/trident/core"
frontendcommon "github.com/netapp/trident/frontend/common"
"github.com/netapp/trident/storage"
"github.com/netapp/trident/utils"
)

const (
startupTimeout = 50 * time.Second
)

type Plugin struct {
orchestrator core.Orchestrator
driverName string
Expand Down Expand Up @@ -73,12 +79,14 @@ func registerDockerVolumePlugin(root string) error {
return nil
}

func getDockerVersion() (*Version, error) {
func (p *Plugin) initDockerVersion() {

time.Sleep(5 * time.Second)

// Get Docker version
out, err := exec.Command("docker", "version", "--format", "'{{json .}}'").CombinedOutput()
if err != nil {
return nil, err
log.Errorf("could not get Docker version: %v", err)
}
versionJSON := string(out)
versionJSON = strings.TrimSpace(versionJSON)
Expand All @@ -88,7 +96,7 @@ func getDockerVersion() (*Version, error) {
var version Version
err = json.Unmarshal([]byte(versionJSON), &version)
if err != nil {
return nil, err
log.Errorf("could not parse Docker version: %v", err)
}

log.WithFields(log.Fields{
Expand All @@ -102,11 +110,15 @@ func getDockerVersion() (*Version, error) {
"clientOS": version.Server.Os,
}).Debug("Docker version info.")

return &version, nil
p.version = &version
config.OrchestratorTelemetry.PlatformVersion = version.Server.Version
}

func (p *Plugin) Activate() error {

handler := volume.NewHandler(p)

// Start serving requests on a different thread
go func() {
var err error
if p.driverPort != "" {
Expand All @@ -128,6 +140,10 @@ func (p *Plugin) Activate() error {
log.Fatalf("Failed to activate Docker frontend: %v", err)
}
}()

// Read the Docker version on a different thread so we don't deadlock if Docker is also initializing
go p.initDockerVersion()

return nil
}

Expand All @@ -142,16 +158,8 @@ func (p *Plugin) GetName() string {

func (p *Plugin) Version() string {

// Get the Docker version on demand
if p.version == nil {

version, err := getDockerVersion()
if err != nil {
log.Errorf("Failed to get the Docker version: %v", err)
return "unknown"
}

p.version = version
return "unknown"
}

return p.version.Server.Version
Expand Down Expand Up @@ -199,7 +207,7 @@ func (p *Plugin) List() (*volume.ListResponse, error) {
"method": "List",
}).Debug("Docker frontend method is invoked.")

err := p.orchestrator.ReloadVolumes()
err := p.reloadVolumes()
if err != nil {
return &volume.ListResponse{}, p.dockerError(err)
}
Expand Down Expand Up @@ -228,7 +236,7 @@ func (p *Plugin) Get(request *volume.GetRequest) (*volume.GetResponse, error) {

// Get is called at the start of every 'docker volume' workflow except List & Unmount,
// so refresh the volume list here.
err := p.orchestrator.ReloadVolumes()
err := p.reloadVolumes()
if err != nil {
return &volume.GetResponse{}, p.dockerError(err)
}
Expand Down Expand Up @@ -404,3 +412,36 @@ func (p *Plugin) dockerError(err error) error {
return err
}
}

// reloadVolumes instructs Trident core to refresh its cached volume info from its
// backend storage controller(s). If Trident isn't ready, it will retry for nearly
// the Docker timeout of 60 seconds. Otherwise, it returns immediately with any
// other error or nil if the operation succeeded.
func (p *Plugin) reloadVolumes() error {

reloadVolumesFunc := func() error {

err := p.orchestrator.ReloadVolumes()
if err == nil {
return nil
} else if core.IsNotReadyError(err) {
return err
} else {
return backoff.Permanent(err)
}
}
reloadNotify := func(err error, duration time.Duration) {
log.WithFields(log.Fields{
"increment": duration,
"message": err.Error(),
}).Debugf("Docker frontend waiting to reload volumes.")
}
reloadBackoff := backoff.NewExponentialBackOff()
reloadBackoff.InitialInterval = 1 * time.Second
reloadBackoff.RandomizationFactor = 0.0
reloadBackoff.Multiplier = 1.0
reloadBackoff.MaxInterval = 1 * time.Second
reloadBackoff.MaxElapsedTime = startupTimeout

return backoff.RetryNotify(reloadVolumesFunc, reloadBackoff, reloadNotify)
}
23 changes: 12 additions & 11 deletions storage_drivers/eseries/api/eseries.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,20 @@ func NewAPIClient(config ClientConfig) *Client {
}
c.config.CompiledPoolNameSearchPattern = compiledRegex

volumeTags = []VolumeTag{
{"IF", c.config.Protocol},
{"version", c.config.Telemetry["version"]},
{"platform", c.config.Telemetry["platform"]},
{"platformVersion", c.config.Telemetry["platformVersion"]},
{"plugin", c.config.Telemetry["plugin"]},
{"storagePrefix", c.config.Telemetry["storagePrefix"]},
}

return c
}

var volumeTags []VolumeTag
func (d Client) makeVolumeTags() []VolumeTag {

return []VolumeTag{
{"IF", d.config.Protocol},
{"version", d.config.Telemetry["version"]},
{"platform", tridentconfig.OrchestratorTelemetry.Platform},
{"platformVersion", tridentconfig.OrchestratorTelemetry.PlatformVersion},
{"plugin", d.config.Telemetry["plugin"]},
{"storagePrefix", d.config.Telemetry["storagePrefix"]},
}
}

// InvokeAPI makes a REST call to the Web Services Proxy. The body must be a marshaled JSON byte array (or nil).
// The method is the HTTP verb (i.e. GET, POST, ...). The resource path is appended to the base URL to identify
Expand Down Expand Up @@ -608,7 +609,7 @@ func (d Client) CreateVolume(
}

// Copy static volume metadata and add fstype
tags := append([]VolumeTag(nil), volumeTags...)
tags := d.makeVolumeTags()
tags = append(tags, VolumeTag{"fstype", fstype})

// Set up the volume create request
Expand Down
2 changes: 0 additions & 2 deletions storage_drivers/eseries/eseries_iscsi.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,6 @@ func (d *SANStorageDriver) Initialize(

telemetry := make(map[string]string)
telemetry["version"] = tridentconfig.OrchestratorVersion.ShortString()
telemetry["platform"] = tridentconfig.OrchestratorTelemetry.Platform
telemetry["platformVersion"] = tridentconfig.OrchestratorTelemetry.PlatformVersion
telemetry["plugin"] = d.Name()
telemetry["storagePrefix"] = *d.Config.StoragePrefix

Expand Down
11 changes: 5 additions & 6 deletions storage_drivers/ontap/ontap_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
"github.com/cenkalti/backoff"
log "github.com/sirupsen/logrus"

trident "github.com/netapp/trident/config"
tridentconfig "github.com/netapp/trident/config"
"github.com/netapp/trident/storage"
sa "github.com/netapp/trident/storage_attribute"
drivers "github.com/netapp/trident/storage_drivers"
Expand All @@ -32,7 +32,7 @@ const (
)

type Telemetry struct {
trident.Telemetry
tridentconfig.Telemetry
Plugin string `json:"plugin"`
SVM string `json:"svm"`
StoragePrefix string `json:"storagePrefix"`
Expand All @@ -50,7 +50,7 @@ type StorageDriver interface {

// InitializeOntapConfig parses the ONTAP config, mixing in the specified common config.
func InitializeOntapConfig(
context trident.DriverContext, configJSON string, commonConfig *drivers.CommonStorageDriverConfig,
context tridentconfig.DriverContext, configJSON string, commonConfig *drivers.CommonStorageDriverConfig,
) (*drivers.OntapStorageDriverConfig, error) {

if commonConfig.DebugTraceFlags["method"] {
Expand All @@ -75,7 +75,6 @@ func InitializeOntapConfig(

func NewOntapTelemetry(d StorageDriver) *Telemetry {
t := &Telemetry{
Telemetry: trident.OrchestratorTelemetry,
Plugin: d.Name(),
SVM: d.GetConfig().SVM,
StoragePrefix: *d.GetConfig().StoragePrefix,
Expand Down Expand Up @@ -488,7 +487,7 @@ func EMSHeartbeat(driver StorageDriver) {

emsResponse, err := driver.GetAPI().EmsAutosupportLog(
strconv.Itoa(drivers.ConfigVersion), false, "heartbeat", hostname,
string(message), 1, trident.OrchestratorName, 5)
string(message), 1, tridentconfig.OrchestratorName, 5)

if err = api.GetError(emsResponse, err); err != nil {
log.WithFields(log.Fields{
Expand Down Expand Up @@ -1017,7 +1016,7 @@ func getVolumeOptsCommon(

func getInternalVolumeNameCommon(commonConfig *drivers.CommonStorageDriverConfig, name string) string {

if trident.UsingPassthroughStore {
if tridentconfig.UsingPassthroughStore {
// With a passthrough store, the name mapping must remain reversible
return *commonConfig.StoragePrefix + name
} else {
Expand Down
1 change: 1 addition & 0 deletions storage_drivers/ontap/ontap_nas.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func (d *NASStorageDriver) GetAPI() *api.Client {
}

func (d *NASStorageDriver) GetTelemetry() *Telemetry {
d.Telemetry.Telemetry = tridentconfig.OrchestratorTelemetry
return d.Telemetry
}

Expand Down
1 change: 1 addition & 0 deletions storage_drivers/ontap/ontap_nas_flexgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func (d *NASFlexGroupStorageDriver) GetAPI() *api.Client {
}

func (d *NASFlexGroupStorageDriver) GetTelemetry() *Telemetry {
d.Telemetry.Telemetry = tridentconfig.OrchestratorTelemetry
return d.Telemetry
}

Expand Down
1 change: 1 addition & 0 deletions storage_drivers/ontap/ontap_nas_qtree.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ func (d *NASQtreeStorageDriver) GetAPI() *api.Client {
}

func (d *NASQtreeStorageDriver) GetTelemetry() *Telemetry {
d.Telemetry.Telemetry = tridentconfig.OrchestratorTelemetry
return d.Telemetry
}

Expand Down
1 change: 1 addition & 0 deletions storage_drivers/ontap/ontap_san.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ func (d *SANStorageDriver) GetAPI() *api.Client {
}

func (d *SANStorageDriver) GetTelemetry() *Telemetry {
d.Telemetry.Telemetry = tridentconfig.OrchestratorTelemetry
return d.Telemetry
}

Expand Down
17 changes: 9 additions & 8 deletions storage_drivers/solidfire/solidfire_san.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ type SANStorageDriver struct {
AccessGroups []int64
LegacyNamePrefix string
InitiatorIFace string
Telemetry *Telemetry
}

type StorageDriverConfigExternal struct {
Expand Down Expand Up @@ -86,6 +85,13 @@ func parseType(vTypes []api.VolType, typeName string) (qos api.QoS, err error) {
return qos, err
}

func (d SANStorageDriver) getTelemetry() *Telemetry {
return &Telemetry{
Telemetry: tridentconfig.OrchestratorTelemetry,
Plugin: d.Name(),
}
}

// Name is for returning the name of this driver
func (d SANStorageDriver) Name() string {
return drivers.SolidfireSANStorageDriverName
Expand Down Expand Up @@ -236,11 +242,6 @@ func (d *SANStorageDriver) Initialize(
// log cluster node serial numbers asynchronously since the API can take a long time
go d.getNodeSerialNumbers(config.CommonStorageDriverConfig)

d.Telemetry = &Telemetry{
Telemetry: tridentconfig.OrchestratorTelemetry,
Plugin: d.Name(),
}

d.initialized = true
return nil
}
Expand Down Expand Up @@ -516,7 +517,7 @@ func (d *SANStorageDriver) Create(name string, sizeBytes uint64, opts map[string

var req api.CreateVolumeRequest
var qos api.QoS
telemetry, _ := json.Marshal(d.Telemetry)
telemetry, _ := json.Marshal(d.getTelemetry())
var meta = map[string]string{
"trident": string(telemetry),
"docker-name": name,
Expand Down Expand Up @@ -643,7 +644,7 @@ func (d *SANStorageDriver) CreateClone(name, sourceName, snapshotName string, op
}

var req api.CloneVolumeRequest
telemetry, _ := json.Marshal(d.Telemetry)
telemetry, _ := json.Marshal(d.getTelemetry())
var meta = map[string]string{
"trident": string(telemetry),
"docker-name": name,
Expand Down

0 comments on commit b3163c1

Please sign in to comment.