Add ScheduleOnly initPolicy #59

Merged · 7 commits · Sep 22, 2021
Changes from all commits
23 changes: 16 additions & 7 deletions api/v1beta1/verticadb_types.go
@@ -143,6 +143,7 @@ type VerticaDBSpec struct {
// of 20 minutes.
RestartTimeout int `json:"restartTimeout,omitempty"`

// +kubebuilder:validation:Optional
// Contains details about the communal storage.
Communal CommunalStorage `json:"communal"`

@@ -204,6 +205,12 @@ const (
// The database in the communal path will be initialized in the VerticaDB
// through a revive_db. The communal path must have a preexisting database.
CommunalInitPolicyRevive = "Revive"
// Only schedule pods to run with the vertica container. The bootstrap of
// the database, either create_db or revive_db, is not handled. Use this
// policy when you have a vertica cluster running outside of Kubernetes and
// you want to provision new nodes to run inside Kubernetes. Most of the
// automation is disabled when running in this mode.
CommunalInitPolicyScheduleOnly = "ScheduleOnly"
)

type KSafetyType string
@@ -227,11 +234,12 @@ type SubclusterPodCount struct {

// Holds details about the communal storage
type CommunalStorage struct {
// +kubebuilder:validation:required
// +kubebuilder:validation:Optional
// The path to the communal storage. This must be the s3 bucket. You specify
// this using the s3:// bucket notation. For example:
// s3://bucket-name/key-name. The bucket must be created prior to creating
// the VerticaDB. This field is required and cannot change after creation.
// the VerticaDB. When initPolicy is Create or Revive, this field is
// required and cannot change after creation.
Path string `json:"path"`

// +kubebuilder:validation:Optional
@@ -241,16 +249,17 @@ type CommunalStorage struct {
// forces each database path to be unique.
IncludeUIDInPath bool `json:"includeUIDInPath,omitempty"`

// +kubebuilder:validation:required
// +kubebuilder:validation:Optional
// The URL to the s3 endpoint. The endpoint must be prefaced with http:// or
// https:// to know what protocol to connect with. This field is required
// and cannot change after creation.
// https:// to know what protocol to connect with. When initPolicy is Create
// or Revive, this field is required and cannot change after creation.
Endpoint string `json:"endpoint"`

// +kubebuilder:validation:required
// +kubebuilder:validation:Optional
// The name of a secret that contains the credentials to connect to the
// communal S3 endpoint. The secret must have the following keys set:
// accesskey and secretkey. When initPolicy is Create or Revive, this field
// accessey and secretkey. When initPolicy is Create or Revive, this field
// is required.
CredentialSecret string `json:"credentialSecret"`

// +kubebuilder:validation:Optional
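With the communal fields now optional, a VerticaDB that only schedules pods alongside an existing cluster can omit the communal settings entirely. A minimal sketch of such a manifest (the metadata name, image tag, and subcluster layout are illustrative and not taken from this PR; the apiVersion is assumed from the api/v1beta1 package):

apiVersion: vertica.com/v1beta1
kind: VerticaDB
metadata:
  name: schedule-only-demo
spec:
  initPolicy: ScheduleOnly
  # Pick an image that matches the version of the existing cluster.
  image: vertica/vertica-k8s:11.0.0-0-minimal
  subclusters:
    - name: sc1
      size: 3
  # communal.path, communal.endpoint and communal.credentialSecret are omitted;
  # with ScheduleOnly the webhook no longer requires them.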
24 changes: 21 additions & 3 deletions api/v1beta1/verticadb_webhook.go
@@ -221,17 +221,23 @@ func (v *VerticaDB) hasAtLeastOneSC(allErrs field.ErrorList) field.ErrorList {
}

func (v *VerticaDB) hasValidInitPolicy(allErrs field.ErrorList) field.ErrorList {
// initPolicy should either be Create or Revive.
if v.Spec.InitPolicy != CommunalInitPolicyCreate && v.Spec.InitPolicy != CommunalInitPolicyRevive {
switch v.Spec.InitPolicy {
case CommunalInitPolicyCreate:
case CommunalInitPolicyRevive:
case CommunalInitPolicyScheduleOnly:
default:
err := field.Invalid(field.NewPath("spec").Child("initPolicy"),
v.Spec.InitPolicy,
"initPolicy should either be Create or Revive.")
"initPolicy should either be Create, Revive or ScheduleOnly.")
allErrs = append(allErrs, err)
}
return allErrs
}

func (v *VerticaDB) validateCommunalPath(allErrs field.ErrorList) field.ErrorList {
if v.Spec.InitPolicy == CommunalInitPolicyScheduleOnly {
return allErrs
}
// communal.Path must be an S3 bucket, prefaced with s3://
if !strings.HasPrefix(v.Spec.Communal.Path, "s3://") {
err := field.Invalid(field.NewPath("spec").Child("communal").Child("endpoint"),
@@ -243,6 +249,9 @@ func (v *VerticaDB) validateCommunalPath(allErrs field.ErrorList) field.ErrorList {
}

func (v *VerticaDB) validateS3Bucket(allErrs field.ErrorList) field.ErrorList {
if v.Spec.InitPolicy == CommunalInitPolicyScheduleOnly {
return allErrs
}
// communal.Path must be an S3 bucket, prefaced with s3://
if !strings.HasPrefix(v.Spec.Communal.Path, "s3://") {
err := field.Invalid(field.NewPath("spec").Child("communal").Child("endpoint"),
@@ -254,6 +263,9 @@ func (v *VerticaDB) validateS3Bucket(allErrs field.ErrorList) field.ErrorList {
}

func (v *VerticaDB) validateEndpoint(allErrs field.ErrorList) field.ErrorList {
if v.Spec.InitPolicy == CommunalInitPolicyScheduleOnly {
return allErrs
}
// communal.endpoint must be prefaced with http:// or https:// to know what protocol to connect with.
if !(strings.HasPrefix(v.Spec.Communal.Endpoint, "http://") ||
strings.HasPrefix(v.Spec.Communal.Endpoint, "https://")) {
@@ -266,6 +278,9 @@ func (v *VerticaDB) validateEndpoint(allErrs field.ErrorList) field.ErrorList {
}

func (v *VerticaDB) credentialSecretExists(allErrs field.ErrorList) field.ErrorList {
if v.Spec.InitPolicy == CommunalInitPolicyScheduleOnly {
return allErrs
}
// communal.credentialSecret must exist
if v.Spec.Communal.CredentialSecret == "" {
err := field.Invalid(field.NewPath("spec").Child("communal").Child("credentialSecret"),
@@ -311,6 +326,9 @@ func (v *VerticaDB) hasPrimarySubcluster(allErrs field.ErrorList) field.ErrorList {
}

func (v *VerticaDB) validateKsafety(allErrs field.ErrorList) field.ErrorList {
if v.Spec.InitPolicy == CommunalInitPolicyScheduleOnly {
return allErrs
}
sizeSum := v.getClusterSize()
switch v.Spec.KSafety {
case KSafety0:
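Taken together, these early returns mean a ScheduleOnly spec with an empty communal section should pass admission. A rough sketch of a webhook unit test that could cover this (not part of this PR; it assumes the test lives in the same v1beta1 package so it can call the unexported validators):

package v1beta1

import (
	"testing"

	"k8s.io/apimachinery/pkg/util/validation/field"
)

func TestScheduleOnlySkipsCommunalValidation(t *testing.T) {
	vdb := VerticaDB{
		Spec: VerticaDBSpec{
			InitPolicy: CommunalInitPolicyScheduleOnly,
			// Communal is intentionally left empty: path, endpoint and
			// credentialSecret are only required for Create/Revive.
		},
	}
	allErrs := field.ErrorList{}
	allErrs = vdb.hasValidInitPolicy(allErrs)
	allErrs = vdb.validateCommunalPath(allErrs)
	allErrs = vdb.validateEndpoint(allErrs)
	allErrs = vdb.credentialSecretExists(allErrs)
	if len(allErrs) > 0 {
		t.Errorf("expected ScheduleOnly to skip communal validation, got: %v", allErrs)
	}
}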
5 changes: 5 additions & 0 deletions changes/unreleased/Added-20210920-171923.yaml
@@ -0,0 +1,5 @@
kind: Added
body: New initPolicy called ScheduleOnly. The bootstrap of the database, either
create_db or revive_db, is not handled. Use this policy when you have a vertica
cluster running outside of Kubernetes and you want to provision new nodes to run
inside Kubernetes. Most of the automation is disabled when running in this mode.
13 changes: 0 additions & 13 deletions kuttl-test.yaml
@@ -35,19 +35,6 @@ commands:
- command: kubectl delete pod vertica-k8s-image-pull
namespaced: true

# We do a pre-pull of the vertica-k8s image version latest and 10.1.1-0
# that we will use for the test upgrade-vertica.
- command: bash -c "sed 's+kustomize-vertica-image+vertica/vertica-k8s:11.0.0-0-minimal+g' tests/manifests/image-pull/base/vertica-k8s-image-pull.yaml | kubectl -n $NAMESPACE apply -f - "
- command: kubectl wait --for=condition=Ready pod --timeout=10m vertica-k8s-image-pull
namespaced: true
- command: kubectl delete pod vertica-k8s-image-pull
namespaced: true
- command: bash -c "sed 's+kustomize-vertica-image+vertica/vertica-k8s:latest+g' tests/manifests/image-pull/base/vertica-k8s-image-pull.yaml | kubectl -n $NAMESPACE apply -f - "
- command: kubectl wait --for=condition=Ready pod --timeout=10m vertica-k8s-image-pull
namespaced: true
- command: kubectl delete pod vertica-k8s-image-pull
namespaced: true

# We use stern to collect the pod output of any test that creates a pod with
# the 'stern=include' label. By default, the output of this is stored in a
# file in int-tests-output/
5 changes: 5 additions & 0 deletions pkg/controllers/dbaddnode_reconcile.go
@@ -48,6 +48,11 @@ func MakeDBAddNodeReconciler(vdbrecon *VerticaDBReconciler, log logr.Logger,

// Reconcile will ensure a DB exists and create one if it doesn't
func (d *DBAddNodeReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
// no-op for ScheduleOnly init policy
if d.Vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
return ctrl.Result{}, nil
}

if err := d.PFacts.Collect(ctx, d.Vdb); err != nil {
return ctrl.Result{}, err
}
5 changes: 5 additions & 0 deletions pkg/controllers/dbaddsubcluster_reconcile.go
@@ -49,6 +49,11 @@ func MakeDBAddSubclusterReconciler(vdbrecon *VerticaDBReconciler, log logr.Logger,

// Reconcile will ensure a subcluster exists for each one defined in the vdb.
func (d *DBAddSubclusterReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
// no-op for ScheduleOnly init policy
if d.Vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
return ctrl.Result{}, nil
}

// We need to collect pod facts, to find a pod to run AT and vsql commands from.
if err := d.PFacts.Collect(ctx, d.Vdb); err != nil {
return ctrl.Result{}, err
5 changes: 5 additions & 0 deletions pkg/controllers/dbremovenode_reconcile.go
@@ -72,6 +72,11 @@ func (d *DBRemoveNodeReconciler) CollectPFacts(ctx context.Context) error {
// everything in Vdb. We will know if we are scaling down by comparing the
// expected subcluster size with the current.
func (d *DBRemoveNodeReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
// no-op for ScheduleOnly init policy
if d.Vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
return ctrl.Result{}, nil
}

// Use the finder so that we check only the subclusters that are in the vdb.
// Any nodes that are in subclusters that we are removing are handled by the
// DBRemoveSubclusterReconciler.
5 changes: 5 additions & 0 deletions pkg/controllers/dbremovesubcluster_reconcile.go
@@ -47,6 +47,11 @@ func MakeDBRemoveSubclusterReconciler(vdbrecon *VerticaDBReconciler, log logr.Logger,

// Reconcile will remove any subcluster that no longer exists in the vdb.
func (d *DBRemoveSubclusterReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
// no-op for ScheduleOnly init policy
if d.Vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
return ctrl.Result{}, nil
}

// We need to collect pod facts, to find a pod to run AT and vsql commands from.
if err := d.PFacts.Collect(ctx, d.Vdb); err != nil {
return ctrl.Result{}, err
5 changes: 5 additions & 0 deletions pkg/controllers/imagechange_reconcile.go
@@ -50,6 +50,11 @@ func MakeImageChangeReconciler(vdbrecon *VerticaDBReconciler, log logr.Logger,
// Reconcile will handle the process of the vertica image changing. For
// example, this can automate the process for an upgrade.
func (u *ImageChangeReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
// no-op for ScheduleOnly init policy
if u.Vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
return ctrl.Result{}, nil
}

if err := u.PFacts.Collect(ctx, u.Vdb); err != nil {
return ctrl.Result{}, err
}
5 changes: 5 additions & 0 deletions pkg/controllers/install_reconcile.go
@@ -58,6 +58,11 @@ func MakeInstallReconciler(vdbrecon *VerticaDBReconciler, log logr.Logger,

// Reconcile will ensure Vertica is installed and running in the pods.
func (d *InstallReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
// no-op for ScheduleOnly init policy
if d.Vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
return ctrl.Result{}, nil
}

// The reconcile loop works by collecting all of the facts about the running
// pods. We then analyze those facts to determine a course of action to take.
if err := d.PFacts.Collect(ctx, d.Vdb); err != nil {
56 changes: 37 additions & 19 deletions pkg/controllers/podfacts.go
@@ -75,7 +75,7 @@ type PodFact struct {
vnodeName string

// The compat21 node name that Vertica assigns to the pod. This is only set
// if installation has occurred.
// if installation has occurred and the initPolicy is not ScheduleOnly.
compat21NodeName string

// True if the end user license agreement has been accepted
@@ -205,30 +205,48 @@ func (p *PodFacts) collectPodByStsIndex(ctx context.Context, vdb *vapi.VerticaDB

// checkIsInstalled will check a single pod to see if the installation has happened.
func (p *PodFacts) checkIsInstalled(ctx context.Context, vdb *vapi.VerticaDB, pf *PodFact) error {
if pf.isPodRunning {
fn := paths.GenInstallerIndicatorFileName(vdb)
if stdout, stderr, err := p.PRunner.ExecInPod(ctx, pf.name, names.ServerContainer, "cat", fn); err != nil {
if !strings.Contains(stderr, "cat: "+fn+": No such file or directory") {
return err
}
if !pf.isPodRunning {
pf.isInstalled = tristate.None
return nil
}

// If initPolicy is ScheduleOnly, there is no install indicator since the
// operator didn't initiate it. We detect the install based on the existence
// of admintools.conf.
if vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
if _, _, err := p.PRunner.ExecInPod(ctx, pf.name, names.ServerContainer, "test", "-f", paths.AdminToolsConf); err != nil {
pf.isInstalled = tristate.False
} else {
pf.isInstalled = tristate.True
}

// Check if there is a stale admintools.conf
cmd := []string{"ls", paths.AdminToolsConf}
if _, stderr, err := p.PRunner.ExecInPod(ctx, pf.name, names.ServerContainer, cmd...); err != nil {
if !strings.Contains(stderr, "No such file or directory") {
return err
}
pf.hasStaleAdmintoolsConf = false
} else {
pf.hasStaleAdmintoolsConf = true
// We can't reliably set compat21NodeName because the operator didn't
// originate the install. We will intentionally leave that blank.
pf.compat21NodeName = ""
@ningdeng ningdeng (Collaborator), Sep 22, 2021:

No change required, just trying to confirm my understanding of the functionality: does this mean re_ip will not be automated on the schedule-only pods? (Edit: I was asking whether there could be a case where some pods are managed by the operator starting from installation while others are not. But since there can be only one init policy, the behavior is consistent: either the operator manages the entire cluster from the very beginning, or it only schedules the pods, so there is no case where some pods are managed by the operator from installation and some are not.) I recall compat21NodeName is used for re_ip when the db is down, but I would like to double check that my memory is correct.

@ningdeng ningdeng (Collaborator):

nvm, I saw the changes in restart_reconcile and I think my question is answered.


return nil
}

fn := paths.GenInstallerIndicatorFileName(vdb)
if stdout, stderr, err := p.PRunner.ExecInPod(ctx, pf.name, names.ServerContainer, "cat", fn); err != nil {
if !strings.Contains(stderr, "cat: "+fn+": No such file or directory") {
return err
}
pf.isInstalled = tristate.False

// Check if there is a stale admintools.conf
cmd := []string{"ls", paths.AdminToolsConf}
if _, stderr, err := p.PRunner.ExecInPod(ctx, pf.name, names.ServerContainer, cmd...); err != nil {
if !strings.Contains(stderr, "No such file or directory") {
return err
}
pf.hasStaleAdmintoolsConf = false
} else {
pf.isInstalled = tristate.True
pf.compat21NodeName = strings.TrimSuffix(stdout, "\n")
pf.hasStaleAdmintoolsConf = true
}
} else {
pf.isInstalled = tristate.None
pf.isInstalled = tristate.True
pf.compat21NodeName = strings.TrimSuffix(stdout, "\n")
}
return nil
}
15 changes: 12 additions & 3 deletions pkg/controllers/restart_reconcile.go
@@ -84,9 +84,10 @@ func (r *RestartReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
}

// We have two paths. If the entire cluster is down we have separate
// admintools commands to run.

if r.PFacts.getUpNodeCount() == 0 {
// admintools commands to run. Cluster operations only apply if the entire
// vertica cluster is managed by k8s. We skip that if initPolicy is
// ScheduleOnly.
if r.PFacts.getUpNodeCount() == 0 && r.Vdb.Spec.InitPolicy != vapi.CommunalInitPolicyScheduleOnly {
return r.reconcileCluster(ctx)
}
return r.reconcileNodes(ctx)
@@ -165,6 +166,14 @@ func (r *RestartReconciler) reconcileNodes(ctx context.Context) (ctrl.Result, error) {
}
}

// The rest of the steps depend on knowing the compat21 node name for the
// pod. If ScheduleOnly, we cannot reliably know that since the operator
// didn't originate the install. So we will skip the rest if running in
// that mode.
if r.Vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
return ctrl.Result{}, nil
}

// Find any pods that need to have their IP updated. These are nodes that
// have been installed but not yet added to a database.
reIPPods := r.PFacts.findReIPPods(true)
5 changes: 5 additions & 0 deletions pkg/controllers/uninstall_reconcile.go
@@ -74,6 +74,11 @@ func (s *UninstallReconciler) CollectPFacts(ctx context.Context) error {
// everything in Vdb. We will know if we are scaling down by comparing the
// expected subcluster size with the current.
func (s *UninstallReconciler) Reconcile(ctx context.Context, req *ctrl.Request) (ctrl.Result, error) {
// no-op for ScheduleOnly init policy
if s.Vdb.Spec.InitPolicy == vapi.CommunalInitPolicyScheduleOnly {
return ctrl.Result{}, nil
}

if err := s.PFacts.Collect(ctx, s.Vdb); err != nil {
return ctrl.Result{}, err
}
17 changes: 17 additions & 0 deletions tests/e2e/schedule-only/00-create-communal-creds.yaml
@@ -0,0 +1,17 @@
# (c) Copyright [2021] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
- script: kustomize build ../../manifests/s3-creds/base | kubectl apply -f - --namespace $NAMESPACE
23 changes: 23 additions & 0 deletions tests/e2e/schedule-only/05-assert.yaml
@@ -0,0 +1,23 @@
# (c) Copyright [2021] Micro Focus or one of its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: v1
kind: Pod
metadata:
name: create-s3-bucket
status:
containerStatuses:
- name: aws
state:
terminated:
exitCode: 0
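The assert above waits for a create-s3-bucket pod to terminate cleanly. The kuttl step that launches that pod is not shown in this view; a sketch of what a companion 05 step could look like, following the pattern of the 00 step above (the manifest path is an assumption, not taken from this PR):

apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
  - script: kustomize build ../../manifests/create-s3-bucket/base | kubectl apply -f - --namespace $NAMESPACE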