Skip to content

Commit

Permalink
Add JIT config as part of instance create
Browse files Browse the repository at this point in the history
We must create the DB entry for a runner with a JIT config included. Adding it later
via an update runs the risk of having the consolidate loop pick up the incomplete instance.

Signed-off-by: Gabriel Adrian Samfira <[email protected]>
  • Loading branch information
gabriel-samfira committed Sep 24, 2023
1 parent 8c507a9 commit 019948a
Show file tree
Hide file tree
Showing 7 changed files with 43 additions and 76 deletions.
1 change: 1 addition & 0 deletions database/sql/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ func (s *sqlDatabase) CreateInstance(ctx context.Context, poolID string, param p
GitHubRunnerGroup: param.GitHubRunnerGroup,
JitConfiguration: secret,
AditionalLabels: labels,
AgentID: param.AgentID,
}
q := s.conn.Create(&newInstance)
if q.Error != nil {
Expand Down
3 changes: 2 additions & 1 deletion params/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ type CreateInstanceParams struct {
// GithubRunnerGroup is the github runner group to which the runner belongs.
// The runner group must be created by someone with access to the enterprise.
GitHubRunnerGroup string
CreateAttempt int `json:"-"`
CreateAttempt int `json:"-"`
AgentID int64 `json:"-"`
AditionalLabels []string
JitConfiguration map[string]string
}
Expand Down
12 changes: 2 additions & 10 deletions runner/pool/enterprise.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,7 @@ func (r *enterprise) findRunnerGroupByName(ctx context.Context, name string) (*g
return nil, errors.Wrap(runnerErrors.ErrNotFound, "runner group not found")
}

func (r *enterprise) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
if instance.AgentID != 0 {
return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest)
}

if instance.JitConfiguration != nil {
return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest)
}

func (r *enterprise) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
var rg int64 = 1
if pool.GitHubRunnerGroup != "" {
runnerGroup, err := r.findRunnerGroupByName(ctx, pool.GitHubRunnerGroup)
Expand All @@ -123,7 +115,7 @@ func (r *enterprise) GetJITConfig(ctx context.Context, instance params.Instance,
}

req := github.GenerateJITConfigRequest{
Name: instance.Name,
Name: instance,
RunnerGroupID: rg,
Labels: labels,
// TODO(gabriel-samfira): Should we make this configurable?
Expand Down
2 changes: 1 addition & 1 deletion runner/pool/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ type poolHelper interface {

GithubCLI() common.GithubClient

GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (map[string]string, *github.Runner, error)
GetJITConfig(ctx context.Context, instanceName string, pool params.Pool, labels []string) (map[string]string, *github.Runner, error)

FetchDbInstances() ([]params.Instance, error)
ListPools() ([]params.Pool, error)
Expand Down
12 changes: 2 additions & 10 deletions runner/pool/organization.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,7 @@ func (r *organization) findRunnerGroupByName(ctx context.Context, name string) (
return nil, errors.Wrap(runnerErrors.ErrNotFound, "runner group not found")
}

func (r *organization) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
if instance.AgentID != 0 {
return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest)
}

if instance.JitConfiguration != nil {
return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest)
}

func (r *organization) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
var rg int64 = 1
if pool.GitHubRunnerGroup != "" {
runnerGroup, err := r.findRunnerGroupByName(ctx, pool.GitHubRunnerGroup)
Expand All @@ -135,7 +127,7 @@ func (r *organization) GetJITConfig(ctx context.Context, instance params.Instanc
}

req := github.GenerateJITConfigRequest{
Name: instance.Name,
Name: instance,
RunnerGroupID: rg,
Labels: labels,
// TODO(gabriel-samfira): Should we make this configurable?
Expand Down
77 changes: 33 additions & 44 deletions runner/pool/pool.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,11 +388,18 @@ func (r *basePoolManager) cleanupOrphanedProviderRunners(runners []*github.Runne
continue
}

pool, err := r.store.GetPoolByID(r.ctx, instance.PoolID)
if err != nil {
return errors.Wrap(err, "fetching instance pool info")
}

switch instance.RunnerStatus {
case params.RunnerPending, params.RunnerInstalling:
// runner is still installing. We give it a chance to finish.
r.log("runner %s is still installing, give it a chance to finish", instance.Name)
continue
if time.Since(instance.UpdatedAt).Minutes() < float64(pool.RunnerTimeout()) {
// runner is still installing. We give it a chance to finish.
r.log("runner %s is still installing, give it a chance to finish", instance.Name)
continue
}
}

if time.Since(instance.UpdatedAt).Minutes() < 5 {
Expand Down Expand Up @@ -451,20 +458,7 @@ func (r *basePoolManager) reapTimedOutRunners(runners []*github.Runner) error {
// * The runner never joined github within the pool timeout
// * The runner managed to join github, but the setup process failed later and the runner
// never started on the instance.
//
// There are several steps in the user data that sets up the runner:
// * Download and unarchive the runner from github (or used the cached version)
// * Configure runner (connects to github). At this point the runner is seen as offline.
// * Install the service
// * Set SELinux context (if SELinux is enabled)
// * Start the service (if successful, the runner will transition to "online")
// * Get the runner ID
//
// If we fail getting the runner ID after it's started, garm will set the runner status to "failed",
// even though, technically the runner is online and fully functional. This is why we check here for
// both the runner status as reported by GitHub and the runner status as reported by the provider.
// If the runner is "offline" and marked as "failed", it should be safe to reap it.
if runner, ok := runnersByName[instance.Name]; !ok || (runner.GetStatus() == "offline" && instance.RunnerStatus == params.RunnerFailed) {
if runner, ok := runnersByName[instance.Name]; !ok || runner.GetStatus() == "offline" {
r.log("reaping timed-out/failed runner %s", instance.Name)
if err := r.ForceDeleteRunner(instance); err != nil {
r.log("failed to update runner %s status: %s", instance.Name, err)
Expand Down Expand Up @@ -699,6 +693,12 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona
}

name := fmt.Sprintf("%s-%s", pool.GetRunnerPrefix(), util.NewID())
labels := r.getLabelsForInstance(pool)
// Attempt to create JIT config
jitConfig, runner, err := r.helper.GetJITConfig(ctx, name, pool, labels)
if err != nil {
r.log("failed to get JIT config, falling back to registration token: %s", err)
}

createParams := params.CreateInstanceParams{
Name: name,
Expand All @@ -711,6 +711,11 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona
CreateAttempt: 1,
GitHubRunnerGroup: pool.GitHubRunnerGroup,
AditionalLabels: aditionalLabels,
JitConfiguration: jitConfig,
}

if runner != nil {
createParams.AgentID = runner.GetID()
}

instance, err := r.store.CreateInstance(r.ctx, poolID, createParams)
Expand All @@ -719,37 +724,21 @@ func (r *basePoolManager) AddRunner(ctx context.Context, poolID string, aditiona
}

defer func() {
if err != nil && instance.ID != "" {
if err := r.ForceDeleteRunner(instance); err != nil {
r.log("failed to cleanup instance: %s", instance.Name)
if err != nil {
if instance.ID != "" {
if err := r.ForceDeleteRunner(instance); err != nil {
r.log("failed to cleanup instance: %s", instance.Name)
}
}
}
}()

labels := r.getLabelsForInstance(pool)
// Attempt to create JIT config
jitConfig, runner, err := r.helper.GetJITConfig(ctx, instance, pool, labels)
if err == nil {
updateParams := params.UpdateInstanceParams{
AgentID: runner.GetID(),
// We're using a JIT config. Setting the TokenFetched will disable the registration token
// metadata endpoint.
TokenFetched: github.Bool(true),
JitConfiguration: jitConfig,
}
instance, err = r.updateInstance(instance.Name, updateParams)
if err != nil {
// The agent ID is not recorded in the instance, so the deferred ForceDeleteRunner() will not
// attempt to clean it up. We need to do it here.
_, runnerCleanupErr := r.helper.RemoveGithubRunner(runner.GetID())
if err != nil {
log.Printf("failed to remove runner %d: %s", runner.GetID(), runnerCleanupErr)
if runner != nil {
_, runnerCleanupErr := r.helper.RemoveGithubRunner(runner.GetID())
if err != nil {
r.log("failed to remove runner %d: %s", runner.GetID(), runnerCleanupErr)
}
}
return errors.Wrap(err, "updating instance")
}
} else {
r.log("failed to get JIT config, falling back to registration token: %s", err)
}
}()

return nil
}
Expand Down
12 changes: 2 additions & 10 deletions runner/pool/repository.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,17 +88,9 @@ type repository struct {
mux sync.Mutex
}

func (r *repository) GetJITConfig(ctx context.Context, instance params.Instance, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
if instance.AgentID != 0 {
return nil, nil, fmt.Errorf("instance already has an agent ID: %w", runnerErrors.ErrBadRequest)
}

if instance.JitConfiguration != nil {
return nil, nil, fmt.Errorf("instance already has a JIT configuration: %w", runnerErrors.ErrBadRequest)
}

func (r *repository) GetJITConfig(ctx context.Context, instance string, pool params.Pool, labels []string) (jitConfigMap map[string]string, runner *github.Runner, err error) {
req := github.GenerateJITConfigRequest{
Name: instance.Name,
Name: instance,
// At the repository level we only have the default runner group.
RunnerGroupID: 1,
Labels: labels,
Expand Down

0 comments on commit 019948a

Please sign in to comment.