diff --git a/app/invocation/BUILD b/app/invocation/BUILD index e69b7118eb9..2378f51da26 100644 --- a/app/invocation/BUILD +++ b/app/invocation/BUILD @@ -97,7 +97,8 @@ ts_library( deps = [ "//app/components/link", "//app/format", - "//proto:build_event_stream_ts_proto", + "//proto:invocation_status_ts_proto", + "//proto:invocation_ts_proto", "@npm//@types/react", "@npm//lucide-react", "@npm//react", @@ -109,8 +110,7 @@ ts_library( srcs = ["child_invocations.tsx"], deps = [ "//app/invocation:child_invocation_card", - "//app/invocation:invocation_model", - "//app/util:proto", + "//proto:invocation_ts_proto", "@npm//@types/react", "@npm//react", ], diff --git a/app/invocation/child_invocation_card.tsx b/app/invocation/child_invocation_card.tsx index 9649dec94c3..b82582c30e4 100644 --- a/app/invocation/child_invocation_card.tsx +++ b/app/invocation/child_invocation_card.tsx @@ -1,40 +1,49 @@ import React from "react"; import format from "../format/format"; -import { build_event_stream } from "../../proto/build_event_stream_ts_proto"; -import { CheckCircle, PlayCircle, XCircle, CircleSlash, Timer } from "lucide-react"; +import { invocation } from "../../proto/invocation_ts_proto"; +import { CheckCircle, PlayCircle, XCircle, CircleSlash } from "lucide-react"; import Link from "../components/link/link"; +import { invocation_status } from "../../proto/invocation_status_ts_proto"; -export type CommandStatus = "failed" | "succeeded" | "in-progress" | "queued" | "not-run"; - -export type BazelCommandResult = { - status: CommandStatus; - invocation: InvocationMetadata; - durationMillis?: number; -}; - -export type InvocationMetadata = - | build_event_stream.WorkflowConfigured.IInvocationMetadata - | build_event_stream.ChildInvocationsConfigured.IInvocationMetadata; +type CommandStatus = "failed" | "succeeded" | "in-progress" | "not-run"; export type ChildInvocationCardProps = { - result: BazelCommandResult; + invocation: invocation.Invocation; }; export default class ChildInvocationCard extends React.Component { - private isClickable() { - return this.props.result.status !== "queued" && this.props.result.status !== "not-run"; + private getStatus(): CommandStatus { + const inv = this.props.invocation; + switch (inv.invocationStatus) { + case invocation_status.InvocationStatus.COMPLETE_INVOCATION_STATUS: + case invocation_status.InvocationStatus.DISCONNECTED_INVOCATION_STATUS: + return inv.bazelExitCode == "SUCCESS" ? "succeeded" : "failed"; + case invocation_status.InvocationStatus.PARTIAL_INVOCATION_STATUS: + return "in-progress"; + default: + return "not-run"; + } + } + + private isClickable(status: CommandStatus): boolean { + return status !== "not-run"; + } + + private getDurationLabel(status: CommandStatus): string { + if (status == "failed" || status == "succeeded") { + return format.durationUsec(this.props.invocation.durationUsec); + } + return ""; } - private renderStatusIcon() { - switch (this.props.result.status) { + private renderStatusIcon(status: CommandStatus) { + switch (status) { case "succeeded": return ; case "failed": return ; case "in-progress": return ; - case "queued": - return ; case "not-run": return ; default: @@ -44,15 +53,16 @@ export default class ChildInvocationCard extends React.Component -
{this.renderStatusIcon()}
-
{this.props.result.invocation.bazelCommand}
-
- {this.props.result.durationMillis !== undefined && format.durationMillis(this.props.result.durationMillis)} -
+ className={`child-invocation-card status-${status} ${this.isClickable(status) ? "clickable" : ""}`} + href={this.isClickable(status) ? `/invocation/${this.props.invocation.invocationId}` : undefined}> +
{this.renderStatusIcon(status)}
+
{command}
+
{this.getDurationLabel(status)}
); } diff --git a/app/invocation/child_invocations.tsx b/app/invocation/child_invocations.tsx index 0d9f3f8dd98..f87aee6647c 100644 --- a/app/invocation/child_invocations.tsx +++ b/app/invocation/child_invocations.tsx @@ -1,61 +1,22 @@ import React from "react"; -import InvocationModel from "./invocation_model"; -import ChildInvocationCard, { CommandStatus, InvocationMetadata } from "./child_invocation_card"; -import { BazelCommandResult } from "./child_invocation_card"; -import { durationToMillisWithFallback } from "../util/proto"; +import ChildInvocationCard from "./child_invocation_card"; +import { invocation } from "../../proto/invocation_ts_proto"; export type ChildInvocationProps = { - model: InvocationModel; + childInvocations: invocation.Invocation[]; }; export default class ChildInvocations extends React.Component { - private getDurationMillis(invocation: InvocationMetadata): number | undefined { - const completedEvent = this.props.model.childInvocationCompletedByInvocationId.get(invocation.invocationId ?? ""); - if (!completedEvent) return undefined; - return durationToMillisWithFallback(completedEvent.duration, +(completedEvent?.durationMillis ?? 0)); - } - render() { - const childInvocationConfiguredEvents = this.props.model.childInvocationsConfigured; - let invocations = []; - for (let i = 0; i < childInvocationConfiguredEvents.length; i++) { - const event = childInvocationConfiguredEvents[i]; - for (let inv of event.invocation) { - invocations.push(inv); - } - } - - const results: BazelCommandResult[] = []; - let inProgressCount = 0; - const getStatus = (invocation: InvocationMetadata): CommandStatus => { - const completedEvent = this.props.model.childInvocationCompletedByInvocationId.get(invocation.invocationId ?? ""); - if (completedEvent) { - return completedEvent.exitCode === 0 ? "succeeded" : "failed"; - } else if (this.props.model.finished) { - return "not-run"; - } else if (inProgressCount === 0) { - // Only one command should be marked in progress; the rest should be - // marked queued. - inProgressCount++; - return "in-progress"; - } else { - return "queued"; - } - }; - - for (const invocation of invocations) { - results.push({ invocation, status: getStatus(invocation), durationMillis: this.getDurationMillis(invocation) }); - } - - if (!results.length) return null; + if (!this.props.childInvocations.length) return null; return (

Bazel commands

Click a command to see results.
- {results.map((result) => ( - + {this.props.childInvocations.map((result) => ( + ))}
diff --git a/app/invocation/invocation.tsx b/app/invocation/invocation.tsx index 137fef2455f..026b3520efe 100644 --- a/app/invocation/invocation.tsx +++ b/app/invocation/invocation.tsx @@ -60,6 +60,13 @@ interface State { runnerExecution?: execution_stats.Execution; runnerLastExecuteOperation?: ExecuteOperation; + /* + * We only need to update the child invocation cards right when they've started and ended. + * Memoize them on the client, so we don't need to keep fetching them from the + * db in the meantime. + */ + childInvocations: invocation.Invocation[]; + keyboardShortcutHandle: string; } @@ -80,6 +87,7 @@ export default class InvocationComponent extends React.Component { inProgress: false, error: null, keyboardShortcutHandle: "", + childInvocations: [], }; private timeoutRef: number = 0; @@ -88,6 +96,9 @@ export default class InvocationComponent extends React.Component { private modelChangedSubscription?: Subscription; private runnerExecutionRPC?: CancelablePromise; + private seenChildInvocationConfiguredIds = new Set(); + private seenChildInvocationCompletedIds = new Set(); + componentWillMount() { document.title = `Invocation ${this.props.invocationId} | BuildBuddy`; // TODO(siggisim): Move moment configuration elsewhere @@ -189,9 +200,11 @@ export default class InvocationComponent extends React.Component { this.fetchRunnerExecution(); } + const fetchChildren = this.shouldFetchChildren(this.state.model); let request = new invocation.GetInvocationRequest(); request.lookup = new invocation.InvocationLookup(); request.lookup.invocationId = this.props.invocationId; + request.lookup.fetchChildInvocations = fetchChildren; rpcService.service .getInvocation(request) .then((response: invocation.GetInvocationResponse) => { @@ -199,14 +212,24 @@ export default class InvocationComponent extends React.Component { if (!response.invocation || response.invocation.length === 0) { throw new BuildBuddyError("NotFound", "Invocation not found."); } - const model = new InvocationModel(response.invocation[0]); + const inv = response.invocation[0]; + const model = new InvocationModel(inv); // Only show the in-progress screen if we don't have any events yet. - const showInProgressScreen = model.isInProgress() && !response.invocation[0].event?.length; + const showInProgressScreen = model.isInProgress() && !inv.event?.length; + // Only update the child invocations if we've fetched new updates. + const childInvocations = fetchChildren ? inv.childInvocations : this.state.childInvocations; this.setState({ inProgress: showInProgressScreen, model: model, error: null, + childInvocations: childInvocations, }); + + if (fetchChildren) { + for (let child of childInvocations) { + this.seenChildInvocationConfiguredIds.add(child.invocationId); + } + } }) .catch((error: any) => { console.error("Failed to fetch invocation:", error); @@ -215,6 +238,29 @@ export default class InvocationComponent extends React.Component { .finally(() => this.setState({ loading: false })); } + shouldFetchChildren(model: InvocationModel | undefined): boolean { + if (!model) return true; + const childInvocationConfiguredEvents = model.childInvocationsConfigured; + let shouldFetch = false; + + for (const event of childInvocationConfiguredEvents) { + for (let inv of event.invocation) { + if (!this.seenChildInvocationConfiguredIds.has(inv.invocationId)) { + shouldFetch = true; + } + } + } + + for (const iid of model.childInvocationCompletedByInvocationId.keys()) { + if (!this.seenChildInvocationCompletedIds.has(iid)) { + this.seenChildInvocationCompletedIds.add(iid); + shouldFetch = true; + } + } + + return shouldFetch; + } + scheduleRefetch() { clearTimeout(this.timeoutRef); // Refetch invocation data in 3 seconds to update status. @@ -463,7 +509,7 @@ export default class InvocationComponent extends React.Component { )} {!isBazelInvocation && (
- +
)} diff --git a/app/invocation/invocation_model.tsx b/app/invocation/invocation_model.tsx index 28e65d8a7b9..51add637ab1 100644 --- a/app/invocation/invocation_model.tsx +++ b/app/invocation/invocation_model.tsx @@ -51,10 +51,7 @@ export default class InvocationModel { failedAction?: build_event_stream.BuildEvent; workflowConfigured?: build_event_stream.WorkflowConfigured; childInvocationsConfigured: build_event_stream.ChildInvocationsConfigured[] = []; - childInvocationCompletedByInvocationId = new Map< - string, - build_event_stream.IChildInvocationCompleted | build_event_stream.IWorkflowCommandCompleted - >(); + childInvocationCompletedByInvocationId = new Map(); workspaceStatus?: build_event_stream.WorkspaceStatus; configuration?: build_event_stream.Configuration; workspaceConfig?: build_event_stream.WorkspaceConfig; @@ -145,12 +142,6 @@ export default class InvocationModel { buildEvent.childInvocationsConfigured as build_event_stream.ChildInvocationsConfigured ); } - if (buildEvent.workflowCommandCompleted && buildEvent.id?.workflowCommandCompleted?.invocationId) { - this.childInvocationCompletedByInvocationId.set( - buildEvent.id.workflowCommandCompleted.invocationId, - buildEvent.workflowCommandCompleted - ); - } if (buildEvent.childInvocationCompleted && buildEvent.id?.childInvocationCompleted?.invocationId) { this.childInvocationCompletedByInvocationId.set( buildEvent.id.childInvocationCompleted.invocationId, diff --git a/cli/remotebazel/BUILD b/cli/remotebazel/BUILD index 9c9c375d1a6..f261bd2e3af 100644 --- a/cli/remotebazel/BUILD +++ b/cli/remotebazel/BUILD @@ -21,6 +21,7 @@ go_library( "//proto:build_event_stream_go_proto", "//proto:buildbuddy_service_go_proto", "//proto:eventlog_go_proto", + "//proto:execution_stats_go_proto", "//proto:git_go_proto", "//proto:invocation_go_proto", "//proto:remote_execution_go_proto", @@ -39,6 +40,7 @@ go_library( "@com_github_go_git_go_git_v5//plumbing", "@org_golang_google_genproto_googleapis_bytestream//:bytestream", "@org_golang_google_grpc//metadata", + "@org_golang_x_sync//errgroup", "@org_golang_x_sys//unix", ], ) diff --git a/cli/remotebazel/remotebazel.go b/cli/remotebazel/remotebazel.go index 5b6d935caee..8ecb769ad16 100644 --- a/cli/remotebazel/remotebazel.go +++ b/cli/remotebazel/remotebazel.go @@ -37,12 +37,14 @@ import ( "github.com/buildbuddy-io/buildbuddy/server/util/status" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" + "golang.org/x/sync/errgroup" "golang.org/x/sys/unix" "google.golang.org/grpc/metadata" bespb "github.com/buildbuddy-io/buildbuddy/proto/build_event_stream" bbspb "github.com/buildbuddy-io/buildbuddy/proto/buildbuddy_service" elpb "github.com/buildbuddy-io/buildbuddy/proto/eventlog" + espb "github.com/buildbuddy-io/buildbuddy/proto/execution_stats" gitpb "github.com/buildbuddy-io/buildbuddy/proto/git" inpb "github.com/buildbuddy-io/buildbuddy/proto/invocation" repb "github.com/buildbuddy-io/buildbuddy/proto/remote_execution" @@ -770,7 +772,6 @@ func Run(ctx context.Context, opts RunOpts, repoConfig *RepoConfig) (int, error) CommitSha: repoConfig.CommitSHA, Branch: repoConfig.Ref, }, - BazelCommand: strings.Join(bazelArgs, " "), Os: reqOS, Arch: reqArch, ContainerImage: *containerImage, @@ -780,6 +781,18 @@ func Run(ctx context.Context, opts RunOpts, repoConfig *RepoConfig) (int, error) } req.GetRepoState().Patch = append(req.GetRepoState().Patch, repoConfig.Patches...) + // TODO(Maggie): Clean up after we've migrated fully to use `Steps` + stepsMode := os.Getenv("STEPS_MODE") == "1" + if stepsMode { + req.Steps = []*rnpb.Step{ + { + Run: fmt.Sprintf("bazel %s", strings.Join(bazelArgs, " ")), + }, + } + } else { + req.BazelCommand = strings.Join(bazelArgs, " ") + } + if *timeout != 0 { req.Timeout = timeout.String() } @@ -824,25 +837,44 @@ func Run(ctx context.Context, opts RunOpts, repoConfig *RepoConfig) (int, error) } isInvocationRunning = false - inRsp, err := bbClient.GetInvocation(ctx, &inpb.GetInvocationRequest{Lookup: &inpb.InvocationLookup{InvocationId: iid}}) + eg := errgroup.Group{} + var inRsp *inpb.GetInvocationResponse + var exRsp *espb.GetExecutionResponse + eg.Go(func() error { + var err error + inRsp, err = bbClient.GetInvocation(ctx, &inpb.GetInvocationRequest{Lookup: &inpb.InvocationLookup{InvocationId: iid}}) + if err != nil { + return fmt.Errorf("could not retrieve invocation: %s", err) + } + if len(inRsp.GetInvocation()) == 0 { + return fmt.Errorf("invocation not found") + } + return nil + }) + eg.Go(func() error { + var err error + exRsp, err = bbClient.GetExecution(ctx, &espb.GetExecutionRequest{ExecutionLookup: &espb.ExecutionLookup{ + InvocationId: iid, + }}) + if err != nil { + return fmt.Errorf("could not retrieve ci_runner execution: %s", err) + } + if len(exRsp.GetExecution()) == 0 { + return fmt.Errorf("ci_runner execution not found") + } + return nil + }) + err = eg.Wait() if err != nil { - return 0, fmt.Errorf("could not retrieve invocation: %s", err) - } - if len(inRsp.GetInvocation()) == 0 { - return 0, fmt.Errorf("invocation not found") + return 0, err } childIID := "" - exitCode := -1 runfilesRoot := "" var runfiles []*bespb.File var runfileDirectories []*bespb.Tree var defaultRunArgs []string for _, e := range inRsp.GetInvocation()[0].GetEvent() { - if cic, ok := e.GetBuildEvent().GetPayload().(*bespb.BuildEvent_ChildInvocationCompleted); ok { - childIID = e.GetBuildEvent().GetId().GetChildInvocationCompleted().GetInvocationId() - exitCode = int(cic.ChildInvocationCompleted.ExitCode) - } if runOutput { if rta, ok := e.GetBuildEvent().GetPayload().(*bespb.BuildEvent_RunTargetAnalyzed); ok { runfilesRoot = rta.RunTargetAnalyzed.GetRunfilesRoot() @@ -853,13 +885,7 @@ func Run(ctx context.Context, opts RunOpts, repoConfig *RepoConfig) (int, error) } } - if exitCode == -1 { - if ctx.Err() != nil { - return 0, ctx.Err() - } - return 0, fmt.Errorf("could not determine remote Bazel exit code") - } - + exitCode := int(exRsp.GetExecution()[0].ExitCode) if fetchOutputs && exitCode == 0 { conn, err := grpc_client.DialSimple(opts.Server) if err != nil { diff --git a/enterprise/server/cmd/ci_runner/main.go b/enterprise/server/cmd/ci_runner/main.go index e7863e3becd..c30cf23e0b2 100644 --- a/enterprise/server/cmd/ci_runner/main.go +++ b/enterprise/server/cmd/ci_runner/main.go @@ -72,6 +72,8 @@ const ( // Name of the bazel output base dir. This is written under the workspace // so that it can be cleaned up when the workspace is cleaned up. outputBaseDirName = "output-base" + // Name of the dir where we write bazel run scripts. + runScriptDirName = "bazel-run-scripts" // Fraction of disk space that must be in use before we attempt to reclaim // disk space. @@ -193,6 +195,7 @@ var ( fallbackToCleanCheckout = flag.Bool("fallback_to_clean_checkout", true, "Fallback to cloning the repo from scratch if sync fails (for testing purposes only).") shellCharsRequiringQuote = regexp.MustCompile(`[^\w@%+=:,./-]`) + invocationIDRegex = regexp.MustCompile(`Streaming build results to:\s+.*?/invocation/([a-f0-9-]+)`) ) type workspace struct { @@ -216,6 +219,9 @@ type workspace struct { // WORKSPACE // buildbuddy.yaml (optional workflow config) // ... + // bazel-run-scripts/ (generated run scripts for targets that were + // built remotely, but intended to run locally + // on the client's machine) // // The CI runner stays in the rootDir while setting up the repo, and then // changes to the "repo-root" dir just before executing any actions. @@ -238,6 +244,11 @@ type workspace struct { // An invocation ID that should be forced, or "" if any is allowed. forcedInvocationID string + // A unique ID for each ci_runner run. + // If the ci_runner execution is retried, it should have the same + // invocation ID, but a different run ID. + runID string + // An error that occurred while setting up the workspace, which should be // reported for all action logs instead of actually executing the action. setupError error @@ -281,6 +292,9 @@ type buildEventReporter struct { startTime time.Time cancelBackgroundFlush func() + // Child invocations detected by scanning the build logs + childInvocations []string + mu sync.Mutex // protects(progressCount) progressCount int32 } @@ -310,7 +324,7 @@ func newBuildEventReporter(ctx context.Context, besBackend string, apiKey string uploader = ul } - return &buildEventReporter{apiKey: apiKey, bep: bep, uploader: uploader, log: newInvocationLog(), invocationID: iid, isWorkflow: isWorkflow}, nil + return &buildEventReporter{apiKey: apiKey, bep: bep, uploader: uploader, log: newInvocationLog(), invocationID: iid, isWorkflow: isWorkflow, childInvocations: []string{}}, nil } func (r *buildEventReporter) InvocationID() string { @@ -428,12 +442,14 @@ func (r *buildEventReporter) Start(startTime time.Time) error { return err } - // Flush whenever the log buffer fills past a certain threshold. - r.log.writeListener = func() { + r.log.writeListener = func(b []byte) { + r.emitBuildEventsForBazelCommands(b) + // Flush whenever the log buffer fills past a certain threshold. if size := r.log.Len(); size >= progressFlushThresholdBytes { r.FlushProgress() // ignore error; it will surface in `bep.Finish()` } } + stopFlushingProgress := r.startBackgroundProgressFlush() r.cancelBackgroundFlush = stopFlushingProgress return nil @@ -544,6 +560,74 @@ func (r *buildEventReporter) startBackgroundProgressFlush() func() { } } +// emitBuildEventsForBazelCommands scans command output logs for bazel invocations +// in order to emit bazel build events. +// +// Event publishing errors will be surfaced in the caller func when calling +// `buildEventPublisher.Finish()` +// +// TODO: Emit TargetConfigured and TargetCompleted events to render artifacts +// for each command +func (r *buildEventReporter) emitBuildEventsForBazelCommands(b []byte) { + output := string(b) + + // Check whether a bazel invocation was invoked + iidMatches := invocationIDRegex.FindAllStringSubmatch(output, -1) + for _, m := range iidMatches { + iid := m[1] + childStarted := contains(r.childInvocations, iid) + + var buildEvent *bespb.BuildEvent + if childStarted { + // The `Streaming build results to` log line is printed at the start and + // end of a bazel build. If we've already seen it for this invocation, + // we know the build has finished. + buildEvent = &bespb.BuildEvent{ + Id: &bespb.BuildEventId{ + Id: &bespb.BuildEventId_ChildInvocationCompleted{ + ChildInvocationCompleted: &bespb.BuildEventId_ChildInvocationCompletedId{InvocationId: iid}, + }, + }, + Payload: &bespb.BuildEvent_ChildInvocationCompleted{ChildInvocationCompleted: &bespb.ChildInvocationCompleted{}}, + } + } else { + r.childInvocations = append(r.childInvocations, iid) + + cic := &bespb.ChildInvocationsConfigured{ + Invocation: []*bespb.ChildInvocationsConfigured_InvocationMetadata{ + { + InvocationId: iid, + }, + }, + } + buildEvent = &bespb.BuildEvent{ + Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_ChildInvocationsConfigured{ChildInvocationsConfigured: &bespb.BuildEventId_ChildInvocationsConfiguredId{}}}, + Payload: &bespb.BuildEvent_ChildInvocationsConfigured{ChildInvocationsConfigured: cic}, + Children: []*bespb.BuildEventId{ + { + Id: &bespb.BuildEventId_ChildInvocationCompleted{ + ChildInvocationCompleted: &bespb.BuildEventId_ChildInvocationCompletedId{InvocationId: iid}, + }, + }, + }, + } + } + + if err := r.Publish(buildEvent); err != nil { + continue + } + } +} + +func contains(s []string, target string) bool { + for _, elem := range s { + if elem == target { + return true + } + } + return false +} + func main() { if err := run(); err != nil { if result, ok := err.(*actionResult); ok { @@ -572,10 +656,16 @@ func run() error { return runBazelWrapper() } + runID, err := newUUID() + if err != nil { + return err + } + ws := &workspace{ startTime: time.Now(), buildbuddyAPIKey: os.Getenv(buildbuddyAPIKeyEnvVarName), forcedInvocationID: *invocationID, + runID: runID, } ctx := context.Background() @@ -653,7 +743,7 @@ func run() error { } // Write default bazelrc - if err := writeBazelrc(buildbuddyBazelrcPath, buildEventReporter.invocationID, rootDir); err != nil { + if err := writeBazelrc(buildbuddyBazelrcPath, buildEventReporter.invocationID, runID, rootDir); err != nil { return status.WrapError(err, "write "+buildbuddyBazelrcPath) } // Delete bazelrc before exiting. Use abs path since we might cd after this @@ -839,18 +929,18 @@ func (r *buildEventReporter) Printf(format string, vals ...interface{}) { type invocationLog struct { lockingbuffer.LockingBuffer writer io.Writer - writeListener func() + writeListener func(b []byte) } func newInvocationLog() *invocationLog { - invLog := &invocationLog{writeListener: func() {}} + invLog := &invocationLog{writeListener: func(b []byte) {}} invLog.writer = io.MultiWriter(&invLog.LockingBuffer, os.Stderr) return invLog } func (invLog *invocationLog) Write(b []byte) (int, error) { + invLog.writeListener(b) n, err := invLog.writer.Write(b) - invLog.writeListener() return n, err } @@ -919,6 +1009,7 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { if *visibility != "" { buildMetadata.Metadata["VISIBILITY"] = *visibility } + buildMetadata.Metadata["RUN_ID"] = ws.runID buildMetadataEvent := &bespb.BuildEvent{ Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_BuildMetadata{BuildMetadata: &bespb.BuildEventId_BuildMetadataId{}}}, Payload: &bespb.BuildEvent_BuildMetadata{BuildMetadata: buildMetadata}, @@ -952,11 +1043,6 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { return status.WrapError(err, "failed to get action to run") } - cic := &bespb.ChildInvocationsConfigured{} - cicEvent := &bespb.BuildEvent{ - Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_ChildInvocationsConfigured{ChildInvocationsConfigured: &bespb.BuildEventId_ChildInvocationsConfiguredId{}}}, - Payload: &bespb.BuildEvent_ChildInvocationsConfigured{ChildInvocationsConfigured: cic}, - } // If the triggering commit merges cleanly with the target branch, the runner // will execute the configured bazel commands. Otherwise, the runner will // exit early without running those commands and does not need to create @@ -972,24 +1058,10 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { BazelCommand: bazelCmd, }) wfcEvent.Children = append(wfcEvent.Children, &bespb.BuildEventId{ - Id: &bespb.BuildEventId_WorkflowCommandCompleted{WorkflowCommandCompleted: &bespb.BuildEventId_WorkflowCommandCompletedId{ - InvocationId: iid, - }}, - }) - cic.Invocation = append(cic.Invocation, &bespb.ChildInvocationsConfigured_InvocationMetadata{ - InvocationId: iid, - BazelCommand: bazelCmd, - }) - cicEvent.Children = append(cicEvent.Children, &bespb.BuildEventId{ - Id: &bespb.BuildEventId_ChildInvocationCompleted{ChildInvocationCompleted: &bespb.BuildEventId_ChildInvocationCompletedId{ - InvocationId: iid, - }}, + Id: &bespb.BuildEventId_WorkflowCommandCompleted{WorkflowCommandCompleted: &bespb.BuildEventId_WorkflowCommandCompletedId{}}, }) } } - if err := ar.reporter.Publish(cicEvent); err != nil { - return nil - } if !publishedWorkspaceStatus { if err := ar.reporter.Publish(ar.workspaceStatusEvent()); err != nil { @@ -1024,8 +1096,22 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { } }() - // TODO(Maggie): Emit BES events for each bazel command for i, step := range action.Steps { + cmdStartTime := time.Now() + + // The UI uses TargetConfigured/Completed build events to render artifacts + // associated with targets. + // Here we consider the step a "target" and publish the events for it, + // so that we can render artifacts for it. + targetLabel := fmt.Sprintf("steps[%d]", i) + ar.reporter.Publish(&bespb.BuildEvent{ + Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_TargetConfigured{ + TargetConfigured: &bespb.BuildEventId_TargetConfiguredId{ + Label: targetLabel, + }, + }}, + Payload: &bespb.BuildEvent_Configured{Configured: &bespb.TargetConfigured{}}, + }) if err := provisionArtifactsDir(ws, i); err != nil { return err } @@ -1033,11 +1119,25 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { runErr := runBashCommand(ctx, step.Run, nil, action.BazelWorkspaceDir, ar.reporter) exitCode := getExitCode(runErr) - // Flush progress after every command. - // Stop execution early on BEP failure, but ignore error -- it will surface in `bep.Finish()`. - if err := ar.reporter.FlushProgress(); err != nil { - break - } + artifactsDir := artifactsPathForCommand(ws, i) + namedSetID := filepath.Base(artifactsDir) + ar.reporter.Publish(&bespb.BuildEvent{ + Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_TargetCompleted{ + TargetCompleted: &bespb.BuildEventId_TargetCompletedId{ + Label: targetLabel, + }, + }}, + Payload: &bespb.BuildEvent_Completed{Completed: &bespb.TargetComplete{ + Success: runErr == nil, + OutputGroup: []*bespb.OutputGroup{ + { + FileSets: []*bespb.BuildEventId_NamedSetOfFilesId{ + {Id: namedSetID}, + }, + }, + }, + }}, + }) if exitCode != noExitCode { ar.reporter.Printf("%s(command exited with code %d)%s\n", ansiGray, exitCode, ansiReset) @@ -1060,8 +1160,6 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { // If we get an OOM or a Bazel internal error, copy debug outputs to the // artifacts directory so they get uploaded as workflow artifacts. - artifactsDir := artifactsPathForCommand(ws, i) - namedSetID := filepath.Base(artifactsDir) if exitCode == bazelOOMErrorExitCode || exitCode == bazelInternalErrorExitCode { jvmOutPath := filepath.Join(ar.rootDir, outputBaseDirName, "server/jvm.out") if err := os.Link(jvmOutPath, filepath.Join(artifactsDir, "jvm.out")); err != nil { @@ -1069,10 +1167,13 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { } } if exitCode == bazelOOMErrorExitCode { - // TODO(Maggie): Use invocation ID of failed bazel command - heapDumpPath := filepath.Join(ar.rootDir, outputBaseDirName, fmt.Sprintf("%d.heapdump.hprof", i)) - if err := os.Link(heapDumpPath, filepath.Join(artifactsDir, "heapdump.hprof")); err != nil { - ar.reporter.Printf("%sfailed to preserve heapdump.hprof: %s%s\n", ansiGray, err, ansiReset) + bazelInvocationIDs := ar.reporter.childInvocations + if len(bazelInvocationIDs) > 0 { + lastInvocationID := bazelInvocationIDs[len(bazelInvocationIDs)-1] + heapDumpPath := filepath.Join(ar.rootDir, outputBaseDirName, fmt.Sprintf("%s.heapdump.hprof", lastInvocationID)) + if err := os.Link(heapDumpPath, filepath.Join(artifactsDir, "heapdump.hprof")); err != nil { + ar.reporter.Printf("%sfailed to preserve heapdump.hprof: %s%s\n", ansiGray, err, ansiReset) + } } } @@ -1081,14 +1182,80 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { uploader.UploadDirectory(namedSetID, artifactsDir) // does not return an error } + // If extracting run information from builds was requested, + // extract it and send it via the event stream. + runScriptDir := filepath.Join(ws.rootDir, runScriptDirName) + log.Warningf("Check if %s exists", runScriptDir) + if _, err = os.Stat(runScriptDir); err == nil { + log.Warningf("It does exist") + err = filepath.Walk(runScriptDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if !info.IsDir() { + runScriptPath := filepath.Join(runScriptDir, info.Name()) + log.Warningf("It does exist, processing path %s", runScriptPath) + runScriptInfo, err := processRunScript(ctx, runScriptPath) + if err != nil { + log.Warningf("process run script error") + return err + } + e := &bespb.BuildEvent{ + Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_RunTargetAnalyzed{}}, + Payload: &bespb.BuildEvent_RunTargetAnalyzed{RunTargetAnalyzed: &bespb.RunTargetAnalyzed{ + Arguments: runScriptInfo.args, + RunfilesRoot: runScriptInfo.runfilesRoot, + Runfiles: runScriptInfo.runfiles, + RunfileDirectories: runScriptInfo.runfileDirs, + }}, + } + ar.reporter.Publish(e) + } + return nil + }) + if err != nil { + return err + } + + // Clear the directory so it's in a clean state for future steps + if err := os.RemoveAll(runScriptDir); err != nil { + return err + } + } + + duration := time.Since(cmdStartTime) + completedEvent := &bespb.BuildEvent{ + Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_WorkflowCommandCompleted{ + WorkflowCommandCompleted: &bespb.BuildEventId_WorkflowCommandCompletedId{}, + }}, + Payload: &bespb.BuildEvent_WorkflowCommandCompleted{WorkflowCommandCompleted: &bespb.WorkflowCommandCompleted{ + ExitCode: int32(exitCode), + StartTime: timestamppb.New(cmdStartTime), + Duration: durationpb.New(duration), + }}, + } + if err := ar.reporter.Publish(completedEvent); err != nil { + break + } + if exitCode != 0 { return runErr } + + // Flush progress after every command. + // Stop execution early on BEP failure, but ignore error -- it will surface in `bep.Finish()`. + if err := ar.reporter.FlushProgress(); err != nil { + break + } } // TODO(Maggie): Consolidate action.BazelCommands with action.Steps for i, bazelCmd := range action.BazelCommands { cmdStartTime := time.Now() + if i >= len(wfc.GetInvocation()) { + return status.InternalErrorf("No invocation metadata generated for bazel_commands[%d]; this should never happen", i) + } + iid := wfc.GetInvocation()[i].GetInvocationId() // Publish a TargetConfigured event associated with the bazel command so // that we can render artifacts associated with the "target". @@ -1102,15 +1269,10 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { Payload: &bespb.BuildEvent_Configured{Configured: &bespb.TargetConfigured{}}, }) - if i >= len(wfc.GetInvocation()) { - return status.InternalErrorf("No invocation metadata generated for bazel_commands[%d]; this should never happen", i) - } - if err := provisionArtifactsDir(ws, i); err != nil { return err } - iid := wfc.GetInvocation()[i].GetInvocationId() args, err := ws.bazelArgsWithCustomBazelrc(bazelCmd) if err != nil { return status.InvalidArgumentErrorf("failed to parse bazel command: %s", err) @@ -1219,13 +1381,9 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { } } - // Publish the status of each command as well as the finish time. - // Stop execution early on BEP failure, but ignore error -- it will surface in `bep.Finish()`. duration := time.Since(cmdStartTime) completedEvent := &bespb.BuildEvent{ - Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_WorkflowCommandCompleted{WorkflowCommandCompleted: &bespb.BuildEventId_WorkflowCommandCompletedId{ - InvocationId: iid, - }}}, + Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_WorkflowCommandCompleted{WorkflowCommandCompleted: &bespb.BuildEventId_WorkflowCommandCompletedId{}}}, Payload: &bespb.BuildEvent_WorkflowCommandCompleted{WorkflowCommandCompleted: &bespb.WorkflowCommandCompleted{ ExitCode: int32(exitCode), StartTime: timestamppb.New(cmdStartTime), @@ -1235,20 +1393,6 @@ func (ar *actionRunner) Run(ctx context.Context, ws *workspace) error { if err := ar.reporter.Publish(completedEvent); err != nil { break } - duration = time.Since(cmdStartTime) - childCompletedEvent := &bespb.BuildEvent{ - Id: &bespb.BuildEventId{Id: &bespb.BuildEventId_ChildInvocationCompleted{ChildInvocationCompleted: &bespb.BuildEventId_ChildInvocationCompletedId{ - InvocationId: iid, - }}}, - Payload: &bespb.BuildEvent_ChildInvocationCompleted{ChildInvocationCompleted: &bespb.ChildInvocationCompleted{ - ExitCode: int32(exitCode), - StartTime: timestamppb.New(cmdStartTime), - Duration: durationpb.New(duration), - }}, - } - if err := ar.reporter.Publish(childCompletedEvent); err != nil { - break - } if runErr != nil { // Return early if the command failed. @@ -2145,7 +2289,7 @@ type commandError struct { } func (e *commandError) Error() string { - return fmt.Sprintf("%s: %q", e.Err.Error(), e.Output) + return e.Err.Error() } func isRemoteAlreadyExists(err error) bool { @@ -2199,7 +2343,7 @@ func invocationURL(invocationID string) string { return urlPrefix + invocationID } -func writeBazelrc(path, invocationID, rootDir string) error { +func writeBazelrc(path, invocationID, runID, rootDir string) error { if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { return err } @@ -2211,6 +2355,7 @@ func writeBazelrc(path, invocationID, rootDir string) error { lines := []string{ "build --build_metadata=PARENT_INVOCATION_ID=" + invocationID, + "build --build_metadata=PARENT_RUN_ID=" + runID, // Note: these pieces of metadata are set to match the WorkspaceStatus event // for the outer (workflow) invocation. "build --build_metadata=COMMIT_SHA=" + *commitSHA, @@ -2532,7 +2677,7 @@ func runCredentialHelper() error { func runBazelWrapper() error { rootPath := os.Getenv("CI_RUNNER_ROOT") - bazelCmd := os.Getenv("BAZEL_BIN") + bazelBin := os.Getenv("BAZEL_BIN") // These arguments are passed as env vars so we don't have to parse out flags // intended for the bazel wrapper from startup options intended to be passed through @@ -2552,16 +2697,31 @@ func runBazelWrapper() error { return fmt.Errorf("find bazel workspace: %w", err) } - args := os.Args[1:] + bazelCmd := os.Args[1:] startupArgs, err := customBazelrcOptions(rootPath, workspacePath) if err != nil { return err } - args = append([]string{bazelCmd}, append(startupArgs, args...)...) + + // Users can request to build a target on the remote runner and run them locally. + // To support this, have Bazel write out a run script using the --script_path flag and + // extract run options (i.e. args, runfile information) from the generated run script. + if *recordRunMetadata && len(bazelCmd) > 0 && bazelCmd[0] == "run" { + runScriptDir := filepath.Join(rootPath, runScriptDirName) + if err := os.MkdirAll(runScriptDir, 0755); err != nil { + return err + } + + runScriptPath := filepath.Join(runScriptDir, "run.sh") + bazelCmd = appendBazelSubcommandArgs(bazelCmd, "--script_path="+runScriptPath) + log.Warningf("Just added flag, new cmd %s", bazelCmd) + } + + bazelCmd = append([]string{bazelBin}, append(startupArgs, bazelCmd...)...) // Replace the process running the bazel wrapper with the process running bazel, // so there are no remaining traces of the wrapper script. - return syscall.Exec(bazelCmd, args, os.Environ()) + return syscall.Exec(bazelBin, bazelCmd, os.Environ()) } // Attempts to free up disk space. diff --git a/enterprise/server/test/integration/ci_runner/ci_runner_test.go b/enterprise/server/test/integration/ci_runner/ci_runner_test.go index c0c0ab90cf3..d24dacb820b 100644 --- a/enterprise/server/test/integration/ci_runner/ci_runner_test.go +++ b/enterprise/server/test/integration/ci_runner/ci_runner_test.go @@ -1649,8 +1649,31 @@ func TestArtifactUploads_GRPCLog(t *testing.T) { } func TestArtifactUploads_JVMLog(t *testing.T) { + workspaceSimulateOOM := map[string]string{ + "WORKSPACE": `workspace(name = "test")`, + "BUILD": ` +sh_test(name = "simulate_oom", srcs = ["simulate_oom.sh"]) +`, + "simulate_oom.sh": ` +output_base="$1" +echo "java.lang.OutOfMemoryError" > "$output_base/server/jvm.out" +exit 37 +`, + "buildbuddy.yaml": ` +actions: + - name: "Test" + triggers: + pull_request: { branches: [ master ] } + push: { branches: [ master ] } + steps: + - run: | + output_base=$(bazel info output_base) + bazel run :simulate_oom "$output_base" +`, + } + wsPath := testfs.MakeTempDir(t) - repoPath, headCommitSHA := makeGitRepo(t, workspaceContentsWithArtifactUploads) + repoPath, headCommitSHA := makeGitRepo(t, workspaceSimulateOOM) runnerFlags := []string{ "--workflow_id=test-workflow", @@ -1661,8 +1684,6 @@ func TestArtifactUploads_JVMLog(t *testing.T) { "--commit_sha=" + headCommitSHA, "--target_repo_url=file://" + repoPath, "--target_branch=master", - // Set a small JVM memory limit to cause Bazel to OOM. - "--bazel_startup_flags=" + bazelStartupFlags + " --host_jvm_args=-Xmx5m", } // Start the app so the runner can use it as the BES+cache backend. app := buildbuddy.Run(t) diff --git a/enterprise/server/test/integration/remote_bazel/BUILD b/enterprise/server/test/integration/remote_bazel/BUILD index 348b65907d6..f97e4858d5b 100644 --- a/enterprise/server/test/integration/remote_bazel/BUILD +++ b/enterprise/server/test/integration/remote_bazel/BUILD @@ -26,6 +26,7 @@ go_test( ], deps = [ "//cli/remotebazel", + "//enterprise/server/execution_service", "//enterprise/server/hostedrunner", "//enterprise/server/invocation_search_service", "//enterprise/server/test/integration/remote_execution/rbetest", diff --git a/enterprise/server/test/integration/remote_bazel/remote_bazel_test.go b/enterprise/server/test/integration/remote_bazel/remote_bazel_test.go index 7f2d5b5e2c0..e8098d83188 100644 --- a/enterprise/server/test/integration/remote_bazel/remote_bazel_test.go +++ b/enterprise/server/test/integration/remote_bazel/remote_bazel_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/buildbuddy-io/buildbuddy/cli/remotebazel" + "github.com/buildbuddy-io/buildbuddy/enterprise/server/execution_service" "github.com/buildbuddy-io/buildbuddy/enterprise/server/hostedrunner" "github.com/buildbuddy-io/buildbuddy/enterprise/server/invocation_search_service" "github.com/buildbuddy-io/buildbuddy/enterprise/server/test/integration/remote_execution/rbetest" @@ -243,6 +244,7 @@ func runLocalServerAndExecutor(t *testing.T, githubToken string) (*rbetest.Env, keyValStore, err := memory_kvstore.NewMemoryKeyValStore() require.NoError(t, err) e.SetKeyValStore(keyValStore) + e.SetExecutionService(execution_service.NewExecutionService(e)) }, }) diff --git a/proto/build_event_stream.proto b/proto/build_event_stream.proto index efeac342be0..b812de1124c 100644 --- a/proto/build_event_stream.proto +++ b/proto/build_event_stream.proto @@ -259,20 +259,22 @@ message BuildEventId { // run as part of a BuildBuddy workflow. message WorkflowConfiguredId {} - // Identifier of an event providing the status of a completed invocation + // Identifier of an event providing the status of a completed step // within a BuildBuddy workflow. // - // A workflow action can specify multiple bazel commands, creating a - // "sub-invocation" for each one. There will be one of these events generated - // for each of those Bazel commands, where the invocation ID here refers to - // the invocation ID of the Bazel command that was completed. + // A workflow action can specify multiple steps, each of which is a block of + // bash code. This event will be emitted when the entire block has completed. + // + // For each bazel command invoked in the step, a ChildInvocationsConfigured + // and ChildInvocationCompleted event will also be emitted. + // TODO(Maggie): Once FE references have been cleaned up, rename to + // `RemoteRunnerStepCompleted` message WorkflowCommandCompletedId { - // Invocation ID of the command that was completed. - string invocation_id = 1; + reserved 1; } - // Identifier of an event providing a list of commands that are - // run as children of this invocation. + // Identifier of an event indicating that a child invocation of this + // invocation has been invoked. message ChildInvocationsConfiguredId {} // Identifier of an event providing the status of a completed child @@ -1514,8 +1516,9 @@ message WorkflowConfigured { reserved 13; } -// Event describing a workflow command that completed. -// Note: the event ID holds the invocation ID that identifies the command. +// Event describing a workflow step that completed. +// This differs from ChildInvocationCompleted because each step can contain +// multiple child invocations. // Next Tag: 6 message WorkflowCommandCompleted { // The overall status of the command. A command was successful if and only if diff --git a/proto/invocation.proto b/proto/invocation.proto index 8ac29c39784..c32059bac62 100644 --- a/proto/invocation.proto +++ b/proto/invocation.proto @@ -153,6 +153,14 @@ message Invocation { // remote bazel invocation runs bazel commands, each of those is a child // invocation). repeated Invocation child_invocations = 36; + + // A unique ID for each run of an invocation. If the invocation is retried, + // this should change with each retry. + string run_id = 37; + + // If set, the run ID of the parent invocation that invoked this invocation + // (i.e. a workflow or remote bazel invocation). + string parent_run_id = 38; } message InvocationEvent { diff --git a/server/backends/invocationdb/invocationdb.go b/server/backends/invocationdb/invocationdb.go index d0519848cda..c8ff4bcc2c4 100644 --- a/server/backends/invocationdb/invocationdb.go +++ b/server/backends/invocationdb/invocationdb.go @@ -181,9 +181,9 @@ func (d *InvocationDB) LookupInvocation(ctx context.Context, invocationID string return ti, nil } -func (d *InvocationDB) LookupChildInvocations(ctx context.Context, parentInvocationID string) ([]*tables.Invocation, error) { +func (d *InvocationDB) LookupChildInvocations(ctx context.Context, parentRunID string) ([]*tables.Invocation, error) { rq := d.h.NewQuery(ctx, "invocationdb_get_child_invocations").Raw( - `SELECT * FROM "Invocations" WHERE parent_invocation_id = ? ORDER BY created_at_usec`, parentInvocationID) + `SELECT * FROM "Invocations" WHERE parent_run_id = ? ORDER BY created_at_usec`, parentRunID) return db.ScanAll(rq, &tables.Invocation{}) } @@ -356,5 +356,7 @@ func TableInvocationToProto(i *tables.Invocation) *inpb.Invocation { // claims the tags are. out.Tags, _ = invocation_format.SplitAndTrimAndDedupeTags(i.Tags, false) out.ParentInvocationId = i.ParentInvocationID + out.ParentRunId = i.ParentRunID + out.RunId = i.RunID return out } diff --git a/server/build_event_protocol/build_event_handler/build_event_handler.go b/server/build_event_protocol/build_event_handler/build_event_handler.go index 648b7e4eac2..049901115a6 100644 --- a/server/build_event_protocol/build_event_handler/build_event_handler.go +++ b/server/build_event_protocol/build_event_handler/build_event_handler.go @@ -719,6 +719,14 @@ func isWorkspaceStatusEvent(bazelBuildEvent *build_event_stream.BuildEvent) bool return false } +func isChildInvocationsConfiguredEvent(bazelBuildEvent *build_event_stream.BuildEvent) bool { + switch bazelBuildEvent.Payload.(type) { + case *build_event_stream.BuildEvent_ChildInvocationsConfigured: + return true + } + return false +} + func readBazelEvent(obe *pepb.OrderedBuildEvent, out *build_event_stream.BuildEvent) error { switch buildEvent := obe.Event.Event.(type) { case *bepb.BuildEvent_BazelEvent: @@ -1143,11 +1151,11 @@ func (e *EventChannel) processSingleEvent(event *inpb.InvocationEvent, iid strin return err } - // Small optimization: Flush the event stream after the workspace status event. Most of the - // command line options and workspace info has come through by then, so we have - // something to show the user. Flushing the proto file here allows that when the - // client fetches status for the incomplete build. Also flush if we haven't in over a minute. - if isWorkspaceStatusEvent(event.BuildEvent) || e.pw.TimeSinceLastWrite().Minutes() > 1 { + // Small optimization: For certain event types, flush the event stream + // immediately to show things to the user faster when fetching status + // of an incomplete build. + /// Also flush if we haven't in over a minute. + if shouldFlushImmediately(event.BuildEvent) || e.pw.TimeSinceLastWrite().Minutes() > 1 { if err := e.pw.Flush(e.ctx); err != nil { return err } @@ -1167,6 +1175,16 @@ func (e *EventChannel) processSingleEvent(event *inpb.InvocationEvent, iid strin return nil } +func shouldFlushImmediately(bazelBuildEvent *build_event_stream.BuildEvent) bool { + // Workspace status event: Most of the command line options and workspace info + // has come through by then, so we have a good amount of info to show the user + // about the in-progress build + // + // Child invocations configured event: If a child invocation starts, flush + // the event stream so we can link to the child invocation in the UI + return isWorkspaceStatusEvent(bazelBuildEvent) || isChildInvocationsConfiguredEvent(bazelBuildEvent) +} + const apiFacetsExpiration = 1 * time.Hour func (e *EventChannel) flushAPIFacets(iid string) error { @@ -1527,6 +1545,8 @@ func (e *EventChannel) tableInvocationFromProto(p *inpb.Invocation, blobID strin } i.Tags = tags i.ParentInvocationID = p.ParentInvocationId + i.ParentRunID = p.ParentRunId + i.RunID = p.RunId userGroupPerms, err := perms.ForAuthenticatedGroup(e.ctx, e.env) if err != nil { diff --git a/server/build_event_protocol/event_parser/event_parser.go b/server/build_event_protocol/event_parser/event_parser.go index 6813a6e3be3..c2143c24bcf 100644 --- a/server/build_event_protocol/event_parser/event_parser.go +++ b/server/build_event_protocol/event_parser/event_parser.go @@ -115,7 +115,9 @@ type fieldPriorities struct { Command, Pattern, Tags, - ParentInvocationId int + ParentInvocationId, + ParentRunId, + RunId int } func NewStreamingEventParser(invocation *inpb.Invocation) *StreamingEventParser { @@ -435,6 +437,12 @@ func (sep *StreamingEventParser) fillInvocationFromBuildMetadata(metadata map[st if parentInvocationId, ok := metadata["PARENT_INVOCATION_ID"]; ok && parentInvocationId != "" { sep.setParentInvocationId(parentInvocationId, priority) } + if parentRunId, ok := metadata["PARENT_RUN_ID"]; ok && parentRunId != "" { + sep.setParentRunId(parentRunId, priority) + } + if runId, ok := metadata["RUN_ID"]; ok && runId != "" { + sep.setRunId(runId, priority) + } return nil } @@ -521,3 +529,17 @@ func (sep *StreamingEventParser) setParentInvocationId(value string, priority in sep.invocation.ParentInvocationId = value } } + +func (sep *StreamingEventParser) setParentRunId(value string, priority int) { + if sep.priority.ParentRunId <= priority { + sep.priority.ParentRunId = priority + sep.invocation.ParentRunId = value + } +} + +func (sep *StreamingEventParser) setRunId(value string, priority int) { + if sep.priority.RunId <= priority { + sep.priority.RunId = priority + sep.invocation.RunId = value + } +} diff --git a/server/buildbuddy_server/buildbuddy_server.go b/server/buildbuddy_server/buildbuddy_server.go index 12a30ca866a..f0d8fd691b0 100644 --- a/server/buildbuddy_server/buildbuddy_server.go +++ b/server/buildbuddy_server/buildbuddy_server.go @@ -141,8 +141,9 @@ func (s *BuildBuddyServer) GetInvocation(ctx context.Context, req *inpb.GetInvoc } } - if req.GetLookup().GetFetchChildInvocations() { - children, err := s.env.GetInvocationDB().LookupChildInvocations(ctx, inv.GetInvocationId()) + // Fetch children by run ID so that we don't fetch children from earlier retries + if req.GetLookup().GetFetchChildInvocations() && inv.GetRunId() != "" { + children, err := s.env.GetInvocationDB().LookupChildInvocations(ctx, inv.GetRunId()) if err != nil { return nil, err } diff --git a/server/interfaces/interfaces.go b/server/interfaces/interfaces.go index e8f6c08df4e..fbf2325c4f4 100644 --- a/server/interfaces/interfaces.go +++ b/server/interfaces/interfaces.go @@ -415,7 +415,7 @@ type InvocationDB interface { LookupGroupFromInvocation(ctx context.Context, invocationID string) (*tables.Group, error) LookupGroupIDFromInvocation(ctx context.Context, invocationID string) (string, error) LookupExpiredInvocations(ctx context.Context, cutoffTime time.Time, limit int) ([]*tables.Invocation, error) - LookupChildInvocations(ctx context.Context, parentInvocationID string) ([]*tables.Invocation, error) + LookupChildInvocations(ctx context.Context, parentRunID string) ([]*tables.Invocation, error) DeleteInvocation(ctx context.Context, invocationID string) error DeleteInvocationWithPermsCheck(ctx context.Context, authenticatedUser *UserInfo, invocationID string) error FillCounts(ctx context.Context, log *telpb.TelemetryStat) error diff --git a/server/tables/tables.go b/server/tables/tables.go index 4c6e131cd38..a2348aa01b8 100644 --- a/server/tables/tables.go +++ b/server/tables/tables.go @@ -157,6 +157,7 @@ type Invocation struct { InvocationUUID []byte `gorm:"size:16;default:NULL;uniqueIndex:invocation_invocation_uuid;unique"` Success bool Attempt uint64 `gorm:"not null;default:0"` + RunID string BazelExitCode string // The user-specified setting of how to download outputs from remote cache. @@ -173,6 +174,7 @@ type Invocation struct { Tags string ParentInvocationID string `gorm:"index:parent_invocation_id_index"` + ParentRunID string `gorm:"index:parent_run_id_index"` } func (i *Invocation) TableName() string { diff --git a/server/util/clickhouse/schema/schema.go b/server/util/clickhouse/schema/schema.go index 6f10286fd5a..26c31305442 100644 --- a/server/util/clickhouse/schema/schema.go +++ b/server/util/clickhouse/schema/schema.go @@ -117,6 +117,8 @@ type Invocation struct { RemoteExecutionEnabled bool Tags []string `gorm:"type:Array(String);"` ParentInvocationUUID string + RunID string + ParentRunID string } func (i *Invocation) ExcludedFields() []string { @@ -479,5 +481,7 @@ func ToInvocationFromPrimaryDB(ti *tables.Invocation) (*Invocation, error) { RemoteExecutionEnabled: ti.RemoteExecutionEnabled, Tags: invocation_format.ConvertDBTagsToOLAP(ti.Tags), ParentInvocationUUID: parentInvocationUUID, + RunID: ti.RunID, + ParentRunID: ti.ParentRunID, }, nil }