Skip to content

Commit

Permalink
Backend for getting logs of a trial
Browse files Browse the repository at this point in the history
  • Loading branch information
d-gol committed Nov 25, 2022
1 parent 0a1cb31 commit b6dacbd
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 0 deletions.
1 change: 1 addition & 0 deletions cmd/new-ui/v1beta1/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func main() {
http.HandleFunc("/katib/edit_template/", kuh.EditTemplate)
http.HandleFunc("/katib/delete_template/", kuh.DeleteTemplate)
http.HandleFunc("/katib/fetch_namespaces", kuh.FetchNamespaces)
http.HandleFunc("/katib/fetch_trial_logs/", kuh.FetchTrialLogs)

log.Printf("Serving at %s:%s", *host, *port)
if err := http.ListenAndServe(fmt.Sprintf("%s:%s", *host, *port), nil); err != nil {
Expand Down
8 changes: 8 additions & 0 deletions manifests/v1beta1/components/ui/rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ rules:
- suggestions
verbs:
- "*"
- apiGroups:
- ""
resources:
- pods
- pods/log
verbs:
- list
- get
---
apiVersion: v1
kind: ServiceAccount
Expand Down
94 changes: 94 additions & 0 deletions pkg/new-ui/v1beta1/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ limitations under the License.
package v1beta1

import (
"bytes"
"context"
"encoding/json"
"io"
"log"
"net/http"
"path/filepath"
Expand All @@ -27,8 +30,15 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

experimentv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/experiments/v1beta1"
trialsv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/trials/v1beta1"
api_pb_v1beta1 "github.com/kubeflow/katib/pkg/apis/manager/v1beta1"
"github.com/kubeflow/katib/pkg/util/v1beta1/katibclient"

apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
corev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client/config"
)

func NewKatibUIHandler(dbManagerAddr string) *KatibUIHandler {
Expand Down Expand Up @@ -421,3 +431,87 @@ func (k *KatibUIHandler) FetchTrial(w http.ResponseWriter, r *http.Request) {
return
}
}

// FetchTrialLogs fetches logs for a trial in specific namespace.
func (k *KatibUIHandler) FetchTrialLogs(w http.ResponseWriter, r *http.Request) {
log.Printf("Requesting logs")

trialName := r.URL.Query()["trialName"][0]
namespace := r.URL.Query()["namespace"][0]
log.Printf("Requesting logs")

logs, err := getTrialLogs(k, trialName, namespace)
if err != nil {
log.Printf("GetLogs failed: %v", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
response, err := json.Marshal(logs)
if err != nil {
log.Printf("Marshal logs failed: %v", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
w.Write(response)
}

// GetTrialLogs returns logs of a master Pod for the given job name and namespace
func getTrialLogs(k *KatibUIHandler, trialName string, namespace string) (string, error) {
cfg, err := config.GetConfig()
if err != nil {
return "", err
}

clientset, err := corev1.NewForConfig(cfg)
if err != nil {
return "", err
}

trial := &trialsv1beta1.Trial{}
if err := k.katibClient.GetClient().Get(context.TODO(), types.NamespacedName{Name: trialName, Namespace: namespace}, trial); err != nil {
return "", err
}

selectionLabel := "training.kubeflow.org/job-name=" + trialName + ",training.kubeflow.org/job-role=master"
if trial.Spec.RunSpec.GetKind() == "Job" {
selectionLabel = "job-name=" + trialName
}

podList, err := clientset.Pods(namespace).List(context.Background(), metav1.ListOptions{LabelSelector: selectionLabel})
if err != nil {
return "", err
}

if len(podList.Items) == 0 {
message := `Logs for the trial could not be found.
Was 'retain: true' specified in the Experiment definition?
An example can be found here: https://github.com/kubeflow/katib/blob/master/examples/v1beta1/argo/argo-workflow.yaml#L33`

return message, nil
}

podLogOpts := apiv1.PodLogOptions{}
podLogOpts.Container = trial.Spec.PrimaryContainerName
for container := range podList.Items[0].Spec.Containers {
if podList.Items[0].Spec.Containers[container].Name == "metrics-logger-and-collector" {
podLogOpts.Container = "metrics-logger-and-collector"
break
}
}

req := clientset.Pods(namespace).GetLogs(podList.Items[0].Name, &podLogOpts)
podLogs, err := req.Stream(context.Background())
if err != nil {
return "", err
}
defer podLogs.Close()

buf := new(bytes.Buffer)
_, err = io.Copy(buf, podLogs)
if err != nil {
return "", err
}
str := buf.String()

return str, nil
}

0 comments on commit b6dacbd

Please sign in to comment.