Skip to content

Commit

Permalink
Fix image libintern (#20)
Browse files Browse the repository at this point in the history
* bug: name is removed from response

Problem: This is the same issue that hit fluence:
Name was removed from the Match response.
Solution: Remove it from here as well, since it is not
widely used.

Signed-off-by: vsoch <[email protected]>
  • Loading branch information
vsoch authored Oct 11, 2024
1 parent 0d577aa commit 4add14e
Show file tree
Hide file tree
Showing 10 changed files with 22 additions and 34 deletions.
4 changes: 2 additions & 2 deletions .github/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fluxnetes_scheduler=$(kubectl get pods --selector=job-name=job -o json | jq -r .

echo
echo "Fluxnetes job pod is ${fluxnetes_job_pod}"
sleep 10
sleep 30

# Shared function to check output
function check_output {
Expand Down Expand Up @@ -75,4 +75,4 @@ check_output 'check-scheduled-by' "${fluxnetes_scheduler}" "fluxnetes"
# But events tell us actually what happened, let's parse through them and find our pods
# This tells us the Event -> reason "Scheduled" and who it was reported by.
reported_by=$(kubectl events --for pod/${fluxnetes_job_pod} -o json | jq -c '[ .items[] | select( .reason | contains("Scheduled")) ]' | jq -r .[0].reportingComponent)
check_output 'reported-by-fluxnetes' "${reported_by}" "fluxnetes"
check_output 'reported-by-fluxnetes' "${reported_by}" "fluxnetes"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,11 @@ SELECT group_name, group_size from pods_provisional;

### TODO

- [ ] kubectl plugin to get fluxion state?
- [ ] Figure out how In-tree registry plugins (that are related to resources) should be run to inform fluxion
- we likely want to move assume pod outside of that schedule function, or ensure pod passed matches.
- [ ] Optimize queries.
- [ ] Restarting with postgres shouldn't have crashloopbackoff when the database isn't ready yet
- [ ] need to cancel reservations and clear table at end of cycle
- [ ] The queue should inherit (and return) the start time (when the pod was first seen) "start" in scheduler.go
- Testing:
- [ ] need to test duration / completion time works (run job with short duration, should be cancelled/cleaned up)
Expand Down
4 changes: 2 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ INSTALL_PREFIX ?= /usr
LIB_PREFIX ?= /usr/lib
LOCALBIN ?= $(shell pwd)/bin
COMMONENVVAR=GOOS=$(shell uname -s | tr A-Z a-z)
BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp"
BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -ljansson -lhwloc -lflux-hostlist -lboost_graph -lyaml-cpp"


LOCAL_REGISTRY=localhost:5000
Expand Down Expand Up @@ -35,4 +35,4 @@ protoc: $(LOCALBIN)
.PHONY: proto
proto: protoc
PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluxnetes/pkg/fluxion-grpc/fluxion.proto
PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluxnetes/pkg/service-grpc/service.proto
PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluxnetes/pkg/service-grpc/service.proto
14 changes: 3 additions & 11 deletions src/build/scheduler/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,11 @@ RUN go mod tidy && \
make server FLUX_SCHED_ROOT=/opt/flux-sched

# minimize build!
FROM ubuntu:jammy
FROM fluxrm/flux-sched:jammy
COPY --from=builder /go/src/fluxnetes/bin/server /bin/fluxion-service
COPY --from=builder /usr/lib/flux/ /usr/lib/flux
COPY --from=builder /usr/lib/libflux* /usr/lib/

RUN apt-get update && apt-get -qq install -y --no-install-recommends \
libboost-graph-dev \
libboost-system-dev \
libboost-filesystem-dev \
libboost-regex-dev \
libyaml-cpp-dev \
libjansson-dev \
hwloc && \
apt-get clean && \
mkdir -p /home/data/jobspecs /home/data/jgf && chmod -R ugo+rwx /home/data
USER root
RUN mkdir -p /home/data/jobspecs /home/data/jgf && chmod -R ugo+rwx /home/data
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/lib:/usr/lib/flux
2 changes: 1 addition & 1 deletion src/fluxnetes/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/converged-computing/fluxnetes
go 1.21

require (
github.com/flux-framework/fluxion-go v0.32.1-0.20240420052153-909523c84ca2
github.com/flux-framework/fluxion-go v0.39.0
github.com/stretchr/testify v1.7.0
google.golang.org/grpc v1.38.0
google.golang.org/protobuf v1.26.0
Expand Down
4 changes: 2 additions & 2 deletions src/fluxnetes/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZM
github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/flux-framework/fluxion-go v0.32.1-0.20240420052153-909523c84ca2 h1:Yz/vVX0XfB2q51ZLh2p8YI5vphvv0rZF4PqtKPscvsY=
github.com/flux-framework/fluxion-go v0.32.1-0.20240420052153-909523c84ca2/go.mod h1:jA5+kOSLxchFzixzYEvMAGjkXB5yszO/HxUwdhX/5/U=
github.com/flux-framework/fluxion-go v0.39.0 h1:f68CTxHouyOvjfgu5YKYFHQ405vxtdSlG8crPph8+DU=
github.com/flux-framework/fluxion-go v0.39.0/go.mod h1:jA5+kOSLxchFzixzYEvMAGjkXB5yszO/HxUwdhX/5/U=
github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=
github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
Expand Down
8 changes: 3 additions & 5 deletions src/fluxnetes/pkg/jgf/jgf.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,9 @@ func (g *FluxJGF) MakeBidirectionalEdge(parent, child string) {
// MakeEdge creates an edge for the JGF
func (g *FluxJGF) MakeEdge(source string, target string, contains string) {
newedge := edge{
Source: source,
Target: target,
Metadata: edgeMetadata{
Name: map[string]string{containmentKey: contains},
},
Source: source,
Target: target,
Metadata: edgeMetadata{Subsystem: containmentKey},
}
g.Graph.Edges = append(g.Graph.Edges, newedge)
}
Expand Down
8 changes: 4 additions & 4 deletions src/fluxnetes/pkg/jgf/jgf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ func TestNewFluxJGF(t *testing.T) {
fmt.Println(out)

// Add some nodes!
computeNodeA := fluxgraph.MakeNode("node", subnetNodeA.Metadata.Name, 0)
computeNodeB := fluxgraph.MakeNode("node", subnetNodeB.Metadata.Name, 1)
computeNodeA := fluxgraph.MakeNode("node", subnetNodeA.Metadata.Type, 0)
computeNodeB := fluxgraph.MakeNode("node", subnetNodeB.Metadata.Type, 1)
fluxgraph.MakeBidirectionalEdge(subnetNodeA.Id, computeNodeA.Id)
fluxgraph.MakeBidirectionalEdge(subnetNodeB.Id, computeNodeB.Id)

Expand All @@ -56,11 +56,11 @@ func TestNewFluxJGF(t *testing.T) {
fmt.Println(out)

// Add a GPU to one, and cores to the other
subpath := fmt.Sprintf("%s/%s", subnetNodeA.Metadata.Name, computeNodeA.Metadata.Name)
subpath := fmt.Sprintf("%s/%s", subnetNodeA.Metadata.Type, computeNodeA.Metadata.Type)
gpuNodeA := fluxgraph.MakeGPU(NvidiaGPU, subpath, 1, 0)
fluxgraph.MakeBidirectionalEdge(computeNodeA.Id, gpuNodeA.Id)

subpath = fmt.Sprintf("%s/%s", subnetNodeB.Metadata.Name, computeNodeB.Metadata.Name)
subpath = fmt.Sprintf("%s/%s", subnetNodeB.Metadata.Type, computeNodeB.Metadata.Type)
coreNode := fluxgraph.MakeCore(CoreType, subpath, 0)
fluxgraph.MakeBidirectionalEdge(computeNodeB.Id, coreNode.Id)

Expand Down
2 changes: 1 addition & 1 deletion src/fluxnetes/pkg/jgf/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type edge struct {
}

type edgeMetadata struct {
Name map[string]string `json:"name,omitempty"`
Subsystem string `json:"subsystem"`
}

type nodeMetadata struct {
Expand Down
8 changes: 3 additions & 5 deletions src/fluxnetes/pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,6 @@ func computeTotalRequests(podList *corev1.PodList) map[corev1.ResourceName]resou

type allocation struct {
Type string
Name string
Basename string
CoreCount int
}
Expand Down Expand Up @@ -290,7 +289,6 @@ func ParseAllocResult(allocated, groupName string) []allocation {
if metadata["type"].(string) == jgf.NodeType {
result = append(result, allocation{
Type: metadata["type"].(string),
Name: metadata["name"].(string),
Basename: metadata["basename"].(string),
CoreCount: corecount,
})
Expand All @@ -301,9 +299,9 @@ func ParseAllocResult(allocated, groupName string) []allocation {
}
fmt.Printf("Final node result for %s\n", groupName)
for i, alloc := range result {
fmt.Printf("Node %d: %s\n", i, alloc.Name)
fmt.Printf(" Type: %s\n Name: %s\n Basename: %s\n CoreCount: %d\n",
alloc.Type, alloc.Name, alloc.Basename, alloc.CoreCount)
fmt.Printf("Node %d: %s\n", i, alloc.Basename)
fmt.Printf(" Type: %s\n Basename: %s\n CoreCount: %d\n",
alloc.Type, alloc.Basename, alloc.CoreCount)

}
return result
Expand Down

0 comments on commit 4add14e

Please sign in to comment.