Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix image libintern #20

Merged
merged 4 commits into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fluxnetes_scheduler=$(kubectl get pods --selector=job-name=job -o json | jq -r .

echo
echo "Fluxnetes job pod is ${fluxnetes_job_pod}"
sleep 10
sleep 30

# Shared function to check output
function check_output {
Expand Down Expand Up @@ -75,4 +75,4 @@ check_output 'check-scheduled-by' "${fluxnetes_scheduler}" "fluxnetes"
# But events tell us what actually happened, so let's parse through them and find our pods
# This tells us the Event -> reason "Scheduled" and who it was reported by.
reported_by=$(kubectl events --for pod/${fluxnetes_job_pod} -o json | jq -c '[ .items[] | select( .reason | contains("Scheduled")) ]' | jq -r .[0].reportingComponent)
check_output 'reported-by-fluxnetes' "${reported_by}" "fluxnetes"
check_output 'reported-by-fluxnetes' "${reported_by}" "fluxnetes"
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,11 @@ SELECT group_name, group_size from pods_provisional;

### TODO

- [ ] kubectl plugin to get fluxion state?
- [ ] Figure out how In-tree registry plugins (that are related to resources) should be run to inform fluxion
- we likely want to move the "assume pod" step outside of that schedule function, or ensure the pod that is passed in matches.
- [ ] Optimize queries.
- [ ] Restarting with postgres shouldn't have crashloopbackoff when the database isn't ready yet
- [ ] need to cancel reservations and clear table at end of cycle
- [ ] The queue should inherit (and return) the start time (when the pod was first seen) "start" in scheduler.go
- Testing:
- [ ] need to test duration / completion time works (run job with short duration, should be cancelled/cleaned up)
Expand Down
4 changes: 2 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ INSTALL_PREFIX ?= /usr
LIB_PREFIX ?= /usr/lib
LOCALBIN ?= $(shell pwd)/bin
COMMONENVVAR=GOOS=$(shell uname -s | tr A-Z a-z)
BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -ljansson -lhwloc -lboost_system -lflux-hostlist -lboost_graph -lyaml-cpp"
BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -ljansson -lhwloc -lflux-hostlist -lboost_graph -lyaml-cpp"


LOCAL_REGISTRY=localhost:5000
Expand Down Expand Up @@ -35,4 +35,4 @@ protoc: $(LOCALBIN)
.PHONY: proto
proto: protoc
PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluxnetes/pkg/fluxion-grpc/fluxion.proto
PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluxnetes/pkg/service-grpc/service.proto
PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative fluxnetes/pkg/service-grpc/service.proto
14 changes: 3 additions & 11 deletions src/build/scheduler/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,11 @@ RUN go mod tidy && \
make server FLUX_SCHED_ROOT=/opt/flux-sched

# minimize build!
FROM ubuntu:jammy
FROM fluxrm/flux-sched:jammy
COPY --from=builder /go/src/fluxnetes/bin/server /bin/fluxion-service
COPY --from=builder /usr/lib/flux/ /usr/lib/flux
COPY --from=builder /usr/lib/libflux* /usr/lib/

RUN apt-get update && apt-get -qq install -y --no-install-recommends \
libboost-graph-dev \
libboost-system-dev \
libboost-filesystem-dev \
libboost-regex-dev \
libyaml-cpp-dev \
libjansson-dev \
hwloc && \
apt-get clean && \
mkdir -p /home/data/jobspecs /home/data/jgf && chmod -R ugo+rwx /home/data
USER root
RUN mkdir -p /home/data/jobspecs /home/data/jgf && chmod -R ugo+rwx /home/data
ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/lib:/usr/lib/flux
2 changes: 1 addition & 1 deletion src/fluxnetes/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/converged-computing/fluxnetes
go 1.21

require (
github.com/flux-framework/fluxion-go v0.32.1-0.20240420052153-909523c84ca2
github.com/flux-framework/fluxion-go v0.39.0
github.com/stretchr/testify v1.7.0
google.golang.org/grpc v1.38.0
google.golang.org/protobuf v1.26.0
Expand Down
4 changes: 2 additions & 2 deletions src/fluxnetes/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod h1:ZZM
github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/flux-framework/fluxion-go v0.32.1-0.20240420052153-909523c84ca2 h1:Yz/vVX0XfB2q51ZLh2p8YI5vphvv0rZF4PqtKPscvsY=
github.com/flux-framework/fluxion-go v0.32.1-0.20240420052153-909523c84ca2/go.mod h1:jA5+kOSLxchFzixzYEvMAGjkXB5yszO/HxUwdhX/5/U=
github.com/flux-framework/fluxion-go v0.39.0 h1:f68CTxHouyOvjfgu5YKYFHQ405vxtdSlG8crPph8+DU=
github.com/flux-framework/fluxion-go v0.39.0/go.mod h1:jA5+kOSLxchFzixzYEvMAGjkXB5yszO/HxUwdhX/5/U=
github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=
github.com/form3tech-oss/jwt-go v3.2.3+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
Expand Down
8 changes: 3 additions & 5 deletions src/fluxnetes/pkg/jgf/jgf.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,11 +97,9 @@ func (g *FluxJGF) MakeBidirectionalEdge(parent, child string) {
// MakeEdge creates an edge for the JGF
func (g *FluxJGF) MakeEdge(source string, target string, contains string) {
newedge := edge{
Source: source,
Target: target,
Metadata: edgeMetadata{
Name: map[string]string{containmentKey: contains},
},
Source: source,
Target: target,
Metadata: edgeMetadata{Subsystem: containmentKey},
}
g.Graph.Edges = append(g.Graph.Edges, newedge)
}
Expand Down
8 changes: 4 additions & 4 deletions src/fluxnetes/pkg/jgf/jgf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ func TestNewFluxJGF(t *testing.T) {
fmt.Println(out)

// Add some nodes!
computeNodeA := fluxgraph.MakeNode("node", subnetNodeA.Metadata.Name, 0)
computeNodeB := fluxgraph.MakeNode("node", subnetNodeB.Metadata.Name, 1)
computeNodeA := fluxgraph.MakeNode("node", subnetNodeA.Metadata.Type, 0)
computeNodeB := fluxgraph.MakeNode("node", subnetNodeB.Metadata.Type, 1)
fluxgraph.MakeBidirectionalEdge(subnetNodeA.Id, computeNodeA.Id)
fluxgraph.MakeBidirectionalEdge(subnetNodeB.Id, computeNodeB.Id)

Expand All @@ -56,11 +56,11 @@ func TestNewFluxJGF(t *testing.T) {
fmt.Println(out)

// Add a GPU to one, and cores to the other
subpath := fmt.Sprintf("%s/%s", subnetNodeA.Metadata.Name, computeNodeA.Metadata.Name)
subpath := fmt.Sprintf("%s/%s", subnetNodeA.Metadata.Type, computeNodeA.Metadata.Type)
gpuNodeA := fluxgraph.MakeGPU(NvidiaGPU, subpath, 1, 0)
fluxgraph.MakeBidirectionalEdge(computeNodeA.Id, gpuNodeA.Id)

subpath = fmt.Sprintf("%s/%s", subnetNodeB.Metadata.Name, computeNodeB.Metadata.Name)
subpath = fmt.Sprintf("%s/%s", subnetNodeB.Metadata.Type, computeNodeB.Metadata.Type)
coreNode := fluxgraph.MakeCore(CoreType, subpath, 0)
fluxgraph.MakeBidirectionalEdge(computeNodeB.Id, coreNode.Id)

Expand Down
2 changes: 1 addition & 1 deletion src/fluxnetes/pkg/jgf/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type edge struct {
}

type edgeMetadata struct {
Name map[string]string `json:"name,omitempty"`
Subsystem string `json:"subsystem"`
}

type nodeMetadata struct {
Expand Down
8 changes: 3 additions & 5 deletions src/fluxnetes/pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,6 @@ func computeTotalRequests(podList *corev1.PodList) map[corev1.ResourceName]resou

type allocation struct {
Type string
Name string
Basename string
CoreCount int
}
Expand Down Expand Up @@ -290,7 +289,6 @@ func ParseAllocResult(allocated, groupName string) []allocation {
if metadata["type"].(string) == jgf.NodeType {
result = append(result, allocation{
Type: metadata["type"].(string),
Name: metadata["name"].(string),
Basename: metadata["basename"].(string),
CoreCount: corecount,
})
Expand All @@ -301,9 +299,9 @@ func ParseAllocResult(allocated, groupName string) []allocation {
}
fmt.Printf("Final node result for %s\n", groupName)
for i, alloc := range result {
fmt.Printf("Node %d: %s\n", i, alloc.Name)
fmt.Printf(" Type: %s\n Name: %s\n Basename: %s\n CoreCount: %d\n",
alloc.Type, alloc.Name, alloc.Basename, alloc.CoreCount)
fmt.Printf("Node %d: %s\n", i, alloc.Basename)
fmt.Printf(" Type: %s\n Basename: %s\n CoreCount: %d\n",
alloc.Type, alloc.Basename, alloc.CoreCount)

}
return result
Expand Down
Loading