Skip to content

Commit

Permalink
fix(initializer): only select backends that exist
Browse files Browse the repository at this point in the history
We were not checking whether the backend binary exists before picking
these up from the asset dir.

Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler committed Jul 1, 2024
1 parent bd2f95c commit 0352bc3
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 9 deletions.
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,8 @@ COPY --from=grpc /opt/grpc /usr/local

# Rebuild with defaults backends
WORKDIR /build

## Build the binary
RUN make build

RUN if [ ! -d "/build/sources/go-piper/piper-phonemize/pi/lib/" ]; then \
Expand Down
39 changes: 30 additions & 9 deletions pkg/model/initializers.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,14 +247,23 @@ func selectGRPCProcess(backend, assetDir string, f16 bool) string {
}

if xsysinfo.HasCPUCaps(cpuid.AVX2) {
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPAVX2)
p := backendPath(assetDir, LLamaCPPAVX2)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX2 variant", backend)
grpcProcess = p
}
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPAVX)
p := backendPath(assetDir, LLamaCPPAVX)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with AVX variant", backend)
grpcProcess = p
}
} else {
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
grpcProcess = backendPath(assetDir, LLamaCPPFallback)
p := backendPath(assetDir, LLamaCPPFallback)
if _, err := os.Stat(p); err == nil {
log.Info().Msgf("[%s] attempting to load with fallback variant", backend)
grpcProcess = p
}
}

return grpcProcess
Expand Down Expand Up @@ -511,11 +520,23 @@ func (ml *ModelLoader) GreedyLoader(opts ...Option) (grpc.Backend, error) {
}

if autoDetect && key == LLamaCPP && err != nil {
backendToUse := LLamaCPPFallback
// try as hard as possible to run the llama.cpp variants
backendToUse := ""
if xsysinfo.HasCPUCaps(cpuid.AVX2) {
backendToUse = LLamaCPPAVX2
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
backendToUse = LLamaCPPAVX2
}
} else if xsysinfo.HasCPUCaps(cpuid.AVX) {
backendToUse = LLamaCPPAVX
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPAVX2)); err == nil {
backendToUse = LLamaCPPAVX
}
} else {
if _, err := os.Stat(backendPath(o.assetDir, LLamaCPPFallback)); err == nil {
backendToUse = LLamaCPPFallback
} else {
// The fallback binary is not present in the asset dir, so skip the fallback attempt
continue
}
}

// Autodetection failed, try the fallback
Expand Down

0 comments on commit 0352bc3

Please sign in to comment.