Skip to content

feat: improve RAM estimation by using values from summary #5525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 5, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions pkg/xsysinfo/gguf.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package xsysinfo

import (
"errors"

gguf "github.com/gpustack/gguf-parser-go"
)

Expand All @@ -18,35 +16,45 @@
func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
// Get model metadata
m := f.Metadata()
a := f.Architecture()

estimate := f.EstimateLLaMACppRun()

lmes := estimate.SummarizeItem(true, 0, 0)
estimatedVRAM := uint64(0)
availableLayers := lmes.OffloadLayers // TODO: check if we can just use OffloadLayers here

for _, vram := range lmes.VRAMs {
estimatedVRAM += uint64(vram.NonUMA)
}

// Calculate base model size
modelSize := uint64(m.Size)

if a.BlockCount == 0 {
return nil, errors.New("block count is 0")
if availableLayers == 0 {
availableLayers = 1
}

if estimatedVRAM == 0 {
estimatedVRAM = 1
}

// Estimate number of layers that can fit in VRAM
// Each layer typically requires about 1/32 of the model size
layerSize := modelSize / uint64(a.BlockCount)
estimatedLayers := int(availableVRAM / layerSize)
layerSize := estimatedVRAM / availableLayers

// If we can't fit even one layer, we need to do full offload
isFullOffload := estimatedLayers <= 0
if isFullOffload {
estimatedLayers = 0
estimatedLayers := int(availableVRAM / layerSize)

Check failure

Code scanning / gosec

integer overflow conversion uint64 -> int Error

integer overflow conversion uint64 -> int
if availableVRAM > estimatedVRAM {
estimatedLayers = int(availableLayers)

Check failure

Code scanning / gosec

integer overflow conversion uint64 -> int Error

integer overflow conversion uint64 -> int
}

// Calculate estimated VRAM usage
estimatedVRAM := uint64(estimatedLayers) * layerSize

return &VRAMEstimate{
TotalVRAM: availableVRAM,
AvailableVRAM: availableVRAM,
ModelSize: modelSize,
EstimatedLayers: estimatedLayers,
EstimatedVRAM: estimatedVRAM,
IsFullOffload: isFullOffload,
IsFullOffload: availableVRAM > estimatedVRAM,
}, nil
}
Loading