fix: qwen2.5 vl rope (#13486)

* qwen25vl: bump max pixels

* qwen25vl: mrope

fix qwen2.5vl window

* qwen25vl: vision rope
This commit is contained in:
Michael Yang
2025-12-15 17:30:33 -08:00
committed by GitHub
parent ffbe8e076d
commit 971d62595a
6 changed files with 195 additions and 216 deletions

View File

@@ -19,8 +19,8 @@ type ImageProcessor struct {
maxPixels int
factor int
rescaleFactor float32
imageMean []float32
imageStd []float32
imageMean [3]float32
imageStd [3]float32
}
// newImageProcessor creates a new image processor with default values
@@ -34,11 +34,11 @@ func newImageProcessor(c fs.Config) ImageProcessor {
temporalPatchSize: 2,
mergeSize: mergeSize,
minPixels: 56 * 56,
maxPixels: int(c.Uint("vision.max_pixels", 28*28*1280)), // 1MP limit
maxPixels: int(c.Uint("vision.max_pixels", 2<<20)), // 2M limit
factor: patchSize * mergeSize,
rescaleFactor: 1.0 / 255.0,
imageMean: imageproc.ClipDefaultMean[:],
imageStd: imageproc.ClipDefaultSTD[:],
imageMean: imageproc.ClipDefaultMean,
imageStd: imageproc.ClipDefaultSTD,
}
}
@@ -90,13 +90,7 @@ func (p *ImageProcessor) ProcessImage(img image.Image) ([]float32, *Grid, error)
// Resize image using existing functions
resizedImg := imageproc.Resize(img, image.Point{X: resizedWidth, Y: resizedHeight}, imageproc.ResizeBilinear)
normalizedPixels := imageproc.Normalize(
resizedImg,
[3]float32{p.imageMean[0], p.imageMean[1], p.imageMean[2]},
[3]float32{p.imageStd[0], p.imageStd[1], p.imageStd[2]},
true, // rescale
true, // channelFirst
)
normalizedPixels := imageproc.Normalize(resizedImg, p.imageMean, p.imageStd, true, true)
// Calculate grid dimensions
grid := &Grid{