quantize any fp16/fp32 model

- FROM /path/to/{safetensors,pytorch}
- FROM /path/to/fp{16,32}.bin
- FROM model:fp{16,32}
This commit is contained in:
Michael Yang
2024-04-12 13:55:12 -07:00
parent d091fe3c21
commit 9685c34509
12 changed files with 654 additions and 556 deletions

View File

@@ -5,6 +5,7 @@ import (
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
@@ -47,7 +48,7 @@ type ByteOrder interface {
// ModelArch declares the operations listed below for a model architecture:
// GetTensors, LoadVocab and WriteGGUF.
//
// NOTE(review): this block appears to be a diff rendering whose +/- markers
// were stripped, so two WriteGGUF signatures are shown side by side. Go does
// not allow an interface to declare the same method name twice, so only one of
// them can exist in the real file. Presumably the first is the removed
// signature and the second is its replacement (the same change adds the "io"
// import) — confirm against the actual source file.
type ModelArch interface {
// GetTensors takes no arguments and reports failure through its error result.
// Presumably it loads the model's tensors — confirm with the implementations.
GetTensors() error
// LoadVocab takes no arguments and reports failure through its error result.
// Presumably it loads the model's vocabulary — confirm with the implementations.
LoadVocab() error
// WriteGGUF (presumably the pre-change form): takes no arguments and returns
// a string plus an error. The meaning of the string is not visible here —
// TODO confirm (possibly a path to the written file).
WriteGGUF() (string, error)
// WriteGGUF (presumably the post-change form): writes to the caller-supplied
// io.WriteSeeker and returns only an error, so the caller controls the
// destination.
WriteGGUF(io.WriteSeeker) error
}
type ModelFormat interface {