truncation: fixed runner truncation logic + removed server truncation (#12839)

This PR consolidates all embedding prompt-length checking, truncation, and prompt token counting into the runner to ensure a single source of truth.
This commit is contained in:
nicole pardal
2025-12-08 11:20:28 -08:00
committed by GitHub
parent 5dae738067
commit e082d60a24
6 changed files with 278 additions and 88 deletions

View File

@@ -780,8 +780,8 @@ func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn
return s.completionResp
}
func (s *mockLlm) Embedding(ctx context.Context, input string) ([]float32, error) {
return s.embeddingResp, s.embeddingRespErr
// Embedding is the mock implementation of the embedding call. It ignores ctx
// and input and returns the canned s.embeddingResp together with
// s.embeddingRespErr; the middle int result is always 0.
//
// NOTE(review): per the commit description the int result is presumably the
// prompt token count now reported by the runner — confirm against the
// interface this mock satisfies.
func (s *mockLlm) Embedding(ctx context.Context, input string) ([]float32, int, error) {
return s.embeddingResp, 0, s.embeddingRespErr
}
func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {