mirror of https://github.com/likelovewant/ollama-for-amd.git
This reverts commit 5d347f6d6f.
@@ -709,13 +709,13 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
 	seq, err := s.NewSequence(req.Content, nil, NewSequenceParams{
 		embedding: true,
-		truncate: req.Truncate,
+
+		// TODO (jmorganca): this should be provided by the server via the
+		// request options and truncated here in the runner, instead of relying on
+		// the server's truncate logic
+		truncate: true,
 	})
 	if err != nil {
-		if errors.Is(err, errorInputTooLong) {
-			http.Error(w, err.Error(), http.StatusBadRequest)
-			return
-		}
 		http.Error(w, fmt.Sprintf("Failed to create new sequence: %v", err), http.StatusInternalServerError)
 		return
 	}
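
In this first hunk the revert appears to swap out the request-controlled truncation (truncate: req.Truncate) and the special-case handling of errorInputTooLong, going back to always truncating embedding inputs in the runner. The declaration of errorInputTooLong is not part of this diff; the snippet below is only a sketch of the sentinel-error pattern the removed lines relied on, with the error text and the statusFor helper invented for illustration.

package main

import (
	"errors"
	"fmt"
	"net/http"
)

// Hypothetical sentinel: the real errorInputTooLong lives elsewhere in the
// runner and its exact message is not shown in this diff.
var errorInputTooLong = errors.New("input length exceeds maximum context length")

// statusFor mirrors the removed error handling: the sentinel maps to 400
// (the client sent more input than fits without truncation), anything else
// to 500 (a genuine server-side failure while creating the sequence).
func statusFor(err error) int {
	if errors.Is(err, errorInputTooLong) {
		return http.StatusBadRequest
	}
	return http.StatusInternalServerError
}

func main() {
	fmt.Println(statusFor(errorInputTooLong))            // 400
	fmt.Println(statusFor(errors.New("backend failed"))) // 500
}
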
@@ -758,8 +758,7 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) {
 	embedding := <-seq.embedding
 
 	if err := json.NewEncoder(w).Encode(&llm.EmbeddingResponse{
-		Embedding: embedding,
-		PromptEvalCount: seq.numPromptInputs,
+		Embedding: embedding,
 	}); err != nil {
 		http.Error(w, fmt.Sprintf("failed to encode response: %v", err), http.StatusInternalServerError)
 	}
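
Taken together with the first hunk, this second hunk returns the embedding response to carrying only the vector: PromptEvalCount is no longer populated once the revert is applied. Below is a minimal, hypothetical client-side sketch of decoding that response; the struct name and JSON tags are assumptions, while the Go field names Embedding and PromptEvalCount come from the diff above.

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

// embeddingResponse is an assumed client-side mirror of llm.EmbeddingResponse;
// the JSON tags are guesses, not copied from the ollama source.
type embeddingResponse struct {
	Embedding       []float32 `json:"embedding"`
	PromptEvalCount int       `json:"prompt_eval_count"` // stays zero after this revert
}

func main() {
	// Example payload in the post-revert shape: only the embedding is present.
	body := `{"embedding":[0.12,-0.34,0.56]}`

	var resp embeddingResponse
	if err := json.NewDecoder(strings.NewReader(body)).Decode(&resp); err != nil {
		fmt.Println("failed to decode response:", err)
		return
	}
	fmt.Println(resp.Embedding, resp.PromptEvalCount) // [0.12 -0.34 0.56] 0
}
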