From 9f7822851c1f080d7d2a1dbe0e4d51233e5a28bc Mon Sep 17 00:00:00 2001 From: Devon Rifkin Date: Thu, 11 Dec 2025 17:39:40 -0800 Subject: [PATCH] docs: add docs for v1/responses and rework openai compat section (#13416) * docs: add docs for v1/responses and rework openai compat section I reworked the examples to be separated by topic and to be fully runnable (i.e., they now log output instead of just suggesting how a call might be made). We now use `<CodeGroup>`s so that each example has a dropdown on the docs site for users to choose, which makes the examples a lot more digestible (since you only see approx 1/3 of the code you used to). I also added a new tool to extract code examples into files so that it's easier to actually run them and check that they work. ## Example ```shell go run docs/tools/extract-examples/main.go docs/api/openai-compatibility.mdx ``` Output: ``` Extracting code examples to: /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368 - 01_basic.py - 01_basic.js - 01_basic.sh - 02_responses.py - 02_responses.js - 02_responses.sh - 03_vision.py - 03_vision.js - 03_vision.sh Extracted 9 file(s) to /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368 To run examples: cd /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368 npm install # for JS examples then run individual files with `node file.js`, `python file.py`, `bash file.sh` ``` In the future we should consider actually running the examples in CI and having some sort of acceptance test so we can automatically detect when our examples break. So this is just a start in that direction.
* Update docs/api/openai-compatibility.mdx Co-authored-by: Parth Sareen * Update docs/api/openai-compatibility.mdx Co-authored-by: Parth Sareen --------- Co-authored-by: Parth Sareen --- docs/api/openai-compatibility.mdx | 288 +++++++++++++------------- docs/tools/extract-examples/README.md | 46 ++++ docs/tools/extract-examples/main.go | 137 ++++++++++++ 3 files changed, 322 insertions(+), 149 deletions(-) create mode 100644 docs/tools/extract-examples/README.md create mode 100644 docs/tools/extract-examples/main.go diff --git a/docs/api/openai-compatibility.mdx b/docs/api/openai-compatibility.mdx index 8329934a..94febc30 100644 --- a/docs/api/openai-compatibility.mdx +++ b/docs/api/openai-compatibility.mdx @@ -6,16 +6,16 @@ Ollama provides compatibility with parts of the [OpenAI API](https://platform.op ## Usage -### OpenAI Python library +### Simple `v1/chat/completions` example -```python + + +```python basic.py from openai import OpenAI client = OpenAI( base_url='http://localhost:11434/v1/', - - # required but ignored - api_key='ollama', + api_key='ollama', # required but ignored ) chat_completion = client.chat.completions.create( @@ -25,96 +25,125 @@ chat_completion = client.chat.completions.create( 'content': 'Say this is a test', } ], - model='llama3.2', + model='gpt-oss:20b', +) +print(chat_completion.choices[0].message.content) +``` + +```javascript basic.js +import OpenAI from "openai"; + +const openai = new OpenAI({ + baseURL: "http://localhost:11434/v1/", + apiKey: "ollama", // required but ignored +}); + +const chatCompletion = await openai.chat.completions.create({ + messages: [{ role: "user", content: "Say this is a test" }], + model: "gpt-oss:20b", +}); + +console.log(chatCompletion.choices[0].message.content); +``` + +```shell basic.sh +curl -X POST http://localhost:11434/v1/chat/completions \ +-H "Content-Type: application/json" \ +-d '{ + "model": "gpt-oss:20b", + "messages": [{ "role": "user", "content": "Say this is a test" }] +}' +``` + + + 
+### Simple `v1/responses` example + + + +```python responses.py +from openai import OpenAI + +client = OpenAI( + base_url='http://localhost:11434/v1/', + api_key='ollama', # required but ignored +) + +responses_result = client.responses.create( + model='qwen3:8b', + input='Write a short poem about the color blue', +) +print(responses_result.output_text) +``` + +```javascript responses.js +import OpenAI from "openai"; + +const openai = new OpenAI({ + baseURL: "http://localhost:11434/v1/", + apiKey: "ollama", // required but ignored +}); + +const responsesResult = await openai.responses.create({ + model: "qwen3:8b", + input: "Write a short poem about the color blue", +}); + +console.log(responsesResult.output_text); +``` + +```shell responses.sh +curl -X POST http://localhost:11434/v1/responses \ +-H "Content-Type: application/json" \ +-d '{ + "model": "qwen3:8b", + "input": "Write a short poem about the color blue" +}' +``` + + + +### v1/chat/completions with vision example + + + +```python vision.py +from openai import OpenAI + +client = OpenAI( + base_url='http://localhost:11434/v1/', + api_key='ollama', # required but ignored ) response = client.chat.completions.create( - model="llava", + model='qwen3-vl:8b', messages=[ { - "role": "user", - "content": [ - {"type": "text", "text": "What's in this image?"}, + 'role': 'user', + 'content': [ + {'type': 'text', 'text': "What's in this image?"}, { - "type": "image_url", - "image_url": "", + 'type': 'image_url', + 'image_url': '', }, ], } ], max_tokens=300, ) - -completion = client.completions.create( - model="llama3.2", - prompt="Say this is a test", -) - -list_completion = client.models.list() - -model = client.models.retrieve("llama3.2") - -embeddings = client.embeddings.create( - model="all-minilm", - input=["why is the sky blue?", "why is the grass green?"], -) +print(response.choices[0].message.content) ``` -#### Structured outputs - -```python -from pydantic import BaseModel -from openai import OpenAI - -client 
= OpenAI(base_url="http://localhost:11434/v1", api_key="ollama") - -# Define the schema for the response -class FriendInfo(BaseModel): - name: str - age: int - is_available: bool - -class FriendList(BaseModel): - friends: list[FriendInfo] - -try: - completion = client.beta.chat.completions.parse( - temperature=0, - model="llama3.1:8b", - messages=[ - {"role": "user", "content": "I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format"} - ], - response_format=FriendList, - ) - - friends_response = completion.choices[0].message - if friends_response.parsed: - print(friends_response.parsed) - elif friends_response.refusal: - print(friends_response.refusal) -except Exception as e: - print(f"Error: {e}") -``` - -### OpenAI JavaScript library - -```javascript +```javascript vision.js import OpenAI from "openai"; const openai = new OpenAI({ baseURL: "http://localhost:11434/v1/", - - // required but ignored - apiKey: "ollama", -}); - -const chatCompletion = await openai.chat.completions.create({ - messages: [{ role: "user", content: "Say this is a test" }], - model: "llama3.2", + apiKey: "ollama", // required but ignored }); const response = await openai.chat.completions.create({ - model: "llava", + model: "qwen3-vl:8b", messages: [ { role: "user", @@ -129,84 +158,20 @@ const response = await openai.chat.completions.create({ }, ], }); - -const completion = await openai.completions.create({ - model: "llama3.2", - prompt: "Say this is a test.", -}); - -const listCompletion = await openai.models.list(); - -const model = await openai.models.retrieve("llama3.2"); - -const embedding = await openai.embeddings.create({ - model: "all-minilm", - input: ["why is the sky blue?", "why is the grass green?"], -}); +console.log(response.choices[0].message.content); ``` -### `curl` - -```shell -curl http://localhost:11434/v1/chat/completions \ - -H "Content-Type: 
application/json" \ - -d '{ - "model": "llama3.2", - "messages": [ - { - "role": "system", - "content": "You are a helpful assistant." - }, - { - "role": "user", - "content": "Hello!" - } - ] - }' - -curl http://localhost:11434/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "llava", - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "What'\''s in this image?" - }, - { - "type": "image_url", - "image_url": { - "url": "" - } - } - ] - } - ], - "max_tokens": 300 - }' - -curl http://localhost:11434/v1/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "llama3.2", - "prompt": "Say this is a test" - }' - -curl http://localhost:11434/v1/models - -curl http://localhost:11434/v1/models/llama3.2 - -curl http://localhost:11434/v1/embeddings \ - -H "Content-Type: application/json" \ - -d '{ - "model": "all-minilm", - "input": ["why is the sky blue?", "why is the grass green?"] - }' +```shell vision.sh +curl -X POST http://localhost:11434/v1/chat/completions \ +-H "Content-Type: application/json" \ +-d '{ + "model": "qwen3-vl:8b", + "messages": [{ "role": "user", "content": [{"type": "text", "text": "What is this an image of?"}, {"type": "image_url", "image_url": ""}]}] +}' ``` + + ## Endpoints ### `/v1/chat/completions` @@ -310,6 +275,31 @@ curl http://localhost:11434/v1/embeddings \ - [x] `dimensions` - [ ] `user` +### `/v1/responses` + +Ollama supports the [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses). Only the non-stateful flavor is supported (i.e., there is no `previous_response_id` or `conversation` support). 
+ +#### Supported features + +- [x] Streaming +- [x] Tools (function calling) +- [x] Reasoning summaries (for thinking models) +- [ ] Stateful requests + +#### Supported request fields + +- [x] `model` +- [x] `input` +- [x] `instructions` +- [x] `tools` +- [x] `stream` +- [x] `temperature` +- [x] `top_p` +- [x] `max_output_tokens` +- [ ] `previous_response_id` (stateful v1/responses not supported) +- [ ] `conversation` (stateful v1/responses not supported) +- [ ] `truncation` + ## Models Before using a model, pull it locally `ollama pull`: @@ -365,4 +355,4 @@ curl http://localhost:11434/v1/chat/completions \ } ] }' -``` \ No newline at end of file +``` diff --git a/docs/tools/extract-examples/README.md b/docs/tools/extract-examples/README.md new file mode 100644 index 00000000..38560492 --- /dev/null +++ b/docs/tools/extract-examples/README.md @@ -0,0 +1,46 @@ +# extract-examples + +Extracts code examples from MDX files to a temp directory so you can run them. + +## Usage + +```shell +go run docs/tools/extract-examples/main.go <mdx-file> +``` + +## Example + +```shell +go run docs/tools/extract-examples/main.go docs/api/openai-compatibility.mdx +``` + +Output: + +``` +Extracting code examples to: /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368 + + - 01_basic.py + - 01_basic.js + - 01_basic.sh + - 02_responses.py + - 02_responses.js + - 02_responses.sh + - 03_vision.py + - 03_vision.js + - 03_vision.sh + +Extracted 9 file(s) to /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368 + +To run examples: + + cd /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368 + npm install # for JS examples + +then run individual files with `node file.js`, `python file.py`, `bash file.sh` +``` + +## How it works + +- Parses MDX files looking for fenced code blocks with filenames (e.g., ` ```python basic.py `) +- Groups examples by their `<CodeGroup>` and prefixes filenames with `01_`, `02_`, etc.
// extract-examples (docs/tools/extract-examples/main.go) pulls the named,
// fenced code examples out of an MDX file and writes all extracted files to a
// temp directory so they can be run by hand.

// Patterns used to recognise the MDX constructs this tool cares about.
var (
	// codeBlockStart matches an opening fence that carries both a language and
	// a filename, e.g. "```python basic.py". Fences without a filename do not
	// match and are therefore ignored.
	codeBlockStart = regexp.MustCompile("^```([a-zA-Z0-9_-]+)\\s+([^\\s]+)$")
	// codeGroupStart matches an opening <CodeGroup> tag. The pattern has no
	// closing '>' so that tags carrying attributes are recognised as well.
	codeGroupStart = regexp.MustCompile("^<CodeGroup")
	// codeGroupEnd matches the closing </CodeGroup> tag.
	codeGroupEnd = regexp.MustCompile("^</CodeGroup>")
)

// maxLineBytes is the longest input line the scanner will accept. The
// bufio.Scanner default (64 KiB) would abort the whole run on a single
// oversized documentation line.
const maxLineBytes = 1024 * 1024

// example is one named code block found in an MDX file.
type example struct {
	name string // output filename, already prefixed with its CodeGroup number
	body string // block contents, every line newline-terminated
}

// extractExamples reads lines from sc and returns every completed named code
// block in document order. Blocks inside a CodeGroup get a "NN_" prefix, where
// NN is the 1-based index of that group in the document. The second result is
// the filename of a block that was still open when the input ended, or "" if
// every fence was closed. The caller is responsible for checking sc.Err().
func extractExamples(sc *bufio.Scanner) ([]example, string) {
	var (
		found    []example
		body     strings.Builder
		current  string
		inBlock  bool
		inGroup  bool
		groupNum int
	)

	for sc.Scan() {
		line := sc.Text()

		switch {
		// CodeGroup tags are checked first, so they act as boundaries even if
		// they appear while a code block is open.
		case codeGroupStart.MatchString(line):
			inGroup = true
			groupNum++
		case codeGroupEnd.MatchString(line):
			inGroup = false
		case inBlock && line == "```":
			found = append(found, example{name: current, body: body.String()})
			inBlock = false
			current = ""
			body.Reset()
		case inBlock:
			body.WriteString(line)
			body.WriteString("\n")
		default:
			m := codeBlockStart.FindStringSubmatch(line)
			if m == nil {
				continue
			}
			inBlock = true
			// Keep only the final path element so a fenced filename can never
			// point outside the output directory.
			current = filepath.Base(m[2])
			if inGroup {
				current = fmt.Sprintf("%02d_%s", groupNum, current)
			}
			body.Reset()
		}
	}

	return found, current
}

// writeSupportFile writes a dependency manifest into dir. A failure is
// reported on stderr but is not fatal: the extracted examples are still usable.
func writeSupportFile(dir, name, contents string) {
	if err := os.WriteFile(filepath.Join(dir, name), []byte(contents), 0o644); err != nil {
		fmt.Fprintf(os.Stderr, "Error writing %s: %v\n", name, err)
	}
}

// main extracts the examples from the MDX file named by the first command-line
// argument into a fresh temp directory, adds package.json and pyproject.toml
// so the JavaScript and Python examples can resolve their dependencies, and
// prints instructions for running them. It exits with status 1 if the argument
// is missing, the input cannot be opened or read, or the temp directory cannot
// be created.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Usage: go run extract-examples.go <mdx-file>")
		os.Exit(1)
	}

	mdxFile := os.Args[1]

	f, err := os.Open(mdxFile)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
	defer f.Close()

	// Create temp directory
	tempDir, err := os.MkdirTemp("", "mdx-examples-*")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error creating temp dir: %v\n", err)
		os.Exit(1)
	}

	fmt.Printf("Extracting code examples to: %s\n\n", tempDir)

	scanner := bufio.NewScanner(f)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	examples, unterminated := extractExamples(scanner)
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "Error reading file: %v\n", err)
		os.Exit(1)
	}
	if unterminated != "" {
		// An unclosed fence yields no file; say so instead of dropping it silently.
		fmt.Fprintf(os.Stderr, "Warning: code block %s is never closed; skipping it\n", unterminated)
	}

	count := 0
	for _, ex := range examples {
		outPath := filepath.Join(tempDir, ex.name)
		if err := os.WriteFile(outPath, []byte(ex.body), 0o644); err != nil {
			fmt.Fprintf(os.Stderr, "Error writing %s: %v\n", ex.name, err)
			continue
		}
		fmt.Printf(" - %s\n", ex.name)
		count++
	}

	// Write package.json for JavaScript dependencies
	writeSupportFile(tempDir, "package.json", `{
  "name": "mdx-examples",
  "type": "module",
  "dependencies": {
    "openai": "^4",
    "ollama": "^0.5"
  }
}
`)

	// Write pyproject.toml for Python dependencies
	writeSupportFile(tempDir, "pyproject.toml", `[project]
name = "mdx-examples"
version = "0.0.0"
dependencies = [
    "openai",
    "ollama",
]
`)

	fmt.Printf("\n")
	fmt.Printf("Extracted %d file(s) to %s\n", count, tempDir)
	fmt.Printf("\n")
	fmt.Printf("To run examples:\n")
	fmt.Printf("\n")
	fmt.Printf(" cd %s\n npm install # for JS examples\n", tempDir)
	fmt.Printf("\n")
	fmt.Printf("then run individual files with `node file.js`, `python file.py`, `bash file.sh`\n")
}