diff --git a/docs/api/openai-compatibility.mdx b/docs/api/openai-compatibility.mdx
index 8329934a..94febc30 100644
--- a/docs/api/openai-compatibility.mdx
+++ b/docs/api/openai-compatibility.mdx
@@ -6,16 +6,16 @@ Ollama provides compatibility with parts of the [OpenAI API](https://platform.op
## Usage
-### OpenAI Python library
+### Simple `v1/chat/completions` example
-```python
+
+
+```python basic.py
from openai import OpenAI
client = OpenAI(
base_url='http://localhost:11434/v1/',
-
- # required but ignored
- api_key='ollama',
+ api_key='ollama', # required but ignored
)
chat_completion = client.chat.completions.create(
@@ -25,96 +25,125 @@ chat_completion = client.chat.completions.create(
'content': 'Say this is a test',
}
],
- model='llama3.2',
+ model='gpt-oss:20b',
+)
+print(chat_completion.choices[0].message.content)
+```
+
+```javascript basic.js
+import OpenAI from "openai";
+
+const openai = new OpenAI({
+ baseURL: "http://localhost:11434/v1/",
+ apiKey: "ollama", // required but ignored
+});
+
+const chatCompletion = await openai.chat.completions.create({
+ messages: [{ role: "user", content: "Say this is a test" }],
+ model: "gpt-oss:20b",
+});
+
+console.log(chatCompletion.choices[0].message.content);
+```
+
+```shell basic.sh
+curl -X POST http://localhost:11434/v1/chat/completions \
+-H "Content-Type: application/json" \
+-d '{
+ "model": "gpt-oss:20b",
+ "messages": [{ "role": "user", "content": "Say this is a test" }]
+}'
+```
+
+
+
+### Simple `v1/responses` example
+
+
+
+```python responses.py
+from openai import OpenAI
+
+client = OpenAI(
+ base_url='http://localhost:11434/v1/',
+ api_key='ollama', # required but ignored
+)
+
+responses_result = client.responses.create(
+ model='qwen3:8b',
+ input='Write a short poem about the color blue',
+)
+print(responses_result.output_text)
+```
+
+```javascript responses.js
+import OpenAI from "openai";
+
+const openai = new OpenAI({
+ baseURL: "http://localhost:11434/v1/",
+ apiKey: "ollama", // required but ignored
+});
+
+const responsesResult = await openai.responses.create({
+ model: "qwen3:8b",
+ input: "Write a short poem about the color blue",
+});
+
+console.log(responsesResult.output_text);
+```
+
+```shell responses.sh
+curl -X POST http://localhost:11434/v1/responses \
+-H "Content-Type: application/json" \
+-d '{
+ "model": "qwen3:8b",
+ "input": "Write a short poem about the color blue"
+}'
+```
+
+
+
+### `v1/chat/completions` with vision example
+
+
+
+```python vision.py
+from openai import OpenAI
+
+client = OpenAI(
+ base_url='http://localhost:11434/v1/',
+ api_key='ollama', # required but ignored
)
response = client.chat.completions.create(
- model="llava",
+ model='qwen3-vl:8b',
messages=[
{
- "role": "user",
- "content": [
- {"type": "text", "text": "What's in this image?"},
+ 'role': 'user',
+ 'content': [
+ {'type': 'text', 'text': "What's in this image?"},
{
- "type": "image_url",
- "image_url": "",
+ 'type': 'image_url',
+ 'image_url': '',
},
],
}
],
max_tokens=300,
)
-
-completion = client.completions.create(
- model="llama3.2",
- prompt="Say this is a test",
-)
-
-list_completion = client.models.list()
-
-model = client.models.retrieve("llama3.2")
-
-embeddings = client.embeddings.create(
- model="all-minilm",
- input=["why is the sky blue?", "why is the grass green?"],
-)
+print(response.choices[0].message.content)
```
-#### Structured outputs
-
-```python
-from pydantic import BaseModel
-from openai import OpenAI
-
-client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
-
-# Define the schema for the response
-class FriendInfo(BaseModel):
- name: str
- age: int
- is_available: bool
-
-class FriendList(BaseModel):
- friends: list[FriendInfo]
-
-try:
- completion = client.beta.chat.completions.parse(
- temperature=0,
- model="llama3.1:8b",
- messages=[
- {"role": "user", "content": "I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format"}
- ],
- response_format=FriendList,
- )
-
- friends_response = completion.choices[0].message
- if friends_response.parsed:
- print(friends_response.parsed)
- elif friends_response.refusal:
- print(friends_response.refusal)
-except Exception as e:
- print(f"Error: {e}")
-```
-
-### OpenAI JavaScript library
-
-```javascript
+```javascript vision.js
import OpenAI from "openai";
const openai = new OpenAI({
baseURL: "http://localhost:11434/v1/",
-
- // required but ignored
- apiKey: "ollama",
-});
-
-const chatCompletion = await openai.chat.completions.create({
- messages: [{ role: "user", content: "Say this is a test" }],
- model: "llama3.2",
+ apiKey: "ollama", // required but ignored
});
const response = await openai.chat.completions.create({
- model: "llava",
+ model: "qwen3-vl:8b",
messages: [
{
role: "user",
@@ -129,84 +158,20 @@ const response = await openai.chat.completions.create({
},
],
});
-
-const completion = await openai.completions.create({
- model: "llama3.2",
- prompt: "Say this is a test.",
-});
-
-const listCompletion = await openai.models.list();
-
-const model = await openai.models.retrieve("llama3.2");
-
-const embedding = await openai.embeddings.create({
- model: "all-minilm",
- input: ["why is the sky blue?", "why is the grass green?"],
-});
+console.log(response.choices[0].message.content);
```
-### `curl`
-
-```shell
-curl http://localhost:11434/v1/chat/completions \
- -H "Content-Type: application/json" \
- -d '{
- "model": "llama3.2",
- "messages": [
- {
- "role": "system",
- "content": "You are a helpful assistant."
- },
- {
- "role": "user",
- "content": "Hello!"
- }
- ]
- }'
-
-curl http://localhost:11434/v1/chat/completions \
- -H "Content-Type: application/json" \
- -d '{
- "model": "llava",
- "messages": [
- {
- "role": "user",
- "content": [
- {
- "type": "text",
- "text": "What'\''s in this image?"
- },
- {
- "type": "image_url",
- "image_url": {
- "url": ""
- }
- }
- ]
- }
- ],
- "max_tokens": 300
- }'
-
-curl http://localhost:11434/v1/completions \
- -H "Content-Type: application/json" \
- -d '{
- "model": "llama3.2",
- "prompt": "Say this is a test"
- }'
-
-curl http://localhost:11434/v1/models
-
-curl http://localhost:11434/v1/models/llama3.2
-
-curl http://localhost:11434/v1/embeddings \
- -H "Content-Type: application/json" \
- -d '{
- "model": "all-minilm",
- "input": ["why is the sky blue?", "why is the grass green?"]
- }'
+```shell vision.sh
+curl -X POST http://localhost:11434/v1/chat/completions \
+-H "Content-Type: application/json" \
+-d '{
+ "model": "qwen3-vl:8b",
+ "messages": [{ "role": "user", "content": [{"type": "text", "text": "What is this an image of?"}, {"type": "image_url", "image_url": ""}]}]
+}'
```
+
+
## Endpoints
### `/v1/chat/completions`
@@ -310,6 +275,31 @@ curl http://localhost:11434/v1/embeddings \
- [x] `dimensions`
- [ ] `user`
+### `/v1/responses`
+
+Ollama supports the [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses). Only the non-stateful flavor is supported (i.e., there is no `previous_response_id` or `conversation` support).
+
+#### Supported features
+
+- [x] Streaming
+- [x] Tools (function calling)
+- [x] Reasoning summaries (for thinking models)
+- [ ] Stateful requests
+
+#### Supported request fields
+
+- [x] `model`
+- [x] `input`
+- [x] `instructions`
+- [x] `tools`
+- [x] `stream`
+- [x] `temperature`
+- [x] `top_p`
+- [x] `max_output_tokens`
+- [ ] `previous_response_id` (stateful v1/responses not supported)
+- [ ] `conversation` (stateful v1/responses not supported)
+- [ ] `truncation`
+
## Models
Before using a model, pull it locally `ollama pull`:
@@ -365,4 +355,4 @@ curl http://localhost:11434/v1/chat/completions \
}
]
}'
-```
\ No newline at end of file
+```
diff --git a/docs/tools/extract-examples/README.md b/docs/tools/extract-examples/README.md
new file mode 100644
index 00000000..38560492
--- /dev/null
+++ b/docs/tools/extract-examples/README.md
@@ -0,0 +1,46 @@
+# extract-examples
+
+Extracts code examples from an MDX file to a temp directory so you can run them.
+
+## Usage
+
+```shell
+go run docs/tools/extract-examples/main.go <mdx-file>
+```
+
+## Example
+
+```shell
+go run docs/tools/extract-examples/main.go docs/api/openai-compatibility.mdx
+```
+
+Output:
+
+```
+Extracting code examples to: /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
+
+ - 01_basic.py
+ - 01_basic.js
+ - 01_basic.sh
+ - 02_responses.py
+ - 02_responses.js
+ - 02_responses.sh
+ - 03_vision.py
+ - 03_vision.js
+ - 03_vision.sh
+
+Extracted 9 file(s) to /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
+
+To run examples:
+
+ cd /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
+ npm install # for JS examples
+
+then run individual files with `node file.js`, `python file.py`, `bash file.sh`
+```
+
+## How it works
+
+- Parses MDX files looking for fenced code blocks with filenames (e.g., ` ```python basic.py `)
+- Groups examples by their `<CodeGroup>` and prefixes filenames with `01_`, `02_`, etc.
+- Writes all extracted files to a temp directory
diff --git a/docs/tools/extract-examples/main.go b/docs/tools/extract-examples/main.go
new file mode 100644
index 00000000..3f09af5c
--- /dev/null
+++ b/docs/tools/extract-examples/main.go
@@ -0,0 +1,137 @@
+package main
+
+import (
+ "bufio"
+ "fmt"
+ "os"
+ "path/filepath"
+ "regexp"
+ "strings"
+)
+
+// main extracts every fenced code block that carries a filename (for example
+// "```python basic.py") from the MDX file named by os.Args[1] into a fresh temp
+// directory, then writes package.json and pyproject.toml beside the examples.
+// Files found inside a CodeGroup are prefixed with that group's 1-based number.
+func main() {
+	if len(os.Args) < 2 {
+		fmt.Fprintln(os.Stderr, "Usage: go run docs/tools/extract-examples/main.go <mdx-file>")
+		os.Exit(1)
+	}
+
+	f, err := os.Open(os.Args[1])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+		os.Exit(1)
+	}
+	defer f.Close()
+
+	// Create temp directory
+	tempDir, err := os.MkdirTemp("", "mdx-examples-*")
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating temp dir: %v\n", err)
+		os.Exit(1)
+	}
+
+	fmt.Printf("Extracting code examples to: %s\n\n", tempDir)
+
+	// Patterns: an opening fence carrying a language and a filename, plus the
+	// opening and closing CodeGroup tags that delimit one numbered group.
+	codeBlockStart := regexp.MustCompile("^```([a-zA-Z0-9_-]+)\\s+([^\\s]+)$")
+	codeGroupStart := regexp.MustCompile("^<CodeGroup")
+	codeGroupEnd := regexp.MustCompile("^</CodeGroup>")
+
+	scanner := bufio.NewScanner(f)
+	// Scanner rejects lines over 64 KiB by default; allow 1 MiB so long lines (e.g. inline base64) still scan.
+	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), 1024*1024)
+	inCodeBlock := false
+	inCodeGroup := false
+	var currentFile string
+	var content strings.Builder
+	count := 0
+	codeGroupNum := 0
+
+	for scanner.Scan() {
+		line := scanner.Text()
+
+		// Track CodeGroup boundaries
+		if codeGroupStart.MatchString(line) {
+			inCodeGroup = true
+			codeGroupNum++
+			continue
+		}
+		if codeGroupEnd.MatchString(line) {
+			inCodeGroup = false
+			continue
+		}
+
+		if inCodeBlock {
+			if line == "```" {
+				// End of code block - write file
+				if currentFile != "" {
+					outPath := filepath.Join(tempDir, currentFile)
+					if err := os.WriteFile(outPath, []byte(content.String()), 0o644); err != nil {
+						fmt.Fprintf(os.Stderr, "Error writing %s: %v\n", currentFile, err)
+					} else {
+						fmt.Printf(" - %s\n", currentFile)
+						count++
+					}
+				}
+				inCodeBlock = false
+				currentFile = ""
+				content.Reset()
+			} else {
+				content.WriteString(line)
+				content.WriteString("\n")
+			}
+		} else if matches := codeBlockStart.FindStringSubmatch(line); matches != nil {
+			inCodeBlock = true
+			filename := matches[2]
+			// Prefix with CodeGroup number if inside a CodeGroup
+			if inCodeGroup {
+				currentFile = fmt.Sprintf("%02d_%s", codeGroupNum, filename)
+			} else {
+				currentFile = filename
+			}
+			content.Reset()
+		}
+	}
+
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintf(os.Stderr, "Error reading file: %v\n", err)
+		os.Exit(1)
+	}
+
+	// Write package.json for JavaScript dependencies
+	packageJSON := `{
+ "name": "mdx-examples",
+ "type": "module",
+ "dependencies": {
+ "openai": "^4",
+ "ollama": "^0.5"
+ }
+}
+`
+	if err := os.WriteFile(filepath.Join(tempDir, "package.json"), []byte(packageJSON), 0o644); err != nil {
+		fmt.Fprintf(os.Stderr, "Error writing package.json: %v\n", err)
+	}
+
+	// Write pyproject.toml for Python dependencies
+	pyprojectTOML := `[project]
+name = "mdx-examples"
+version = "0.0.0"
+dependencies = [
+ "openai",
+ "ollama",
+]
+`
+	if err := os.WriteFile(filepath.Join(tempDir, "pyproject.toml"), []byte(pyprojectTOML), 0o644); err != nil {
+		fmt.Fprintf(os.Stderr, "Error writing pyproject.toml: %v\n", err)
+	}
+
+	// Summarize what was extracted and how to run it.
+	fmt.Printf("\nExtracted %d file(s) to %s\n\n", count, tempDir)
+	fmt.Printf("To run examples:\n\n")
+	fmt.Printf(" cd %s\n npm install # for JS examples\n\n", tempDir)
+	fmt.Printf("then run individual files with `node file.js`, `python file.py`, `bash file.sh`\n")
+}