Compare commits

...

244 Commits

Author SHA1 Message Date
likelovewant
76026b4a35 Merge branch 'ollama:main' into main 2024-06-09 10:10:23 +08:00
Nischal Jain
85169e8d6f Added headless-ollama (#4612) 2024-06-08 18:51:16 -07:00
Jeffrey Morgan
34f142797a llm: always add bos token to prompt (#4941)
* fix embedding by adding fixes from llama.cpp upstream

* remove assert

---------

Co-authored-by: Jesper Ek <deadbeef84@gmail.com>
2024-06-08 18:47:10 -07:00
Erhan
46a7f1e74a Update README.md with LangChainRust (#4854) 2024-06-08 17:29:36 -07:00
Daniel Hiltgen
cddc63381c Merge pull request #4909 from dhiltgen/oneapi_disable
Add ability to skip oneapi generate
2024-06-07 14:07:15 -07:00
Michael Yang
385a32ecb5 Merge pull request #4910 from ollama/mxyng/detect-chat-template
fix create model when template detection errors
2024-06-07 11:07:39 -07:00
Michael Yang
030e765e76 fix create model when template detection errors 2024-06-07 10:51:35 -07:00
Daniel Hiltgen
ab8c929e20 Add ability to skip oneapi generate
This follows the same pattern for cuda and rocm to allow
disabling the build even when we detect the dependent libraries
2024-06-07 08:32:49 -07:00
likelovewant
27e7397b11 Update gen_windows.ps1 2024-06-07 17:35:15 +08:00
likelovewant
a6390a8992 Merge branch 'ollama:main' into main 2024-06-07 17:25:53 +08:00
Jeffrey Morgan
ce0dc33cb8 llm: patch to fix qwen 2 temporarily on nvidia (#4897) 2024-06-06 23:14:33 -07:00
Michael Yang
78f81fc0e5 Merge pull request #4800 from ollama/mxyng/detect-chat-template
detect chat template from KV
2024-06-06 16:17:18 -07:00
Michael Yang
9b6c2e6eb6 detect chat template from KV 2024-06-06 16:03:47 -07:00
royjhan
1a29e9a879 API app/browser access (#4879)
* API app/browser access

* Add tauri (resolves #2291, #4791, #3799, #4388)
2024-06-06 15:19:03 -07:00
royjhan
4bf1da4944 Separate ListResponse and ModelResponse for api/tags vs api/ps (#4842)
* Remove false time fields

* Struct Separation for List and Process

* Remove Marshaler
2024-06-06 10:11:45 -07:00
Blake Mizerany
de5beb06b3 server: skip blob verification for already verified blobs 2024-06-05 16:39:11 -07:00
Sam
98e65929dc docs(tools): add gollama (#4829) 2024-06-05 14:13:39 -07:00
Michael Yang
22fcf8f7de Merge pull request #3737 from ollama/mxyng/modelname-4
update create handler to use model.Name
2024-06-05 12:05:05 -07:00
royjhan
28c7813ac4 API PS Documentation (#4822)
* API PS Documentation
2024-06-05 11:06:53 -07:00
Kartikeya Mishra
1d8616d30f docs: update to add LLocal.in to web & desktop integrations (#4719) 2024-06-04 14:43:59 -07:00
Michael Yang
d61ef8b954 update create handler to use model.Name 2024-06-04 13:28:25 -07:00
Michael Yang
89d9900152 Merge pull request #4570 from ollama/mxyng/slices
lint some of the things
2024-06-04 13:27:05 -07:00
Michael
4a048715b6 local wording was confusing people
local wording was confusing people -- Ollama runs on cloud providers
2024-06-04 13:25:25 -07:00
Michael Yang
6297f85606 gofmt, goimports 2024-06-04 13:20:24 -07:00
Michael Yang
ed56428dd7 warn on intrange, usestdlibvars 2024-06-04 11:52:48 -07:00
Michael Yang
ad40b92b6a disable intrange 2024-06-04 11:35:30 -07:00
Michael Yang
8ce4032e72 more lint 2024-06-04 11:13:30 -07:00
Michael Yang
42660466f8 no usestdlibvars 2024-06-04 11:13:30 -07:00
Michael Yang
e919f6811f lint windows 2024-06-04 11:13:30 -07:00
Michael Yang
bf7edb0d5d lint linux 2024-06-04 11:13:30 -07:00
Michael Yang
f38353d6b9 stdin.fd 2024-06-04 11:13:30 -07:00
Michael Yang
201d853fdf nolintlint 2024-06-04 11:13:30 -07:00
Michael Yang
e40145a39d lint 2024-06-04 11:13:30 -07:00
Michael Yang
c895a7d13f some gocritic 2024-06-04 11:13:30 -07:00
Michael Yang
dad7a987ae nosprintfhostport 2024-06-04 11:13:30 -07:00
Michael Yang
8ffb51749f nolintlint 2024-06-04 11:13:30 -07:00
Michael Yang
55f6eba049 gofmt 2024-06-04 11:13:30 -07:00
Michael Yang
04f3c12bb7 replace x/exp/slices with slices 2024-06-04 11:13:30 -07:00
Shubham
60323e0805 add embed model command and fix question invoke (#4766)
* add embed model command and fix question invoke

* Update docs/tutorials/langchainpy.md

Co-authored-by: Kim Hallberg <hallberg.kim@gmail.com>

* Update docs/tutorials/langchainpy.md

---------

Co-authored-by: Kim Hallberg <hallberg.kim@gmail.com>
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2024-06-03 22:20:48 -07:00
likelovewant
71ae05239e Update README.md 2024-06-03 15:54:48 +08:00
likelovewant
a4a435bf8f Update amd_windows.go 2024-06-03 14:55:48 +08:00
likelovewant
2490a69f7b Merge branch 'ollama:main' into main 2024-06-03 14:15:05 +08:00
Jeffrey Morgan
d4a86102fd update welcome prompt in windows to llama3 (#4779) 2024-06-01 21:05:51 -07:00
Jeffrey Morgan
476fb8e892 Limit GPU lib search for now (#4777)
* fix oneapi errors on windows 10
2024-06-01 19:24:33 -07:00
Michael Yang
829ff87bd1 revert tokenize ffi (#4761)
* Revert "use `int32_t` for call to tokenize (#4738)"

This reverts commit 763bb65dbb.

* Revert "vocab only"

This reverts commit bf54c845e9.

* Revert "use ffi for tokenizing/detokenizing"

This reverts commit 26a00a0410.
2024-05-31 18:54:21 -07:00
Josh
f6b622c4b3 Merge pull request #4733 from ollama/jyan/isvalidname
added IsValidNamespace function
2024-05-31 14:08:45 -07:00
Josh Yan
2e4da8eec2 added tests for IsValidNamespace 2024-05-31 11:48:07 -07:00
likelovewant
16ce79eb3b Merge branch 'ollama:main' into main 2024-05-31 18:43:24 +08:00
Jeffrey Morgan
763bb65dbb use int32_t for call to tokenize (#4738)
* use `int32_t` for call to tokenize

* variable naming

* cleanup

* fix crash
2024-05-30 21:43:30 -07:00
Jeffrey Morgan
7ca9605f54 speed up tests by only building static lib (#4740) 2024-05-30 21:43:15 -07:00
Michael Yang
eb2c443a79 Merge pull request #4736 from ollama/mxyng/vocab-only
vocab only for tokenize
2024-05-30 17:21:00 -07:00
Michael Yang
278e25ea44 Merge pull request #4737 from ollama/mxyng/less-generate
only generate on relevant changes
2024-05-30 17:17:50 -07:00
Jeffrey Morgan
a50a87a7b8 partial offloading: allow flash attention and disable mmap (#4734)
* partial offloading: allow flash attention and disable mmap

* allow mmap with num_gpu=0
2024-05-30 16:58:01 -07:00
Michael Yang
98085015d5 only generate on relevant changes 2024-05-30 16:54:11 -07:00
Michael Yang
bf54c845e9 vocab only 2024-05-30 16:49:28 -07:00
Josh Yan
c365f195a8 directly use isvalidpart 2024-05-30 16:40:04 -07:00
Josh
e91d0ef737 Merge pull request #4728 from ollama/jyan/japanese
fixed japanese characters deleted at end of line
2024-05-30 16:25:12 -07:00
Jeffrey Morgan
22f5c12ced Update llama.cpp submodule to 5921b8f0 (#4731)
* update llama.cpp submodule to `5921b8f089d3b7bda86aac5a66825df6a6c10603`

* add patch
2024-05-30 16:20:22 -07:00
Josh Yan
298c996e54 added IsValidNamespace function 2024-05-30 16:02:07 -07:00
Daniel Hiltgen
0fc0cfc6d2 Merge pull request #4594 from dhiltgen/doc_container_workarounds
Add isolated gpu test to troubleshooting
2024-05-30 13:10:54 -07:00
Josh Yan
914f68f021 replaced duplicate call with variable 2024-05-30 10:38:07 -07:00
Josh Yan
bd1d119ba9 fixed japanese characters deleted at end of line 2024-05-30 10:24:21 -07:00
Lei Jitang
a03be18189 Fix OLLAMA_LLM_LIBRARY with wrong map name and add more env vars to help message (#4663)
* envconfig/config.go: Fix wrong description of OLLAMA_LLM_LIBRARY

Signed-off-by: Lei Jitang <leijitang@outlook.com>

* serve: Add more env to help message of ollama serve

Add more environment variables to `ollama serve --help`
to let users know what can be configured.

Signed-off-by: Lei Jitang <leijitang@outlook.com>

---------

Signed-off-by: Lei Jitang <leijitang@outlook.com>
2024-05-30 09:36:51 -07:00
Michael Yang
96bc232b43 Merge pull request #4413 from ollama/mxyng/name-check
check if name exists before create/pull/copy
2024-05-29 12:06:58 -07:00
Michael Yang
bca7b12284 Merge pull request #3718 from ollama/mxyng/modelname-3
update delete handler to use model.Name
2024-05-29 12:02:07 -07:00
Michael Yang
32cb1960c1 Merge pull request #4380 from ollama/mxyng/tokenize
use tokenize/detokenize
2024-05-29 12:00:59 -07:00
Michael Yang
de781b37c8 rm unused infill 2024-05-29 11:26:47 -07:00
Michael Yang
3e21799377 rm unused system prompt 2024-05-29 11:26:47 -07:00
Michael Yang
26a00a0410 use ffi for tokenizing/detokenizing 2024-05-29 11:26:47 -07:00
likelovewant
cafde1f8ce Merge branch 'ollama:main' into main 2024-05-29 19:33:39 +08:00
Daniel Hiltgen
646371f56d Merge pull request #3278 from zhewang1-intc/rebase_ollama_main
Enabling ollama to run on Intel GPUs with SYCL backend
2024-05-28 16:30:50 -07:00
Jeffrey Morgan
1f5008544b Update install.sh 2024-05-28 15:01:22 -07:00
Jeffrey Morgan
45cbfc5aee fix wsl2 status check for nvidia cards (#4689) 2024-05-28 14:49:46 -07:00
Jeffrey Morgan
6d423b383b Improve install experience on WSL2 and Linux (#4653) 2024-05-28 14:41:50 -07:00
Josh
ad897080a2 working on integration of multi-byte and multi-width runes (#4549)
* integrated runewidth for display management - fixed cursor movement for multi-width char

* updated input and deletion of multi-byte chars

* fixed line history with some exceptions

* improved insert and add

* fixed issues with moving across lines

* end of line extra space tracking

* saved changes

* fixed end of line issues with empty spaces

* worked some more

* worked on end of line

* fixed failed test

* fixed minor inserting bug

* fixed movement hotkeys

* adjusted hotkeys

* removed comments

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update readline/buffer.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* deleted comments and duplicate code

* removed duplicate code

* added comments, refactored add function to use addChar

* added helper to retrieve lineSpacing, renamed lineFlags for clarity

* fixed remove()

---------

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2024-05-28 12:04:03 -07:00
Jeffrey Morgan
b7d316d98d fix nvidia detection in install script (#4683) 2024-05-28 09:59:36 -07:00
Daniel Hiltgen
d7339fad52 Merge pull request #4682 from dhiltgen/more_time
Give the final model loading more time
2024-05-28 09:36:02 -07:00
Daniel Hiltgen
92c81e8117 Give the final model loading more time
On some systems, 1 minute isn't sufficient to finish the load after it
hits 100%. This creates 2 distinct timers, although they're both set to
the same value for now so we can refine the timeouts further.
2024-05-28 09:08:10 -07:00
Tai
9db0996ed4 Add OllamaSpring Project to Readme (#4672)
* Add OllamaSpring Project to Readme

* Update README.md

---------

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2024-05-27 19:58:26 -07:00
Orfeo Ciano
6f43898b17 Adds olpaka flutter client (#4647)
* Adds olpaka flutter client

* Update README.md

---------

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2024-05-27 17:22:01 -07:00
Lei Jitang
7487229c34 llm/server.go: Fix 2 minor typos (#4661)
Signed-off-by: Lei Jitang <leijitang@outlook.com>
2024-05-27 17:21:10 -07:00
Rayan Mostovoi
8a8e7afa96 small fix on examples/python-simplechat/client.py to actually get a streamed response and get tokens printed as we receive it (#4671) 2024-05-27 17:19:20 -07:00
Jeffrey Morgan
c79f8c9c39 Ensure nvidia and nvidia_uvm kernel modules are loaded in install.sh script and at startup (#4652)
* ensure kernel modules are loaded in `install.sh` script and at startup

* indentation

* use `SUDO` variable

* restart if nouveau is detected

* consistent success message for AMD
2024-05-26 14:57:17 -07:00
Jeffrey Morgan
485016bfbb Update install.sh 2024-05-26 11:46:00 -07:00
likelovewant
2a80d6f743 Merge branch 'ollama:main' into main 2024-05-26 11:57:21 +08:00
Daniel Hiltgen
0165ba1651 Merge pull request #4638 from dhiltgen/better_error
Report better warning on client closed abort of load
2024-05-25 14:32:28 -07:00
Daniel Hiltgen
c4209d6d21 Report better warning on client closed abort of load
If the client closes the connection before we finish loading the model,
we abort, so let's make the log message clearer about why, to help users
understand this failure mode.
2024-05-25 09:23:28 -07:00
Michael Yang
6adca97f37 Merge pull request #4619 from noxer/patch-1
Fix download retry issue
2024-05-24 17:21:57 -07:00
Michael Yang
9a3c8003c8 Merge pull request #4624 from ollama/mxyng/fix-5
fix q5_0, q5_1
2024-05-24 16:11:21 -07:00
Michael Yang
d51f15257c Update llm/ggml.go
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2024-05-24 16:10:43 -07:00
Michael Yang
8f440d579a fix q5_0, q5_1 2024-05-24 16:01:46 -07:00
Patrick Devine
4cc3be3035 Move envconfig and consolidate env vars (#4608) 2024-05-24 14:57:15 -07:00
Tim Scheuermann
db2ffa79f1 Fix download retry issue 2024-05-24 20:30:42 +02:00
likelovewant
73c49d57e8 Update amd_windows.go
removing this would break the installer build
2024-05-24 20:06:28 +08:00
likelovewant
6b50b2f3bf Update gen_windows.ps1 2024-05-24 15:42:29 +08:00
likelovewant
6f03952a62 Update amd_linux.go
remove old GPU limits due to the new ROCm support
2024-05-24 15:30:41 +08:00
likelovewant
0e5b263a60 Update amd_windows.go
add iGPU support; remove those, otherwise gfx1035 is reported as not working
2024-05-24 15:26:24 +08:00
likelovewant
4e37e24b04 Merge branch 'ollama:main' into main 2024-05-24 15:19:08 +08:00
Jeffrey Morgan
afd2b058b4 set codesign timeout to longer (#4605) 2024-05-23 22:46:23 -07:00
Wang,Zhe
fd5971be0b support ollama run on Intel GPUs 2024-05-24 11:18:27 +08:00
Daniel Hiltgen
89bf98bcf2 Merge pull request #4598 from dhiltgen/docs
Tidy up developer guide a little
2024-05-23 15:14:29 -07:00
Daniel Hiltgen
1b2d156094 Tidy up developer guide a little 2024-05-23 15:14:05 -07:00
Michael Yang
714adb8bd1 bump (#4597) 2024-05-23 14:16:26 -07:00
Daniel Hiltgen
95b1133d0c Merge pull request #4547 from dhiltgen/load_progress
Wire up load progress
2024-05-23 14:06:02 -07:00
Daniel Hiltgen
b37b496a12 Wire up load progress
This doesn't expose a UX yet, but wires the initial server portion
of progress reporting during load
2024-05-23 13:36:48 -07:00
Bruce MacDonald
d6f692ad1a Add support for IQ1_S, IQ3_S, IQ2_S, IQ4_XS, IQ4_NL (#4322)
Co-authored-by: ManniX-ITA <20623405+mann1x@users.noreply.github.com>
2024-05-23 13:21:49 -07:00
Daniel Hiltgen
f77713bf1f Add isolated gpu test to troubleshooting 2024-05-23 09:33:25 -07:00
Jeffrey Morgan
38255d2af1 Use flash attention flag for now (#4580)
* put flash attention behind flag for now

* add test

* remove print

* up timeout for scheduler tests
2024-05-22 21:52:09 -07:00
Michael
73630a7e85 add phi 3 medium (#4578) 2024-05-22 12:53:45 -04:00
Ikko Eltociear Ashimine
955c317cab chore: update tokenizer.go (#4571)
PreTokenziers -> PreTokenizers
2024-05-22 00:25:23 -07:00
Josh
9f18b88a06 Merge pull request #4566 from ollama/jyan/shortcuts
add Ctrl + W shortcut
2024-05-21 22:49:36 -07:00
Josh Yan
353f83a9c7 add Ctrl + W shortcut 2024-05-21 16:55:09 -07:00
Patrick Devine
3bade04e10 doc updates for the faq/troubleshooting (#4565) 2024-05-21 15:30:09 -07:00
Michael Yang
a6d0f443eb Merge pull request #4543 from ollama/mxyng/simple-safetensors
simplify safetensors reading
2024-05-21 14:43:55 -07:00
Michael Yang
96236b7968 Merge pull request #4268 from ollama/pdevine/llama3
Convert directly from llama3
2024-05-21 14:43:37 -07:00
Sang Park
4434d7f447 Correct typo in error message (#4535)
The spelling of the term "request", which was previously mistakenly written as "requeset" in the error log message, has been corrected.
2024-05-21 13:39:01 -07:00
Michael Yang
171eb040fc simplify safetensors reading 2024-05-21 11:28:22 -07:00
Michael Yang
3591bbe56f add test 2024-05-21 11:28:22 -07:00
Michael Yang
34d5ef29b3 fix conversion for f16 or f32 inputs 2024-05-21 11:28:22 -07:00
Michael Yang
bbbd9f20f3 cleanup 2024-05-20 16:13:57 -07:00
Michael Yang
547132e820 bpe pretokenizer 2024-05-20 16:13:57 -07:00
Patrick Devine
2d315ba9a9 add missing file 2024-05-20 16:13:57 -07:00
Patrick Devine
d355d2020f add fixes for llama 2024-05-20 16:13:57 -07:00
Patrick Devine
c8cf0d94ed llama3 conversion 2024-05-20 16:13:57 -07:00
Patrick Devine
4730762e5c add safetensors version 2024-05-20 16:13:57 -07:00
Patrick Devine
d88582dffd some changes for llama3 2024-05-20 16:13:57 -07:00
Michael Yang
2f81b3dce2 Merge pull request #4502 from ollama/mxyng/fix-quantize
fix quantize file types
2024-05-20 16:09:27 -07:00
jmorganca
5cab13739e set llama.cpp submodule commit to 614d3b9 2024-05-20 15:28:17 -07:00
Josh Yan
8aadad9c72 updated updateURL 2024-05-20 15:24:32 -07:00
Michael Yang
807d092761 fix quantize file types 2024-05-20 15:22:11 -07:00
Michael Yang
f36f1d6be9 tidy intermediate blobs 2024-05-20 15:15:06 -07:00
alwqx
8800c8a59b chore: fix typo in docs (#4536) 2024-05-20 14:19:03 -07:00
Michael Yang
b4dce13309 Merge pull request #4330 from ollama/mxyng/cache-intermediate-layers
cache and reuse intermediate blobs
2024-05-20 13:54:41 -07:00
Sam
e15307fdf4 feat: add support for flash_attn (#4120)
* feat: enable flash attention if supported

* feat: enable flash attention if supported

* feat: enable flash attention if supported

* feat: add flash_attn support
2024-05-20 13:36:03 -07:00
Michael Yang
3520c0e4d5 cache and reuse intermediate blobs
particularly useful for zipfiles and f16s
2024-05-20 13:25:10 -07:00
Patrick Devine
ccdf0b2a44 Move the parser back + handle utf16 files (#4533) 2024-05-20 11:26:45 -07:00
jmorganca
63a453554d go mod tidy 2024-05-19 23:03:57 -07:00
Patrick Devine
105186aa17 add OLLAMA_NOHISTORY to turn off history in interactive mode (#4508) 2024-05-18 11:51:57 -07:00
likelovewant
fc2f25c1d5 Merge branch 'ollama:main' into main 2024-05-18 13:52:47 +08:00
Daniel Hiltgen
ba04afc9a4 Merge pull request #4483 from dhiltgen/clean_exit
Don't return error on signal exit
2024-05-17 11:41:57 -07:00
Daniel Hiltgen
7e1e0086e7 Merge pull request #4482 from dhiltgen/integration_improvements
Skip max queue test on remote
2024-05-16 16:43:48 -07:00
Daniel Hiltgen
02b31c9dc8 Don't return error on signal exit 2024-05-16 16:25:38 -07:00
Daniel Hiltgen
7f2fbad736 Skip max queue test on remote
This test needs to be able to adjust the queue size down from
our default setting for a reliable test, so it needs to be skipped
in remote test execution mode.
2024-05-16 16:24:18 -07:00
Josh
5bece94509 Merge pull request #4463 from ollama/jyan/line-display
changed line display to be calculated with runewidth
2024-05-16 14:15:08 -07:00
Josh Yan
3d90156e99 removed comment 2024-05-16 14:12:03 -07:00
Rose Heart
5e46c5c435 Updating software for read me (#4467)
* Update README.md

Added chat/moderation bot to list of software.

* Update README.md

Fixed link error.
2024-05-16 13:55:14 -07:00
Jeffrey Morgan
583c1f472c update llama.cpp submodule to 614d3b9 (#4414) 2024-05-16 13:53:09 -07:00
likelovewant
d497e31f4b Merge branch 'ollama:main' into main 2024-05-16 22:24:44 +08:00
Josh Yan
26bfc1c443 go fmt'd cmd.go 2024-05-15 17:26:39 -07:00
Josh Yan
799aa9883c go fmt'd cmd.go 2024-05-15 17:24:17 -07:00
Michael Yang
84ed77cbd8 Merge pull request #4436 from ollama/mxyng/done-part
return on part done
2024-05-15 17:16:24 -07:00
Josh Yan
c9e584fb90 updated double-width display 2024-05-15 16:45:24 -07:00
Josh Yan
17b1e81ca1 fixed width and word count for double spacing 2024-05-15 16:29:33 -07:00
Daniel Hiltgen
7e9a2da097 Merge pull request #4462 from dhiltgen/opt_out_build
Port cuda/rocm skip build vars to linux
2024-05-15 16:27:47 -07:00
Daniel Hiltgen
c48c1d7c46 Port cuda/rocm skip build vars to linux
Windows already implements these; carry them over to Linux.
2024-05-15 15:56:43 -07:00
Patrick Devine
d1692fd3e0 fix the cpu estimatedTotal memory + get the expiry time for loading models (#4461) 2024-05-15 15:43:16 -07:00
Daniel Hiltgen
5fa36a0833 Merge pull request #4459 from dhiltgen/sanitize_env_log
Sanitize the env var debug log
2024-05-15 14:58:55 -07:00
Daniel Hiltgen
853ae490e1 Sanitize the env var debug log
Only dump env vars we care about in the logs
2024-05-15 14:42:57 -07:00
Patrick Devine
f2cf97d6f1 fix typo in modelfile generation (#4439) 2024-05-14 15:34:29 -07:00
Patrick Devine
c344da4c5a fix keepalive for non-interactive mode (#4438) 2024-05-14 15:17:04 -07:00
Michael Yang
85a57006d1 check if name exists before create/pull/copy 2024-05-14 14:58:58 -07:00
Michael Yang
c5e892cb3e update tests 2024-05-14 14:56:31 -07:00
Michael Yang
81fb06f530 more resilient Manifests 2024-05-14 14:08:24 -07:00
Michael Yang
a385382ff5 filepath.Join 2024-05-14 14:08:24 -07:00
Michael Yang
b8772a353f remove DeleteModel 2024-05-14 14:08:24 -07:00
Michael Yang
c2714fcbfd routes: use Manifests for ListHandler 2024-05-14 14:08:24 -07:00
Michael Yang
a2fc933fed update delete handler to use model.Name 2024-05-14 14:08:24 -07:00
Michael Yang
0e331c7168 Merge pull request #4328 from ollama/mxyng/mem
count memory up to NumGPU if set by user
2024-05-14 13:47:44 -07:00
Michael Yang
ac145f75ca return on part done 2024-05-14 13:04:30 -07:00
Patrick Devine
a4b8d1f89a re-add system context (#4435) 2024-05-14 11:38:20 -07:00
Ryo Machida
798b107f19 Fixed the API endpoint /api/tags when the model list is empty. (#4424)
* Fixed the API endpoint /api/tags to return {models: []} instead of {models: null} when the model list is empty.

* Update server/routes.go

---------

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2024-05-14 11:18:10 -07:00
Daniel Hiltgen
6a1b471365 Merge pull request #4430 from dhiltgen/gpu_info
Remove VRAM convergence check for windows
2024-05-14 10:59:06 -07:00
Daniel Hiltgen
ec231a7923 Remove VRAM convergence check for windows
The APIs we query are optimistic on free space, and windows pages
VRAM, so we don't have to wait to see reported usage recover on unload
2024-05-14 09:53:46 -07:00
Patrick Devine
7ca71a6b0f don't abort when an invalid model name is used in /save (#4416) 2024-05-13 18:48:28 -07:00
Josh
7607e6e902 Merge pull request #4379 from WolfTheDeveloper/main
Update `LlamaScript` to point to new link from Legacy link.
2024-05-13 18:08:32 -07:00
Patrick Devine
f1548ef62d update the FAQ to be more clear about windows env variables (#4415) 2024-05-13 18:01:13 -07:00
Patrick Devine
6845988807 Ollama ps command for showing currently loaded models (#4327) 2024-05-13 17:17:36 -07:00
Josh
9eed4a90ce Merge pull request #4411 from joshyan1/main
removed inconsistent punctuation
2024-05-13 15:30:45 -07:00
Josh Yan
f8464785a6 removed inconsistencies 2024-05-13 14:50:52 -07:00
Michael Yang
1d359e737e typo 2024-05-13 14:18:34 -07:00
Michael Yang
50b9056e09 count memory up to NumGPU 2024-05-13 14:13:10 -07:00
Josh Yan
91a090a485 removed inconsistent punctuation 2024-05-13 14:08:22 -07:00
睡觉型学渣
9c76b30d72 Correct typos. (#4387)
* Correct typos.

* Correct typos.
2024-05-12 18:21:11 -07:00
Zander Lewis
93f19910c5 Update LlamaScript to point to new link.
Still used Legacy link.
2024-05-12 11:24:21 -04:00
likelovewant
9a36dc537d Update README.md
edit the download links and guide for using the pre-released version.
2024-05-12 22:27:43 +08:00
likelovewant
9b3b3f6a14 Merge branch 'ollama:main' into main 2024-05-12 21:57:11 +08:00
jmorganca
4ec7445a6f Revert "use post token"
This reverts commit 0fec3525ad.
2024-05-11 22:19:14 -07:00
Michael Yang
0372c51f82 Merge pull request #4369 from ollama/mxyng/post-token
use post token
2024-05-11 19:29:14 -07:00
Michael Yang
0fec3525ad use post token 2024-05-11 19:13:16 -07:00
Jeffrey Morgan
41ba3017fd Fix OpenAI finish_reason values when empty (#4368) 2024-05-11 15:31:41 -07:00
todashuta
8080fbce35 fix ollama create's usage string (#4362) 2024-05-11 14:47:49 -07:00
Michael Yang
ec14f6ceda case sensitive filepaths (#4366) 2024-05-11 14:12:36 -07:00
Daniel Hiltgen
c60a086635 Merge pull request #4331 from dhiltgen/fix_unit
Fix envconfig unit test
2024-05-11 09:16:28 -07:00
likelovewant
dfbeca78af Merge branch 'ollama:main' into main 2024-05-11 15:07:33 +08:00
jmorganca
92ca2cca95 Revert "only forward some env vars"
This reverts commit ce3b212d12.
2024-05-10 22:53:21 -07:00
Patrick Devine
1e1634daca update go deps (#4324) 2024-05-10 21:39:27 -07:00
likelovewant
33d0209023 Merge branch 'ollama:main' into main 2024-05-11 12:00:17 +08:00
Daniel Hiltgen
824ee5446f Fix envconfig unit test 2024-05-10 16:49:48 -07:00
Daniel Hiltgen
879e2caf8c Merge pull request #4329 from dhiltgen/zero_layers
Fall back to CPU runner with zero layers
2024-05-10 15:23:16 -07:00
Daniel Hiltgen
c4014e73a2 Fall back to CPU runner with zero layers 2024-05-10 15:09:48 -07:00
Daniel Hiltgen
be9efdb981 Merge pull request #4326 from dhiltgen/fix_integration
Integration fixes
2024-05-10 14:25:59 -07:00
Daniel Hiltgen
074dc3b9d8 Integration fixes 2024-05-10 14:20:10 -07:00
Daniel Hiltgen
86f9b582d5 Merge pull request #4323 from dhiltgen/sort_by_free
Always use the sorted list of GPUs
2024-05-10 14:12:15 -07:00
Daniel Hiltgen
4142c3ef7c Always use the sorted list of GPUs
Make sure the first GPU has the most free space
2024-05-10 13:53:21 -07:00
Jeffrey Morgan
6602e793c0 Use --quantize flag and quantize api parameter (#4321)
* rename `--quantization` to `--quantize`

* backwards

* Update api/types.go

Co-authored-by: Michael Yang <mxyng@pm.me>

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2024-05-10 13:06:13 -07:00
Michael Yang
ea0fdaed28 Merge pull request #4320 from ollama/mxyng/phi2-mem
add phi2 mem
2024-05-10 12:35:08 -07:00
Michael Yang
1eb382da5a add phi2 mem 2024-05-10 12:13:28 -07:00
Jeffrey Morgan
bb6fd02298 Don't clamp ctx size in PredictServerFit (#4317)
* dont clamp ctx size in `PredictServerFit`

* minimum 4 context

* remove context warning
2024-05-10 10:17:12 -07:00
Daniel Hiltgen
7e2bceceee Merge pull request #4316 from dhiltgen/more_buffer
Bump VRAM buffer back up
2024-05-10 10:02:34 -07:00
Daniel Hiltgen
30a7d7096c Bump VRAM buffer back up
Under stress scenarios we're seeing OOMs so this should help stabilize
the allocations under heavy concurrency stress.
2024-05-10 09:15:28 -07:00
Michael Yang
200a18820e Merge pull request #4306 from ollama/mxyng/fix-routes 2024-05-10 08:58:16 -07:00
Michael Yang
e03637176d fix(routes): skip bad manifests 2024-05-10 08:46:11 -07:00
Bruce MacDonald
c02db93243 omit empty done reason 2024-05-09 16:45:29 -07:00
Michael Yang
ffa4d5134a Merge pull request #4305 from ollama/mxyng/typo
fix typo
2024-05-09 16:42:09 -07:00
Jeffrey Morgan
302d7fdbf3 prune partial downloads (#4272) 2024-05-09 16:35:20 -07:00
Michael Yang
cf442cd57e fix typo 2024-05-09 16:23:37 -07:00
Michael Yang
0e1ba65855 Merge pull request #4302 from ollama/mxyng/forward-env
only forward some env vars
2024-05-09 16:21:05 -07:00
Michael Yang
6aad333c63 Merge pull request #4298 from ollama/mxyng/log-cleanup
log clean up
2024-05-09 16:20:57 -07:00
Daniel Hiltgen
4fcc84e67a Merge pull request #4304 from dhiltgen/signals
Fix race in shutdown logic
2024-05-09 15:58:44 -07:00
Daniel Hiltgen
3ae2f441e0 Fix race in shutdown logic
Ensure the runners are terminated
2024-05-09 15:54:02 -07:00
Zander Lewis
2abb3f6424 Update README.md (#4300) 2024-05-09 15:30:49 -07:00
Michael Yang
ce3b212d12 only forward some env vars 2024-05-09 15:16:09 -07:00
Daniel Hiltgen
83d6d46e29 Merge pull request #4299 from dhiltgen/handle_vram_reporting_lag
Wait for GPU free memory reporting to converge
2024-05-09 15:08:56 -07:00
Daniel Hiltgen
354ad9254e Wait for GPU free memory reporting to converge
The GPU drivers take a while to update their free memory reporting, so we need
to wait until the values converge with what we're expecting before proceeding
to start another runner in order to get an accurate picture.
2024-05-09 14:56:01 -07:00
Michael Yang
58876091f7 log clean up 2024-05-09 14:55:36 -07:00
Daniel Hiltgen
dc18eee39d Merge pull request #4238 from dhiltgen/gpu_info
Record more GPU information
2024-05-09 14:26:58 -07:00
Daniel Hiltgen
8727a9c140 Record more GPU information
This cleans up the logging for GPU discovery a bit, and can
serve as a foundation to report GPU information in a future UX.
2024-05-09 14:18:14 -07:00
Daniel Hiltgen
d0425f26cf Merge pull request #4294 from dhiltgen/harden_subprocess_reaping
Harden subprocess reaping
2024-05-09 14:02:16 -07:00
Bruce MacDonald
cfa84b8470 add done_reason to the api (#4235) 2024-05-09 13:30:14 -07:00
Michael Yang
1580ed4c06 Merge pull request #4295 from ollama/mxyng/fix-list
routes: skip invalid filepaths
2024-05-09 11:37:34 -07:00
Michael Yang
a7ee84fc31 routes: skip invalid filepaths 2024-05-09 11:23:22 -07:00
Daniel Hiltgen
84ac7ce139 Refine subprocess reaping 2024-05-09 11:21:31 -07:00
tusharhero
788b092c49 docs: add Guix package manager in README. (#4040) 2024-05-09 11:10:24 -07:00
J S
5cde17a096 Add PromptingTools.jl (#2192) 2024-05-09 09:39:05 -07:00
Daniel Hiltgen
c3837eb08c Merge pull request #4289 from dhiltgen/doc_container_workarounds
Doc container usage and workaround for nvidia errors
2024-05-09 09:27:29 -07:00
Daniel Hiltgen
8cc0ee2efe Doc container usage and workaround for nvidia errors 2024-05-09 09:26:45 -07:00
Jeffrey Morgan
d5eec16d23 use model defaults for num_gqa, rope_frequency_base and rope_frequency_scale (#1983) 2024-05-09 09:06:13 -07:00
likelovewant
a3906a6173 update links 2024-05-09 14:00:53 +08:00
Carlos Gamez
daa1a032f7 Update langchainjs.md (#2027)
Updated sample code as per warning notification from the package maintainers
2024-05-08 20:21:03 -07:00
jmorganca
6042e8bc57 remove bash-comparemodels example 2024-05-08 19:49:45 -07:00
Daniel Hiltgen
920a4b0794 Merge remote-tracking branch 'upstream/main' into pr3702 2024-05-08 16:44:35 -07:00
ManniX-ITA
c496967e56 Merge branch 'ollama:main' into mannix-server 2024-04-18 18:45:15 +02:00
ManniX-ITA
c942e4a07b Fixed startup sequence to report model loading 2024-04-17 17:40:32 +02:00
ManniX-ITA
bd54b08261 Streamlined WaitUntilRunning 2024-04-17 17:39:52 +02:00
145 changed files with 5171 additions and 2101 deletions

View File

@@ -28,6 +28,7 @@ jobs:
security unlock-keychain -p password build.keychain
security import certificate.p12 -k build.keychain -P $MACOS_SIGNING_KEY_PASSWORD -T /usr/bin/codesign
security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k password build.keychain
security set-keychain-settings -lut 3600 build.keychain
- uses: actions/setup-go@v5
with:
go-version-file: go.mod

View File

@@ -34,13 +34,13 @@ jobs:
git diff-tree -r --no-commit-id --name-only \
$(git merge-base ${{ github.event.pull_request.base.sha }} ${{ github.event.pull_request.head.sha }}) \
${{ github.event.pull_request.head.sha }} \
| xargs python3 -c "import sys; print(any([x.startswith('$1') for x in sys.argv[1:]]))"
| xargs python3 -c "import sys; from pathlib import Path; print(any(Path(x).match(glob) for x in sys.argv[1:] for glob in '$*'.split(' ')))"
}
{
echo GENERATE=$(changed llm/)
echo GENERATE_CUDA=$(changed llm/)
echo GENERATE_ROCM=$(changed llm/)
echo GENERATE=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
echo GENERATE_CUDA=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
echo GENERATE_ROCM=$(changed 'llm/llama.cpp' 'llm/patches/**' 'llm/ext_server/**' 'llm/generate/**')
} >>$GITHUB_OUTPUT
generate:
@@ -269,9 +269,9 @@ jobs:
mkdir -p llm/build/darwin/$ARCH/stub/bin
touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server
if: ${{ startsWith(matrix.os, 'macos-') }}
- uses: golangci/golangci-lint-action@v4
- uses: golangci/golangci-lint-action@v6
with:
args: --timeout 8m0s -v
args: --timeout 8m0s -v ${{ startsWith(matrix.os, 'windows-') && '' || '--disable gofmt --disable goimports' }}
test:
strategy:
matrix:
@@ -287,6 +287,8 @@ jobs:
GOARCH: ${{ matrix.arch }}
CGO_ENABLED: '1'
OLLAMA_CPU_TARGET: 'static'
OLLAMA_SKIP_CPU_GENERATE: '1'
OLLAMA_SKIP_METAL_GENERATE: '1'
steps:
- uses: actions/checkout@v4
with:

View File

@@ -9,9 +9,26 @@ linters:
- contextcheck
- exportloopref
- gocheckcompilerdirectives
# FIXME: for some reason this errors on windows
# conditionally enable this on linux/macos
# - gofmt
# - goimports
- intrange
- misspell
- nilerr
- nolintlint
- nosprintfhostport
- testifylint
- unconvert
- unused
- wastedassign
- whitespace
- usestdlibvars
severity:
default-severity: error
rules:
- linters:
- gofmt
- goimports
- intrange
- usestdlibvars
severity: info

View File

@@ -6,7 +6,7 @@
[![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)
Get up and running with large language models locally.
Get up and running with large language models.
### macOS
@@ -14,7 +14,25 @@ Get up and running with large language models locally.
### Windows preview
[Download](https://ollama.com/download/OllamaSetup.exe)
[Download](https://github.com/likelovewant/ollama-for-amd/releases)
For AMD use or builds, please follow the guide on the [wiki](https://github.com/likelovewant/ollama-for-amd/wiki).
Official support list:
```
"gfx900" "gfx906:xnack-" "gfx908:xnack-" "gfx90a:xnack+" "gfx90a:xnack-" "gfx940" "gfx941" "gfx942" "gfx1010""gfx1012" "gfx1030" "gfx1100""gfx1101" "gfx1102"
```
Please download from ollama [official](https://ollama.com/download/OllamaSetup.exe)
Example extra list added in this repo:
```
"gfx803" "gfx902" "gfx904""gfx940" "gfx941" "gfx942" "gfx1010" "gfx1011" "gfx1012" "gfx1031" "gfx1032""gfx1034" "gfx1035" "gfx1036" "gfx1103"
```
Please follow the [wiki](https://github.com/likelovewant/ollama-for-amd/wiki) guide to build or use the pre-release version.
Note: `gfx803` is reported as partially working via the wiki method; future support is expected
### Linux
@@ -51,15 +69,17 @@ Here are some example models that can be downloaded:
| ------------------ | ---------- | ----- | ------------------------------ |
| Llama 3 | 8B | 4.7GB | `ollama run llama3` |
| Llama 3 | 70B | 40GB | `ollama run llama3:70b` |
| Phi-3 | 3.8B | 2.3GB | `ollama run phi3` |
| Phi 3 Mini | 3.8B | 2.3GB | `ollama run phi3` |
| Phi 3 Medium | 14B | 7.9GB | `ollama run phi3:medium` |
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
| Moondream 2 | 1.4B | 829MB | `ollama run moondream` |
| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
| Starling | 7B | 4.1GB | `ollama run starling-lm` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| LLaVA | 7B | 4.5GB | `ollama run llava` |
| Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
| Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
| Solar | 10.7B | 6.1GB | `ollama run solar` |
> Note: You should have at least 8 GB of RAM available to run the 7B models, 16 GB to run the 13B models, and 32 GB to run the 33B models.
@@ -192,25 +212,7 @@ ollama list
## Building
Install `cmake` and `go`:
```
brew install cmake go
```
Then generate dependencies:
```
go generate ./...
```
Then build the binary:
```
go build .
```
More detailed instructions can be found in the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
See the [developer guide](https://github.com/ollama/ollama/blob/main/docs/development.md)
### Running local builds
@@ -299,6 +301,9 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Ollama RAG Chatbot](https://github.com/datvodinh/rag-chatbot.git) (Local Chat with multiple PDFs using Ollama and RAG)
- [BrainSoup](https://www.nurgo-software.com/products/brainsoup) (Flexible native client with RAG & multi-agent automation)
- [macai](https://github.com/Renset/macai) (macOS client for Ollama, ChatGPT, and other compatible API back-ends)
- [Olpaka](https://github.com/Otacon/olpaka) (User-friendly Flutter Web App for Ollama)
- [OllamaSpring](https://github.com/CrazyNeil/OllamaSpring) (Ollama Client for macOS)
- [LLocal.in](https://github.com/kartikm7/llocal) (Easy to use Electron Desktop Client for Ollama)
### Terminal
@@ -321,6 +326,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [ShellOracle](https://github.com/djcopley/ShellOracle)
- [tlm](https://github.com/yusufcanb/tlm)
- [podman-ollama](https://github.com/ericcurtin/podman-ollama)
- [gollama](https://github.com/sammcj/gollama)
### Database
@@ -331,12 +337,14 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
- [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
- [Guix channel](https://codeberg.org/tusharhero/ollama-guix)
### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LangChain4j](https://github.com/langchain4j/langchain4j) with [example](https://github.com/langchain4j/langchain4j-examples/tree/main/ollama-examples/src/main/java)
- [LangChainRust](https://github.com/Abraxas-365/langchain-rust) with [example](https://github.com/Abraxas-365/langchain-rust/blob/main/examples/llm_ollama.rs)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@@ -357,6 +365,8 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [Ollama Connector for SAP ABAP](https://github.com/b-tocs/abap_btocs_ollama)
- [Testcontainers](https://testcontainers.com/modules/ollama/)
- [Portkey](https://portkey.ai/docs/welcome/integration-guides/ollama)
- [PromptingTools.jl](https://github.com/svilupp/PromptingTools.jl) with an [example](https://svilupp.github.io/PromptingTools.jl/dev/examples/working_with_ollama)
- [LlamaScript](https://github.com/Project-Llama/llamascript)
### Mobile
@@ -389,7 +399,10 @@ See the [API documentation](./docs/api.md) for all endpoints.
- [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
- [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
- [Discord-Ollama Chat Bot](https://github.com/kevinthedang/discord-ollama) (Generalized TypeScript Discord Bot w/ Tuning Documentation)
- [Discord AI chat/moderation bot](https://github.com/rapmd73/Companion) Chat/moderation bot written in Python. Uses Ollama to create personalities.
- [Headless Ollama](https://github.com/nischalj10/headless-ollama) (Scripts to automatically install ollama client & models on any OS for apps that depend on ollama server)
### Supported backends
- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.

View File

@@ -354,6 +354,15 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
return &lr, nil
}
// List running models.
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error) {
var lr ProcessResponse
if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
return nil, err
}
return &lr, nil
}
// Copy copies a model - creating a model with another name from an existing
// model.
func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {
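The api/client.go hunk above adds a ListRunning method backed by the new /api/ps endpoint. A minimal, hypothetical usage sketch (not part of the diff) follows; it assumes the `github.com/ollama/ollama/api` package from this branch, and the field names come from the ProcessModelResponse struct added in api/types.go later in this compare.
```
// Hypothetical sketch: list the models currently loaded by the server.
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// ListRunning calls the new /api/ps endpoint added in this change.
	running, err := client.ListRunning(context.Background())
	if err != nil {
		log.Fatal(err)
	}

	for _, m := range running.Models {
		fmt.Printf("%s\t%d bytes (%d in VRAM)\tuntil %s\n",
			m.Name, m.Size, m.SizeVRAM, m.ExpiresAt)
	}
}
```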

View File

@@ -4,6 +4,7 @@ import (
"encoding/json"
"errors"
"fmt"
"log/slog"
"math"
"os"
"reflect"
@@ -116,6 +117,7 @@ type ChatResponse struct {
Model string `json:"model"`
CreatedAt time.Time `json:"created_at"`
Message Message `json:"message"`
DoneReason string `json:"done_reason,omitempty"`
Done bool `json:"done"`
@@ -161,7 +163,6 @@ type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"`
@@ -171,11 +172,6 @@ type Runner struct {
UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
NumThread int `json:"num_thread,omitempty"`
// Unused: RopeFrequencyBase is ignored. Instead the value in the model will be used
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
// Unused: RopeFrequencyScale is ignored. Instead the value in the model will be used
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
}
// EmbeddingRequest is the request passed to [Client.Embeddings].
@@ -205,10 +201,13 @@ type CreateRequest struct {
Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"`
Quantization string `json:"quantization,omitempty"`
Quantize string `json:"quantize,omitempty"`
// Name is deprecated, see Model
Name string `json:"name"`
// Quantization is deprecated, see Quantize
Quantization string `json:"quantization,omitempty"`
}
// DeleteRequest is the request passed to [Client.Delete].
@@ -283,11 +282,16 @@ type PushRequest struct {
// ListResponse is the response from [Client.List].
type ListResponse struct {
Models []ModelResponse `json:"models"`
Models []ListModelResponse `json:"models"`
}
// ModelResponse is a single model description in [ListResponse].
type ModelResponse struct {
// ProcessResponse is the response from [Client.Process].
type ProcessResponse struct {
Models []ProcessModelResponse `json:"models"`
}
// ListModelResponse is a single model description in [ListResponse].
type ListModelResponse struct {
Name string `json:"name"`
Model string `json:"model"`
ModifiedAt time.Time `json:"modified_at"`
@@ -296,6 +300,17 @@ type ModelResponse struct {
Details ModelDetails `json:"details,omitempty"`
}
// ProcessModelResponse is a single model description in [ProcessResponse].
type ProcessModelResponse struct {
Name string `json:"name"`
Model string `json:"model"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Details ModelDetails `json:"details,omitempty"`
ExpiresAt time.Time `json:"expires_at"`
SizeVRAM int64 `json:"size_vram"`
}
type TokenResponse struct {
Token string `json:"token"`
}
@@ -305,7 +320,7 @@ type GenerateResponse struct {
// Model is the model name that generated the response.
Model string `json:"model"`
//CreatedAt is the timestamp of the response.
// CreatedAt is the timestamp of the response.
CreatedAt time.Time `json:"created_at"`
// Response is the textual response itself.
@@ -314,6 +329,9 @@ type GenerateResponse struct {
// Done specifies if the response is complete.
Done bool `json:"done"`
// DoneReason is the reason the model stopped generating text.
DoneReason string `json:"done_reason,omitempty"`
// Context is an encoding of the conversation used in this response; this
// can be sent in the next request to keep a conversational memory.
Context []int `json:"context,omitempty"`
@@ -359,8 +377,6 @@ func (m *Metrics) Summary() {
}
}
// ErrInvalidOpts is returned when invalid options are passed to the client.
var ErrInvalidOpts = errors.New("invalid options")
var ErrInvalidHostPort = errors.New("invalid port specified in OLLAMA_HOST")
func (opts *Options) FromMap(m map[string]interface{}) error {
@@ -376,9 +392,13 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
}
}
invalidOpts := []string{}
for key, val := range m {
if opt, ok := jsonOpts[key]; ok {
opt, ok := jsonOpts[key]
if !ok {
slog.Warn("invalid option provided", "option", opt.Name)
continue
}
field := valueOpts.FieldByName(opt.Name)
if field.IsValid() && field.CanSet() {
if val == nil {
@@ -435,14 +455,8 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
return fmt.Errorf("unknown type loading config params: %v", field.Kind())
}
}
} else {
invalidOpts = append(invalidOpts, key)
}
}
if len(invalidOpts) > 0 {
return fmt.Errorf("%w: %v", ErrInvalidOpts, strings.Join(invalidOpts, ", "))
}
return nil
}
@@ -475,7 +489,6 @@ func DefaultOptions() Options {
NumCtx: 2048,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
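One behavioral note on the FromMap hunk above: unknown option keys no longer fail the whole call. A minimal, hypothetical sketch (not part of the diff) of the new behavior, assuming the `github.com/ollama/ollama/api` package from this branch; the option key is made up for illustration.
```
package main

import (
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	opts := api.DefaultOptions()

	// "not_a_real_option" is a hypothetical, unknown key.
	err := opts.FromMap(map[string]interface{}{
		"not_a_real_option": true,
	})

	// After this change the unknown key only triggers a slog.Warn and is
	// skipped, so err is nil; before, FromMap returned an error wrapping
	// ErrInvalidOpts that listed the invalid keys.
	fmt.Println(err) // <nil>
}
```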

View File

@@ -72,13 +72,13 @@ func TestDurationMarshalUnmarshal(t *testing.T) {
},
{
"positive duration",
time.Duration(42 * time.Second),
time.Duration(42 * time.Second),
42 * time.Second,
42 * time.Second,
},
{
"another positive duration",
time.Duration(42 * time.Minute),
time.Duration(42 * time.Minute),
42 * time.Minute,
42 * time.Minute,
},
{
"zero duration",

View File

@@ -6,7 +6,7 @@ import (
"os"
"path/filepath"
"github.com/ollama/ollama/server/envconfig"
"github.com/ollama/ollama/envconfig"
)
func InitLogging() {

View File

@@ -69,7 +69,6 @@ func init() {
slog.Error(fmt.Sprintf("create ollama dir %s: %v", AppDataDir, err))
}
}
} else if runtime.GOOS == "darwin" {
// TODO
AppName += ".app"

View File

@@ -15,7 +15,7 @@ import (
)
func getCLIFullPath(command string) string {
cmdPath := ""
var cmdPath string
appExe, err := os.Executable()
if err == nil {
cmdPath = filepath.Join(filepath.Dir(appExe), command)
@@ -65,7 +65,6 @@ func start(ctx context.Context, command string) (*exec.Cmd, error) {
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("stat ollama server log dir %s: %v", logDir, err)
}
if err := os.MkdirAll(logDir, 0o755); err != nil {

View File

@@ -24,7 +24,8 @@ func terminate(cmd *exec.Cmd) error {
if err != nil {
return err
}
defer dll.Release() // nolint: errcheck
//nolint:errcheck
defer dll.Release()
pid := cmd.Process.Pid
@@ -73,7 +74,8 @@ func isProcessExited(pid int) (bool, error) {
if err != nil {
return false, fmt.Errorf("failed to open process: %v", err)
}
defer windows.CloseHandle(hProcess) // nolint: errcheck
//nolint:errcheck
defer windows.CloseHandle(hProcess)
var exitCode uint32
err = windows.GetExitCodeProcess(hProcess, &exitCode)

View File

@@ -78,7 +78,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
}
defer resp.Body.Close()
if resp.StatusCode == 204 {
if resp.StatusCode == http.StatusNoContent {
slog.Debug("check update response 204 (current version is up to date)")
return false, updateResp
}
@@ -87,7 +87,7 @@ func IsNewReleaseAvailable(ctx context.Context) (bool, UpdateResponse) {
slog.Warn(fmt.Sprintf("failed to read body response: %s", err))
}
if resp.StatusCode != 200 {
if resp.StatusCode != http.StatusOK {
slog.Info(fmt.Sprintf("check update error %d - %.96s", resp.StatusCode, string(body)))
return false, updateResp
}
@@ -114,7 +114,7 @@ func DownloadNewRelease(ctx context.Context, updateResp UpdateResponse) error {
if err != nil {
return fmt.Errorf("error checking update: %w", err)
}
if resp.StatusCode != 200 {
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("unexpected status attempting to download update %d", resp.StatusCode)
}
resp.Body.Close()

View File

@@ -4,5 +4,5 @@ write-host "Welcome to Ollama!"
write-host ""
write-host "Run your first model:"
write-host ""
write-host "`tollama run llama2"
write-host "`tollama run llama3"
write-host ""

View File

@@ -29,7 +29,6 @@ func GetID() string {
initStore()
}
return store.ID
}
func GetFirstTimeRun() bool {

View File

@@ -47,7 +47,6 @@ func nativeLoop() {
default:
pTranslateMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck
pDispatchMessage.Call(uintptr(unsafe.Pointer(m))) //nolint:errcheck
}
}
}
@@ -160,8 +159,8 @@ func (t *winTray) wndProc(hWnd windows.Handle, message uint32, wParam, lParam ui
lResult, _, _ = pDefWindowProc.Call(
uintptr(hWnd),
uintptr(message),
uintptr(wParam),
uintptr(lParam),
wParam,
lParam,
)
}
return

View File

@@ -186,7 +186,7 @@ func (t *winTray) initInstance() error {
t.muNID.Lock()
defer t.muNID.Unlock()
t.nid = &notifyIconData{
Wnd: windows.Handle(t.window),
Wnd: t.window,
ID: 100,
Flags: NIF_MESSAGE,
CallbackMessage: t.wmSystrayMessage,
@@ -197,7 +197,6 @@ func (t *winTray) initInstance() error {
}
func (t *winTray) createMenu() error {
menuHandle, _, err := pCreatePopupMenu.Call()
if menuHandle == 0 {
return err
@@ -246,7 +245,7 @@ func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title
mi := menuItemInfo{
Mask: MIIM_FTYPE | MIIM_STRING | MIIM_ID | MIIM_STATE,
Type: MFT_STRING,
ID: uint32(menuItemId),
ID: menuItemId,
TypeData: titlePtr,
Cch: uint32(len(title)),
}
@@ -302,11 +301,10 @@ func (t *winTray) addOrUpdateMenuItem(menuItemId uint32, parentId uint32, title
}
func (t *winTray) addSeparatorMenuItem(menuItemId, parentId uint32) error {
mi := menuItemInfo{
Mask: MIIM_FTYPE | MIIM_ID | MIIM_STATE,
Type: MFT_SEPARATOR,
ID: uint32(menuItemId),
ID: menuItemId,
}
mi.Size = uint32(unsafe.Sizeof(mi))
@@ -426,7 +424,6 @@ func iconBytesToFilePath(iconBytes []byte) (string, error) {
// Loads an image from file and shows it in tray.
// Shell_NotifyIcon: https://msdn.microsoft.com/en-us/library/windows/desktop/bb762159(v=vs.85).aspx
func (t *winTray) setIcon(src string) error {
h, err := t.loadIconFrom(src)
if err != nil {
return err
@@ -444,7 +441,6 @@ func (t *winTray) setIcon(src string) error {
// Loads an image from file to be shown in tray or menu item.
// LoadImage: https://msdn.microsoft.com/en-us/library/windows/desktop/ms648045(v=vs.85).aspx
func (t *winTray) loadIconFrom(src string) (windows.Handle, error) {
// Save and reuse handles of loaded images
t.muLoadedImages.RLock()
h, ok := t.loadedImages[src]

View File

@@ -12,6 +12,7 @@ import (
"fmt"
"io"
"log"
"math"
"net"
"net/http"
"os"
@@ -19,21 +20,23 @@ import (
"path/filepath"
"regexp"
"runtime"
"slices"
"strings"
"syscall"
"time"
"github.com/containerd/console"
"github.com/mattn/go-runewidth"
"github.com/olekukonko/tablewriter"
"github.com/spf13/cobra"
"golang.org/x/crypto/ssh"
"golang.org/x/exp/slices"
"golang.org/x/term"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/auth"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress"
"github.com/ollama/ollama/server"
"github.com/ollama/ollama/types/errtypes"
@@ -62,7 +65,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
defer f.Close()
modelfile, err := model.ParseFile(f)
modelfile, err := parser.ParseFile(f)
if err != nil {
return err
}
@@ -142,9 +145,9 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return nil
}
quantization, _ := cmd.Flags().GetString("quantization")
quantize, _ := cmd.Flags().GetString("quantize")
request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantization: quantization}
request := api.CreateRequest{Name: args[0], Modelfile: modelfile.String(), Quantize: quantize}
if err := client.Create(cmd.Context(), &request, fn); err != nil {
return err
}
@@ -206,7 +209,7 @@ func tempZipFiles(path string) (string, error) {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
files = append(files, pt...)
} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers consolidated.x.pth, consolidated.pth
files = append(files, pt...)
@@ -324,6 +327,18 @@ func RunHandler(cmd *cobra.Command, args []string) error {
}
opts.Format = format
keepAlive, err := cmd.Flags().GetString("keepalive")
if err != nil {
return err
}
if keepAlive != "" {
d, err := time.ParseDuration(keepAlive)
if err != nil {
return err
}
opts.KeepAlive = &api.Duration{Duration: d}
}
prompts := args[1:]
// prepend stdin to the prompt if provided
if !term.IsTerminal(int(os.Stdin.Fd())) {
@@ -496,6 +511,52 @@ func ListHandler(cmd *cobra.Command, args []string) error {
return nil
}
func ListRunningHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
models, err := client.ListRunning(cmd.Context())
if err != nil {
return err
}
var data [][]string
for _, m := range models.Models {
if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
var procStr string
switch {
case m.SizeVRAM == 0:
procStr = "100% CPU"
case m.SizeVRAM == m.Size:
procStr = "100% GPU"
case m.SizeVRAM > m.Size || m.Size == 0:
procStr = "Unknown"
default:
sizeCPU := m.Size - m.SizeVRAM
cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent))
}
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")})
}
}
table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "UNTIL"})
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
table.SetAlignment(tablewriter.ALIGN_LEFT)
table.SetHeaderLine(false)
table.SetBorder(false)
table.SetNoWhiteSpace(true)
table.SetTablePadding("\t")
table.AppendBulk(data)
table.Render()
return nil
}
func DeleteHandler(cmd *cobra.Command, args []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
@@ -672,6 +733,7 @@ type runOptions struct {
Images []api.ImageData
Options map[string]interface{}
MultiModal bool
KeepAlive *api.Duration
}
type displayResponseState struct {
@@ -684,7 +746,7 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
if wordWrap && termWidth >= 10 {
for _, ch := range content {
if state.lineLength+1 > termWidth-5 {
if len(state.wordBuffer) > termWidth-10 {
if runewidth.StringWidth(state.wordBuffer) > termWidth-10 {
fmt.Printf("%s%c", state.wordBuffer, ch)
state.wordBuffer = ""
state.lineLength = 0
@@ -692,12 +754,22 @@ func displayResponse(content string, wordWrap bool, state *displayResponseState)
}
// backtrack the length of the last word and clear to the end of the line
fmt.Printf("\x1b[%dD\x1b[K\n", len(state.wordBuffer))
a := runewidth.StringWidth(state.wordBuffer)
if a > 0 {
fmt.Printf("\x1b[%dD", a)
}
fmt.Printf("\x1b[K\n")
fmt.Printf("%s%c", state.wordBuffer, ch)
state.lineLength = len(state.wordBuffer) + 1
chWidth := runewidth.RuneWidth(ch)
state.lineLength = runewidth.StringWidth(state.wordBuffer) + chWidth
} else {
fmt.Print(string(ch))
state.lineLength += 1
state.lineLength += runewidth.RuneWidth(ch)
if runewidth.RuneWidth(ch) >= 2 {
state.wordBuffer = ""
continue
}
switch ch {
case ' ':
@@ -766,6 +838,10 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
Options: opts.Options,
}
if opts.KeepAlive != nil {
req.KeepAlive = opts.KeepAlive
}
if err := client.Chat(cancelCtx, req, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil, nil
@@ -849,6 +925,7 @@ func generate(cmd *cobra.Command, opts runOptions) error {
System: opts.System,
Template: opts.Template,
Options: opts.Options,
KeepAlive: opts.KeepAlive,
}
if err := client.Generate(ctx, &request, fn); err != nil {
@@ -952,24 +1029,6 @@ func initializeKeypair() error {
return nil
}
//nolint:unused
func waitForServer(ctx context.Context, client *api.Client) error {
// wait for the server to start
timeout := time.After(5 * time.Second)
tick := time.Tick(500 * time.Millisecond)
for {
select {
case <-timeout:
return errors.New("timed out waiting for server to start")
case <-tick:
if err := client.Heartbeat(ctx); err == nil {
return nil // server has started
}
}
}
}
func checkServerHeartbeat(cmd *cobra.Command, _ []string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
@@ -1006,12 +1065,19 @@ func versionHandler(cmd *cobra.Command, _ []string) {
}
}
func appendHostEnvDocs(cmd *cobra.Command) {
const hostEnvDocs = `
func appendEnvDocs(cmd *cobra.Command, envs []envconfig.EnvVar) {
if len(envs) == 0 {
return
}
envUsage := `
Environment Variables:
OLLAMA_HOST The host:port or base URL of the Ollama server (e.g. http://localhost:11434)
`
cmd.SetUsageTemplate(cmd.UsageTemplate() + hostEnvDocs)
for _, e := range envs {
envUsage += fmt.Sprintf(" %-24s %s\n", e.Name, e.Description)
}
cmd.SetUsageTemplate(cmd.UsageTemplate() + envUsage)
}
func NewCLI() *cobra.Command {
@@ -1050,8 +1116,8 @@ func NewCLI() *cobra.Command {
RunE: CreateHandler,
}
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile (default \"Modelfile\")")
createCmd.Flags().StringP("quantization", "q", "", "Quantization level.")
createCmd.Flags().StringP("file", "f", "Modelfile", "Name of the Modelfile")
createCmd.Flags().StringP("quantize", "q", "", "Quantize model to this level (e.g. q4_0)")
showCmd := &cobra.Command{
Use: "show MODEL",
@@ -1075,6 +1141,7 @@ func NewCLI() *cobra.Command {
RunE: RunHandler,
}
runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
runCmd.Flags().Bool("verbose", false, "Show timings for response")
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
@@ -1086,15 +1153,6 @@ func NewCLI() *cobra.Command {
Args: cobra.ExactArgs(0),
RunE: RunServer,
}
serveCmd.SetUsageTemplate(serveCmd.UsageTemplate() + `
Environment Variables:
OLLAMA_HOST The host:port to bind to (default "127.0.0.1:11434")
OLLAMA_ORIGINS A comma separated list of allowed origins.
OLLAMA_MODELS The path to the models directory (default is "~/.ollama/models")
OLLAMA_KEEP_ALIVE The duration that models stay loaded in memory (default is "5m")
OLLAMA_DEBUG Set to 1 to enable additional debug logging
`)
pullCmd := &cobra.Command{
Use: "pull MODEL",
@@ -1123,6 +1181,14 @@ Environment Variables:
PreRunE: checkServerHeartbeat,
RunE: ListHandler,
}
psCmd := &cobra.Command{
Use: "ps",
Short: "List running models",
PreRunE: checkServerHeartbeat,
RunE: ListRunningHandler,
}
copyCmd := &cobra.Command{
Use: "cp SOURCE DESTINATION",
Short: "Copy a model",
@@ -1139,6 +1205,10 @@ Environment Variables:
RunE: DeleteHandler,
}
envVars := envconfig.AsMap()
envs := []envconfig.EnvVar{envVars["OLLAMA_HOST"]}
for _, cmd := range []*cobra.Command{
createCmd,
showCmd,
@@ -1146,10 +1216,33 @@ Environment Variables:
pullCmd,
pushCmd,
listCmd,
psCmd,
copyCmd,
deleteCmd,
serveCmd,
} {
appendHostEnvDocs(cmd)
switch cmd {
case runCmd:
appendEnvDocs(cmd, []envconfig.EnvVar{envVars["OLLAMA_HOST"], envVars["OLLAMA_NOHISTORY"]})
case serveCmd:
appendEnvDocs(cmd, []envconfig.EnvVar{
envVars["OLLAMA_DEBUG"],
envVars["OLLAMA_HOST"],
envVars["OLLAMA_KEEP_ALIVE"],
envVars["OLLAMA_MAX_LOADED_MODELS"],
envVars["OLLAMA_MAX_QUEUE"],
envVars["OLLAMA_MODELS"],
envVars["OLLAMA_NUM_PARALLEL"],
envVars["OLLAMA_NOPRUNE"],
envVars["OLLAMA_ORIGINS"],
envVars["OLLAMA_TMPDIR"],
envVars["OLLAMA_FLASH_ATTENTION"],
envVars["OLLAMA_LLM_LIBRARY"],
envVars["OLLAMA_MAX_VRAM"],
})
default:
appendEnvDocs(cmd, envs)
}
}
rootCmd.AddCommand(
@@ -1160,6 +1253,7 @@ Environment Variables:
pullCmd,
pushCmd,
listCmd,
psCmd,
copyCmd,
deleteCmd,
)

View File

@@ -8,15 +8,17 @@ import (
"os"
"path/filepath"
"regexp"
"slices"
"sort"
"strings"
"github.com/spf13/cobra"
"golang.org/x/exp/slices"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/progress"
"github.com/ollama/ollama/readline"
"github.com/ollama/ollama/types/errtypes"
)
type MultilineState int
@@ -56,6 +58,11 @@ func loadModel(cmd *cobra.Command, opts *runOptions) error {
Model: opts.Model,
Messages: []api.Message{},
}
if opts.KeepAlive != nil {
chatReq.KeepAlive = opts.KeepAlive
}
err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
p.StopAndClear()
if len(opts.Messages) > 0 {
@@ -132,6 +139,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word")
fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor")
fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor")
fmt.Fprintln(os.Stderr, " Ctrl + w Delete the word before the cursor")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen")
fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding")
@@ -176,6 +184,10 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
return err
}
if envconfig.NoHistory {
scanner.HistoryDisable()
}
fmt.Print(readline.StartBracketedPaste)
defer fmt.Printf(readline.EndBracketedPaste)
@@ -276,13 +288,20 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
fn := func(resp api.ProgressResponse) error { return nil }
err = client.Create(cmd.Context(), req, fn)
if err != nil {
fmt.Println("error: couldn't save model")
if strings.Contains(err.Error(), errtypes.InvalidModelNameErrMsg) {
fmt.Printf("error: The model name '%s' is invalid\n", args[1])
continue
}
return err
}
fmt.Printf("Created new model '%s'\n", args[1])
continue
case strings.HasPrefix(line, "/clear"):
opts.Messages = []api.Message{}
if opts.System != "" {
newMessage := api.Message{Role: "system", Content: opts.System}
opts.Messages = append(opts.Messages, newMessage)
}
fmt.Println("Cleared session context")
continue
case strings.HasPrefix(line, "/set"):

View File

@@ -6,6 +6,7 @@ import (
"text/template"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/ollama/ollama/api"
)
@@ -85,11 +86,11 @@ MESSAGE assistant """Yes it is true, I am half horse, half shark."""
`
tmpl, err := template.New("").Parse(expectedModelfile)
assert.Nil(t, err)
require.NoError(t, err)
var buf bytes.Buffer
err = tmpl.Execute(&buf, opts)
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, buf.String(), mf)
opts.ParentModel = "horseshark"
@@ -107,10 +108,10 @@ MESSAGE assistant """Yes it is true, I am half horse, half shark."""
`
tmpl, err = template.New("").Parse(expectedModelfile)
assert.Nil(t, err)
require.NoError(t, err)
var parentBuf bytes.Buffer
err = tmpl.Execute(&parentBuf, opts)
assert.Nil(t, err)
require.NoError(t, err)
assert.Equal(t, parentBuf.String(), mf)
}

27
cmd/start.go Normal file
View File

@@ -0,0 +1,27 @@
//go:build darwin || windows
package cmd
import (
"context"
"errors"
"time"
"github.com/ollama/ollama/api"
)
func waitForServer(ctx context.Context, client *api.Client) error {
// wait for the server to start
timeout := time.After(5 * time.Second)
tick := time.Tick(500 * time.Millisecond)
for {
select {
case <-timeout:
return errors.New("timed out waiting for server to start")
case <-tick:
if err := client.Heartbeat(ctx); err == nil {
return nil // server has started
}
}
}
}

View File

@@ -18,6 +18,16 @@ import (
"github.com/ollama/ollama/llm"
)
const (
_ int32 = iota
tokenTypeNormal
tokenTypeUnknown
tokenTypeControl
tokenTypeUserDefined
tokenTypeUnused
tokenTypeByte
)
type Params struct {
Architectures []string `json:"architectures"`
VocabSize int `json:"vocab_size"`
@@ -37,6 +47,8 @@ type Params struct {
Experts int `json:"num_local_experts"`
ExpertsUsed int `json:"num_experts_per_tok"`
PreTokenizer string
ByteOrder
}
@@ -74,10 +86,9 @@ func GetModelFormat(dirname string) (ModelFormat, error) {
}
for _, fn := range files {
slog.Debug(fmt.Sprintf("file = %s", fn))
if strings.HasSuffix(fn, ".safetensors") {
return &SafetensorFormat{}, nil
} else if strings.HasSuffix(fn, ".bin") {
} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
slog.Debug("model is torch")
return &TorchFormat{}, nil
}
@@ -92,6 +103,7 @@ type Vocab struct {
Tokens []string
Scores []float32
Types []int32
Merges []string
}
func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
@@ -170,17 +182,17 @@ func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
}
v.Tokens = append(v.Tokens, t.key)
v.Scores = append(v.Scores, -1000.0)
v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
v.Types = append(v.Types, tokenTypeUserDefined)
}
slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))
if params.VocabSize > len(v.Tokens) {
missingTokens := params.VocabSize - len(v.Tokens)
slog.Warn(fmt.Sprintf("vocab is missing %d tokens", missingTokens))
for cnt := 0; cnt < missingTokens; cnt++ {
for cnt := range missingTokens {
v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
v.Scores = append(v.Scores, -1)
v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
v.Types = append(v.Types, tokenTypeUserDefined)
}
}

103
convert/convert_test.go Normal file
View File

@@ -0,0 +1,103 @@
//go:build slow
package convert
import (
"os"
"path/filepath"
"testing"
"github.com/ollama/ollama/llm"
)
func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
t.Helper()
mf, err := GetModelFormat(p)
if err != nil {
t.Fatal(err)
}
params, err := mf.GetParams(p)
if err != nil {
t.Fatal(err)
}
arch, err := mf.GetModelArch("", p, params)
if err != nil {
t.Fatal(err)
}
if err := arch.LoadVocab(); err != nil {
t.Fatal(err)
}
if err := arch.GetTensors(); err != nil {
t.Fatal(err)
}
f, err := os.CreateTemp(t.TempDir(), "f16")
if err != nil {
t.Fatal(err)
}
defer f.Close()
if err := arch.WriteGGUF(f); err != nil {
t.Fatal(err)
}
r, err := os.Open(f.Name())
if err != nil {
t.Fatal(err)
}
defer r.Close()
m, _, err := llm.DecodeGGML(r)
if err != nil {
t.Fatal(err)
}
return m.KV(), m.Tensors()
}
func TestConvertFull(t *testing.T) {
cases := []struct {
path string
arch string
tensors int
layers int
}{
{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
{"gemma-2b-it", "gemma", 164, 20},
}
for _, tt := range cases {
t.Run(tt.path, func(t *testing.T) {
p := filepath.Join("testdata", tt.path)
if _, err := os.Stat(p); err != nil {
t.Skipf("%s not found", p)
}
kv, tensors := convertFull(t, p)
if kv.Architecture() != tt.arch {
t.Fatalf("expected llama, got %s", kv.Architecture())
}
if kv.FileType().String() != "F16" {
t.Fatalf("expected F16, got %s", kv.FileType())
}
if len(tensors) != tt.tensors {
t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
}
layers := tensors.Layers()
if len(layers) != tt.layers {
t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
}
})
}
}

View File

@@ -1,14 +1,11 @@
package convert
import (
"encoding/binary"
"fmt"
"io"
"log/slog"
"os"
"strings"
"github.com/d4l3k/go-bfloat16"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
@@ -19,49 +16,26 @@ type GemmaModel struct {
ModelData
}
func gemmaLayerHandler(w io.Writer, r safetensorWriterTo, f *os.File) error {
slog.Debug(fmt.Sprintf("converting '%s'", r.t.Name))
data := make([]byte, r.end-r.start)
if err := binary.Read(f, r.bo, data); err != nil {
return err
}
tDataF32 := bfloat16.DecodeFloat32(data)
var err error
tDataF32, err = addOnes(tDataF32, int(r.t.Shape[0]))
if err != nil {
return err
}
if err := binary.Write(w, r.bo, tDataF32); err != nil {
return err
}
return nil
}
func addOnes(data []float32, vectorSize int) ([]float32, error) {
n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
ones := tensor.Ones(tensor.Float32, vectorSize)
var err error
n, err = n.Add(ones)
n, err := n.Add(ones)
if err != nil {
return []float32{}, err
return nil, err
}
newN, err := native.SelectF32(n, 0)
ts, err := native.SelectF32(n, 0)
if err != nil {
return []float32{}, err
return nil, err
}
var fullTensor []float32
for _, v := range newN {
fullTensor = append(fullTensor, v...)
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return fullTensor, nil
return f32s, nil
}
func (m *GemmaModel) GetTensors() error {
@@ -71,12 +45,10 @@ func (m *GemmaModel) GetTensors() error {
}
slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))
m.Tensors = []llm.Tensor{}
for _, l := range t {
if strings.HasSuffix(l.Name, "norm.weight") {
wt := l.WriterTo.(safetensorWriterTo)
wt.handler = gemmaLayerHandler
wt.repacker = m.Repack
l.WriterTo = wt
}
m.Tensors = append(m.Tensors, l)
@@ -94,6 +66,10 @@ func (m *GemmaModel) LoadVocab() error {
return nil
}
func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
return addOnes(data, int(shape[0]))
}
func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
kv := llm.KV{
"general.architecture": "gemma",

View File

@@ -1,17 +1,17 @@
package convert
import (
"encoding/binary"
"cmp"
"errors"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"strings"
"github.com/nlpodyssey/gopickle/pytorch"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
@@ -20,81 +20,12 @@ type LlamaModel struct {
ModelData
}
func llamaLayerHandler(w io.Writer, r torchWriterTo) error {
slog.Debug(fmt.Sprintf("repacking layer '%s'", r.t.Name))
data := r.storage.(*pytorch.HalfStorage).Data
tData := make([]uint16, len(data))
for cnt, v := range data {
tData[cnt] = uint16(float16.Fromfloat32(v))
}
var err error
var heads uint32
if strings.Contains(r.t.Name, "attn_q") {
heads = uint32(r.params.AttentionHeads)
} else if strings.Contains(r.t.Name, "attn_k") {
heads = uint32(r.params.KeyValHeads)
if heads == 0 {
heads = uint32(r.params.AttentionHeads)
}
} else {
return fmt.Errorf("unknown layer type")
}
slog.Debug(fmt.Sprintf("heads = %d", heads))
tData, err = llamaRepack(tData, int(heads), r.t.Shape)
if err != nil {
return err
}
if err = binary.Write(w, r.bo, tData); err != nil {
return err
}
return nil
}
func llamaRepack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
n := tensor.New(tensor.WithShape(int(shape[0]), int(shape[1])), tensor.WithBacking(data))
origShape := n.Shape().Clone()
// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(origShape...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
newN, err := native.SelectU16(n, 1)
if err != nil {
return nil, err
}
var fullTensor []uint16
for _, v := range newN {
fullTensor = append(fullTensor, v...)
}
return fullTensor, nil
}
func (m *LlamaModel) GetTensors() error {
t, err := m.Format.GetTensors(m.Path, m.Params)
if err != nil {
return err
}
m.Tensors = []llm.Tensor{}
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
re, err := regexp.Compile(pattern)
if err != nil {
@@ -104,10 +35,16 @@ func (m *LlamaModel) GetTensors() error {
for _, l := range t {
matches := re.FindAllStringSubmatch(l.Name, -1)
if len(matches) > 0 {
slog.Debug(fmt.Sprintf("setting handler for: %s", l.Name))
switch m.Format.(type) {
case *TorchFormat:
wt := l.WriterTo.(torchWriterTo)
wt.handler = llamaLayerHandler
wt.repacker = m.Repack
l.WriterTo = wt
case *SafetensorFormat:
wt := l.WriterTo.(safetensorWriterTo)
wt.repacker = m.Repack
l.WriterTo = wt
}
}
m.Tensors = append(m.Tensors, l)
}
@@ -115,19 +52,22 @@ func (m *LlamaModel) GetTensors() error {
return nil
}
func (m *LlamaModel) LoadVocab() error {
var v *Vocab
var err error
slog.Debug("loading vocab")
v, err = LoadSentencePieceTokens(m.Path, m.Params)
if err != nil {
func (m *LlamaModel) LoadVocab() (err error) {
pre, ts, merges, err := parseTokens(filepath.Join(m.Path, "tokenizer.json"))
if errors.Is(err, os.ErrNotExist) {
return nil
} else if err != nil {
return err
}
slog.Debug("vocab loaded")
m.Vocab = &Vocab{}
for _, t := range ts {
m.Vocab.Tokens = append(m.Vocab.Tokens, t.Content)
m.Vocab.Types = append(m.Vocab.Types, t.Type())
}
m.Vocab = v
m.Vocab.Merges = merges
m.Params.PreTokenizer = pre
return nil
}
@@ -140,23 +80,80 @@ func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
"llama.embedding_length": uint32(m.Params.HiddenSize),
"llama.block_count": uint32(m.Params.HiddenLayers),
"llama.feed_forward_length": uint32(m.Params.IntermediateSize),
"llama.rope.freq_base": float32(m.Params.RopeFrequencyBase),
"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),
"llama.attention.head_count": uint32(m.Params.AttentionHeads),
"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),
"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),
"general.file_type": uint32(1),
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.model": "gpt2",
"tokenizer.ggml.pre": m.Params.PreTokenizer,
"tokenizer.ggml.tokens": m.Vocab.Tokens,
"tokenizer.ggml.scores": m.Vocab.Scores,
"tokenizer.ggml.token_type": m.Vocab.Types,
"tokenizer.ggml.bos_token_id": uint32(m.Params.BoSTokenID),
"tokenizer.ggml.eos_token_id": uint32(m.Params.EoSTokenID),
"tokenizer.ggml.unknown_token_id": uint32(0),
"tokenizer.ggml.add_bos_token": true,
"tokenizer.ggml.add_eos_token": false,
}
if len(m.Vocab.Merges) > 0 {
kv["tokenizer.ggml.merges"] = m.Vocab.Merges
} else {
kv["tokenizer.ggml.scores"] = m.Vocab.Scores
}
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}
func (m *LlamaModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
return llamaRepack(name, m.Params, data, shape)
}
func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([]float32, error) {
var dims []int
for _, dim := range shape {
if dim != 0 {
dims = append(dims, int(dim))
}
}
var heads int
switch {
case strings.HasSuffix(name, "attn_q.weight"):
heads = params.AttentionHeads
case strings.HasSuffix(name, "attn_k.weight"):
heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
default:
return nil, fmt.Errorf("unknown tensor name: %s", name)
}
n := tensor.New(tensor.WithShape(dims...), tensor.WithBacking(data))
if err := n.Reshape(append([]int{heads, 2, dims[0] / heads / 2}, dims[1:]...)...); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(dims...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
ts, err := native.SelectF32(n, 1)
if err != nil {
return nil, err
}
var f32s []float32
for _, t := range ts {
f32s = append(f32s, t...)
}
return f32s, nil
}

View File

@@ -1,17 +1,8 @@
package convert
import (
"encoding/binary"
"fmt"
"io"
"os"
"regexp"
"strings"
"github.com/d4l3k/go-bfloat16"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
)
@@ -20,90 +11,12 @@ type MistralModel struct {
ModelData
}
func mistralLayerHandler(w io.Writer, r safetensorWriterTo, f *os.File) error {
layerSize := r.end - r.start
var err error
tData := make([]uint16, layerSize/2)
if err = binary.Read(f, r.bo, tData); err != nil {
return err
}
var heads uint32
if strings.Contains(r.t.Name, "attn_q") {
heads = uint32(r.params.AttentionHeads)
} else if strings.Contains(r.t.Name, "attn_k") {
heads = uint32(r.params.KeyValHeads)
if heads == 0 {
heads = uint32(r.params.AttentionHeads)
}
} else {
return fmt.Errorf("unknown layer type")
}
tData, err = repack(tData, int(heads), r.t.Shape)
if err != nil {
return err
}
var buf []byte
for _, n := range tData {
buf = r.bo.AppendUint16(buf, n)
}
tempBuf := make([]uint16, len(tData))
tDataF32 := bfloat16.DecodeFloat32(buf)
for cnt, v := range tDataF32 {
tDataF16 := float16.Fromfloat32(v)
tempBuf[cnt] = uint16(tDataF16)
}
if err = binary.Write(w, r.bo, tempBuf); err != nil {
return err
}
return nil
}
func repack(data []uint16, heads int, shape []uint64) ([]uint16, error) {
n := tensor.New(tensor.WithShape(int(shape[0]), int(shape[1])), tensor.WithBacking(data))
origShape := n.Shape().Clone()
// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
return nil, err
}
if err := n.T(0, 2, 1, 3); err != nil {
return nil, err
}
if err := n.Reshape(origShape...); err != nil {
return nil, err
}
if err := n.Transpose(); err != nil {
return nil, err
}
newN, err := native.SelectU16(n, 1)
if err != nil {
return nil, err
}
var fullTensor []uint16
for _, v := range newN {
fullTensor = append(fullTensor, v...)
}
return fullTensor, nil
}
func (m *MistralModel) GetTensors() error {
t, err := m.Format.GetTensors(m.Path, m.Params)
if err != nil {
return err
}
m.Tensors = []llm.Tensor{}
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
re, err := regexp.Compile(pattern)
if err != nil {
@@ -114,7 +27,7 @@ func (m *MistralModel) GetTensors() error {
matches := re.FindAllStringSubmatch(l.Name, -1)
if len(matches) > 0 {
wt := l.WriterTo.(safetensorWriterTo)
wt.handler = mistralLayerHandler
wt.repacker = m.Repack
l.WriterTo = wt
}
m.Tensors = append(m.Tensors, l)
@@ -160,3 +73,7 @@ func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}
func (m *MistralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
return llamaRepack(name, m.Params, data, shape)
}

View File

@@ -17,8 +17,6 @@ func (m *MixtralModel) GetTensors() error {
return err
}
m.Tensors = []llm.Tensor{}
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
re, err := regexp.Compile(pattern)
if err != nil {
@@ -29,7 +27,7 @@ func (m *MixtralModel) GetTensors() error {
matches := re.FindAllStringSubmatch(l.Name, -1)
if len(matches) > 0 {
wt := l.WriterTo.(safetensorWriterTo)
wt.handler = mistralLayerHandler
wt.repacker = m.Repack
l.WriterTo = wt
}
m.Tensors = append(m.Tensors, l)
@@ -83,3 +81,7 @@ func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}
func (m *MixtralModel) Repack(name string, data []float32, shape []uint64) ([]float32, error) {
return llamaRepack(name, m.Params, data, shape)
}

View File

@@ -6,14 +6,13 @@ import (
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"regexp"
"slices"
"strings"
"github.com/d4l3k/go-bfloat16"
"github.com/mitchellh/mapstructure"
"github.com/x448/float16"
"github.com/ollama/ollama/llm"
@@ -26,39 +25,38 @@ type safetensorWriterTo struct {
bo ByteOrder
filename string
dtype string
start, end, padding uint64
handler func(w io.Writer, r safetensorWriterTo, f *os.File) error
offset, size int64
repacker func(string, []float32, []uint64) ([]float32, error)
}
type tensorMetaData struct {
Type string `mapstructure:"dtype"`
Shape []int `mapstructure:"shape"`
Offsets []int `mapstructure:"data_offsets"`
type safetensorMetadata struct {
Type string `json:"dtype"`
Shape []uint64 `json:"shape"`
Offsets []int64 `json:"data_offsets"`
}
type SafetensorFormat struct{}
func (m *SafetensorFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
slog.Debug("getting tensor data")
var tensors []llm.Tensor
files, err := filepath.Glob(filepath.Join(dirpath, "/model-*.safetensors"))
matches, err := filepath.Glob(filepath.Join(dirpath, "*.safetensors"))
if err != nil {
return nil, err
}
var offset uint64
for _, f := range files {
for _, f := range matches {
var t []llm.Tensor
var err error
t, offset, err = m.readTensors(f, offset, params)
if err != nil {
slog.Error(err.Error())
return nil, err
}
tensors = append(tensors, t...)
}
slog.Debug(fmt.Sprintf("all tensors = %d", len(tensors)))
return tensors, nil
}
@@ -69,73 +67,60 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
}
defer f.Close()
var jsonSize uint64
if err := binary.Read(f, binary.LittleEndian, &jsonSize); err != nil {
var n int64
if err := binary.Read(f, binary.LittleEndian, &n); err != nil {
return nil, 0, err
}
buf := make([]byte, jsonSize)
_, err = io.ReadFull(f, buf)
if err != nil {
b := bytes.NewBuffer(make([]byte, 0, n))
if _, err = io.CopyN(b, f, n); err != nil {
return nil, 0, err
}
d := json.NewDecoder(bytes.NewBuffer(buf))
d.UseNumber()
var parsed map[string]interface{}
if err = d.Decode(&parsed); err != nil {
var headers map[string]safetensorMetadata
if err := json.NewDecoder(b).Decode(&headers); err != nil {
return nil, 0, err
}
var keys []string
for k := range parsed {
keys = append(keys, k)
for key := range headers {
if !strings.HasSuffix(key, "self_attn.rotary_embd.inv_freq") {
keys = append(keys, key)
}
}
slices.Sort(keys)
slog.Info("converting layers")
var tensors []llm.Tensor
for _, k := range keys {
vals := parsed[k].(map[string]interface{})
var data tensorMetaData
if err = mapstructure.Decode(vals, &data); err != nil {
slog.Error("couldn't decode properly")
return nil, 0, err
}
for _, key := range keys {
value := headers[key]
var size uint64
var kind uint32
switch len(data.Shape) {
switch len(value.Shape) {
case 0:
// metadata
// valuedata
continue
case 1:
// convert to float32
kind = 0
size = uint64(data.Shape[0] * 4)
case 2:
// convert to float16
kind = 1
size = uint64(data.Shape[0] * data.Shape[1] * 2)
}
ggufName, err := m.GetLayerName(k)
name, err := m.GetLayerName(key)
if err != nil {
slog.Error(err.Error())
return nil, 0, err
}
shape := []uint64{0, 0, 0, 0}
for i := range data.Shape {
shape[i] = uint64(data.Shape[i])
shape := make([]uint64, len(value.Shape))
copy(shape, value.Shape)
pad := func(s int64) int64 {
return 8 + n + s
}
t := llm.Tensor{
Name: ggufName,
Name: name,
Kind: kind,
Offset: offset,
Shape: shape[:],
Shape: shape,
}
t.WriterTo = safetensorWriterTo{
@@ -143,18 +128,15 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
params: params,
bo: params.ByteOrder,
filename: fn,
start: uint64(data.Offsets[0]),
end: uint64(data.Offsets[1]),
padding: 8 + jsonSize,
dtype: value.Type,
offset: pad(value.Offsets[0]),
size: pad(value.Offsets[1]) - pad(value.Offsets[0]),
}
offset += size
offset += t.Size()
tensors = append(tensors, t)
}
slog.Debug(fmt.Sprintf("total tensors for file = %d", len(tensors)))
slog.Debug(fmt.Sprintf("offset = %d", offset))
return tensors, offset, nil
}
@@ -167,9 +149,7 @@ func (m *SafetensorFormat) GetParams(dirpath string) (*Params, error) {
var params Params
d := json.NewDecoder(f)
err = d.Decode(&params)
if err != nil {
if err := json.NewDecoder(f).Decode(&params); err != nil {
return nil, err
}
@@ -224,55 +204,58 @@ func (r safetensorWriterTo) WriteTo(w io.Writer) (n int64, err error) {
}
defer f.Close()
if _, err = f.Seek(int64(r.padding+r.start), 0); err != nil {
if _, err = f.Seek(r.offset, io.SeekStart); err != nil {
return 0, err
}
// use the handler if one is present
if r.handler != nil {
return 0, r.handler(w, r, f)
var f32s []float32
switch r.dtype {
case "F32":
f32s = make([]float32, r.size/4)
if err = binary.Read(f, r.bo, f32s); err != nil {
return 0, err
}
remaining := r.end - r.start
bufSize := uint64(10240)
var finished bool
for {
data := make([]byte, min(bufSize, remaining))
b, err := io.ReadFull(f, data)
remaining -= uint64(b)
if err == io.EOF || remaining <= 0 {
finished = true
} else if err != nil {
case "F16":
u16s := make([]uint16, r.size/2)
if err = binary.Read(f, r.bo, u16s); err != nil {
return 0, err
}
// convert bfloat16 -> ieee float32
tDataF32 := bfloat16.DecodeFloat32(data)
for _, b := range u16s {
f32s = append(f32s, float16.Frombits(b).Float32())
}
case "BF16":
u8s := make([]uint8, r.size)
if err = binary.Read(f, r.bo, u8s); err != nil {
return 0, err
}
f32s = bfloat16.DecodeFloat32(u8s)
default:
return 0, fmt.Errorf("unknown data type: %s", r.dtype)
}
if r.repacker != nil {
f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
if err != nil {
return 0, err
}
}
switch r.t.Kind {
case 0:
if err := binary.Write(w, r.bo, tDataF32); err != nil {
return 0, err
}
return 0, binary.Write(w, r.bo, f32s)
case 1:
// convert float32 -> float16
tempBuf := make([]uint16, len(data)/2)
for cnt, v := range tDataF32 {
tDataF16 := float16.Fromfloat32(v)
tempBuf[cnt] = uint16(tDataF16)
f16s := make([]uint16, len(f32s))
for i := range f32s {
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
}
if err := binary.Write(w, r.bo, tempBuf); err != nil {
return 0, err
return 0, binary.Write(w, r.bo, f16s)
default:
return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
}
}
if finished {
break
}
}
return 0, nil
}
func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {
@@ -281,6 +264,15 @@ func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (M
return nil, fmt.Errorf("No architecture specified to convert")
case 1:
switch params.Architectures[0] {
case "LlamaForCausalLM":
return &LlamaModel{
ModelData{
Name: name,
Path: dirPath,
Params: params,
Format: m,
},
}, nil
case "MistralForCausalLM":
return &MistralModel{
ModelData{

106
convert/tokenizer.go Normal file
View File

@@ -0,0 +1,106 @@
package convert
import (
"cmp"
"crypto/sha256"
"encoding/json"
"fmt"
"log/slog"
"os"
"slices"
"golang.org/x/exp/maps"
)
type Tokenizer struct {
Version string `json:"version"`
AddedTokens []Token `json:"added_tokens"`
Model TokenizerModel `json:"model"`
PreTokenizer struct {
PreTokenizers []struct {
Type string `json:"type"`
Pattern struct {
Regex string `json:"Regex"`
} `json:"pattern"`
} `json:"pretokenizers"`
} `json:"pre_tokenizer"`
}
type TokenizerModel struct {
Type string `json:"type"`
Vocab map[string]int `json:"vocab"`
Merges []string `json:"merges"`
Tokens []Token
}
type Token struct {
ID int `json:"id"`
Content string `json:"content"`
Special bool `json:"special"`
UserDefined bool
}
func (t *Token) Type() int32 {
switch {
case t.Special:
return tokenTypeControl
case t.UserDefined:
return tokenTypeUserDefined
default:
return tokenTypeNormal
}
}
func (t *Tokenizer) maxID() int {
return max(
slices.Max(maps.Values(t.Model.Vocab)),
slices.MaxFunc(t.AddedTokens, func(a, b Token) int {
return cmp.Compare(a.ID, b.ID)
}).ID,
)
}
func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, err error) {
f, err := os.Open(dirpath)
if err != nil {
panic(err)
}
defer f.Close()
var t Tokenizer
if err := json.NewDecoder(f).Decode(&t); err != nil {
return "", nil, nil, err
}
tokens = make([]Token, t.maxID()+1)
for k, v := range t.Model.Vocab {
tokens[v] = Token{ID: v, Content: k, Special: false, UserDefined: false}
}
for _, v := range t.AddedTokens {
v.UserDefined = true
tokens[v.ID] = v
}
sha256sum := sha256.New()
for _, pt := range t.PreTokenizer.PreTokenizers {
if pt.Type == "Split" && pt.Pattern.Regex != "" {
sha256sum.Write([]byte(pt.Pattern.Regex))
}
}
switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
pre = "llama-bpe"
case "03df5c5863ad70781dcfdef491ead25140f895fe8010964be0daefe27be32b02":
pre = "deepseek-llm"
case "21cde974d587f0d54dc8d56b183cc1e6239600172035c68fbd6d4b9f8da0576e":
pre = "deepseek-coder"
default:
slog.Warn("unknown pretokenizer, using default", "digest", digest)
pre = "default"
}
return pre, tokens, t.Model.Merges, nil
}

View File

@@ -25,7 +25,7 @@ type torchWriterTo struct {
bo ByteOrder
storage pytorch.StorageInterface
handler func(w io.Writer, r torchWriterTo) error
repacker func(string, []float32, []uint64) ([]float32, error)
}
type TorchFormat struct{}
@@ -33,14 +33,14 @@ type TorchFormat struct{}
func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor, error) {
slog.Debug("getting torch tensors")
files, err := filepath.Glob(filepath.Join(dirpath, "pytorch_model-*.bin"))
if err != nil {
slog.Error("didn't find any torch files")
return nil, err
var files []string
if pt, _ := filepath.Glob(filepath.Join(dirpath, "consolidated*.pth")); len(pt) > 0 {
files = append(files, pt...)
} else if pt, _ := filepath.Glob(filepath.Join(dirpath, "pytorch_model*.pth")); len(pt) > 0 {
files = append(files, pt...)
}
var offset uint64
var tensors []llm.Tensor
for _, fn := range files {
m, err := pytorch.Load(fn)
@@ -77,7 +77,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
slog.Error(err.Error())
return nil, err
}
slog.Debug(fmt.Sprintf("finding name for '%s' -> '%s'", k.(string), ggufName))
slog.Debug(fmt.Sprintf("'%35s': '%30s' %10d [%#v]", k.(string), ggufName, size, tshape))
shape := []uint64{0, 0, 0, 0}
for i := range tshape {
@@ -88,7 +88,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
Name: ggufName,
Kind: kind,
Offset: offset, // calculate the offset
Shape: shape[:],
Shape: shape,
}
tensor.WriterTo = torchWriterTo{
@@ -104,7 +104,6 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
}
return tensors, nil
}
func getAltParams(dirpath string) (*Params, error) {
@@ -120,7 +119,7 @@ func getAltParams(dirpath string) (*Params, error) {
AttentionHeads int `json:"n_heads"`
KeyValHeads int `json:"n_kv_heads"`
HiddenLayers int `json:"n_layers"`
RopeTheta int `json:"rope_theta"`
RopeTheta float64 `json:"rope_theta"`
NormEPS float64 `json:"norm_eps"`
}
@@ -133,6 +132,7 @@ func getAltParams(dirpath string) (*Params, error) {
}
params := &Params{
Architectures: []string{"LlamaForCausalLM"},
HiddenSize: tparams.HiddenSize,
AttentionHeads: tparams.AttentionHeads,
KeyValHeads: tparams.KeyValHeads,
@@ -229,37 +229,38 @@ func (m *TorchFormat) GetLayerName(n string) (string, error) {
}
func (r torchWriterTo) WriteTo(w io.Writer) (n int64, err error) {
// use the handler if one is present
if r.handler != nil {
return 0, r.handler(w, r)
var f32s []float32
switch s := r.storage.(type) {
case *pytorch.FloatStorage:
f32s = s.Data
case *pytorch.HalfStorage:
f32s = s.Data
case *pytorch.BFloat16Storage:
f32s = s.Data
default:
return 0, fmt.Errorf("unknown data type: %T", s)
}
if r.repacker != nil {
f32s, err = r.repacker(r.t.Name, f32s, r.t.Shape)
if err != nil {
return 0, err
}
}
switch r.storage.(type) {
case *pytorch.FloatStorage:
slog.Warn(fmt.Sprintf("unexpected storage found for layer '%s'; skipping", r.t.Name))
return 0, nil
case *pytorch.HalfStorage:
switch r.t.Kind {
case 0:
data := r.storage.(*pytorch.HalfStorage).Data
slog.Debug(fmt.Sprintf("%35s F32 (%d)", r.t.Name, len(data)))
if err := binary.Write(w, r.bo, data); err != nil {
return 0, err
}
return 0, binary.Write(w, r.bo, f32s)
case 1:
data := r.storage.(*pytorch.HalfStorage).Data
tData := make([]uint16, len(data))
for cnt, v := range data {
tData[cnt] = uint16(float16.Fromfloat32(v))
}
slog.Debug(fmt.Sprintf("%35s F16 (%d)", r.t.Name, len(tData)))
if err := binary.Write(w, r.bo, tData); err != nil {
return 0, err
}
}
f16s := make([]uint16, len(f32s))
for i := range f32s {
f16s[i] = float16.Fromfloat32(f32s[i]).Bits()
}
return 0, nil
return 0, binary.Write(w, r.bo, f16s)
default:
return 0, fmt.Errorf("unknown storage type: %d", r.t.Kind)
}
}
func (m *TorchFormat) GetModelArch(name, dirPath string, params *Params) (ModelArch, error) {

View File

@@ -6,7 +6,7 @@
* [Importing models](./import.md)
* [Linux Documentation](./linux.md)
* [Windows Documentation](./windows.md)
* [Docker Documentation](https://hub.docker.com/r/ollama/ollama)
* [Docker Documentation](./docker.md)
### Reference

View File

@@ -12,6 +12,7 @@
- [Pull a Model](#pull-a-model)
- [Push a Model](#push-a-model)
- [Generate Embeddings](#generate-embeddings)
- [List Running Models](#list-running-models)
## Conventions
@@ -313,7 +314,6 @@ curl http://localhost:11434/api/generate -d '{
"numa": false,
"num_ctx": 1024,
"num_batch": 2,
"num_gqa": 1,
"num_gpu": 1,
"main_gpu": 0,
"low_vram": false,
@@ -321,8 +321,6 @@ curl http://localhost:11434/api/generate -d '{
"vocab_only": false,
"use_mmap": true,
"use_mlock": false,
"rope_frequency_base": 1.1,
"rope_frequency_scale": 0.8,
"num_thread": 8
}
}'
@@ -800,9 +798,9 @@ curl http://localhost:11434/api/show -d '{
```json
{
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSSISTANT:\"",
"parameters": "num_ctx 4096\nstop \u003c/s\u003e\nstop USER:\nstop ASSSISTANT:",
"template": "{{ .System }}\nUSER: {{ .Prompt }}\nASSSISTANT: ",
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /Users/matt/.ollama/models/blobs/sha256:200765e1283640ffbd013184bf496e261032fa75b99498a9613be4e94d63ad52\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 4096\nPARAMETER stop \"\u003c/s\u003e\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
"parameters": "num_ctx 4096\nstop \u003c/s\u003e\nstop USER:\nstop ASSISTANT:",
"template": "{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: ",
"details": {
"format": "gguf",
"family": "llama",
@@ -1038,3 +1036,48 @@ curl http://localhost:11434/api/embeddings -d '{
]
}
```
## List Running Models
```shell
GET /api/ps
```
List models that are currently loaded into memory.
\* If a model is loaded completely into system memory, `size_vram` is omitted from the response.
### Examples
#### Request
```shell
curl http://localhost:11434/api/ps
```
#### Response
A single JSON object will be returned.
```json
{
"models": [
{
"name": "mistral:latest",
"model": "mistral:latest",
"size": 5137025024,
"digest": "2ae6f6dd7a3dd734790bbbf58b8909a606e0e7e97e94b7604e0aa7ae4490e6d8",
"details": {
"parent_model": "",
"format": "gguf",
"family": "llama",
"families": [
"llama"
],
"parameter_size": "7.2B",
"quantization_level": "Q4_0"
},
"expires_at": "2024-06-04T14:38:31.83753-07:00",
"size_vram": 5137025024
}
]
}
```
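A minimal Go sketch (field names follow the JSON above; the URL assumes a default local server) that queries `/api/ps` with the standard library and prints each loaded model:
```go
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

func main() {
	resp, err := http.Get("http://localhost:11434/api/ps")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var ps struct {
		Models []struct {
			Name      string    `json:"name"`
			Size      int64     `json:"size"`
			SizeVRAM  int64     `json:"size_vram"`
			ExpiresAt time.Time `json:"expires_at"`
		} `json:"models"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&ps); err != nil {
		panic(err)
	}

	// Print one line per loaded model.
	for _, m := range ps.Models {
		fmt.Printf("%s  %d bytes (%d in VRAM)  until %s\n",
			m.Name, m.Size, m.SizeVRAM, m.ExpiresAt.Format(time.RFC3339))
	}
}
```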

View File

@@ -6,6 +6,8 @@ Install required tools:
- go version 1.22 or higher
- gcc version 11.4.0 or higher
### MacOS
```bash
brew install go cmake gcc
```

71
docs/docker.md Normal file
View File

@@ -0,0 +1,71 @@
# Ollama Docker image
### CPU only
```bash
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
```
### Nvidia GPU
Install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installation).
#### Install with Apt
1. Configure the repository
```bash
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt-get update
```
2. Install the NVIDIA Container Toolkit packages
```bash
sudo apt-get install -y nvidia-container-toolkit
```
#### Install with Yum or Dnf
1. Configure the repository
```bash
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \
| sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
```
2. Install the NVIDIA Container Toolkit packages
```bash
sudo yum install -y nvidia-container-toolkit
```
#### Configure Docker to use Nvidia driver
```
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
```
#### Start the container
```bash
docker run -d --gpus=all -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama
```
### AMD GPU
To run Ollama using Docker with AMD GPUs, use the `rocm` tag and the following command:
```
docker run -d --device /dev/kfd --device /dev/dri -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama:rocm
```
### Run model locally
Now you can run a model:
```
docker exec -it ollama ollama run llama3
```
### Try different models
More models can be found on the [Ollama library](https://ollama.com/library).

View File

@@ -6,7 +6,7 @@ Ollama on macOS and Windows will automatically download updates. Click on the ta
On Linux, re-run the install script:
```
```shell
curl -fsSL https://ollama.com/install.sh | sh
```
@@ -30,7 +30,7 @@ To change this when using `ollama run`, use `/set parameter`:
When using the API, specify the `num_ctx` parameter:
```
```shell
curl http://localhost:11434/api/generate -d '{
"model": "llama3",
"prompt": "Why is the sky blue?",
@@ -40,6 +40,21 @@ curl http://localhost:11434/api/generate -d '{
}'
```
## How can I tell if my model was loaded onto the GPU?
Use the `ollama ps` command to see what models are currently loaded into memory.
```shell
ollama ps
NAME ID SIZE PROCESSOR UNTIL
llama3:70b bcfb190ca3a7 42 GB 100% GPU 4 minutes from now
```
The `Processor` column will show which memory the model was loaded into (a rough sketch of the computation follows the list below):
* `100% GPU` means the model was loaded entirely into the GPU
* `100% CPU` means the model was loaded entirely in system memory
* `48%/52% CPU/GPU` means the model was loaded partially onto both the GPU and into system memory
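The split can be derived from the `size` and `size_vram` fields returned by `/api/ps`, where `size_vram` is the portion loaded into GPU memory and is omitted when the model is entirely in system memory. A rough, hypothetical helper (not necessarily the exact formula the CLI uses):
```go
package main

import "fmt"

// processorSplit turns the size/size_vram pair into a Processor-style string,
// e.g. "100% GPU", "100% CPU" or "48%/52% CPU/GPU".
func processorSplit(size, sizeVRAM int64) string {
	switch {
	case sizeVRAM == 0:
		return "100% CPU"
	case sizeVRAM >= size:
		return "100% GPU"
	default:
		gpu := int(float64(sizeVRAM) / float64(size) * 100)
		return fmt.Sprintf("%d%%/%d%% CPU/GPU", 100-gpu, gpu)
	}
}

func main() {
	fmt.Println(processorSplit(5137025024, 5137025024)) // fully in GPU memory
	fmt.Println(processorSplit(5137025024, 0))          // fully in system memory
	fmt.Println(processorSplit(5137025024, 2500000000)) // partial split (illustrative numbers)
}
```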
## How do I configure Ollama server?
Ollama server can be configured with environment variables.
@@ -80,81 +95,19 @@ If Ollama is run as a systemd service, environment variables should be set using
### Setting environment variables on Windows
On windows, Ollama inherits your user and system environment variables.
On Windows, Ollama inherits your user and system environment variables.
1. First Quit Ollama by clicking on it in the task bar
1. First Quit Ollama by clicking on it in the task bar.
2. Edit system environment variables from the control panel
2. Start the Settings (Windows 11) or Control Panel (Windows 10) application and search for _environment variables_.
3. Edit or create New variable(s) for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc.
3. Click on _Edit environment variables for your account_.
4. Click OK/Apply to save
4. Edit or create a new variable for your user account for `OLLAMA_HOST`, `OLLAMA_MODELS`, etc.
5. Run `ollama` from a new terminal window
5. Click OK/Apply to save.
## How can I expose Ollama on my network?
Ollama binds 127.0.0.1 port 11434 by default. Change the bind address with the `OLLAMA_HOST` environment variable.
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
## How can I use Ollama with a proxy server?
Ollama runs an HTTP server and can be exposed using a proxy server such as Nginx. To do so, configure the proxy to forward requests and optionally set required headers (if not exposing Ollama on the network). For example, with Nginx:
```
server {
listen 80;
server_name example.com; # Replace with your domain or IP
location / {
proxy_pass http://localhost:11434;
proxy_set_header Host localhost:11434;
}
}
```
## How can I use Ollama with ngrok?
Ollama can be accessed using a range of tools for tunneling tools. For example with Ngrok:
```
ngrok http 11434 --host-header="localhost:11434"
```
## How can I use Ollama with Cloudflare Tunnel?
To use Ollama with Cloudflare Tunnel, use the `--url` and `--http-host-header` flags:
```
cloudflared tunnel --url http://localhost:11434 --http-host-header="localhost:11434"
```
## How can I allow additional web origins to access Ollama?
Ollama allows cross-origin requests from `127.0.0.1` and `0.0.0.0` by default. Additional origins can be configured with `OLLAMA_ORIGINS`.
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
## Where are models stored?
- macOS: `~/.ollama/models`
- Linux: `/usr/share/ollama/.ollama/models`
- Windows: `C:\Users\%username%\.ollama\models`
### How do I set them to a different location?
If a different directory needs to be used, set the environment variable `OLLAMA_MODELS` to the chosen directory.
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
## Does Ollama send my prompts and answers back to ollama.com?
No. Ollama runs locally, and conversation data does not leave your machine.
## How can I use Ollama in Visual Studio Code?
There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. See the list of [extensions & plugins](https://github.com/ollama/ollama#extensions--plugins) at the bottom of the main repository readme.
6. Start the Ollama application from the Windows Start menu.
## How do I use Ollama behind a proxy?
@@ -181,6 +134,69 @@ docker build -t ollama-with-ca .
docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
```
## Does Ollama send my prompts and answers back to ollama.com?
No. Ollama runs locally, and conversation data does not leave your machine.
## How can I expose Ollama on my network?
Ollama binds 127.0.0.1 port 11434 by default. Change the bind address with the `OLLAMA_HOST` environment variable.
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
## How can I use Ollama with a proxy server?
Ollama runs an HTTP server and can be exposed using a proxy server such as Nginx. To do so, configure the proxy to forward requests and optionally set required headers (if not exposing Ollama on the network). For example, with Nginx:
```
server {
listen 80;
server_name example.com; # Replace with your domain or IP
location / {
proxy_pass http://localhost:11434;
proxy_set_header Host localhost:11434;
}
}
```
## How can I use Ollama with ngrok?
Ollama can be accessed through a range of tunneling tools. For example, with ngrok:
```shell
ngrok http 11434 --host-header="localhost:11434"
```
## How can I use Ollama with Cloudflare Tunnel?
To use Ollama with Cloudflare Tunnel, use the `--url` and `--http-host-header` flags:
```shell
cloudflared tunnel --url http://localhost:11434 --http-host-header="localhost:11434"
```
## How can I allow additional web origins to access Ollama?
Ollama allows cross-origin requests from `127.0.0.1` and `0.0.0.0` by default. Additional origins can be configured with `OLLAMA_ORIGINS`.
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
## Where are models stored?
- macOS: `~/.ollama/models`
- Linux: `/usr/share/ollama/.ollama/models`
- Windows: `C:\Users\%username%\.ollama\models`
### How do I set them to a different location?
If a different directory needs to be used, set the environment variable `OLLAMA_MODELS` to the chosen directory.
Refer to the section [above](#how-do-i-configure-ollama-server) for how to set environment variables on your platform.
## How can I use Ollama in Visual Studio Code?
There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. See the list of [extensions & plugins](https://github.com/ollama/ollama#extensions--plugins) at the bottom of the main repository readme.
## How do I use Ollama with GPU acceleration in Docker?
The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
@@ -195,7 +211,7 @@ Open `Control Panel > Networking and Internet > View network status and tasks` a
Click on `Configure` and open the `Advanced` tab. Search through each of the properties until you find `Large Send Offload Version 2 (IPv4)` and `Large Send Offload Version 2 (IPv6)`. *Disable* both of these
properties.
## How can I pre-load a model to get faster response times?
## How can I preload a model into Ollama to get faster response times?
If you are using the API you can preload a model by sending the Ollama server an empty request. This works with both the `/api/generate` and `/api/chat` API endpoints.
@@ -209,6 +225,11 @@ To use the chat completions endpoint, use:
curl http://localhost:11434/api/chat -d '{"model": "mistral"}'
```
To preload a model using the CLI, use the command:
```shell
ollama run llama3 ""
```
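The same preload can be done from Go by mirroring the curl request above; the model name here is only an example:
```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// An empty request (no prompt) causes the server to load the model.
	body := bytes.NewBufferString(`{"model": "mistral"}`)
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", body)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// The server responds once the model has been loaded.
	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out))
}
```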
## How do I keep a model loaded in memory or make it unload immediately?
By default models are kept in memory for 5 minutes before being unloaded. This allows for quicker response times if you are making numerous requests to the LLM. You may, however, want to free up the memory before the 5 minutes have elapsed or keep the model loaded indefinitely. Use the `keep_alive` parameter with the `/api/generate` and `/api/chat` API endpoints to control how long the model is left in memory.
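As a sketch of the request-level setting, here is a generate call that keeps the model loaded for ten minutes afterwards. The types and field names are taken from the Go client as used in the CLI changes above and may differ between versions:
```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}

	req := &api.GenerateRequest{
		Model:     "llama3",
		Prompt:    "Why is the sky blue?",
		KeepAlive: &api.Duration{Duration: 10 * time.Minute},
	}

	// Stream the response to stdout; the model stays loaded for 10 minutes afterwards.
	err = client.Generate(context.Background(), req, func(resp api.GenerateResponse) error {
		fmt.Print(resp.Response)
		return nil
	})
	if err != nil {
		panic(err)
	}
}
```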
@@ -233,8 +254,6 @@ Alternatively, you can change the amount of time all models are loaded into memo
If you wish to override the `OLLAMA_KEEP_ALIVE` setting, use the `keep_alive` API parameter with the `/api/generate` or `/api/chat` API endpoints.
## How do I manage the maximum number of requests the server can queue
## How do I manage the maximum number of requests the Ollama server can queue?
If too many requests are sent to the server, it will respond with a 503 error
indicating the server is overloaded. You can adjust how many requests may be
queue by setting `OLLAMA_MAX_QUEUE`
If too many requests are sent to the server, it will respond with a 503 error indicating the server is overloaded. You can adjust how many requests may be queued by setting `OLLAMA_MAX_QUEUE`.

View File

@@ -37,16 +37,9 @@ Join the [Discord](https://discord.gg/ollama) for help interpreting the logs.
## LLM libraries
Ollama includes multiple LLM libraries compiled for different GPUs and CPU
vector features. Ollama tries to pick the best one based on the capabilities of
your system. If this autodetection has problems, or you run into other problems
(e.g. crashes in your GPU) you can workaround this by forcing a specific LLM
library. `cpu_avx2` will perform the best, followed by `cpu_avx` an the slowest
but most compatible is `cpu`. Rosetta emulation under MacOS will work with the
`cpu` library.
Ollama includes multiple LLM libraries compiled for different GPUs and CPU vector features. Ollama tries to pick the best one based on the capabilities of your system. If this autodetection has problems, or you run into other problems (e.g. crashes in your GPU), you can work around this by forcing a specific LLM library. `cpu_avx2` will perform the best, followed by `cpu_avx`; the slowest but most compatible is `cpu`. Rosetta emulation under MacOS will work with the `cpu` library.
In the server log, you will see a message that looks something like this (varies
from release to release):
In the server log, you will see a message that looks something like this (varies from release to release):
```
Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5]
@@ -54,9 +47,7 @@ Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v11 rocm_v5]
**Experimental LLM Library Override**
You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to bypass
autodetection, so for example, if you have a CUDA card, but want to force the
CPU LLM library with AVX2 vector support, use:
You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to bypass autodetection, so for example, if you have a CUDA card, but want to force the CPU LLM library with AVX2 vector support, use:
```
OLLAMA_LLM_LIBRARY="cpu_avx2" ollama serve
@@ -69,9 +60,7 @@ cat /proc/cpuinfo| grep flags | head -1
## Installing older or pre-release versions on Linux
If you run into problems on Linux and want to install an older version, or you'd
like to try out a pre-release before it's officially released, you can tell the
install script which version to install.
If you run into problems on Linux and want to install an older version, or you'd like to try out a pre-release before it's officially released, you can tell the install script which version to install.
```sh
curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.29" sh
@@ -79,7 +68,20 @@ curl -fsSL https://ollama.com/install.sh | OLLAMA_VERSION="0.1.29" sh
## Linux tmp noexec
If your system is configured with the "noexec" flag where Ollama stores its
temporary executable files, you can specify an alternate location by setting
OLLAMA_TMPDIR to a location writable by the user ollama runs as. For example
OLLAMA_TMPDIR=/usr/share/ollama/
If your system is configured with the "noexec" flag where Ollama stores its temporary executable files, you can specify an alternate location by setting OLLAMA_TMPDIR to a location writable by the user ollama runs as. For example OLLAMA_TMPDIR=/usr/share/ollama/
## Container fails to run on NVIDIA GPU
Make sure you've set up the container runtime first as described in [docker.md](./docker.md).
Sometimes the container runtime can have difficulties initializing the GPU. When you check the server logs, this can show up as various error codes, such as "3" (not initialized), "46" (device unavailable), "100" (no device), "999" (unknown), or others. The following troubleshooting techniques may help resolve the problem:
- Is the container runtime working? Try `docker run --gpus all ubuntu nvidia-smi` - if this doesn't work, Ollama won't be able to see your NVIDIA GPU.
- Is the uvm driver not loaded? `sudo nvidia-modprobe -u`
- Try reloading the nvidia_uvm driver - `sudo rmmod nvidia_uvm` then `sudo modprobe nvidia_uvm`
- Try rebooting
- Make sure you're running the latest nvidia drivers
If none of those resolve the problem, gather additional information and file an issue:
- Set `CUDA_ERROR_LEVEL=50` and try again to get more diagnostic logs
- Check dmesg for any errors `sudo dmesg | grep -i nvrm` and `sudo dmesg | grep -i nvidia`

View File

@@ -5,13 +5,13 @@ In this tutorial, we are going to use JavaScript with LangChain and Ollama to le
To get started, let's just use **LangChain** to ask a simple question to a model. To do this with JavaScript, we need to install **LangChain**:
```bash
npm install langchain
npm install @langchain/community
```
Now we can start building out our JavaScript:
```javascript
import { Ollama } from "langchain/llms/ollama";
import { Ollama } from "@langchain/community/llms/ollama";
const ollama = new Ollama({
baseUrl: "http://localhost:11434",

View File

@@ -45,7 +45,7 @@ all_splits = text_splitter.split_documents(data)
```
It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install chromadb`
We also need to pull an embedding model: `ollama pull nomic-embed-text`
```python
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
@@ -68,7 +68,8 @@ The next thing is to send the question and the relevant parts of the docs to the
```python
from langchain.chains import RetrievalQA
qachain=RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
qachain.invoke({"query": question})
res = qachain.invoke({"query": question})
print(res['result'])
```
The answer received from this chain was:

View File

@@ -33,7 +33,7 @@ Here's a quick example showing API access from `powershell`
## Troubleshooting
While we're in preview, `OLLAMA_DEBUG` is always enabled, which adds
a "view logs" menu item to the app, and increses logging for the GUI app and
a "view logs" menu item to the app, and increases logging for the GUI app and
server.
Ollama on Windows stores files in a few different locations. You can view them in

View File

@@ -3,6 +3,7 @@ package envconfig
import (
"fmt"
"log/slog"
"net"
"os"
"path/filepath"
"runtime"
@@ -15,6 +16,10 @@ var (
AllowOrigins []string
// Set via OLLAMA_DEBUG in the environment
Debug bool
// Experimental flash attention
FlashAttention bool
// Set via OLLAMA_KEEP_ALIVE in the environment
KeepAlive string
// Set via OLLAMA_LLM_LIBRARY in the environment
LLMLibrary string
// Set via OLLAMA_MAX_LOADED_MODELS in the environment
@@ -23,6 +28,8 @@ var (
MaxQueuedRequests int
// Set via OLLAMA_MAX_VRAM in the environment
MaxVRAM uint64
// Set via OLLAMA_NOHISTORY in the environment
NoHistory bool
// Set via OLLAMA_NOPRUNE in the environment
NoPrune bool
// Set via OLLAMA_NUM_PARALLEL in the environment
@@ -33,21 +40,40 @@ var (
TmpDir string
)
func AsMap() map[string]string {
return map[string]string{
"OLLAMA_ORIGINS": fmt.Sprintf("%v", AllowOrigins),
"OLLAMA_DEBUG": fmt.Sprintf("%v", Debug),
"OLLAMA_LLM_LIBRARY": fmt.Sprintf("%v", LLMLibrary),
"OLLAMA_MAX_LOADED_MODELS": fmt.Sprintf("%v", MaxRunners),
"OLLAMA_MAX_QUEUE": fmt.Sprintf("%v", MaxQueuedRequests),
"OLLAMA_MAX_VRAM": fmt.Sprintf("%v", MaxVRAM),
"OLLAMA_NOPRUNE": fmt.Sprintf("%v", NoPrune),
"OLLAMA_NUM_PARALLEL": fmt.Sprintf("%v", NumParallel),
"OLLAMA_RUNNERS_DIR": fmt.Sprintf("%v", RunnersDir),
"OLLAMA_TMPDIR": fmt.Sprintf("%v", TmpDir),
type EnvVar struct {
Name string
Value any
Description string
}
func AsMap() map[string]EnvVar {
return map[string]EnvVar{
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug, "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention, "Enabled flash attention"},
"OLLAMA_HOST": {"OLLAMA_HOST", "", "IP Address for the ollama server (default 127.0.0.1:11434)"},
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive, "The duration that models stay loaded in memory (default \"5m\")"},
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary, "Set LLM library to bypass autodetection"},
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners, "Maximum number of loaded models (default 1)"},
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueuedRequests, "Maximum number of queued requests"},
"OLLAMA_MAX_VRAM": {"OLLAMA_MAX_VRAM", MaxVRAM, "Maximum VRAM"},
"OLLAMA_MODELS": {"OLLAMA_MODELS", "", "The path to the models directory"},
"OLLAMA_NOHISTORY": {"OLLAMA_NOHISTORY", NoHistory, "Do not preserve readline history"},
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune, "Do not prune model blobs on startup"},
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel, "Maximum number of parallel requests (default 1)"},
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", AllowOrigins, "A comma separated list of allowed origins"},
"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir, "Location for runners"},
"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir, "Location for temporary files"},
}
}
func Values() map[string]string {
vals := make(map[string]string)
for k, v := range AsMap() {
vals[k] = fmt.Sprintf("%v", v.Value)
}
return vals
}
var defaultAllowOrigins = []string{
"localhost",
"127.0.0.1",
@@ -78,6 +104,13 @@ func LoadConfig() {
}
}
if fa := clean("OLLAMA_FLASH_ATTENTION"); fa != "" {
d, err := strconv.ParseBool(fa)
if err == nil {
FlashAttention = d
}
}
RunnersDir = clean("OLLAMA_RUNNERS_DIR")
if runtime.GOOS == "windows" && RunnersDir == "" {
// On Windows we do not carry the payloads inside the main executable
@@ -94,7 +127,7 @@ func LoadConfig() {
var paths []string
for _, root := range []string{filepath.Dir(appExe), cwd} {
paths = append(paths,
filepath.Join(root),
root,
filepath.Join(root, "windows-"+runtime.GOARCH),
filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
)
@@ -137,6 +170,10 @@ func LoadConfig() {
}
}
if nohistory := clean("OLLAMA_NOHISTORY"); nohistory != "" {
NoHistory = true
}
if noprune := clean("OLLAMA_NOPRUNE"); noprune != "" {
NoPrune = true
}
@@ -148,11 +185,17 @@ func LoadConfig() {
AllowOrigins = append(AllowOrigins,
fmt.Sprintf("http://%s", allowOrigin),
fmt.Sprintf("https://%s", allowOrigin),
fmt.Sprintf("http://%s:*", allowOrigin),
fmt.Sprintf("https://%s:*", allowOrigin),
fmt.Sprintf("http://%s", net.JoinHostPort(allowOrigin, "*")),
fmt.Sprintf("https://%s", net.JoinHostPort(allowOrigin, "*")),
)
}
AllowOrigins = append(AllowOrigins,
"app://*",
"file://*",
"tauri://*",
)
maxRunners := clean("OLLAMA_MAX_LOADED_MODELS")
if maxRunners != "" {
m, err := strconv.Atoi(maxRunners)
@@ -171,4 +214,6 @@ func LoadConfig() {
MaxQueuedRequests = p
}
}
KeepAlive = clean("OLLAMA_KEEP_ALIVE")
}
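
The origins change above swaps plain `%s:*` formatting for `net.JoinHostPort`. A minimal standalone sketch of why (not taken from the diff): `JoinHostPort` brackets hosts that contain colons, so a wildcard-port origin built from an IPv6 address such as `::1` comes out as `[::1]:*` instead of the ambiguous `::1:*`.

```go
package main

import (
	"fmt"
	"net"
)

func main() {
	// JoinHostPort adds brackets only when the host contains a colon (IPv6).
	for _, host := range []string{"localhost", "127.0.0.1", "::1"} {
		fmt.Println("http://" + net.JoinHostPort(host, "*"))
	}
	// Output:
	// http://localhost:*
	// http://127.0.0.1:*
	// http://[::1]:*
}
```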

envconfig/config_test.go
View File

@@ -0,0 +1,23 @@
package envconfig
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestConfig(t *testing.T) {
Debug = false // Reset whatever was loaded in init()
t.Setenv("OLLAMA_DEBUG", "")
LoadConfig()
require.False(t, Debug)
t.Setenv("OLLAMA_DEBUG", "false")
LoadConfig()
require.False(t, Debug)
t.Setenv("OLLAMA_DEBUG", "1")
LoadConfig()
require.True(t, Debug)
t.Setenv("OLLAMA_FLASH_ATTENTION", "1")
LoadConfig()
require.True(t, FlashAttention)
}
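
The test above flips boolean settings such as `OLLAMA_DEBUG` and `OLLAMA_FLASH_ATTENTION` through the environment. A standalone sketch of that parse-or-keep-default pattern, assuming a hypothetical `boolFromEnv` helper (the real code reads values through its own `clean()` helper rather than `os.Getenv`):

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// boolFromEnv parses a boolean environment variable and keeps the default
// when the variable is unset or malformed, mirroring the pattern above.
func boolFromEnv(key string, def bool) bool {
	raw := os.Getenv(key)
	if raw == "" {
		return def
	}
	v, err := strconv.ParseBool(raw) // accepts "1", "true", "false", etc.
	if err != nil {
		return def
	}
	return v
}

func main() {
	fmt.Println(boolFromEnv("OLLAMA_FLASH_ATTENTION", false))
}
```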

View File

@@ -1,10 +0,0 @@
# Bash Shell examples
When calling `ollama`, you can pass it a file to run all the prompts in the file, one after the other:
`ollama run llama3 < sourcequestions.txt`
This concept is used in the following example.
## Compare Models
`comparemodels.sh` is a script that runs all the questions in `sourcequestions.txt` against any 4 models of your choice that you have already pulled from the Ollama library or created locally.

View File

@@ -1,64 +0,0 @@
#! /usr/bin/env bash
# Compare multiple models by running them with the same questions
NUMBEROFCHOICES=4
SELECTIONS=()
declare -a SUMS=()
# Get the list of models
CHOICES=$(ollama list | awk '{print $1}')
# Select which models to run as a comparison
echo "Select $NUMBEROFCHOICES models to compare:"
select ITEM in $CHOICES; do
if [[ -n $ITEM ]]; then
echo "You have selected $ITEM"
SELECTIONS+=("$ITEM")
((COUNT++))
if [[ $COUNT -eq $NUMBEROFCHOICES ]]; then
break
fi
else
echo "Invalid selection"
fi
done
# Loop through each of the selected models
for ITEM in "${SELECTIONS[@]}"; do
echo "--------------------------------------------------------------"
echo "Loading the model $ITEM into memory"
ollama run "$ITEM" ""
echo "--------------------------------------------------------------"
echo "Running the questions through the model $ITEM"
COMMAND_OUTPUT=$(ollama run "$ITEM" --verbose < sourcequestions.txt 2>&1| tee /dev/stderr)
# eval duration is sometimes listed in seconds and sometimes in milliseconds.
# Add up the values for each model
SUM=$(echo "$COMMAND_OUTPUT" | awk '
/eval duration:/ {
value = $3
if (index(value, "ms") > 0) {
gsub("ms", "", value)
value /= 1000
} else {
gsub("s", "", value)
}
sum += value
}
END { print sum }')
SUMS+=("All questions for $ITEM completed in $SUM seconds")
done
echo ""
echo "--------------------------------------------------------------"
echo -e "Sums of eval durations for each run:"
for val in "${SUMS[@]}"; do
echo "$val"
done
echo "--------------------------------------------------------------"
echo "Comparison complete. Now you can decide"
echo "which model is best."
echo "--------------------------------------------------------------"

View File

@@ -1,7 +0,0 @@
Why is the sky blue
What is a black hole
Explain the big bang theory like I am 5?
What is the quickest way to win a game of Monopoly with 3 others?
Why does a vacuum bottle keep my coffee hot and my milkshake cold?
What is the difference between a meteor, a meteorite, and a meteoroid?
Create an array with 5 items and print to the console. Do this in Python, C#, Typescript, and Rust.

View File

@@ -9,6 +9,7 @@ def chat(messages):
r = requests.post(
"http://0.0.0.0:11434/api/chat",
json={"model": model, "messages": messages, "stream": True},
stream=True
)
r.raise_for_status()
output = ""

View File

@@ -5,7 +5,6 @@ import (
)
func TestHumanNumber(t *testing.T) {
type testCase struct {
input uint64
expected string

View File

@@ -60,7 +60,9 @@ func humanTime(t time.Time, zeroValue string) string {
}
delta := time.Since(t)
if delta < 0 {
if int(delta.Hours())/24/365 < -20 {
return "Forever"
} else if delta < 0 {
return humanDuration(-delta) + " from now"
}
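
A small sketch of the cutoff introduced above; `farFuture` is an illustrative helper, not part of the diff. `time.Since` is negative for future times, so dividing its hours by 24 and then 365 gives roughly negative years, and anything more than about 20 years out reports "Forever".

```go
package main

import (
	"fmt"
	"time"
)

// farFuture mirrors the new condition: a negative delta of more than ~20
// "years" (hours / 24 / 365) means the time is far enough out to be "Forever".
func farFuture(t time.Time) bool {
	delta := time.Since(t)
	return int(delta.Hours())/24/365 < -20
}

func main() {
	fmt.Println(farFuture(time.Now().Add(24 * time.Hour)))             // false: tomorrow
	fmt.Println(farFuture(time.Now().Add(200 * 365 * 24 * time.Hour))) // true: ~200 years out
}
```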

View File

@@ -32,4 +32,14 @@ func TestHumanTime(t *testing.T) {
v := now.Add(800 * time.Millisecond)
assertEqual(t, HumanTime(v, ""), "Less than a second from now")
})
t.Run("time way in the future", func(t *testing.T) {
v := now.Add(24 * time.Hour * 365 * 200)
assertEqual(t, HumanTime(v, ""), "Forever")
})
t.Run("time way in the future lowercase", func(t *testing.T) {
v := now.Add(24 * time.Hour * 365 * 200)
assertEqual(t, HumanTimeLower(v, ""), "forever")
})
}

go.mod
View File

@@ -1,77 +1,77 @@
module github.com/ollama/ollama
go 1.22
toolchain go1.22.0
go 1.22.0
require (
github.com/containerd/console v1.0.3
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
github.com/emirpasic/gods v1.18.1
github.com/gin-gonic/gin v1.9.1
github.com/golang/protobuf v1.5.0 // indirect
github.com/google/uuid v1.0.0
github.com/mitchellh/mapstructure v1.5.0
github.com/gin-gonic/gin v1.10.0
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/uuid v1.1.2
github.com/olekukonko/tablewriter v0.0.5
github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.8.4
github.com/stretchr/testify v1.9.0
github.com/x448/float16 v0.8.4
golang.org/x/sync v0.3.0
)
require (
github.com/agnivade/levenshtein v1.1.1
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1
github.com/mattn/go-runewidth v0.0.14
github.com/nlpodyssey/gopickle v0.3.0
github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
)
require (
github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc // indirect
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/chewxy/hm v1.0.0 // indirect
github.com/chewxy/math32 v1.0.8 // indirect
github.com/chewxy/math32 v1.10.1 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/flatbuffers v1.12.0 // indirect
github.com/mattn/go-runewidth v0.0.14 // indirect
github.com/google/flatbuffers v24.3.25+incompatible // indirect
github.com/kr/text v0.2.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/xtgo/set v1.0.0 // indirect
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
gonum.org/v1/gonum v0.8.2 // indirect
gonum.org/v1/gonum v0.15.0 // indirect
gorgonia.org/vecf32 v0.9.0 // indirect
gorgonia.org/vecf64 v0.9.0 // indirect
)
require (
github.com/bytedance/sonic v1.9.1 // indirect
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
github.com/gin-contrib/cors v1.4.0
github.com/bytedance/sonic v1.11.6 // indirect
github.com/gabriel-vasile/mimetype v1.4.3 // indirect
github.com/gin-contrib/cors v1.7.2
github.com/gin-contrib/sse v0.1.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.14.0 // indirect
github.com/go-playground/validator/v10 v10.20.0 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.4 // indirect
github.com/leodido/go-urn v1.2.4 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/klauspost/cpuid/v2 v2.2.7 // indirect
github.com/leodido/go-urn v1.4.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
golang.org/x/arch v0.3.0 // indirect
golang.org/x/crypto v0.14.0
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63
golang.org/x/net v0.17.0 // indirect
golang.org/x/sys v0.13.0
golang.org/x/term v0.13.0
golang.org/x/text v0.14.0 // indirect
google.golang.org/protobuf v1.30.0
github.com/ugorji/go/codec v1.2.12 // indirect
golang.org/x/arch v0.8.0 // indirect
golang.org/x/crypto v0.23.0
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0
golang.org/x/term v0.20.0
golang.org/x/text v0.15.0 // indirect
google.golang.org/protobuf v1.34.1
gopkg.in/yaml.v3 v3.0.1 // indirect
)

go.sum
View File

@@ -1,22 +1,36 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/agnivade/levenshtein v1.1.1 h1:QY8M92nrzkmr798gCo3kmMyqXFzdQVpxLlGPRBij0P8=
github.com/agnivade/levenshtein v1.1.1/go.mod h1:veldBMzWxcCG2ZvUTKD2kJNRdCk5hVbJomOvKkmgYbo=
github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw=
github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc h1:zvQ6w7KwtQWgMQiewOF9tFtundRMVZFSAksNV6ogzuY=
github.com/apache/arrow/go/arrow v0.0.0-20201229220542-30ce2eb5d4dc/go.mod h1:c9sxoIT3YgLxH4UhLOCKaBlEojuMhVYpk4Ntv3opUTQ=
github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM=
github.com/bytedance/sonic v1.9.1 h1:6iJ6NqdoxCDr6mbY8h18oSO+cShGSMRGCEo7F2h0x8s=
github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6ICHXqG5hm0ZW5IHyeEJXoIJSOZeBLmWPNeIQ=
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
github.com/chewxy/math32 v1.0.8 h1:fU5E4Ec4Z+5RtRAi3TovSxUjQPkgRh+HbP7tKB2OFbM=
github.com/chewxy/math32 v1.0.8/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
github.com/chewxy/math32 v1.10.1 h1:LFpeY0SLJXeaiej/eIp2L40VYfscTvKh/FSEZ68uMkU=
github.com/chewxy/math32 v1.10.1/go.mod h1:dOB2rcuFrCn6UHrze36WSLVPKtzPMRAQvBvUwkSsLqs=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y=
github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w=
github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg=
github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs=
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@@ -26,35 +40,42 @@ github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1/go.mod h1:uw2gLc
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48 h1:fRzb/w+pyskVMQ+UbP35JkH8yB7MYb4q/qhBarqZE6g=
github.com/dgryski/trifles v0.0.0-20200323201526-dd97f9abfb48/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
github.com/envoyproxy/go-control-plane v0.9.9-0.20210217033140-668b12f5399d/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.mod h1:hliV/p42l8fGbc6Y9bQ70uLwIvmJyVE5k4iMKlh8wCQ=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g=
github.com/gin-contrib/cors v1.4.0/go.mod h1:bs9pNM0x/UsmHPBWT2xZz9ROh8xYjYkiURUfmBoMlcs=
github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/gin-contrib/cors v1.7.2 h1:oLDHxdg8W/XDoN/8zamqk/Drgt4oVZDvaV0YmvVICQw=
github.com/gin-contrib/cors v1.7.2/go.mod h1:SUJVARKgQ40dmrzgXEVxj2m7Ig1v1qIboQkPDTQ9t2E=
github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE=
github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI=
github.com/gin-gonic/gin v1.8.1/go.mod h1:ji8BvRH1azfM+SYow9zQ6SZMvR8qOMZHmsCuWR9tTTk=
github.com/gin-gonic/gin v1.9.1 h1:4idEAncQnU5cB7BeOkPtxjfCSye0AAm1R0RVIqJ+Jmg=
github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU=
github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU=
github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y=
github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g=
github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks=
github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY=
github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.0/go.mod h1:sawfccIbzZTqEDETgFXqTho0QybSa7l++s0DH+LDiLs=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY=
github.com/go-playground/universal-translator v0.18.0/go.mod h1:UvRDBj+xPUEGrFYl+lu/H90nyDXpg0fqeB/AQUGNTVA=
github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY=
github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY=
github.com/go-playground/validator/v10 v10.10.0/go.mod h1:74x4gJWsvQexRdW8Pn3dXSGrTK4nAUsbPlLADvpJkos=
github.com/go-playground/validator/v10 v10.14.0 h1:vgvQWe3XCz3gIeFDm/HnTIbj6UGmg/+t63MyGU2n5js=
github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU=
github.com/goccy/go-json v0.9.7/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8=
github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM=
github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU=
github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
@@ -72,51 +93,54 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/golang/protobuf v1.5.0 h1:LUVKkCeviFUMKqHa4tXIIij/lbhnMbP7Fn5wKdKkRh4=
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
github.com/google/flatbuffers v1.11.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/flatbuffers v1.12.0 h1:/PtAHvnBY4Kqnx/xCQ3OIV9uYcSFGScBsWI3Oogeh6w=
github.com/google/flatbuffers v1.12.0/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA=
github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/flatbuffers v2.0.0+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI=
github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/google/uuid v1.0.0 h1:b4Gk+7WdP/d3HZH8EJsZpvV7EtDOgaZLtnaNGIu1adA=
github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.1.2 h1:EVhdT+1Kseyi1/pUmXKaFxYsDNy9RQYkMWRH68J/W7Y=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ=
github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.2.1/go.mod h1:zt4jvISO2HfUBqxjfIshjdMTYS56ZS/qv49ictyFfxY=
github.com/leodido/go-urn v1.2.4 h1:XlAE/cm/ms7TE/VMVoduSpNBoyc2dOxHs5MZSwAN63Q=
github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4=
github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA=
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWVwUuU=
github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
@@ -126,12 +150,15 @@ github.com/nlpodyssey/gopickle v0.3.0 h1:BLUE5gxFLyyNOPzlXxt6GoHEMMxD0qhsE4p0CIQ
github.com/nlpodyssey/gopickle v0.3.0/go.mod h1:f070HJ/yR+eLi5WmM1OXJEGaTpuJEUiib19olXgYha0=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9 h1:DV4iXjNn6fGeDl1AkZ1I0QB/0DBjrc7kPpxHrmuDzW4=
github.com/pdevine/tensor v0.0.0-20240228013915-64ccaa8d9ca9/go.mod h1:nR7l3gM6ubiOm+mCkmmUyIBUcBAyiUmW6dQrDZhugFE=
github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c h1:GwiUUjKefgvSNmv3NCvI/BL0kDebW6Xa+kcdpdc1mTY=
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c/go.mod h1:PSojXDXF7TbgQiD6kkd98IHOS0QqTyUEaWRiS8+BLu8=
github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY=
github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI=
github.com/pierrec/lz4/v4 v4.1.8 h1:ieHkV+i2BRzngO4Wd/3HGowuZStgq6QkPsD1eolNAO4=
github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -139,10 +166,11 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
@@ -150,96 +178,119 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.1.4/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.0/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=
github.com/ugorji/go/codec v1.2.7/go.mod h1:WGN1fab3R1fzQlVQTkfxVtIBhWDRqOviHU95kRgeqEY=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE=
github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xtgo/set v1.0.0 h1:6BCNBRv3ORNDQ7fyoJXRv+tstJz3m1JVFQErfeZz2pY=
github.com/xtgo/set v1.0.0/go.mod h1:d3NHzGzSa0NmB2NhFyECA+QdRp29oEn2xbT+TpeFoM8=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 h1:lGdhQUN/cnWdSH3291CUuxSEqc+AsGTiDxPP3r2J0l4=
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6/go.mod h1:FftLjUGFEDu5k8lt0ddY+HcrH/qU/0qk+H8j9/nTl3E=
golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc=
golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa h1:FRnLl4eNAQl8hwxVVC17teOw8kdjVDVAiFMtgUdTSRQ=
golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa/go.mod h1:zk2irFbV9DP96SEBUUAy67IdHUaZuSnrz1n472HUCLE=
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200904194848-62affa334b73/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200909081042-eff7692f9009/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y=
golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/term v0.20.0 h1:VnkxpohqXaOBYJtBmEppKUG6mXpi+4O6purfc2+sMhw=
golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -247,34 +298,40 @@ golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.4/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo=
gonum.org/v1/gonum v0.8.2 h1:CCXrcPKiGGotvnN6jfUsKk4rRqm7q09/YbKb5xCEvtM=
gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0=
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0 h1:OE9mWmgKkjJyEmDAAtGMPjXu+YNeGvK9VTSHY6+Qihc=
gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0=
gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ=
gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo=
gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw=
gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc=
gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200513103714-09dca8ec2884/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f h1:Yv4xsIx7HZOoyUGSJ2ksDyWE2qIBXROsZKt2ny3hCGM=
google.golang.org/genproto v0.0.0-20200911024640-645f7a48b24f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20210630183607-d20f26d13c79/go.mod h1:yiaVoXHpRzHGyxV3o4DktVWY4mSUErTKaeEOq6C3t3U=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.32.0 h1:zWTV+LMdc3kaiJMSTOFz2UgSBgx8RNQoTGiZu3fR9S0=
google.golang.org/grpc v1.32.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
google.golang.org/grpc/cmd/protoc-gen-go-grpc v0.0.0-20200910201057-6591123024b3/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw=
google.golang.org/grpc v1.33.1/go.mod h1:fr5YgcSWrqhRRxogOsw7RzIpsmvOZ6IcH4kBYTpR3n0=
google.golang.org/grpc v1.36.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=
google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM=
google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
@@ -283,20 +340,18 @@ google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzi
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg=
google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gorgonia.org/vecf32 v0.9.0 h1:PClazic1r+JVJ1dEzRXgeiVl4g1/Hf/w+wUSqnco1Xg=
@@ -305,4 +360,5 @@ gorgonia.org/vecf64 v0.9.0 h1:bgZDP5x0OzBF64PjMGC3EvTdOoMEcmfAh1VCUnZFm1A=
gorgonia.org/vecf64 v0.9.0/go.mod h1:hp7IOWCnRiVQKON73kkC/AUMtEXyf9kGlVrtPQ9ccVA=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=

View File

@@ -3,7 +3,6 @@ package gpu
import (
"fmt"
"log/slog"
"strconv"
"syscall"
"unsafe"
@@ -74,16 +73,22 @@ func (hl *HipLib) Release() {
hl.dll = 0
}
func (hl *HipLib) AMDDriverVersion() (string, error) {
func (hl *HipLib) AMDDriverVersion() (driverMajor, driverMinor int, err error) {
if hl.dll == 0 {
return "", fmt.Errorf("dll has been unloaded")
return 0, 0, fmt.Errorf("dll has been unloaded")
}
var version int
status, _, err := syscall.SyscallN(hl.hipDriverGetVersion, uintptr(unsafe.Pointer(&version)))
if status != hipSuccess {
return "", fmt.Errorf("failed call to hipDriverGetVersion: %d %s", status, err)
return 0, 0, fmt.Errorf("failed call to hipDriverGetVersion: %d %s", status, err)
}
return strconv.Itoa(version), nil
slog.Debug("hipDriverGetVersion", "version", version)
// TODO - this isn't actually right, but the docs claim hipDriverGetVersion isn't accurate anyway...
driverMajor = version / 1000
driverMinor = (version - (driverMajor * 1000)) / 10
return driverMajor, driverMinor, nil
}
func (hl *HipLib) HipGetDeviceCount() int {

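A standalone sketch of the major/minor arithmetic in `AMDDriverVersion` above; the diff's own TODO notes the encoding may not be exact, so the raw value here is purely illustrative.

```go
package main

import "fmt"

// decode splits a raw HIP driver version into major/minor the same way the
// diff above does: thousands become the major, the remaining tens the minor.
func decode(version int) (major, minor int) {
	major = version / 1000
	minor = (version - major*1000) / 10
	return major, minor
}

func main() {
	// Hypothetical raw value, for illustration only.
	major, minor := decode(6014)
	fmt.Printf("%d.%d\n", major, minor) // 6.1
}
```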
View File

@@ -8,6 +8,7 @@ import (
"log/slog"
"os"
"path/filepath"
"regexp"
"slices"
"strconv"
"strings"
@@ -41,10 +42,8 @@ func AMDGetGPUInfo() []GpuInfo {
}
// Opportunistic logging of driver version to aid in troubleshooting
ver, err := AMDDriverVersion()
if err == nil {
slog.Info("AMD Driver: " + ver)
} else {
driverMajor, driverMinor, err := AMDDriverVersion()
if err != nil {
// TODO - if we see users crash and burn with the upstreamed kernel this can be adjusted to hard-fail rocm support and fallback to CPU
slog.Warn("ollama recommends running the https://www.amd.com/en/support/linux-drivers", "error", err)
}
@@ -91,6 +90,7 @@ func AMDGetGPUInfo() []GpuInfo {
scanner := bufio.NewScanner(fp)
isCPU := false
var major, minor, patch uint64
var vendor, device uint64
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
// Note: we could also use "cpu_cores_count X" where X is greater than zero to detect CPUs
@@ -118,6 +118,26 @@ func AMDGetGPUInfo() []GpuInfo {
slog.Debug("malformed int " + line)
continue
}
} else if strings.HasPrefix(line, "vendor_id") {
ver := strings.Fields(line)
if len(ver) != 2 {
slog.Debug("malformed vendor_id", "vendor_id", line)
continue
}
vendor, err = strconv.ParseUint(ver[1], 10, 32)
if err != nil {
slog.Debug("malformed vendor_id" + line)
}
} else if strings.HasPrefix(line, "device_id") {
ver := strings.Fields(line)
if len(ver) != 2 {
slog.Debug("malformed device_id", "device_id", line)
continue
}
device, err = strconv.ParseUint(ver[1], 10, 32)
if err != nil {
slog.Debug("malformed device_id" + line)
}
}
// TODO - any other properties we want to extract and record?
@@ -139,10 +159,10 @@ func AMDGetGPUInfo() []GpuInfo {
return []GpuInfo{}
}
if int(major) < RocmComputeMin {
slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%d%x", major, minor, patch), "gpu", gpuID)
continue
}
//if int(major) < RocmComputeMin {
// slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch), "gpu", gpuID)
// continue
//}
// Look up the memory for the current node
totalMemory := uint64(0)
@@ -210,12 +230,17 @@ func AMDGetGPUInfo() []GpuInfo {
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
if totalMemory < IGPUMemLimit {
slog.Info("amdgpu appears to be an iGPU, skipping", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
slog.Info("unsupported Radeon iGPU detected skipping", "id", gpuID, "total", format.HumanBytes2(totalMemory))
continue
}
var name string
// TODO - PCI ID lookup
if vendor > 0 && device > 0 {
name = fmt.Sprintf("%04x:%04x", vendor, device)
}
slog.Info("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
slog.Info("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
gpuInfo := GpuInfo{
Library: "rocm",
memInfo: memInfo{
@@ -223,11 +248,11 @@ func AMDGetGPUInfo() []GpuInfo {
FreeMemory: (totalMemory - usedMemory),
},
ID: fmt.Sprintf("%d", gpuID),
// Name: not exposed in sysfs directly, would require pci device id lookup
Major: int(major),
Minor: int(minor),
Patch: int(patch),
Name: name,
Compute: fmt.Sprintf("gfx%d%x%x", major, minor, patch),
MinimumMemory: rocmMinimumMemory,
DriverMajor: driverMajor,
DriverMinor: driverMinor,
}
// If the user wants to filter to a subset of devices, filter out if we aren't a match
@@ -266,7 +291,7 @@ func AMDGetGPUInfo() []GpuInfo {
}
slog.Debug("rocm supported GPUs", "types", supported)
}
gfx := fmt.Sprintf("gfx%d%d%x", gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch)
gfx := gpuInfo.Compute
if !slices.Contains[[]string, string](supported, gfx) {
slog.Warn("amdgpu is not supported", "gpu", gpuInfo.ID, "gpu_type", gfx, "library", libDir, "supported_types", supported)
// TODO - consider discrete markdown just for ROCM troubleshooting?
@@ -276,7 +301,7 @@ func AMDGetGPUInfo() []GpuInfo {
slog.Info("amdgpu is supported", "gpu", gpuInfo.ID, "gpu_type", gfx)
}
} else {
slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
}
// The GPU has passed all the verification steps and is supported
@@ -322,19 +347,34 @@ func AMDValidateLibDir() (string, error) {
return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
}
func AMDDriverVersion() (string, error) {
_, err := os.Stat(DriverVersionFile)
func AMDDriverVersion() (driverMajor, driverMinor int, err error) {
_, err = os.Stat(DriverVersionFile)
if err != nil {
return "", fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
return 0, 0, fmt.Errorf("amdgpu version file missing: %s %w", DriverVersionFile, err)
}
fp, err := os.Open(DriverVersionFile)
if err != nil {
return "", err
return 0, 0, err
}
defer fp.Close()
verString, err := io.ReadAll(fp)
if err != nil {
return "", err
return 0, 0, err
}
return strings.TrimSpace(string(verString)), nil
pattern := `\A(\d+)\.(\d+).*`
regex := regexp.MustCompile(pattern)
match := regex.FindStringSubmatch(string(verString))
if len(match) < 2 {
return 0, 0, fmt.Errorf("malformed version string %s", string(verString))
}
driverMajor, err = strconv.Atoi(match[1])
if err != nil {
return 0, 0, err
}
driverMinor, err = strconv.Atoi(match[2])
if err != nil {
return 0, 0, err
}
return driverMajor, driverMinor, nil
}
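A minimal sketch of the same major.minor extraction, run against a hypothetical version-file string instead of reading DriverVersionFile:

package main

import (
	"fmt"
	"regexp"
	"strconv"
)

func main() {
	// Hypothetical contents of the amdgpu version file.
	verString := "6.3.6\n"
	regex := regexp.MustCompile(`\A(\d+)\.(\d+).*`)
	match := regex.FindStringSubmatch(verString)
	if len(match) < 3 {
		fmt.Println("malformed version string")
		return
	}
	major, _ := strconv.Atoi(match[1])
	minor, _ := strconv.Atoi(match[2])
	fmt.Printf("driver %d.%d\n", major, minor) // driver 6.3
}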

View File

@@ -7,8 +7,7 @@ import (
"os"
"path/filepath"
"slices"
"strconv"
"strings"
// "strings"
"github.com/ollama/ollama/format"
)
@@ -34,13 +33,12 @@ func AMDGetGPUInfo() []GpuInfo {
}
defer hl.Release()
ver, err := hl.AMDDriverVersion()
if err == nil {
slog.Info("AMD Driver: " + ver)
} else {
// For now this is benign, but we may eventually need to fail compatibility checks
slog.Debug("error looking up amd driver version", "error", err)
}
// TODO - this reports incorrect version information, so omitting for now
// driverMajor, driverMinor, err := hl.AMDDriverVersion()
// if err != nil {
// // For now this is benign, but we may eventually need to fail compatibility checks
// slog.Debug("error looking up amd driver version", "error", err)
// }
// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
count := hl.HipGetDeviceCount()
@@ -62,12 +60,12 @@ func AMDGetGPUInfo() []GpuInfo {
return nil
}
} else {
slog.Debug("skipping rocm gfx compatibility check with HSA_OVERRIDE_GFX_VERSION=" + gfxOverride)
slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
}
slog.Info("detected hip devices", "count", count)
slog.Debug("detected hip devices", "count", count)
// TODO how to determine the underlying device ID when visible devices is causing this to subset?
for i := 0; i < count; i++ {
for i := range count {
err = hl.HipSetDevice(i)
if err != nil {
slog.Warn("set device", "id", i, "error", err)
@@ -85,20 +83,13 @@ func AMDGetGPUInfo() []GpuInfo {
// Can luid be used on windows for setting visible devices (and is it actually set?)
n = bytes.IndexByte(props.GcnArchName[:], 0)
gfx := string(props.GcnArchName[:n])
slog.Info("hip device", "id", i, "name", name, "gfx", gfx)
var major, minor, patch string
switch len(gfx) {
case 6:
major, minor, patch = gfx[3:4], gfx[4:5], gfx[5:]
case 7:
major, minor, patch = gfx[3:5], gfx[5:6], gfx[6:]
}
slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
//slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
// TODO Why isn't props.iGPU accurate!?
if strings.EqualFold(name, iGPUName) {
slog.Info("iGPU detected skipping", "id", i)
continue
}
//if strings.EqualFold(name, iGPUName) {
// slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
// continue
//}
if gfxOverride == "" {
if !slices.Contains[[]string, string](supported, gfx) {
slog.Warn("amdgpu is not supported", "gpu", i, "gpu_type", gfx, "library", libDir, "supported_types", supported)
@@ -106,7 +97,7 @@ func AMDGetGPUInfo() []GpuInfo {
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
continue
} else {
slog.Info("amdgpu is supported", "gpu", i, "gpu_type", gfx)
slog.Debug("amdgpu is supported", "gpu", i, "gpu_type", gfx)
}
}
@@ -124,8 +115,8 @@ func AMDGetGPUInfo() []GpuInfo {
// TODO revisit this once ROCm v6 is available on windows.
// v5.7 only reports VRAM used by this process, so it's completely wrong and unusable
slog.Info("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
slog.Info("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
gpuInfo := GpuInfo{
Library: "rocm",
memInfo: memInfo{
@@ -135,31 +126,12 @@ func AMDGetGPUInfo() []GpuInfo {
ID: fmt.Sprintf("%d", i), // TODO this is probably wrong if we specify visible devices
DependencyPath: libDir,
MinimumMemory: rocmMinimumMemory,
}
if major != "" {
gpuInfo.Major, err = strconv.Atoi(major)
if err != nil {
slog.Info("failed to parse version", "version", gfx, "error", err)
}
}
if minor != "" {
gpuInfo.Minor, err = strconv.Atoi(minor)
if err != nil {
slog.Info("failed to parse version", "version", gfx, "error", err)
}
}
if patch != "" {
// Patch rev is hex; e.g. gfx90a
p, err := strconv.ParseInt(patch, 16, 0)
if err != nil {
slog.Info("failed to parse version", "version", gfx, "error", err)
} else {
gpuInfo.Patch = int(p)
}
}
if gpuInfo.Major < RocmComputeMin {
slog.Warn(fmt.Sprintf("amdgpu [%s] too old gfx%d%d%x", gpuInfo.ID, gpuInfo.Major, gpuInfo.Minor, gpuInfo.Patch))
continue
Name: name,
Compute: gfx,
// TODO - this information isn't accurate on windows, so don't report it until we find the right way to retrieve it
// DriverMajor: driverMajor,
// DriverMinor: driverMinor,
}
resp = append(resp, gpuInfo)
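The Windows path now records the raw GcnArchName (for example "gfx1030") directly as Compute instead of splitting it into Major/Minor/Patch. For reference, a sketch of the splitting that was removed above, where the leading digits are the decimal major version and the final character is a hex stepping (examples are illustrative):

package main

import (
	"fmt"
	"strconv"
)

// parseGfx mirrors the removed parsing of a gfx target name such as
// "gfx90a" (major 9, minor 0, stepping 0xa) or "gfx1030" (major 10,
// minor 3, stepping 0). Illustrative only.
func parseGfx(gfx string) (int, int, int, error) {
	var majorStr, minorStr, patchStr string
	switch len(gfx) {
	case 6: // e.g. gfx90a
		majorStr, minorStr, patchStr = gfx[3:4], gfx[4:5], gfx[5:]
	case 7: // e.g. gfx1030
		majorStr, minorStr, patchStr = gfx[3:5], gfx[5:6], gfx[6:]
	default:
		return 0, 0, 0, fmt.Errorf("unexpected gfx name %q", gfx)
	}
	major, err := strconv.Atoi(majorStr)
	if err != nil {
		return 0, 0, 0, err
	}
	minor, err := strconv.Atoi(minorStr)
	if err != nil {
		return 0, 0, 0, err
	}
	patch, err := strconv.ParseInt(patchStr, 16, 0) // stepping is hex, e.g. the "a" in gfx90a
	if err != nil {
		return 0, 0, 0, err
	}
	return major, minor, int(patch), nil
}

func main() {
	for _, gfx := range []string{"gfx90a", "gfx1030"} {
		major, minor, patch, err := parseGfx(gfx)
		if err != nil {
			fmt.Println(err)
			continue
		}
		fmt.Printf("%s -> gfx%d%d%x\n", gfx, major, minor, patch)
	}
}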

View File

@@ -13,7 +13,7 @@ import (
"syscall"
"time"
"github.com/ollama/ollama/server/envconfig"
"github.com/ollama/ollama/envconfig"
)
var (
@@ -80,7 +80,7 @@ func cleanupTmpDirs() {
if err == nil {
pid, err := strconv.Atoi(string(raw))
if err == nil {
if proc, err := os.FindProcess(int(pid)); err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
if proc, err := os.FindProcess(pid); err == nil && !errors.Is(proc.Signal(syscall.Signal(0)), os.ErrProcessDone) {
// Another running ollama, ignore this tmpdir
continue
}

View File

@@ -8,14 +8,14 @@ import (
func GetCPUVariant() string {
if cpu.X86.HasAVX2 {
slog.Info("CPU has AVX2")
slog.Debug("CPU has AVX2")
return "avx2"
}
if cpu.X86.HasAVX {
slog.Info("CPU has AVX")
slog.Debug("CPU has AVX")
return "avx"
}
slog.Info("CPU does not have vector extensions")
slog.Debug("CPU does not have vector extensions")
// else LCD
return ""
}

View File

@@ -18,5 +18,4 @@ func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
ids = append(ids, info.ID)
}
return "CUDA_VISIBLE_DEVICES", strings.Join(ids, ",")
}

View File

@@ -20,19 +20,20 @@ import (
"sync"
"unsafe"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/server/envconfig"
)
type handles struct {
deviceCount int
cudart *C.cudart_handle_t
nvcuda *C.nvcuda_handle_t
oneapi *C.oneapi_handle_t
}
const (
cudaMinimumMemory = 256 * format.MebiByte
rocmMinimumMemory = 256 * format.MebiByte
cudaMinimumMemory = 457 * format.MebiByte
rocmMinimumMemory = 457 * format.MebiByte
)
var gpuMutex sync.Mutex
@@ -80,6 +81,15 @@ var NvcudaWindowsGlobs = []string{
"c:\\windows\\system*\\nvcuda.dll",
}
var OneapiWindowsGlobs = []string{
"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
}
var OneapiLinuxGlobs = []string{
"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
"/usr/lib*/libze_intel_gpu.so*",
}
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
var CudaTegra string = os.Getenv("JETSON_JETPACK")
@@ -119,12 +129,12 @@ func initGPUHandles() *handles {
return gpuHandles
}
slog.Info("Detecting GPUs")
slog.Debug("Detecting GPUs")
nvcudaLibPaths := FindGPULibs(nvcudaMgmtName, nvcudaMgmtPatterns)
if len(nvcudaLibPaths) > 0 {
deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths)
if nvcuda != nil {
slog.Info("detected GPUs", "count", deviceCount, "library", libPath)
slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
gpuHandles.nvcuda = nvcuda
gpuHandles.deviceCount = deviceCount
return gpuHandles
@@ -135,12 +145,13 @@ func initGPUHandles() *handles {
if len(cudartLibPaths) > 0 {
deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths)
if cudart != nil {
slog.Info("detected GPUs", "library", libPath, "count", deviceCount)
slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
gpuHandles.cudart = cudart
gpuHandles.deviceCount = deviceCount
return gpuHandles
}
}
return gpuHandles
}
@@ -176,18 +187,23 @@ func GetGPUInfo() GpuInfoList {
resp := []GpuInfo{}
// NVIDIA first
for i := 0; i < gpuHandles.deviceCount; i++ {
for i := range gpuHandles.deviceCount {
// TODO once we support CPU compilation variants of GPU libraries refine this...
if cpuVariant == "" && runtime.GOARCH == "amd64" {
continue
}
if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
gpuInfo := GpuInfo{
Library: "cuda",
}
var driverMajor int
var driverMinor int
if gpuHandles.cudart != nil {
C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
} else {
C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
driverMajor = int(gpuHandles.nvcuda.driver_major)
driverMinor = int(gpuHandles.nvcuda.driver_minor)
}
if memInfo.err != nil {
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
@@ -201,14 +217,17 @@ func GetGPUInfo() GpuInfoList {
gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
gpuInfo.Major = int(memInfo.major)
gpuInfo.Minor = int(memInfo.minor)
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
gpuInfo.MinimumMemory = cudaMinimumMemory
gpuInfo.DependencyPath = depPath
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.DriverMajor = driverMajor
gpuInfo.DriverMinor = driverMinor
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
resp = append(resp, gpuInfo)
}
}
// Then AMD
resp = append(resp, AMDGetGPUInfo()...)
@@ -342,6 +361,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
return 0, nil, ""
}
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
var resp C.oneapi_init_resp_t
resp.oh.verbose = getVerboseState()
for _, libPath := range oneapiLibPaths {
lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib))
C.oneapi_init(lib, &resp)
if resp.err != nil {
slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
C.free(unsafe.Pointer(resp.err))
} else {
return int(resp.num_devices), &resp.oh, libPath
}
}
return 0, nil, ""
}
func getVerboseState() C.uint16_t {
if envconfig.Debug {
return C.uint16_t(1)
@@ -362,6 +398,8 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
return cudaGetVisibleDevicesEnv(l)
case "rocm":
return rocmGetVisibleDevicesEnv(l)
case "oneapi":
return oneapiGetVisibleDevicesEnv(l)
default:
slog.Debug("no filter required for library " + l[0].Library)
return "", ""

View File

@@ -15,7 +15,7 @@ import (
)
const (
metalMinimumMemory = 384 * format.MebiByte
metalMinimumMemory = 512 * format.MebiByte
)
func GetGPUInfo() GpuInfoList {

View File

@@ -39,16 +39,19 @@ extern "C" {
#endif
#define GPU_ID_LEN 64
#define GPU_NAME_LEN 96
typedef struct mem_info {
char *err; // If non-null, caller responsible for freeing
char gpu_id[GPU_ID_LEN];
char gpu_name[GPU_NAME_LEN];
uint64_t total;
uint64_t free;
// Compute Capability
int major;
int minor;
int patch;
} mem_info_t;
void cpu_check_ram(mem_info_t *resp);
@@ -59,6 +62,7 @@ void cpu_check_ram(mem_info_t *resp);
#include "gpu_info_cudart.h"
#include "gpu_info_nvcuda.h"
#include "gpu_info_oneapi.h"
#endif // __GPU_INFO_H__
#endif // __APPLE__

View File

@@ -10,8 +10,6 @@ void cpu_check_ram(mem_info_t *resp) {
if (GlobalMemoryStatusEx(&info) != 0) {
resp->total = info.ullTotalPhys;
resp->free = info.ullAvailPhys;
resp->major = 0;
resp->minor = 0;
snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
} else {
resp->err = LOAD_ERR();
@@ -31,8 +29,6 @@ void cpu_check_ram(mem_info_t *resp) {
} else {
resp->total = info.totalram * info.mem_unit;
resp->free = info.freeram * info.mem_unit;
resp->major = 0;
resp->minor = 0;
snprintf(&resp->gpu_id[0], GPU_ID_LEN, "0");
}
return;

View File

@@ -22,6 +22,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
{"cuDeviceGet", (void *)&resp->ch.cuDeviceGet},
{"cuDeviceGetAttribute", (void *)&resp->ch.cuDeviceGetAttribute},
{"cuDeviceGetUuid", (void *)&resp->ch.cuDeviceGetUuid},
{"cuDeviceGetName", (void *)&resp->ch.cuDeviceGetName},
{"cuCtxCreate_v3", (void *)&resp->ch.cuCtxCreate_v3},
{"cuMemGetInfo_v2", (void *)&resp->ch.cuMemGetInfo_v2},
{"cuCtxDestroy", (void *)&resp->ch.cuCtxDestroy},
@@ -70,18 +71,17 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
}
int version = 0;
nvcudaDriverVersion_t driverVersion;
driverVersion.major = 0;
driverVersion.minor = 0;
resp->ch.driver_major = 0;
resp->ch.driver_minor = 0;
// Report driver version if we're in verbose mode, ignore errors
ret = (*resp->ch.cuDriverGetVersion)(&version);
if (ret != CUDA_SUCCESS) {
LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
} else {
driverVersion.major = version / 1000;
driverVersion.minor = (version - (driverVersion.major * 1000)) / 10;
LOG(resp->ch.verbose, "CUDA driver version: %d-%d\n", driverVersion.major, driverVersion.minor);
resp->ch.driver_major = version / 1000;
resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
}
ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
@@ -117,8 +117,6 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
return;
}
resp->major = 0;
resp->minor = 0;
int major = 0;
int minor = 0;
ret = (*h.cuDeviceGetAttribute)(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);
@@ -161,6 +159,12 @@ void nvcuda_check_vram(nvcuda_handle_t h, int i, mem_info_t *resp) {
);
}
ret = (*h.cuDeviceGetName)(&resp->gpu_name[0], GPU_NAME_LEN, device);
if (ret != CUDA_SUCCESS) {
LOG(h.verbose, "[%d] device name lookup failure: %d\n", i, ret);
resp->gpu_name[0] = '\0';
}
// To get memory we have to set (and release) a context
ret = (*h.cuCtxCreate_v3)(&ctx, NULL, 0, 0, device);
if (ret != CUDA_SUCCESS) {

View File

@@ -44,12 +44,15 @@ typedef void* CUcontext;
typedef struct nvcuda_handle {
void *handle;
uint16_t verbose;
int driver_major;
int driver_minor;
CUresult (*cuInit)(unsigned int Flags);
CUresult (*cuDriverGetVersion)(int *driverVersion);
CUresult (*cuDeviceGetCount)(int *);
CUresult (*cuDeviceGet)(CUdevice* device, int ordinal);
CUresult (*cuDeviceGetAttribute)(int* pi, CUdevice_attribute attrib, CUdevice dev);
CUresult (*cuDeviceGetUuid)(CUuuid* uuid, CUdevice dev); // signature compatible with cuDeviceGetUuid_v2
CUresult (*cuDeviceGetName)(char *name, int len, CUdevice dev);
// Context specific aspects
CUresult (*cuCtxCreate_v3)(CUcontext* pctx, void *params, int len, unsigned int flags, CUdevice dev);

gpu/gpu_info_oneapi.c (new file, 214 lines)
View File

@@ -0,0 +1,214 @@
#ifndef __APPLE__
#include "gpu_info_oneapi.h"
#include <string.h>
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
{
ze_result_t ret;
resp->err = NULL;
const int buflen = 256;
char buf[buflen + 1];
int i;
struct lookup
{
char *s;
void **p;
} l[] = {
{"zesInit", (void *)&resp->oh.zesInit},
{"zesDriverGet", (void *)&resp->oh.zesDriverGet},
{"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
{"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
{"zesDeviceEnumMemoryModules",
(void *)&resp->oh.zesDeviceEnumMemoryModules},
{"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
{"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
{NULL, NULL},
};
resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
if (!resp->oh.handle)
{
char *msg = LOAD_ERR();
snprintf(buf, buflen,
"Unable to load %s library to query for Intel GPUs: %s\n",
oneapi_lib_path, msg);
free(msg);
resp->err = strdup(buf);
return;
}
// TODO once we've squashed the remaining corner cases remove this log
LOG(resp->oh.verbose,
"wiring Level-Zero management library functions in %s\n",
oneapi_lib_path);
for (i = 0; l[i].s != NULL; i++)
{
// TODO once we've squashed the remaining corner cases remove this log
LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
*l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
if (!l[i].p)
{
resp->oh.handle = NULL;
char *msg = LOAD_ERR();
LOG(resp->oh.verbose, "dlerr: %s\n", msg);
UNLOAD_LIBRARY(resp->oh.handle);
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
free(msg);
resp->err = strdup(buf);
return;
}
}
ret = (*resp->oh.zesInit)(0);
if (ret != ZE_RESULT_SUCCESS)
{
LOG(resp->oh.verbose, "zesInit err: %d\n", ret);
UNLOAD_LIBRARY(resp->oh.handle);
resp->oh.handle = NULL;
snprintf(buf, buflen, "oneapi vram init failure: %d", ret);
resp->err = strdup(buf);
}
(*resp->oh.zesDriverGet)(&resp->num_devices, NULL);
return;
}
void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp)
{
ze_result_t ret;
resp->err = NULL;
uint64_t totalMem = 0;
uint64_t usedMem = 0;
const int buflen = 256;
char buf[buflen + 1];
int i, d, m;
if (h.handle == NULL)
{
resp->err = strdup("Level-Zero handle not initialized");
return;
}
uint32_t driversCount = 0;
ret = (*h.zesDriverGet)(&driversCount, NULL);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen, "unable to get driver count: %d", ret);
resp->err = strdup(buf);
return;
}
LOG(h.verbose, "discovered %d Level-Zero drivers\n", driversCount);
zes_driver_handle_t *allDrivers =
malloc(driversCount * sizeof(zes_driver_handle_t));
(*h.zesDriverGet)(&driversCount, allDrivers);
resp->total = 0;
resp->free = 0;
for (d = 0; d < driversCount; d++)
{
uint32_t deviceCount = 0;
ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, NULL);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen, "unable to get device count: %d", ret);
resp->err = strdup(buf);
free(allDrivers);
return;
}
LOG(h.verbose, "discovered %d Level-Zero devices\n", deviceCount);
zes_device_handle_t *devices =
malloc(deviceCount * sizeof(zes_device_handle_t));
(*h.zesDeviceGet)(allDrivers[d], &deviceCount, devices);
for (i = 0; i < deviceCount; i++)
{
zes_device_ext_properties_t ext_props;
ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
ext_props.pNext = NULL;
zes_device_properties_t props;
props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
props.pNext = &ext_props;
ret = (*h.zesDeviceGetProperties)(devices[i], &props);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen, "unable to get device properties: %d", ret);
resp->err = strdup(buf);
free(allDrivers);
free(devices);
return;
}
if (h.verbose)
{
// When in verbose mode, report more information about
// the card we discover.
LOG(h.verbose, "[%d] oneAPI device name: %s\n", i,
props.modelName);
LOG(h.verbose, "[%d] oneAPI brand: %s\n", i,
props.brandName);
LOG(h.verbose, "[%d] oneAPI vendor: %s\n", i,
props.vendorName);
LOG(h.verbose, "[%d] oneAPI S/N: %s\n", i,
props.serialNumber);
LOG(h.verbose, "[%d] oneAPI board number: %s\n", i,
props.boardNumber);
}
uint32_t memCount = 0;
ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, NULL);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen,
"unable to enumerate Level-Zero memory modules: %d", ret);
resp->err = strdup(buf);
free(allDrivers);
free(devices);
return;
}
LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);
zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
(*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, mems);
for (m = 0; m < memCount; m++)
{
zes_mem_state_t state;
state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
state.pNext = NULL;
ret = (*h.zesMemoryGetState)(mems[m], &state);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen, "unable to get memory state: %d", ret);
resp->err = strdup(buf);
free(allDrivers);
free(devices);
free(mems);
return;
}
resp->total += state.size;
resp->free += state.free;
}
free(mems);
}
free(devices);
}
free(allDrivers);
}
#endif // __APPLE__

gpu/gpu_info_oneapi.h (new file, 211 lines)
View File

@@ -0,0 +1,211 @@
#ifndef __APPLE__
#ifndef __GPU_INFO_ONEAPI_H__
#define __GPU_INFO_ONEAPI_H__
#include "gpu_info.h"
#define ZE_MAX_DEVICE_NAME 256
#define ZE_MAX_DEVICE_UUID_SIZE 16
#define ZES_STRING_PROPERTY_SIZE 64
#define ZE_BIT(_i) (1 << _i)
// Just enough typedef's to dlopen/dlsym for memory information
typedef enum ze_result_t
{
ZE_RESULT_SUCCESS = 0,
// Other values omitted for now...
} ze_result_t;
typedef uint8_t ze_bool_t;
typedef struct _zes_driver_handle_t *zes_driver_handle_t;
typedef struct _zes_device_handle_t *zes_device_handle_t;
typedef struct _zes_mem_handle_t *zes_mem_handle_t;
typedef enum _ze_structure_type_t
{
ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
} ze_structure_type_t;
typedef enum _zes_structure_type_t
{
ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x1,
ZES_STRUCTURE_TYPE_MEM_PROPERTIES = 0xb,
ZES_STRUCTURE_TYPE_MEM_STATE = 0x1e,
ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES = 0x2d,
ZES_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
} zes_structure_type_t;
typedef enum _zes_mem_type_t
{
ZES_MEM_TYPE_FORCE_UINT32 = 0x7fffffff
} zes_mem_type_t;
typedef enum _zes_mem_loc_t
{
ZES_MEM_LOC_SYSTEM = 0,
ZES_MEM_LOC_DEVICE = 1,
ZES_MEM_LOC_FORCE_UINT32 = 0x7fffffff
} zes_mem_loc_t;
typedef enum _zes_mem_health_t
{
ZES_MEM_HEALTH_FORCE_UINT32 = 0x7fffffff
} zes_mem_health_t;
typedef struct _ze_device_uuid_t
{
uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
} ze_device_uuid_t;
typedef struct _zes_uuid_t
{
uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
} zes_uuid_t;
typedef enum _ze_device_type_t
{
ZE_DEVICE_TYPE_GPU = 1,
ZE_DEVICE_TYPE_CPU = 2,
ZE_DEVICE_TYPE_FPGA = 3,
ZE_DEVICE_TYPE_MCA = 4,
ZE_DEVICE_TYPE_VPU = 5,
ZE_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
} ze_device_type_t;
typedef enum _zes_device_type_t
{
ZES_DEVICE_TYPE_GPU = 1,
ZES_DEVICE_TYPE_CPU = 2,
ZES_DEVICE_TYPE_FPGA = 3,
ZES_DEVICE_TYPE_MCA = 4,
ZES_DEVICE_TYPE_VPU = 5,
ZES_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
} zes_device_type_t;
typedef uint32_t ze_device_property_flags_t;
typedef enum _ze_device_property_flag_t
{
ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
ZE_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3),
ZE_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
} ze_device_property_flag_t;
typedef uint32_t zes_device_property_flags_t;
typedef enum _zes_device_property_flag_t
{
ZES_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
ZES_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
ZES_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
ZES_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3),
ZES_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
} zes_device_property_flag_t;
typedef struct _ze_device_properties_t
{
ze_structure_type_t stype;
void *pNext;
ze_device_type_t type;
uint32_t vendorId;
uint32_t deviceId;
ze_device_property_flags_t flags;
uint32_t subdeviceId;
uint32_t coreClockRate;
uint64_t maxMemAllocSize;
uint32_t maxHardwareContexts;
uint32_t maxCommandQueuePriority;
uint32_t numThreadsPerEU;
uint32_t physicalEUSimdWidth;
uint32_t numEUsPerSubslice;
uint32_t numSubslicesPerSlice;
uint32_t numSlices;
uint64_t timerResolution;
uint32_t timestampValidBits;
uint32_t kernelTimestampValidBits;
ze_device_uuid_t uuid;
char name[ZE_MAX_DEVICE_NAME];
} ze_device_properties_t;
typedef struct _zes_device_properties_t
{
zes_structure_type_t stype;
void *pNext;
ze_device_properties_t core;
uint32_t numSubdevices;
char serialNumber[ZES_STRING_PROPERTY_SIZE];
char boardNumber[ZES_STRING_PROPERTY_SIZE];
char brandName[ZES_STRING_PROPERTY_SIZE];
char modelName[ZES_STRING_PROPERTY_SIZE];
char vendorName[ZES_STRING_PROPERTY_SIZE];
char driverVersion[ZES_STRING_PROPERTY_SIZE];
} zes_device_properties_t;
typedef struct _zes_device_ext_properties_t
{
zes_structure_type_t stype;
void *pNext;
zes_uuid_t uuid;
zes_device_type_t type;
zes_device_property_flags_t flags;
} zes_device_ext_properties_t;
typedef struct _zes_mem_properties_t
{
zes_structure_type_t stype;
void *pNext;
zes_mem_type_t type;
ze_bool_t onSubdevice;
uint32_t subdeviceId;
zes_mem_loc_t location;
uint64_t physicalSize;
int32_t busWidth;
int32_t numChannels;
} zes_mem_properties_t;
typedef struct _zes_mem_state_t
{
zes_structure_type_t stype;
const void *pNext;
zes_mem_health_t health;
uint64_t free;
uint64_t size;
} zes_mem_state_t;
typedef struct oneapi_handle
{
void *handle;
uint16_t verbose;
ze_result_t (*zesInit)(int);
ze_result_t (*zesDriverGet)(uint32_t *pCount, zes_driver_handle_t *phDrivers);
ze_result_t (*zesDeviceGet)(zes_driver_handle_t hDriver, uint32_t *pCount,
zes_device_handle_t *phDevices);
ze_result_t (*zesDeviceGetProperties)(zes_device_handle_t hDevice,
zes_device_properties_t *pProperties);
ze_result_t (*zesDeviceEnumMemoryModules)(zes_device_handle_t hDevice,
uint32_t *pCount,
zes_mem_handle_t *phMemory);
ze_result_t (*zesMemoryGetProperties)(zes_mem_handle_t hMemory,
zes_mem_properties_t *pProperties);
ze_result_t (*zesMemoryGetState)(zes_mem_handle_t hMemory,
zes_mem_state_t *pState);
} oneapi_handle_t;
typedef struct oneapi_init_resp
{
char *err; // If err is non-null handle is invalid
int num_devices;
oneapi_handle_t oh;
} oneapi_init_resp_t;
typedef struct oneapi_version_resp
{
ze_result_t status;
char *str; // Contains version or error string if status != 0
} oneapi_version_resp_t;
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp);
void oneapi_check_vram(oneapi_handle_t rh, mem_info_t *resp);
#endif // __GPU_INFO_INTEL_H__
#endif // __APPLE__

gpu/gpu_oneapi.go (new file, 21 lines)
View File

@@ -0,0 +1,21 @@
//go:build linux || windows
package gpu
import (
"log/slog"
"strings"
)
func oneapiGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
ids := []string{}
for _, info := range gpuInfo {
if info.Library != "oneapi" {
// TODO shouldn't happen if things are wired correctly...
slog.Debug("oneapiGetVisibleDevicesEnv skipping over non-sycl device", "library", info.Library)
continue
}
ids = append(ids, info.ID)
}
return "ONEAPI_DEVICE_SELECTOR", "level_zero:" + strings.Join(ids, ",")
}
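With two hypothetical Intel GPUs (IDs "0" and "1"), the helper above yields the setting ONEAPI_DEVICE_SELECTOR=level_zero:0,1, which the SYCL runtime reads to restrict the devices it exposes. A minimal sketch of the construction:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical device IDs for two oneAPI GPUs.
	ids := []string{"0", "1"}
	key, value := "ONEAPI_DEVICE_SELECTOR", "level_zero:"+strings.Join(ids, ",")
	fmt.Printf("%s=%s\n", key, value) // ONEAPI_DEVICE_SELECTOR=level_zero:0,1
}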

View File

@@ -5,11 +5,12 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestBasicGetGPUInfo(t *testing.T) {
info := GetGPUInfo()
assert.Greater(t, len(info), 0)
assert.NotEmpty(t, len(info))
assert.Contains(t, "cuda rocm cpu metal", info[0].Library)
if info[0].Library != "cpu" {
assert.Greater(t, info[0].TotalMemory, uint64(0))
@@ -19,7 +20,7 @@ func TestBasicGetGPUInfo(t *testing.T) {
func TestCPUMemInfo(t *testing.T) {
info, err := GetCPUMem()
assert.NoError(t, err)
require.NoError(t, err)
switch runtime.GOOS {
case "darwin":
t.Skip("CPU memory not populated on darwin")

View File

@@ -1,5 +1,12 @@
package gpu
import (
"fmt"
"log/slog"
"github.com/ollama/ollama/format"
)
type memInfo struct {
TotalMemory uint64 `json:"total_memory,omitempty"`
FreeMemory uint64 `json:"free_memory,omitempty"`
@@ -22,9 +29,11 @@ type GpuInfo struct {
// GPU information
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
Name string `json:"name"` // user friendly name if available
Major int `json:"major,omitempty"` // Major compatibility version (CC or gfx)
Minor int `json:"minor,omitempty"` // Minor compatibility version (CC or gfx)
Patch int `json:"patch,omitempty"` // Patch compatibility only matters on AMD
Compute string `json:"compute"` // Compute Capability or gfx
// Driver Information - TODO no need to put this on each GPU
DriverMajor int `json:"driver_major,omitempty"`
DriverMinor int `json:"driver_minor,omitempty"`
// TODO other performance capability info to help in scheduling decisions
}
@@ -56,6 +65,21 @@ func (l GpuInfoList) ByLibrary() []GpuInfoList {
return resp
}
// Report the GPU information to the log at Info level
func (l GpuInfoList) LogDetails() {
for _, g := range l {
slog.Info("inference compute",
"id", g.ID,
"library", g.Library,
"compute", g.Compute,
"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
"name", g.Name,
"total", format.HumanBytes2(g.TotalMemory),
"available", format.HumanBytes2(g.FreeMemory),
)
}
}
// Sort by Free Space
type ByFreeMemory []GpuInfo

View File

@@ -217,7 +217,7 @@ func TestMultiModelStress(t *testing.T) {
defer wg.Done()
for j := 0; j < 3; j++ {
slog.Info("Starting", "req", i, "iter", j, "model", req[i].Model)
DoGenerate(ctx, t, client, req[i], resp[i], 90*time.Second, 5*time.Second)
DoGenerate(ctx, t, client, req[i], resp[i], 120*time.Second, 5*time.Second)
}
}(i)
}

View File

@@ -19,6 +19,11 @@ import (
)
func TestMaxQueue(t *testing.T) {
if os.Getenv("OLLAMA_TEST_EXISTING") != "" {
t.Skip("Max Queue test requires spawing a local server so we can adjust the queue size")
return
}
// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless you're on GPU
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
threadCount := 32
@@ -109,9 +114,9 @@ func TestMaxQueue(t *testing.T) {
slog.Info("generate done, waiting for embeds")
embedwg.Wait()
slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
require.Equal(t, resetByPeerCount, 0, "Connections reset by peer, have you updated your fd and socket limits?")
require.True(t, busyCount > 0, "no requests hit busy error but some should have")
require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout")
slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
}

View File

@@ -85,7 +85,7 @@ func GetTestEndpoint() (*api.Client, string) {
var serverMutex sync.Mutex
var serverReady bool
func startServer(ctx context.Context, ollamaHost string) error {
func startServer(t *testing.T, ctx context.Context, ollamaHost string) error {
// Make sure the server has been built
CLIName, err := filepath.Abs("../ollama")
if err != nil {
@@ -200,7 +200,7 @@ func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, strin
}
lifecycle.ServerLogFile = fp.Name()
fp.Close()
require.NoError(t, startServer(ctx, testEndpoint))
require.NoError(t, startServer(t, ctx, testEndpoint))
}
return client, testEndpoint, func() {

View File

@@ -66,7 +66,7 @@ struct server_params {
};
bool server_verbose = false;
bool server_log_json = true;
bool server_log_json = false;
enum stop_type {
STOP_FULL,
@@ -140,7 +140,6 @@ struct server_slot {
std::vector<llama_token> cache_tokens;
std::vector<completion_token_output> generated_token_probs;
bool infill = false;
bool embedding = false;
bool has_next_token = true;
bool truncated = false;
@@ -187,7 +186,6 @@ struct server_slot {
n_past = 0;
n_sent_text = 0;
n_sent_token_probs = 0;
infill = false;
ga_i = 0;
n_past_se = 0;
@@ -266,7 +264,7 @@ struct server_slot {
sprintf(buffer, "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)",
t_prompt_processing, n_prompt_tokens_processed,
t_token, n_tokens_second);
LOG_INFO(buffer, {
LOG_DEBUG(buffer, {
{"slot_id", id},
{"task_id", task_id},
{"t_prompt_processing", t_prompt_processing},
@@ -280,7 +278,7 @@ struct server_slot {
sprintf(buffer, "generation eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)",
t_token_generation, n_decoded,
t_token, n_tokens_second);
LOG_INFO(buffer, {
LOG_DEBUG(buffer, {
{"slot_id", id},
{"task_id", task_id},
{"t_token_generation", t_token_generation},
@@ -290,7 +288,7 @@ struct server_slot {
});
sprintf(buffer, " total time = %10.2f ms", t_prompt_processing + t_token_generation);
LOG_INFO(buffer, {
LOG_DEBUG(buffer, {
{"slot_id", id},
{"task_id", task_id},
{"t_prompt_processing", t_prompt_processing},
@@ -334,6 +332,7 @@ struct server_metrics {
struct llama_server_context
{
llama_model *model = nullptr;
float modelProgress = 0.0;
llama_context *ctx = nullptr;
clip_ctx *clp_ctx = nullptr;
@@ -371,7 +370,7 @@ struct llama_server_context
{
if (clp_ctx)
{
LOG_INFO("freeing clip model", {});
LOG_DEBUG("freeing clip model", {});
clip_free(clp_ctx);
clp_ctx = nullptr;
}
@@ -392,7 +391,7 @@ struct llama_server_context
params = params_;
if (!params.mmproj.empty()) {
multimodal = true;
LOG_INFO("Multi Modal Mode Enabled", {});
LOG_DEBUG("Multi Modal Mode Enabled", {});
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
if(clp_ctx == nullptr) {
LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
@@ -445,7 +444,7 @@ struct llama_server_context
const int32_t n_ctx_slot = n_ctx / params.n_parallel;
LOG_INFO("initializing slots", {{"n_slots", params.n_parallel}});
LOG_DEBUG("initializing slots", {{"n_slots", params.n_parallel}});
for (int i = 0; i < params.n_parallel; i++)
{
server_slot slot;
@@ -454,7 +453,7 @@ struct llama_server_context
slot.n_ctx = n_ctx_slot;
slot.n_predict = params.n_predict;
LOG_INFO("new slot", {
LOG_DEBUG("new slot", {
{"slot_id", slot.id},
{"n_ctx_slot", slot.n_ctx}
});
@@ -468,7 +467,7 @@ struct llama_server_context
//GGML_ASSERT(n_ctx_train % ga_w == 0 && "n_ctx_train must be a multiple of ga_w"); // NOLINT
//GGML_ASSERT(n_ctx >= n_ctx_train * ga_n && "n_ctx must be at least n_ctx_train * ga_n"); // NOLINT
LOG_INFO("slot self-extend", {
LOG_DEBUG("slot self-extend", {
{"slot_id", slot.id},
{"ga_n", ga_n},
{"ga_w", ga_w}
@@ -599,16 +598,6 @@ struct llama_server_context
slot->params.n_predict = slot->n_predict;
}
// infill
if (data.count("input_prefix") != 0)
{
slot->params.input_prefix = data["input_prefix"];
}
else
{
slot->params.input_prefix = "";
}
if (data.count("input_suffix") != 0)
{
slot->params.input_suffix = data["input_suffix"];
@@ -737,7 +726,7 @@ struct llama_server_context
sampler_names.emplace_back(sampler_name);
}
}
slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
}
else
{
@@ -827,7 +816,7 @@ struct llama_server_context
all_slots_are_idle = false;
LOG_INFO("slot is processing task", {
LOG_DEBUG("slot is processing task", {
{"slot_id", slot->id},
{"task_id", slot->task_id},
});
@@ -846,7 +835,7 @@ struct llama_server_context
system_tokens.clear();
if (!system_prompt.empty()) {
system_tokens = ::llama_tokenize(ctx, system_prompt, add_bos_token);
system_tokens = ::llama_tokenize(ctx, system_prompt, true);
llama_batch_clear(batch);
@@ -896,15 +885,6 @@ struct llama_server_context
system_need_update = true;
}
void system_prompt_process(const json &sys_props) {
system_prompt = sys_props.value("prompt", "");
name_user = sys_props.value("anti_prompt", "");
name_assistant = sys_props.value("assistant_name", "");
system_prompt_notify();
}
static size_t find_stopping_strings(const std::string &text, const size_t last_token_size,
const stop_type type, server_slot &slot)
{
@@ -1095,7 +1075,7 @@ struct llama_server_context
std::vector<std::string> samplers_sequence;
for (const auto &sampler_type : slot.sparams.samplers_sequence)
{
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
}
return json {
@@ -1262,13 +1242,12 @@ struct llama_server_context
queue_results.send(res);
}
void request_completion(int task_id, json data, bool infill, bool embedding, int multitask_id)
void request_completion(int task_id, json data, bool embedding, int multitask_id)
{
task_server task;
task.id = task_id;
task.target_id = 0;
task.data = std::move(data);
task.infill_mode = infill;
task.embedding_mode = embedding;
task.type = TASK_TYPE_COMPLETION;
task.multitask_id = multitask_id;
@@ -1414,8 +1393,8 @@ struct llama_server_context
json subtask_data = multiprompt_task.data;
subtask_data["prompt"] = subtask_data["prompt"][i];
// subtasks inherit everything else (infill mode, embedding mode, etc.)
request_completion(subtask_ids[i], subtask_data, multiprompt_task.infill_mode, multiprompt_task.embedding_mode, multitask_id);
// subtasks inherit everything else (embedding mode, etc.)
request_completion(subtask_ids[i], subtask_data, multiprompt_task.embedding_mode, multitask_id);
}
}
@@ -1433,26 +1412,8 @@ struct llama_server_context
break;
}
if (task.data.contains("system_prompt"))
{
if (!all_slots_are_idle) {
send_error(task, "system prompt can only be updated when all slots are idle");
break;
}
system_prompt_process(task.data["system_prompt"]);
// reset cache_tokens for all slots
for (server_slot &slot : slots)
{
slot.cache_tokens.clear();
slot.n_past = 0;
slot.n_past_se = 0;
}
}
slot->reset();
slot->infill = task.infill_mode;
slot->embedding = task.embedding_mode;
slot->task_id = task.id;
slot->multitask_id = task.multitask_id;
@@ -1504,7 +1465,7 @@ struct llama_server_context
}
slots_data.push_back(slot_data);
}
LOG_INFO("slot data", {
LOG_DEBUG("slot data", {
{"task_id", task.id},
{"n_idle_slots", n_idle_slots},
{"n_processing_slots", n_processing_slots}
@@ -1566,7 +1527,7 @@ struct llama_server_context
bool update_slots() {
if (system_need_update)
{
LOG_INFO("updating system prompt", {});
LOG_DEBUG("updating system prompt", {});
system_prompt_update();
}
@@ -1576,7 +1537,7 @@ struct llama_server_context
{
if (system_prompt.empty() && clean_kv_cache)
{
LOG_INFO("all slots are idle and system prompt is empty, clear the KV cache", {});
LOG_DEBUG("all slots are idle and system prompt is empty, clear the KV cache", {});
kv_cache_clear();
}
return true;
@@ -1599,7 +1560,7 @@ struct llama_server_context
const int n_left = (int) system_tokens.size() + slot.n_past - n_keep;
const int n_discard = n_left / 2;
LOG_INFO("slot context shift", {
LOG_DEBUG("slot context shift", {
{"slot_id", slot.id},
{"task_id", slot.task_id},
{"n_keep", n_keep},
@@ -1638,7 +1599,7 @@ struct llama_server_context
slot.command = NONE;
slot.t_last_used = ggml_time_us();
LOG_INFO("slot released", {
LOG_DEBUG("slot released", {
{"slot_id", slot.id},
{"task_id", slot.task_id},
{"n_ctx", n_ctx},
@@ -1678,8 +1639,7 @@ struct llama_server_context
const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()) || !slot.images.empty();
// empty prompt passed -> release the slot and send empty response
// note: infill mode allows empty prompt
if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill)
if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt)
{
slot.release();
slot.print_timings();
@@ -1696,33 +1656,7 @@ struct llama_server_context
slot.t_start_process_prompt = ggml_time_us();
slot.t_start_genereration = 0;
if (slot.infill)
{
bool suff_rm_leading_spc = true;
if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1)
{
params.input_suffix.erase(0, 1);
suff_rm_leading_spc = false;
}
auto prefix_tokens = tokenize(slot.params.input_prefix, false);
auto suffix_tokens = tokenize(slot.params.input_suffix, false);
const int space_token = 29871; // TODO: this should not be hardcoded
if (suff_rm_leading_spc && !suffix_tokens.empty() && suffix_tokens[0] == space_token) {
suffix_tokens.erase(suffix_tokens.begin());
}
prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
prefix_tokens.push_back(llama_token_middle(model));
prompt_tokens = prefix_tokens;
}
else
{
prompt_tokens = tokenize(slot.prompt, system_prompt.empty() && add_bos_token); // add BOS if there isn't system prompt
}
prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
slot.n_prompt_tokens = prompt_tokens.size();
@@ -1807,7 +1741,7 @@ struct llama_server_context
slot.ga_i = ga_i;
}
LOG_INFO("slot progression", {
LOG_DEBUG("slot progression", {
{ "slot_id", slot.id },
{ "task_id", slot.task_id },
{ "n_past", slot.n_past },
@@ -1822,7 +1756,7 @@ struct llama_server_context
if (slot.n_past == slot.n_prompt_tokens && slot.n_past > 0)
{
// we have to evaluate at least 1 token to generate logits.
LOG_INFO("we have to evaluate at least 1 token to generate logits", {
LOG_DEBUG("we have to evaluate at least 1 token to generate logits", {
{ "slot_id", slot.id },
{ "task_id", slot.task_id }
});
@@ -1834,7 +1768,7 @@ struct llama_server_context
}
int p0 = (int) system_tokens.size() + slot.n_past;
LOG_INFO("kv cache rm [p0, end)", {
LOG_DEBUG("kv cache rm [p0, end)", {
{ "slot_id", slot.id },
{ "task_id", slot.task_id },
{ "p0", p0 }
@@ -2104,6 +2038,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
printf(" --embedding enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
printf(" -np N, --parallel N number of slots for process requests (default: %d)\n", params.n_parallel);
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
printf(" -fa, --flash-attn enable Flash Attention (default: %s)\n", params.flash_attn ? "enabled" : "disabled");
printf(" -spf FNAME, --system-prompt-file FNAME\n");
printf(" set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications.\n");
printf(" -ctk TYPE, --cache-type-k TYPE\n");
@@ -2128,8 +2063,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
printf("\n");
}
static void server_params_parse(int argc, char **argv, server_params &sparams,
gpt_params &params, llama_server_context& llama)
static void server_params_parse(int argc, char **argv, server_params &sparams, gpt_params &params)
{
gpt_params default_params;
server_params default_sparams;
@@ -2491,11 +2425,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
}
else if (arg == "-v" || arg == "--verbose")
{
#if SERVER_VERBOSE != 1
LOG_WARNING("server.cpp is not built with verbose logging.", {});
#else
server_verbose = true;
#endif
}
else if (arg == "--mlock")
{
@@ -2505,7 +2435,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
{
params.use_mmap = false;
}
else if (arg == "--numa") {
else if (arg == "--numa")
{
if (++i >= argc) {
invalid_param = true;
break;
@@ -2525,6 +2456,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
{
params.cont_batching = true;
}
else if (arg == "-fa" || arg == "--flash-attn")
{
params.flash_attn = true;
}
else if (arg == "-np" || arg == "--parallel")
{
if (++i >= argc)
@@ -2533,7 +2468,8 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
break;
}
params.n_parallel = std::stoi(argv[i]);
} else if (arg == "-n" || arg == "--n-predict")
}
else if (arg == "-n" || arg == "--n-predict")
{
if (++i >= argc)
{
@@ -2541,26 +2477,6 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
break;
}
params.n_predict = std::stoi(argv[i]);
} else if (arg == "-spf" || arg == "--system-prompt-file")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
std::ifstream file(argv[i]);
if (!file) {
fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
invalid_param = true;
break;
}
std::string systm_content;
std::copy(
std::istreambuf_iterator<char>(file),
std::istreambuf_iterator<char>(),
std::back_inserter(systm_content)
);
llama.system_prompt_process(json::parse(systm_content));
}
else if (arg == "-ctk" || arg == "--cache-type-k") {
params.cache_type_k = argv[++i];
@@ -2601,7 +2517,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
else if (arg == "--log-disable")
{
log_set_target(stdout);
LOG_INFO("logging to file is disabled.", {});
LOG_DEBUG("logging to file is disabled.", {});
}
else if (arg == "--slots-endpoint-disable")
{
@@ -2727,12 +2643,12 @@ static json format_detokenized_response(std::string content)
static void log_server_request(const httplib::Request &req, const httplib::Response &res)
{
// skip GH copilot requests when using default port
if (req.path == "/v1/health" || req.path == "/v1/completions")
if (req.path == "/health" || req.path == "/v1/health" || req.path == "/v1/completions")
{
return;
}
LOG_INFO("request", {
LOG_DEBUG("request", {
{"remote_addr", req.remote_addr},
{"remote_port", req.remote_port},
{"status", res.status},
@@ -2775,6 +2691,12 @@ inline void signal_handler(int signal) {
shutdown_handler(signal);
}
static bool update_load_progress(float progress, void *data)
{
((llama_server_context*)data)->modelProgress = progress;
return true;
}
#if defined(_WIN32)
char* wchar_to_char(const wchar_t* wstr) {
if (wstr == nullptr) return nullptr;
@@ -2807,7 +2729,7 @@ int main(int argc, char **argv) {
// struct that contains llama context and inference
llama_server_context llama;
server_params_parse(argc, argv, sparams, params, llama);
server_params_parse(argc, argv, sparams, params);
if (params.model_alias == "unknown")
{
@@ -2880,7 +2802,9 @@ int main(int argc, char **argv) {
break;
}
case SERVER_STATE_LOADING_MODEL:
res.set_content(R"({"status": "loading model"})", "application/json");
char buf[128];
snprintf(&buf[0], 128, R"({"status": "loading model", "progress": %0.2f})", llama.modelProgress);
res.set_content(buf, "application/json");
res.status = 503; // HTTP Service Unavailable
break;
case SERVER_STATE_ERROR:
@@ -3054,7 +2978,30 @@ int main(int argc, char **argv) {
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
}
if (sparams.n_threads_http < 1) {
// +2 threads for monitoring endpoints
sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
}
log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
LOG_INFO("HTTP server listening", log_data);
// run the HTTP server in a thread - see comment below
std::thread t([&]()
{
if (!svr.listen_after_bind())
{
state.store(SERVER_STATE_ERROR);
return 1;
}
return 0;
});
// load the model
params.progress_callback = update_load_progress;
params.progress_callback_user_data = (void*)&llama;
if (!llama.load_model(params))
{
state.store(SERVER_STATE_ERROR);
@@ -3114,7 +3061,7 @@ int main(int argc, char **argv) {
json data = json::parse(req.body);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, data, false, false, -1);
llama.request_completion(task_id, data, false, -1);
if (!json_value(data, "stream", false)) {
std::string completion_text;
task_result result = llama.queue_results.recv(task_id);
@@ -3236,7 +3183,7 @@ int main(int argc, char **argv) {
// create and queue the task
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, { {"prompt", prompt}, { "n_predict", 0}, {"image_data", image_data} }, false, true, -1);
llama.request_completion(task_id, { {"prompt", prompt}, { "n_predict", 0}, {"image_data", image_data} }, true, -1);
// get the result
task_result result = llama.queue_results.recv(task_id);
@@ -3258,26 +3205,6 @@ int main(int argc, char **argv) {
}*/
//);
if (sparams.n_threads_http < 1) {
// +2 threads for monitoring endpoints
sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
}
log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
LOG_INFO("HTTP server listening", log_data);
// run the HTTP server in a thread - see comment below
std::thread t([&]()
{
if (!svr.listen_after_bind())
{
state.store(SERVER_STATE_ERROR);
return 1;
}
return 0;
});
llama.queue_tasks.on_new_task(std::bind(
&llama_server_context::process_single_task, &llama, std::placeholders::_1));
llama.queue_tasks.on_finish_multitask(std::bind(

View File

@@ -55,9 +55,10 @@ extern bool server_log_json;
} while (0)
#endif
#define LOG_ERROR( MSG, ...) server_log("ERR", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_DEBUG( MSG, ...) server_log("DEBUG", __func__, __LINE__, MSG, __VA_ARGS__)
enum server_state {
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
@@ -123,6 +124,10 @@ static inline void server_log(const char *level, const char *function, int line,
{"timestamp", time(nullptr)},
};
if (strncmp("DEBUG", level, strlen(level)) == 0 && !server_verbose) {
return;
}
if (server_log_json) {
log.merge_patch(
{
@@ -137,14 +142,12 @@ static inline void server_log(const char *level, const char *function, int line,
std::cout << log.dump(-1, ' ', false, json::error_handler_t::replace) << "\n" << std::flush;
} else {
char buf[1024];
snprintf(buf, 1024, "%4s [%24s] %s", level, function, message);
if (!extra.empty()) {
log.merge_patch(extra);
}
std::stringstream ss;
ss << buf << " |";
ss << level << " [" << function << "] " << message << " |";
for (const auto& el : log.items())
{
const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);

View File

@@ -27,8 +27,16 @@ const (
fileTypeIQ2_XXS
fileTypeIQ2_XS
fileTypeQ2_K_S
fileTypeQ3_K_XS
fileTypeIQ3_XS
fileTypeIQ3_XXS
fileTypeIQ1_S
fileTypeIQ4_NL
fileTypeIQ3_S
fileTypeIQ2_S
fileTypeIQ4_XS
fileTypeIQ2_M
fileTypeIQ1_M
fileTypeBF16
fileTypeUnknown
)
@@ -75,10 +83,26 @@ func ParseFileType(s string) (fileType, error) {
return fileTypeIQ2_XS, nil
case "Q2_K_S":
return fileTypeQ2_K_S, nil
case "Q3_K_XS":
return fileTypeQ3_K_XS, nil
case "IQ3_XS":
return fileTypeIQ3_XS, nil
case "IQ3_XXS":
return fileTypeIQ3_XXS, nil
case "IQ1_S":
return fileTypeIQ1_S, nil
case "IQ4_NL":
return fileTypeIQ4_NL, nil
case "IQ3_S":
return fileTypeIQ3_S, nil
case "IQ2_S":
return fileTypeIQ2_S, nil
case "IQ4_XS":
return fileTypeIQ4_XS, nil
case "IQ2_M":
return fileTypeIQ2_M, nil
case "IQ1_M":
return fileTypeIQ1_M, nil
case "BF16":
return fileTypeBF16, nil
default:
return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
}
@@ -126,10 +150,26 @@ func (t fileType) String() string {
return "IQ2_XS"
case fileTypeQ2_K_S:
return "Q2_K_S"
case fileTypeQ3_K_XS:
return "Q3_K_XS"
case fileTypeIQ3_XS:
return "IQ3_XS"
case fileTypeIQ3_XXS:
return "IQ3_XXS"
case fileTypeIQ1_S:
return "IQ1_S"
case fileTypeIQ4_NL:
return "IQ4_NL"
case fileTypeIQ3_S:
return "IQ3_S"
case fileTypeIQ2_S:
return "IQ2_S"
case fileTypeIQ4_XS:
return "IQ4_XS"
case fileTypeIQ2_M:
return "IQ2_M"
case fileTypeIQ1_M:
return "IQ1_M"
case fileTypeBF16:
return "BF16"
default:
return "unknown"
}
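A small round-trip sketch for the newly added quantization names; this assumes ParseFileType is exported from the llm package, matching the repository layout:

package main

import (
	"fmt"

	"github.com/ollama/ollama/llm"
)

func main() {
	ft, err := llm.ParseFileType("IQ4_XS")
	if err != nil {
		panic(err)
	}
	fmt.Println(ft) // IQ4_XS, via the String method shown above
}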

View File

@@ -32,7 +32,7 @@ case "${GOARCH}" in
echo "Building static library"
build
if [ -z "$OLLAMA_SKIP_CPU_GENERATE" ]; then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
@@ -68,6 +68,7 @@ case "${GOARCH}" in
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
fi
;;
"arm64")
@@ -79,6 +80,7 @@ case "${GOARCH}" in
echo "Building static library"
build
if [ -z "$OLLAMA_SKIP_METAL_GENERATE" ]; then
init_vars
CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DLLAMA_ACCELERATE=on -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on ${CMAKE_DEFS}"
BUILD_DIR="../build/darwin/${ARCH}/metal"
@@ -86,6 +88,7 @@ case "${GOARCH}" in
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
fi
;;
*)
echo "GOARCH must be set"

View File

@@ -165,7 +165,7 @@ if [ -z "${CUDART_LIB_DIR}" ]; then
CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi
if [ -d "${CUDA_LIB_DIR}" ]; then
if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo "CUDA libraries detected - building dynamic CUDA library"
init_vars
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
@@ -215,6 +215,36 @@ if [ -d "${CUDA_LIB_DIR}" ]; then
fi
if [ -z "${ONEAPI_ROOT}" ]; then
# Try the default location in case it exists
ONEAPI_ROOT=/opt/intel/oneapi
fi
if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
echo "OneAPI libraries detected - building dynamic OneAPI library"
init_vars
source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
CC=icx
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL=ON -DLLAMA_SYCL_F16=OFF"
BUILD_DIR="../build/linux/${ARCH}/oneapi"
EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
build
# copy oneAPI dependencies
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
cp "${dep}" "${BUILD_DIR}/bin/"
done
cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/"
compress
fi
if [ -z "${ROCM_PATH}" ]; then
# Try the default location in case it exists
ROCM_PATH=/opt/rocm
@@ -227,7 +257,7 @@ if [ -z "${CLBlast_DIR}" ]; then
fi
fi
if [ -d "${ROCM_PATH}" ]; then
if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
echo "ROCm libraries detected - building dynamic ROCm library"
if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)

View File

@@ -12,6 +12,8 @@ function amdGPUs {
"gfx900"
"gfx902"
"gfx904"
"gfx90c"
"gfx906"
"gfx906:xnack-"
"gfx908:xnack-"
"gfx90a:xnack+"
@@ -25,6 +27,7 @@ function amdGPUs {
"gfx1030"
"gfx1031"
"gfx1032"
"gfx1033"
"gfx1034"
"gfx1035"
"gfx1036"
@@ -299,6 +302,49 @@ function build_cuda() {
}
}
function build_oneapi() {
if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
# Get oneAPI version
$script:ONEAPI_VERSION = icpx --version
$script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
if ($null -ne $script:ONEAPI_VERSION) {
$script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
}
init_vars
$script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
$script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
$script:cmakeDefs += @(
"-G", "MinGW Makefiles",
"-DLLAMA_SYCL=ON",
"-DCMAKE_C_COMPILER=icx",
"-DCMAKE_CXX_COMPILER=icx",
"-DCMAKE_BUILD_TYPE=Release"
)
Write-Host "Building oneAPI"
build
# Ninja doesn't prefix with config name
if ($null -ne $script:DUMPBIN) {
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
}
sign
install
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:distDir}"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:distDir}"
} else {
Write-Host "Skipping oneAPI generation step"
}
}
function build_rocm() {
if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
@@ -366,6 +412,7 @@ if ($($args.count) -eq 0) {
build_cpu_avx
build_cpu_avx2
build_cuda
build_oneapi
build_rocm
}

View File

@@ -119,7 +119,7 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
t.Offset = uint64(offset)
if _, err := rs.Seek(int64(t.size()), io.SeekCurrent); err != nil {
if _, err := rs.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
return err
}

View File

@@ -81,6 +81,11 @@ func (kv KV) ContextLength() uint64 {
return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
}
func (kv KV) ChatTemplate() string {
s, _ := kv["tokenizer.chat_template"].(string)
return s
}
type Tensors []*Tensor
func (ts Tensors) Layers() map[string]Layer {
@@ -106,7 +111,7 @@ type Layer map[string]*Tensor
func (l Layer) size() (size uint64) {
for _, t := range l {
size += t.size()
size += t.Size()
}
return size
@@ -124,12 +129,12 @@ type Tensor struct {
}
func (t Tensor) blockSize() uint64 {
switch {
case t.Kind < 2:
switch t.Kind {
case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16
return 1
case t.Kind < 10:
case 2, 3, 4, 5, 6, 7, 8, 9, 20: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL
return 32
default:
default: // All others
return 256
}
}
@@ -171,7 +176,29 @@ func (t Tensor) typeSize() uint64 {
case 17: // IQ2_XS
return 2 + 2*blockSize/8 + blockSize/32
case 18: // IQ3_XXS
return 2 + 3*blockSize/8
return 2 + blockSize/4 + blockSize/8
case 19: // IQ1_S
return 2 + blockSize/8 + blockSize/16
case 20: // IQ4_NL
return 2 + blockSize/2
case 21: // IQ3_S
return 2 + blockSize/4 + blockSize/8 + blockSize/32 + 4
case 22: // IQ2_S
return 2 + blockSize/4 + blockSize/16
case 23: // IQ4_XS
return 2 + 2 + blockSize/2 + blockSize/64
case 24: // I8
return 1
case 25: // I16
return 2
case 26: // I32
return 4
case 27: // I64
return 8
case 28: // F64
return 8
case 29: // IQ1_M
return blockSize/8 + blockSize/16 + blockSize/32
default:
return 0
}
@@ -185,7 +212,7 @@ func (t Tensor) parameters() uint64 {
return count
}
func (t Tensor) size() uint64 {
func (t Tensor) Size() uint64 {
return t.parameters() * t.typeSize() / t.blockSize()
}
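To make the block arithmetic above concrete, here is a small standalone sketch (Go, with an illustrative 4096x4096 Q4_0 weight; the dimensions are hypothetical and not taken from this diff) of the bytes = parameters * typeSize / blockSize formula that the exported Size method uses. Q4_0 packs 32 weights into an 18-byte block (a 2-byte scale plus 16 quantized bytes), so the example works out to about 9 MiB.

```go
package main

import "fmt"

func main() {
	var (
		parameters uint64 = 4096 * 4096      // hypothetical weight matrix
		blockSize  uint64 = 32               // Q4_0 packs 32 weights per block
		typeSize   uint64 = 2 + blockSize/2  // 2-byte scale + 16 quantized bytes
	)
	// Same formula as Tensor.Size above: parameters * typeSize / blockSize.
	bytes := parameters * typeSize / blockSize
	fmt.Printf("Q4_0 tensor: %d bytes (~%.1f MiB)\n", bytes, float64(bytes)/(1<<20))
}
```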
@@ -288,7 +315,7 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
// mixtral 8x22b
ff := uint64(llm.KV()["llama.feed_forward_length"].(uint32))
partialOffload = max(
3*ffnGateExpsWeight.size()+4*batch*(2*ff+headsKV+embedding+context+embedding/heads*headsKV),
3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embedding/heads*headsKV),
4*(context*batch*heads+context*embedding/heads*headsKV+batch*1024+embedding/heads*headsKV*batch),
)
} else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok {
@@ -329,7 +356,10 @@ func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload ui
4*batch*(1+4*embedding+context+context*heads),
)
partialOffload = 4*batch*(2*embedding+vocab) + embedding*vocab*105/128
partialOffload = max(
4*batch*(2*embedding+vocab)+embedding*vocab*105/128,
4*batch*(2+3*embedding+context+context*heads),
)
case "stablelm":
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
partialOffload = max(

View File

@@ -62,16 +62,6 @@ func (c *containerGGUF) Decode(rs io.ReadSeeker) (model, error) {
return model, nil
}
const (
_ uint32 = iota
GGUFTokenNormal
GGUFTokenUnknown
GGUFTokenControl
GGUFTokenUserDefined
GGUFTokenUnused
GGUFTokenByte
)
const (
ggufTypeUint8 uint32 = iota
ggufTypeInt8
@@ -251,11 +241,11 @@ func (llm *gguf) Decode(rs io.ReadSeeker) error {
}
for _, tensor := range llm.tensors {
if _, err := rs.Seek(int64(tensor.size()), io.SeekCurrent); err != nil {
if _, err := rs.Seek(int64(tensor.Size()), io.SeekCurrent); err != nil {
return err
}
padding := llm.padding(int64(tensor.size()), int64(alignment))
padding := llm.padding(int64(tensor.Size()), int64(alignment))
if _, err := rs.Seek(padding, io.SeekCurrent); err != nil {
return err
}
@@ -480,9 +470,11 @@ var ggufKVOrder = map[string][]string{
"gemma.attention.key_length",
"gemma.attention.value_length",
"general.file_type",
"tokenizer.ggml.pre",
"tokenizer.ggml.model",
"tokenizer.ggml.tokens",
"tokenizer.ggml.scores",
"tokenizer.ggml.merges",
"tokenizer.ggml.token_type",
"tokenizer.ggml.bos_token_id",
"tokenizer.ggml.eos_token_id",
@@ -600,8 +592,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
return err
}
dims := 0
for cnt := 0; cnt < len(tensor.Shape); cnt++ {
var dims int
for cnt := range len(tensor.Shape) {
if tensor.Shape[cnt] > 0 {
dims++
}
@@ -611,8 +603,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
return err
}
for i := 0; i < dims; i++ {
if err := binary.Write(ws, llm.ByteOrder, uint64(tensor.Shape[dims-1-i])); err != nil {
for i := range dims {
if err := binary.Write(ws, llm.ByteOrder, tensor.Shape[dims-1-i]); err != nil {
return err
}
}
@@ -626,22 +618,8 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
}
}
offset, err := ws.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
var alignment int64 = 32
padding := llm.padding(offset, alignment)
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
return err
}
for _, tensor := range tensors {
if _, err := tensor.WriteTo(ws); err != nil {
return err
}
offset, err := ws.Seek(0, io.SeekCurrent)
if err != nil {
return err
@@ -651,6 +629,10 @@ func (llm *gguf) Encode(ws io.WriteSeeker, kv KV, tensors []Tensor) error {
if err := binary.Write(ws, llm.ByteOrder, bytes.Repeat([]byte{0}, int(padding))); err != nil {
return err
}
if _, err := tensor.WriteTo(ws); err != nil {
return err
}
}
return nil
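The reordering above computes the alignment padding immediately before each tensor is written rather than once ahead of the loop. The padding helper itself is not shown in this diff; a conventional implementation would look like the sketch below (an assumption, not the repository's code). Padding from offset 100 to a 32-byte alignment, for instance, is 28 bytes, so the tensor data starts at byte 128.

```go
package sketch

// padTo is an assumed implementation of the padding helper used above (the
// repository's own llm.padding is not shown in this diff): it returns how many
// zero bytes are needed to advance offset to the next multiple of alignment.
func padTo(offset, alignment int64) int64 {
	if alignment <= 0 {
		return 0
	}
	return (alignment - offset%alignment) % alignment
}
```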

View File

@@ -5,24 +5,15 @@ import (
"log/slog"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/server/envconfig"
)
// This algorithm looks for a complete fit to determine if we need to unload other models
func PredictServerFit(allGpus gpu.GpuInfoList, ggml *GGML, adapters, projectors []string, opts api.Options) (bool, uint64) {
var estimatedVRAM uint64
if opts.NumCtx > int(ggml.KV().ContextLength()) {
slog.Warn("requested context length is greater than model max context length", "requested", opts.NumCtx, "model", ggml.KV().ContextLength())
opts.NumCtx = int(ggml.KV().ContextLength())
}
if opts.NumCtx < 4 {
opts.NumCtx = 4
}
// Split up the GPUs by type and try them
var estimatedVRAM uint64
for _, gpus := range allGpus.ByLibrary() {
var layerCount int
layerCount, estimatedVRAM, _ = EstimateGPULayers(gpus, ggml, projectors, opts)
@@ -62,6 +53,12 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
opts.NumCtx = max(opts.NumCtx, 2048)
}
layers := ggml.Tensors().Layers()
// add one layer worth of memory as a buffer
if blk0, ok := layers["blk.0"]; ok {
memoryMinimum += blk0.size()
}
// fp16 k,v = (1 (k) + 1 (v)) * sizeof(float16) * n_ctx * n_layer * n_embd / n_head * n_head_kv
var kv uint64 = 2 * 2 * uint64(opts.NumCtx) * ggml.KV().BlockCount() * ggml.KV().EmbeddingLength() / ggml.KV().HeadCount() * ggml.KV().HeadCountKV()
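As a rough worked example of the fp16 K/V estimate above (illustrative 7B-class llama dimensions, not values read from this change): 2048 tokens of context across 32 layers with a 4096-wide embedding, 32 heads, and 32 KV heads come to exactly 1 GiB.

```go
package sketch

// estimateKV mirrors the fp16 K/V formula above: 2 tensors (K and V), 2 bytes
// per fp16 value, times context length, layer count, and per-layer KV width
// (embedding / heads * kv heads).
func estimateKV(numCtx, blockCount, embedding, heads, headsKV uint64) uint64 {
	return 2 * 2 * numCtx * blockCount * embedding / heads * headsKV
}

// estimateKV(2048, 32, 4096, 32, 32) == 1 << 30, i.e. 1 GiB with the
// illustrative 7B-class dimensions mentioned above.
```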
@@ -82,13 +79,11 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
graphPartialOffload = graphFullOffload
}
layers := ggml.Tensors().Layers()
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload + layers["blk.0"].size()
memoryRequiredTotal := memoryMinimum + graphFullOffload
// memoryRequiredPartial represents the memory required for partial GPU offloading (n > 0, n < layers)
memoryRequiredPartial := memoryMinimum + graphPartialOffload + layers["blk.0"].size()
memoryRequiredPartial := memoryMinimum + graphPartialOffload
var memoryLayerOutput uint64
if layer, ok := layers["output_norm"]; ok {
@@ -108,25 +103,27 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
}
var layerCount int
for i := 0; i < int(ggml.KV().BlockCount()); i++ {
memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()
for i := range int(ggml.KV().BlockCount()) {
if blk, ok := layers[fmt.Sprintf("blk.%d", i)]; ok {
memoryLayer := blk.size()
// KV is proportional to the number of layers
memoryLayer += kv / ggml.KV().BlockCount()
memoryRequiredTotal += memoryLayer
if memoryAvailable > memoryRequiredPartial+memoryLayer {
if (opts.NumGPU >= 0 && layerCount+1 <= opts.NumGPU) || (opts.NumGPU < 0 && memoryAvailable > memoryRequiredPartial+memoryLayer) {
memoryRequiredPartial += memoryLayer
layerCount++
}
}
}
if gpus[0].Library != "metal" || !opts.UseMMap {
// memory was not preallocated for output tensors
memoryRequiredTotal += memoryLayerOutput
}
if memoryAvailable > memoryRequiredTotal {
if (opts.NumGPU >= 0 && layerCount+1 <= opts.NumGPU) || (opts.NumGPU < 0 && memoryAvailable > memoryRequiredTotal) {
layerCount = int(ggml.KV().BlockCount()) + 1
memoryRequiredPartial = memoryRequiredTotal
}
@@ -137,10 +134,10 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
"offload to gpu",
slog.Group(
"layers",
// actual number of layers offloaded
"real", opts.NumGPU,
// requested number of layers to offload
"requested", opts.NumGPU,
// estimated number of layers that can be offloaded
"estimate", layerCount,
"real", layerCount,
),
slog.Group(
"memory",

View File

@@ -0,0 +1,31 @@
diff --git a/common/common.cpp b/common/common.cpp
index ba1ecf0e..cead57cc 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1836,6 +1836,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
mparams.use_mmap = params.use_mmap;
mparams.use_mlock = params.use_mlock;
mparams.check_tensors = params.check_tensors;
+ mparams.progress_callback = params.progress_callback;
+ mparams.progress_callback_user_data = params.progress_callback_user_data;
if (params.kv_overrides.empty()) {
mparams.kv_overrides = NULL;
} else {
diff --git a/common/common.h b/common/common.h
index d80344f2..71e84834 100644
--- a/common/common.h
+++ b/common/common.h
@@ -174,6 +174,13 @@ struct gpt_params {
// multimodal models (see examples/llava)
std::string mmproj = ""; // path to multimodal projector
std::vector<std::string> image; // path to image file(s)
+
+ // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+ // If the provided progress_callback returns true, model loading continues.
+ // If it returns false, model loading is immediately aborted.
+ llama_progress_callback progress_callback = NULL;
+ // context pointer passed to the progress callback
+ void * progress_callback_user_data;
};
void gpt_params_handle_model_default(gpt_params & params);

View File

@@ -1,8 +1,17 @@
From 544a2d2e646d39e878d87dfbb3398a356bc560ab Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 23 May 2024 11:18:45 -0700
Subject: [PATCH] throw exception on load errors
---
llama.cpp | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 4225f955..7b762f86 100644
index 15c66077..8ba90b6a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
@@ -6346,7 +6346,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
}
} catch (const std::exception & err) {
LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
@@ -11,10 +20,10 @@ index 4225f955..7b762f86 100644
}
return 0;
@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
};
@@ -15600,16 +15600,23 @@ struct llama_model * llama_load_model_from_file(
}
model->rpc_servers.push_back(servers);
}
- int status = llama_model_load(path_model, *model, params);
- GGML_ASSERT(status <= 0);
- if (status < 0) {
@@ -22,6 +31,7 @@ index 4225f955..7b762f86 100644
- LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
- } else if (status == -2) {
- LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
+
+ try {
+ int status = llama_model_load(path_model, *model, params);
+ GGML_ASSERT(status <= 0);
@@ -42,3 +52,6 @@ index 4225f955..7b762f86 100644
}
return model;
--
2.45.1

View File

@@ -1,24 +0,0 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index e3c9bcd4..b43f892d 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -573,14 +573,16 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
struct ggml_tensor * embeddings = inp;
if (ctx->has_class_embedding) {
embeddings = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, hidden_size, num_positions, batch_size);
+ }
+ ggml_set_name(embeddings, "embeddings");
+ ggml_set_input(embeddings);
+
+ if (ctx->has_class_embedding) {
embeddings = ggml_acc(ctx0, embeddings, model.class_embedding,
embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], 0);
embeddings = ggml_acc(ctx0, embeddings, inp,
embeddings->nb[1], embeddings->nb[2], embeddings->nb[3], model.class_embedding->nb[1]);
}
- ggml_set_name(embeddings, "embeddings");
- ggml_set_input(embeddings);
-
struct ggml_tensor * positions = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, num_positions);
ggml_set_name(positions, "positions");

View File

@@ -0,0 +1,32 @@
diff --git a/llama.cpp b/llama.cpp
index 40d2ec2c..74f3ee9c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4642,16 +4642,7 @@ static void llm_load_vocab(
// for now, only BPE models have pre-tokenizers
if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
- if (tokenizer_pre.empty()) {
- LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
- LLAMA_LOG_WARN("%s: \n", __func__);
- LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
- LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED! \n", __func__);
- LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
- LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
- LLAMA_LOG_WARN("%s: \n", __func__);
- vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
- } else if (
+ if (
tokenizer_pre == "default") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
} else if (
@@ -4703,7 +4694,8 @@ static void llm_load_vocab(
tokenizer_pre == "smaug-bpe") {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMAUG;
} else {
- throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
+ LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
}
} else {
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;

llm/patches/06-qwen2.diff Normal file (13 lines added)
View File

@@ -0,0 +1,13 @@
diff --git a/llama.cpp b/llama.cpp
index 40d2ec2c..f34eb79a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6943,7 +6943,7 @@ static struct ggml_tensor * llm_build_kqv(
struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
cb(kq, "kq", il);
- if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX) {
+ if (model.arch == LLM_ARCH_PHI2 || model.arch == LLM_ARCH_PHI3 || model.arch == LLM_ARCH_GPTNEOX || model.arch == LLM_ARCH_QWEN2) {
// for this arch, we need to perform the KQ multiplication with F32 precision, otherwise we get NaNs
// ref: https://github.com/ggerganov/llama.cpp/pull/4490#issuecomment-1859055847
ggml_mul_mat_set_prec(kq, GGML_PREC_F32);

View File

@@ -10,9 +10,9 @@ import (
"os"
"path/filepath"
"runtime"
"slices"
"strings"
"golang.org/x/exp/slices"
"golang.org/x/sync/errgroup"
"github.com/ollama/ollama/gpu"

View File

@@ -24,9 +24,9 @@ import (
"golang.org/x/sync/semaphore"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/format"
"github.com/ollama/ollama/gpu"
"github.com/ollama/ollama/server/envconfig"
)
type LlamaServer interface {
@@ -38,6 +38,7 @@ type LlamaServer interface {
Detokenize(ctx context.Context, tokens []int) (string, error)
Close() error
EstimatedVRAM() uint64
EstimatedTotal() uint64
}
// llmServer is an instance of the llama.cpp server
@@ -53,6 +54,8 @@ type llmServer struct {
estimatedTotal uint64 // Total size of model
totalLayers uint64
gpuCount int
loadDuration time.Duration // Record how long it took the model to load
loadProgress float32
sem *semaphore.Weighted
}
@@ -76,25 +79,17 @@ func LoadModel(model string) (*GGML, error) {
// The gpu list must be a single family.
func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, projectors []string, opts api.Options) (LlamaServer, error) {
var err error
if opts.NumCtx > int(ggml.KV().ContextLength()) {
slog.Warn("requested context length is greater than the model's training context window size", "requested", opts.NumCtx, "training size", ggml.KV().ContextLength())
}
if opts.NumCtx < 4 {
opts.NumCtx = 4
}
cpuRunner := ""
var cpuRunner string
var estimatedVRAM uint64
var estimatedTotal uint64
var systemMemory uint64
gpuCount := len(gpus)
if (len(gpus) == 1 && gpus[0].Library == "cpu") || opts.NumGPU == 0 {
// TODO evaluate system memory to see if we should block the load, or force an unload of another CPU runner
cpuRunner = serverForCpu()
gpuCount = 0
_, _, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
} else {
if gpus[0].Library == "metal" {
memInfo, err := gpu.GetCPUMem()
@@ -108,17 +103,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
var layers int
layers, estimatedVRAM, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
if gpus[0].Library == "metal" && estimatedVRAM > systemMemory {
switch {
case gpus[0].Library == "metal" && estimatedVRAM > systemMemory:
// disable partial offloading when model is greater than total system memory as this
// can lead to locking up the system
opts.NumGPU = 0
} else if opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu" {
case gpus[0].Library != "metal" && layers == 0:
// Don't bother loading into the GPU if no layers can fit
cpuRunner = serverForCpu()
gpuCount = 0
case opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu":
opts.NumGPU = layers
}
}
// Loop through potential servers
finalErr := fmt.Errorf("no suitable llama servers found")
finalErr := errors.New("no suitable llama servers found")
if len(adapters) > 1 {
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
@@ -156,11 +156,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--embedding",
}
if envconfig.Debug {
params = append(params, "--log-format", "json")
} else {
params = append(params, "--log-disable")
}
if opts.NumGPU >= 0 {
params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU))
@@ -192,14 +189,34 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--memory-f32")
}
if opts.UseMLock {
params = append(params, "--mlock")
flashAttnEnabled := envconfig.FlashAttention
for _, g := range gpus {
// only cuda (compute capability 7+) and metal support flash attention
if g.Library != "metal" && (g.Library != "cuda" || g.DriverMajor < 7) {
flashAttnEnabled = false
}
// mmap has issues with partial offloading on metal
if g.Library == "metal" &&
uint64(opts.NumGPU) > 0 &&
uint64(opts.NumGPU) < ggml.KV().BlockCount()+1 {
opts.UseMMap = false
}
}
if flashAttnEnabled {
params = append(params, "--flash-attn")
}
if !opts.UseMMap {
params = append(params, "--no-mmap")
}
if opts.UseMLock {
params = append(params, "--mlock")
}
if opts.UseNUMA {
params = append(params, "--numa")
}
@@ -215,12 +232,12 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
params = append(params, "--parallel", fmt.Sprintf("%d", numParallel))
for i := 0; i < len(servers); i++ {
for i := range len(servers) {
dir := availableServers[servers[i]]
if dir == "" {
// Shouldn't happen
finalErr = fmt.Errorf("[%d] server %s not listed in available servers %v", i, servers[i], availableServers)
slog.Error("sever list inconsistent", "error", finalErr)
slog.Error("server list inconsistent", "error", finalErr)
continue
}
@@ -229,7 +246,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
gpuCount = 0
}
// Find an availableServers port, retry on each iterration in case the failure was a port conflict race
// Find an availableServers port, retry on each iteration in case the failure was a port conflict race
port := 0
if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
var l *net.TCPListener
@@ -267,7 +284,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
server := filepath.Join(dir, "ollama_llama_server")
if runtime.GOOS == "windows" {
server = server + ".exe"
server += ".exe"
}
// Detect tmp cleaners wiping out the file
@@ -291,13 +308,14 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
sem: semaphore.NewWeighted(int64(numParallel)),
totalLayers: ggml.KV().BlockCount() + 1,
gpuCount: gpuCount,
done: make(chan error, 1),
}
s.cmd.Env = os.Environ()
s.cmd.Stdout = os.Stdout
s.cmd.Stderr = s.status
visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
visibleDevicesEnv, visibleDevicesEnvVal := gpus.GetVisibleDevicesEnv()
pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
// Update or add the path and visible devices variable with our adjusted version
@@ -321,8 +339,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
}
slog.Info("starting llama server", "cmd", s.cmd.String())
if envconfig.Debug {
filteredEnv := []string{}
for _, ev := range s.cmd.Env {
if strings.HasPrefix(ev, "CUDA_") ||
strings.HasPrefix(ev, "ROCM_") ||
strings.HasPrefix(ev, "HIP_") ||
strings.HasPrefix(ev, "HSA_") ||
strings.HasPrefix(ev, "GGML_") ||
strings.HasPrefix(ev, "PATH=") ||
strings.HasPrefix(ev, "LD_LIBRARY_PATH=") {
filteredEnv = append(filteredEnv, ev)
}
}
// Log at debug as the environment is inherited and might contain sensitive information
slog.Debug("subprocess", "environment", s.cmd.Env)
slog.Debug("subprocess", "environment", filteredEnv)
}
if err = s.cmd.Start(); err != nil {
// Detect permission denied and augment the message about noexec
@@ -339,6 +371,11 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
continue
}
// reap subprocess when it exits
go func() {
s.done <- s.cmd.Wait()
}()
return s, nil
}
@@ -396,6 +433,7 @@ type ServerStatusResp struct {
SlotsIdle int `json:"slots_idle"`
SlotsProcessing int `json:"slots_processing"`
Error string `json:"error"`
Progress float32 `json:"progress"`
}
func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
@@ -421,7 +459,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
resp, err := http.DefaultClient.Do(req)
if err != nil {
if errors.Is(err, context.DeadlineExceeded) {
return ServerStatusNotResponding, fmt.Errorf("server not responding")
return ServerStatusNotResponding, errors.New("server not responding")
}
return ServerStatusError, fmt.Errorf("health resp: %w", err)
}
@@ -443,6 +481,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
case "no slot available":
return ServerStatusNoSlotsAvailable, nil
case "loading model":
s.loadProgress = status.Progress
return ServerStatusLoadingModel, nil
default:
return ServerStatusError, fmt.Errorf("server error: %+v", status)
@@ -483,17 +522,18 @@ func (s *llmServer) Ping(ctx context.Context) error {
func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
start := time.Now()
// TODO we need to wire up a better way to detect hangs during model load and startup of the server
expiresAt := time.Now().Add(10 * time.Minute) // be generous with timeout, large models can take a while to load
ticker := time.NewTicker(50 * time.Millisecond)
defer ticker.Stop()
stallDuration := 5 * time.Minute // If no progress happens
finalLoadDuration := 5 * time.Minute // After we hit 100%, give the runner more time to come online
stallTimer := time.Now().Add(stallDuration) // give up if we stall
slog.Info("waiting for llama runner to start responding")
var lastStatus ServerStatus = -1
fullyLoaded := false
for {
select {
case <-ctx.Done():
slog.Info("context expired before server started")
slog.Warn("client connection closed before server finished loading, aborting load")
return fmt.Errorf("timed out waiting for llama runner to start: %w", ctx.Err())
case err := <-s.done:
msg := ""
@@ -501,14 +541,15 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
msg = s.status.LastErrMsg
}
return fmt.Errorf("llama runner process has terminated: %v %s", err, msg)
case <-ticker.C:
if time.Now().After(expiresAt) {
default:
}
if time.Now().After(stallTimer) {
// timeout
msg := ""
if s.status != nil && s.status.LastErrMsg != "" {
msg = s.status.LastErrMsg
}
return fmt.Errorf("timed out waiting for llama runner to start: %s", msg)
return fmt.Errorf("timed out waiting for llama runner to start - progress %0.2f - %s", s.loadProgress, msg)
}
if s.cmd.ProcessState != nil {
msg := ""
@@ -517,25 +558,32 @@ func (s *llmServer) WaitUntilRunning(ctx context.Context) error {
}
return fmt.Errorf("llama runner process no longer running: %d %s", s.cmd.ProcessState.ExitCode(), msg)
}
c, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
ctx, cancel := context.WithTimeout(ctx, 200*time.Millisecond)
defer cancel()
status, err := s.getServerStatus(c)
if err != nil && lastStatus != status {
slog.Debug("server not yet available", "error", err)
lastStatus = status
continue
priorProgress := s.loadProgress
status, _ := s.getServerStatus(ctx)
if lastStatus != status && status != ServerStatusReady {
// Only log on status changes
slog.Info("waiting for server to become available", "status", status.ToString())
}
switch status {
case ServerStatusLoadingModel:
// TODO - this state never seems to happen with the current server.cpp code (bug?)
// it doesn't respond to the health endpoint until after the model is loaded
slog.Debug("loading model")
case ServerStatusReady:
slog.Debug(fmt.Sprintf("llama runner started in %f seconds", time.Since(start).Seconds()))
s.loadDuration = time.Since(start)
slog.Info(fmt.Sprintf("llama runner started in %0.2f seconds", s.loadDuration.Seconds()))
return nil
default:
lastStatus = status
// Reset the timer as long as we're making forward progress on the load
if priorProgress != s.loadProgress {
slog.Debug(fmt.Sprintf("model load progress %0.2f", s.loadProgress))
stallTimer = time.Now().Add(stallDuration)
} else if !fullyLoaded && int(s.loadProgress*100.0) >= 100 {
slog.Debug("model load completed, waiting for server to become available", "status", status.ToString())
stallTimer = time.Now().Add(finalLoadDuration)
fullyLoaded = true
}
time.Sleep(time.Millisecond * 250)
continue
}
}
}
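The rewritten wait loop replaces the fixed ten-minute deadline with a stall timer that is pushed forward whenever load progress advances, plus a separate grace period once progress reaches 100%. A condensed sketch of that pattern follows (hypothetical poll function and names, not the repository's code):

```go
package sketch

import (
	"fmt"
	"time"
)

// waitForReady condenses the stall-timer pattern above. poll is a hypothetical
// stand-in for the health-endpoint check: it reports whether the runner is
// ready and the current load progress in [0, 1].
func waitForReady(poll func() (ready bool, progress float32)) error {
	const (
		stall      = 5 * time.Minute // give up if progress stops advancing
		finalGrace = 5 * time.Minute // extra time after the model is fully loaded
	)
	deadline := time.Now().Add(stall)
	var last float32
	fullyLoaded := false
	for {
		ready, progress := poll()
		if ready {
			return nil
		}
		if progress != last {
			last = progress
			deadline = time.Now().Add(stall) // forward progress resets the timer
		} else if !fullyLoaded && progress >= 1.0 {
			deadline = time.Now().Add(finalGrace) // loaded, waiting to come online
			fullyLoaded = true
		}
		if time.Now().After(deadline) {
			return fmt.Errorf("timed out waiting for runner, progress %0.2f", last)
		}
		time.Sleep(250 * time.Millisecond)
	}
}
```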
@@ -580,6 +628,7 @@ type completion struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
Stop bool `json:"stop"`
StoppedLimit bool `json:"stopped_limit"`
Timings struct {
PredictedN int `json:"predicted_n"`
@@ -598,6 +647,7 @@ type CompletionRequest struct {
type CompletionResponse struct {
Content string
DoneReason string
Done bool
PromptEvalCount int
PromptEvalDuration time.Duration
@@ -715,7 +765,7 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
var c completion
if err := json.Unmarshal(evt, &c); err != nil {
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
return fmt.Errorf("error unmarshalling llm prediction response: %v", err)
}
switch {
@@ -739,8 +789,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
}
if c.Stop {
doneReason := "stop"
if c.StoppedLimit {
doneReason = "length"
}
fn(CompletionResponse{
Done: true,
DoneReason: doneReason,
PromptEvalCount: c.Timings.PromptN,
PromptEvalDuration: parseDurationMs(c.Timings.PromptMS),
EvalCount: c.Timings.PredictedN,
@@ -935,8 +991,11 @@ func (s *llmServer) Close() error {
if err := s.cmd.Process.Kill(); err != nil {
return err
}
_ = s.cmd.Wait()
// if ProcessState is already populated, Wait already completed, no need to wait again
if s.cmd.ProcessState == nil {
slog.Debug("waiting for llama server to exit")
<-s.done
}
slog.Debug("llama server stopped")
}
@@ -948,6 +1007,10 @@ func (s *llmServer) EstimatedVRAM() uint64 {
return s.estimatedVRAM
}
func (s *llmServer) EstimatedTotal() uint64 {
return s.estimatedTotal
}
func parseDurationMs(ms float64) time.Duration {
dur, err := time.ParseDuration(fmt.Sprintf("%fms", ms))
if err != nil {

View File

@@ -162,7 +162,7 @@ app.on('before-quit', () => {
}
})
const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
const updateURL = `https://ollama.com/api/update?os=${process.platform}&arch=${
process.arch
}&version=${app.getVersion()}&id=${id()}`

View File

@@ -109,13 +109,12 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
Choices: []Choice{{
Index: 0,
Message: Message{Role: r.Message.Role, Content: r.Message.Content},
FinishReason: func(done bool) *string {
if done {
reason := "stop"
FinishReason: func(reason string) *string {
if len(reason) > 0 {
return &reason
}
return nil
}(r.Done),
}(r.DoneReason),
}},
Usage: Usage{
// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
@@ -133,19 +132,16 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
Created: time.Now().Unix(),
Model: r.Model,
SystemFingerprint: "fp_ollama",
Choices: []ChunkChoice{
{
Choices: []ChunkChoice{{
Index: 0,
Delta: Message{Role: "assistant", Content: r.Message.Content},
FinishReason: func(done bool) *string {
if done {
reason := "stop"
FinishReason: func(reason string) *string {
if len(reason) > 0 {
return &reason
}
return nil
}(r.Done),
},
},
}(r.DoneReason),
}},
}
}
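Read together with the runner change above that maps stopped_limit to a "length" done reason, the new closure simply passes any non-empty reason through as a pointer, so OpenAI-compatible clients see "stop" on a normal stop and "length" when the token limit was hit. A minimal sketch of that mapping:

```go
package sketch

// finishReason mirrors the closure used in toChatCompletion and toChunk above:
// an empty done reason means the response is still streaming (nil finish_reason);
// anything else, such as "stop" or "length", is returned as a pointer.
func finishReason(doneReason string) *string {
	if len(doneReason) > 0 {
		return &doneReason
	}
	return nil
}
```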
@@ -249,7 +245,6 @@ func (w *writer) writeResponse(data []byte) (int, error) {
d, err := json.Marshal(toChunk(w.id, chatResponse))
if err != nil {
return 0, err
}
w.ResponseWriter.Header().Set("Content-Type", "text/event-stream")

View File

@@ -1,4 +1,4 @@
package model
package parser
import (
"bufio"
@@ -8,6 +8,7 @@ import (
"io"
"strconv"
"strings"
"unicode"
)
type File struct {
@@ -68,6 +69,11 @@ func ParseFile(r io.Reader) (*File, error) {
var b bytes.Buffer
var role string
var lineCount int
var linePos int
var utf16 bool
var f File
br := bufio.NewReader(r)
@@ -79,6 +85,17 @@ func ParseFile(r io.Reader) (*File, error) {
return nil, err
}
// the utf16 byte order mark will be read as "unreadable" by ReadRune()
if isUnreadable(r) && lineCount == 0 && linePos == 0 {
utf16 = true
continue
}
// skip the second byte if we're reading utf16
if utf16 && r == 0 {
continue
}
next, r, err := parseRuneForState(r, curr)
if errors.Is(err, io.ErrUnexpectedEOF) {
return nil, fmt.Errorf("%w: %s", err, b.String())
@@ -86,6 +103,13 @@ func ParseFile(r io.Reader) (*File, error) {
return nil, err
}
if isNewline(r) {
lineCount++
linePos = 0
} else {
linePos++
}
// process the state transition, some transitions need to be intercepted and redirected
if next != curr {
switch curr {
@@ -285,6 +309,10 @@ func isNewline(r rune) bool {
return r == '\r' || r == '\n'
}
func isUnreadable(r rune) bool {
return r == unicode.ReplacementChar
}
func isValidMessageRole(role string) bool {
return role == "system" || role == "user" || role == "assistant"
}
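The byte-order-mark handling above works because bufio's ReadRune reports bytes that are not valid UTF-8 as unicode.ReplacementChar: a UTF-16 BOM therefore shows up as U+FFFD at line 0, position 0, which isUnreadable catches before the second BOM byte and the interleaved zero bytes are skipped. A small standalone check of that behaviour:

```go
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"unicode"
)

func main() {
	// 0xFF 0xFE is the UTF-16 LE byte order mark; neither byte is valid UTF-8,
	// so ReadRune yields the replacement character for each one.
	br := bufio.NewReader(bytes.NewReader([]byte{0xFF, 0xFE, 'F', 'R', 'O', 'M'}))
	r, _, _ := br.ReadRune()
	fmt.Println(r == unicode.ReplacementChar) // true
}
```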

View File

@@ -1,13 +1,16 @@
package model
package parser
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"strings"
"testing"
"unicode/utf16"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestParseFileFile(t *testing.T) {
@@ -23,7 +26,7 @@ TEMPLATE template1
reader := strings.NewReader(input)
modelfile, err := ParseFile(reader)
assert.NoError(t, err)
require.NoError(t, err)
expectedCommands := []Command{
{Name: "model", Args: "model1"},
@@ -86,7 +89,7 @@ func TestParseFileFrom(t *testing.T) {
for _, c := range cases {
t.Run("", func(t *testing.T) {
modelfile, err := ParseFile(strings.NewReader(c.input))
assert.ErrorIs(t, err, c.err)
require.ErrorIs(t, err, c.err)
if modelfile != nil {
assert.Equal(t, c.expected, modelfile.Commands)
}
@@ -103,7 +106,7 @@ PARAMETER param1
reader := strings.NewReader(input)
_, err := ParseFile(reader)
assert.ErrorIs(t, err, io.ErrUnexpectedEOF)
require.ErrorIs(t, err, io.ErrUnexpectedEOF)
}
func TestParseFileBadCommand(t *testing.T) {
@@ -112,8 +115,7 @@ FROM foo
BADCOMMAND param1 value1
`
_, err := ParseFile(strings.NewReader(input))
assert.ErrorIs(t, err, errInvalidCommand)
require.ErrorIs(t, err, errInvalidCommand)
}
func TestParseFileMessages(t *testing.T) {
@@ -199,7 +201,7 @@ MESSAGE system`,
for _, c := range cases {
t.Run("", func(t *testing.T) {
modelfile, err := ParseFile(strings.NewReader(c.input))
assert.ErrorIs(t, err, c.err)
require.ErrorIs(t, err, c.err)
if modelfile != nil {
assert.Equal(t, c.expected, modelfile.Commands)
}
@@ -353,7 +355,7 @@ TEMPLATE """
for _, c := range cases {
t.Run("", func(t *testing.T) {
modelfile, err := ParseFile(strings.NewReader(c.multiline))
assert.ErrorIs(t, err, c.err)
require.ErrorIs(t, err, c.err)
if modelfile != nil {
assert.Equal(t, c.expected, modelfile.Commands)
}
@@ -411,7 +413,7 @@ func TestParseFileParameters(t *testing.T) {
fmt.Fprintln(&b, "FROM foo")
fmt.Fprintln(&b, "PARAMETER", k)
modelfile, err := ParseFile(&b)
assert.NoError(t, err)
require.NoError(t, err)
assert.Equal(t, []Command{
{Name: "model", Args: "foo"},
@@ -440,7 +442,7 @@ FROM foo
for _, c := range cases {
t.Run("", func(t *testing.T) {
modelfile, err := ParseFile(strings.NewReader(c.input))
assert.NoError(t, err)
require.NoError(t, err)
assert.Equal(t, c.expected, modelfile.Commands)
})
}
@@ -499,13 +501,46 @@ SYSTEM ""
for _, c := range cases {
t.Run("", func(t *testing.T) {
modelfile, err := ParseFile(strings.NewReader(c))
assert.NoError(t, err)
require.NoError(t, err)
modelfile2, err := ParseFile(strings.NewReader(modelfile.String()))
assert.NoError(t, err)
require.NoError(t, err)
assert.Equal(t, modelfile, modelfile2)
})
}
}
func TestParseFileUTF16ParseFile(t *testing.T) {
data := `FROM bob
PARAMETER param1 1
PARAMETER param2 4096
SYSTEM You are a utf16 file.
`
// simulate a utf16 le file
utf16File := utf16.Encode(append([]rune{'\ufffe'}, []rune(data)...))
buf := new(bytes.Buffer)
err := binary.Write(buf, binary.LittleEndian, utf16File)
require.NoError(t, err)
actual, err := ParseFile(buf)
require.NoError(t, err)
expected := []Command{
{Name: "model", Args: "bob"},
{Name: "param1", Args: "1"},
{Name: "param2", Args: "4096"},
{Name: "system", Args: "You are a utf16 file."},
}
assert.Equal(t, expected, actual.Commands)
// simulate a utf16 be file
buf = new(bytes.Buffer)
err = binary.Write(buf, binary.BigEndian, utf16File)
require.NoError(t, err)
actual, err = ParseFile(buf)
require.NoError(t, err)
assert.Equal(t, expected, actual.Commands)
}

View File

@@ -59,7 +59,7 @@ func (p *Progress) StopAndClear() bool {
stopped := p.stop()
if stopped {
// clear all progress lines
for i := 0; i < p.pos; i++ {
for i := range p.pos {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
@@ -85,7 +85,7 @@ func (p *Progress) render() {
defer fmt.Fprint(p.w, "\033[?25h")
// clear already rendered progress lines
for i := 0; i < p.pos; i++ {
for i := range p.pos {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}

View File

@@ -5,12 +5,16 @@ import (
"os"
"github.com/emirpasic/gods/lists/arraylist"
"github.com/mattn/go-runewidth"
"golang.org/x/term"
)
type Buffer struct {
DisplayPos int
Pos int
Buf *arraylist.List
//LineHasSpace is an arraylist of bools to keep track of whether a line has a space at the end
LineHasSpace *arraylist.List
Prompt *Prompt
LineWidth int
Width int
@@ -27,8 +31,10 @@ func NewBuffer(prompt *Prompt) (*Buffer, error) {
lwidth := width - len(prompt.prompt())
b := &Buffer{
DisplayPos: 0,
Pos: 0,
Buf: arraylist.New(),
LineHasSpace: arraylist.New(),
Prompt: prompt,
Width: width,
Height: height,
@@ -38,14 +44,43 @@ func NewBuffer(prompt *Prompt) (*Buffer, error) {
return b, nil
}
func (b *Buffer) GetLineSpacing(line int) bool {
hasSpace, _ := b.LineHasSpace.Get(line)
if hasSpace == nil {
return false
}
return hasSpace.(bool)
}
func (b *Buffer) MoveLeft() {
if b.Pos > 0 {
if b.Pos%b.LineWidth == 0 {
//asserts that we retrieve a rune
if e, ok := b.Buf.Get(b.Pos - 1); ok {
if r, ok := e.(rune); ok {
rLength := runewidth.RuneWidth(r)
if b.DisplayPos%b.LineWidth == 0 {
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
} else {
if rLength == 2 {
fmt.Print(CursorLeft)
}
line := b.DisplayPos/b.LineWidth - 1
hasSpace := b.GetLineSpacing(line)
if hasSpace {
b.DisplayPos -= 1
fmt.Print(CursorLeft)
}
} else {
fmt.Print(cursorLeftN(rLength))
}
b.Pos -= 1
b.DisplayPos -= rLength
}
}
}
}
@@ -71,18 +106,32 @@ func (b *Buffer) MoveLeftWord() {
}
func (b *Buffer) MoveRight() {
if b.Pos < b.Size() {
if b.Pos < b.Buf.Size() {
if e, ok := b.Buf.Get(b.Pos); ok {
if r, ok := e.(rune); ok {
rLength := runewidth.RuneWidth(r)
b.Pos += 1
if b.Pos%b.LineWidth == 0 {
hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth)
b.DisplayPos += rLength
if b.DisplayPos%b.LineWidth == 0 {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
} else if (b.DisplayPos-rLength)%b.LineWidth == b.LineWidth-1 && hasSpace {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())+rLength))
b.DisplayPos += 1
} else if b.LineHasSpace.Size() > 0 && b.DisplayPos%b.LineWidth == b.LineWidth-1 && hasSpace {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(len(b.Prompt.prompt())))
b.DisplayPos += 1
} else {
fmt.Print(CursorRight)
fmt.Print(cursorRightN(rLength))
}
}
}
}
}
func (b *Buffer) MoveRightWord() {
if b.Pos < b.Size() {
if b.Pos < b.Buf.Size() {
for {
b.MoveRight()
v, _ := b.Buf.Get(b.Pos)
@@ -90,7 +139,7 @@ func (b *Buffer) MoveRightWord() {
break
}
if b.Pos == b.Size() {
if b.Pos == b.Buf.Size() {
break
}
}
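The split between Pos (rune index into the buffer) and DisplayPos (terminal cells) exists because East Asian wide runes occupy two cells, and go-runewidth is what reports that width. A quick standalone illustration:

```go
package main

import (
	"fmt"

	"github.com/mattn/go-runewidth"
)

func main() {
	// ASCII runes occupy one terminal cell, East Asian wide runes occupy two,
	// which is why the buffer tracks DisplayPos separately from Pos.
	fmt.Println(runewidth.RuneWidth('a'))      // 1
	fmt.Println(runewidth.RuneWidth('世'))     // 2
	fmt.Println(runewidth.StringWidth("a世b")) // 4
}
```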
@@ -99,89 +148,200 @@ func (b *Buffer) MoveRightWord() {
func (b *Buffer) MoveToStart() {
if b.Pos > 0 {
currLine := b.Pos / b.LineWidth
currLine := b.DisplayPos / b.LineWidth
if currLine > 0 {
for cnt := 0; cnt < currLine; cnt++ {
for range currLine {
fmt.Print(CursorUp)
}
}
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())))
b.Pos = 0
b.DisplayPos = 0
}
}
func (b *Buffer) MoveToEnd() {
if b.Pos < b.Size() {
currLine := b.Pos / b.LineWidth
totalLines := b.Size() / b.LineWidth
if b.Pos < b.Buf.Size() {
currLine := b.DisplayPos / b.LineWidth
totalLines := b.DisplaySize() / b.LineWidth
if currLine < totalLines {
for cnt := 0; cnt < totalLines-currLine; cnt++ {
for range totalLines - currLine {
fmt.Print(CursorDown)
}
remainder := b.Size() % b.LineWidth
remainder := b.DisplaySize() % b.LineWidth
fmt.Printf(CursorBOL + cursorRightN(len(b.Prompt.prompt())+remainder))
} else {
fmt.Print(cursorRightN(b.Size() - b.Pos))
fmt.Print(cursorRightN(b.DisplaySize() - b.DisplayPos))
}
b.Pos = b.Size()
b.Pos = b.Buf.Size()
b.DisplayPos = b.DisplaySize()
}
}
func (b *Buffer) Size() int {
return b.Buf.Size()
func (b *Buffer) DisplaySize() int {
sum := 0
for i := range b.Buf.Size() {
if e, ok := b.Buf.Get(i); ok {
if r, ok := e.(rune); ok {
sum += runewidth.RuneWidth(r)
}
}
}
return sum
}
func (b *Buffer) Add(r rune) {
if b.Pos == b.Buf.Size() {
b.AddChar(r, false)
} else {
b.AddChar(r, true)
}
}
func (b *Buffer) AddChar(r rune, insert bool) {
rLength := runewidth.RuneWidth(r)
b.DisplayPos += rLength
if b.Pos > 0 {
if b.DisplayPos%b.LineWidth == 0 {
fmt.Printf("%c", r)
b.Buf.Add(r)
b.Pos += 1
if b.Pos > 0 && b.Pos%b.LineWidth == 0 {
fmt.Printf("\n%s", b.Prompt.AltPrompt)
if insert {
b.LineHasSpace.Set(b.DisplayPos/b.LineWidth-1, false)
} else {
b.LineHasSpace.Add(false)
}
// this case occurs when a double-width rune crosses the line boundary
} else if b.DisplayPos%b.LineWidth < (b.DisplayPos-rLength)%b.LineWidth {
if insert {
fmt.Print(ClearToEOL)
}
fmt.Printf("\n%s", b.Prompt.AltPrompt)
b.DisplayPos += 1
fmt.Printf("%c", r)
if insert {
b.LineHasSpace.Set(b.DisplayPos/b.LineWidth-1, true)
} else {
b.LineHasSpace.Add(true)
}
} else {
fmt.Printf("%c", r)
b.Buf.Insert(b.Pos, r)
b.Pos += 1
if b.Pos > 0 && b.Pos%b.LineWidth == 0 {
fmt.Printf("\n%s", b.Prompt.AltPrompt)
}
} else {
fmt.Printf("%c", r)
}
if insert {
b.Buf.Insert(b.Pos, r)
} else {
b.Buf.Add(r)
}
b.Pos += 1
if insert {
b.drawRemaining()
}
}
func (b *Buffer) countRemainingLineWidth(place int) int {
var sum int
counter := -1
var prevLen int
for place <= b.LineWidth {
counter += 1
sum += prevLen
if e, ok := b.Buf.Get(b.Pos + counter); ok {
if r, ok := e.(rune); ok {
place += runewidth.RuneWidth(r)
prevLen = len(string(r))
}
} else {
break
}
}
return sum
}
func (b *Buffer) drawRemaining() {
var place int
remainingText := b.StringN(b.Pos)
if b.Pos > 0 {
place = b.Pos % b.LineWidth
place = b.DisplayPos % b.LineWidth
}
fmt.Print(CursorHide)
// render the rest of the current line
currLine := remainingText[:min(b.LineWidth-place, len(remainingText))]
currLineLength := b.countRemainingLineWidth(place)
currLine := remainingText[:min(currLineLength, len(remainingText))]
currLineSpace := runewidth.StringWidth(currLine)
remLength := runewidth.StringWidth(remainingText)
if len(currLine) > 0 {
fmt.Printf(ClearToEOL + currLine)
fmt.Print(cursorLeftN(len(currLine)))
fmt.Print(cursorLeftN(currLineSpace))
} else {
fmt.Print(ClearToEOL)
}
if currLineSpace != b.LineWidth-place && currLineSpace != remLength {
b.LineHasSpace.Set(b.DisplayPos/b.LineWidth, true)
} else if currLineSpace != b.LineWidth-place {
b.LineHasSpace.Remove(b.DisplayPos / b.LineWidth)
} else {
b.LineHasSpace.Set(b.DisplayPos/b.LineWidth, false)
}
if (b.DisplayPos+currLineSpace)%b.LineWidth == 0 && currLine == remainingText {
fmt.Print(cursorRightN(currLineSpace))
fmt.Printf("\n%s", b.Prompt.AltPrompt)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width-currLineSpace))
}
// render the other lines
if len(remainingText) > len(currLine) {
remaining := []rune(remainingText[len(currLine):])
if remLength > currLineSpace {
remaining := (remainingText[len(currLine):])
var totalLines int
for i, c := range remaining {
if i%b.LineWidth == 0 {
var displayLength int
var lineLength int = currLineSpace
for _, c := range remaining {
if displayLength == 0 || (displayLength+runewidth.RuneWidth(c))%b.LineWidth < displayLength%b.LineWidth {
fmt.Printf("\n%s", b.Prompt.AltPrompt)
totalLines += 1
if displayLength != 0 {
if lineLength == b.LineWidth {
b.LineHasSpace.Set(b.DisplayPos/b.LineWidth+totalLines-1, false)
} else {
b.LineHasSpace.Set(b.DisplayPos/b.LineWidth+totalLines-1, true)
}
}
lineLength = 0
}
displayLength += runewidth.RuneWidth(c)
lineLength += runewidth.RuneWidth(c)
fmt.Printf("%c", c)
}
fmt.Print(ClearToEOL)
fmt.Print(cursorUpN(totalLines))
fmt.Printf(CursorBOL + cursorRightN(b.Width-len(currLine)))
fmt.Printf(CursorBOL + cursorRightN(b.Width-currLineSpace))
hasSpace := b.GetLineSpacing(b.DisplayPos / b.LineWidth)
if hasSpace && b.DisplayPos%b.LineWidth != b.LineWidth-1 {
fmt.Print(CursorLeft)
}
}
fmt.Print(CursorShow)
@@ -189,46 +349,81 @@ func (b *Buffer) drawRemaining() {
func (b *Buffer) Remove() {
if b.Buf.Size() > 0 && b.Pos > 0 {
if b.Pos%b.LineWidth == 0 {
if e, ok := b.Buf.Get(b.Pos - 1); ok {
if r, ok := e.(rune); ok {
rLength := runewidth.RuneWidth(r)
hasSpace := b.GetLineSpacing(b.DisplayPos/b.LineWidth - 1)
if b.DisplayPos%b.LineWidth == 0 {
// if the user backspaces over the word boundary, do this magic to clear the line
// and move to the end of the previous line
fmt.Printf(CursorBOL + ClearToEOL)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width) + " " + CursorLeft)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
if b.DisplaySize()%b.LineWidth < (b.DisplaySize()-rLength)%b.LineWidth {
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
}
if hasSpace {
b.DisplayPos -= 1
fmt.Print(CursorLeft)
}
if rLength == 2 {
fmt.Print(CursorLeft + " " + cursorLeftN(2))
} else {
fmt.Printf(CursorLeft + " " + CursorLeft)
fmt.Print(" " + CursorLeft)
}
} else if (b.DisplayPos-rLength)%b.LineWidth == 0 && hasSpace {
fmt.Printf(CursorBOL + ClearToEOL)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
if b.Pos == b.Buf.Size() {
b.LineHasSpace.Remove(b.DisplayPos/b.LineWidth - 1)
}
b.DisplayPos -= 1
} else {
fmt.Print(cursorLeftN(rLength))
for range rLength {
fmt.Print(" ")
}
fmt.Print(cursorLeftN(rLength))
}
var eraseExtraLine bool
if (b.Size()-1)%b.LineWidth == 0 {
if (b.DisplaySize()-1)%b.LineWidth == 0 || (rLength == 2 && ((b.DisplaySize()-2)%b.LineWidth == 0)) || b.DisplaySize()%b.LineWidth == 0 {
eraseExtraLine = true
}
b.Pos -= 1
b.DisplayPos -= rLength
b.Buf.Remove(b.Pos)
if b.Pos < b.Size() {
if b.Pos < b.Buf.Size() {
b.drawRemaining()
// this erases a line which is left over when backspacing in the middle of a line and there
// are trailing characters which go over the line width boundary
if eraseExtraLine {
remainingLines := (b.Size() - b.Pos) / b.LineWidth
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
place := b.Pos % b.LineWidth
place := b.DisplayPos % b.LineWidth
fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.prompt())))
}
}
}
}
}
}
func (b *Buffer) Delete() {
if b.Size() > 0 && b.Pos < b.Size() {
if b.Buf.Size() > 0 && b.Pos < b.Buf.Size() {
b.Buf.Remove(b.Pos)
b.drawRemaining()
if b.Size()%b.LineWidth == 0 {
if b.Pos != b.Size() {
remainingLines := (b.Size() - b.Pos) / b.LineWidth
if b.DisplaySize()%b.LineWidth == 0 {
if b.DisplayPos != b.DisplaySize() {
remainingLines := (b.DisplaySize() - b.DisplayPos) / b.LineWidth
fmt.Printf(cursorDownN(remainingLines) + CursorBOL + ClearToEOL)
place := b.Pos % b.LineWidth
place := b.DisplayPos % b.LineWidth
fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.prompt())))
}
}
@@ -244,9 +439,9 @@ func (b *Buffer) DeleteBefore() {
}
func (b *Buffer) DeleteRemaining() {
if b.Size() > 0 && b.Pos < b.Size() {
charsToDel := b.Size() - b.Pos
for cnt := 0; cnt < charsToDel; cnt++ {
if b.DisplaySize() > 0 && b.Pos < b.DisplaySize() {
charsToDel := b.Buf.Size() - b.Pos
for range charsToDel {
b.Delete()
}
}
@@ -281,14 +476,16 @@ func (b *Buffer) ClearScreen() {
ph := b.Prompt.placeholder()
fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault)
} else {
currPos := b.Pos
currPos := b.DisplayPos
currIndex := b.Pos
b.Pos = 0
b.DisplayPos = 0
b.drawRemaining()
fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.prompt())))
if currPos > 0 {
targetLine := currPos / b.LineWidth
if targetLine > 0 {
for cnt := 0; cnt < targetLine; cnt++ {
for range targetLine {
fmt.Print(CursorDown)
}
}
@@ -300,7 +497,8 @@ func (b *Buffer) ClearScreen() {
fmt.Printf(CursorBOL + b.Prompt.AltPrompt)
}
}
b.Pos = currPos
b.Pos = currIndex
b.DisplayPos = currPos
}
}
@@ -309,9 +507,20 @@ func (b *Buffer) IsEmpty() bool {
}
func (b *Buffer) Replace(r []rune) {
b.DisplayPos = 0
b.Pos = 0
lineNums := b.DisplaySize() / b.LineWidth
b.Buf.Clear()
fmt.Printf(ClearLine + CursorBOL + b.Prompt.prompt())
fmt.Printf(CursorBOL + ClearToEOL)
for range lineNums {
fmt.Print(CursorUp + CursorBOL + ClearToEOL)
}
fmt.Printf(CursorBOL + b.Prompt.prompt())
for _, c := range r {
b.Add(c)
}
@@ -328,7 +537,7 @@ func (b *Buffer) StringN(n int) string {
func (b *Buffer) StringNM(n, m int) string {
var s string
if m == 0 {
m = b.Size()
m = b.Buf.Size()
}
for cnt := n; cnt < m; cnt++ {
c, _ := b.Buf.Get(cnt)

View File

@@ -91,7 +91,7 @@ func (h *History) Add(l []rune) {
func (h *History) Compact() {
s := h.Buf.Size()
if s > h.Limit {
for cnt := 0; cnt < s-h.Limit; cnt++ {
for range s - h.Limit {
h.Buf.Remove(0)
}
}
@@ -139,7 +139,7 @@ func (h *History) Save() error {
defer f.Close()
buf := bufio.NewWriter(f)
for cnt := 0; cnt < h.Size(); cnt++ {
for cnt := range h.Size() {
v, _ := h.Buf.Get(cnt)
line, _ := v.([]rune)
if _, err := buf.WriteString(string(line) + "\n"); err != nil {

View File

@@ -5,7 +5,6 @@ import (
"fmt"
"io"
"os"
"syscall"
)
type Prompt struct {
@@ -63,7 +62,7 @@ func New(prompt Prompt) (*Instance, error) {
func (i *Instance) Readline() (string, error) {
if !i.Terminal.rawmode {
fd := int(syscall.Stdin)
fd := os.Stdin.Fd()
termios, err := SetRawMode(fd)
if err != nil {
return "", err
@@ -80,8 +79,8 @@ func (i *Instance) Readline() (string, error) {
fmt.Print(prompt)
defer func() {
fd := int(syscall.Stdin)
// nolint: errcheck
fd := os.Stdin.Fd()
//nolint:errcheck
UnsetRawMode(fd, i.Terminal.termios)
i.Terminal.rawmode = false
}()
@@ -136,7 +135,7 @@ func (i *Instance) Readline() (string, error) {
buf.MoveRight()
case CharBracketedPaste:
var code string
for cnt := 0; cnt < 3; cnt++ {
for range 3 {
r, err = i.Terminal.Read()
if err != nil {
return "", io.EOF
@@ -150,7 +149,7 @@ func (i *Instance) Readline() (string, error) {
i.Pasting = false
}
case KeyDel:
if buf.Size() > 0 {
if buf.DisplaySize() > 0 {
buf.Delete()
}
metaDel = true
@@ -198,11 +197,11 @@ func (i *Instance) Readline() (string, error) {
buf.Remove()
case CharTab:
// todo: convert back to real tabs
for cnt := 0; cnt < 8; cnt++ {
for range 8 {
buf.Add(' ')
}
case CharDelete:
if buf.Size() > 0 {
if buf.DisplaySize() > 0 {
buf.Delete()
} else {
return "", io.EOF
@@ -216,7 +215,7 @@ func (i *Instance) Readline() (string, error) {
case CharCtrlW:
buf.DeleteWord()
case CharCtrlZ:
fd := int(syscall.Stdin)
fd := os.Stdin.Fd()
return handleCharCtrlZ(fd, i.Terminal.termios)
case CharEnter, CharCtrlJ:
output := buf.String()
@@ -248,7 +247,7 @@ func (i *Instance) HistoryDisable() {
}
func NewTerminal() (*Terminal, error) {
fd := int(syscall.Stdin)
fd := os.Stdin.Fd()
termios, err := SetRawMode(fd)
if err != nil {
return nil, err

View File

@@ -6,7 +6,7 @@ import (
"syscall"
)
func handleCharCtrlZ(fd int, termios any) (string, error) {
func handleCharCtrlZ(fd uintptr, termios any) (string, error) {
t := termios.(*Termios)
if err := UnsetRawMode(fd, t); err != nil {
return "", err

View File

@@ -1,6 +1,6 @@
package readline
func handleCharCtrlZ(fd int, state any) (string, error) {
func handleCharCtrlZ(fd uintptr, state any) (string, error) {
// not supported
return "", nil
}

View File

@@ -8,7 +8,7 @@ import (
type Termios syscall.Termios
func SetRawMode(fd int) (*Termios, error) {
func SetRawMode(fd uintptr) (*Termios, error) {
termios, err := getTermios(fd)
if err != nil {
return nil, err
@@ -25,13 +25,13 @@ func SetRawMode(fd int) (*Termios, error) {
return termios, setTermios(fd, &newTermios)
}
func UnsetRawMode(fd int, termios any) error {
func UnsetRawMode(fd uintptr, termios any) error {
t := termios.(*Termios)
return setTermios(fd, t)
}
// IsTerminal returns true if the given file descriptor is a terminal.
func IsTerminal(fd int) bool {
func IsTerminal(fd uintptr) bool {
_, err := getTermios(fd)
return err == nil
}

View File

@@ -7,17 +7,17 @@ import (
"unsafe"
)
func getTermios(fd int) (*Termios, error) {
func getTermios(fd uintptr) (*Termios, error) {
termios := new(Termios)
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCGETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, fd, syscall.TIOCGETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return nil, err
}
return termios, nil
}
func setTermios(fd int, termios *Termios) error {
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCSETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
func setTermios(fd uintptr, termios *Termios) error {
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, fd, syscall.TIOCSETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return err
}

View File

@@ -10,17 +10,17 @@ import (
const tcgets = 0x5401
const tcsets = 0x5402
func getTermios(fd int) (*Termios, error) {
func getTermios(fd uintptr) (*Termios, error) {
termios := new(Termios)
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcgets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, fd, tcgets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return nil, err
}
return termios, nil
}
func setTermios(fd int, termios *Termios) error {
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcsets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
func setTermios(fd uintptr, termios *Termios) error {
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, fd, tcsets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return err
}
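The fd parameters above change from int to uintptr so that os.Stdin.Fd(), which already returns a uintptr, can be passed straight through without a conversion at each call site. A usage sketch, assuming it sits alongside the SetRawMode and UnsetRawMode helpers shown in this diff:

```go
package readline

import "os"

// withRawMode is a usage sketch, not code from this change: os.Stdin.Fd()
// returns a uintptr, matching the new SetRawMode/UnsetRawMode signatures, so
// the previous int conversion at each call site is no longer needed.
func withRawMode(run func() error) error {
	fd := os.Stdin.Fd()
	termios, err := SetRawMode(fd)
	if err != nil {
		return err
	}
	//nolint:errcheck
	defer UnsetRawMode(fd, termios)
	return run()
}
```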

Some files were not shown because too many files have changed in this diff.