diff --git a/projects/local-api-test/README.md b/projects/local-api-test/README.md index 43d0043..0e3fd4a 100644 --- a/projects/local-api-test/README.md +++ b/projects/local-api-test/README.md @@ -1,7 +1,29 @@ +# local-api-test +## How to run? + +Export the environment variables, then run: ```shell uv run main.py ``` +Run with environment variables decrypted by `tiny-encrypt`: + +```shell +tiny-encrypt -X -k $(cat ~/.config/envs/LCOAL_DEFAULT_TINY_ENCRYPT_KEY_ID) \ + ~/secrets/alibaba-cloud-ai-cn.env.tinyenc -- uv run main.py +``` + +Use a local llm-proxy: + +```shell +OPENAI_API_BASE=http://127.0.0.1:8080/ python main.py + +OPENAI_API_BASE=http://127.0.0.1:8080/ python main-stream.py +``` + +## Reference + +- https://help.aliyun.com/zh/model-studio/stream - 大模型服务平台百炼 / 文本生成 / 流式输出 diff --git a/projects/local-api-test/justfile b/projects/local-api-test/justfile new file mode 100644 index 0000000..9c53c79 --- /dev/null +++ b/projects/local-api-test/justfile @@ -0,0 +1,10 @@ +_: + @just --list + +alias r:=run + +# Run main.py +run: + te -X -k $(cat ~/.config/envs/LCOAL_DEFAULT_TINY_ENCRYPT_KEY_ID) ~/secrets/alibaba-cloud-ai-cn.env.tinyenc -- uv run main.py + + diff --git a/projects/local-api-test/main-stream.py b/projects/local-api-test/main-stream.py new file mode 100644 index 0000000..3e86bdf --- /dev/null +++ b/projects/local-api-test/main-stream.py @@ -0,0 +1,45 @@ +import os +from pprint import pprint + +from openai import OpenAI +from openai._types import Timeout +from openai.types.chat import ChatCompletionUserMessageParam, ChatCompletionSystemMessageParam + + +def main(): + client = OpenAI( + api_key=os.environ.get("OPENAI_API_KEY", "sk-dummy"), + base_url=os.environ.get("OPENAI_API_BASE", "http://localhost:11434/v1"), + timeout=Timeout(30, connect=5), + ) + + response = client.chat.completions.create( + model=os.environ.get("OPENAI_MODEL", "qwen-math-turbo"), + messages=[ + ChatCompletionSystemMessageParam(role="system", content="You are a useful assistant."), + # 
ChatCompletionUserMessageParam(**{"role": "user", "content": "Hello!"}), + ChatCompletionUserMessageParam(role="user", content="Hello!"), + ], + stream=True, + # stream_options={"include_usage": True}, + ) + print() + pprint(vars(response)) + print() + # print(response.choices[0].message.content) + + for chunk in response: + print('xx') + if chunk.choices: + content = chunk.choices[0].delta.content or "" + print('CHUNK:', content, end="", flush=True) + # content_parts.append(content) + elif chunk.usage: + print("\n--- 请求用量 ---") + print(f"输入 Tokens: {chunk.usage.prompt_tokens}") + print(f"输出 Tokens: {chunk.usage.completion_tokens}") + print(f"总计 Tokens: {chunk.usage.total_tokens}") + + +if __name__ == "__main__": + main()