import os
from pprint import pprint

from openai import OpenAI
from openai._types import Timeout
from openai.types.chat import (
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
)


def main() -> None:
    """Send one streaming chat completion request and print it chunk by chunk.

    Connection settings are read from the environment, each with a fallback:

    * ``OPENAI_API_KEY``  -- defaults to ``"sk-dummy"``
    * ``OPENAI_API_BASE`` -- defaults to ``"http://localhost:11434/v1"``
    * ``OPENAI_MODEL``    -- defaults to ``"qwen-math-turbo"``

    The function first dumps the attributes of the stream object (debugging
    aid), then prints the text delta of every chunk that carries choices, and
    prints a token-usage summary for any chunk that has no choices but does
    carry ``usage``.

    Both the client and the stream are used as context managers so the
    underlying HTTP resources are released even if iteration raises or stops
    early.
    """
    with OpenAI(
        api_key=os.environ.get("OPENAI_API_KEY", "sk-dummy"),
        base_url=os.environ.get("OPENAI_API_BASE", "http://localhost:11434/v1"),
        # General timeout of 30 with a separate, shorter connect timeout of 5.
        timeout=Timeout(30, connect=5),
    ) as client:
        with client.chat.completions.create(
            model=os.environ.get("OPENAI_MODEL", "qwen-math-turbo"),
            messages=[
                ChatCompletionSystemMessageParam(
                    role="system", content="You are a useful assistant."
                ),
                ChatCompletionUserMessageParam(role="user", content="Hello!"),
            ],
            stream=True,
            # NOTE(review): the usage branch below only runs if the server
            # emits a chunk with empty `choices` and populated `usage`. With
            # the OpenAI API that presumably requires
            # `stream_options={"include_usage": True}` -- left disabled here,
            # as in the original request; confirm backend support before
            # enabling.
            # stream_options={"include_usage": True},
        ) as response:
            # Debug dump of the stream object's attributes, framed by blank
            # lines.
            print()
            pprint(vars(response))
            print()

            for chunk in response:
                print('xx')  # per-chunk debug marker
                if chunk.choices:
                    # `delta.content` may be None; normalise to "".
                    content = chunk.choices[0].delta.content or ""
                    print('CHUNK:', content, end="", flush=True)
                elif chunk.usage:
                    # Usage summary; the labels are runtime output (Chinese
                    # for: request usage / input / output / total tokens).
                    print("\n--- 请求用量 ---")
                    print(f"输入 Tokens: {chunk.usage.prompt_tokens}")
                    print(f"输出 Tokens: {chunk.usage.completion_tokens}")
                    print(f"总计 Tokens: {chunk.usage.total_tokens}")


if __name__ == "__main__":
    main()