import os
from datetime import datetime
from pprint import pprint

from openai import OpenAI
from openai._types import Timeout
from openai.types.chat import (
    ChatCompletionStreamOptionsParam,
    ChatCompletionSystemMessageParam,
    ChatCompletionUserMessageParam,
)


def _print_usage(usage):
    """Print the prompt/completion/total token counts carried by *usage*."""
    print("\n----- Usage -----")
    print(f"Input Tokens: {usage.prompt_tokens}")
    print(f"Output Tokens: {usage.completion_tokens}")
    print(f"Total Tokens: {usage.total_tokens}")
    print("")


def _consume_stream(stream):
    """Echo every chunk of *stream* with a timestamp and return the full text.

    Chunks that carry choices contribute the first choice's delta content
    (an empty string when the delta has none). A chunk without choices but
    with usage data is reported through ``_print_usage``.
    """
    content_parts = []
    print("Begin chunk", datetime.now())
    for chunk in stream:
        print("Chunk", datetime.now())
        if chunk.choices:
            content = chunk.choices[0].delta.content or ""
            # Flush so each piece is visible as soon as it arrives.
            print('CHUNK:', content, flush=True)
            content_parts.append(content)
        elif chunk.usage:
            # Requested via stream_options(include_usage=True); it is only
            # looked for on chunks that have no choices.
            _print_usage(chunk.usage)
    return "".join(content_parts)


def main():
    """Stream one chat completion and print chunks, usage and the final text.

    Configuration is read from the environment, with fallbacks:

    * ``OPENAI_API_KEY``  -> ``"sk-dummy"``
    * ``OPENAI_API_BASE`` -> ``"http://localhost:11434/v1"``
    * ``OPENAI_MODEL``    -> ``"qwen-math-turbo"``

    Any exception raised while requesting or reading the stream is printed
    with a timestamp rather than propagated; "End" is always printed.
    """
    # The client and the streaming response are both used as context
    # managers so their HTTP resources are released even if the loop fails.
    with OpenAI(
        api_key=os.environ.get("OPENAI_API_KEY", "sk-dummy"),
        base_url=os.environ.get("OPENAI_API_BASE", "http://localhost:11434/v1"),
        timeout=Timeout(10, connect=5),
    ) as client:
        print("Start", datetime.now())
        try:
            with client.chat.completions.create(
                model=os.environ.get("OPENAI_MODEL", "qwen-math-turbo"),
                messages=[
                    ChatCompletionSystemMessageParam(
                        role="system", content="You are a useful assistant."
                    ),
                    ChatCompletionUserMessageParam(
                        role="user", content="Introduce your self."
                    ),
                ],
                stream=True,
                stream_options=ChatCompletionStreamOptionsParam(include_usage=True),
            ) as stream:
                # Dump the stream object's attributes for debugging.
                print()
                pprint(vars(stream))
                print()

                full_text = _consume_stream(stream)
                print("Finally:", full_text)
        except Exception as e:
            # Top-level boundary of a demo script: report and fall through.
            print("Exception", datetime.now(), e)
        finally:
            print("End", datetime.now())


if __name__ == "__main__":
    main()