大语言模型API的基本概念、调用方式和最佳实践
{
  "headers": {
    "Authorization": "Bearer sk-...",  // API密钥
    "X-API-Version": "2025-01",        // 版本控制
    "X-Request-ID": "uuid-v4"          // 请求追踪
  }
}
{
  "model": "gpt-4-turbo-2025",
  "messages": [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello, how are you?"}
  ],
  "temperature": 0.7,
  "max_tokens": 1000,
  "response_format": {
    "type": "json_schema",
    "json_schema": {...}  // 2025新增:结构化输出
  }
}
{ "id": "chatcmpl-xxx", "object": "chat.completion", "created": 1709000000, "choices": [{ "message": {"role": "assistant", "content": "..."}, "finish_reason": "stop" }], "usage": {"prompt_tokens": 10, "completion_tokens": 20} }
data: {"choices":[{"delta":{"content":"Hello"}}]}

data: {"choices":[{"delta":{"content":" there"}}]}

data: [DONE]
# Retry with exponential backoff
def retry_with_backoff(func, max_retries=3, base_delay=1.0):
    """Call ``func`` and retry it with exponential backoff on rate limiting.

    ``func`` is invoked with no arguments. If it raises ``RateLimitError``
    the call is retried after a delay of ``base_delay * 2**attempt`` seconds
    plus a random jitter in ``[0, base_delay]``. Any other exception
    propagates immediately without a retry.

    Args:
        func: Zero-argument callable to invoke.
        max_retries: Total number of attempts (including the first one).
            Must be at least 1.
        base_delay: Base delay in seconds for the backoff schedule. The
            default of 1.0 yields waits of roughly 1-2s, 2-3s, 4-5s, ...
            Must be non-negative.

    Returns:
        Whatever ``func()`` returns on its first successful attempt.

    Raises:
        ValueError: If ``max_retries`` is less than 1 or ``base_delay`` is
            negative.
        RateLimitError: Re-raised from ``func`` once every attempt has been
            rate limited.

    Note:
        ``RateLimitError`` is not defined in this snippet; it is expected to
        be supplied by the surrounding module (presumably the API client
        library's rate-limit exception - confirm against the caller).
    """
    # Without this guard the loop body never runs and the function would
    # silently return None without ever calling ``func``.
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")
    if base_delay < 0:
        raise ValueError("base_delay must be non-negative")

    for attempt in range(max_retries):
        try:
            return func()
        except RateLimitError:
            # Out of attempts: surface the original error to the caller.
            if attempt == max_retries - 1:
                raise
            # Exponential growth plus jitter so that concurrent clients do
            # not all retry at the same instant.
            wait_time = base_delay * (2 ** attempt) + random.uniform(0, base_delay)
            time.sleep(wait_time)