from __future__ import annotations

import inspect
from typing import Dict, List, Union, Iterable, Optional, overload
from typing_extensions import Literal

import httpx
import pydantic

from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ..._utils import required_args, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ..._streaming import Stream, AsyncStream
from ...types.chat import completion_create_params
from ..._base_client import make_request_options
from ...types.chat_model import ChatModel
from ...types.chat.chat_completion import ChatCompletion
from ...types.chat.chat_completion_chunk import ChatCompletionChunk
from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam
from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam
from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam

__all__ = ["Completions", "AsyncCompletions"]


class Completions(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> CompletionsWithRawResponse:
        return CompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
        return CompletionsWithStreamingResponse(self)

    @overload
    def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ChatCompletion:
        """
        Creates a model response for the given chat conversation.

        Args:
          messages: A list of messages comprising the conversation so far.
              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).

          model: ID of the model to use. See the
              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
              table for details on which models work with the Chat API.

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model. `none` means the model
              will not call a function and instead generates a message. `auto` means the model
              can pick between generating a message or calling a function. Specifying a
              particular function via `{"name": "my_function"}` forces the model to call that
              function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion.

              The total length of input tokens and generated tokens is limited by the model's
              context length.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
              for counting tokens.

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling)
              during tool use.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)

          response_format: An object specifying the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models/gpt-4o),
              [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini),
              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which guarantees the model will match your supplied JSON schema. Learn
              more in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the latency tier to use for processing the request. This parameter is
              relevant for customers subscribed to the scale tier service:

              - If set to 'auto', the system will utilize scale tier credits until they are
                exhausted.
              - If set to 'default', the request will be processed using the default service
                tier with a lower uptime SLA and no latency guarantee.
              - When not set, the default behavior is 'auto'.

              When this parameter is set, the response body will include the `service_tier`
              utilized.

          stop: Up to 4 sequences where the API will stop generating further tokens.

          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
              sent as data-only
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
              as they become available, with the stream terminated by a `data: [DONE]`
              message.
              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

              We generally recommend altering this or `top_p` but not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. Currently, only functions are supported as a
              tool. Use this to provide a list of functions the model may generate JSON inputs
              for. A max of 128 functions are supported.

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: A unique identifier representing your end-user, which can help OpenAI to monitor
              and detect abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
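
          For reference, a minimal call might look like the following sketch. It is
          illustrative only: it assumes an already-configured client instance named
          `client`, and the model name is a placeholder.

              completion = client.chat.completions.create(
                  model="gpt-4o-mini",
                  messages=[
                      {"role": "system", "content": "You are a helpful assistant."},
                      {"role": "user", "content": "Say hello."},
                  ],
              )
              print(completion.choices[0].message.content)

              # JSON mode (also ask for JSON in a system or user message, as noted above):
              completion = client.chat.completions.create(
                  model="gpt-4o-mini",
                  response_format={"type": "json_object"},
                  messages=[
                      {"role": "system", "content": "Reply with a single JSON object."},
                      {"role": "user", "content": "List three primary colors."},
                  ],
              )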
        """
        ...

    @overload
    def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        stream: Literal[True],
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Stream[ChatCompletionChunk]:
        """
        Creates a model response for the given chat conversation.

        Args:
          messages: A list of messages comprising the conversation so far.
              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).

          model: ID of the model to use. See the
              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
              table for details on which models work with the Chat API.

          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
              sent as data-only
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
              as they become available, with the stream terminated by a `data: [DONE]`
              message.
              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).

          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
              existing frequency in the text so far, decreasing the model's likelihood to
              repeat the same line verbatim.

              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)

          function_call: Deprecated in favor of `tool_choice`.

              Controls which (if any) function is called by the model. `none` means the model
              will not call a function and instead generates a message. `auto` means the model
              can pick between generating a message or calling a function. Specifying a
              particular function via `{"name": "my_function"}` forces the model to call that
              function.

              `none` is the default when no functions are present. `auto` is the default if
              functions are present.

          functions: Deprecated in favor of `tools`.

              A list of functions the model may generate JSON inputs for.

          logit_bias: Modify the likelihood of specified tokens appearing in the completion.

              Accepts a JSON object that maps tokens (specified by their token ID in the
              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
              bias is added to the logits generated by the model prior to sampling. The exact
              effect will vary per model, but values between -1 and 1 should decrease or
              increase likelihood of selection; values like -100 or 100 should result in a ban
              or exclusive selection of the relevant token.

          logprobs: Whether to return log probabilities of the output tokens or not. If true,
              returns the log probabilities of each output token returned in the `content` of
              `message`.

          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
              completion.

              The total length of input tokens and generated tokens is limited by the model's
              context length.
              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
              for counting tokens.

          n: How many chat completion choices to generate for each input message. Note that
              you will be charged based on the number of generated tokens across all of the
              choices. Keep `n` as `1` to minimize costs.

          parallel_tool_calls: Whether to enable
              [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling)
              during tool use.

          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
              whether they appear in the text so far, increasing the model's likelihood to
              talk about new topics.

              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)

          response_format: An object specifying the format that the model must output. Compatible with
              [GPT-4o](https://platform.openai.com/docs/models/gpt-4o),
              [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini),
              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured
              Outputs which guarantees the model will match your supplied JSON schema. Learn
              more in the
              [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).

              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
              message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model to
              produce JSON yourself via a system or user message. Without this, the model may
              generate an unending stream of whitespace until the generation reaches the token
              limit, resulting in a long-running and seemingly "stuck" request. Also note that
              the message content may be partially cut off if `finish_reason="length"`, which
              indicates the generation exceeded `max_tokens` or the conversation exceeded the
              max context length.

          seed: This feature is in Beta. If specified, our system will make a best effort to
              sample deterministically, such that repeated requests with the same `seed` and
              parameters should return the same result. Determinism is not guaranteed, and you
              should refer to the `system_fingerprint` response parameter to monitor changes
              in the backend.

          service_tier: Specifies the latency tier to use for processing the request. This parameter is
              relevant for customers subscribed to the scale tier service:

              - If set to 'auto', the system will utilize scale tier credits until they are
                exhausted.
              - If set to 'default', the request will be processed using the default service
                tier with a lower uptime SLA and no latency guarantee.
              - When not set, the default behavior is 'auto'.

              When this parameter is set, the response body will include the `service_tier`
              utilized.

          stop: Up to 4 sequences where the API will stop generating further tokens.

          stream_options: Options for streaming response. Only set this when you set `stream: true`.

          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
              make the output more random, while lower values like 0.2 will make it more
              focused and deterministic.

              We generally recommend altering this or `top_p` but not both.

          tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
              not call any tool and instead generates a message. `auto` means the model can
              pick between generating a message or calling one or more tools. `required` means
              the model must call one or more tools. Specifying a particular tool via
              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
              call that tool.

              `none` is the default when no tools are present. `auto` is the default if tools
              are present.

          tools: A list of tools the model may call. Currently, only functions are supported as a
              tool. Use this to provide a list of functions the model may generate JSON inputs
              for. A max of 128 functions are supported.

          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
              return at each token position, each with an associated log probability.
              `logprobs` must be set to `true` if this parameter is used.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
              model considers the results of the tokens with top_p probability mass. So 0.1
              means only the tokens comprising the top 10% probability mass are considered.

              We generally recommend altering this or `temperature` but not both.

          user: A unique identifier representing your end-user, which can help OpenAI to monitor
              and detect abuse.
              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
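
          For reference, a minimal streaming call might look like the following sketch.
          It is illustrative only: it assumes an already-configured client instance
          named `client`, and the model name is a placeholder.

              stream = client.chat.completions.create(
                  model="gpt-4o-mini",
                  messages=[{"role": "user", "content": "Tell me a short story."}],
                  stream=True,
              )
              for chunk in stream:
                  if chunk.choices and chunk.choices[0].delta.content is not None:
                      print(chunk.choices[0].delta.content, end="")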
        """
        ...

    @required_args(["messages", "model"], ["messages", "model", "stream"])
    def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
        validate_response_format(response_format)
        return self._post(
            "/chat/completions",
            body=maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_tokens": max_tokens,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "presence_penalty": presence_penalty,
                    "response_format": response_format,
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
            stream=stream or False,
            stream_cls=Stream[ChatCompletionChunk],
        )


class AsyncCompletions(AsyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
        return AsyncCompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
        return AsyncCompletionsWithStreamingResponse(self)

    @required_args(["messages", "model"], ["messages", "model", "stream"])
    async def create(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
        validate_response_format(response_format)
        return await self._post(
            "/chat/completions",
            body=await async_maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_tokens": max_tokens,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "presence_penalty": presence_penalty,
                    "response_format": response_format,
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "stream": stream,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=ChatCompletion,
            stream=stream or False,
            stream_cls=AsyncStream[ChatCompletionChunk],
        )


class CompletionsWithRawResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.create = _legacy_response.to_raw_response_wrapper(
            completions.create,
        )


class AsyncCompletionsWithRawResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.create = _legacy_response.async_to_raw_response_wrapper(
            completions.create,
        )


class CompletionsWithStreamingResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.create = to_streamed_response_wrapper(
            completions.create,
        )


class AsyncCompletionsWithStreamingResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.create = async_to_streamed_response_wrapper(
            completions.create,
        )


def validate_response_format(response_format: object) -> None:
    if inspect.isclass(response_format) and issubclass(response_format, pydantic.BaseModel):
        raise TypeError(
            "You tried to pass a `BaseModel` class to `chat.completions.create()`; "
            "You must use `beta.chat.completions.parse()` instead"
        )
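
# Illustrative tool-calling sketch (assumptions: `OpenAI` is imported from the top-level
# `openai` package, an API key is available in the environment, and the model name and
# tool definition below are placeholders, not part of this module):
#
#     client = OpenAI()
#     completion = client.chat.completions.create(
#         model="gpt-4o-mini",
#         messages=[{"role": "user", "content": "What's the weather in Paris?"}],
#         tools=[
#             {
#                 "type": "function",
#                 "function": {
#                     "name": "get_weather",
#                     "description": "Get the current weather for a city",
#                     "parameters": {
#                         "type": "object",
#                         "properties": {"city": {"type": "string"}},
#                         "required": ["city"],
#                     },
#                 },
#             }
#         ],
#         tool_choice="auto",
#     )
#     tool_calls = completion.choices[0].message.tool_calls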