
    qid                     v   d Z ddlZddlZddlZddlmZ ddlmZmZ ddl	m
Z
mZmZmZmZ ddlZddlmZ ddlmZmZmZmZmZ ddlmZmZ dd	lmZmZ dd
lmZ ddl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/ ddl0m1Z1m2Z2 ddl3mZ4 ddl3m5Z5m6Z6 ddl7m8Z8 e G d de5             Z9 G d de2      Z:y)zMBase LLM service implementation for services that use the AsyncOpenAI client.    N)asynccontextmanager)	dataclassfield)AnyDictListMappingOptional)logger)	NOT_GIVENAPITimeoutErrorAsyncOpenAIAsyncStreamDefaultAsyncHttpxClient)ChatCompletionChunkChatCompletionMessageParam)	BaseModelField)OpenAILLMInvocationParams)FrameLLMContextFrameLLMFullResponseEndFrameLLMFullResponseStartFrameLLMMessagesFrameLLMTextFrame)LLMTokenUsage)
LLMContext)OpenAILLMContextOpenAILLMContextFrame)FrameDirection)FunctionCallFromLLM
LLMServicer   )LLMSettings	_NotGiven)
traced_llmc                   6    e Zd ZU dZ ed       Zeez  ed<   y)OpenAILLMSettingsz~Settings for BaseOpenAILLMService.

    Parameters:
        max_completion_tokens: Maximum completion tokens to generate.
    c                      t         S N)
_NOT_GIVEN     R/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/openai/base_llm.py<lambda>zOpenAILLMSettings.<lambda>:   s    : r-   default_factorymax_completion_tokensN)	__name__
__module____qualname____doc__r   r2   intr%   __annotations__r,   r-   r.   r(   r(   2   s     .3CU-V3?Vr-   r(   c                       e Zd ZU dZeZeed<    G d de      Zddddddddddddd	e	e
   d
e	ee
e
f      de	e
   de	e   de	e   de	e   de	e   f fdZ	 	 	 	 	 d$dZdefdZde
fdZd Zdedee   fdZdedefdZ	 	 d%deez  de	e   de	e
   de	e
   fdZdedee   fdZdedee   fdZedeez  fd        Z d!e!d"e"f fd#Z# xZ$S )&BaseOpenAILLMServicea  Base class for all services that use the AsyncOpenAI client.

    This service consumes OpenAILLMContextFrame or LLMContextFrame frames,
    which contain a reference to an OpenAILLMContext or LLMContext object. The
    context defines what is sent to the LLM for completion, including user,
    assistant, and system messages, as well as tool choices and function call
    configurations.
    	_settingsc                      e Zd ZU dZ ed dd      Zee   ed<    ed dd      Z	ee   ed<    ed	 d
      Z
ee   ed<    ed dd      Zee   ed<    edd
      Zee   ed<    ed dd      Zee   ed<    ed d      Zee   ed<    ed d      Zee   ed<    ed       Zee   ed<    ee      Zeeeef      ed<   y) BaseOpenAILLMService.InputParamsa  Input parameters for OpenAI model configuration.

        .. deprecated:: 0.0.105
            Use ``settings=OpenAILLMSettings(...)`` instead of
            ``params=InputParams(...)``.

        Parameters:
            frequency_penalty: Penalty for frequent tokens (-2.0 to 2.0).
            presence_penalty: Penalty for new tokens (-2.0 to 2.0).
            seed: Random seed for deterministic outputs.
            temperature: Sampling temperature (0.0 to 2.0).
            top_k: Top-k sampling parameter (currently ignored by OpenAI).
            top_p: Top-p (nucleus) sampling parameter (0.0 to 1.0).
            max_tokens: Maximum tokens in response (deprecated, use max_completion_tokens).
            max_completion_tokens: Maximum completion tokens to generate.
            service_tier: Service tier to use (e.g., "auto", "flex", "priority").
            extra: Additional model-specific parameters.
        c                      t         S r*   r#   r,   r-   r.   r/   z)BaseOpenAILLMService.InputParams.<lambda>_       I r-   g       g       @)r1   gelefrequency_penaltyc                      t         S r*   r#   r,   r-   r.   r/   z)BaseOpenAILLMService.InputParams.<lambda>b   r?   r-   presence_penaltyc                      t         S r*   r#   r,   r-   r.   r/   z)BaseOpenAILLMService.InputParams.<lambda>d   s    I r-   r   )r1   r@   seedc                      t         S r*   r#   r,   r-   r.   r/   z)BaseOpenAILLMService.InputParams.<lambda>e   s    Y r-   g        temperatureN)defaultr@   top_kc                      t         S r*   r#   r,   r-   r.   r/   z)BaseOpenAILLMService.InputParams.<lambda>i   s    y r-   g      ?top_pc                      t         S r*   r#   r,   r-   r.   r/   z)BaseOpenAILLMService.InputParams.<lambda>j   s    ) r-      
max_tokensc                      t         S r*   r#   r,   r-   r.   r/   z)BaseOpenAILLMService.InputParams.<lambda>k   s    Y r-   r2   c                      t         S r*   r#   r,   r-   r.   r/   z)BaseOpenAILLMService.InputParams.<lambda>l   s    I r-   r0   service_tierextra)r3   r4   r5   r6   r   rB   r
   floatr8   rD   rF   r7   rH   rJ   rL   rO   r2   rR   strdictrS   r   r   r,   r-   r.   InputParamsr=   J   s   	& .3-$3.
8E? 	
 -2-$3-
(5/ 	
 $4E!LhsmL',=NSV[^'_Xe__  %Ta8x}8!&7HSUX!YxY$):KPQ$R
HSMR/4EV[\/]x}]&+<M&NhsmN*/*ExS#X'Er-   rW   Ng      @F)modelapi_keybase_urlorganizationprojectdefault_headersrR   paramssettingsretry_timeout_secsretry_on_timeoutrX   r]   rR   r^   r_   r`   ra   c                   t        ddt        t        t        t        t        dt        t        ddi       }|||_        ||	s|j                  |_        |j                  |_        |j
                  |_        |j                  |_        |j                  |_        |j                  |_        |j                  |_	        t        |j                  t              r|j                  |_        |	|j                  |	       t        | <  d	d|i| || _        |
| _        || _        d| _         | j(                  d	|||||d|| _        | j,                  j.                  r/t1        j2                  |  d| j,                  j.                          yy)
a  Initialize the BaseOpenAILLMService.

        Args:
            model: The OpenAI model name to use (e.g., "gpt-4.1", "gpt-4o").

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAILLMSettings(model=...)`` instead.

            api_key: OpenAI API key. If None, uses environment variable.
            base_url: Custom base URL for OpenAI API. If None, uses default.
            organization: OpenAI organization ID.
            project: OpenAI project ID.
            default_headers: Additional HTTP headers to include in requests.
            service_tier: Service tier to use (e.g., "auto", "flex", "priority").
            params: Input parameters for model configuration and behavior.

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAILLMSettings(...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            retry_timeout_secs: Request timeout in seconds. Defaults to 5.0 seconds.
            retry_on_timeout: Whether to retry the request once if it times out.
            **kwargs: Additional arguments passed to the parent LLMService.
        zgpt-4oNF)rX   system_instructionrB   rD   rF   rH   rL   rJ   rO   r2   filter_incomplete_user_turnsuser_turn_completion_configrS   r_    )rY   rZ   r[   r\   r]   z: Using system instruction: r,   )r(   r   rX   rB   rD   rF   rH   rL   rO   r2   
isinstancerS   rV   apply_updatesuper__init___service_tier_retry_timeout_secs_retry_on_timeout_full_model_namecreate_client_clientr;   rc   r   debug)selfrX   rY   rZ   r[   r\   r]   rR   r^   r_   r`   ra   kwargsdefault_settings	__class__s                 r.   rj   zBaseOpenAILLMService.__init__o   s   T -#'&! "+).(,
" %*" h171I1I.060G0G-$*KK!+1+=+=(%+\\"*0*;*;'5;5Q5Q2&,,-)/ & ))(3 	
%	
	
 *#5 !1%')t)) 
%+
 
 >>,,LLD6!=dnn>_>_=`ab -r-   c                 d    t        ||||t        t        j                  ddd            |      S )a  Create an AsyncOpenAI client instance.

        Args:
            api_key: OpenAI API key.
            base_url: Custom base URL for the API.
            organization: OpenAI organization ID.
            project: OpenAI project ID.
            default_headers: Additional HTTP headers.
            **kwargs: Additional client configuration arguments.

        Returns:
            Configured AsyncOpenAI client instance.
        d   i  N)max_keepalive_connectionsmax_connectionskeepalive_expiry)limits)rY   rZ   r[   r\   http_clientr]   )r   r   httpxLimits)rr   rY   rZ   r[   r\   r]   rs   s          r.   ro   z"BaseOpenAILLMService.create_client   s>    , %/||.14Z^
 ,
 	
r-   returnc                      y)zCheck if this service can generate processing metrics.

        Returns:
            True, as OpenAI service supports metrics generation.
        Tr,   rr   s    r.   can_generate_metricsz)BaseOpenAILLMService.can_generate_metrics   s     r-   full_model_namec                     || _         y)zvSet the full AI model name.

        Args:
            full_model_name: The full name of the AI model to use.
        Nrn   )rr   r   s     r.   set_full_model_namez(BaseOpenAILLMService.set_full_model_name   s     !0r-   c                     | j                   S )zqGet the current full model name.

        Returns:
            The full name of the AI model being used.
        r   r   s    r.   get_full_model_namez(BaseOpenAILLMService.get_full_model_name  s     $$$r-   params_from_contextc                 X  K   | j                  |      }| j                  rZ	 t        j                   | j                  j
                  j                  j                  di || j                         d{   }|S  | j                  j
                  j                  j                  di | d{   }|S 7 @# t        t        j                  f$ rV t        j                  |  d        | j                  j
                  j                  j                  di | d{  7  }|cY S w xY w7 zw)ac  Get streaming chat completions from OpenAI API with optional timeout and retry.

        Args:
            params_from_context: Parameters, derived from the LLM context, to
                use for the chat completion. Contains messages, tools, and tool
                choice.

        Returns:
            Async stream of chat completion chunks.
        )timeoutNz): Retrying chat completion due to timeoutr,   )build_chat_completion_paramsrm   asynciowait_forrp   chatcompletionscreaterl   r   TimeoutErrorr   rq   )rr   r   r^   chunkss       r.   get_chat_completionsz)BaseOpenAILLMService.get_chat_completions  s	     223FG!!	&//8DLL%%1188B6BDLdLd    @4<<,,88??I&IIFM $W%9%9: v%NOPCt||00<<CCMfMMM	 JsT   D*AB6 3B44B6 94D*-D(.D*4B6 6A$D%DD%"D*$D%%D*c                    | j                   j                  dddi| j                   j                  | j                   j                  | j                   j                  | j                   j
                  | j                   j                  | j                   j                  | j                   j                  | j                  | j                  nt        d}|j                  |       |j                  | j                   j                         | j                   j                  rc|j                  dg       }|r/|d   j                  d      dk(  rt        j                   |  d       d| j                   j                  d	g|z   |d<   |S )
a  Build parameters for chat completion request.

        Subclasses can override this to customize parameters for different providers.

        Args:
            params_from_context: Parameters, derived from the LLM context, to
                use for the chat completion. Contains messages, tools, and tool
                choice.

        Returns:
            Dictionary of parameters for the chat completion request.
        Tinclude_usage)rX   streamstream_optionsrB   rD   rF   rH   rL   rO   r2   rR   messagesr   rolesystem\: Both system_instruction and a system message in context are set. Using system_instruction.r   content)r;   rX   rB   rD   rF   rH   rL   rO   r2   rk   r   updaterS   rc   getr   warning)rr   r   r^   r   s       r.   r   z1BaseOpenAILLMService.build_chat_completion_params*  s:    ^^)).5!%!A!A $ ? ?NN''>>55^^))..33%)^^%I%I262D2D2PD..V_
 	)*dnn**+ >>,,zz*b1HHQKOOF3x?f 1 1
 "dnn.O.OP""F: r-   contextrO   rc   c                   K   t        |t              r"| j                         }|j                  |      }n,t	        |j
                  |j                  |j                        }| j                  |      }d|d<   |j                  dd       |O|j                  dg       }|r/|d   j                  d      d	k(  rt        j                  |  d
       d	|dg|z   |d<   |d|v r||d<   n||d<    | j                  j                  j                  j                   di | d{   }|j"                  d   j$                  j&                  S 7 'w)al  Run a one-shot, out-of-band (i.e. out-of-pipeline) inference with the given LLM context.

        Args:
            context: The LLM context containing conversation history.
            max_tokens: Optional maximum number of tokens to generate. If provided,
                overrides the service's default max_tokens/max_completion_tokens setting.
            system_instruction: Optional system instruction to use for this inference.
                If provided, overrides any system instruction in the context.

        Returns:
            The LLM's response as a string, or None if no response is generated.
        r   toolstool_choiceFr   r   Nr   r   r   r   r   r   r2   rO   r,   )rg   r   get_llm_adapterget_llm_invocation_paramsr   r   r   r   r   popr   r   r   rp   r   r   r   choicesmessager   )	rr   r   rO   rc   adapterinvocation_paramsr^   r   responses	            r.   run_inferencez"BaseOpenAILLMService.run_inferenceX  s]    $ gz***,G;B;\;\< !: ))GL_L_!
 223DE !x

#T* )zz*b1HHQKOOF3x?f 1 1 ,4@R"S!TW_!_F: !&&02<./'1|$ >**66==GGG"**222 Hs   DEE(Ec                   K   t        j                  |  d|j                                 |j                         }|D ]x  }|j	                  d      dk(  s|d   j                         }t        j                  |      j                  d      }|j	                  dd      }d|d	d
dd| idg|d<   |d= |d= z t        ||j                  |j                        }| j                  |       d {   }|S 7 w)Nz,: Generating chat from LLM-specific context 	mime_typez
image/jpegdatazutf-8r   rf   text)typer   	image_urlurlzdata:image/jpeg;base64,)r   r   r   )r   rq   get_messages_for_loggingget_messagesr   readbase64	b64encodedecoder   r   r   r   )	rr   r   r   r   	raw_bytesencoded_imager   r^   r   s	            r.   )_stream_chat_completions_specific_contextz>BaseOpenAILLMService._stream_chat_completions_specific_context  s     	f@AaAaAc@de	
 6=5I5I5K   	)G{{;'<7#FO002	 & 0 0 ; B B7 K{{9b1#T2 +&+/F}o-V%W&	" FOK(	)" +W]]@S@S
 0088 9s   AC7BC7.C5/C7c                    K   | j                         }t        j                  |  d|j                  |              |j	                  |      }| j                  |       d {   }|S 7 w)Nz): Generating chat from universal context )r   r   rq   r   r   r   )rr   r   r   r^   r   s        r.   *_stream_chat_completions_universal_contextz?BaseOpenAILLMService._stream_chat_completions_universal_context  sm      &&(f=g>^>^_f>g=hi	
 -4,M,Mg,V0088 9s   AA*!A("A*c           	      
  K   g }g }g }d}d}d}d}| j                          d {    t        |t              r| j                  |      n| j	                  |       d {   }	t
        d        }
 |
|	      4 d {   }|2 3 d {   }|j                  r|j                  j                  r |j                  j                  j                  nd }|j                  j                  r |j                  j                  j                  nd }t        |j                  j                  |j                  j                  |j                  j                  ||      }| j                  |       d {    |j                   r8| j#                         |j                   k7  r| j%                  |j                          |j&                  t)        |j&                        dk(  rT| j+                          d {    |j&                  d   j,                  s|j&                  d   j,                  j.                  r|j&                  d   j,                  j.                  d   }|j0                  |k7  r>|j3                  |       |j3                  |       |j3                  |       d}d}d}|dz  }|j4                  r;|j4                  j6                  r%||j4                  j6                  z  }|j8                  }|j4                  sr|j4                  j:                  s||j4                  j:                  z  }|j&                  d   j,                  j<                  r<| j?                  |j&                  d   j,                  j<                         d {    tA        |j&                  d   j,                  d      s)|j&                  d   j,                  jB                  jE                  d      s]| jG                  tI        |j&                  d   j,                  jB                  d                d {    7 7 7 7 7 7 M7 7 6 d d d       d {  7   n# 1 d {  7  sw Y   nxY w|r|r|j3                  |       |j3                  |       |j3                  |       g }tK        |||      D ]9  \  }}}tM        jN                  |      }|j3                  tQ        ||||             ; | jS                  |       d {  7   y y y w)	Nr   rf   c                  K   | j                         }	 | t        |d      r|j                          d {    t        | d      r| j                          d {    y t        | d      r| j                          d {    y y 7 O7 -7 
# t        |d      r|j                          d {  7   t        | d      r| j                          d {  7   w t        | d      r| j                          d {  7   w w xY ww)Nacloseclose)	__aiter__hasattrr   r   )r   
chunk_iters     r.   _closingz7BaseOpenAILLMService._process_context.<locals>._closing  s     ))+J*   :x0$++---67+ ,,.((VX. --/)) /	 . )) :x0$++---67+ ,,.((VX. --/)) /sm   DB DB#DB
$DBD
DD D.B1/$DC%D9C<:DD)prompt_tokenscompletion_tokenstotal_tokenscache_read_input_tokensreasoning_tokensrN   audio
transcript)r   tool_call_idfunction_name	arguments)*start_ttfb_metricsrg   r   r   r   r   usageprompt_tokens_detailscached_tokenscompletion_tokens_detailsr   r   r   r   r   start_llm_usage_metricsrX   r   r   r   lenstop_ttfb_metricsdelta
tool_callsindexappendfunctionnameidr   r   _push_llm_textr   r   r   
push_framer   zipjsonloadsr!   run_function_calls)rr   r   functions_listarguments_listtool_id_listfunc_idxr   r   r   chunk_streamr   r   chunkr   r   tokens	tool_callfunction_callstool_ids                      r.   _process_contextz%BaseOpenAILLMService._process_context  sx    	%%'''
 '#34 ::7C@@I
 
	* 
	* L) D	d D	dZ) Cd Cde;; !;;<< 99GG! " !;;@@ ==NN! %
 +&+kk&?&?*/++*G*G%*[[%=%=0=)9F 66v>>>;;4#;#;#=#L,,U[[9==(C,>!,C,,...}}Q'--==#))44 !&a 0 6 6 A A! DI (2&--m<&--i8$++L9(*$&	') A ))i.@.@.E.E%););)@)@@'0|| ))i.@.@.J.J!Y%7%7%A%AA	]]1%++33--emmA.>.D.D.L.LMMM U]]1-33W=%--PQBRBXBXB^B^BbBb C //,u}}Q7G7M7M7S7ST`7a*bcccE 	(
6D	dCd& ? /B N dG  *D	d D	d D	d D	d D	dT Y!!-0!!),-N586 1y' !JJy1	%%' '%,&3"+		 )).999+ '=s   "TP'9TP*T;P-<T?QP=P0P=
CQ'P3(BQ)P6*DQ4QA3Q?P9 )Q+2QAQ P;!Q'T*T-T0P=3Q6Q9Q;Q=Q>T	Q
TQ"QQ"BT;S><Tframe	directionc                   K   t         |   ||       d{    d}t        |t              r|j                  }ngt        |t
              r|j                  }nJt        |t              r t        j                  |j                        }n| j                  ||       d{    |r	 | j                  t                      d{    | j                          d{    | j                  |       d{    | j'                          d{    | j                  t)                      d{    yy7 7 7 u7 _7 H# t        j                  $ r@}| j!                  d       d{  7   | j#                  d|       d{  7   Y d}~d}~wt$        $ r)}| j#                  d| |       d{  7   Y d}~d}~ww xY w7 7 # | j'                          d{  7   | j                  t)                      d{  7   w xY ww)aL  Process frames for LLM completion requests.

        Handles OpenAILLMContextFrame, LLMContextFrame, LLMMessagesFrame,
        and LLMUpdateSettingsFrame to trigger LLM completions and manage
        settings.

        Args:
            frame: The frame to process.
            direction: The direction of frame processing.
        Non_completion_timeoutzLLM completion timeout)	error_msg	exceptionzError during completion: )ri   process_framerg   r   r   r   r   r   from_messagesr   r   r   start_processing_metricsr   r}   TimeoutException_call_event_handler
push_error	Exceptionstop_processing_metricsr   )rr   r   r   r   eru   s        r.   r   z"BaseOpenAILLMService.process_frameJ  s     g#E9555e23mmG/mmG/0 '44U^^DG//%333Aoo&?&ABBB33555++G444 22444oo&=&?@@@ ! 	6 4 C54)) W../FGGGoo0HTUoVVV ^oo2KA30O[\o]]]^ 5@ 22444oo&=&?@@@s   HD2BHD5H%D= D7D= D9D= 2D;3D= 7H
G H+G,H5H7D= 9D= ;D= =G F$E'%F FFG G F;0F31F;6G ;G  G HHHG!H=H >HH)NNNNN)NN)%r3   r4   r5   r6   r(   Settingsr8   r   rW   r
   rU   r	   rT   boolrj   ro   r   r   r   r   r   r   r   rV   r   r   r   r7   r   r   r   r&   r   r   r    r   __classcell__)ru   s   @r.   r:   r:   =   s    !H  #Fi #FP  $7;&*(,04.1+0`c }`c "'#s("34`c sm`c %`c ,-`c %UO`c #4.`cH !
Fd 03 0%#<	(	)<,@Y ,^b ,b %),0	83..83 SM83 %SM	83
 
#83t ' 	(	) D!	(	) F:.>.K F: F:P'A 'A> 'A 'Ar-   r:   );r6   r   r   r   
contextlibr   dataclassesr   r   typingr   r   r   r	   r
   r}   logurur   openair   r   r   r   r   openai.types.chatr   r   pydanticr   r   )pipecat.adapters.services.open_ai_adapterr   pipecat.frames.framesr   r   r   r   r   r   pipecat.metrics.metricsr   *pipecat.processors.aggregators.llm_contextr   1pipecat.processors.aggregators.openai_llm_contextr   r   "pipecat.processors.frame_processorr    pipecat.services.llm_servicer!   r"   pipecat.services.settingsr+   r$   r%   (pipecat.utils.tracing.service_decoratorsr&   r(   r:   r,   r-   r.   <module>r     s    T    * ( 5 5    N % O  2 A > H = < ? W W WtA: tAr-   