
    qi>                         d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ e G d
 de             Z G d de      Zy)zNVIDIA NIM API service implementation.

This module provides a service for interacting with NVIDIA's NIM (NVIDIA Inference
Microservice) API while maintaining compatibility with the OpenAI-style interface.
    )	dataclass)Optional)LLMTokenUsage)
LLMContext)OpenAILLMContext)OpenAILLMSettings)OpenAILLMService)_warn_deprecated_paramc                       e Zd ZdZy)NvidiaLLMSettingszSettings for NvidiaLLMService.N)__name__
__module____qualname____doc__     M/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/services/nvidia/llm.pyr   r      s    (r   r   c            
       |     e Zd ZU dZeZeed<   dddddededee   d	ee   f fd
Z	de
ez  f fdZdefdZ xZS )NvidiaLLMServicea]  A service for interacting with NVIDIA's NIM (NVIDIA Inference Microservice) API.

    This service extends OpenAILLMService to work with NVIDIA's NIM API while maintaining
    compatibility with the OpenAI-style interface. It specifically handles the difference
    in token usage reporting between NIM (incremental) and OpenAI (final summary).
    	_settingsz#https://integrate.api.nvidia.com/v1N)base_urlmodelsettingsapi_keyr   r   r   c                    t        d      }|t        dt         d       ||_        ||j                  |       t	        |   d|||d| d| _        d| _        d| _        d| _	        d| _
        y)	a  Initialize the NvidiaLLMService.

        Args:
            api_key: The API key for accessing NVIDIA's NIM API.
            base_url: The base URL for NIM API. Defaults to "https://integrate.api.nvidia.com/v1".
            model: The model identifier to use. Defaults to
                "nvidia/llama-3.1-nemotron-70b-instruct".

                .. deprecated:: 0.0.105
                    Use ``settings=OpenAILLMSettings(model=...)`` instead.

            settings: Runtime-updatable settings. When provided alongside deprecated
                parameters, ``settings`` values take precedence.
            **kwargs: Additional keyword arguments passed to OpenAILLMService.
        z&nvidia/llama-3.1-nemotron-70b-instruct)r   Nr   )r   r   r   r   Fr   )r   r
   r   apply_updatesuper__init___prompt_tokens_completion_tokens_total_tokens_has_reported_prompt_tokens_is_processing)selfr   r   r   r   kwargsdefault_settings	__class__s          r   r   zNvidiaLLMService.__init__*   s    2 -3[\ "7,=wG%*"
 ))(3a8FVaZ`a"#+0(#r   contextc                   K   d| _         d| _        d| _        d| _        d| _        	 t
        |   |       d{    d| _        | j                   dkD  s| j                  dkD  rb| j                   | j                  z   | _        t        | j                   | j                  | j                        }t
        | !  |       d{    yy7 7 # d| _        | j                   dkD  s| j                  dkD  rc| j                   | j                  z   | _        t        | j                   | j                  | j                        }t
        | !  |       d{  7   w w xY ww)a  Process a context through the LLM and accumulate token usage metrics.

        This method overrides the parent class implementation to handle NVIDIA's
        incremental token reporting style, accumulating the counts and reporting
        them once at the end of processing.

        Args:
            context: The context to process, containing messages and other information
                needed for the LLM interaction.
        r   FTN)prompt_tokenscompletion_tokenstotal_tokens)	r   r    r!   r"   r#   r   _process_contextr   start_llm_usage_metrics)r$   r(   tokensr'   s      r   r-   z!NvidiaLLMService._process_contextX   sO      "#+0("	>'*7333"'D""Q&$*A*AA*E%)%8%84;R;R%R"&"&"5"5&*&=&=!%!3!3
 g5f=== +F	 4 > #(D""Q&$*A*AA*E%)%8%84;R;R%R"&"&"5"5&*&=&=!%!3!3
 g5f=== +FsK   $EC CC BE C	EC 	EBEEEEr/   c                    K   | j                   sy| j                  s'|j                  dkD  r|j                  | _        d| _        |j                  | j
                  kD  r|j                  | _        yyw)a  Accumulate token usage metrics during processing.

        This method intercepts the incremental token updates from NVIDIA's API
        and accumulates them instead of passing each update to the metrics system.
        The final accumulated totals are reported at the end of processing.

        Args:
            tokens: The token usage metrics for the current chunk of processing,
                containing prompt_tokens and completion_tokens counts.
        Nr   T)r#   r"   r*   r   r+   r    )r$   r/   s     r   r.   z(NvidiaLLMService.start_llm_usage_metricsx   sk      "" //F4H4H14L"("6"6D/3D, ##d&=&==&,&>&>D# >s   A-A/)r   r   r   r   r   Settings__annotations__strr   r   r   r   r-   r   r.   __classcell__)r'   s   @r   r   r      sw     !H   >#04,$ ,$ 	,$
 },$ ,-,$\>.>.K >@?M ?r   r   N)r   dataclassesr   typingr   pipecat.metrics.metricsr   *pipecat.processors.aggregators.llm_contextr   1pipecat.processors.aggregators.openai_llm_contextr    pipecat.services.openai.base_llmr   pipecat.services.openai.llmr	   pipecat.services.settingsr
   r   r   r   r   r   <module>r=      sO    "  1 A N > 8 < 	) 	 	o?' o?r   