
    qi`                        d Z ddlZddlmZmZ ddlmZmZmZ erddl	m
Z
 ddlmZ ddlmZmZ dZd	Zd
ZdZdZdZdZe G d d             Ze G d d             Ze G d d             Ze G d d             Z G d d      Zy)zUtility for context summarization in LLM services.

This module provides reusable functionality for automatically compressing conversation
context when token limits are reached, enabling efficient long-running conversations.
    N)	dataclassfield)TYPE_CHECKINGListOptional)
LLMService)logger)
LLMContextLLMSpecificMessageg      ^@   
   i  皙?d   a  You are summarizing a conversation between a user and an AI assistant.

Your task:
1. Create a concise summary that preserves:
   - Key facts, decisions, and agreements
   - Important context needed to continue the conversation
   - User preferences and requirements mentioned
   - Any unresolved questions or action items

2. Format:
   - Use clear, factual statements
   - Group related information
   - Prioritize information likely to be referenced later
   - Keep the summary concise to fit within the specified token budget

3. Omit:
   - Greetings and small talk
   - Redundant information
   - Tangential discussions that were resolved

The conversation transcript follows. Generate only the summary, no other text.c                       e Zd ZU dZdZeed<   dZeed<   dZe	e
   ed<   dZe
ed	<   dZe	d
   ed<   eZeed<   d Zede
fd       Zy)LLMContextSummaryConfigaJ  Configuration for summary generation parameters.

    Contains settings that control how a summary is generated. Used by both
    automatic and manual summarization modes.

    Parameters:
        target_context_tokens: Maximum token size for the generated summary.
            This value is passed directly to the LLM as the max_tokens parameter
            when generating the summary. Should be sized appropriately to allow
            the summary plus recent preserved messages to fit within reasonable
            context limits.
        min_messages_after_summary: Number of recent messages to preserve
            uncompressed after each summarization. These messages maintain
            immediate conversational context.
        summarization_prompt: Custom prompt for the LLM to use when generating
            summaries. If None, uses DEFAULT_SUMMARIZATION_PROMPT.
        summary_message_template: Template for formatting the summary when
            injected into context. Must contain ``{summary}`` as a placeholder
            for the generated summary text. Allows applications to wrap the
            summary in custom delimiters (e.g., XML tags) so that system
            prompts can distinguish summaries from live conversation.
        llm: Optional separate LLM service for generating summaries. When set,
            summarization requests are sent to this service instead of the
            pipeline's primary LLM. Useful for routing summarization to a
            cheaper/faster model (e.g., Gemini Flash) while keeping an
            expensive model for conversation. If None, uses the pipeline LLM.
        summarization_timeout: Maximum time in seconds to wait for the LLM to
            generate a summary. If the call exceeds this timeout, summarization
            is aborted with an error and future summarizations are unblocked.
    p  target_context_tokensr   min_messages_after_summaryNsummarization_promptConversation summary: {summary}summary_message_templater   llmsummarization_timeoutc                 l    | j                   dk  rt        d      | j                  dk  rt        d      y)"Validate configuration parameters.r   &target_context_tokens must be positivez/min_messages_after_summary must be non-negativeN)r   
ValueErrorr   selfs    a/opt/pipecat/venv/lib/python3.12/site-packages/pipecat/utils/context/llm_context_summarization.py__post_init__z%LLMContextSummaryConfig.__post_init__a   s;    %%*EFF**Q.NOO /    returnc                 *    | j                   xs t        S zGet the summarization prompt to use.

        Returns:
            The custom prompt if set, otherwise the default summarization prompt.
        r   DEFAULT_SUMMARIZATION_PROMPTr   s    r    summary_promptz&LLMContextSummaryConfig.summary_prompth        ((H,HHr"   )__name__
__module____qualname____doc__r   int__annotations__r   r   r   strr   r   DEFAULT_SUMMARIZATION_TIMEOUTr   floatr!   propertyr(    r"   r    r   r   9   sw    > "&3%&''*.(3-.$EcE"&C,	&#@5@P I I Ir"   r   c                   \    e Zd ZU dZdZee   ed<   dZee   ed<    e	e
      Ze
ed<   d Zy	)
!LLMAutoContextSummarizationConfigu  Configuration for automatic context summarization.

    Controls when conversation context is automatically compressed and how
    that summary is generated. Summarization is triggered when either the
    token limit or the unsummarized message count threshold is exceeded.

    At least one of ``max_context_tokens`` and ``max_unsummarized_messages``
    must be set. Set the other to ``None`` to disable that threshold.

    Parameters:
        max_context_tokens: Maximum allowed context size in tokens. When this
            limit is reached, summarization is triggered to compress the context.
            The tokens are calculated using the industry-standard approximation
            of 1 token ≈ 4 characters. Set to ``None`` to disable token-based
            triggering.
        max_unsummarized_messages: Maximum number of new messages that can
            accumulate since the last summary before triggering a new
            summarization. This ensures regular compression even if token
            limits are not reached. Set to ``None`` to disable message-count
            triggering.
        summary_config: Configuration for summary generation parameters
            (prompt, token budget, messages to keep). If not provided, uses
            default ``LLMContextSummaryConfig`` values.
    @  max_context_tokens   max_unsummarized_messages)default_factorysummary_configc                    | j                   | j                  t        d      | j                   | j                   dk  rt        d      | j                  | j                  dk  rt        d      | j                   L| j                  j                  | j                   kD  r(t        | j                   dz        | j                  _        yyy)r   NLAt least one of max_context_tokens and max_unsummarized_messages must be setr   #max_context_tokens must be positive   ,max_unsummarized_messages must be at least 1r   )r8   r:   r   r<   r   r.   r   s    r    r!   z/LLMAutoContextSummarizationConfig.__post_init__   s    ""*t/M/M/U^  "".43J3Ja3OBCC))5$:X:X[\:\KLL ##/##99D<S<SS 9<D<S<SVY<Y8ZD5 T 0r"   N)r*   r+   r,   r-   r8   r   r.   r/   r:   r   r   r<   r!   r4   r"   r    r6   r6   r   s>    2 )-,/1x}1.3D[.\N+\[r"   r6   c                       e Zd ZU dZdZee   ed<   dZeed<   dZ	ee   ed<   dZ
eed	<   d
Zee   ed<   dZeed<   d
Zed   ed<   eZeed<   d Zedefd       ZdefdZy
)LLMContextSummarizationConfiga  Configuration for context summarization behavior.

    .. deprecated:: 0.0.104
        Use :class:`LLMAutoContextSummarizationConfig` with a nested
        :class:`LLMContextSummaryConfig` instead::

            LLMAutoContextSummarizationConfig(
                max_context_tokens=8000,
                max_unsummarized_messages=20,
                summary_config=LLMContextSummaryConfig(
                    target_context_tokens=6000,
                    min_messages_after_summary=4,
                ),
            )

    Parameters:
        max_context_tokens: Maximum allowed context size in tokens.
            Set to ``None`` to disable token-based triggering.
        target_context_tokens: Maximum token size for the generated summary.
        max_unsummarized_messages: Maximum new messages before triggering summarization.
            Set to ``None`` to disable message-count triggering.
        min_messages_after_summary: Number of recent messages to preserve.
        summarization_prompt: Custom prompt for summary generation.
    r7   r8   r   r   r9   r:   r   r   Nr   r   r   r   r   r   c                    t        j                  dt        d       | j                  | j                  t        d      | j                  | j                  dk  rt        d      | j                  dk  rt        d      | j                  6| j                  | j                  kD  rt        | j                  d	z        | _        | j                  | j                  d
k  rt        d      | j                  dk  rt        d      y)r   zLLMContextSummarizationConfig is deprecated. Use LLMAutoContextSummarizationConfig with a nested LLMContextSummaryConfig instead.   )
stacklevelNr>   r   r?   r   r   r@   rA   z+min_messages_after_summary must be positive)	warningswarnDeprecationWarningr8   r:   r   r   r.   r   r   s    r    r!   z+LLMContextSummarizationConfig.__post_init__   s   c		
 ""*t/M/M/U^  "".43J3Ja3OBCC%%*EFF ##/**T-D-DD *-T-D-Ds-J)KD&))5$:X:X[\:\KLL**Q.JKK /r"   r#   c                 *    | j                   xs t        S r%   r&   r   s    r    r(   z,LLMContextSummarizationConfig.summary_prompt   r)   r"   c                     t        | j                  | j                  t        | j                  | j
                  | j                  | j                  | j                  | j                              S )zConvert to the new :class:`LLMAutoContextSummarizationConfig`.

        Returns:
            An equivalent ``LLMAutoContextSummarizationConfig`` instance.
        )r   r   r   r   r   r   )r8   r:   r<   )
r6   r8   r:   r   r   r   r   r   r   r   r   s    r    to_auto_configz,LLMContextSummarizationConfig.to_auto_config   s_     1#66&*&D&D2&*&@&@+/+J+J%)%>%>)-)F)FHH&*&@&@
 	
r"   )r*   r+   r,   r-   r8   r   r.   r/   r   r:   r   r   r0   r   r   r1   r   r2   r!   r3   r(   r6   rL   r4   r"   r    rC   rC      s    2 )-,!%3%/1x}1&''*.(3-.$EcE"&C,	&#@5@L< I I I
 A 
r"   rC   c                   ,    e Zd ZU dZee   ed<   eed<   y)LLMMessagesToSummarizezResult of get_messages_to_summarize operation.

    Parameters:
        messages: Messages to include in the summary
        last_summarized_index: Index of the last message being summarized
    messageslast_summarized_indexN)r*   r+   r,   r-   r   dictr/   r.   r4   r"   r    rN   rN     s     4jr"   rN   c            	           e Zd ZdZededefd       Zededefd       Z	ede
e   ded	edefd
       Zedededefd       Zede
e   defd       Zy)LLMContextSummarizationUtilu?  Utility providing context summarization capabilities for LLM processing.

    This utility enables automatic conversation context compression when token
    limits are reached. It provides functionality for both aggregators
    (which decide when to summarize) and LLM services (which generate summaries).

    Key features:
    - Token estimation using character-count heuristics (chars // 4)
    - Smart message selection (preserves system messages and recent context)
    - Function call awareness (avoids summarizing incomplete tool interactions)
    - Flexible transcript formatting for summarization
    - Maximum summary token calculation with safety buffers

    Usage:
        Use the static methods directly on the class:

        tokens = LLMContextSummarizationUtil.estimate_context_tokens(context)
        result = LLMContextSummarizationUtil.get_messages_to_summarize(context, 4)
        transcript = LLMContextSummarizationUtil.format_messages_for_summary(messages)

    Note:
        Token estimation uses the industry-standard heuristic of 1 token ≈ 4 characters.
    textr#   c                 ,    | syt        |       t        z  S )u,  Estimate token count for text using character count heuristic.

        Uses the industry-standard approximation of 1 token ≈ 4 characters.
        This works well across different content types (prose, code, etc.)
        and languages.

        Note:
            For more accurate token counts, use the model's official tokenizer.
            This is a rough estimate suitable for threshold checks and budgeting.

        Args:
            text: Text to estimate tokens for

        Returns:
            Estimated token count (characters // 4)
        r   )lenCHARS_PER_TOKEN)rT   s    r    estimate_tokensz+LLMContextSummarizationUtil.estimate_tokens*  s    $ 4yO++r"   contextc           
         d}| j                   D ]l  }t        |t              r|t        z  }|j	                  dd      }t        |t
              r|t        j                  |      z  }nvt        |t              rf|D ]a  }t        |t              s|j	                  dd      }|dk(  r)|t        j                  |j	                  dd            z  }T|dv sY|t        z  }c d|v r|d   }t        |t              rv|D ]q  }t        |t              s|j	                  di       }t        |t              s7|t        j                  |j	                  d	d      |j	                  d
d      z         z  }s d|v sd|t        z  }o |S )a  Estimate total token count for a context.

        Calculates an approximate token count by analyzing all messages,
        including text content, tool calls, and structural overhead.

        Args:
            context: LLM context to estimate.

        Returns:
            Estimated total token count including:
            - Message content (text, images)
            - Tool calls and their arguments
            - Tool results
            - Structural overhead (TOKEN_OVERHEAD_PER_MESSAGE per message)
        r   content typerT   )	image_urlimage
tool_callsfunctionname	argumentstool_call_id)rO   
isinstancer   TOKEN_OVERHEAD_PER_MESSAGEgetr0   rS   rX   listrQ   IMAGE_TOKEN_ESTIMATE)	rY   totalmessager[   item	item_typer`   	tool_callfuncs	            r    estimate_context_tokensz3LLMContextSummarizationUtil.estimate_context_tokens@  s   " '' )	4G '#56 //E kk)R0G'3'4DDWMMGT*# :D!$-$(HHVR$8	$.!%@%P%P $ 4& E '*@@!%99E: w&$\2
j$/%/ "	%i6#,==R#@D)$5 %)D)T)T$(HHVR$8488KQS;T$T*" !"	" (33S)	4V r"   rO   	start_idxsummary_endc                    i }t        ||      D ]  }| |   }t        |t              r|j                  d      }|dk(  rWd|v rS|j                  dg       }t        |t              r1|D ],  }t        |t
              s|j                  d      }	|	s(|||	<   . |dk(  s|j                  d      }	|	s|	|v s|j                  |	        |rt        |j                               S y)a  Find the earliest message index with incomplete function calls.

        Scans messages from ``start_idx`` up to (but not including)
        ``summary_end`` to identify tool calls whose responses either don't
        exist yet or fall in the kept portion of the context (>= summary_end).
        This prevents summarizing tool call requests when their responses would
        remain in the kept context as orphans, which the OpenAI API rejects.

        Args:
            messages: List of messages to check.
            start_idx: Index to start checking from.
            summary_end: Exclusive upper bound for the scan (the first kept
                message index). Only tool responses within this range count as
                completing a call; responses beyond it are treated as absent,
                leaving the call "in progress".

        Returns:
            Index of first message with function call in progress, or -1 if all
            function calls are complete within the scanned range.
        role	assistantr`   idtoolrd   )	rangere   r   rg   rh   rQ   popminvalues)
rO   rq   rr   pending_tool_callsimsgrt   r`   rn   rd   s
             r    1_get_earliest_function_call_not_resolved_in_rangezMLLMContextSummarizationUtil._get_earliest_function_call_not_resolved_in_range  s    2 .0y+. 	9A1+C #12776?D {"|s': WW\26
j$/%/ E	%i6+4==+>L+CD 2< @	E v~"ww~6L4F$F&**<81	96 )00233r"   min_messages_to_keepc                    | j                   }t        |      |k  rt        g d      S t        d t	        |      D        d      }|dk\  r|dz   }nd}t        |      |z
  }||k\  rt        g d      S t
        j                  |||      }|dk\  rF||k  rAt        j                  d| d| d       ||z
  }|}|dkD  rt        j                  d	| d
       ||k\  rt        g d      S ||| }|dz
  }	t        ||	      S )a  Determine which messages should be included in summarization.

        Intelligently selects messages for summarization while preserving:
        - The first system message (defines assistant behavior)
        - The last N messages (maintains immediate conversation context)
        - Incomplete function call sequences (preserves tool interaction integrity)

        Args:
            context: The LLM context containing all messages.
            min_messages_to_keep: Number of recent messages to exclude from
                summarization.

        Returns:
            LLMMessagesToSummarize containing the messages to summarize and the
            index of the last message included.
        rx   )rO   rP   c              3   n   K   | ]-  \  }}t        |t              s|j                  d       dk(  r| / yw)rt   systemN)re   r   rg   ).0r~   r   s      r    	<genexpr>zHLLMContextSummarizationUtil.get_messages_to_summarize.<locals>.<genexpr>  s7      As!#'9:swwvRZ?Z s   35r   r@   z?ContextSummarization: Found function call in progress at index z;, stopping summary before it (was going to summarize up to )zContextSummarization: Skipping zV messages with function calls in progress (will summarize after results are available))
rO   rV   rN   next	enumeraterS   r   r	   debuginfo)
rY   r   rO   first_system_indexsummary_startrr   function_call_startskipped_messagesmessages_to_summarizerP   s
             r    get_messages_to_summarizez5LLMContextSummarizationUtil.get_messages_to_summarize  sb   ( ##x=00)2RPP "'1
 
 ".2MM (m&::K')2RPP (YY- 	
 !#(;k(ILLQReQf gLLW=XY[
  +-@@-K!#56F5G H^ _
 K')2RPP ({ C +a%*BW
 	
r"   c           	         g }| D ]  }t        |t              r|j                  dd      }|j                  dd      }t        |t              r|}n}t        |t              rbg }|D ]I  }t        |t
              s|j                  d      dk(  s)|j                  |j                  dd             K dj                  |      }nt        |      }|r&|j                         }|j                  | d|        d	|v r|j                  d	g       }	t        |	t              rw|	D ]r  }
t        |
t
              s|
j                  d
i       }t        |t
              s7|j                  dd      }|j                  dd      }|j                  d| d| d       t |dk(  s|j                  dd      }|j                  d| d|         dj                  |      S )zFormat messages as a transcript for summarization.

        Args:
            messages: Messages to format

        Returns:
            Formatted transcript string
        rt   unknownr[   r\   r]   rT    z: r`   ra   rb   rc   zTOOL_CALL: (r   rw   rd   zTOOL_RESULT[z]: z

)	re   r   rg   r0   rh   rQ   appendjoinupper)rO   transcript_partsr   rt   r[   rT   
text_partsrl   formatted_roler`   rn   ro   rb   argsrd   s                  r    format_messages_for_summaryz7LLMContextSummarizationUtil.format_messages_for_summary  s     ,	PC #127769-Dggi,G '3'GT*
# @D!$-$((62Bf2L"))$((62*>?@ xx
+7|!% ''>*:"TF(CD s" WW\26
j$/%/ V	%i6#,==R#@D)$5'+xx	'B'+xxR'@ 0 7 7+dV1TFRS8T UV v~"ww~yA '',|nCv(NOY,	P\ {{+,,r"   N)r*   r+   r,   r-   staticmethodr0   r.   rX   r
   rp   r   rQ   r   rN   r   r   r4   r"   r    rS   rS     s    0 ,c ,c , ,* = = = =~ 8t*8),8;>8	8 8t N
N
36N
	N
 N
` 9-d4j 9-S 9- 9-r"   rS   )r-   rG   dataclassesr   r   typingr   r   r   pipecat.services.llm_servicer   logurur	   *pipecat.processors.aggregators.llm_contextr
   r   r1   rW   rf   ri   SUMMARY_TOKEN_BUFFERMIN_SUMMARY_TOKENSr'   r   r6   rC   rN   rS   r4   r"   r    <module>r      s     ( 0 07  U !&       R . 5I 5I 5Ip /[ /[ /[d [
 [
 [
| 	 	 	u- u-r"   