
    qi9                     *   d dl Z d dlmZmZ d dlmZ d dlZddlmZm	Z	 ddl
mZ ddlmZ  e       rd dlZdZ ej                   d	      Zd
eej&                  eeef   fdZ G d de      Ze G d d             Ze G d d             Z G d d      Zy)    N)	dataclassfield)IntEnum   )is_psutil_availableis_torch_xpu_available)logging)tracedContinuousBatchingLoggerreturnc                     t         j                  j                         rt        j                  d      } t         j                  j	                          t         j                  j                          t         j                  j                  |       j                  }t         j                  j                  |       }t         j                  j                  |       }nt               rt        j                  d      } t         j                  j	                          t         j                  j                          t         j                  j                  |       j                  }t         j                  j                  |       }t         j                  j                  |       }n.t         j                  j                  j                         rt         j                  j                  j                         rWt        j                  d      } t         j                  j                         }|t         j                  j!                         z
  }d}nt        j                  d      } t#               rMt%        j&                         j(                  }t%        j*                         j-                         j.                  }|}nt0        j3                  d       d}d}d}| |||fS )Ncudaxpumpsr   cpuzCannot get memory breakdown on CPU without psutil: returning 0 for all memory values. Please install psutil to get an actual memory breakdown.)torchr   is_availabledeviceempty_cachesynchronizeget_device_propertiestotal_memorymemory_reservedmemory_allocatedr   r   backendsr   is_builtdriver_allocated_memoryrecommended_max_memoryr   psutilvirtual_memorytotalProcessmemory_inforssloggererror)r   r   reserved_memoryallocated_memorys       f/opt/pipecat/venv/lib/python3.12/site-packages/transformers/generation/continuous_batching/requests.pyget_device_and_memory_breakdownr+   $   s   zz f%

 

 zz77?LL**44V< ::66v>		!e$				yy66v>KK))33F; 9955f=				(	(	*u~~/A/A/J/J/Le$yy88:'%))*J*J*LLe$ !00288L%~~/;;=AA.OLL< LO <2BBB    c                   $    e Zd ZdZdZdZdZdZdZy)RequestStatusz5Status of a generation request through its lifecycle.r         r      N)	__name__
__module____qualname____doc__PENDING
PREFILLINGDECODINGFINISHEDFAILED r,   r*   r.   r.   K   s    ?GJHHFr,   r.   c                   &   e Zd ZU dZeed<    ee      Zee	   ed<    ee      Z
ee	   ed<    ee      Zee   ed<   dZedz  ed<   ej                  Zeed	<    eej$                        Zeed
<   dZeeef   ed<   dZee   dz  ed<   defdZy)GenerationOutputa  Tracks the output of a generation request.

    Attributes:
        request_id (str): The ID of the generation request.
        prompt_ids (list[int]): The IDs of the prompt tokens.
        generated_tokens (list[int]): The generated tokens.
        logprobs (list[float]): The log probabilities of the generated tokens.
        error (Optional[str]): Any error message associated with the request. When None, the request was successful.
        status (RequestStatus): The status of the request.
        created_time (float): The time the request was created.
        lifespan (tuple[float, float]): The time the request was no longer pending and the time the request finished.
    
request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNr'   statuscreated_timer   r   lifespan
timestampsr   c                 <    | j                   t        j                  k(  S N)rD   r.   r9   selfs    r*   is_finishedzGenerationOutput.is_finishedn   s    {{m4444r,   )r2   r3   r4   r5   str__annotations__r   listrA   intrB   rC   floatr'   r.   r6   rD   timeperf_counterrE   rG   tuplerH   boolrM   r;   r,   r*   r=   r=   U   s     O!$7JS	7"'"=d3i=!$7Hd5k7E3:)11FM10A0ABL%B$,HeE5L!,%)JUd")5T 5r,   r=   c                   z   e Zd ZU dZeed<   ee   ed<   dZe	ed<   dZ
eed<    ee      Zee   ed	<    ee      Zee   ed
<    ee      Zee   ed<   dZeed<   dZeed<   ej$                  Zeed<   dZedz  ed<   dZeed<   dZe	ed<    eej0                        Zeed<   dZedz  ed<   dZeeef   ed<    ee      Zee   ed<   dZeed<   dZ eed<   d Z!e"defd       Z#e#jH                  d efd!       Z#e"dee   dz  fd"       Z%d# Z&defd$Z'defd%Z(e)d&ede	fd'       Z*d( Z+d) Z,d*edd fd+Z-d-d,Z.y).RequestStatea6  Tracks the state of a generation request through its lifecycle.

    Attributes:
        request_id (str): The ID of the generation request.
        initial_tokens (list[int]): The initial prompt tokens.
        num_children (int): The number of children requests
        full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
        prompt_ids (list[int] | None): The tokens IDs currently being processed.
        remaining_prompt_ids (list[int]): The initial tokens IDs remaining to be processed.
        static_outputs (list[int]): The generated tokens.
        allocated_blocks (int): The number of blocks allocated to the request.
        position_offset (int): The current position in the sequence for position_ids.
        status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                                SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
        max_new_tokens (int | None): The maximum number of new tokens to generate.
        eos_token_id (int): The ID of the end-of-sequence token.
        streaming (bool): Whether to stream tokens as they're generated
        created_time (float): The time the request was created.
        error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
    r>   initial_tokensFrecord_timestampsr   num_childrenr?   tokens_to_processremaining_prefill_tokensrB   allocated_blocksposition_offset_status   Nmax_new_tokensr   eos_token_id	streamingrE   r'   rF   rG   _timestamps_true_initial_tokens_new_tokens_limitc                 j    | j                   dn| j                   | _        | j                  d d  | _        y )Nrg   )rb   rh   rY   r]   rK   s    r*   __post_init__zRequestState.__post_init__   s1    /3/B/B/JPTPcPc(,(;(;A(>%r,   r   c                     | j                   S rJ   )r`   rK   s    r*   rD   zRequestState.status   s    ||r,   valuec                 (   | j                   t        j                  k(  r#t        j                         df| _        || _         y |t        j                  k(  r8| j
                  d   t        j                         f| _        | j                          || _         y )Nr   r   )r`   r.   r6   rS   rT   rG   r9   log_end_of_request)rL   rl   s     r*   rD   zRequestState.status   ss    <<=000!..0"5DM  m,,,!]]1-t/@/@/BCDM##%r,   c                 6    | j                   r| j                  S d S rJ   )rZ   re   rK   s    r*   rH   zRequestState.timestamps   s    #'#9#9tCtCr,   c                    t        | j                        }| j                         }| j                  d   | j                  z
  }| j                  d   | j                  z
  }t
        j                  d| j                   d|d|d|d|
       y )Nr   r/   Request z finished: prefill_len = z decode_len = z start_time = z end_time = )lenrY   generated_lenrG   rE   r&   infor>   )rL   prefill_len
decode_len
start_timeend_times        r*   rn   zRequestState.log_end_of_request   s    $--.'')
]]1%(9(99
==#d&7&77t''A;2B/J?RaT^Sbbodlcpq	
r,   c                     | j                   S )zCGet the current length of the sequence (prompt + generated tokens).)r_   rK   s    r*   current_lenzRequestState.current_len   s    ###r,   c                 ,    t        | j                        S )z*Get the number of tokens generated so far.)rr   rB   rK   s    r*   rs   zRequestState.generated_len   s    4(())r,   token_idc                 R   | j                   t        j                  k7  ry| j                  r-| j                  j                  t        j                                || j                  k(  xr | j                  dk7  }| j                         }|s|| j                  k  r)| j                  j                  |       |g| _        |dz  }n?t        j                  d| j                   d|        | j                  j!                          |s|| j                  k\  rt        j"                  | _         yy)zUpdate the request with a newly generated token and check for completion.

        Args:
            token_id: The token ID to add to the output sequence

        Returns:
            bool: True if the request is now complete, False otherwise
        Fr   r/   rq   z generated a useless token: T)rD   r.   r8   rZ   re   appendrS   rT   rc   rs   rh   rB   r\   r&   warningr>   popr9   )rL   r|   is_eosrz   s       r*   update_and_check_completionz(RequestState.update_and_check_completion   s    ;;-000 !!##D$5$5$78 T...J43D3D3J((* kD$:$::!!((2&.ZD"1KNNXdoo%66RS[R\]^!!%%'[D$:$::'00DKr,   c           
      n   d| j                    d| j                   d| j                          dt        | j                         dt        | j
                         d| j                   dt        | j                         d| j                   d	| j                   g	}d
dj                  |      z   dz   S )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zRequestState(
	z,
	z
))r>   r`   rs   rr   r\   r]   r_   rY   r^   rB   join)rL   msgs     r*   __repr__zRequestState.__repr__   s    $//*+dll^$$,,./0C 6 6789D$A$A BCD--./!#d&9&9":!;< 5 567 5 567

 #W\\#%66>>r,   c                 t   | j                   rI| j                  | j                   d | j                  z   | _        | j                  d| j                    | _        t        | j                  | j                  | j                  g | j
                  | j                  | j                  | j                  | j                  	      S )z7Convert the request state to a GenerationOutput object.N)	r>   rA   rB   rC   r'   rD   rE   rG   rH   )
rf   rY   rB   r=   r>   r'   rD   rE   rG   rH   rK   s    r*   to_generation_outputz!RequestState.to_generation_output   s    $$$($7$78Q8Q8S$TW[WlWl$lD!"&"5"56Q8Q8Q"RD**!22**;;**]]

 
	
r,   new_request_idc                    t        j                         }t        di d|d| j                  d| j                  d| j
                  dd d| j                  dd d| j                  d| j                  d	| j                  d
| j                  d| j                  d| j                  d|d|dfdg d| j                  d| j                  }| j                  dd |_        |S )ziFork the request into a new request with the same state expect for request_id, created_time and lifespan.r>   rY   r[   r\   NrB   r^   r_   r`   rb   rc   rd   rE   rG   r   re   r'   rZ   r;   )rS   rT   rX   rY   r[   r\   rB   r^   r_   rD   rb   rc   rd   r'   rZ   r]   )rL   r   tnew_requests       r*   forkzRequestState.fork  s!   " 
%
..
 **
 #44Q7	

 "2215
 "22
 !00
 KK
  ..
 **
 nn
 
 W
 
 **
  #44!
& 04/L/LQ/O,r,   c           	      j   | j                   dn!| j                   t        | j                        z
  }t        | j                  | j
                  | j                  z   | j                  | j                  || j                  | j                        }| j                  t        | j
                        z   |_
        |S )a  Creates an equivalent new request by removing the generated tokens and adding them to the initial prompt. The
        created request has THE SAME request_id. Notably, we can retrieve the original request from the created one with
        the _true_initial_tokens attribute.N)r>   rY   r[   rZ   rb   rc   rd   )rb   rr   rB   rX   r>   rY   r[   rZ   rc   rd   rf   )rL   rb   	new_states      r*   !create_equivalent_initial_requestz.RequestState.create_equivalent_initial_request(  s     "&!4!4!<4CVCVY\]a]r]rYsCs ..1F1FF**"44)**nn
	 *.)B)BSI\I\E])]	&r,   )r   rX   )/r2   r3   r4   r5   rN   rO   rP   rQ   rZ   rV   r[   r   r\   r]   rB   r^   r_   r.   r6   r`   rb   rc   rd   rS   rT   rE   rR   r'   rG   rU   re   rf   rh   rj   propertyrD   setterrH   rn   rz   rs   r
   r   r   r   r   r   r;   r,   r*   rX   rX   r   s   , OI#t#L##(#>tCy>*/+d3i  #("=d3i=cOS*22G]2!#NC$J#L#It0A0ABL%BE3:$,HeE5L!,$T:Ke: !#!'s'?
    ]]M   DDK$. D D
$S $*s *
 "C "D " "H?
"3 > 2r,   rX   c                   ,    e Zd ZdZdZdedededdfdZy)	FutureRequestStatezPTracks the current state of a request and the relevant information to update it.statehas_new_tokencomplete_blocksr   r   r   r   Nc                 .    || _         || _        || _        y rJ   r   )rL   r   r   r   s       r*   __init__zFutureRequestState.__init__@  s    
*.r,   )	r2   r3   r4   r5   	__slots__rX   rV   rQ   r   r;   r,   r*   r   r   :  s/    Z >I/l /4 /RU /Z^ /r,   r   )rS   dataclassesr   r   enumr   r   utilsr   r   utils.loggingr	   utils.metricsr
   r    TMP_TOKEN_ID	getLoggerr&   rU   r   rQ   r+   r.   r=   rX   r   r;   r,   r*   <module>r      s     (   @ $ #   
		5	6$Cu||S#s/J)K $CNG  5 5 58 D D DN	/ 	/r,   