
    qiQ                     4    d dl Z ddlmZ  G d de      ZdgZy)    N   )TokenizersBackendc                   R     e Zd ZdZ	 	 	 d	deee   z  dededz  dedef
 fdZ xZ	S )
ParakeetTokenizera   
    Inherits all methods from [`PreTrainedTokenizerFast`]. Users should refer to this superclass for more information regarding those methods,
    except for `_decode` which is overridden to adapt it to CTC decoding:
    1. Group consecutive tokens
    2. Filter out the blank token
    N	token_idsskip_special_tokensclean_up_tokenization_spacesgroup_tokensreturnc                     t        |t              r|g}|r%t        j                  |      D cg c]  }|d   	 }}|D cg c]  }|| j                  k7  s| }}t        |   d|||d|S c c}w c c}w )Nr   )r   r   r	    )
isinstanceint	itertoolsgroupbypad_token_idsuper_decode)	selfr   r   r	   r
   kwargstoken_grouptoken	__class__s	           d/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/parakeet/tokenization_parakeet.pyr   zParakeetTokenizer._decode   s     i%"I;D;L;LY;WXKQXIX )2PuUd>O>O5OUP	Pw 
 3)E
 	
 	
 Y Qs   A. A3A3)FNT)
__name__
__module____qualname____doc__r   listboolstrr   __classcell__)r   s   @r   r   r      sY     %*48!
c?
 "
 '+Tk	

 
 

 
    r   )r   tokenization_utils_tokenizersr   r   __all__r   r#   r   <module>r&      s%     >
) 
@ 
r#   