
    qi                     H    d dl mZ d dlmZ d dlmZ d dlmZmZ dede	fdZ
y)	    )Path)Any)TikTokenConverter)TIKTOKEN_VOCAB_FILETOKENIZER_FILEencoding
output_dirc                    t        |      }|j                  d       |dz  t        z  }|t        z  }|j                  j                  dd       t        |j                               }t        |j                               }	 ddlm} ddl	m
} t        | t
              r ||       }  || j                  |       t!        || j"                  | j$                        j'                         }
|
j)                  |       y# t        $ r:}t        |      }	d|	j                         v rt        d	      |t        d
      |d}~ww xY w)a  
    Converts given `tiktoken` encoding to `PretrainedTokenizerFast` and saves the configuration of converted tokenizer
    on disk.

    Args:
        encoding (`str` or `tiktoken.Encoding`):
            Tokenizer from `tiktoken` library. If `encoding` is `str`, the tokenizer will be loaded with
            `tiktoken.get_encoding(encoding)`.
        output_dir (`str`):
            Save path for converted tokenizer configuration file.
    T)exist_oktiktoken)parentsr   r   )get_encoding)dump_tiktoken_bpeblobfilezY`blobfile` is required to save a `tiktoken` file. Install it with `pip install blobfile`.zY`tiktoken` is required to save a `tiktoken` file. Install it with `pip install tiktoken`.N)
vocab_filepatternextra_special_tokens)r   mkdirr   r   parentstrabsoluter   r   tiktoken.loadr   
isinstance_mergeable_ranksImportErrorlower
ValueErrorr   _pat_str_special_tokens	convertedsave)r   r	   	save_filetokenizer_filesave_file_absoluteoutput_file_absoluter   r   e	error_msg	tokenizers              T/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/tiktoken.pyconvert_tiktoken_to_fastr*      s7    j!Jd#Z'*==I.0N 4$7Y//12~6689)3h$#H-H(335GH "%x/@/@W_WoWoik  NN'(  F	**k g
	s   7C< <	D?5D::D?N)pathlibr   typingr   #transformers.convert_slow_tokenizerr   *transformers.tokenization_utils_tokenizersr   r   r   r*        r)   <module>r1      s$      A Z-)s -) -)r0   