
    qi                     l   d Z ddlZddlZddlmZmZmZ ddlZddlm	Z	 ddl
mZmZmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ  e  ee            Z! ee      Z" ejF                  d	e"D  ci c]  } | |  c} e$
      Z%eee$    ejL                  ddjO                  e!       d ee!            f   Z( ed      Z)e)jU                  dg d      ddddddejV                  ddf	dedededeee%    ejL                  d      f   dede(dededed dfd!       Z,e)jU                  d"d#d$g      	 	 	 d6d%ee$ ejZ                  d&      f   d'ede(ded df
d(       Z.e)jU                  d)g d*      ddejV                  ddfd%ee$ ejZ                  d&      f   d+eee$    ejL                  d,d-      f   d.eee$    ejL                  d/      f   dededed dfd0       Z/e)jU                  d1d2d3g      ejV                  dfd1ee$ ejZ                  d4      f   deded dfd5       Z0yc c} w )7a  Contains commands to interact with datasets on the Hugging Face Hub.

Usage:
    # list datasets on the Hub
    hf datasets ls

    # list datasets with a search query
    hf datasets ls --search "code"

    # get info about a dataset
    hf datasets info HuggingFaceFW/fineweb
    N)	AnnotatedOptionalget_args)execute_raw_sql_query)CLIErrorRepositoryNotFoundErrorRevisionNotFoundError)DatasetSort_TExpandDatasetProperty_T   )	AuthorOpt	FilterOpt	FormatOptLimitOptOutputFormatQuietOptRevisionOpt	SearchOptTokenOptapi_object_to_dict
get_hf_apimake_expand_properties_parserprint_list_outputtyper_factoryDatasetSortEnum)typezWComma-separated properties to expand. Example: '--expand=downloads,likes,tags'. Valid: z, .)helpcallbackz"Interact with datasets on the Hub.)r   ls)zhf datasets lsz*hf datasets ls --sort downloads --limit 10zhf datasets ls --search "code")examples
   FsearchauthorfiltersortzSort results.limitexpandformatquiettokenreturnc	                     t        |      }	|r|j                  nd}
|	j                  ||| |
||      D cg c]  }t        |       }}t	        |||       yc c}w )zList datasets on the Hub.r+   N)r%   r$   r#   r&   r'   r(   r)   r*   )r   valuelist_datasetsr   r   )r#   r$   r%   r&   r'   r(   r)   r*   r+   apisort_keydataset_inforesultss                N/opt/pipecat/venv/lib/python3.12/site-packages/huggingface_hub/cli/datasets.pydatasets_lsr7   H   st    . 5
!C!tzztH  -- . 

 	<(
G 
 gfE:
s   Ainfoz&hf datasets info HuggingFaceFW/finewebz9hf datasets info my-dataset --expand downloads,likes,tags
dataset_idz+The dataset ID (e.g. `username/repo-name`).revisionc                    t        |      }	 |j                  | ||      }t        t        j                  t        |      d	
             y# t        $ r}t        d|  d      |d}~wt        $ r}t        d| d|  d      |d}~ww xY w)z$Get info about a dataset on the Hub.r.   )repo_idr:   r(   z	Dataset 'z' not found.Nz
Revision 'z' not found on 'z'.   )indent)	r   r4   r   r   r	   printjsondumpsr   )r9   r:   r(   r+   r2   r8   es          r6   datasets_inforC   o   s     5
!CU
XfU
 
$**'-a
89	 # D:,l;<!C  UH:-=j\LMSTTUs#   A 	B	A%%B	1BB	parquet)z(hf datasets parquet cfahlgren1/hub-statsz8hf datasets parquet cfahlgren1/hub-stats --subset modelsz6hf datasets parquet cfahlgren1/hub-stats --split trainz6hf datasets parquet cfahlgren1/hub-stats --format jsonsubsetz--subsetz(Filter parquet entries by subset/config.splitz Filter parquet entries by split.c                 .   t        |      }|j                  | |      }|D cg c]  }||j                  |k(  s| }	}|	D cg c]1  }|j                  |j                  |j                  |j
                  d3 }
}t        |
||d       yc c}w c c}w )z/List parquet file URLs available for a dataset.r.   )r<   configN)rE   rF   urlsizerI   )r)   r*   id_key)r   list_dataset_parquet_filesrF   rH   rI   rJ   r   )r9   rE   rF   r)   r*   r+   r2   entriesentryfilteredr5   s              r6   datasets_parquetrP      s    $ 5
!C,,Z,OG#*T%emu{{e?STHTjraf5<<%++eiiQVQ[Q[\G  gfE%H	 Us   BB6Bsqlzhf datasets sql "SELECT COUNT(*) AS rows FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet')"zhf datasets sql "SELECT * FROM read_parquet('https://huggingface.co/api/datasets/cfahlgren1/hub-stats/parquet/models/train/0.parquet') LIMIT 5" --format jsonzRaw SQL query to execute.c                     	 t        | |      }t	        ||d       y# t        $ r}t        t        |            |d}~ww xY w)zAExecute a raw SQL query with DuckDB against dataset parquet URLs.)	sql_queryr+   NFr/   )r   ImportErrorr   strr   )rQ   r)   r+   resultrB   s        r6   datasets_sqlrW      sD    &&EB fV59  &s1vA%&s    	A<A)NNN)1__doc__enumr@   typingr   r   r   typerhuggingface_hub._dataset_viewerr   huggingface_hub.errorsr   r   r	   huggingface_hub.hf_apir
   r   
_cli_utilsr   r   r   r   r   r   r   r   r   r   r   r   r   r   sorted_EXPAND_PROPERTIES_SORT_OPTIONSEnumrU   r   Optionjoin	ExpandOptdatasets_clicommandtabler7   ArgumentrC   rP   rW   )ss   0r6   <module>rl      s     0 0  A [ [ I   $ H%<=> '$))-m/L1/LSVW SMELLfgkgpgp  rD  hE  gF  FG  H./AB	 "FG     	$**;;; ; !/*	,	; ; ; ; ; ; 
;;> 
0C   !	:#~u~~3`aab:: : 	:
 
::"    sw]a$**I#~u~~3`aabIhsm\U\\*Cm%nnoI Xc]LELL6X$YYZI 	I
 I I 
II$ 	 	c 	j   %**:	3,GHH	I:: : 
	::a 0Ms   8
H1
