
    qi-#                         d dl Z d dlZd dlZd dlmZ d dlZd dlmZmZ dede	e
ef   ddfdZdded	ed
efdZ G d d      Zy)    N)Path)ErrorMessage_unzip_iter	file_pathmembersreturnc                     t        j                  | d      5 }|j                         D ]  \  }}|j                  ||        	 ddd       y# 1 sw Y   yxY w)zR
    Create a ZIP file at file_path, with the given arcname->content mapping.
    wN)zipfileZipFileitemswritestr)r   r   zfarcnamecontents        V/opt/pipecat/venv/lib/python3.12/site-packages/nltk/test/unit/test_downloader_unzip.py	_make_zipr      sQ     
C	( *B ' 	*GWKK)	** * *s   +AAzip_pathextract_rootverbosec                 T    t        t        t        |       t        |      |            S )zj
    Convenience wrapper that runs _unzip_iter and returns the list of yielded
    messages (if any).
    r   )listr   str)r   r   r   s      r   _run_unzip_iterr      s!    
 CM3|+<gNOO    c                       e Zd ZdZdeddfdZdeddfdZej                  j                  e
j                  j                  d      d	      deddfd
       ZdeddfdZdeddfdZdeddfdZy)TestSecureUnzipa  
    Tests for secure ZIP extraction behaviour in nltk.downloader._unzip_iter.

    These tests are specifically designed so that:

    * On the old implementation (using zf.extractall(root) without checks),
      the "Zip-Slip" tests will fail because they rely on the new behaviour
      (yielding ErrorMessage for escaping entries, and NOT writing outside
      the extraction root).

    * On the new implementation, the tests pass, because the new path
      validation prevents escapes and emits the expected ErrorMessage.
    tmp_pathr   Nc                     |dz  }|dz  }ddd}t        ||       t        ||d      }t        d |D              rJ |d	z  d
z  j                         dk(  sJ |d	z  dz  dz  j                         dk(  sJ y)z
        A ZIP with only safe, relative paths should fully extract under the
        given root, and should not yield any ErrorMessage.
        zsafe.zipextracts   hellos   world)pkg/file.txtzpkg/subdir/other.txtFr   c              3   <   K   | ]  }t        |t                y wN
isinstancer   .0ms     r   	<genexpr>zKTestSecureUnzip.test_normal_relative_paths_are_extracted.<locals>.<genexpr><   s     Eqz!\2E   pkgzfile.txtsubdirz	other.txtN)r   r   any
read_bytes)selfr   r   r   r   messagess         r   (test_normal_relative_paths_are_extractedz8TestSecureUnzip.test_normal_relative_paths_are_extracted+   s    
 j()+ %$,
 	(G$"8\5I EHEEEEu$z1==?8KKKu$x/+=IIKxWWWr   c                 |   |dz  }|dz  }|dz  dz  j                         }ddd}t        ||       t        ||d	      }|D cg c]  }t        |t              s| }}|sJ d
       dj                  d |D              }	d|	v rd|	v sJ |j                         rJ |dz  dz  j                         dk(  sJ yc c}w )az  
        An entry containing ``..`` that would escape the target directory
        must not be written outside the extraction root, and must cause
        _unzip_iter to yield an ErrorMessage.

        On the old implementation (extractall(root)), this test will fail
        because the file outside the root is actually created and no
        ErrorMessage is yielded.
        zzip_slip_parent.zipr!   z..zoutside.txt   ok   evil)pkg/good.txtz../outside.txtFr   z@Expected an ErrorMessage for a Zip-Slip parent-directory attempt c              3   F   K   | ]  }t        |j                          y wr$   r   messager'   s     r   r*   z[TestSecureUnzip.test_zip_slip_with_parent_directory_component_is_blocked.<locals>.<genexpr>f   s     $FS^$F   !Zip Slipblockedr,   good.txtN)resolver   r   r&   r   joinexistsr/   )
r0   r   r   r   outside_targetr   r1   r)   err_msgscombined_messagess
             r   8test_zip_slip_with_parent_directory_component_is_blockedzHTestSecureUnzip.test_zip_slip_with_parent_directory_component_is_blockedA   s     33)+ '-=FFH "%

 	(G$"8\5I  (G!:a+FAGG	NM	N
  HH$FX$FF..9@Q3QQQ "((*** u$z1==?5HHH Hs   B9B9winz2Absolute POSIX paths are not meaningful on Windows)reasonc                 |   |dz  }|dz  }t        d      dt        j                          z  }	 ddt        |      di}t	        ||       t        ||d	      }|D cg c]  }t        |t              s| }}|sJ d
       dj                  d |D              }	d|	v rd|	v sJ |j                         rJ |dz  dz  j                         dk(  sJ 	 |j                         r	 |j                          yyc c}w # t        $ r Y yw xY w# |j                         r!	 |j                          w # t        $ r Y w w xY ww xY w)af  
        An entry with an absolute POSIX path (e.g. ``/tmp/evil``) must not be
        extracted as-is; it should not overwrite arbitrary filesystem paths,
        and should result in an ErrorMessage.

        On the old implementation (extractall(root)), this test will fail
        because the absolute path gets created without any ErrorMessage.
        zzip_slip_abs_posix.zipr!   z/tmpnltk_zip_slip_test_r6   r4   r5   Fr   z;Expected an ErrorMessage for absolute-path Zip-Slip attemptr7   c              3   F   K   | ]  }t        |j                          y wr$   r9   r'   s     r   r*   zTTestSecureUnzip.test_zip_slip_with_absolute_posix_path_is_blocked.<locals>.<genexpr>   s     (JAQYY(Jr;   r<   r=   r,   r>   N)r   osgetpidr   r   r   r&   r   r@   rA   r/   unlinkOSError)
r0   r   r   r   absolute_targetr   r1   r)   rC   rD   s
             r   1test_zip_slip_with_absolute_posix_path_is_blockedzATestSecureUnzip.test_zip_slip_with_absolute_posix_path_is_blockedo   sy    66)+ v,+>ryy{m)LL 	 O$g	G h(&xuMH#+Kaz!\/JKHKMLM !$(J(J J!22yDU7UUU '--/// !5(:5AACuLLL %%'#**, (! L&   %%'#**,  (sZ   -D C42C46AD "C9 4D 9	DDD;D+*D;+	D74D;6D77D;c                    t        t        d      st        j                  d       |dz  }|dz  }|dz  }|j	                          |dz  }ddd	}t        ||       |j	                          t        j                  ||d
z         t        ||d      }|j                         rJ |dz  dz  j                         dk(  sJ t        d |D              sJ y)a'  
        If there is a pre-existing symlink below the extraction root that
        points outside the root, writing through that symlink should not
        be allowed to escape the root.

        This test documents the desired hardening to defend against this
        class of attacks.
        symlinkz'Symlinks not supported on this platformzzip_slip_symlink.zipr!   outside_dirzevil.txtr4   r5   )r6   zdir_link/evil.txtdir_linkFr   r,   r>   c              3   <   K   | ]  }t        |t                y wr$   r%   r'   s     r   r*   z]TestSecureUnzip.test_entries_resolved_outside_root_are_blocked_via_symlink.<locals>.<genexpr>        A1:a.Ar+   N)hasattrrK   pytestskipmkdirr   rR   r   rA   r/   r.   )r0   r   r   r   rS   rB   r   r1   s           r   :test_entries_resolved_outside_root_are_blocked_via_symlinkzJTestSecureUnzip.test_entries_resolved_outside_root_are_blocked_via_symlink   s     r9%KKAB44)+.$z1 "!(
 	(G$


;z 9:"8\5I "((***u$z1==?5HHHAAAAAr   c                     |dz  }|j                  d       |dz  }t        ||d      }t        d |D              sJ |j                         rt        |j	                               rJ yy)z
        A corrupt or non-zip file should cause _unzip_iter to yield an
        ErrorMessage instead of raising an unhandled exception.
        znot_a_zip.txts   this is not a zip archiver!   Fr   c              3   <   K   | ]  }t        |t                y wr$   r%   r'   s     r   r*   zGTestSecureUnzip.test_bad_zipfile_yields_errormessage.<locals>.<genexpr>   rV   r+   N)write_bytesr   r.   rA   iterdir)r0   r   r   r   r1   s        r   $test_bad_zipfile_yields_errormessagez4TestSecureUnzip.test_bad_zipfile_yields_errormessage   sv    
 o-9:)+"8\5IAAAAA  <//12222 !r   c                     |dz  }|dz  }ddi}t        ||       t        ||d       |j                         }d|j                  v sJ y)	z
        When verbose=True, _unzip_iter should write a status line to stdout.
        This checks that existing user-visible behaviour is preserved.
        zverbose.zipr!   r"   s   dataTr   	UnzippingN)r   r   
readouterrout)r0   capsysr   r   r   r   captureds          r   (test_unzip_iter_verbose_writes_to_stdoutz8TestSecureUnzip.test_unzip_iter_verbose_writes_to_stdout   sX    
 m+)+!7+(G$,=$$&hll***r   )__name__
__module____qualname____doc__r   r2   rE   rX   markskipifsysplatform
startswithrP   r[   r`   rg    r   r   r   r      s    X X$ X,,I,I	,I\ [[&C  /$ /SW /	/b$B$B	$BL3T 3d 3$+ +RV +r   r   )F)rK   rn   r   pathlibr   rX   nltk.downloaderr   r   dictr   bytesr   boolr   r   rq   r   r   <module>rw      sd    	 
    5* *S%Z(8 *T *Pd P$ P PM+ M+r   