
    qi7                     V    d dl Z d dlZd dlmZ d Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zy)    Nc                     t        j                  t              5  t        j                  d       ddd       t        j                  t              5  t        j                  d       ddd       y# 1 sw Y   AxY w# 1 sw Y   yxY w)z3No-protocol traversal sequences should be rejected.../../etc/passwdNz../relative/../etc/passwdpytestraises
ValueErrordatanormalize_resource_url     S/opt/pipecat/venv/lib/python3.12/site-packages/nltk/test/unit/test_data_security.py,test_normalize_rejects_no_protocol_traversalr      sn    	z	" 8##$678 
z	" A##$?@A A8 8A A   A0A<0A9<Bc                      t        j                  t              5  t        j                  d       ddd       y# 1 sw Y   yxY w)zQWindows-style backslash traversal should be rejected when no protocol is present.z..\..\etc\passwdNr   r   r   r   .test_normalize_rejects_no_protocol_backslashesr      s2    	z	" 9##$789 9 9	   9Ac                  ^    t        j                  d      } | j                  d      sJ d       y)zIValid package-style resource names should still be treated as nltk: URLs.zcorpora/brownznltk:z5Package-style paths should be treated as 'nltk:' URLsN)r	   r
   
startswith)outs    r   #test_normalize_allows_package_pathsr      s3    

%
%o
6C>> ?>? r   c                      t        j                  t              5  t        j                  d       ddd       y# 1 sw Y   yxY w)zLDefense-in-depth: direct calls to find() should reject traversal-like names.r   Nr   r   r   r	   findr   r   r   'test_find_rejects_traversal_direct_callr      s0    	z	" &		$%& & &r   c                      t        j                  t              5  t        j                  d       ddd       y# 1 sw Y   yxY w)z
    Defense-in-depth edge case: a path can become unsafe only after normalization.

    Example from review: "foo/../../etc/passwd" normalizes to "../etc/passwd" and
    must still be rejected.
    zfoo/../../etc/passwdNr   r   r   r   Ctest_find_rejects_traversal_that_becomes_unsafe_after_normalizationr   %   s2     
z	" *		()* * *r   c                      t        j                  t              5  t        j                  d       ddd       y# 1 sw Y   yxY w)z;Absolute POSIX paths without a protocol should be rejected.z/etc/passwdNr   r   r   r   6test_normalize_rejects_no_protocol_absolute_posix_pathr   0   s1    	z	" 3##M23 3 3r   c                     t        j                  t              5  t        j                  d       ddd       t        j                  t              5  t        j                  d       ddd       y# 1 sw Y   AxY w# 1 sw Y   yxY w)z
    Windows drive letter paths should be rejected even on non-Windows platforms.

    Review note: don't gate 'C:/etc/passwd' on Windows only; ensure robust rejection
    regardless of runtime platform.
    zC:\etc\passwdNzC:/etc/passwdr   r   r   r   =test_normalize_rejects_no_protocol_windows_drive_letter_pathsr    6   sj     
z	" 6##$456 
z	" 5##O45 5	6 65 5r   c                      t        j                  t              5  t        j                  d       ddd       y# 1 sw Y   yxY w)z8A resource name that is exactly '..' should be rejected.z..Nr   r   r   r   .test_normalize_rejects_no_protocol_dotdot_onlyr"   E   s1    	z	" *##D)* * *r   c                    | dz  }t        j                  |d      5 }|j                  dd       d d d        t        j                  dt        |       g      }|j                         5 }|j                         }t        |t              r|j                  d      }|dk(  sJ 	 d d d        y # 1 sw Y   }xY w# 1 sw Y   y xY w)Nza.zipwzb.zip/c.txtokza.zip/b.zip/c.txt)pathszutf-8)zipfileZipFilewritestrr	   r   stropenread
isinstancebytesdecode)tmp_pathzpathzfptrfgots         r   !test_find_zip_split_is_non_greedyr6   K   s    wE		$ )
M4() ))'H
?C	 qffhc5!**W%Cd{{	 	) ) s   B+(9B7+B47C )r'   r   	nltk.datar	   r   r   r   r   r   r   r    r"   r6   r   r   r   <module>r8      s<      A9?&*35*r   