
    qiS>                        d Z ddlZddlZddlmZmZ ddlmZmZm	Z	m
Z
mZmZ  G d dej                        Z G d dej                        Z G d	 d
ej                        Z G d dej                        Z G d dej                        Zy)z.
Tests for BLEU translation evaluation metric
    N)findopen_datafile)SmoothingFunctionbrevity_penaltyclosest_ref_lengthcorpus_bleumodified_precisionsentence_bleuc                   *    e Zd Zd Zd Zd Zd Zd Zy)TestBLEUc                    dj                         }dj                         }dj                         }||g}t        t        ||d            }t        |d      dk(  sJ | j	                  |dd	       t        t        ||d
            dk(  sJ t        d      j                         }t        d      j                         }t        d      j                         }dj                         }|||g}t        t        ||d            dk(  sJ t        t        ||d
            dk(  sJ t        d      j                         }t        d      j                         }|||g}t        t        ||d            }t        t        ||d            }| j	                  |dd	       | j	                  |dd	       t        |d      dk(  sJ t        |d      dk(  sJ t        t        ||d
            }	t        t        ||d
            }
| j	                  |	dd	       | j	                  |
dd	       t        |	d      dk(  sJ t        |
d      dk(  sJ y)zm
        Examples from the original BLEU paper
        https://www.aclweb.org/anthology/P02-1040.pdf
        zthe cat is on the matzthere is a cat on the matzthe the the the the the the   )n   gH?gvi&$I?places           zWIt is a guide to action that ensures that the military will forever heed Party commandszlIt is the guiding principle which guarantees the military forces always being under the command of the PartyzQIt is the practical guide for the army always to heed the directions of the partyzof the      ?z^It is a guide to action which ensures that the military always obeys the commands of the partyzSIt is to insure the troops forever hearing the activity guidebook that party directg֋8?g/̈́$I?gJY8?gH?g?g)::?g:pΈ?gr鷯?N)splitfloatr	   roundassertAlmostEqualstr)selfref1ref2hyp1
referenceshyp1_unigram_precisionref3hyp2hyp2_unigram_precisionhyp1_bigram_precisionhyp2_bigram_precisions              T/opt/pipecat/venv/lib/python3.12/site-packages/nltk/test/unit/translate/test_bleu.pytest_modified_precisionz TestBLEU.test_modified_precision   s    ',,.*002,224D\
 "''9*da'P!Q+Q/69995z!L '
DA>?3FFF /
 %' 	 A
 %' 	 *
 %' 	
 ~~D$'
'
DA>?3FFF '
DA>?3FFF 5
 %' 	 *
 %' 	
 D$'
 "''9*da'P!Q!&'9*da'P!Q5z!L5z!L+Q/6999+Q/6999 !&&8TQ&O P %&8TQ&O P4jK4jK*A.&888*A.&888    c                    dgdz  dgdz  g}dgdz  }t        |      }t        ||      }| j                  t        ||      dd       dgdz  dgdz  dgdz  dgdz  g}dgdz  }t        |      }t        ||      }t        ||      d	k(  sJ y )
Na         gv?r   r      r   )lenr   r   r   )r   r   
hypothesishyp_lenclosest_ref_lens        r&   test_brevity_penaltyzTestBLEU.test_brevity_penaltyd   s     ebj3%!),
UQY
j/,ZAOW5va 	 	
 ebj3%!)cUQY	B
UQY
j/,ZA8C???r(   c                     dj                         g}dj                         }t        dt        |            D ]  }d|z  f|z  }t        |||      dk(  rJ  y )N7The candidate has no alignment to any of the referencesJohn loves Maryr   r   r   r   ranger/   r
   r   r   r0   r   weightss        r&   test_zero_matcheszTestBLEU.test_zero_matchesu   se    OUUWX
&,,.
 q#j/* 	GAQwj1nG ZAQFFF	Gr(   c                     dj                         g}dj                         }t        dt        |            D ]  }d|z  f|z  }t        |||      dk(  rJ  y )Nr6   r   r   r7   r9   s        r&   test_full_matcheszTestBLEU.test_full_matches   se    '--/0
&,,.
 q#j/* 	IAQwj1nG ZASHHH	Ir(   c                     dj                         g}dj                         }| j                  t        ||      dd       	 | j                  t        t        ||       y # t
        $ r Y y w xY w)Nr6   zJohn loves Mary who loves Miker   r   r   r   r   r
   assertWarnsUserWarningAttributeErrorr   r   r0   s      r&   5test_partial_matches_hypothesis_longer_than_referencez>TestBLEU.test_partial_matches_hypothesis_longer_than_reference   sk    '--/0
5;;=
 	}ZDcRST	[-ZP 		   A 	A*)A*N)__name__
__module____qualname__r'   r3   r;   r=   rD    r(   r&   r   r      s     M9^@"GI
r(   r   c                   6    e Zd Zd Zd Zd Zd Zd Zd Zd Z	y)	TestBLEUFringeCasesc                    dj                         g}dj                         }t        |      dz   }d|z  f|z  }| j                  t        |||      dd       	 | j	                  t
        t        ||       dj                         g}dj                         }| j                  t        |||      dd       y # t        $ r Y Lw xY w)NzJohn loves Mary ?r6   r   r   r   r   r   )r   r/   r   r
   r@   rA   rB   r9   s        r&   2test_case_where_n_is_bigger_than_hypothesis_lengthzFTestBLEUFringeCases.test_case_where_n_is_bigger_than_hypothesis_length   s    )//12
&,,.

Oa7*q. 	*j':C 	 	
	[-ZP (--/0
&,,.
 	*j':C 	 	
  		s   B6 6	CCc                 L    dj                         g}g }t        ||      dk(  sJ y )Nr5   r   r   r
   rC   s      r&   test_empty_hypothesisz)TestBLEUFringeCases.test_empty_hypothesis   s-    OUUWX

Z4999r(   c                     dj                         g}dg}t               j                  }	 t        |||       y # t        $ r Y y w xY w)Nr5   Foo)smoothing_function)r   r   method4r
   
ValueError)r   r   r0   rT   s       r&   test_length_one_hypothesisz.TestBLEUFringeCases.test_length_one_hypothesis   sK    OUUWX
W
#%--	*jWM 		s   9 	AAc                 L    g g}dj                         }t        ||      dk(  sJ y )Nr6   r   rO   rC   s      r&   test_empty_referencesz)TestBLEUFringeCases.test_empty_references   s,    T
&,,.
Z4999r(   c                 0    g g}g }t        ||      dk(  sJ y )Nr   )r
   rC   s      r&   $test_empty_references_and_hypothesisz8TestBLEUFringeCases.test_empty_references_and_hypothesis   s#    T

Z4999r(   c                     dj                         g}dj                         }| j                  t        ||      dd       	 | j                  t        t        ||       y # t
        $ r Y y w xY w)Nz	let it goz	let go itr   r   r   r?   rC   s      r&   3test_reference_or_hypothesis_shorter_than_fourgramszGTestBLEUFringeCases.test_reference_or_hypothesis_shorter_than_fourgrams   sm     "'')*
 &&(
 	}ZDcRST	[-ZP 		rE   c                     t        j                  d      }dj                         g}dj                         }|j                  dgdz        }t	        |||      dk(  sJ y )Nnumpyr5   r6         ?r   r   )pytestimportorskipr   arrayr
   )r   npr   r0   r:   s        r&   test_numpy_weightsz&TestBLEUFringeCases.test_numpy_weights   s\      )OUUWX
&,,.
((D6A:&ZW=BBBr(   N)
rF   rG   rH   rM   rP   rV   rX   rZ   r\   rd   rI   r(   r&   rK   rK      s&    
6:::Cr(   rK   c                       e Zd Zd Zy)TestBLEUvsMteval13ac           	         t        d      }t        d      }t        d      }t        |      5 }t        t        |j	                         d   j                         dd       }d d d        t        |d      5 }t        |d      5 }t        t        d	 |            }t        t        d
 |            }	t        t        dd            D ]-  \  }
}t        |	|d|
z  f|
z        }t        ||z
        dk  r-J  t               }t        t        dd      |      D ]8  \  }
}t        |	|d|
z  f|
z  |j                        }t        ||z
        dk  r8J  	 d d d        d d d        y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nzmodels/wmt15_eval/ref.ruzmodels/wmt15_eval/google.ruz#models/wmt15_eval/mteval-13a.outputr   utf8)encodingc                 "    | j                         S Nr   xs    r&   <lambda>z6TestBLEUvsMteval13a.test_corpus_bleu.<locals>.<lambda>   s    	 r(   c                 $    | j                         gS rm   rn   ro   s    r&   rq   z6TestBLEUvsMteval13a.test_corpus_bleu.<locals>.<lambda>   s     r(   
   r   )r:   g{Gzt?)r:   rS   )r   r   mapr   	readlinesr   listzipr8   r   absr   method3)r   ref_filehyp_filemteval_output_file
mteval_finmteval_bleu_scoresref_finhyp_finr0   r   imteval_bleu	nltk_bleu
chencherrys                 r&   test_corpus_bleuz$TestBLEUvsMteval13a.test_corpus_bleu   s   2356!"GH -. 	V* "%UJ,@,@,B2,F,L,L,NqQS,T!U	V
 8f5 	@x&9 @W "#&97"CD
!#&;W"EF
&)%2,8J&K @NA{ +"Jq
Q!I {Y67%???@ /0
&)%2,8J&K @NA{ +""!$q
Q+5+=+=	!I {Y67%???@'@	@ 	@	V 	V@ @	@ 	@s=   3E#5E<A/E03AE0E0E<#E-0E9	5E<<FN)rF   rG   rH   r   rI   r(   r&   rf   rf      s    '@r(   rf   c                       e Zd Zd Zy)TestBLEUWithBadSentencec                 P   d}t        d      }|j                         gg}|j                         g}	 | j                  t              5  | j	                  t        ||      dd       d d d        y # 1 sw Y   y xY w# t        $ r! | j	                  t        ||      dd       Y y w xY w)NzQTeo S yb , oe uNb , R , T t , , t Tue Ar saln S , , 5istsi l , 5oe R ulO sae oR RzTheir tasks include changing a pump on the faulty stokehold .Likewise , two species that are very similar in morphology were distinguished using genetics .r   r   r   )r   r   r@   rA   r   r   rB   )r   hyprefr   
hypothesess        r&   "test_corpus_bleu_with_bad_sentencez:TestBLEUWithBadSentence.test_corpus_bleu_with_bad_sentence  s    a2

 yy{m_
iik]
		W!!+. &&
J7Q '    	W "";z:#FTU"V	Ws/   A; A/&A; /A84A; 8A; ;'B%$B%N)rF   rG   rH   r   rI   r(   r&   r   r     s    Wr(   r   c                       e Zd Zd Zy)TestBLEUWithMultipleWeightsc                     g d}g d}g d}g d}g d}g d}d}d}d	}	t        |||g|gg||g|||	g
      }
|
d   t        |||g|gg||g|      k(  sJ |
d   t        |||g|gg||g|      k(  sJ |
d   t        |||g|gg||g|	      k(  sJ y )N)Itisr*   guidetoactionwhichensuresthatthemilitaryalwaysobeysr   commandsofr   party)r   r   r*   r   r   r   r   r   r   r   r   willforeverheedPartyr   )r   r   r   guiding	principler   
guaranteesr   r   forcesr   beingunderr   commandr   r   r   )r   r   r   	practicalr   forr   armyr   r   r   r   
directionsr   r   r   )hereadr   bookbecauser   was
interestedinworldhistory)r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   )r_   r_   r_   r_   )r   r   r   r   r   )list_of_referencesr   r:   r   r   r   )r   )r   r   ref1aref1bref1cr"   ref2aweight_1weight_2weight_3bleu_scoress              r&   &test_corpus_bleu_with_multiple_weightszBTestBLEUWithMultipleWeights.test_corpus_bleu_with_multiple_weights*  s   
(
$
(
$

  +"!!&u 5w?d|x2

 1~UE"UG,tTlH"
 
 	
 
 1~UE"UG,tTlH"
 
 	
 
 1~UE"UG,tTlH"
 
 	
 
r(   N)rF   rG   rH   r   rI   r(   r&   r   r   )  s    x
r(   r   )__doc__unittestr`   	nltk.datar   r   nltk.translate.bleu_scorer   r   r   r   r	   r
   TestCaser   rK   rf   r   r   rI   r(   r&   <module>r      s      ) x   FNC(++ NCb(@(++ (@VWh// W,y
("3"3 y
r(   