
    qi<                        d dl Zd dlZd dlmZ ddlmZmZmZ ddl	m
Z
mZmZmZmZmZmZ  e       rd dlmZ  e       rd dlmZ  e       rd dlmZ d d	lmZ  G d
 de
      Z G d dej4                        Z	 	 	 	 ddZy)    N   )is_accelerate_availableis_scipy_availableis_vision_available   )HungarianMatcher_set_aux_lossbox_iou	dice_lossgeneralized_box_iounested_tensor_from_tensor_listsigmoid_focal_loss)center_to_corners_format)linear_sum_assignment)PartialState)reducec                   :    e Zd Z ej                         d        Zy)LwDetrHungarianMatcherc                 v   |d   j                   dd \  }}|d   j                  dd      j                         }|d   j                  dd      }t        j                  |D cg c]  }|d   	 c}      }	t        j                  |D cg c]  }|d   	 c}      }
d	}d
}d|z
  ||z  z  d|z
  dz   j                          z  }|d|z
  |z  z  |dz   j                          z  }|dd|	f   |dd|	f   z
  }|j                  }|j                  t        j                        }|
j                  t        j                        }
t        j                  ||
d      }|j                  |      }t        t        |      t        |
             }| j                  |z  | j                  |z  z   | j                  |z  z   }|j                  ||d      j!                         }|D cg c]  }t#        |d          }}g }||z  }|j%                  |d      }t'        |      D ]  }||   }t)        |j%                  |d            D cg c]  \  }}t+        ||          }}}|dk(  r|}It-        ||      D cg c]E  \  }}t/        j0                  |d   |d   ||z  z   g      t/        j0                  |d   |d   g      fG }}} |D cg c]O  \  }}t        j2                  |t        j4                        t        j2                  |t        j4                        fQ c}}S c c}w c c}w c c}w c c}}w c c}}w c c}}w )z
        Differences:
        - out_prob = outputs["logits"].flatten(0, 1).sigmoid() instead of softmax
        - class_cost uses alpha and gamma
        logitsNr   r   r   
pred_boxesclass_labelsboxesg      ?g       @g:0yE>)pdim)dtype)shapeflattensigmoidtorchcatlogr   tofloat32cdistr   r   	bbox_cost
class_cost	giou_costviewcpulensplitrange	enumerater   zipnpconcatenate	as_tensorint64) selfoutputstargets
group_detr
batch_sizenum_queriesout_probout_bboxv
target_idstarget_bboxalphagammaneg_cost_classpos_cost_classr)   r   r(   r*   cost_matrixsizesindicesgroup_num_queriescost_matrix_listgroup_idgroup_cost_matrixicgroup_indicesindice1indice2js                                    P/opt/pipecat/venv/lib/python3.12/site-packages/transformers/loss/loss_lw_detr.pyforwardzLwDetrHungarianMatcher.forward+   s=    #*("3"9"9"1"=
K 8$,,Q2::<<(00A6 YY7Ca. 1CD
iiW =7 => e)%8a(lT>Q=V=V=X<XY1x<E"9:4?T?T?V>VW#AzM2^AzM5RR
 ;;u}}-!nnU]]3KK+;	LL'	 ))A()KMefqMrss	 nny04??Z3OORVR`R`clRll!&&z;CGGI*12QQwZ22':5&,,->A,Fj) 	H 0 :ENO`OfOfglnpOqErsTQ21Q48sMs1}' -0,G
 ) 
GAJARU]A]4]'^_
GAJ'?@ 	 lsscgcdfg%++6QVQ\Q\8]^ssS  D =. 3 t ts&   !LLL$<L).A
L/AL5N)__name__
__module____qualname__r"   no_gradrS        rR   r   r   *   s    U]]_6t 6trY   r   c                   v     e Zd Z fdZd Z ej                         d        Zd Zd Z	d Z
d Zd Zd	 Z xZS )
LwDetrImageLossc                 h    t         |           || _        || _        || _        || _        || _        y N)super__init__matchernum_classesfocal_alphalossesr9   )r6   r`   ra   rb   rc   r9   	__class__s         rR   r_   zLwDetrImageLoss.__init__f   s3    &&$rY   c           	      J   d|vrt        d      |d   }| j                  |      }t        j                  t	        ||      D 	cg c]  \  }\  }}	|d   |	    c}	}}      }
| j
                  }d}|d   |   }t        j                  t	        ||      D cg c]  \  }\  }}|d   |    c}}}d      }t        j                  t        t        |j                               t        |            d         }|j                  |j                        }|j                         j                         }|j                         }t        j                  |      }||z  }t        |      }|j!                  |
       ||   j#                  |      |j#                  d	|z
        z  }t        j$                  |d
      j                         }|||<   d	|z
  ||<   | |j'                         z  |d	|z
  j'                         z  z
  }|j)                         |z  }d|i}|S c c}	}}w c c}}}w )Nr   z#No logits were found in the outputsr   r   r   r   r   r   r   g{Gz?loss_ce)KeyError_get_source_permutation_idxr"   r#   r1   rb   diagr
   r   detachr%   r   cloner!   
zeros_likelistappendpowclampr$   sum)r6   r7   r8   rG   	num_boxessource_logitsidxt_Jtarget_classes_orA   rB   	src_boxesrL   target_boxesiou_targetspos_iousprobpos_weightsneg_weightspos_indrf   rc   s                           rR   loss_labelszLwDetrImageLoss.loss_labelso   s   7"@AA)..w7 99CPWY`La%b%byq&1aa&7&:%bc  L)#.	yyWgAV!W!WIAv1!G*Q-!W]^_jj,Y-=-=-?@BZ[gBhijkl
 "nn]%8%89$$&--/$$&&&}5Eks)'(Me$x||AI'>>KK4 '') G 1uG,+kQXNN<L.LL++-)+W%9 &c "Xs   HHc           	         |d   }|j                   }t        j                  |D cg c]  }t        |d          c}|      }|j	                         j                  d      j                  dkD  j                  d      }	t        j                  j                  |	j                         |j                               }
d|
i}|S c c}w )z
        Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes.

        This is not really a loss, it is intended for logging purposes only. It doesn't propagate gradients.
        r   r   )devicer   g      ?r   cardinality_error)r   r"   r4   r-   r!   maxvaluesrq   nn
functionall1_lossfloat)r6   r7   r8   rG   rr   r   r   r>   target_lengths	card_predcard_errrc   s               rR   loss_cardinalityz LwDetrImageLoss.loss_cardinality   s     "')RQ#a.?*@)R[ab^^%))"-44s:??B	==(():N<P<P<RS%x0 *Ss   C c           	         d|vrt        d      | j                  |      }|d   |   }t        j                  t	        ||      D 	cg c]  \  }\  }}	|d   |	    c}	}}d      }
t
        j                  j                  ||
d      }i }|j                         |z  |d<   d	t        j                  t        t        |      t        |
                  z
  }|j                         |z  |d
<   |S c c}	}}w )a<  
        Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss.

        Targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]. The target boxes
        are expected in format (center_x, center_y, w, h), normalized by the image size.
        r   z#No predicted boxes found in outputsr   r   r   none)	reduction	loss_bboxr   	loss_giou)rg   rh   r"   r#   r1   r   r   r   rq   ri   r   r   )r6   r7   r8   rG   rr   rt   source_boxesru   rv   rL   rz   r   rc   r   s                 rR   
loss_boxeszLwDetrImageLoss.loss_boxes   s     w&@AA..w7|,S1yyWgAV!W!WIAv1!G*Q-!W]^_MM)),PV)W	'mmo	9{

 8 FH`amHno
 
	 (mmo	9{ "Xs   C.c                 :   d|vrt        d      | j                  |      }| j                  |      }|d   }||   }|D cg c]  }|d   	 }	}t        |	      j	                         \  }
}|
j                  |      }
|
|   }
t        j                  j                  |dddf   |
j                  dd dd      }|ddd	f   j                  d
      }|
j                  d
      }
|
j                  |j                        }
t        ||
|      t        ||
|      d}|S c c}w )z
        Compute the losses related to the masks: the focal loss and the dice loss.

        Targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w].
        
pred_masksz#No predicted masks found in outputsmasksNbilinearF)sizemodealign_cornersr   r   )	loss_mask	loss_dice)rg   rh   _get_target_permutation_idxr   	decomposer%   r   r   interpolater   r    r+   r   r   )r6   r7   r8   rG   rr   
source_idx
target_idxsource_masksru   r   target_masksvalidrc   s                rR   
loss_maskszLwDetrImageLoss.loss_masks   s:    w&@AA55g>
55g>
|,#J/%,-7--<UCMMOe#|4#J/ }}00D!(:(:23(?j`e 1 
 $AqD)11!4#++A.#((););<+L,	R"<yI
 % .s    Dc                    t        j                  t        |      D cg c]  \  }\  }}t        j                  ||        c}}}      }t        j                  |D cg c]  \  }}|	 c}}      }||fS c c}}}w c c}}w r]   r"   r#   r0   	full_like)r6   rG   rL   sourcerv   	batch_idxr   s          rR   rh   z+LwDetrImageLoss._get_source_permutation_idx   si    IIPYZaPbccna&!uvq9cd	YY'B;FABC
*$$ dB   #A7A>
c                    t        j                  t        |      D cg c]  \  }\  }}t        j                  ||        c}}}      }t        j                  |D cg c]  \  }}|	 c}}      }||fS c c}}}w c c}}w r]   r   )r6   rG   rL   rv   targetr   r   s          rR   r   z+LwDetrImageLoss._get_target_permutation_idx   si    IIPYZaPbccna!Vuvq9cd	YY'B;AvBC
*$$ dBr   c                     | j                   | j                  | j                  | j                  d}||vrt	        d| d       ||   ||||      S )N)labelscardinalityr   r   zLoss z not supported)r   r   r   r   
ValueError)r6   lossr7   r8   rG   rr   loss_maps          rR   get_losszLwDetrImageLoss.get_loss   s^    &&00____	
 xuTF.9::x~gwCCrY   c           
         | j                   r| j                  nd}|j                         D ci c]  \  }}|dk7  s|dk7  s|| }}}| j                  |||      }t	        d |D              }||z  }t        j                  |gt
        j                  t        t        |j                                     j                        }d}	t               r2t        j                  i k7  rt        |      }t               j                   }	t        j"                  ||	z  d      j%                         }i }
| j&                  D ]&  }|
j)                  | j+                  |||||             ( d|v rt-        |d         D ]~  \  }}| j                  |||      }| j&                  D ]W  }|dk(  r	| j+                  |||||      }|j                         D ci c]  \  }}|d| z   | }}}|
j)                  |       Y  d|v rv|d   }| j                  |||	      }| j&                  D ]N  }| j+                  |||||      }|j                         D ci c]  \  }}|d
z   | }}}|
j)                  |       P |
S c c}}w c c}}w c c}}w )a  
        This performs the loss computation.

        Args:
             outputs (`dict`, *optional*):
                Dictionary of tensors, see the output specification of the model for the format.
             targets (`list[dict]`, *optional*):
                List of dicts, such that `len(targets) == batch_size`. The expected keys in each dict depends on the
                losses applied, see each loss' doc.
        r   enc_outputsauxiliary_outputsc              3   8   K   | ]  }t        |d            yw)r   N)r-   ).0ru   s     rR   	<genexpr>z*LwDetrImageLoss.forward.<locals>.<genexpr>  s     @1An-.@s   )r   r   )minr   rv   )r9   _enc)trainingr9   itemsr`   rq   r"   r4   r   nextiterr   r   r   r   _shared_stater   num_processesrp   itemrc   updater   r0   )r6   r7   r8   r9   kr>   outputs_without_aux_and_encrG   rr   
world_sizerc   r   rL   r   l_dictr   s                   rR   rS   zLwDetrImageLoss.forward   s}    )-T__1
$]]_'
Q]0BqL_G_AqD'
# '

 ,,:GZP @@@	
*	OOYKu{{4PTU\UcUcUePfKgKnKno	
"$))R/"9-	)^99
KK	J 6A>CCE	 KK 	UDMM$--gwST	U ')(1':M2N(O *$$,,'8':N KK *Dw !]]41BGWV_`F9?HAaAaS'k1nHFHMM&)** G#!-0Kll;JlOG &t['7IV4:LLNCDAq!f*a-CCf%&
 W'
@ I Ds   I1I1I1
I7
	I=)rT   rU   rV   r_   r   r"   rW   r   r   r   rh   r   r   rS   __classcell__)rd   s   @rR   r[   r[   e   sJ    %"H U]]_  2D%%	D7rY   r[   c	           
      <   t        |j                  |j                  |j                        }
g d}t	        |
|j
                  |j                  ||j                        }|j                  |       i }d }| |d<   ||d<   ||d|d<   |j                  rt        ||      }||d<    |||      d	|j                  d
|j                  d<   |j                  rii }t        |j                  d	z
        D ];  }|j                  j!                         D ci c]  \  }}|d| z   | c}}       = j                  |       j!                         D ci c]  \  }}|dz   | }}}j                  |       t#        fdD              }||fS c c}}w c c}}w )N)r)   r(   r*   )r   r   r   )r`   ra   rb   rc   r9   r   r   )r   r   r   r   r   )rf   r   r   rv   r   c              3   >   K   | ]  }|v s|   |   z    y wr]   rX   )r   r   	loss_dictweight_dicts     rR   r   z/LwDetrForObjectDetectionLoss.<locals>.<genexpr>b  s%     T1CSy|k!n,Ts   	)r   r)   r(   r*   r[   
num_labelsrb   r9   r%   auxiliary_lossr	   bbox_loss_coefficientgiou_loss_coefficientr/   decoder_layersr   r   rq   )r   r   r   r   configoutputs_classoutputs_coordenc_outputs_classenc_outputs_coordkwargsr`   rc   	criterionoutputs_lossr   aux_weight_dictrL   r   r>   enc_weight_dictr   r   r   s                        @@rR   LwDetrForObjectDetectionLossr   1  s    %$$0@0@FL\L\G 0F%%&&$$I LLL#L!+L#'#L )-G,=(),/If.J.JKK%;;Kv,,q01 	UA""{?P?P?R#Stq!A!A3KN#ST	U?+1<1B1B1DEAq6z1}EOE'TiTTD--- $TEs   FF)NNNN)numpyr2   r"   torch.nnr   utilsr   r   r   loss_for_object_detectionr   r	   r
   r   r   r   r   transformers.image_transformsr   scipy.optimizer   
accelerater   accelerate.utilsr   r   Moduler[   r   rX   rY   rR   <module>r      s}       T T   F 4''8t- 8tvIbii Id 2.rY   