
    Z j,                         S SK Jr  S SKrS SKJr  SSKJr  SSKJrJ	r	J
r
Jr  SSKJr  SSKJr  SSKJrJrJrJr  \R*                  " \5      r " S	 S
\R0                  5      r\ " S S5      5       r\ " S S5      5       r\ " S S5      5       rg)    )partialN   )Cache)BaseModelOutputWithPastQuestionAnsweringModelOutput SequenceClassifierOutputWithPastTokenClassifierOutput)	AutoModel)Unpack)TransformersKwargsauto_docstringcan_return_tupleloggingc                   0   ^  \ rS rSrSrSrU 4S jrSrU =r$ )GradientCheckpointingLayer"   a  Base class for layers with gradient checkpointing.

This class enables gradient checkpointing functionality for a layer. By default, gradient checkpointing is disabled
(`gradient_checkpointing = False`). When `model.set_gradient_checkpointing()` is called, gradient checkpointing is
enabled by setting `gradient_checkpointing = True` and assigning a checkpointing function to `_gradient_checkpointing_func`.

Important:

    When using gradient checkpointing with `use_reentrant=True`, inputs that require gradients (e.g. hidden states)
    must be passed as positional arguments (`*args`) rather than keyword arguments to properly propagate gradients.

    Example:

        ```python
        >>> # Correct - hidden_states passed as positional arg
        >>> out = self.layer(hidden_states, attention_mask=attention_mask)

        >>> # Incorrect - hidden_states passed as keyword arg
        >>> out = self.layer(hidden_states=hidden_states, attention_mask=attention_mask)
        ```
Fc                   > U R                   (       a  U R                  (       a  SnU R                  R                  nSU S3nSU;   a  US   (       a  SUS'   US-  nSnSU;   a  US   b  S US'   US-  nSnS	U;   a  US	   b  S US	'   US
-  nSnSU;   a  US   b  S US'   US-  nSnU(       a)  UR	                  S5      S-   n[
        R                  U5        U R                  " [        [        TU ](  40 UD6/UQ76 $ [        TU ](  " U0 UD6$ )NFz7Caching is incompatible with gradient checkpointing in z	. Setting	use_cachez `use_cache=False`,Tpast_key_valuez `past_key_value=None`,past_key_valuesz `past_key_values=None`,
layer_pastz `layer_past=None`,,.)gradient_checkpointingtraining	__class____name__rstriploggerwarning_once_gradient_checkpointing_funcr   super__call__)selfargskwargsdo_warn
layer_namemessager   s         m/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/modeling_layers.pyr#   #GradientCheckpointingLayer.__call__;   sK   &&4==G00JOPZ|[deGf$)<&+{#00  6)f5E.F.R+/'(44 F*v6G/H/T,0()55v%&*>*J'+|$00 !..-3##G,44WUW=M5XQW5X`[_``w000     )	r   
__module____qualname____firstlineno____doc__r   r#   __static_attributes____classcell__r   s   @r*   r   r   "   s    , #"1 "1r,   r   c                     ^  \ rS rSrSrU 4S jr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\S-  S	\R                  S-  S
\R                  S-  S\S-  S\\   S\4S jj5       5       rSrU =r$ ) GenericForSequenceClassification`   modelc                   > [         TU ]  U5        UR                  U l        [        X R                  [
        R                  " U5      5        [        R                  " UR                  U R                  SS9U l
        U R                  5         g )NF)bias)r"   __init__
num_labelssetattrbase_model_prefixr
   from_confignnLinearhidden_sizescore	post_initr$   configr   s     r*   r;   )GenericForSequenceClassification.__init__d   sb      ++,,i.C.CF.KLYYv114??O
 	r,   N	input_idsattention_maskposition_idsr   inputs_embedslabelsr   r&   returnc           	         [        X R                  5      " U4UUUUUS.UD6n	U	R                  n
U R                  U
5      nUb  UR                  S   nOUR                  S   nU R
                  R                  c  US:w  a  [        S5      eU R
                  R                  c  SnOUb  XR
                  R                  :g  R                  UR                  [        R                  5      n[        R                  " UR                  S   UR                  [        R                  S9nX-  R                  S5      nO.Sn[        R                  U R                   R"                   S35        U[        R                  " XR                  S9U4   nS nUb  U R%                  XUU R
                  S	9n['        UUU	R(                  U	R*                  U	R,                  S
9$ )NrI   rJ   r   rK   r   r   r   z=Cannot handle batch sizes > 1 if no padding token is defined.)devicedtypez will not detect padding tokens in `inputs_embeds`. Results may be unexpected if using padding tokens in conjunction with `inputs_embeds.`)rQ   )logitsrL   pooled_logitsrF   )lossrS   r   hidden_states
attentions)getattrr>   last_hidden_staterC   shaperF   pad_token_id
ValueErrortorQ   torchint32arangeargmaxr   r    r   r   loss_functionr   r   rV   rW   )r$   rH   rI   rJ   r   rK   rL   r   r&   transformer_outputsrV   rS   
batch_sizelast_non_pad_tokennon_pad_masktoken_indicesrT   rU   s                     r*   forward(GenericForSequenceClassification.forwardn   s    8?tE[E[7\8
)%+'8
 8
 ,==M* "+J&,,Q/J;;##+
a\]];;##+!#"%)A)AAEEfmmUZU`U`aL!LL)<V]]Z_ZeZefM"/">!F!Fr!J!#>>**+ ,Z Z
 u||J}}MOaab%%VR_hlhshs%tD/ /??-;;*55
 	
r,   )r<   rC   NNNNNNN)r   r.   r/   r0   r>   r;   r   r   r^   
LongTensorTensorr   FloatTensorboolr   r   r   rh   r2   r3   r4   s   @r*   r6   r6   `   s      .2.204(,26*.!%8
##d*8
 t+8
 &&-	8

 8
 ((4/8
   4'8
 $;8
 +,8
 
*8
  8
r,   r6   c                   4  ^  \ rS rSrSrU 4S jrS rS r\\	       SS\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\4S jj5       5       rSrU =r$ )GenericForQuestionAnswering   r8   c                    > [         TU ]  U5        [        X R                  [        R
                  " U5      5        [        R                  " UR                  S5      U l	        U R                  5         g )N   )r"   r;   r=   r>   r
   r?   r@   rA   rB   
qa_outputsrD   rE   s     r*   r;   $GenericForQuestionAnswering.__init__   sO     ,,i.C.CF.KL))F$6$6: 	r,   c                 @    [        X R                  5      R                  $ NrX   r>   embed_tokens)r$   s    r*   get_input_embeddings0GenericForQuestionAnswering.get_input_embeddings   s    t334AAAr,   c                 8    U[        X R                  5      l        g rw   rx   )r$   values     r*   set_input_embeddings0GenericForQuestionAnswering.set_input_embeddings   s    =B,,-:r,   NrH   rI   rJ   r   rK   start_positionsend_positionsr&   rM   c                    [        X R                  5      " U4UUUUS.UD6n	U	R                  n
U R                  U
5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  U R                  " XXg40 UD6n[        UUUU	R                  U	R                  S9$ )N)rI   rJ   r   rK   r   rP   )dim)rU   start_logits
end_logitsrV   rW   )rX   r>   rY   rt   splitsqueeze
contiguousrb   r   rV   rW   )r$   rH   rI   rJ   r   rK   r   r   r&   outputssequence_outputrS   r   r   rU   s                  r*   rh   #GenericForQuestionAnswering.forward   s     ,349O9O+P,
)%+',
 ,
 "331#)<<r<#: #++B/::<''+668
&=+D%%libhiD+%!!//))
 	
r,   )rt   rj   )r   r.   r/   r0   r>   r;   rz   r~   r   r   r^   rk   rl   r   rm   r   r   r   rh   r2   r3   r4   s   @r*   rp   rp      s    BC  .2.204(,263715%
##d*%
 t+%
 &&-	%

 %
 ((4/%
 ))D0%
 ''$.%
 +,%
 
&%
  %
r,   rp   c                     ^  \ rS rSrSrU 4S jr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\S-  S	\R                  S-  S
\R                  S-  S\S-  S\\   S\4S jj5       5       rSrU =r$ )GenericForTokenClassification   r8   c                   > [         TU ]  U5        UR                  U l        [        X R                  [
        R                  " U5      5        [        USS 5      b  UR                  nO[        USS 5      b  UR                  nOSn[        R                  " U5      U l        [        R                  " UR                  UR                  5      U l        U R!                  5         g )Nclassifier_dropouthidden_dropoutg?)r"   r;   r<   r=   r>   r
   r?   rX   r   r   r@   DropoutdropoutrA   rB   rC   rD   )r$   rF   r   r   s      r*   r;   &GenericForTokenClassification.__init__   s      ++,,i.C.CF.KL6/6B!'!:!:V-t4@!'!6!6!$zz"45YYv1163D3DE
 	r,   NrH   rI   rJ   r   rK   rL   r   r&   rM   c           	      $   [        X R                  5      " U4UUUUUS.UD6n	U	R                  n
U R                  U
5      n
U R	                  U
5      nS nUb  U R                  XU R                  5      n[        UUU	R                  U	R                  S9$ )NrO   )rU   rS   rV   rW   )
rX   r>   rY   r   rC   rb   rF   r	   rV   rW   )r$   rH   rI   rJ   r   rK   rL   r   r&   r   r   rS   rU   s                r*   rh   %GenericForTokenClassification.forward   s     ,349O9O+P,
)%+',
 ,
 "33,,7O,%%fdkkBD$!//))	
 	
r,   )r   r<   rC   rj   )r   r.   r/   r0   r>   r;   r   r   r^   rk   rl   r   rm   rn   r   r   r	   rh   r2   r3   r4   s   @r*   r   r      s    "  .2.204(,26*.!%!
##d*!
 t+!
 &&-	!

 !
 ((4/!
   4'!
 $;!
 +,!
 
!
  !
r,   r   )	functoolsr   r^   torch.nnr@   cache_utilsr   modeling_outputsr   r   r   r	   models.autor
   processing_utilsr   utilsr   r   r   r   
get_loggerr   r   Moduler   r6   rp   r   r-   r,   r*   <module>r      s         # $ P P 
		H	%;1 ;1| G
 G
 G
T 9
 9
 9
x 7
 7
 7
r,   