
    Z j                        S SK Jr  S SKrS SKJr  SSKJrJr  SSKJrJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJrJrJr  SSKJr  SSKJr  SSKJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&  SSK'J(r(  \RR                  " \*5      r+ " S S\"5      r, " S S\5      r- " S S\5      r. " S S\$5      r/ " S S\#5      r0 " S S\5      r1 " S S \!5      r2 " S! S"\ 5      r3 " S# S$\\/5      r4/ S%Qr5g)&    )CallableN)nn   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GenericForQuestionAnswering)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)merge_with_config_defaults)capture_outputs   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward   )MistralConfigc                   (   ^  \ rS rSrU 4S jrSrU =r$ )
MistralMLP$   c                 >  > [         TU ]  U5        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R                  U R
                  SS9U l        [        R                  " U R
                  U R                  SS9U l        g )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/mistral/modular_mistral.pyr(   MistralMLP.__init__%   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )r.   r,   r-   )__name__
__module____qualname____firstlineno__r(   __static_attributes____classcell__r1   s   @r2   r"   r"   $   s    Y Yr4   r"   c                      ^  \ rS rSrS\S\4U 4S jjr SS\R                  S\	\R                  \R                  4   S\R                  S-  S	\
S-  S
\\   S\	\R                  \R                  S-  4   4S jjrSrU =r$ )MistralAttention,   r0   	layer_idxc                 h  > [         TU ]  X5        [        USS 5      =(       d    UR                  UR                  -  U l        [        R                  " UR                  UR                  U R
                  -  SS9U l        [        R                  " UR                  UR                  U R
                  -  SS9U l
        [        R                  " UR                  UR                  U R
                  -  SS9U l        [        R                  " UR                  U R
                  -  UR                  SS9U l        g )Nhead_dimFr%   )r'   r(   getattrr*   num_attention_headsrA   r   r)   q_projnum_key_value_headsk_projv_projo_projr/   r0   r?   r1   s      r2   r(   MistralAttention.__init__-   s    +
D9mV=O=OSYSmSm=mii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii : :T]] JFL^L^ejkr4   Nhidden_statesposition_embeddingsattention_maskpast_key_valueskwargsreturnc           
      4   UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
Uu  p[        XX5      u  pUb  UR                  XU R                  5      u  p[        R                  " U R                  R                  [        5      nU" U UU	U
U4U R                  (       d  SOU R                   U R"                  [%        U R                  SS 5      S.UD6u  pUR&                  " / UQSP76 R)                  5       nU R+                  U5      nX4$ )Nr   r   g        sliding_window)dropoutscalingrS   )shaperA   rD   view	transposerF   rG   r   updater?   r   get_interfacer0   _attn_implementationr   trainingattention_dropoutrU   rB   reshape
contiguousrH   )r/   rK   rL   rM   rN   rO   input_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightss                   r2   forwardMistralAttention.forward5   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ &'6'='=jX\XfXf'g$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
! "));;;;FFHkk+.((r4   )rA   rF   rH   rD   rG   )N)r5   r6   r7   r8   r    intr(   torchTensortupler   r   r
   rj   r9   r:   r;   s   @r2   r=   r=   ,   s    l} l l )-')||') #5<<#=>') t+	')
 ') -.') 
u||U\\D00	1') ')r4   r=   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )MistralDecoderLayer_   r0   r?   c                 `   > [         TU ]  X5        [        XS9U l        [	        U5      U l        g )N)r0   r?   )r'   r(   r=   	self_attnr"   mlprI   s      r2   r(   MistralDecoderLayer.__init__`   s(    +)Mf%r4   )ru   rt   )	r5   r6   r7   r8   r    rl   r(   r9   r:   r;   s   @r2   rq   rq   _   s    &} & & &r4   rq   c                       \ rS rSr\\S.rSrg)MistralPreTrainedModelf   )rK   
attentions N)r5   r6   r7   r8   rq   r=   _can_record_outputsr9   r{   r4   r2   rx   rx   f   s    ,&r4   rx   c                       \ rS rSr\\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\
S-  S\R                  S-  S\S-  S	\\   S
\4S jj5       5       5       rSrg)MistralModelm   N	input_idsrM   position_idsrN   inputs_embeds	use_cacherO   rP   c           
         US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      nU R                  R                  c  [        O[        n	U	" U R                  UUUUS9n
UnU R                  XS9nU R                  S U R                  R                    H  nU" U4U
UUUUS.UD6nM     U R!                  U5      n[#        UU(       a  US	9$ S S	9$ )
Nz:You must specify exactly one of input_ids or inputs_embeds)r0   r   r   )device)r0   r   rM   rN   r   )r   )rM   r   rN   r   rL   )last_hidden_staterN   )
ValueErrorembed_tokensr   r0   get_seq_lengthrm   arangerV   r   	unsqueezerS   r   r	   
rotary_emblayersnum_hidden_layersnormr   )r/   r   rM   r   rN   r   r   rO   past_seen_tokensmask_functioncausal_maskrK   rL   decoder_layers                 r2   rj   MistralModel.forwardn   sm    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L.2kk.H.H.P*Vw#;;')+%
 &"oomoW![[)H4;;+H+HIM)*) /#$7 M J 		-0&+/8O
 	
>B
 	
r4   r{   )NNNNNN)r5   r6   r7   r8   r   r   r   rm   
LongTensorrn   r   FloatTensorboolr   r   r   rj   r9   r{   r4   r2   r~   r~   m   s     .2.204(,26!%2
##d*2
 t+2
 &&-	2

 2
 ((4/2
 $;2
 +,2
 
!2
    2
r4   r~   c                       \ rS rSrSrg)MistralForCausalLM   r{   Nr5   r6   r7   r8   r9   r{   r4   r2   r   r          r4   r   c                       \ rS rSrSrg)MistralForTokenClassification   r{   Nr   r{   r4   r2   r   r      r   r4   r   c                       \ rS rSrSrg) MistralForSequenceClassification   r{   Nr   r{   r4   r2   r   r      r   r4   r   c                       \ rS rSrSrg)MistralForQuestionAnswering   r{   Nr   r{   r4   r2   r   r      s    X[r4   r   )r   r   r~   rx   r   r   )6collections.abcr   rm   r   cache_utilsr   r   masking_utilsr   r	   modeling_flash_attention_utilsr
   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   configuration_mistralr    
get_loggerr5   loggerr"   r=   rq   rx   r~   r   r   r   r   __all__r{   r4   r2   <module>r      s    $   . R B 8 5 & @ @ 7 5   1 
		H	%Y Y0)~ 0)f&+ &1 6
: 6
r	) 		$? 		'E 	 \"=?U [r4   