
    Z j7*                        S SK Jr  S SKJr  S SKrS SKJr  SSKJrJ	r	  SSK
Jr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJrJrJr  SSKJr  SSKJr  SSKJr  SSKJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'  SSK(J)r)  \RT                  " \+5      r,Sr-Sr. " S S\%5      r/ " S S\5      r0 " S S\5      r1 " S S\5      r2 " S S\$5      r3 " S S \#5      r4 " S! S"\ 5      r5 " S# S$\!5      r6 " S% S&\"5      r7/ S'Qr8g)(    )Callable)OptionalN   )CacheDynamicCache)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)merge_with_config_defaults)capture_outputs   )CLIPMLP)	LlamaAttentionLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassification
LlamaModelLlamaPreTrainedModelLlamaRotaryEmbeddingapply_rotary_pos_embeager_attention_forward   )	PhiConfigzmicrosoft/phi-1r   c                   ^    \ rS rSr\   SS\S-  S\S   S\S-  S\S\	4   4S	 jj5       r
S
rg)PhiRotaryEmbedding'   Nconfigdeviceztorch.deviceseq_lenreturnztorch.Tensorc           	      j   U R                   S   nU R                   R                  SS5      n[        U SS5      =(       d    U R                  U R                  -  n[        XT-  5      nSnSU[        R                  " SUS[        R                  S9R                  U[        R                  S	9U-  -  -  nX4$ )
aH  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PreTrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

rope_thetapartial_rotary_factorg      ?head_dimNr   r   )dtype)r#   r*   )rope_parametersgetgetattrhidden_sizenum_attention_headsinttorcharangeint64tofloat)	r"   r#   r$   baser(   r)   dimattention_factorinv_freqs	            t/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/phi/modular_phi.pycompute_default_rope_parameters2PhiRotaryEmbedding.compute_default_rope_parameters(   s    & %%l3 & 6 6 : :;RTW X6:t4h8J8JfNhNh8h(23 U\\!S!5;;?BB&X]XcXcBdgjjk
 ))     )NNN)__name__
__module____qualname____firstlineno__staticmethodr   r   r0   tupler5   r;   __static_attributes__r>   r=   r:   r    r    '   sY    #'+/"*D *(* t* 
~u$	%	* *r=   r    c                      ^  \ rS rSrS\S\4U 4S jjr SS\R                  S\	\R                  \R                  4   S\R                  S-  S	\
S-  S
\	\R                  \R                  S-  4   4
S jjrSrU =r$ )PhiAttentionI   r"   	layer_idxc                   > [         TU ]  X5        [        R                  " UR                  UR
                  U R                  -  SS9U l        [        R                  " UR                  UR                  U R                  -  SS9U l	        [        R                  " UR                  UR                  U R                  -  SS9U l
        [        R                  " UR
                  U R                  -  UR                  SS9U l        U ?[        U R                  UR                  S   -  5      U l        UR                   U l        U R                   (       ay  [        R"                  " UR                  UR
                  -  UR$                  SS9U l        [        R"                  " UR                  UR
                  -  UR$                  SS9U l        g g )NTbiasr(   )epselementwise_affine)super__init__nnLinearr.   r/   r)   q_projnum_key_value_headsk_projv_projdenseo_projr0   r+   rotary_ndimsqk_layernorm	LayerNormlayer_norm_epsq_layernormk_layernormselfr"   rI   	__class__s      r:   rP   PhiAttention.__init__J   sf   +ii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijii 2 2F4N4NQUQ^Q^4^eijYYv99DMMI6K]K]dhi
K0F0FG^0_ _`"//!||""f&@&@@fF[F[pt D  "||""f&@&@@fF[F[pt D	 r=   Nhidden_statesposition_embeddingsattention_maskpast_key_valuesr%   c                 b   UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
U R                  (       a"  U R                  U5      nU R                  U	5      n	Uu  pUSS U R                  24   USU R                  S 24   pU	SS U R                  24   U	SU R                  S 24   nn[        XX5      u  p[        R                  " X4SS9n[        R                  " UU4SS9n	Ub  UR                  XU R                  5      u  p[         R"                  " U R$                  R&                  [(        5      nU" U UU	U
U4U R*                  (       d  SOU R,                  U R.                  S.UD6u  nnUR0                  " / UQSP76 R3                  5       nU R5                  U5      nUU4$ )Nr   r   .)r7   g        )dropoutscaling)shaper)   rS   view	transposerU   rV   rZ   r]   r^   rY   r   r1   catupdaterI   r   get_interfacer"   _attn_implementationr   trainingattention_dropoutrj   reshape
contiguousrW   )r`   rc   rd   re   rf   kwargsinput_shapehidden_shapequery_states
key_statesvalue_statescossin	query_rot
query_passkey_rotkey_passattention_interfaceattn_outputattn_weightss                       r:   forwardPhiAttention.forward[   sD    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST++L9L))*5J& 1 1 1112d//112 
 s/d////0sD--//0 
 2)cO	 yy)!8bAYY2;
&'6'='=jX\XfXf'g$J(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
!\ "));;;;FFHjj-L((r=   )rW   r^   rU   r]   rS   rZ   rY   rV   )N)r?   r@   rA   rB   r   r0   rP   r1   TensorrD   r   r   rE   __classcell__ra   s   @r:   rG   rG   I   s    y S , )-8)||8) #5<<#=>8) t+	8)
 8) 
u||U\\D00	18) 8)r=   rG   c                       \ rS rSrSrg)PhiMLP   r>   Nr?   r@   rA   rB   rE   r>   r=   r:   r   r          r=   r   c                     ^  \ rS rSrS\S\4U 4S jjr     SS\R                  S\R                  S-  S\R                  S-  S	\
S-  S
\S-  S\\R                  \R                  4   S-  S\\   S\R                  4S jjrSrU =r$ )PhiDecoderLayer   r"   rI   c                   > [         TU ]  5         [        XS9U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  5      U l        g )N)rI   rM   )rO   rP   rG   	self_attnr   mlprQ   r[   r.   r\   input_layernormDropoutresid_pdropresid_dropoutr_   s      r:   rP   PhiDecoderLayer.__init__   s[    %fB&>!||F,>,>FDYDYZZZ(:(:;r=   Nrc   re   position_idsrf   	use_cacherd   rv   r%   c           
          UnU R                  U5      nU R                  " SUUUUUUS.UD6u  pU R                  U	5      n	U R                  U R                  U5      5      nX-   U-   nU$ )N)rc   re   r   rf   r   rd   r>   )r   r   r   r   )r`   rc   re   r   rf   r   rd   rv   residualattn_outputs_feed_forward_hidden_statess               r:   r   PhiDecoderLayer.forward   s     !,,];.. 
')%+ 3
 
 )),7%)%7%78O%P"$AHLr=   )r   r   r   r   )NNNFN)r?   r@   rA   rB   r   r0   rP   r1   r   
LongTensorr   boolrD   r   r   r   rE   r   r   s   @r:   r   r      s    <y <S < /304(,!&HL|| t+ &&-	
  $; #5<<#=>E +, 
 r=   r   c                       \ rS rSr\\S.rSrg)PhiPreTrainedModel   )rc   
attentionsr>   N)r?   r@   rA   rB   r   rG   _can_record_outputsrE   r>   r=   r:   r   r      s    ("r=   r   c                     ^  \ rS rSrS\4U 4S jjr\\\      SS\	R                  S-  S\	R                  S-  S\	R                  S-  S\S-  S	\	R                  S-  S
\S-  S\\   S\4S jj5       5       5       rSrU =r$ )PhiModel   r"   c           	      h  > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        R                  " UR                  5      U l
        [        R                  " UR                  UR                  S9U l        U ?g s  snf )Nr   )rO   rP   rQ   
ModuleListrangenum_hidden_layersr   layersr   
embd_pdropembed_dropoutr[   r.   r\   final_layernormnormr_   s      r:   rP   PhiModel.__init__   s     mmAFvG_G_A`aA`I_V/A`a
  ZZ(9(9:!||F,>,>FDYDYZI	 bs   B/N	input_idsre   r   rf   inputs_embedsr   rv   r%   c           
      `   US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      n[        U R                  UUUUS9n	U R                  U5      nUn
U R                  XS9nU R                  S U R                  R                    H  nU" U
4U	UUUUS.UD6n
M     U R                  U
5      n
[!        U
US	9$ )
Nz:You must specify exactly one of input_ids or inputs_embeds)r"   r   r   )r#   )r"   r   re   rf   r   )r   )re   r   rf   r   rd   )last_hidden_staterf   )
ValueErrorembed_tokensr   r"   get_seq_lengthr1   r2   rk   r#   	unsqueezer   r   
rotary_embr   r   r   r
   )r`   r   re   r   rf   r   r   rv   past_seen_tokenscausal_maskrc   rd   decoder_layers                r:   r   PhiModel.forward   sX    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L(;;')+%
 **=9%"oomoW![[)H4;;+H+HIM)*) /#$7 M J ,,];&++
 	
r=   )r   r   r   )NNNNNN)r?   r@   rA   rB   r   rP   r   r   r   r1   r   r   r   FloatTensorr   r   r   r
   r   rE   r   r   s   @r:   r   r      s    y    .2.204(,26!%4
##d*4
 t+4
 &&-	4

 4
 ((4/4
 $;4
 +,4
 
!4
    4
r=   r   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )PhiForCausalLMi  c                    > [         TU ]  U5        [        R                  " UR                  UR
                  SS9U l        g )NTrK   )rO   rP   rQ   rR   r.   
vocab_sizelm_head)r`   r"   ra   s     r:   rP   PhiForCausalLM.__init__  s0     yy!3!3V5F5FTRr=   )r   )r?   r@   rA   rB   rP   rE   r   r   s   @r:   r   r     s    S Sr=   r   c                       \ rS rSrSrg)PhiForSequenceClassificationi  r>   Nr   r>   r=   r:   r   r     r   r=   r   c                       \ rS rSrSrg)PhiForTokenClassificationi  r>   Nr   r>   r=   r:   r   r     r   r=   r   )r   r   r   r   r   )9collections.abcr   typingr   r1   torch.nnrQ   cache_utilsr   r   masking_utilsr   modeling_layersr	   modeling_outputsr
   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   clip.modeling_clipr   llama.modeling_llamar   r   r   r   r   r   r   r   r   configuration_phir   
get_loggerr?   logger_CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCr    rG   r   r   r   r   r   r   r   __all__r>   r=   r:   <module>r      s    $    . / 9 6 & @ @ 7 5 (
 
 
 ) 
		H	%' *- *DJ)> J)Z	W 	$0 $N- A
z A
HS% S	#A 		 ; 	r=   