
    Z jK#                        S r SSKJr  SSKrSSKJr  SSKJr  SSKJrJ	r	  SSK
JrJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJrJr  SSKJr  SSKJr  SSKJrJrJrJ r J!r!J"r"J#r#J$r$  SSK%J&r&  \RN                  " \(5      r) " S S\RT                  5      r+ " S S\5      r, " S S\5      r- " S S\"5      r. " S S\5      r/ " S S\ 5      r0 " S  S!\!5      r1/ S"Qr2g)#zPyTorch Starcoder2 model.    )CallableN)nn   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)merge_with_config_defaults)capture_outputs   )MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                   v   ^  \ rS rSrS\4U 4S jjrS\\R                     S-  S\R                  4S jr	Sr
U =r$ )	Starcoder2MLP4   configc                 D  > [         TU ]  5         UR                  n[        R                  " X!R
                  UR                  S9U l        [        R                  " UR
                  X!R                  S9U l        [        UR                     U l        UR                  U l        g )Nbias)super__init__hidden_sizer   Linearintermediate_sizeuse_biasc_fcc_projr   
hidden_actactresidual_dropout)selfr!   	embed_dim	__class__s      ڂ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr&   Starcoder2MLP.__init__5   sq    &&	IIi)A)AX	ii 8 8)//Z&++, & 7 7    hidden_statesNreturnc                     U R                  U5      nU R                  U5      nU R                  U5      n[        R                  R                  XR                  U R                  S9nU$ )Nptraining)r+   r.   r,   r   
functionaldropoutr/   r;   )r0   r6   s     r3   forwardStarcoder2MLP.forward=   sX    		-0/M2--m?T?T_c_l_l-mr5   )r.   r+   r,   r/   )__name__
__module____qualname____firstlineno__r   r&   tupletorchFloatTensorr>   __static_attributes____classcell__r2   s   @r3   r   r   4   s>    8/ 8U5+<+<%=%D IZIZ  r5   r   c                   ,  ^  \ rS rSrSS\S\S-  4U 4S jjjr SS\R                  S\	\R                  \R                  4   S\R                  S-  S	\
S-  S
\\   S\	\R                  \R                  S-  \	\R                     S-  4   4S jjrSrU =r$ )Starcoder2AttentionE   Nr!   	layer_idxc                 r  > [         TU ]  XS9  UR                  U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l	        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR                  U R                  -  UR
                  UR                  S9U l        g )Nr!   rM   r#   )r%   r&   r/   r   r(   r'   num_attention_headshead_dimr*   q_projnum_key_value_headsk_projv_projo_projr0   r!   rM   r2   s      r3   r&   Starcoder2Attention.__init__F   s    < & 7 7ii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii : :T]] JFL^L^eketetur5   r6   position_embeddingsattention_maskpast_key_valueskwargsr7   c           
         UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
Uu  p[        XX5      u  pUb  UR                  XU R                  5      u  p[        R                  " U R                  R                  [        5      nU" U UU	U
U4U R                  (       d  SOU R                   U R"                  [%        U R                  SS 5      S.UD6u  pUR&                  " / UQSP76 R)                  5       nU R+                  U5      n[,        R.                  R1                  XR2                  U R                  S9nX4$ )Nr   r   g        sliding_window)r=   scalingr_   r9   )shaperQ   rR   view	transposerT   rU   r   updaterM   r   get_interfacer!   _attn_implementationr   r;   attention_dropoutr`   getattrreshape
contiguousrV   r   r<   r=   r/   )r0   r6   rY   rZ   r[   r\   input_shapehidden_shapequery_states
key_statesvalue_statescossinattention_interfaceattn_outputattn_weightss                   r3   r>   Starcoder2Attention.forwardN   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ &'6'='=jX\XfXf'g$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
! "));;;;FFHkk+.mm++004== , 
 ((r5   )rT   rV   rR   r/   rU   )N)r@   rA   rB   rC   r   intr&   rE   TensorrD   r   r   r   r>   rG   rH   rI   s   @r3   rK   rK   E   s    v/ vC$J v v )-+)||+) #5<<#=>+) t+	+)
 +) -.+) 
u||U\\D0%2E2LL	M+) +)r5   rK   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )Starcoder2DecoderLayer|   r!   rM   c                   > [         TU ]  X5        [        XS9U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l
        [        R                  " UR                  UR                  S9U l        g )NrO   eps)r%   r&   rK   	self_attnr   mlpr   	LayerNormr'   norm_epsiloninput_layernormpost_attention_layernormrW   s      r3   r&   Starcoder2DecoderLayer.__init__}   sf    +,FP (!||F,>,>FDWDWX(*V5G5GVM`M`(a%r5   )r   r   r   r~   )	r@   rA   rB   rC   r   rv   r&   rG   rH   rI   s   @r3   ry   ry   |   s     b/ bC b br5   ry   c                      ^  \ rS rSrS\4U 4S jjr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\S-  S	\R                  S-  S
\S-  S\\   S\\-  4S jj5       5       rSrU =r$ )Starcoder2Model   r!   c           	      <  > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        R                  " UR                  UR                  S9U l        UR                  U l        g s  snf )Nr|   )r%   r&   r   
ModuleListrangenum_hidden_layersry   layersr   r'   r   normembedding_dropoutrW   s      r3   r&   Starcoder2Model.__init__   sy     mmHMfNfNfHghHg9#F6Hgh
 LL!3!39L9LM	!'!9!9 is   BN	input_idsrZ   position_idsr[   inputs_embeds	use_cacher\   r7   c           
         US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      nU R                  R                  c  [        O[        n	U	" U R                  UUUUS9n
Un[        R                  R                  XR                   U R"                  S9nU R%                  XS9nU R&                  S U R                  R(                    H  nU" U4U
UUUUS	.UD6nM     U R+                  U5      n[-        UU(       a  US
9$ S S
9$ )Nz:You must specify exactly one of input_ids or inputs_embeds)r!   r   r   )device)r!   r   rZ   r[   r   r9   )r   )rZ   r   r[   r   rY   )last_hidden_stater[   )
ValueErrorembed_tokensr   r!   get_seq_lengthrE   arangera   r   	unsqueezer_   r	   r
   r   r<   r=   r   r;   
rotary_embr   r   r   r   )r0   r   rZ   r   r[   r   r   r\   past_seen_tokensmask_functioncausal_maskr6   rY   decoder_layers                 r3   r>   Starcoder2Model.forward   s    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L.2kk.H.H.P*Vw#;;')+%
 &--33dmm . 
 #oomoW![[)H4;;+H+HIM)*) /#$7 M J 		-0&+/8O
 	
>B
 	
r5   )r   r   r   )NNNNNN)r@   rA   rB   rC   r   r&   r   r   rE   
LongTensorrw   r   rF   boolr   r   rD   r   r>   rG   rH   rI   s   @r3   r   r      s    :/ :   .2.204(,26!%7
##d*7
 t+7
 &&-	7

 7
 ((4/7
 $;7
 +,7
 
(	(7
   7
r5   r   c                       \ rS rSrSrg)Starcoder2ForCausalLM    Nr@   rA   rB   rC   rG   r   r5   r3   r   r          r5   r   c                       \ rS rSrSrg)#Starcoder2ForSequenceClassification   r   Nr   r   r5   r3   r   r      r   r5   r   c                       \ rS rSrSrg) Starcoder2ForTokenClassification   r   Nr   r   r5   r3   r   r      r   r5   r   )r   r   Starcoder2PreTrainedModelr   r   )3__doc__collections.abcr   rE   r   activationsr   cache_utilsr   r   masking_utilsr	   r
   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   utils.genericr   utils.output_capturingr   mistral.modeling_mistralr   r   r   r   r   r   r   r   configuration_starcoder2r   
get_loggerr@   loggerModuler   rK   ry   r   r   r   r   __all__r   r5   r3   <module>r      s   &   $   ! . R B 7 5 & 0 7 5	 	 	 7 
		H	%BII "4)* 4)nb0 bB
l B
J	. 		*J 		'D 	r5   