
    Z jO                     v   S r SSKrSSKrSSKJr  SSKJr  SSKJr  SSK	J
r
Jr  SSKJr  SS	KJr  SS
KJr  SSKJrJr  SSKJr  SSKJrJr  SSKJr  \R8                  " \5      rS\S\S\R@                  4S jr!S\R@                  S\R@                  4S jr"S\R@                  S\R@                  S\R@                  S\R@                  4S jr# " S S\RH                  5      r% " S S\RH                  5      r& " S S\5      r'\ " S  S!\5      5       r(\ " S" S#\(5      5       r)\" S$S%9 " S& S'\(\5      5       r*/ S(Qr+g))zPyTorch CodeGen model.    N)nn   )initialization)ACT2FN)CacheDynamicCache)GenerationMixin)create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPastCausalLMOutputWithPast)PreTrainedModel)auto_docstringlogging   )CodeGenConfignum_posdimreturnc           	         SS[         R                  " SUS[         R                  S9U-  -  -  n[         R                  " S[         R                  " U [         R                  S9R	                  5       U5      R	                  5       n[         R
                  " [         R                  " U5      [         R                  " U5      4SS9$ )	Ng      ?i'  r      )dtypezi , j -> i jr   r   )torcharangeint64einsumfloatcatsincos)r   r   inv_freqsinusoid_inps       }/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/codegen/modeling_codegen.pycreate_sinusoidal_positionsr%   (   s    eQQekk JS PQRH<<WEKK0X0^0^0`bjkqqsL99eii-uyy/FGQOO    xc                     U S S 2S S 2S S 2S S S24   nU S S 2S S 2S S 2SS S24   n[         R                  " U* U4SS9n U R                  S5      $ )Nr   r   r   )r   stackflatten)r'   x1x2s      r$   rotate_every_twor/   /   sS    	
1aCaC<B	
1aADqD=	BbS"I2&A99R=r&   tensorr    r!   c                     [         R                  " US S 2S S 2S S S 24   SS5      n[         R                  " US S 2S S 2S S S 24   SS5      nX-  [        U 5      U-  -   $ )Nr   r   )r   repeat_interleaver/   )r0   r    r!   s      r$   apply_rotary_pos_embr3   7   s\    

!
!#aD!m"4a
;C

!
!#aD!m"4a
;CL-f5;<<r&   c                   t  ^  \ rS rSrSU 4S jjrS rS r SS jr     SS\R                  S-  S\
S-  S	\R                  S-  S
\R                  S-  S\S-  S\S-  S\\R                  \\R                     4   \\R                  \\R                     \\R                  S4   4   -  S-  4S jjrSrU =r$ )CodeGenAttention=   Nc                 2  > [         TU ]  5         UR                  U l        [        R
                  " UR                  5      U l        [        R
                  " UR                  5      U l	        X l
        Uc-  [        R                  SU R                  R                   S35        UR                  U l        UR"                  U l        U R                   U R"                  -  U l        U R$                  U R"                  -  U R                   :w  a&  ['        SU R                    SU R"                   S35      e[(        R*                  " U R$                  5      U l        [        R.                  " U R                   U R                   S-  SS9U l        [        R.                  " U R                   U R                   SS9U l        UR4                  U l        U R4                  =(       d    U R                   U l        U R9                  S	[;        U R                  U R6                  5      SS
9  g )NzInstantiating z without passing a `layer_idx` is not recommended and will lead to errors during the forward call if caching is used. Please make sure to provide a `layer_idx` when creating this class.zEembed_dim must be divisible by num_attention_heads (got `embed_dim`: z and `num_attention_heads`: z).r   F)biasembed_positions)
persistent)super__init__max_position_embeddingsmax_positionsr   Dropout
attn_pdropattn_dropoutresid_pdropresid_dropout	layer_idxloggerwarning_once	__class____name__hidden_size	embed_dimnum_attention_headshead_dim
ValueErrormathsqrt
scale_attnLinearqkv_projout_proj
rotary_dimpos_embd_dimregister_bufferr%   )selfconfigrD   rG   s      r$   r<   CodeGenAttention.__init__>   s   #;;JJv'8'89ZZ(:(:;" !8!8 9 :, ,  ++#)#=#= $*B*BB==4333t~~EWX\XfXfWg h++/+C+C*DBH  ))DMM2		$..$..12D5Q		$..$..uM ++ OO=t~~:4;M;MtO`O`ans 	 	
r&   c                     UR                  UR                  S S X$-  U4-   5      nUR                  UR                  S S S-   UR                  SS  -   5      nU$ )Nr)   r*   )r)   )reshapeshape)rW   r'   n_headdim_headmp_numreshapeds         r$   _split_headsCodeGenAttention._split_heads^   s[    99QWWSb\V-=x,HHI##AGGCRL5$88>>"#;N$NOr&   c                    [        UR                  5      S:X  a$  UR                  SSSSS5      R                  5       nO][        UR                  5      S:X  a#  UR                  SSSS5      R                  5       nO![	        S[        UR                  5       35      eUR                  5       SS	 X#-  4-   nUR                  U5      $ )
z=
Merges attn_head_size dim and num_attn_heads dim into n_ctx
   r   r   r   r      z3Input tensor rank should be one of [4, 5], but is: Nr*   )lenr\   permute
contiguousrM   sizeview)rW   r0   rK   attn_head_size	new_shapes        r$   _merge_headsCodeGenAttention._merge_headsc   s     v||!^^Aq!Q2==?F!#^^Aq!Q/::<FRSVW]WcWcSdRefggKKM#2&*=*N)PP	{{9%%r&   c                    UR                  [        R                  5      nUR                  [        R                  5      n[        R                  " XR	                  SS5      5      nUb  XT-   nXPR
                  -  n[        R                  " SS9" U5      nUR                  UR                  5      nU R                  U5      n[        R                  " XS5      nXe4$ )Nr)   r*   r   )
tor   float32matmul	transposerP   r   Softmaxr   rA   )rW   querykeyvalueattention_maskattn_weightsattn_outputs          r$   _attnCodeGenAttention._attnp   s     'ffU]]#||E==R+@A%'8L#oo5zzb),7#u{{3((6ll<7((r&   hidden_states
layer_pastrx   position_ids	use_cacheoutput_attentionsr   .c                    U R                  U5      nSnUR                  UR                  S S US4-   5      n	U R                  U R                  -  U-  n
[
        R                  " XSS9u  pnU R                  XR                  U R                  US9nU R                  XR                  U R                  US9nU R                  XR                  U R                  US9nUR                  SSSS5      nU R                  nUR                  UR                  :w  a!  UR                  UR                  5      nXl	        X   n[
        R                  " XR                  S   S-  SS9u  nnU R                  b  US S 2S S 2S S 2S U R                  24   nUS S 2S S 2S S 2U R                  S 24   nUS S 2S S 2S S 2S U R                  24   nUS S 2S S 2S S 2U R                  S 24   n[        UUU5      n[        UUU5      n[
        R                  " UU/SS9n[
        R                  " UU/SS9nO[        UUU5      n[        UUU5      nUR                  SSSS5      nUR                  SSSS5      nUb7  UR                  UR                  UR                   5      XR"                  5      u  pU R%                  XX5      u  nnU R'                  UU R                  U R                  5      nU R)                  U5      nU R+                  U5      nUU4$ )	Nre   r)   r   )r_   r   r   r   r   )rR   r[   r\   rL   rK   r   splitra   rg   r9   devicerp   rT   r3   r   updater   rD   r{   rm   rS   rC   )rW   r}   r~   rx   r   r   r   qkvr_   	qkv_split	local_dimru   rw   rv   r9   sincosr    r!   k_rotk_passq_rotq_passrz   ry   s                           r$   forwardCodeGenAttention.forward   s    mmM*KK		#2&" =>	MMD$<$<<F	!KK	"Ec!!%)A)A4==Y_!`%=%=t}}U[\!!%)A)A4==Y_!`aAq)..!!\%8%88-001D1DEO#2  .;;v||B'71'<"ES??&1a!24??!223EAq$//"334F!Q#4T__#445E1aDOO$556F(S9E(S9E))UFO4CIIufo26E&sC5C(S9Ekk!Q1%aAq) !#**366-2E2E+F~~^JC %)JJu5$Q!\''T5M5Mt}}]mmK0((5L((r&   )rA   rJ   r9   rL   rD   r>   rK   rS   rU   rR   rC   rT   rP   NNNNFF)rH   
__module____qualname____firstlineno__r<   ra   rm   r{   r   FloatTensorr   
LongTensorbooltupleTensorr   __static_attributes____classcell__rG   s   @r$   r5   r5   =   s   
@
&$ )8 $(3704!&).@)((4/@) DL@) ))D0	@)
 &&-@) $;@)  $;@) 	ellE%,,//0
eELL15s9J3KK
L	M
	@) @)r&   r5   c                   h   ^  \ rS rSrU 4S jrS\R                  S-  S\R                  4S jrSrU =r	$ )
CodeGenMLP   c                    > [         TU ]  5         UR                  n[        R                  " X15      U l        [        R                  " X5      U l        [        UR                     U l	        [        R                  " UR                  5      U l        g r   )r;   r<   n_embdr   rQ   fc_infc_outr   activation_functionactr?   rB   dropout)rW   intermediate_sizerX   rJ   rG   s       r$   r<   CodeGenMLP.__init__   s`    MM	YYy<
ii 1=&445zz&"4"45r&   r}   Nr   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r   r   )rW   r}   s     r$   r   CodeGenMLP.forward   s@    

=1/M2]3r&   )r   r   r   r   )
rH   r   r   r   r<   r   r   r   r   r   r   s   @r$   r   r      s1    6U%6%6%= %BSBS  r&   r   c                   "  ^  \ rS rSrSU 4S jjr     SS\R                  S-  S\S-  S\R                  S-  S\R                  S-  S\	S-  S	\	S-  S
\
\R                     \
\R                  \
\R                  S4   4   -  S-  4S jjrSrU =r$ )CodeGenBlock   Nc                   > [         TU ]  5         UR                  b  UR                  OSUR                  -  n[        R
                  " UR                  UR                  S9U l        [        X5      U l	        [        X15      U l        g )Nre   eps)r;   r<   n_innerr   r   	LayerNormlayer_norm_epsilonln_1r5   attnr   mlp)rW   rX   rD   	inner_dimrG   s       r$   r<   CodeGenBlock.__init__   s_    &,nn&@FNNa&--FW	LLF4M4MN	$V7	i0r&   r}   r~   rx   r   r   r   r   .c           	          UnU R                  U5      nU R                  UUUUUUS9u  pU R                  U5      nX-   U-   nX4$ )N)r}   r~   rx   r   r   r   )r   r   r   )rW   r}   r~   rx   r   r   r   kwargsresidualattn_outputsry   feed_forward_hidden_statess               r$   r   CodeGenBlock.forward   sf     !		-0%)YY'!)%/ &/ &
" &*XXm%<"$AHL**r&   )r   r   r   r   r   )rH   r   r   r   r<   r   r   r   r   r   r   r   r   r   r   r   s   @r$   r   r      s    1 $(3704!&).+((4/+ DL+ ))D0	+
 &&-+ $;+  $;+ 
u||	uU\\59J9JC9O3P%PQ	QTX	X+ +r&   r   c                   J   ^  \ rS rSr% \\S'   SrSrS/rSr	Sr
U 4S jrSrU =r$ )	CodeGenPreTrainedModeli  rX   transformerTr   past_key_valuesc                    > [         TU ]  U5        [        U[        5      (       a@  [        R
                  " UR                  [        UR                  UR                  5      5        g g r   )
r;   _init_weights
isinstancer5   initcopy_r9   r%   r>   rU   )rW   modulerG   s     r$   r   $CodeGenPreTrainedModel._init_weights  sL    f%f.//JJv--/J6K_K_agatat/uv 0r&    )rH   r   r   r   r   __annotations__base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_can_compile_fullgraphr   r   r   r   s   @r$   r   r     s6    %&*#'("3!w wr&   r   c                   2  ^  \ rS rSrU 4S jrS rS r\          SS\R                  S-  S\
S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\S-  S\S-  S\S-  S\S-  S\\-  4S jj5       rSrU =r$ )CodeGenModeli  c           
        > [         TU ]  U5        UR                  U l        UR                  U l        [
        R                  " UR                  U R                  5      U l        [
        R                  " UR                  5      U l
        [
        R                  " [        UR                  5       Vs/ s H  n[        XS9PM     sn5      U l        [
        R                   " U R                  UR"                  S9U l        ['        UR(                  UR*                  UR,                  -  5      U l        SU l        U R1                  5         g s  snf )N)rD   r   F)r;   r<   r   rJ   
vocab_sizer   	Embeddingwter?   
embd_pdropdrop
ModuleListrangen_layerr   hr   r   ln_fminrT   n_ctxrK   gradient_checkpointing	post_init)rW   rX   irG   s      r$   r<   CodeGenModel.__init__  s      ++<< 1 14>>BJJv001	5QWQ_Q_K`aK`aV AK`abLLV5N5NO	f//A[A[1[\&+# 	  bs   /Ec                     U R                   $ r   r   )rW   s    r$   get_input_embeddings!CodeGenModel.get_input_embeddings%  s    xxr&   c                     Xl         g r   r   )rW   new_embeddingss     r$   set_input_embeddings!CodeGenModel.set_input_embeddings(  s    !r&   N	input_idsr   rx   token_type_idsr   inputs_embedsr   r   output_hidden_statesreturn_dictr   c           
         Ub  UOU R                   R                  nU	b  U	OU R                   R                  n	Ub  UOU R                   R                  nU
b  U
OU R                   R                  n
USL USL-  (       a  [        S5      eU R                  (       a/  U R                  (       a  U(       a  [        R                  S5        SnUc  U R                  U5      nU(       a  Uc  [        U R                   S9nUR                  S   nUcU  Ub  UR                  5       OSn[        R                  " UR                  S   UR                   S9U-   nUR#                  S5      n[%        U R                   UUUUS	9nUnUb(  UR'                  S
U5      nU R                  U5      nUU-   nU R)                  U5      nS
XR+                  S
5      4nU(       a  SOSnU	(       a  SOSn[-        U R.                  5       H5  u  nnU	(       a  UU4-   nU" UUUUUUS9nUS   nU(       d  M,  UUS   4-   nM7     U R1                  U5      nUR'                  U5      nU	(       a  UU4-   nU
(       d  [3        S XUU4 5       5      $ [5        UUUUS9$ )au  
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_dim)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
Nz:You must specify exactly one of input_ids or inputs_embedszZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...F)rX   r   r   )r   )rX   r   rx   r   r   r)   r   )r~   rx   r   r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r   ).0vs     r$   	<genexpr>'CodeGenModel.forward.<locals>.<genexpr>  s      cacs   	)last_hidden_stater   r}   
attentions)rX   r   r   r   r   rM   r   trainingrE   rF   r   r   r\   get_seq_lengthr   r   r   	unsqueezer
   rj   r   ri   	enumerater   r   r   r   )rW   r   r   rx   r   r   r   r   r   r   r   r   
seq_lengthpast_seen_tokenscausal_maskr}   token_type_embedsoutput_shapeall_self_attentionsall_hidden_statesr   blockoutputss                          r$   r   CodeGenModel.forward+  s   * 2C1N-TXT_T_TqTq$8$D $++JjJj 	 "+!6IDKK<Q<Q	%0%<k$++BYBY-t";<YZZ&&4==##p "	  HHY/M0*$++>O"((+
CRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L(;;')+%
 &%+00Z@N $ 8),==M		-0J(:(:2(>?$5b4"6BD!$&&)HAu#$58H$H!**)#"3G $AJM  &9WQZM&I# *" 		-0%**<8 1]4D D )<MObc   '+++*	
 	
r&   )r   rJ   r   r   r   rT   r   r   )
NNNNNNNNNN)rH   r   r   r   r<   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s   @r$   r   r     s    "  .2(,37260426!%)-,0#'g
##d*g
 g
 ))D0	g

 ((4/g
 &&-g
 ((4/g
 $;g
  $;g
 #Tkg
 D[g
 
(	(g
 g
r&   r   zM
    The CodeGen Model transformer with a language modeling head on top.
    )custom_introc                   n  ^  \ rS rSrSS0rU 4S jr\            SS\R                  S-  S\	S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\S-  S\S-  S\S-  S\S-  S\\R                  -  S\\-  4S jj5       rSrU =r$ )CodeGenForCausalLMi  zlm_head.weightztransformer.wte.weightc                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  UR                  5      U l        U R                  5         g r   )
r;   r<   r   r   r   rQ   r   r   lm_headr   )rW   rX   rG   s     r$   r<   CodeGenForCausalLM.__init__  sE     '/yy0A0AB 	r&   Nr   r   rx   r   r   r   labelsr   r   r   r   logits_to_keepr   c                    Ub  UOU R                   R                  nU R                  UUUUUUUU	U
US9
nUS   n[        U[        5      (       a  [        U* S5      OUnU R                  USS2USS24   5      nSnUb)  U R                  " SUXpR                   R                  S.UD6nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  UR                  S9$ )a  
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_dim)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
    `labels = input_ids` Indices are selected in `[-100, 0, ..., config.vocab_size]` All labels set to `-100`
    are ignored (masked), the loss is only computed for labels in `[0, ..., config.vocab_size]`
N)	r   rx   r   r   r   r   r   r   r   r   )logitsr  r   r   )lossr  r   r}   r   r   )rX   r   r   r   intslicer  loss_functionr   r   r   r}   r   )rW   r   r   rx   r   r   r   r  r   r   r   r   r  r   transformer_outputsr}   slice_indicesr  r  outputs                       r$   r   CodeGenForCausalLM.forward  s   6 &1%<k$++BYBY"..+))%'/!5# / 
 ,A.8B>SV8W8W~ot4]kmA}a,?@A%%pVF{{OeOepiopDY!4QR!88F)-)9TGf$EvE%/??-;;*55
 	
r&   )r  r   )NNNNNNNNNNNr   )rH   r   r   r   _tied_weights_keysr<   r   r   r   r   r   r   r  r   r   r   r   r   r   r   s   @r$   r	  r	    s5    +,DE  .2(,37260426*.!%)-,0#'-.<
##d*<
 <
 ))D0	<

 ((4/<
 &&-<
 ((4/<
   4'<
 $;<
  $;<
 #Tk<
 D[<
 ell*<
 
'	'<
 <
r&   r	  )r	  r   r   ),__doc__rN   r   r    r   r   activationsr   cache_utilsr   r   
generationr	   masking_utilsr
   modeling_layersr   modeling_outputsr   r   modeling_utilsr   utilsr   r   configuration_codegenr   
get_loggerrH   rE   r  r   r%   r/   r3   Moduler5   r   r   r   r   r	  __all__r   r&   r$   <module>r(     s[       & ! . ) / 9 O - 1 
		H	%P P3 P5<< P  = =ELL =u|| =X]XdXd =L)ryy L)` ( +-  +F w_ w w 
) 
 
D 
H
/ H

H
V Kr&   