
    Z jY                        S SK r S SKJr  SSKJrJr  SSKJrJr  SSK	J
r
  SSKJr  SSKJrJrJr  S	S
KJr  S	SKJrJrJrJr  S	SKJrJr  \R6                  " \5      r\" SS9\ " S S\5      5       5       r " S S\5      r " S S\5      r  " S S\5      r! " S S\5      r" " S S\
5      r# " S S\5      r$ " S S\5      r%/ SQr&g)     N)strict   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)BaseModelOutputWithPast)Unpack)TransformersKwargsauto_docstringlogging   )LlamaConfig)LlamaDecoderLayerLlamaForCausalLM
LlamaModelLlamaPreTrainedModel)Qwen2AttentionQwen2RotaryEmbeddingzfacebook/cwm)
checkpointc                     ^  \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\S-  \S'   Sr\\\   -  S-  \S'   Sr\\S '   S!r\\S"'   S#r\\-  \S$'   S%r\\S&'   S!r\\S''   Sr\ S-  \S('   S)r!\\S*'   Sr"\\   S-  \S+'   \#" 5       r$U 4S, jr%S-r&U =r'$ ).	CwmConfig%   cwm    .Ai  
vocab_sizei   hidden_sizei T  intermediate_size@   num_hidden_layers0   num_attention_heads   num_key_value_heads   head_dimsilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegh㈵>rms_norm_epsT	use_cacheNpad_token_ideos_token_idi  bos_token_idFtie_word_embeddingsg        attention_dropout   pretraining_tpmlp_biasrope_parameters    sliding_windowlayer_typesc                   > U R                   c  SSSSSSS.U l         U R                  c7  Sn[        U R                  5       Vs/ s H  nX2-  S	:X  a  S
OSPM     snU l        U R                  (       a  [        U R                  5      OS U l        [        U R                  5      U l        U R                  b  U R                  O/ SQU l        [        TU ]$  " S0 UD6  g s  snf )Nr   g      0@g      @g      ?r6   llama3)
rope_thetafactorhigh_freq_factorlow_freq_factor original_max_position_embeddings	rope_type   r   full_attentionsliding_attention)i i i	  )
r5   r8   ranger    r7   intlistr.   super__post_init__)selfkwargswindow_patterni	__class__s       t/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/cwm/modular_cwm.pyrI   CwmConfig.__post_init__D   s    ')$'#&48%$D  #N t556 6A '(&8A&=!DWW6 D
 ;?:M:Mc$"5"56SW 0 01151B1B1ND--Tl'' s   C)r.   r8   r5   r7   )(__name__
__module____qualname____firstlineno__
model_typedefault_thetar   rF   __annotations__r   r   r    r"   r$   r&   r(   strr)   r*   floatr+   r,   boolr-   r.   rG   r/   r0   r1   r3   r4   r5   dictr7   r8   AttributeErrorattention_biasrI   __static_attributes____classcell__rN   s   @rO   r   r   %   s'    JMJK"s"s!!  HcJ#)S)#u#L%It#L#*#+/L#S	/D(/L# %%%(us{(NCHd#'OTD['NC$(KcT!(#%N( (    r   c                       \ rS rSrSrg)CwmRotaryEmbedding]   rD   NrQ   rR   rS   rT   r^   rD   ra   rO   rc   rc   ]       ra   rc   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )CwmAttentiona   config	layer_idxc                   > [         TU ]  XS9  [        R                  R	                  UR
                  UR                  U R                  -  SS9U l        [        R                  R	                  UR
                  UR                  U R                  -  SS9U l
        [        R                  R	                  UR
                  UR                  U R                  -  SS9U l        g )Nrj   rk   F)bias)rH   __init__torchnnLinearr   r"   r&   q_projr$   k_projv_projrJ   rj   rk   rN   s      rO   ro   CwmAttention.__init__b   s    <hhoof&8&8&:T:TW[WdWd:dkpoqhhoof&8&8&:T:TW[WdWd:dkpoqhhoof&8&8&:T:TW[WdWd:dkpoqra   )rt   rs   ru   	rQ   rR   rS   rT   r   rF   ro   r^   r_   r`   s   @rO   rh   rh   a   s    ry rS r rra   rh   c                   4   ^  \ rS rSrS\S\4U 4S jjrSrU =r$ )CwmDecoderLayeri   rj   rk   c                 <   > [         TU ]  XS9  [        XS9U l        g )Nrm   )rH   ro   rh   	self_attnrv   s      rO   ro   CwmDecoderLayer.__init__j   s    <%VIra   )r}   rx   r`   s   @rO   rz   rz   i   s    Jy JS J Jra   rz   c                       \ rS rSrSrg)CwmPreTrainedModelo   rD   Nre   rD   ra   rO   r   r   o   rf   ra   r   c                       \ rS rSrSrg)CwmModelOutputWithPasts   rD   Nre   rD   ra   rO   r   r   s   rf   ra   r   c                      ^  \ rS rSr\rS\4U 4S jjr      SS\R                  S-  S\R                  S-  S\R                  S-  S\
S-  S	\R                  S-  S
\S-  S\\   S\4S jjrSrU =r$ )CwmModelw   rj   c           	         > [         TU ]  U5        [        R                  R	                  [        UR                  5       Vs/ s H  n[        X5      PM     sn5      U l        g s  snf )N)	rH   ro   rp   rq   
ModuleListrE   r    rz   layersrv   s      rO   ro   CwmModel.__init__z   sM     hh))AFvG_G_A`aA`I_V/A`a
as   A#N	input_idsattention_maskposition_idspast_key_valuesinputs_embedsr,   rK   returnc           	         US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      n[        U=n	[        5      (       d9  U R                  UUUUS.n
U
R                  5       n[        S
0 U
D6[        S
0 UD6S.n	UnU R                  X5      n[!        U R"                  S U R                  R$                   5       H,  u  pU" U4XR                  R&                  U      UUUS.UD6nM.     U R)                  U5      n[+        UUS	9$ )Nz:You must specify exactly one of input_ids or inputs_embeds)rj   r   r2   )device)rj   r   r   r   r   )rB   rC   )r   r   r   position_embeddings)last_hidden_stater   rD   )
ValueErrorembed_tokensr   rj   get_seq_lengthrp   arangeshaper   	unsqueeze
isinstancer[   copyr   r   
rotary_emb	enumerater   r    r8   normr   )rJ   r   r   r   r   r   r,   rK   past_seen_tokenscausal_mask_mappingmask_kwargssliding_mask_kwargshidden_statesr   rM   decoder_layers                   rO   forwardCwmModel.forward   s    -t";<YZZ *.*;*;I*FM0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L?-FF++!."0#2 ,K #."2"2"4 #5"C{"C%F%]I\%]#
 &"oomJ )$++6U8U8U*V WA)2;;3J3J13MN) /$7 M !X 		-0%++
 	
ra   )r   )NNNNNN)rQ   rR   rS   rT   r   config_classro   rp   
LongTensorTensorr   FloatTensorrZ   r
   r   r   r   r^   r_   r`   s   @rO   r   r   w   s    L
y 
 .2.204(,26!%8
##d*8
 t+8
 &&-	8

 8
 ((4/8
 $;8
 +,8
 
 8
 8
ra   r   c                       \ rS rSrSrg)CwmForCausalLM   rD   Nre   rD   ra   rO   r   r      rf   ra   r   )r   r   r   r   )'rp   huggingface_hub.dataclassesr   cache_utilsr   r   masking_utilsr   r   modeling_outputsr	   processing_utilsr
   utilsr   r   r   llama.configuration_llamar   llama.modeling_llamar   r   r   r   qwen2.modeling_qwen2r   r   
get_loggerrQ   loggerr   rc   rh   rz   r   r   r   r   __all__rD   ra   rO   <module>r      s      . . R 7 & @ @ 3  H 
		H	% >*3( 3(  +3(l	- 	r> rJ' J	- 		4 	A
z A
H	% 	ra   