from typing import Optional

import torch
import torch.nn as nn

from ...utils import logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaRotaryEmbedding,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm import GlmConfig


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/glm-4-9b"


class GlmMLP(Phi3MLP):
    pass


class GlmRotaryEmbedding(LlamaRotaryEmbedding):
    @staticmethod
    def compute_default_rope_parameters(
        config: GlmConfig | None = None,
        device: Optional["torch.device"] = None,
        seq_len: int | None = None,
    ) -> tuple["torch.Tensor", float]:
        """
        Computes the inverse frequencies according to the original RoPE implementation
        Args:
            config ([`~transformers.PreTrainedConfig`]):
                The model configuration.
            device (`torch.device`):
                The device to use for initialization of the inverse frequencies.
            seq_len (`int`, *optional*):
                The current sequence length. Unused for this type of RoPE.
        Returns:
            Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
            post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
        """
        base = config.rope_parameters["rope_theta"]
        partial_rotary_factor = config.rope_parameters.get("partial_rotary_factor", 1.0)
        head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
        dim = int(head_dim * partial_rotary_factor)

        attention_factor = 1.0  # Unused in this type of RoPE

        # Compute the inverse frequencies
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.int64).to(device=device, dtype=torch.float) / dim))
        return inv_freq, attention_factor
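

# Editor's sketch (not part of the upstream module): with glm-4-9b-style defaults
# (head_dim=128, partial_rotary_factor=0.5 -- an assumption here), the rotary
# dimension is 64, so the static method above returns one inverse frequency per
# rotated channel pair:
#
#   >>> cfg = GlmConfig()
#   >>> inv_freq, scale = GlmRotaryEmbedding.compute_default_rope_parameters(cfg)
#   >>> inv_freq.shape  # (dim // 2,) with dim = head_dim * partial_rotary_factor
#   torch.Size([32])
#   >>> scale  # post-processing scaling factor, unused for default RoPE
#   1.0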
~u$	%	* *r   r   c                 x    U SSSS24   nU SSSS24   n[         R                  " U* U4SS9R                  S5      $ )	z*Rotates half the hidden dims of the input..r   Nr   r   r4   )r.   stackflatten)xx1x2s      r   rotate_halfrD   K   sJ    	
319B	
319B;;Ryb)11"55r   c                    UR                  U5      nUR                  U5      nUSSUR                  S   S-  24   R                  SSS9nUSSUR                  S   S-  24   R                  SSS9nUR                  S   nU SSU24   U SUS24   pvUSSU24   USUS24   pXb-  [        U5      U-  -   n
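

# Editor's sketch (not upstream code): unlike the Llama variant, which negates and
# swaps the first and second *halves* of the channel dimension, this version pairs
# *adjacent* channels, matching GLM's interleaved RoPE layout:
#
#   >>> rotate_half(torch.tensor([1.0, 2.0, 3.0, 4.0]))
#   tensor([-2.,  1., -4.,  3.])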


def apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1):
    """Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    """
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)

    # Interleave the cos/sin pairs instead of the usual half-split layout
    cos = cos[..., : cos.shape[-1] // 2].repeat_interleave(2, dim=-1)
    sin = sin[..., : sin.shape[-1] // 2].repeat_interleave(2, dim=-1)

    # Split off the rotary portion; the rest passes through unrotated
    rotary_dim = cos.shape[-1]
    q_rot, q_pass = q[..., :rotary_dim], q[..., rotary_dim:]
    k_rot, k_pass = k[..., :rotary_dim], k[..., rotary_dim:]

    # Apply rotary embeddings on the rotary portion
    q_embed = (q_rot * cos) + (rotate_half(q_rot) * sin)
    k_embed = (k_rot * cos) + (rotate_half(k_rot) * sin)

    # Concatenate back to full shape
    q_embed = torch.cat([q_embed, q_pass], dim=-1)
    k_embed = torch.cat([k_embed, k_pass], dim=-1)
    return q_embed, k_embed
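

# Editor's sketch (not upstream code; glm-4-9b-like sizes are an assumption): with
# GLM's partial rotary factor, cos/sin cover only `rotary_dim` channels, and the
# remaining head_dim - rotary_dim channels of q and k pass through unrotated:
#
#   >>> q = torch.randn(1, 32, 10, 128)  # [batch, num_heads, seq_len, head_dim]
#   >>> k = torch.randn(1, 2, 10, 128)   # fewer key/value heads (grouped-query)
#   >>> cos = torch.randn(1, 10, 64)     # [batch, seq_len, rotary_dim]
#   >>> sin = torch.randn(1, 10, 64)
#   >>> q_out, k_out = apply_rotary_pos_emb(q, k, cos, sin)  # unsqueeze_dim=1
#   >>> q_out.shape, k_out.shape
#   (torch.Size([1, 32, 10, 128]), torch.Size([1, 2, 10, 128]))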


class GlmAttention(LlamaAttention):
    def __init__(self, config: GlmConfig, layer_idx: int | None = None):
        super().__init__(config, layer_idx)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)


class GlmForCausalLM(LlamaForCausalLM):
    pass


class GlmForSequenceClassification(LlamaForSequenceClassification):
    pass


class GlmForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GlmPreTrainedModel",
    "GlmModel",
    "GlmForCausalLM",
    "GlmForSequenceClassification",
    "GlmForTokenClassification",
]
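

# Editor's note (an inference from the modular layout, not text from this file):
# "GlmPreTrainedModel" and "GlmModel" appear in __all__ without being defined here
# because modular files like this one are expanded by transformers' modular
# converter, which generates the full classes into modeling_glm.py from the
# inherited Llama implementations. A hedged usage sketch against the generated
# model, using the checkpoint named in _CHECKPOINT_FOR_DOC:
#
#   >>> from transformers import AutoModelForCausalLM, AutoTokenizer
#   >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/glm-4-9b")
#   >>> model = AutoModelForCausalLM.from_pretrained("THUDM/glm-4-9b")
#   >>> inputs = tokenizer("Hello", return_tensors="pt")
#   >>> output_ids = model.generate(**inputs, max_new_tokens=8)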