
    Z j                     H   S SK r SSKJr  SSKJr  SSKJr  SSKJr  SSK	J
r
  SSKJrJr  S	S
KJrJrJrJr  S	SKJr  SSKJr  SSKJr  \R2                  " \5      rSr " S S\5      r " S S\5      r " S S\5      r " S S\5      r  " S S\5      r! " S S\5      r"/ SQr#g)    N   )Cache)FlashAttentionKwargs)GradientCheckpointingLayer)CausalLMOutputWithPast)Unpack)TransformersKwargslogging   )GlmAttentionGlmForCausalLMGlmForSequenceClassificationGlmForTokenClassification)Phi3MLP   )
Glm4Config)Glm4RMSNormzTHUDM/GLM-4-9B-0414c                       \ rS rSrSrg)Glm4MLP#    N__name__
__module____qualname____firstlineno____static_attributes__r       v/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/glm4/modular_glm4.pyr   r   #       r   r   c                   T  ^  \ rS rSrS\S\4U 4S jjr     SS\R                  S\R                  S-  S\R                  S-  S	\
S-  S
\S-  S\\R                  \R                  4   S-  S\\   S\\R                  \\R                  \R                  4   S-  4   4S jjrSrU =r$ )Glm4DecoderLayer'   config	layer_idxc                   > [         TU ]  5         UR                  U l        [        XS9U l        [        U5      U l        [        UR                  UR                  S9U l	        [        UR                  UR                  S9U l
        [        UR                  UR                  S9U l        [        UR                  UR                  S9U l        g )N)r$   r%   )eps)super__init__hidden_sizeGlm4Attention	self_attnr   mlpr   rms_norm_epsinput_layernormpost_attention_layernormpost_self_attn_layernormpost_mlp_layernorm)selfr$   r%   	__class__s      r   r)   Glm4DecoderLayer.__init__(   s    !--&fJ6?*6+=+=6CVCVW(3F4F4FFL_L_(`%(3F4F4FFL_L_(`%"-f.@.@fFYFY"Zr   Nhidden_statesattention_maskposition_idspast_key_values	use_cacheposition_embeddingskwargsreturnc           
          UnU R                  U5      nU R                  " SUUUUUUS.UD6u  pU R                  U5      nX-   nUnU R                  U5      nU R	                  U5      nU R                  U5      nX-   nU$ )N)r6   r7   r8   r9   r:   r;   r   )r/   r,   r1   r0   r-   r2   )
r3   r6   r7   r8   r9   r:   r;   r<   residual_s
             r   forwardGlm4DecoderLayer.forward3   s     !,,];>> 
')%+ 3
 
 55mD 0 55mD///> 0r   )r*   r/   r-   r0   r2   r1   r,   )NNNFN)r   r   r   r   r   intr)   torchTensor
LongTensorr   booltupler   r   FloatTensorrA   r   __classcell__r4   s   @r   r"   r"   '   s    	[z 	[c 	[ /304(,!&HL|| t+ &&-	
  $; #5<<#=>E -. 
u  %(9(95;L;L(L"MPT"TT	U r   r"   c                       \ rS rSrSrg)r+   U   r   Nr   r   r   r   r+   r+   U   r    r   r+   c                   @   ^  \ rS rSrS\\   S\\-  4U 4S jjrSr	U =r
$ )Glm4ForCausalLMY   super_kwargsr=   c                 $   > [         TU ]  " S0 UD6$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

Example:

```python
>>> from transformers import AutoTokenizer, Glm4ForCausalLM

>>> model = Glm4ForCausalLM.from_pretrained("THUDM/GLM-4-9B-0414")
>>> tokenizer = AutoTokenizer.from_pretrained("THUDM/GLM-4-9B-0414")

>>> prompt = "Hey, are you conscious? Can you talk to me?"
>>> inputs = tokenizer(prompt, return_tensors="pt")

>>> # Generate
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
```r   )r(   rA   )r3   rQ   r4   s     r   rA   Glm4ForCausalLM.forwardZ   s    4 w...r   r   )r   r   r   r   r   r	   rH   r   rA   r   rJ   rK   s   @r   rO   rO   Y   s+    /12/ 
'	'/ /r   rO   c                       \ rS rSrSrg)Glm4ForSequenceClassificationw   r   Nr   r   r   r   rU   rU   w   r    r   rU   c                       \ rS rSrSrg)Glm4ForTokenClassification{   r   Nr   r   r   r   rX   rX   {   r    r   rX   )Glm4PreTrainedModel	Glm4ModelrO   rU   rX   )$rD   cache_utilsr   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   processing_utilsr   utilsr	   r
   glm.modeling_glmr   r   r   r   phi3.modeling_phi3r   configuration_glm4r   modeling_glm4r   
get_loggerr   logger_CHECKPOINT_FOR_DOCr   r"   r+   rO   rU   rX   __all__r   r   r   <module>rj      s        B 9 6 & 0 t t ( * & 
		H	%+ 	g 	+1 +\	L 	/n /<	$@ 		!: 	r   