
    Z j                        S SK r S SKJr  S SK Jr  SSKJrJr  SSKJr  SSK	J
r
Jr  SSKJr  SS	KJr  SS
KJrJr  SSKJr  SSKJr  SSKJr  SSKJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$  \" SS9\ " S S\5      5       5       r% " S S\ 5      r& " S S\5      r' " S S\#5      r( " S S\5      r) " S S\"5      r* " S S\$5      r+ " S  S!\!5      r, " S" S#\5      r- " S$ S%\5      r. " S& S'\5      r/ " S( S)\5      r0/ S*Qr1g)+    N)strict)nn   )CacheDynamicCache)PreTrainedConfig)create_causal_mask!create_sliding_window_causal_mask)BaseModelOutputWithPast)Unpack)TransformersKwargsauto_docstring)merge_with_config_defaults)capture_outputs   )MistralConfig)Qwen2AttentionQwen2DecoderLayerQwen2ForCausalLMQwen2ForQuestionAnsweringQwen2ForSequenceClassificationQwen2ForTokenClassificationQwen2MLP
Qwen2ModelQwen2PreTrainedModelQwen2RMSNormQwen2RotaryEmbeddingz$mistralai/Ministral-8B-Instruct-2410)
checkpointc                   >    \ rS rSr% SrSrSr\\   S-  \	S'   S r
Srg)MinistralConfig,   al  
Example:

```python
>>> from transformers import MinistralModel, MinistralConfig

>>> # Initializing a Ministral 8B style configuration
>>> configuration = MinistralConfig()

>>> # Initializing a model from the Ministral 8B style configuration
>>> model = MinistralModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```	ministralNlayer_typesc                     U R                   c  U R                  U l         U R                  c$  U R                  b  SOS/U R                  -  U l        [
        R                  " U 40 UD6  g )Nsliding_attentionfull_attention)num_key_value_headsnum_attention_headsr#   sliding_windownum_hidden_layersr   __post_init__)selfkwargss     ڀ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/ministral/modular_ministral.pyr+   MinistralConfig.__post_init__C   sg    ##+'+'?'?D$#'+':':'F#L\ && 'D 	&&t6v6    )r#   r'   )__name__
__module____qualname____firstlineno____doc__
model_typer#   liststr__annotations__r+   __static_attributes__ r0   r.   r    r    ,   s&      J$(KcT!(	7r0   r    c                       \ rS rSrSrg)MinistralMLPO   r;   Nr1   r2   r3   r4   r:   r;   r0   r.   r=   r=   O       r0   r=   c                   0   ^  \ rS rSrS\4U 4S jjrSrU =r$ )MinistralAttentionS   	layer_idxc                   > [         TU ]  X5        [        R                  " UR                  UR
                  U R                  -  SS9U l        [        R                  " UR                  UR                  U R                  -  SS9U l	        [        R                  " UR                  UR                  U R                  -  SS9U l
        g )NF)bias)super__init__r   Linearhidden_sizer(   head_dimq_projr'   k_projv_proj)r,   configrD   	__class__s      r.   rH   MinistralAttention.__init__T   s    +ii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkr0   )rM   rL   rN   )r1   r2   r3   r4   intrH   r:   __classcell__rP   s   @r.   rB   rB   S   s    l# l lr0   rB   c                       \ rS rSrSrg)MinistralRMSNorm\   r;   Nr?   r;   r0   r.   rV   rV   \   r@   r0   rV   c                       \ rS rSrSrg)MinistralDecoderLayer`   r;   Nr?   r;   r0   r.   rY   rY   `   r@   r0   rY   c                       \ rS rSrSrg)MinistralPreTrainedModeld   r;   Nr?   r;   r0   r.   r\   r\   d   r@   r0   r\   c                       \ rS rSrSrg)MinistralRotaryEmbeddingh   r;   Nr?   r;   r0   r.   r_   r_   h   r@   r0   r_   c                     ^  \ rS rSrS\4U 4S jjr\\\      SS\	R                  S-  S\	R                  S-  S\	R                  S-  S\S-  S	\	R                  S-  S
\S-  S\\   S\4S jj5       5       5       rSrU =r$ )MinistralModell   rO   c                 (   > [         TU ]  U5        U ?g )N)rG   rH   has_sliding_layers)r,   rO   rP   s     r.   rH   MinistralModel.__init__m   s     #r0   N	input_idsattention_maskposition_idspast_key_valuesinputs_embeds	use_cacher-   returnc           
         US L US L-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      n[        U=n	[        5      (       d)  U R                  UUUUS.n
[        S
0 U
D6[        S
0 U
D6S.n	UnU R                  X5      n[        U R                   S U R                  R"                   5       H-  u  pU" U4XR                  R$                  U      UUUUS.UD6nM/     U R'                  U5      n[)        UU(       a  US	9$ S S	9$ )Nz:You must specify exactly one of input_ids or inputs_embeds)rO   r      )device)rO   rk   rh   rj   ri   )r&   r%   )rh   ri   rj   rl   position_embeddings)last_hidden_staterj   r;   )
ValueErrorembed_tokensr   rO   get_seq_lengthtorcharangeshaperp   	unsqueeze
isinstancedictr	   r
   
rotary_emb	enumeratelayersr*   r#   normr   )r,   rg   rh   ri   rj   rk   rl   r-   past_seen_tokenscausal_mask_mappingmask_kwargshidden_statesrq   idecoder_layers                  r.   forwardMinistralModel.forwardq   s    -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L ?-FF ++!."0#2 ,K #5"C{"C%F%U%U#
 &"oomJ )$++6U8U8U*V WA)2;;3J3J13MN) /#$7 M !X 		-0&+/8O
 	
>B
 	
r0   r;   )NNNNNN)r1   r2   r3   r4   r    rH   r   r   r   rv   
LongTensorTensorr   FloatTensorboolr   r   r   r   r:   rS   rT   s   @r.   rb   rb   l   s    $ $   .2.204(,26!%:
##d*:
 t+:
 &&-	:

 :
 ((4/:
 $;:
 +,:
 
!:
    :
r0   rb   c                       \ rS rSrSrg)MinistralForCausalLM   r;   Nr?   r;   r0   r.   r   r      r@   r0   r   c                       \ rS rSrSrg)"MinistralForSequenceClassification   r;   Nr?   r;   r0   r.   r   r      r@   r0   r   c                       \ rS rSrSrg)MinistralForTokenClassification   r;   Nr?   r;   r0   r.   r   r      r@   r0   r   c                       \ rS rSrSrg)MinistralForQuestionAnswering   r;   Nr?   r;   r0   r.   r   r      r@   r0   r   )r    r\   rb   r   r   r   r   )2rv   huggingface_hub.dataclassesr   r   cache_utilsr   r   configuration_utilsr   masking_utilsr	   r
   modeling_outputsr   processing_utilsr   utilsr   r   utils.genericr   utils.output_capturingr   mistral.configuration_mistralr   qwen2.modeling_qwen2r   r   r   r   r   r   r   r   r   r   r   r    r=   rB   rV   rY   r\   r_   rb   r   r   r   r   __all__r;   r0   r.   <module>r      s     .  . 3 R 7 & 7 7 5 9    AB7m 7  C7B	8 	l l	| 		- 		3 		3 	B
Z B
J	+ 		)G 		&A 		$= 	r0   