
    Z j/                        S SK r S SK Jr  SSKJr  SSKJr  SSKJr  SSKJ	r	  SSK
JrJr  SS	KJrJr  S
SKJrJrJrJrJrJr  S
SKJr  SSKJr  \R6                  " \5      r " S S\5      r " S S\R>                  5      r  " S S\R>                  5      r! " S S\5      r" " S S\5      r# " S S\5      r$ " S S\5      r% " S S\5      r&/ SQr'g)     N)nn   )ACT2FN)Cache)BaseModelOutputWithPooling)Unpack)auto_docstringlogging)can_return_tuplemerge_with_config_defaults   )LlavaCausalLMOutputWithPastLlavaForConditionalGeneration
LlavaModelLlavaModelOutputWithPastLlavaPreTrainedModelTransformersKwargs)MistralRMSNorm   )Mistral3Configc                       \ rS rSrSrg)Mistral3RMSNorm(    N__name__
__module____qualname____firstlineno____static_attributes__r       ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/mistral3/modular_mistral3.pyr   r   (       r!   r   c                      ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  S\R                  4S jr	S	r
U =r$ )
Mistral3PatchMerger,   z4
Learned merging of spatial_merge_size ** 2 patches
configc                   > [         TU ]  5         Xl        UR                  R                  nUR
                  U l        U R                  R                  R                  U l        [        R                  " X R
                  S-  -  USS9U l	        g )Nr   Fbias)
super__init__r'   vision_confighidden_sizespatial_merge_size
patch_sizer   Linearmerging_layer)selfr'   r.   	__class__s      r"   r,   Mistral3PatchMerger.__init__1   sn    **66"(";";++33>>YY{5L5La5O'OQ\chir!   image_featuresimage_sizesreturnc                    U Vs/ s H&  o3S   U R                   -  US   U R                   -  4PM(     nnU VVs/ s H	  u  pEXE-  PM     nnnUR                  S   n/ n[        UR                  U5      5       H  u  pX)   u  pEU
R	                  XEU5      R                  SSS5      R                  S5      n[        R                  R                  R                  XR                  U R                  S9nUR	                  XpR                  S-  -  S5      R                  5       nUR                  U5        M     [        R                  " USS9nU R                  U5      nU$ s  snf s  snnf )Nr   r   r   )kernel_sizestridedim)r0   shape	enumeratesplitviewpermute	unsqueezetorchr   
functionalunfoldr/   tappendcatr2   )r3   r6   r7   
image_sizehwtokens_per_imagedpermuted_tensorimage_indeximage_tokens
image_gridgrids                r"   forwardMistral3PatchMerger.forward:   s[   cn
cnU_]doo-z!}/OPcn 	 
 /::kdaAEk:  $)2>3G3GHX3Y)Z%K+DA%**13;;Aq!DNNqQJ88&&--(?(?H_H_ . D 99Q!8!8!!;;R@BBDD""4( *[ ?:++N;)
 ;s
   -EE!)r'   r2   r0   r/   )r   r   r   r   __doc__r   r,   rE   TensorrU   r    __classcell__r4   s   @r"   r%   r%   ,   sD    j~ jell  RWR^R^  r!   r%   c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )Mistral3MultiModalProjectorR   r'   c                   > [         TU ]  5         [        UR                  R                  UR
                  R                  S9U l        [        U5      U l	        [        UR                  [        5      (       a  SO[        UR                  5      U l        [        R                   " UR                  R                  U R                  -  UR
                  R                  UR"                  S9U l        [&        UR(                     U l        [        R                   " UR
                  R                  UR
                  R                  UR"                  S9U l        g )N)epsr   r)   )r+   r,   r   r-   r.   text_configrms_norm_epsnormr%   patch_merger
isinstancevision_feature_layerintlennum_feature_layersr   r1   multimodal_projector_biaslinear_1r   projector_hidden_actactlinear_2)r3   r'   r4   s     r"   r,   $Mistral3MultiModalProjector.__init__S   s    #F$8$8$D$D&J\J\JiJij	/7 F77==A3vGbGbCc 	 		  ,,t/F/FF**11

 &556		**F,>,>,J,JQWQqQq
r!   r6   r7   c                     U R                  U5      nU R                  X5      nU R                  U5      nU R                  U5      nU R	                  U5      nU$ N)rb   rc   rj   rl   rm   )r3   r6   r7   hidden_statess       r"   rU   #Mistral3MultiModalProjector.forwarde   sP    >2**>Gn5/m4r!   )rl   rj   rm   rb   rh   rc   )r   r   r   r   r   r,   rE   rX   rU   r    rY   rZ   s   @r"   r\   r\   R   s/    
~ 
$ell   r!   r\   c                       \ rS rSrSrg)Mistral3CausalLMOutputWithPastn   r   Nr   r   r!   r"   rt   rt   n   r#   r!   rt   c                       \ rS rSrSrg)Mistral3ModelOutputWithPastr   r   Nr   r   r!   r"   rw   rw   r   r#   r!   rw   c                       \ rS rSrSrg)Mistral3PreTrainedModelv   r   Nr   r   r!   r"   rz   rz   v   r#   r!   rz   c                      \ rS rSr\\\" SS9  SS\R                  S\R                  S\
\\
   -  \\
   -  S-  S\S-  S	\\   S
\\-  4S jj5       5       5       r\\\         SS\R$                  S-  S\R                  S-  S\R                  S-  S\R$                  S-  S\S-  S\R                  S-  S\
\\
   -  \\
   -  S-  S\S-  S\R                  S-  S	\\   S
\\-  4S jj5       5       5       rSrg)Mistral3Modelz   zWObtains image last hidden states from the vision tower and apply multimodal projection.)custom_introNpixel_valuesr7   re   output_hidden_stateskwargsr8   c                    UR                  5        VVs0 s H  u  pgUc  M
  Xg_M     nnnU R                  " U4USSS.UD6n[        U[        5      (       a  UR                  U   n	O3U V
s/ s H  oR                  U
   PM     nn
[
        R                  " USS9n	U R                  U	R                  S5      U5      nU R                  R                  U R                  R                  -  n[
        R                  " X,R                  S9U-  R                  SS9R                  5       n[
        R                   " UR                  S5      U5      nXl        U$ s  snnf s  sn
f )NT)r7   r   return_dictr:   r=   r   )device)itemsvision_towerrd   rf   rq   rE   rJ   multi_modal_projectorsqueezer0   r'   r/   	as_tensorr   prodtolistrA   pooler_output)r3   r   r7   re   r   r   kvimage_outputsselected_image_feature	layer_idxhs_poolr6   downsample_ratiosplit_sizess                  r"   get_image_features Mistral3Model.get_image_features{   sN    $*<<>C>41Q$!$>C))
#!%	

 
 *C00%2%@%@AU%V"OcdOc)229=OcGd%*YYwB%?"334J4R4RST4UWbc,,77$++:X:XX__[1F1FGK[[aafhaippr 	 ^%;%;A%>L&4#3 D es   	EE*E	input_idsattention_maskposition_idspast_key_valuesinputs_embeds	use_cachec
           	          US L US L-  (       a  [        S5      eUc  U R                  5       " U5      nUbv  U R                  UUU	SS9R                  n[        R
                  " USS9R                  UR                  UR                  5      nU R                  XUS9nUR                  X5      nU R                  " S	UUUUUS.U
D6n[        UR                  UR                  UR                  UR                   Ub  WS9$ S S9$ )
Nz:You must specify exactly one of input_ids or inputs_embedsT)r   re   r7   r   r   r=   )r   r6   )r   r   r   r   r   )last_hidden_stater   rq   
attentionsimage_hidden_statesr   )
ValueErrorget_input_embeddingsr   r   rE   rJ   tor   dtypeget_placeholder_maskmasked_scatterlanguage_modelrw   r   r   rq   r   )r3   r   r   r   r   r   r   re   r   r7   r   r6   special_image_maskoutputss                 r"   rU   Mistral3Model.forward   s@     -t";<YZZ  557	BM#!44)%9' 	 5 
 m  #YY~1=@@AUAUWdWjWjkN!%!:!:~ "; " *889K\M%% 
)%+'
 
 +%77#33!//))2>2J
 	

 QU
 	
r!   r   )NN)	NNNNNNNNN)r   r   r   r   r   r   r	   rE   FloatTensorrX   rf   listboolr   r   tupler   r   
LongTensorr   rw   rU   r    r   r!   r"   r}   r}   z   s   n DH,0!''! \\! "DIoS	9D@	!
 #Tk! +,! 
+	+!   
!F   .215.204(,26CG!%+//
##d*/
 ''$./
 t+	/

 &&-/
 /
 ((4//
 "DIoS	9D@/
 $;/
 \\D(/
 +,/
 
,	,/
    /
r!   r}   c                       \ rS rSr\\\ SS\R                  S\R                  S\
\\
   -  \\
   -  S-  S\\   S\\-  4
S jj5       5       5       r\\\          SS	\R"                  S-  S\R                  S-  S
\R                  S-  S\R"                  S-  S\S-  S\R                  S-  S\R"                  S-  S\S-  S\
\R                  -  S\R                  S-  S\\   S\\-  4S jj5       5       5       rSrg) Mistral3ForConditionalGeneration   Nr   r7   re   r   r8   c                 B    U R                   R                  " SUUUS.UD6$ )N)r   r7   re   r   )modelr   )r3   r   r7   re   r   s        r"   r   3Mistral3ForConditionalGeneration.get_image_features   s3     zz,, 
%#!5
 	
 	
r!   r   r   r   r   r   labelsr   logits_to_keepc                    U R                   " SUUUUUUUU
S.UD6nUS   n[        U	[        5      (       a  [        U	* S5      OU	nU R	                  USS2USS24   5      nSnUb3  U R
                  " SXU R                  R                  R                  S.UD6n[        UUUR                  UR                  UR                  UR                  S9$ )a  
Example:

```python
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO
>>> from transformers import AutoProcessor, Mistral3ForConditionalGeneration

>>> model = Mistral3ForConditionalGeneration.from_pretrained("mistralai/Mistral-Small-3.1-24B-Instruct-2503")
>>> processor = AutoProcessor.from_pretrained("mistralai/Mistral-Small-3.1-24B-Instruct-2503")

>>> prompt = "<s>[INST][IMG]What is the image?[/INST]"
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> inputs = processor(images=image, text=prompt, return_tensors="pt")

>>> # Generate
>>> generate_ids = model.generate(**inputs, max_new_tokens=15)
>>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"What is the image?The image depicts two cats lying on a pink blanket."
```)r   r   r   r   r   r   r   r7   r   N)logitsr   
vocab_size)lossr   r   rq   r   r   r   )r   rd   rf   slicelm_headloss_functionr'   r`   r   rt   r   rq   r   r   )r3   r   r   r   r   r   r   r   r   r   r7   r   r   rq   slice_indicesr   r   s                    r"   rU   (Mistral3ForConditionalGeneration.forward   s    R ** 

%)%+'#

 

  
8B>SV8W8W~ot4]kmA}a,?@A%% 9P9P9[9[_eD .#33!//)) ' ; ;
 	
r!   r   rp   )
NNNNNNNNr   N)r   r   r   r   r   r   r	   rE   r   rX   rf   r   r   r   r   r   r   r   r   r   rt   rU   r    r   r!   r"   r   r      s   
 DH	
''
 \\
 "DIoS	9D@	

 +,
 
+	+
    
   .215.204(,26*.!%-.+/C
##d*C
 ''$.C
 t+	C

 &&-C
 C
 ((4/C
   4'C
 $;C
 ell*C
 \\D(C
 +,C
 
/	/C
    C
r!   r   )r}   rz   r   )(rE   r   activationsr   cache_utilsr   modeling_outputsr   processing_utilsr   utilsr	   r
   utils.genericr   r   llava.modeling_llavar   r   r   r   r   r   mistral.modeling_mistralr   configuration_mistral3r   
get_loggerr   loggerr   Moduler%   r\   rt   rw   rz   r}   r   __all__r   r!   r"   <module>r      s       !   : & , I  6 2 
		H	%	n 	#")) #L")) 8	%@ 		": 		2 	[
J [
|X
'D X
vr!   