
    Z jK                        S r SSKJr  SSKJr  SSKrSSKJr  SSKJr  SSK	J
r
  SS	KJr  SS
KJr  SSKJrJr  SSKJr  SSKJrJrJr  SSKJrJrJr  SSKJr  SSKJr  \R@                  " \!5      r"S r# " S S\RH                  5      r%S r&S.S jr' S/S\RH                  S\RP                  S\RP                  S\RP                  S\RP                  S-  S\)S\)4S jjr* " S S \RH                  5      r+ " S! S"\RH                  5      r, " S# S$\RH                  5      r- " S% S&\
5      r. " S' S(\RH                  5      r/\ " S) S*\5      5       r0S+ r1\ " S, S-\05      5       r2S-S*/r3g)0zPyTorch Pixtral model.    )Callable)OptionalN)nn   )ACT2FN)GradientCheckpointingLayer)BaseModelOutput)dynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging)is_flash_attention_requestedmaybe_autocastmerge_with_config_defaults)capture_outputs   )PixtralVisionConfigc                    / nU  H  nUR                   SS  u  pE[        R                  " [        R                  " U5      [        R                  " U5      SS9n[        R                  " USS9R                  SS5      R                  SS5      u  pxXq-  U-   n	UR                  U	S S 2S4   5        M     [        R                  " U5      $ )Nij)indexingdim   r   )	shapetorchmeshgridarangestackreshapechunkappendcat)
patch_embeds_list	max_width	positionspatchheightwidthmeshh_gridv_grididss
             }/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pixtral/modeling_pixtral.pyposition_ids_in_meshgridr3   %   s    I"BC(~~ell62ELL4GRVWTr2::2qAGG2N 6)QT# # 99Y    c                      ^  \ rS rSr% Sr\R                  \S'   SS\4U 4S jjjr	\
   SS\S-  S\S   S	\S-  S
\S\4   4S jj5       r\R                   " 5       \S 5       5       rSrU =r$ )PixtralRotaryEmbedding0   a  
The key with pixtral embedding is just that you have a frequency for each pixel positions.
If you have height x width pixels (or embedding pixels), then the frequency used for ROPE
is given by indexing the pre_computed frequency on the width and height.

What you output is of dimension (batch, height * width, dim) with dim the embed dim.

This simply means that for each image hidden state, you are going to add
a corresponding positional embedding, based on its index in the grid.
inv_freqNconfigc                   > [         TU ]  5         Xl        U R                  R                  S   U l        U R
                  nU R                  S:w  a/  [        U R                  R                   SU R                   S35      eU" U R                  U5      u  pVU R                  SUSS9  U R                  SUR                  5       SS9  g )	N	rope_typedefaultz7 does not support non-default RoPE, but got `rope_type=`r8   F)
persistentoriginal_inv_freq)super__init__r9   rope_parametersr;   compute_default_rope_parameters
ValueError	__class____name__register_bufferclone)selfr9   device
layer_typerope_init_fnr8   attention_scalingrE   s          r2   rA   PixtralRotaryEmbedding.__init__>   s    44[A!%!E!E>>Y&>>**++bcgcqcqbrrst  '34;;&G#ZeD0(..2BuUr4   rJ   ztorch.deviceseq_lenreturnztorch.Tensorc                     U R                   S   n[        U SS5      =(       d    U R                  U R                  -  nSnU R                  U R
                  -  n[        R                  " U5      n[        R                  " U5      nSU[        R                  " SUS5      R                  5       U-  -  -  n	[        R                  " XySSS2   5      R                  5       n
[        R                  " XSSS2   5      R                  5       n[        R                  " U
SS2SSS24   R                  SUS5      USSS2SS24   R                  USS5      /SS	9R                  SUS-  5      n[        R                  " X4SS	9nX4$ )
aH  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PreTrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

rope_thetahead_dimNg      ?r   r   r   r   r   )rB   getattrhidden_sizenum_attention_heads
image_size
patch_sizer    r"   floatouterr'   repeatr$   )r9   rJ   rO   baser   attention_factormax_patches_per_sidehwfreqsfreqs_hfreqs_wr8   s                r2   rC   6PixtralRotaryEmbedding.compute_default_rope_parametersN   sf   & %%l3fj$/c63E3EIcIc3c  &00F4E4EELL-.LL-.tQQ 7 = = ?# EFG++ass,224++aqt!t-335994
#**1.BAFa
#**+?AF 
 '"cQh
 	 99h1r:))r4   c                    U R                   U   n[        UR                  R                  [        5      (       a0  UR                  R                  S:w  a  UR                  R                  OSn[        USS9   UnUR                  5       nUR                  5       nS S S 5        WR                  UR                  S9WR                  UR                  S94$ ! , (       d  f       N@= f)NmpscpuF)device_typeenableddtype)
r8   
isinstancerJ   typestrr   cossintork   )rI   xposition_idsra   rh   embro   rp   s           r2   forwardPixtralRotaryEmbedding.forward{   s     l+'1!((--'E'E!((--[`J`ahhmmfkUCC'')C'')C D
 vvAGGv$cff177f&;;; DCs   5#C
C )r9   r;   NN)NNN)rF   
__module____qualname____firstlineno____doc__r    Tensor__annotations__r   rA   staticmethodr   inttuplerY   rC   no_gradr
   ru   __static_attributes____classcell__rE   s   @r2   r6   r6   0   s    	 llV2 V V  -1+/"**#d***(** t** 
~u$	%	** **X ]]_<  <r4   r6   c                     U SSU R                   S   S-  24   nU SU R                   S   S-  S24   n[        R                  " U* U4SS9$ )z*Rotates half the hidden dims of the input..Nr   r   r   )r   r    r'   )rr   x1x2s      r2   rotate_halfr      sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r4   c                     UR                  U5      nUR                  U5      nX-  [        U 5      U-  -   nX-  [        U5      U-  -   nXV4$ )aI  Applies Rotary Position Embedding to the query and key tensors.

Args:
    q (`torch.Tensor`): The query tensor.
    k (`torch.Tensor`): The key tensor.
    cos (`torch.Tensor`): The cosine part of the rotary embedding.
    sin (`torch.Tensor`): The sine part of the rotary embedding.
    unsqueeze_dim (`int`, *optional*, defaults to 1):
        The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
        sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
        that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
        k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
        cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
        the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
    `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
)	unsqueezer   )qkro   rp   unsqueeze_dimq_embedk_embeds          r2   apply_rotary_pos_embr      sS    $ --
&C
--
&Cw;q>C/0Gw;q>C/0Gr4   modulequerykeyvalueattention_maskscalingdropoutc                    [         R                  " XR                  SS5      5      U-  nUb  X-   n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr   r   )r   rk   )ptrainingr   r   )r    matmul	transposer   
functionalsoftmaxfloat32rq   rk   r   r   
contiguous)
r   r   r   r   r   r   r   kwargsattn_weightsattn_outputs
             r2   eager_attention_forwardr      s     <<}}R'<=GL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|3K''1-88:K$$r4   c                      ^  \ rS rSrSrU 4S jr  SS\R                  S\R                  S-  S\\R                  \R                  4   S-  S\	\
   S	\\R                  \R                  S-  4   4
S
 jjrSrU =r$ )PixtralAttention   zA
Multi-headed attention compatible with ALL_ATTENTION_FUNCTIONS.
c                   > [         TU ]  5         Xl        UR                  U l        UR
                  U l        U R                  U R                  -  U l        SU l        U R                  S-  U l	        SU l        UR                  U l        [        R                  " U R                  U R                  SS9U l        [        R                  " U R                  U R                  SS9U l        [        R                  " U R                  U R                  SS9U l        [        R                  " U R                  U R                  SS9U l        g )NFg      ࿩bias)r@   rA   r9   rU   	embed_dimrV   	num_headsrS   	is_causalr   attention_dropoutr   r   Lineark_projv_projq_projo_projrI   r9   rE   s     r2   rA   PixtralAttention.__init__   s    ++33$..8}}d*//iiUKiiUKiiUKiiUKr4   Nhidden_statesr   position_embeddingsr   rP   c                 2   UR                  5       u  pVnU R                  U5      nU R                  U5      n	U R                  U5      n
UR	                  XVU R
                  U R                  5      R                  SS5      nU	R	                  XVU R
                  U R                  5      R                  SS5      n	U
R	                  XVU R
                  U R                  5      R                  SS5      n
Uu  p[        XXSS9u  p[        R                  " U R                  R                  [        5      nU" U UU	U
U4U R                  (       d  SOU R                  U R                   S.UD6u  pUR#                  XVS5      R%                  5       nU R'                  U5      nX4$ )z#Input shape: Batch x Time x Channelr   r   r   )r           )r   r   r   )sizer   r   r   viewr   rS   r   r   r   get_interfacer9   _attn_implementationr   r   r   r   r$   r   r   )rI   r   r   r   r   
batch_sizepatches_query_states
key_statesvalue_statesro   rp   attention_interfacer   r   s                   r2   ru   PixtralAttention.forward   st    "/!3!3!5
Q{{=1[[/
{{=1#((dnndmm\ffghjkl__Z$..$--Xbbcdfgh
#((dnndmm\ffghjkl&#7RUjk#l (?(M(MKK,,.E)
 %8	%
  $}}C$,,LL	%
 	%
! "))*rBMMOkk+.((r4   )r9   r   r   rS   r   r   r   r   r   r   r   rw   )rF   rx   ry   rz   r{   rA   r    r|   r   r   r   ru   r   r   r   s   @r2   r   r      s    L* /3HL	()||() t+() #5<<#=>E	()
 +,() 
u||U\\D00	1() ()r4   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )
PixtralMLPi  c                   > [         TU ]  5         Xl        UR                  U l        UR                  U l        [
        R                  " U R                  U R                  SS9U l        [
        R                  " U R                  U R                  SS9U l        [
        R                  " U R                  U R                  SS9U l	        [        UR                     U l        g )NFr   )r@   rA   r9   rU   intermediate_sizer   r   	gate_projup_proj	down_projr   
hidden_actact_fnr   s     r2   rA   PixtralMLP.__init__  s    !--!'!9!94#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWXV../r4   c                     U R                  U R                  U R                  U5      5      U R                  U5      -  5      nU$ N)r   r   r   r   )rI   rr   r   s      r2   ru   PixtralMLP.forward  s6    NN4;;t~~a/@#ADLLQRO#ST	r4   )r   r9   r   r   rU   r   r   )rF   rx   ry   rz   rA   ru   r   r   r   s   @r2   r   r     s    0 r4   r   c                   x   ^  \ rS rSrS
S\SS4U 4S jjjrS\R                  S\R                  4S jrS r	S	r
U =r$ )PixtralRMSNormi  epsrP   Nc                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        X l        g)z-
PixtralRMSNorm is equivalent to T5LayerNorm
N)r@   rA   r   	Parameterr    onesweightvariance_epsilon)rI   rU   r   rE   s      r2   rA   PixtralRMSNorm.__init__  s/     	ll5::k#:; #r4   r   c                    UR                   nUR                  [        R                  5      nUR	                  S5      R                  SSS9nU[        R                  " X0R                  -   5      -  nU R                  UR                  U5      -  $ )Nr   r   T)keepdim)	rk   rq   r    r   powmeanrsqrtr   r   )rI   r   input_dtypevariances       r2   ru   PixtralRMSNorm.forward  sw    #))%((7 $$Q',,R,>%H?T?T4T(UU{{]--k:::r4   c                 ^    [        U R                  R                  5       SU R                   3$ )Nz, eps=)r   r   r   r   rI   s    r2   
extra_reprPixtralRMSNorm.extra_repr%  s*    ))*+6$2G2G1HIIr4   )r   r   )gư>)rF   rx   ry   rz   rY   rA   r    r|   ru   r   r   r   r   s   @r2   r   r     sB    $ $$ $ $;U\\ ;ell ;J Jr4   r   c                      ^  \ rS rSrU 4S jr SS\R                  S\R                  S\\R                  \R                  4   S-  S\\	   S\R                  4
S	 jjr
S
rU =r$ )PixtralAttentionLayeri)  c                    > [         TU ]  5         [        UR                  SS9U l        [        U5      U l        [        U5      U l        [        UR                  SS9U l	        g )Nh㈵>r   )
r@   rA   r   rU   attention_normr   feed_forwardr   	attentionffn_normr   s     r2   rA   PixtralAttentionLayer.__init__*  sP    ,V-?-?TJ&v.)&1&v'9'9tDr4   Nr   r   r   r   rP   c                     UnU R                  U5      nU R                  " SUUUS.UD6u  pXQ-   nUnU R                  U5      nU R                  U5      nXQ-   nU$ )a"  
Args:
    hidden_states (`torch.FloatTensor`):
        Input to the layer of shape `(batch, seq_len, embed_dim)`.
    attention_mask (`torch.FloatTensor`):
        Attention mask of shape `(batch, 1, q_len, k_v_seq_len)` where padding elements are indicated by very large negative values.
)r   r   r    )r   r   r   r   )rI   r   r   r   r   residualr   s          r2   ru   PixtralAttentionLayer.forward1  s}     !++M:>> 
') 3
 	
 !0 m4))-8 0r4   )r   r   r   r   r   )rF   rx   ry   rz   rA   r    r|   r   r   r   ru   r   r   r   s   @r2   r   r   )  ss    E IM	||  #5<<#=>E	
 +, 
 r4   r   c            
          ^  \ rS rSrU 4S jr  S
S\R                  S-  S\\R                  \R                  4   S-  S\\	   S\\
-  4S jjrS	rU =r$ )PixtralTransformeriR  c                   > [         TU ]  5         Xl        [        R                  R                  5       U l        [        UR                  5       H'  nU R                  R                  [        U5      5        M)     SU l        g )NF)r@   rA   r9   r    r   
ModuleListlayersrangenum_hidden_layersr&   r   gradient_checkpointing)rI   r9   r   rE   s      r2   rA   PixtralTransformer.__init__S  s\    hh))+v//0AKK4V<= 1&+#r4   Nr   r   r   rP   c                 V    UnU R                    H  nU" UU4SU0UD6nM     [        US9$ )a  
Args:
    inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Embeddings which serve as input to the Transformer.
    attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
        Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.

        [What are attention masks?](../glossary#attention-mask)
r   )last_hidden_state)r   r	   )rI   inputs_embedsr   r   r   r   encoder_layers          r2   ru   PixtralTransformer.forward[  sH    & &![[M) %8 	M ) ??r4   )r9   r   r   rw   )rF   rx   ry   rz   rA   r    r|   r   r   r   r	   ru   r   r   r   s   @r2   r   r   R  ss    , /3HL	@ t+@ #5<<#=>E	@
 +,@ 
	 @ @r4   r   c                   P    \ rS rSr% \\S'   SrSrSrSr	Sr
SrSrSrS/r\\S.rS	rg
)PixtralPreTrainedModeliz  r9   modelpixel_values)imageTr   )r   
attentionsr   N)rF   rx   ry   rz   r   r}   base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_supports_attention_backend_supports_flash_attn_supports_sdpa_supports_flex_attn_no_split_modulesr   r   _can_record_outputsr   r   r4   r2   r  r  z  sM    $O!&*#"&N01.&r4   r  c                    UR                   nUR                  nUR                  S   n[        R                  " U5      R
                  n[        R                  " XD4XRUS9n[        R                  " U 5      R                  S5      n[        R                  " S/U S S -   5      R                  S5      n[        X5       H  u  pSXiU
2X24'   M     US S S S 2S S 24   R                  UR                  S   SSS5      nU$ )Nr   )
fill_valuerk   rJ   r   r   )rk   rJ   r   r    finfominfulltensorcumsumzipexpand)r(   r  rk   rJ   rO   d_mincausal_maskblock_end_idxblock_start_idxstartends              r2   generate_block_attention_maskr#    s    LLE]]Fll1oGKK""E**g/EW]^KLL!23::2>MllA3):3B)?#?@GGKO/9
,-#Iuy() : dD!Q./66v||A2rRKr4   c                      ^  \ rS rSrSrU 4S jrS r\\\	 SS\
R                  S\
R                  S-  S\\   S	\\-  4S
 jj5       5       5       rSrU =r$ )PixtralVisionModeli  vision_encoderc                 n  > [         TU ]  U5        Xl        [        R                  " UR
                  UR                  UR                  UR                  SS9U l        UR                  U l        [        UR                  SS9U l
        [        U5      U l        [        U5      U l        U R                  5         g )NF)in_channelsout_channelskernel_sizestrider   r   r   )r@   rA   r9   r   Conv2dnum_channelsrU   rX   
patch_convr   ln_prer   transformerr6   patch_positional_embedding	post_initr   s     r2   rA   PixtralVisionModel.__init__  s     ))++++))$$
 !++$V%7%7TB-f5*@*H'r4   c                     U R                   $ r   )r.  r   s    r2   get_input_embeddings'PixtralVisionModel.get_input_embeddings  s    r4   Nr  image_sizesr   rP   c           
         Uc  UR                   u  pEpgXg4/U-  nU R                  R                  R                  nU R                  UR	                  US95      n	[        X5       V
Vs/ s H1  u  pU
SS US   U R                  -  2S US   U R                  -  24   PM3     nn
n[        R                  " U Vs/ s H  oR                  S5      R                  PM     snSS9R                  S5      n	U R                  U	5      n	[        XR                  R                  U R                  R                  -  S9nUR                  S5      R	                  U	R                   SS9US	'   U R#                  X5      n[%        U R                  5      (       a  S nO9['        U Vs/ s H!  oR                   S
   UR                   S   -  PM#     snU	5      nU R(                  " U	4UUS.UD6$ s  snn
f s  snf s  snf )Nrj   .r   r   r   )r)   T)non_blockingrs   r   r   )r   r   )r   r.  r   rk   rq   r  rX   r    r'   flattenTr   r/  r3   r9   rW   rJ   r1  r   r#  r0  )rI   r  r7  r   r   r   r,   r-   target_dtypepatch_embedsembedr   r(   r   rs   r   r   s                    r2   ru   PixtralVisionModel.forward  s    +7+=+=(J6"?+j8K --33|\'JK  #<=
= #5$q'T__457U$q'T__:T7UUV= 	 
 yy:K!L:KQ))A,..:K!LRST^^_`a{{<0 0)?)?4;;CYCY)Y
 ".!7!7!:!=!=l>Q>Q`d!=!e~"==lY'44!N:4EF4Eqqwwr{*4EFN 
) 3
 	
 	
3
 "M  Gs   &8G5$G"(G')r9   r/  r.  r1  rX   r0  r   )rF   rx   ry   rz   r
  rA   r5  r   r   r   r    r|   r   r   r   r	   ru   r   r   r   s   @r2   r%  r%    sz    ("   ,0+
ll+
 \\D(+
 +,	+

 
	 +
    +
r4   r%  )r   )r   )4r{   collections.abcr   typingr   r    r   activationsr   modeling_layersr   modeling_outputsr	   modeling_rope_utilsr
   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr   r   r   utils.output_capturingr   configuration_pixtralr   
get_loggerrF   loggerr3   Moduler6   r   r   r|   rY   r   r   r   r   r   r   r  r#  r%  __all__r   r4   r2   <module>rP     su    $    ! 9 / 6 F & @ @ e e 5 6 
		H	% U<RYY U<r(B %II%<<% 
% <<	%
 LL4'% % %.?)ryy ?)F "JRYY J(&6 &R%@ %@P _  "  E
/ E
 E
P  !9
:r4   