
    Z j}                     d   S SK r S SKJr  S SKJr  S SKJr  S SKrS SKJr  SSK	J
r  SSKJr  SS	KJrJr  SS
KJr  SSKJrJrJr  SSKJrJr  SSKJr  SSKJrJrJrJ r J!r!  SSK"J#r#J$r$  SSK%J&r&J'r'  SSK(J)r)  SSK*J+r+J,r,J-r-  SSK.J/r/J0r0  SSK1J2r2J3r3  SSK4J5r5J6r6  S r7 " S S\Rp                  5      r9 " S S\Rp                  5      r:S r;\" S5      S_S j5       r<  S`S\Rp                  S \Rz                  S!\Rz                  S"\Rz                  S#\Rz                  S-  S$\>S-  S%\>S&\)\+   4S' jjr?\" \<5       " S( S)\Rp                  5      5       r@ " S* S+\Rp                  5      rA " S, S-\Rp                  5      rBS. rC " S/ S0\Rp                  5      rD " S1 S2\Rp                  5      rE " S3 S4\5      rF " S5 S6\Rp                  5      rG " S7 S8\Rp                  5      rH\, " S9 S:\'5      5       rI " S; S<\I5      rJ " S= S>\Rp                  5      rK " S? S@\Rp                  5      rL " SA SB\Rp                  5      rM\\, " SC SD\!5      5       5       rN " SE SF\Rp                  5      rO " SG SH\Rp                  5      rP\" SI5       " SJ SK\Rp                  5      5       rQ " SL SM\Rp                  5      rR " SN SO\Rp                  5      rSSP\Rz                  SQ\TSR\Rz                  4SS jrU\" \<5       " ST SU\Rp                  5      5       rV " SV SW\5      rW\, " SX SY\'5      5       rX " SZ S[\X5      rY " S\ S]\X\5      rZ/ S^Qr[g)a    N)Callable)	dataclass)Optional)nn   )initialization)ACT2FN)CacheDynamicCache)GenerationMixin)use_kernel_forward_from_hubuse_kernel_func_from_hubuse_kernelized_func)create_bidirectional_maskcreate_causal_mask)GradientCheckpointingLayer)"BaseModelOutputWithCrossAttentionsBaseModelOutputWithPast,BaseModelOutputWithPoolingAndCrossAttentionsCausalLMOutputWithPastModelOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)maybe_autocastmerge_with_config_defaults)OutputRecordercapture_outputs   )EvollaConfigSaProtConfigc                     U R                  U5      R                  5       n[        R                  " USS9R	                  U5      U-  nUR                  5       U-   $ )z
Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
are ignored. This is modified from fairseq's `utils.make_positions`.

Args:
    x: torch.Tensor x:

Returns: torch.Tensor
r$   dim)neinttorchcumsumtype_aslong)	input_idspadding_idxmaskincremental_indicess       {/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/evolla/modeling_evolla.py"create_position_ids_from_input_idsr5   4   sP     <<$((*D,,t3;;DADH##%33    c                   D   ^  \ rS rSrSrU 4S jr    SS jrS rSrU =r	$ )EvollaSaProtEmbeddingsD   zN
Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
c                   > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        UR                  (       a/  [        R                  " UR
                  UR                  S9U l        OS U l        [        R                  " UR                  5      U l        [        USS5      U l        U R#                  S[$        R&                  " UR(                  5      R+                  S5      SS9  UR                  U l        U R                   S:X  a9  [        R                  " UR(                  UR
                  U R,                  S9U l        UR0                  U l        UR2                  U l        S U l        g )	N)r1   epsposition_embedding_typeabsoluteposition_ids)r$   F
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsemb_layer_norm_before	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropoutgetattrr=   register_bufferr,   arangemax_position_embeddingsexpandr1   position_embeddingstoken_dropoutmask_token_idr?   selfconfig	__class__s     r4   rD   EvollaSaProtEmbeddings.__init__I   s2   !||F,=,=v?Q?Q_e_r_rs'' ll6+=+=6CXCXYDO"DOzz&"<"<='.v7PR\']$ELL)G)GHOOPWXej 	 	
 "..'':5')||..0B0BPTP`P`(D$ $11#11 r6   c                    Uc*  Ub  [        XR                  5      nOU R                  U5      nUc  U R                  U5      nUnU R                  (       a  Ub  UR                  XR                  :H  R                  S5      S5      nSnUb  UR                  S5      OUR                  S   nXR                  :H  R                  S5      R                  5       U-  nUSU-
  -  SU-
  S S 2S S 4   -  R                  UR                  5      nU R                  S:X  a  U R                  U5      n	XY-   nU R                  b  U R                  U5      nUb,  XRR                  S5      -  R                  UR                  5      nU$ )Nr@           gQ?r$   r>   )r5   r1   &create_position_ids_from_inputs_embedsrI   rW   masked_fillrX   	unsqueezesumshapefloattodtyper=   rV   rM   )
rZ   r0   attention_maskr?   inputs_embeds
embeddingsmask_ratio_trainsrc_lengthsmask_ratio_observedrV   s
             r4   forwardEvollaSaProtEmbeddings.forwardb   s    $A)M]M]^#JJ=Y  00;M #
 )"7#//>P>P1P0[0[\^0_adeJ)4B4N.,,R0T]TcTcdeTfK#,0B0B#B"G"G"K"Q"Q"SVa"a$,<(<=EXAXZ[]acgZg@hhll  J '':5"&":":<"H#9J??&4J%$'?'?'CCGG
HXHXYJ r6   c                    UR                  5       SS nUS   n[        R                  " U R                  S-   X0R                  -   S-   [        R                  UR
                  S9nUR                  S5      R                  U5      $ )z
We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

Args:
    inputs_embeds: torch.Tensor

Returns: torch.Tensor
Nr@   r$   )rg   devicer   )sizer,   rS   r1   r/   rq   rb   rU   )rZ   ri   input_shapesequence_lengthr?   s        r4   r`   =EvollaSaProtEmbeddings.create_position_ids_from_inputs_embeds   s~     $((*3B/%a.||q /4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r6   )	rP   rM   rX   r1   r=   rV   r?   rW   rI   NNNN)
__name__
__module____qualname____firstlineno____doc__rD   rn   r`   __static_attributes____classcell__r\   s   @r4   r8   r8   D   s+    !6 /b= =r6   r8   c                      ^  \ rS rSr% Sr\R                  \S'   SS\4U 4S jjjr	\
   SS\S-  SSS	\S-  S
\S\4   4S jj5       r\R                  " 5       \SS j5       5       rSrU =r$ )EvollaSaProtRotaryEmbedding   z
Rotary position embeddings.
Implementation based on [ModernBERT's RotaryEmbedding](https://github.com/huggingface/transformers/blob/aad13b87ed59f2afcfaebc985f403301887a35fc/src/transformers/models/modernbert/modeling_modernbert.py#L94).
inv_freqNr[   c                    > [         TU ]  5         Xl        0 U l        U R	                  U R                  U5      u  p4U R                  SU5        [        U SU5        g )Nr   attention_scaling)rC   rD   r[   	rope_typecompute_default_rope_parametersrR   setattr)rZ   r[   rq   curr_inv_freqcurr_attention_scalingr\   s        r4   rD   $EvollaSaProtRotaryEmbedding.__init__   sU    040T0TUYU`U`bh0i-Z7)+ABr6   rq   ztorch.device | Noneseq_lenreturntorch.Tensorc           	         U R                   n[        U SS5      =(       d    U R                  U R                  -  nSnSU[        R
                  " SUS[        R                  S9R                  U[        R                  S9U-  -  -  nXe4$ )aI  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PreTrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.

Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
head_dimN      ?r      rg   rq   rg   )	
rope_thetarQ   rG   num_attention_headsr,   rS   int64rf   re   r[   rq   r   baser)   attention_factorr   s          r4   r   ;EvollaSaProtRotaryEmbedding.compute_default_rope_parameters   s    (   fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 ))r6   c                 @   [        U S5      n[        U S5      nUS S S 2S 4   R                  5       R                  UR                  S   SS5      R	                  UR
                  5      nUS S 2S S S 24   R                  5       n[        UR
                  R                  [        5      (       a0  UR
                  R                  S:w  a  UR
                  R                  OSn[        USS	9   UR                  5       UR                  5       -  R                  SS
5      n	[        R                  " X4SS9n
U
R                  5       U-  nU
R                  5       U-  nS S S 5        WR	                  UR                  S9WR	                  UR                  S94$ ! , (       d  f       N@= f)Nr   r   r   r@   r$   mpscpuFdevice_typeenabledr   r(   r   )rQ   re   rU   rd   rf   rq   
isinstancetypestrr    	transposer,   catcossinrg   )rZ   xr?   
layer_typer   r   inv_freq_expandedposition_ids_expandedr   freqsembr   r   s                r4   rn   #EvollaSaProtRotaryEmbedding.forward   sZ    4,#D*=>$T1d]399;BB<CUCUVWCXZ\^_`ccdedldlm ,QaZ 8 > > @'1!((--'E'E!((--[`J`ahhmmfkUC&,,.1F1L1L1NNYYZ[]^_E))UN3C'')//C'')//C	 D vvAGGv$cff177f&;;; DCs   'A.F
F)r[   r   NNNN)rw   rx   ry   rz   r{   r,   Tensor__annotations__r&   rD   staticmethodr+   tuplere   r   no_gradr   rn   r|   r}   r~   s   @r4   r   r      s    
 llC| C C &*(,"*t#*%* t* 
~u$	%	* *< ]]_<  <r6   r   c                     U SSU R                   S   S-  24   nU SU R                   S   S-  S24   n[        R                  " U* U4SS9$ )z*Rotates half the hidden dims of the input..Nr@   r   r(   )rd   r,   r   )r   x1x2s      r4   rotate_halfr      sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r6   rotary_pos_embc                 b   U R                   nUR                  U5      nUR                  U5      nU R                  5       U-  [        U R                  5       5      U-  -   nUR                  5       U-  [        UR                  5       5      U-  -   nUR	                  U5      UR	                  U5      4$ )aI  Applies Rotary Position Embedding to the query and key tensors.

Args:
    q (`torch.Tensor`): The query tensor.
    k (`torch.Tensor`): The key tensor.
    cos (`torch.Tensor`): The cosine part of the rotary embedding.
    sin (`torch.Tensor`): The sine part of the rotary embedding.
    unsqueeze_dim (`int`, *optional*, defaults to 1):
        The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
        sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
        that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
        k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
        cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
        the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
    `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
)rg   rb   re   r   rf   )qkr   r   unsqueeze_dimoriginal_dtypeq_embedk_embeds           r4   apply_rotary_pos_embr      s    & WWN
--
&C
--
&Cwwy3;qwwy#9C#?@Gwwy3;qwwy#9C#?@G::n%wzz.'AAAr6   modulequerykeyvaluerh   scalingrP   kwargsc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  nUb  X-   n[        R
                  R                  USS9n[        R
                  R                  XU R                  S9n[        R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr@         r   r   r(   )ptrainingr$   )
rr   r,   matmulr   r   
functionalsoftmaxrP   r   
contiguous)
r   r   r   r   rh   r   rP   r   attn_weightsattn_outputs
             r4   eager_attention_forwardr     s     **R.D( <<}}Q':;gEL!#4==((2(>L==((6??([L,,|3K''1-88:K$$r6   c                      ^  \ rS rSrSU 4S jjr    SS\R                  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\\	   S
\
\R                     4S jjrSrU =r$ )EvollaSaProtSelfAttentioni'  Nc                 t  > [         TU ]  5         Xl        UR                  UR                  -  S:w  a7  [        US5      (       d&  [        SUR                   SUR                   S35      eUR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        UR                  U l        U=(       d    [#        USS5      U l        SU l        UR(                  U l        X0l        U R(                  =(       a    U(       + U l        g )	Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r=   r>   r   )rC   rD   r[   rG   r   hasattr
ValueErrorr+   attention_head_sizeall_head_sizer   Linearr   r   r   attention_probs_dropout_probrP   rQ   r=   r   
is_decoder	layer_idx	is_causal)rZ   r[   r=   r   is_cross_attentionr\   s        r4   rD   "EvollaSaProtSelfAttention.__init__)  sg    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
::'> (
'-zC
$  ++"C1C-Cr6   hidden_statesrh   encoder_hidden_statesencoder_attention_maskrV   r   r   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	US Ln
U
(       a  UOUnU
(       a  UOUnU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nXR                  S-  -  n	U R                  S:X  a  Uu  p[        XXSS9u  p[        R                  " U R                  R                  [        5      nU" U U	UUU4U R                  (       d  SOU R                  U R                   S.UD6u  nnUR"                  " / UQSP76 R%                  5       nUU4$ )	Nr@   r$   r   r   rotary)r   r_   rP   r   )rd   r   r   viewr   r   r   r=   r   r   get_interfacer[   _attn_implementationr   r   rP   r   reshaper   )rZ   r   rh   r   r   rV   r   rs   hidden_shapequery_layerr   current_states	key_layervalue_layerr   r   attention_interfacer   r   s                      r4   rn   !EvollaSaProtSelfAttention.forwardE  s    $))#2.CCbC$*B*BCjj/44\BLLQPQR2$>2D.-3E/>HH^,11,?II!QO	jj055lCMMaQRS "$<$<d$BB''83*HC%9+RUjk%l"K(?(M(MKK,,.E)
 %8	%
  $}}C$,,LL	%
 	%
!\ "));;;;FFHL((r6   )r   r   r[   rP   r   r   r   r   r   r=   r   r   r   )NNFrv   )rw   rx   ry   rz   rD   r,   r   FloatTensorr   r   r   rn   r|   r}   r~   s   @r4   r   r   '  s    D> 48:>;?37.)||.) ))D0.)  %0047	.)
 !& 1 1D 8.) #\\D0.) +,.) 
u||	.) .)r6   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )EvollaSaProtSelfOutputiv  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        g r   )	rC   rD   r   r   rG   denserN   rO   rP   rY   s     r4   rD   EvollaSaProtSelfOutput.__init__w  sB    YYv1163E3EF
zz&"<"<=r6   c                 R    U R                  U5      nU R                  U5      nX-   nU$ r   r   rP   rZ   r   input_tensors      r4   rn   EvollaSaProtSelfOutput.forward|  ,    

=1]3%4r6   r   rw   rx   ry   rz   rD   rn   r|   r}   r~   s   @r4   r   r   v      >
 r6   r   c                   j   ^  \ rS rSrSU 4S jjr    S	S\R                  S-  S\\   4S jjr	Sr
U =r$ )
EvollaSaProtAttentioni  Nc                    > [         TU ]  5         [        XUS9U l        [	        U5      U l        [        R                  " UR                  UR                  S9U l        g )N)r   r   r;   )
rC   rD   r   rZ   r   outputr   rK   rG   rL   )rZ   r[   r   r   r\   s       r4   rD   EvollaSaProtAttention.__init__  sG    -f^pq	,V4f&8&8f>S>STr6   rV   r   c                 ~    U R                  U5      nU R                  " U4UUUUS.UD6u  pU R                  X5      nU$ )Nrh   r   r   rV   )rK   rZ   r  )
rZ   r   rh   r   r   rV   r   hidden_states_lnr   _s
             r4   rn   EvollaSaProtAttention.forward  sW      >>-8
)"7#9 3
 
 kk+=r6   )rK   r  rZ   )NFrv   )rw   rx   ry   rz   rD   r,   r   r   r   rn   r|   r}   r~   s   @r4   r  r    sD    U "#37 #\\D0 +, r6   r  c                 n    U S-  S[         R                  " U [        R                  " S5      -  5      -   -  $ )zr
This is the gelu implementation from the original EVOLLA_SA_PROT repo. Using F.gelu yields subtly wrong results.
g      ?r   g       @)r,   erfmathsqrt)r   s    r4   gelur    s.     s7cEIIa$))C.&899::r6   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )EvollaSaProtIntermediatei  c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        g r   )rC   rD   r   r   rG   intermediate_sizer   rY   s     r4   rD   !EvollaSaProtIntermediate.__init__  s,    YYv1163K3KL
r6   r   r   c                 >    U R                  U5      n[        U5      nU$ r   )r   r  )rZ   r   s     r4   rn    EvollaSaProtIntermediate.forward  s     

=1]+r6   )r   
rw   rx   ry   rz   rD   r,   r   rn   r|   r}   r~   s   @r4   r  r    s)    MU\\ ell  r6   r  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )EvollaSaProtOutputi  c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  5      U l	        g r   )
rC   rD   r   r   r  rG   r   rN   rO   rP   rY   s     r4   rD   EvollaSaProtOutput.__init__  sB    YYv779K9KL
zz&"<"<=r6   c                 R    U R                  U5      nU R                  U5      nX-   nU$ r   r   r   s      r4   rn   EvollaSaProtOutput.forward  r   r6   r   r  r~   s   @r4   r  r    r  r6   r  c                   l   ^  \ rS rSrU 4S jr    S	S\R                  S-  S\\   4S jjr	S r
SrU =r$ )
EvollaSaProtLayeri  c                   > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        UR                  U l        UR                  U l        U R                  (       a.  U R                  (       d  [        U  S35      e[	        USS9U l	        [        U5      U l        [        U5      U l        [        R                  " UR                   UR"                  S9U l        g )Nr$   z> should be used as a decoder model if cross attention is addedT)r   r;   )rC   rD   chunk_size_feed_forwardseq_len_dimr  	attentionr   add_cross_attentionRuntimeErrorcrossattentionr  intermediater  r  r   rK   rG   rL   rY   s     r4   rD   EvollaSaProtLayer.__init__  s    '-'E'E$.v6 ++#)#=#= ##??"dV+i#jkk"7SW"XD4V<(0f&8&8f>S>STr6   NrV   r   c                     U R                   " U4UUS.UD6nU R                  (       a;  Ub8  [        U S5      (       d  [        SU  S35      eU R                  " U4UUUUS.UD6nU R                  U5      nU$ )Nrh   rV   r(  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r	  )r%  r   r   AttributeErrorr(  feed_forward_chunk)	rZ   r   rh   r   r   rV   r   attention_outputlayer_outputs	            r4   rn   EvollaSaProtLayer.forward  s      >>
) 3
 	
 ??4@4!122$=dV D` ` 
  $22  -&;'=$7    ../?@r6   c                 l    U R                  U5      nU R                  U5      nU R                  X15      nU$ r   )rK   r)  r  )rZ   r/  attention_output_lnintermediate_outputr0  s        r4   r.  $EvollaSaProtLayer.feed_forward_chunk  s9    "nn-=>"//0CD{{#6Ir6   )	rK   r&  r%  r#  r(  r)  r   r  r$  rv   )rw   rx   ry   rz   rD   r,   r   r   r   rn   r.  r|   r}   r~   s   @r4   r!  r!    sJ    U$ "#37! #\\D0! +,!F r6   r!  c                   p   ^  \ rS rSrU 4S jr\    SS\R                  S-  S\\	   4S jj5       r
SrU =r$ )	EvollaSaProtEncoderi  c                 2  > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        [        R                  " UR                  UR                  S9U l        SU l        g s  snf )Nr;   F)rC   rD   r[   r   
ModuleListrangenum_hidden_layersr!  layerrK   rG   rL   emb_layer_norm_aftergradient_checkpointing)rZ   r[   r  r\   s      r4   rD   EvollaSaProtEncoder.__init__  sr    ]]uVMeMeGf#gGf!$5f$=Gf#gh
$&LL1C1CI^I^$_!&+# $hs   BNrV   r   c           	          [        U R                  5       H  u  pxU" U4UUUUS.UD6nM     U R                  (       a  U R                  U5      n[        US9$ )Nr	  )last_hidden_state)	enumerater<  r=  r   )	rZ   r   rh   r   r   rV   r   ilayer_modules	            r4   rn   EvollaSaProtEncoder.forward  sg      )4OA(-&;'=$7 M  5 $$ 55mDM1MRRr6   )r[   r=  r>  r<  rv   )rw   rx   ry   rz   rD   r   r,   r   r   r   rn   r|   r}   r~   s   @r4   r7  r7    sQ    ,  "#37S #\\D0S +,S Sr6   r7  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )EvollaSaProtPooleri  c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " 5       U l        g r   )rC   rD   r   r   rG   r   Tanh
activationrY   s     r4   rD   EvollaSaProtPooler.__init__  s9    YYv1163E3EF
'')r6   r   r   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   rJ  )rZ   r   first_token_tensorpooled_outputs       r4   rn   EvollaSaProtPooler.forward"  s6     +1a40

#566r6   )rJ  r   r  r~   s   @r4   rG  rG    s(    $
U\\ ell  r6   rG  c                      ^  \ rS rSr% \\S'   S/rSrSrSr	Sr
\\" \SSS9/\" \SSS9/S	.r\R                   " 5       U 4S
 j5       rSrU =r$ )EvollaSaProtPreTrainedModeli+  r[   r!  Tr$   r%  )index
layer_namer(  )r   
attentionscross_attentionsc                    > [         TU ]  U5        [        U[        5      (       a?  UR	                  UR
                  5      u  p#[        R                  " [        US5      U5        g g )Nr   )	rC   _init_weightsr   r   r   r[   initcopy_rQ   )rZ   r   r   r  r\   s       r4   rW  )EvollaSaProtPreTrainedModel._init_weights<  sO    f%f9::%EEfmmTMJJwvz2MB ;r6    )rw   rx   ry   rz   r&   r   _no_split_modules_supports_flash_attn_supports_sdpa_supports_flex_attn_supports_attention_backendr!  r"   r   _can_record_outputsr,   r   rW  r|   r}   r~   s   @r4   rQ  rQ  +  sv    ,-N"& +%&?qU`ab4AJZ[
 ]]_C Cr6   rQ  c            
          ^  \ rS rSrS\4U 4S jjrS rS r\\	 SS\
R                  S-  S\
R                  S-  S	\\
R                     \-  4S
 jj5       5       rSrU =r$ )EvollaSaProtProteinEncoderiD  r[   c                    > [         TU ]  U5        [        U5      U l        [	        US9U l        [        U5      U l        U R                  5         g Nr[   )	rC   rD   r8   rj   r   rotary_embeddingsr7  encoder	post_initrY   s     r4   rD   #EvollaSaProtProteinEncoder.__init__E  s?     08!<F!K*62r6   c                 .    U R                   R                  $ r   rj   rI   rZ   s    r4   get_input_embeddings/EvollaSaProtProteinEncoder.get_input_embeddingsL  s    ...r6   c                 $    XR                   l        g r   rl  rZ   r   s     r4   set_input_embeddings/EvollaSaProtProteinEncoder.set_input_embeddingsO  s    */'r6   Nr0   rh   r   c                    UR                  5       nUu  pVUR                  nUc  [        R                  " XV4US9nU R	                  XS9n[        U R                  UUS9n[        R                  " XgS9R                  S5      n	U R                  X5      n
U R                  " U4X*S.UD6nUS   n[        UUR                  UR                  UR                  S9$ )Nrq   r0   rh   )r[   ri   rh   r   r,  )rA  r   rT  rU  )rr   rq   r,   onesrj   r   r[   rS   rb   rg  rh  r   r   rT  rU  )rZ   r0   rh   r   rs   
batch_size
seq_lengthrq   ri   r?   rV   encoder_outputssequence_outputs                r4   rn   "EvollaSaProtProteinEncoder.forwardR  s      nn&!,
!!!"ZZ*)A6RN)[2;;')
 ||J>HHK"44]Q,,
*8
ek
 *!,;-)77&11,==	
 	
r6   )rj   rh  rg  r   )rw   rx   ry   rz   r&   rD   rn  rr  r!   r#   r,   r   r   r   rn   r|   r}   r~   s   @r4   rc  rc  D  sw    | /0   /3!
<<$&!
 t+!

 
u||	K	K!
   !
r6   rc  c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )!EvollaSequenceCompressorAttentionix  c                 X  > [         TU ]  5         US-  U l        X0l        X#-  n[        R
                  " U5      U l        [        R
                  " U5      U l        [        R                  " XSS9U l	        [        R                  " XS-  SS9U l
        [        R                  " XASS9U l        g )Nr   Fbiasr   )rC   rD   scaleheadsr   rK   
norm_medianorm_latentsr   to_qto_kvto_out)rZ   r)   dim_headr  	inner_dimr\   s        r4   rD   *EvollaSequenceCompressorAttention.__init__y  s    t^

$	,,s+LL-IIc59	YYsM>
ii	U;r6   c                 &   U R                  U5      nU R                  U5      nU R                  nU R                  U5      n[        R
                  " X4SS9nU R                  U5      R                  SSS9u  pxUR                  UR                  S5      UR                  S5      US5      R                  SSSS5      nUR                  UR                  S5      UR                  S5      US5      R                  SSSS5      nUR                  UR                  S5      UR                  S5      US5      R                  SSSS5      nXPR                  -  n[        R                  " XWR                  SS5      5      n	XR                  SSS	9R                  5       -
  n	U	R                   u  pp[        R"                  " X5      R%                  UR&                  5      nUS
S
2S
S
S
S
24   nUS
S
S
2S
S
2S
4   nUU-  nU	R)                  SU-
  R+                  5       S5      n	U	R-                  SS9n[        R                  " UU5      nUR                  SSSS5      nUR/                  UR                  S5      UR                  S5      S5      nU R1                  U5      $ )z
Args:
    x (torch.Tensor): image features
        shape (b, n1, D)
    latent (torch.Tensor): latent features
        shape (b, n2, D);  n2: num of latent tokens
r(   r   r@   r   r$   r   Tr)   keepdimNg     )r  r  r  r  r,   r   r  chunkr   rr   permuter  r   r   amaxdetachrd   rw  rf   rq   ra   boolr   r   r  )rZ   r   latentsr2   hr   kv_inputr   vsimbsnhskdokdrw  mask_expones_expattnouts                      r4   rn   )EvollaSequenceCompressorAttention.forward  s2    OOA##G,JJIIg99a\r2zz(#))2 * 
 FF166!9affQiB/771aCFF166!9affQiB/771aCFF166!9affQiB/771aC

N ll1kk"b12HHTH299;;99zz""%%dkk24q()aD()("ooq4xoo/6{{r{"ll4#kk!Q1% kk#((1+sxx{B7{{3r6   )r  r  r  r  r  r  r  )@      r  r~   s   @r4   r~  r~  x  s    <)  ) r6   r~  c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )EvollaFeedForwardi  c                   > [         TU ]  5         [        X-  5      n[        R                  " U5      U l        [        R                  " XSS9U l        [        R                  " 5       U l	        [        R                  " X1SS9U l
        g NFr  )rC   rD   r+   r   rK   normr   fc1GELUrJ  fc2)rZ   r)   multr  r\   s       r4   rD   EvollaFeedForward.__init__  sZ    
O	LL%	99S%8'')99Y%8r6   c           	      ~    U R                  U R                  U R                  U R                  U5      5      5      5      $ r   )r  rJ  r  r  )rZ   r   s     r4   rn   EvollaFeedForward.forward  s+    xx1(>?@@r6   )rJ  r  r  r  )   r  r~   s   @r4   r  r    s    9A Ar6   r  c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )!EvollaSequenceCompressorResampleri  r[   c                   > [         TU ]  5         UR                  R                  nUR                  U l        [        R                  " [        R                  " U R
                  U5      SS9U l
        [        R                  " / 5      U l        [        UR                  5       Ha  nU R                  R                  [        R                  " [!        X!R"                  UR$                  S9['        X!R(                  S9/5      5        Mc     [        R*                  " UR                  5      U l        [        R.                  " X!R                  5      U l        g )NT)requires_grad)r)   r  r  )r)   r  )rC   rD   protein_encoder_configrG   resampler_num_latentsnum_latentsr   	Parameterr,   randnr  r9  layersr:  resampler_depthappendr~  resampler_dim_headresampler_headsr  resampler_ff_multrK   r  r   protein_projector)rZ   r[   protein_repr_dimr  r\   s       r4   rD   *EvollaSequenceCompressorResampler.__init__  s    !88DD!77||EKK0@0@BR$ScghmmB'v--.AKK9 0;T;T\b\r\r *.>E]E]^		 / LL!3!34	!#+;=O=O!Pr6   c                 d   UR                   S   nUR                   u  pE[        R                  " X@R                  5      R	                  UR
                  5      n[        R                  " X&4SS9n[        R                  " U5      R	                  U R                  R
                  5      nU R                  S    UR                  SSS5      -  nUR	                  UR                  5      nU R                   H  u  pU	" XU5      U-   nU
" U5      U-   nM     U R                  U5      nU R                  U5      $ )Nr   r$   r(   r@   )rd   r,   rw  r  rf   rq   r   r  r   rg   r  r  r  )rZ   embedsr2   br  r  latent_maskrw  r  r  fftransformed_features               r4   rn   )EvollaSequenceCompressorResampler.forward  s    LLO

jj%5%5699$++Fyy$,!4 zz!} 3 34,,t$tyyQ'::**V\\*HD6D1G;GkG+G $ #44W=yy,--r6   )r  r  r  r  r  )	rw   rx   ry   rz   r%   rD   rn   r|   r}   r~   s   @r4   r  r    s    Q| Q*. .r6   r  c                       \ rS rSr% Sr\R                  S-  \S'   Sr\R                  S-  \S'   Sr	\
\R                  S4   S-  \S'   Sr\
\R                  S4   S-  \S'   Srg)	EvollaProteinEncoderModelOutputi  Nsequence_compressor_outputrA  .r   rT  r[  )rw   rx   ry   rz   r  r,   r   r   rA  r   r   rT  r|   r[  r6   r4   r  r    so     <@ 1 1D 8?26u((4/6:>M5**C/047>7;Je'',-4;r6   r  c                   t   ^  \ rS rSrS\4U 4S jjr\S\R                  S\R                  4S j5       r
SrU =r$ )EvollaProteinEncoderi  r[   c                 n   > [         TU ]  5         [        UR                  S9U l        [        US9U l        g re  )rC   rD   rc  r  modelr  sequence_compressor_resamplerrY   s     r4   rD   EvollaProteinEncoder.__init__  s.    /v7T7TU
-NV\-]*r6   r0   rh   c                     U R                  XS9nUR                  nU R                  XR5      n[        UUR                  S9$ )Nrv  )r  rA  )r  rA  r  r  )rZ   r0   rh   r   protein_outputprotein_embedssequence_reprs          r4   rn   EvollaProteinEncoder.forward  sF    iW'99::>Z.'4,>>
 	
r6   )r  r  )rw   rx   ry   rz   r%   rD   r   r,   
LongTensorr   rn   r|   r}   r~   s   @r4   r  r    s?    ^| ^
 
!1!1 
5CTCT 
 
r6   r  c                   r   ^  \ rS rSr   S
S\S-  S\S-  S\S-  4U 4S jjjrS r       SS jrS	rU =r	$ )#EvollaSequenceAlignerCrossAttentioni  Nprotein_encoder_dimstructure_encoder_dimmsa_encoder_dimc                   > [         TU ]  5         UR                  U l        UR                  U l        U R                  S-  U l        [        U R                  U R                  -  5      U l        U R                  U R                  -  U l        UR                  nUR                  nUR                  n[        R                  " U R                  U R                  5      U l        UbK  [        R                  " X R                  5      U l        [        R                  " X R                  5      U l        OS U l        S U l        UbK  [        R                  " X0R                  5      U l        [        R                  " X0R                  5      U l        OS U l        S U l        UbK  [        R                  " X@R                  5      U l        [        R                  " X@R                  5      U l        OS U l        S U l        [)        U R                  5      U l        [        R,                  " U5      U l        [        R                  " U R                  U R                  US9U l        [3        U R                  U5      U l        [        R6                  " [8        R:                  " S/5      5      U l        [        R6                  " [8        R:                  " S/5      5      U l        g )Nr   r  r_   ) rC   rD   rG   r   r  r+   r   r   $aligner_attention_probs_dropout_probaligner_enable_biasaligner_ffn_multr   r   r   key_proteinvalue_proteinkey_structurevalue_structurekey_msa	value_msaEvollaRMSNormattention_normrN   rP   out_projr  r  r  r,   tensorgate_attentiongate_ffw)	rZ   r[   r  r  r  r   enable_biasffn_multr\   s	           r4   rD   ,EvollaSequenceAlignerCrossAttention.__init__  s    	!--#)#=#= --t3
#&t'7'7$:R:R'R#S !558P8PP'-'R'R$00**YYt//1C1CD
*!yy)<>P>PQD!#+>@R@R!SD#D!%D ,!#+@BTBT!UD#%99-BDVDV#WD !%D#'D &99_6H6HIDLYY8J8JKDNDL!DN+D,<,<=zz">?		$"2"2D4D4D;W#D$4$4h? ll5<<+>?U\\3%%89r6   c	                    XgU/n	U	 V
s/ s H	  oc  M  U
PM     n	n
U	(       d  [        S5      e[        R                  " U	SS9n	U R                  U5      nU R	                  U5      nU R
                  bA  U R                  b4  UR                  U5      nU R                  U5      nU R                  U5      nOSnSnU R                  bA  U R                  b4  UR                  U5      nU R                  U5      nU R                  U5      nOSnSnU R                  bA  U R                  b4  UR                  U5      nU R                  U5      nU R                  U5      nOSnSnXU/nU V
s/ s H	  oc  M  U
PM     nn
[        R                  " USS9nXU/nU V
s/ s H	  oc  M  U
PM     nn
[        R                  " USS9nUR                  5       SS U R                  U R                  4-   nUR                  " U6 R!                  SSSS5      nUR                  5       SS U R                  U R                  4-   nUR                  " U6 R!                  SSSS5      nUR                  5       SS U R                  U R                  4-   nUR                  " U6 R!                  SSSS5      nXR"                  -  nUcN  [        R$                  " UR                  S5      UR                  S5      5      R                  UR&                  5      nUSS2SSS2S4   U	SS2SSSS24   -  n[        R(                  " UUR+                  SS	5      5      nUUR-                  SS
S9R/                  5       -
  nUR1                  SU-
  R3                  5       [        R4                  " UR6                  5      R8                  5      n[:        R<                  " SS9" U5      n[        R(                  " UU5      nUR!                  SSSS5      R?                  5       nUR                  5       SS	 U R@                  4-   nUR                  " U6 nU RC                  U5      nU$ s  sn
f s  sn
f s  sn
f )z
query_states: text
key_value_states: protein
query_states: [bs, query_seq_len, dim]
key_value_states: [bs, kv_seq_len, dim]
query_attn_mask: [bs, query_seq_len]
kv_attn_mask: [bs, kv_seq_len]
Nz=At least one modality should be provided for cross attention.r$   r(   r@   r   r   r   r  Tr  )"r   r,   r   r  r   r  r  rf   r  r  r  r  rr   r   r   r   r  r  rw  rq   r   r   r  r  ra   r  finforg   minr   Softmaxr   r   r  )rZ   query_statesprotein_key_value_statesstructure_key_value_statesmsa_key_value_statesquery_attn_maskprotein_kv_attn_maskstructure_kv_attn_maskmsa_kv_attn_maskkv_attn_maskr  r   key_layer_proteinvalue_layer_proteinkey_layer_structurevalue_layer_structurekey_layer_msavalue_layer_msar   r   new_query_layer_shapenew_key_layer_shapenew_value_layer_shaperh   r   attention_scoresattention_probscontext_layernew_context_layer_shapes                                r4   cross_attention3EvollaSequenceAlignerCrossAttention.cross_attention9  sK   * -FVW#/A<a<A\]]yy15)),7 jj-'D,>,>,J'?'B'B<'P$ $ 0 01I J"&"4"45M"N $"&)d.B.B.N)C)F)F|)T&"&"4"45O"P$($8$89S$T!"&$(!<<#(B#7#:#:<#H  LL)=>M"nn-ABO M"O&]K	 );	1Q		;IIiQ/	*?S"-?+Qq+?ii3 + 0 0 23B 7$$$$;
 !
 "&&(=>FFq!QPQR'nn.s3$$$$7
 
 NN$78@@Aq!L	 + 0 0 23B 7$$$$;
 !
 "&&(=>FFq!QPQR!JJ. "#jj):):1)=|?P?PQR?STWWXdXkXklO(D!T)9:\!TSWYZJZ=[[||K1D1DR1LM#l&7&7B&7&M&T&T&VV'33%%'\5G5G)H)L)L
 **,-=> _kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**,CDm4q BL < @s"   QQ"Q,QQQc                 z   Ubv  UR                   u  pnUcc  [        R                  " X5      R                  U	R                  5      U	R                  X4S9R                  -  R                  UR                  5      nOS nUby  UR                   u  nnnUce  [        R                  " UU5      R                  U	R                  5      U
R                  UU4S9R                  -  R                  UR                  5      nOS nUby  UR                   u  nnnUce  [        R                  " UU5      R                  U	R                  5      UR                  UU4S9R                  -  R                  UR                  5      nOS nUnUb  UR                  5       (       d0  Ub  UR                  5       (       d  Ub  UR                  5       (       ay  UnU R                  UUUUUUUUS9n[        R                  " U R                  5      U-  nUU-   nUnU R                  U5      [        R                  " U R                  5      -  nUU-   nU$ )N)rr   )r  r  r  r  r  r  r  r  )rd   r,   rw  rf   rq   rU   Tanyr  tanhr  r  r  )rZ   r  protein_kv_statesstructure_kv_statesmsa_kv_statesr  r  r  r  protein_batch_maskstructure_batch_maskmsa_batch_maskpast_key_valuesr  protein_kv_seq_lenr)   structure_kv_seq_lenmsa_kv_seq_lenr   residuals                       r4   rn   +EvollaSequenceAlignerCrossAttention.forward  sP    (*;*A*A'BC#+JJr699:L:S:ST(//6H5M/NPPQ"&--. %
 $( *,?,E,E)B$c%-JJr#78;;<N<U<UV*118Lb7Q1RTTU"(//0 '
 &*"$&3&9&9#B'JJr>2556H6O6OP$++."1E+FHHI"]))* !
  $$ */C/G/G/I/I#/4J4N4N4P4P).>.B.B.D.D$H 00*):+>%2 /%9'=!1 1 	M "JJt':':;mKM$}4M$H GGM2UZZ5NNM$}4Mr6   )r   r   r  rP   r  r  r  rG   r  r  r  r   r  r   r  r  r  r  r   )NNNNNNN)
rw   rx   ry   rz   r+   rD   r  rn   r|   r}   r~   s   @r4   r  r    sm     +/,0&*1: !4Z1:  #Tz	1:
 t1: 1:fnn "#!G Gr6   r  RMSNormc                   x   ^  \ rS rSrS
S\SS4U 4S jjjrS\R                  S\R                  4S jrS r	S	r
U =r$ )r  i  r<   r   Nc                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        X l        g)z,
EvollaRMSNorm is equivalent to T5LayerNorm
N)rC   rD   r   r  r,   rw  weightvariance_epsilon)rZ   rG   r<   r\   s      r4   rD   EvollaRMSNorm.__init__  s/     	ll5::k#:; #r6   r   c                    UR                   nUR                  [        R                  5      nUR	                  S5      R                  SSS9nU[        R                  " X0R                  -   5      -  nU R                  UR                  U5      -  $ )Nr   r@   T)r  )	rg   rf   r,   float32powmeanrsqrtr"  r!  )rZ   r   input_dtypevariances       r4   rn   EvollaRMSNorm.forward  sw    #))%((7 $$Q',,R,>%H?T?T4T(UU{{]--k:::r6   c                 ^    [        U R                  R                  5       SU R                   3$ )Nz, eps=)r   r!  rd   r"  rm  s    r4   
extra_reprEvollaRMSNorm.extra_repr  s*    ))*+6$2G2G1HIIr6   )r"  r!  )gư>)rw   rx   ry   rz   re   rD   r,   r   rn   r-  r|   r}   r~   s   @r4   r  r    sB    $ $$ $ $;U\\ ;ell ;J Jr6   r  c                      ^  \ rS rSr% \R
                  \S'   SS\4U 4S jjjr\	   SS\S-  S\
S   S\S-  S	\S
\4   4S jj5       r\R                  " 5       \S 5       5       rSrU =r$ )EvollaRotaryEmbeddingi  r   Nr[   c                   > [         TU ]  5         UR                  U l        UR                  U l        Xl        U R
                  R                  S   U l        U R                  nU R                  S:w  a  [        U R                     nU" U R
                  U5      u  o@l
        U R                  SUSS9  U R                  SUR                  5       SS9  g )Nr   defaultr   FrA   original_inv_freq)rC   rD   rT   max_seq_len_cachedoriginal_max_seq_lenr[   rope_parametersr   r   r   r   rR   clone)rZ   r[   rq   rope_init_fnr   r\   s        r4   rD   EvollaRotaryEmbedding.__init__  s    "("@"@$*$B$B!44[A!%!E!E>>Y&.t~~>L+7V+L((ZeD0(..2BuUr6   rq   ztorch.devicer   r   r   c           	         U R                   S   n[        U SS5      =(       d    U R                  U R                  -  nSnSU[        R
                  " SUS[        R                  S9R                  U[        R                  S9U-  -  -  nXe4$ )	aH  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PreTrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
r   r   Nr   r   r   r   r   )	r6  rQ   rG   r   r,   rS   r   rf   re   r   s          r4   r   5EvollaRotaryEmbedding.compute_default_rope_parameters  s    & %%l3fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 ))r6   c                 L   U R                   S S S 2S 4   R                  5       R                  UR                  S   SS5      R	                  UR
                  5      nUS S 2S S S 24   R                  5       n[        UR
                  R                  [        5      (       a0  UR
                  R                  S:w  a  UR
                  R                  OSn[        USS9   UR                  5       UR                  5       -  R                  SS5      n[        R                  " Xf4SS	9nUR                  5       U R                  -  nUR                  5       U R                  -  n	S S S 5        WR	                  UR                   S
9W	R	                  UR                   S
94$ ! , (       d  f       N@= f)Nr   r@   r$   r   r   Fr   r   r(   r   )r   re   rU   rd   rf   rq   r   r   r   r    r   r,   r   r   r   r   rg   )
rZ   r   r?   r   r   r   r   r   r   r   s
             r4   rn   EvollaRotaryEmbedding.forward9  sN    !MM$4-8>>@GGHZHZ[\H]_acdehhijiqiqr ,QaZ 8 > > @'1!((--'E'E!((--[`J`ahhmmfkUC&,,.1F1L1L1NNYYZ[]^_E))UN3C'')d444C'')d444C	 D vvAGGv$cff177f&;;; DCs   BF
F#)r   r[   r4  r5  r   r   r   )rw   rx   ry   rz   r,   r   r   r%   rD   r   r   r+   r   re   r   r   r   rn   r|   r}   r~   s   @r4   r0  r0    s    llV| V V  &*+/"*t#*(* t* 
~u$	%	* *: ]]_<  <r6   r0  c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )	EvollaMLPiI  c                   > [         TU ]  5         Xl        UR                  U l        UR                  U l        [
        R                  " U R                  U R                  UR                  S9U l        [
        R                  " U R                  U R                  UR                  S9U l	        [
        R                  " U R                  U R                  UR                  S9U l
        [        UR                     U l        g )Nr  )rC   rD   r[   rG   r  r   r   mlp_bias	gate_projup_proj	down_projr	   
hidden_actact_fnrY   s     r4   rD   EvollaMLP.__init__J  s    !--!'!9!94#3#3T5K5KRXRaRabyy!1!143I3IPVP_P_`4#9#94;K;KRXRaRabV../r6   c                     U R                  U R                  U R                  U5      5      U R                  U5      -  5      nU$ r   )rD  rF  rB  rC  )rZ   r   rD  s      r4   rn   EvollaMLP.forwardT  s6    NN4;;t~~a/@#ADLLQRO#ST	r6   )rF  r[   rD  rB  rG   r  rC  r  r~   s   @r4   r?  r?  I  s    0 r6   r?  r   n_repr   c                     U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )z
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
r$   N)rd   rU   r   )r   rJ  batchnum_key_value_headsslenr   s         r4   	repeat_kvrO  Y  s_    
 2?1D1D.Ez!!Qa"23::5W\dlmM  e(CTTTr6   c                     ^  \ rS rSrSrS\S\4U 4S jjr   SS\R                  S\
\R                  \R                  4   S-  S	\R                  S-  S
\S-  S\\   S\
\R                  \R                  4   4S jjrSrU =r$ )EvollaAttentionie  z=Multi-headed attention from 'Attention Is All You Need' paperr[   r   c                 P  > [         TU ]  5         Xl        X l        [	        USUR
                  UR                  -  5      U l        UR                  UR                  -  U l	        U R                  S-  U l
        UR                  U l        SU l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR                  U R                  -  UR
                  UR                  S9U l        g )Nr   r   Tr  )rC   rD   r[   r   rQ   rG   r   r   rM  num_key_value_groupsr   attention_dropoutr   r   r   attention_biasq_projk_projv_projo_projrZ   r[   r   r\   s      r4   rD   EvollaAttention.__init__i  sI   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r6   Nr   rV   rh   r  r   r   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	U R                  U5      R                  U5      R	                  SS5      n
Uu  p[        XX5      u  pUb  UR                  XU R                  5      u  p[        R                  " U R                  R                  [        5      nU" U UU	U
U4U R                  (       d  SOU R                   U R"                  S.UD6u  pUR$                  " / UQSP76 R'                  5       nU R)                  U5      nX4$ )Nr@   r$   r   r_   r   )rd   r   rV  r   r   rW  rX  r   updater   r   r   r[   r   r   r   rT  r   r   r   rY  )rZ   r   rV   rh   r  r   rs   r   r  
key_statesvalue_statesr   r   r   r   r   s                   r4   rn   EvollaAttention.forward  s~    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ &'6'='=jX\XfXf'g$J(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFHkk+.((r6   )rT  r[   r   r   rW  r   rS  rY  rV  r   rX  r   )rw   rx   ry   rz   r{   r%   r+   rD   r,   r   r   r
   r   r   rn   r|   r}   r~   s   @r4   rQ  rQ  e  s    G
| 
 
4 IM.2(,&)||&) #5<<#=>E&) t+	&)
 &) +,&) 
u||U\\)	*&) &)r6   rQ  c                     ^  \ rS rSrS\S\4U 4S jjr            SS\R                  S\	\R                  \R                  4   S-  S\R                  S-  S	\R                  S-  S
\S-  S\S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  4S jjrSrU =r$ )EvollaDecoderLayeri  r[   r   c                   > [         TU ]  5         UR                  U l        [        XS9U l        [        U5      U l        [        UR                  UR                  S9U l	        [        UR                  UR                  S9U l
        US-   [        UR                  UR                  -  S5      -  S:X  a  [        UUR                  S9U l        g g )Nr[   r   r;   r$   r   )r  )rC   rD   rG   rQ  	self_attnr?  mlpr  rms_norm_epsinput_layernormpost_attention_layernormmaxr;  aligner_num_add_layersr  adapterrZ  s      r4   rD   EvollaDecoderLayer.__init__  s    !--(LV$,V-?-?VEXEXY(5f6H6HfNaNa(b%MS!9!9V=Z=Z!Z\]^^bcc>$*$6$6DL dr6   Nr   rV   rh   r?   r  	use_cacher  r  r  r  r  r  r  r   c                 
   UnU R                  U5      nU R                  " SUUUUUUS.UD6u  nnX-   nUnU R                  U5      nU R                  U5      nX-   n[	        U S5      (       a  U R                  UUUU	UU
UUS9nU$ )N)r   rh   r?   r  rn  rV   rl  )r  r  r  r  r  r  r  r  r[  )rh  re  ri  rf  r   rl  )rZ   r   rV   rh   r?   r  rn  r  r  r  r  r  r  r  r   r  r  s                    r4   rn   EvollaDecoderLayer.forward  s    " !,,];  >> 
')%+ 3
 
q !0 !55mD/ 04## LL*"3$7+ /#5%9- ) 	M r6   )rl  rG   rh  rf  ri  re  )NNNNFNNNNNNN)rw   rx   ry   rz   r%   r+   rD   r,   r   r   r  r
   r  rn   r|   r}   r~   s   @r4   rb  rb    sL   |  $ IM.204(,!&1537-12648.2/33||3 #5<<#=>E3 t+	3
 &&-3 3 $;3 !<<$.3 #\\D03 ||d*3 "LL4/3 $llT13 t+3 ,3  
!3 3r6   rb  c                      ^  \ rS rSr% \\S'   SrSr/ SQrS/r	Sr
SrSrSrSr\\S.r\R&                  " 5       U 4S	 j5       rS
rU =r$ )EvollaPreTrainedModeli  r[   r  T)rb  r!  r  r  r  F)r   rT  c                   > U R                   R                  n[        TU ]  U5        [	        U[
        5      (       ak  [        R                  " UR                  5        [        R                  " UR                  5        [        R                  " UR                  R                  5        g [	        U[        5      (       a!  [        R                  " UR                  SUS9  g g )Nr_   )r'  std)r[   initializer_rangerC   rW  r   r  rX  zeros_r  r  ones_r  r!  r  normal_r  )rZ   r   rt  r\   s      r4   rW  #EvollaPreTrainedModel._init_weights  s    kk++f%fABBKK--.KK(JJv,,334 ABBLLcs; Cr6   r[  )rw   rx   ry   rz   r%   r   base_model_prefixsupports_gradient_checkpointingr\  _skip_keys_device_placementr]  r^  r_  _can_compile_fullgraphr`  rb  rQ  ra  r,   r   rW  r|   r}   r~   s   @r4   rr  rr    sm    &*# $5"5 N!"'+%
 ]]_< <r6   rr  c                     ^  \ rS rSrS\4U 4S jjrS rS r\\	\
            SS\R                  S-  S\R                  S-  S	\R                  S-  S
\S-  S\R                  S-  S\S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\\-  4S jj5       5       5       rSrU =r$ )EvollaModeli  r[   c           
      8  > [         TU ]  U5        UR                  U l        UR                  U l        [
        R                  " U R                  UR                  U R                  5      U l        [        US9U l
        [
        R                  " [        UR                  5       Vs/ s H  n[        UUS9PM     sn5      U l        [!        UR                  UR"                  S9U l        ['        USS5      U l        [+        US9U l        U R/                  5         g s  snf )Nrf  rd  r;   r>  F)rC   rD   rH   r1   rF   r   rE   rG   embed_tokensr  protein_encoderr9  r:  r;  rb  r  r  rg  r  rQ   r>  r0  
rotary_embri  rZ  s      r4   rD   EvollaModel.__init__  s     !.. ++LL&:L:LdN^N^_36Bmm "'v'?'?!@
 "AI	 #!' "A
 "&"4"4&:M:MN	&-f6NPU&V#/v>s   #Dc                     U R                   $ r   r  rm  s    r4   rn   EvollaModel.get_input_embeddings(  s       r6   c                     Xl         g r   r  rq  s     r4   rr   EvollaModel.set_input_embeddings+  s    !r6   Nr0   rh   r?   r  ri   rn  protein_input_idsprotein_attention_maskstructure_feats	msa_featsr  r  r   c                    USL USL-  (       a  [        S5      eUc  U R                  U5      nU(       a  Uc  [        U R                  S9nUcU  Ub  UR	                  5       OSn[
        R                  " UR                  S   UR                  S9U-   nUR                  S5      nSnSnUbZ  UbW  U R                  UUS9nUR                  n[
        R                  " UR                  S   UR                  [
        R                  S9n[        U R                  UUUS	9nUnU R                  UUS
9nU R                    H  nU" U4UUUUUU	U
UUUUUS.UD6nM     U R#                  U5      n[%        UUS9nU$ )a  
protein_input_ids (torch.LongTensor):
    The input IDs for the protein sequence in structure-aware tokens. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
protein_attention_mask (torch.Tensor):
    The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.
structure_feats (torch.FloatTensor):
    The input IDs for purely structure-based features. Should be of shape `(batch_size, structure_seq_length, structure_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
msa_feats (torch.FloatTensor):
    The input IDs for purely MSA-based features. Should be of shape `(batch_size, msa_seq_length, msa_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
structure_batch_mask (torch.Tensor):
    The batch mask to decide which protein sequences are purely structure-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `structure_feats`. Dummpy input for now.
msa_batch_mask (torch.Tensor):
    The batch mask to decide which protein sequences are purely MSA-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `msa_feats`. Dummpy input for now.
Nz:You must specify exactly one of input_ids or inputs_embedsrf  r   r$   ru  rv  r   )r[   ri   rh   r  )r?   )rh   r?   r  rn  r  r  r  r  r  r  r  rV   )rA  r  )r   r  r   r[   get_seq_lengthr,   rS   rd   rq   rb   r  r  rw  r  r   r  r  r  r   )rZ   r0   rh   r?   r  ri   rn  r  r  r  r  r  r  r   past_seen_tokensprotein_featsr  protein_outputscausal_maskr   rV   decoder_layerr  s                          r4   rn   EvollaModel.forward.  s   B -t";<YZZ  --i8M0*$++>OCRC^==?de <<(;(;A(>}G[G[\_ooL'11!4L!(-C-O"22+5 3 O ,FFM!&!''*(//jj" );;')+	
 &"oom,oW![[M)*) /#"/$3'#5%9- .$7 M )$ 		-0(++
 r6   )r  r>  r  r  r1   r  r  rF   )NNNNNNNNNNNN)rw   rx   ry   rz   r%   rD   rn  rr  r   r!   r#   r,   r  r   r
   r   r  r   r   rn   r|   r}   r~   s   @r4   r  r    sf   | *!"  .2.204(,26!%596:48.248.2]##d*] t+] &&-	]
 ] ((4/] $;] !++d2] !&t 3] **T1] $$t+] $llT1] t+] 
(	(]    ]r6   r  c                   B  ^  \ rS rSrU 4S jrS rS r\\        SS\	R                  S-  S\	R                  S-  S\	R                  S-  S	\	R                  S-  S
\	R                  S-  S\	R                  S-  S\S-  S\\	R                  -  4S jj5       5       rSrU =r$ )EvollaForProteinText2Texti  c                    > [         TU ]  U5        [        U5      U l        UR                  U l        [
        R                  " UR                  U R                  SS9U l        U R                  5         g r  )
rC   rD   r  r  rF   r   r   rG   lm_headri  rY   s     r4   rD   "EvollaForProteinText2Text.__init__  sQ      (
 ++yy!3!3T__5Qr6   c                 6    U R                   R                  5       $ r   )r  rn  rm  s    r4   rn  .EvollaForProteinText2Text.get_input_embeddings  s    zz..00r6   c                 8    U R                   R                  U5      $ r   )r  rr  rq  s     r4   rr  .EvollaForProteinText2Text.set_input_embeddings  s    zz..u55r6   Nr0   rh   ri   labelsr  r  rn  logits_to_keepc	           
      l   U R                   " SUUUUUUS.U	D6n
U
R                  n[        U[        5      (       a  [	        U* S5      OUnU R                  USS2USS24   5      nSnUb  U R                  " SXU R                  S.U	D6n[        UUU
R                  U
R                  U
R                  S9nU$ )a|  
protein_input_ids (torch.LongTensor):
    The input IDs for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
protein_attention_mask (torch.Tensor):
    The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.

Example:

```python
>>> from transformers import EvollaProcessor, EvollaForProteinText2Text
>>> model = EvollaForProteinText2Text.from_pretrained("westlake/Evolla-10B-hf")
>>> processor = EvollaProcessor.from_pretrained("westlake/Evolla-10B-hf")

>>> protein_information = {
    "aa_seq": "your amino acid sequence",
    "foldseek": "your foldseek sequence",
}
>>> question = "What is the function of this protein?"
>>> message = [
    {"role": "system", "content": "You are an AI expert that can answer any questions about protein."},
    {"role": "user", "content": question},
]

>>> inputs = processor(proteins=[protein_information], messages_list=[message], return_tensors="pt", padding="longest")
>>> outputs = model.generate(**inputs)

>>> print(processor.batch_decode(outputs, skip_special_tokens=True))
```)r0   rh   ri   r  r  rn  N)logitsr  rF   )lossr  r  r   rT  r[  )r  rA  r   r+   slicer  loss_functionrF   r   r  r   rT  )rZ   r0   rh   ri   r  r  r  rn  r  r   outputsr   slice_indicesr  r  
lm_outputss                   r4   rn   !EvollaForProteinText2Text.forward  s    T ,0:: ,
)'/#9,
 ,
  118B>SV8W8W~ot4]kmA}a,?@A%%iVtibhiD+#33!//))

 r6   )r  r  rF   )NNNNNNNr   )rw   rx   ry   rz   rD   rn  rr  r   r   r,   r  r   r   r  r+   rn   r|   r}   r~   s   @r4   r  r    s    16  .2.226*.596:!%-.B##d*B t+B ((4/	B
   4'B !++d2B !&t 3B $;B ell*B  Br6   r  )r  r  rr  )r$   )Nr_   )\r  collections.abcr   dataclassesr   typingr   r,   r    r   rX  activationsr	   cache_utilsr
   r   
generationr   integrationsr   r   r   masking_utilsr   r   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr    r!   utils.output_capturingr"   r#   configuration_evollar%   r&   r5   Moduler8   r   r   r   r   re   r   r   r   r  r  r  r  r!  r7  rG  rQ  rc  r~  r  r  r  r  r  r  r0  r?  r+   rO  rQ  rb  rr  r  r  __all__r[  r6   r4   <module>r     sQ  *  $ !    & ! . ) f f J 9  L F & I I G E <4 ^=RYY ^=BA<")) A<H( *+B ,B@ !%II%<<% 
% <<	%
 LL4'% T\% % '(%8 )*K)		 K) +K)\
RYY 
BII <;ryy 
 
72 7tS")) SD  C/ C C01
!< 1
h7 		 7 tA		 A'.		 '.T <k <  <
299 
$k")) k\ Y'JBII J (J(><BII ><B		  	UU\\ 	U# 	U%,, 	U )*@)bii @) +@)FC3 CL <O < <D|' |~S 5 Sl Pr6   