
    Z j"                        S r SSKrSSKrSSKrSSKJr  SSKJr  SSKJr  SSK	J
r  SSKJr  SS	KJrJrJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJr  SSKJr  SSKJrJ r J!r!  SSK"J#r#  \!RH                  " \%5      r& " S S\RN                  5      r( " S S\RN                  5      r) " S S\RN                  5      r* " S S\RN                  5      r+ " S S\RN                  5      r, " S S\RN                  5      r- " S S\RN                  5      r. " S  S!\5      r/\ " S" S#\5      5       r0 " S$ S%\05      r1 " S& S'\RN                  5      r2\" S(S)9 " S* S+\0\5      5       r3S+S#/r4g),zPyTorch Pop2Piano model.    N)nn)CrossEntropyLoss)GenerationConfig   )initialization)ACT2FN)CacheDynamicCacheEncoderDecoderCache)GenerationMixin)create_causal_mask)GradientCheckpointingLayer)BaseModelOutput)BaseModelOutputWithPastAndCrossAttentionsSeq2SeqLMOutput)PreTrainedModel)auto_docstringis_torchdynamo_compilinglogging   )Pop2PianoConfigc                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )Pop2PianoLayerNorm)   c                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        X l        g)zZ
Construct a layernorm module in the Pop2Piano style. No bias and no subtraction of mean.
N)super__init__r   	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      ځ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pop2piano/modeling_pop2piano.pyr   Pop2PianoLayerNorm.__init__*   s/     	ll5::k#:; #    c                    UR                  [        R                  5      R                  S5      R	                  SSS9nU[        R
                  " X R                  -   5      -  nU R                  R                  [        R                  [        R                  4;   a%  UR                  U R                  R                  5      nU R                  U-  $ )N   T)keepdim)tor   float32powmeanrsqrtr"   r!   dtypefloat16bfloat16)r#   hidden_statesvariances      r'   forwardPop2PianoLayerNorm.forward2   s     !##EMM266q9>>r4>P%H?T?T4T(UU ;; ??),,T[[->->?M{{]**r)   )r"   r!   )gư>)__name__
__module____qualname____firstlineno__r   r8   __static_attributes____classcell__r&   s   @r'   r   r   )   s    $+ +r)   r   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Pop2PianoDenseActDenseC   configc                 X  > [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        R                  " UR
                  UR                  SS9U l        [        R                  " UR                  5      U l
        [        UR                     U l        g NFbias)r   r   r   Lineard_modeld_ffwiwoDropoutdropout_ratedropoutr   dense_act_fnactr#   rD   r&   s     r'   r   Pop2PianoDenseActDense.__init__D   sn    ))FNNFKKeD))FKKeDzz&"5"56&--.r)   c                    U R                  U5      nU R                  U5      nU R                  U5      n[        U R                  R
                  [        R                  5      (       a  UR                  U R                  R
                  R                  :w  aa  U R                  R
                  R                  [        R                  :w  a/  UR                  U R                  R
                  R                  5      nU R	                  U5      nU$ N)rL   rR   rP   
isinstancerM   r!   r   Tensorr3   int8r.   )r#   r6   s     r'   r8   Pop2PianoDenseActDense.forwardK   s    ./]3tww~~u||44##tww~~';';;$$

2),,TWW^^-A-ABM.r)   )rR   rP   rL   rM   	r:   r;   r<   r=   r   r   r8   r>   r?   r@   s   @r'   rB   rB   C   s    / / r)   rB   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Pop2PianoDenseGatedActDenseZ   rD   c                   > [         TU ]  5         [        R                  " UR                  UR
                  SS9U l        [        R                  " UR                  UR
                  SS9U l        [        R                  " UR
                  UR                  SS9U l        [        R                  " UR                  5      U l        [        UR                     U l        g rF   )r   r   r   rI   rJ   rK   wi_0wi_1rM   rN   rO   rP   r   rQ   rR   rS   s     r'   r   $Pop2PianoDenseGatedActDense.__init__[   s    IIfnnfkkF	IIfnnfkkF	))FKKeDzz&"5"56&--.r)   c                 8   U R                  U R                  U5      5      nU R                  U5      nX#-  nU R                  U5      n[	        U R
                  R                  [        R                  5      (       a  UR                  U R
                  R                  R                  :w  aa  U R
                  R                  R                  [        R                  :w  a/  UR                  U R
                  R                  R                  5      nU R                  U5      nU$ rV   )rR   r`   ra   rP   rW   rM   r!   r   rX   r3   rY   r.   )r#   r6   hidden_geluhidden_linears       r'   r8   #Pop2PianoDenseGatedActDense.forwardc   s    hhtyy78		-0#3]3 tww~~u||44##tww~~';';;$$

2),,TWW^^-A-ABM.r)   )rR   rP   r`   ra   rM   r[   r@   s   @r'   r]   r]   Z   s    / / r)   r]   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )Pop2PianoLayerFFx   rD   c                   > [         TU ]  5         UR                  (       a  [        U5      U l        O[        U5      U l        [        UR                  UR                  S9U l	        [        R                  " UR                  5      U l        g )Nr%   )r   r   is_gated_actr]   DenseReluDenserB   r   rJ   layer_norm_epsilon
layer_normr   rN   rO   rP   rS   s     r'   r   Pop2PianoLayerFF.__init__y   s_    "=f"ED"8"@D,V^^AZAZ[zz&"5"56r)   c                 p    U R                  U5      nU R                  U5      nXR                  U5      -   nU$ rV   )ro   rm   rP   )r#   r6   forwarded_statess      r'   r8   Pop2PianoLayerFF.forward   s;    ??=9../?@%5E(FFr)   )rm   rP   ro   r[   r@   s   @r'   rh   rh   x   s    7 7 r)   rh   c                   t   ^  \ rS rSr  S
S\S\S-  4U 4S jjjr\SS j5       rSS jr	     SS jr
S	rU =r$ )Pop2PianoAttention   NrD   	layer_idxc                   > [         TU ]  5         UR                  U l        X l        UR                  U l        UR
                  U l        UR                  U l        UR                  U l        UR                  U l
        UR                  U l        U R                  U R                  -  U l        X0l        Uc>  U R                  (       a-  [        R!                  SU R"                  R$                   S35        [&        R(                  " U R                  U R                  SS9U l        [&        R(                  " U R                  U R                  SS9U l        [&        R(                  " U R                  U R                  SS9U l        [&        R(                  " U R                  U R                  SS9U l        U R                  (       a0  [&        R2                  " U R                  U R                  5      U l        SU l        g )NzInstantiating a decoder z without passing `layer_idx` is not recommended and will to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` when creating this class.FrG   )r   r   
is_decoderhas_relative_attention_biasrelative_attention_num_bucketsrelative_attention_max_distancerJ   d_kvkey_value_proj_dim	num_headsn_headsrO   rP   	inner_dimrw   loggerwarning_oncer&   r:   r   rI   qkvo	Embeddingrelative_attention_biasgradient_checkpointingr#   rD   rz   rw   r&   s       r'   r   Pop2PianoAttention.__init__   se    	 +++F(.4.S.S+/5/U/U,~~"(++''**(?(??"*4>>+B+B*C D, , 4<<eD4<<eD4<<eD4>>4<<eD+++-<<8[8[]a]i]i+jD(&+#r)   c                 b   SnU(       aC  US-  nX@S:  R                  [        R                  5      U-  -  n[        R                  " U 5      n O,[        R                  " U [        R
                  " U 5      5      * n US-  nX:  nU[        R                  " U R                  5       U-  5      [        R                  " X5-  5      -  X%-
  -  R                  [        R                  5      -   n[        R                  " U[        R                  " XrS-
  5      5      nU[        R                  " X`U5      -  nU$ )aR  
Adapted from Mesh Tensorflow:
https://github.com/tensorflow/mesh/blob/0cb87fe07da627bf0b7e60475d59f95ed6b5be3d/mesh_tensorflow/transformer/transformer_layers.py#L593

Translate relative position to a bucket number for relative attention. The relative position is defined as
memory_position - query_position, i.e. the distance in tokens from the attending position to the attended-to
position. If bidirectional=False, then positive relative positions are invalid. We use smaller buckets for
small absolute relative_position and larger buckets for larger absolute relative_positions. All relative
positions >=max_distance map to the same bucket. All relative positions <=-max_distance map to the same bucket.
This should allow for more graceful generalization to longer sequences than the model has been trained on

Args:
    relative_position: an int32 Tensor
    bidirectional: a boolean - whether the attention is bidirectional
    num_buckets: an integer
    max_distance: an integer

Returns:
    a Tensor with the same shape as relative_position, containing int32 values in the range [0, num_buckets)
r   r+   r   )r.   r   longabsmin
zeros_likelogfloatmath	full_likewhere)relative_positionbidirectionalnum_bucketsmax_distancerelative_buckets	max_exactis_smallrelative_position_if_larges           r'   _relative_position_bucket,Pop2PianoAttention._relative_position_bucket   s   , AKQ!6 : :5:: F TT %		*; <!&+<e>N>NO`>a!b b  1$	$0 &/II'--/);<hh|/01&( "UZZ.	&"
 &+YY&8RbcTc(d&
" 	EKKE_``r)   c                    Uc   U R                   R                  R                  n[        R                  " U[        R
                  US9SS2S4   U-   n[        R                  " U[        R
                  US9SSS24   nXe-
  nU R                  UU R                  (       + U R                  U R                  S9nU R                  U5      n	U	R                  / SQ5      R                  S5      n	U	$ )z%Compute binned relative position biasN)r3   device)r   r   r   )r+   r   r   r   )r   r!   r   r   aranger   r   ry   r{   r|   permute	unsqueeze)
r#   query_length
key_lengthr   past_seen_tokenscontext_positionmemory_positionr   relative_position_bucketvaluess
             r'   compute_biasPop2PianoAttention.compute_bias   s    >1188??F <<EJJvVWXZ^W^_brr,,zFSTXZ[T[\+>#'#A#A#.;;==	 $B $
  --.FG	*44Q7r)   c                 :   UR                   SS n/ UQSPU R                  P7n	Ub  UR                  U R                  5      OSn
[	        U
[
        R                  5      (       a  U
R                  5       OU
n
USLnU R                  U5      R                  U	5      R                  SS5      nSn[	        U[        5      (       aF  UR                  R                  U R                  5      nU(       a  UR                  nOUR                  nOUnU(       a  UOUnU(       aQ  UbN  U(       aG  UR                   U R                     R"                  nUR                   U R                     R$                  nO/ UR                   SS QSPU R                  P7nU R'                  U5      R                  U5      R                  SS5      nU R)                  U5      R                  U5      R                  SS5      nUbU  UR+                  UUU R                  5      u  nnU(       a.  [	        U[        5      (       a  SUR                  U R                  '   [
        R,                  " UUR                  SS5      5      nUc  UR                   S	   nU R.                  (       dh  [
        R0                  " SUR                   S   US   U4UR2                  UR4                  S
9nU R6                  (       a  U R8                  (       a  SUl        OU R=                  US   UUR2                  U
S9nUb#  USS2SS2SS2SUR                   S	   24   nUU-   nUnUU-  n[>        R@                  RC                  URE                  5       SS9RG                  U5      n[>        R@                  RI                  UU RH                  U R8                  S9n[
        R,                  " UU5      nUR                  SS5      RK                  5       nURL                  " / UQSP76 nU RO                  U5      nUU4nU(       a  UU4-   nU$ )zp
Self-attention (if key_value_states is None) or attention over source sentence (provided by key_value_states).
Nr,   r   r   r+   FTr   )r   r3   )r   r   dim)ptraining)(shaper~   get_seq_lengthrw   rW   r   rX   cloner   view	transposer   
is_updatedgetcross_attention_cacheself_attention_cachelayerskeysr   r   r   updatematmulrz   zerosr   r3   r   r   requires_gradr   r   
functionalsoftmaxr   type_asrP   
contiguousreshaper   )r#   r6   maskkey_value_statesposition_biaspast_key_valuesoutput_attentionskwargsinput_shapehidden_shaper   is_cross_attentionquery_statesr   curr_past_key_valuescurrent_states
key_statesvalue_stateskv_shapescoresr   causal_maskposition_bias_maskedattn_weightsattn_outputoutputss                             r'   r8   Pop2PianoAttention.forward   s    $))#2.BBbB$*A*ABM\Mh?99$..Ino7ABRTYT`T`7a7a+113gw .T9vvm,11,?II!QO 
o':;;(3377GJ!'6'L'L$'6'K'K$#2 -?)]/"=*-44T^^DIIJ/66t~~FMMLP--cr2PBP8O8OPH/44X>HHANJ66.166x@JJ1aPL*+?+F+FzS_aeaoao+p(
L%*_FY*Z*ZAEO..t~~> lJ,@,@A,FG #))"-J33 %**1-{1~zJSYS`S`hnhtht! ..4==26M/ $ 1 1NJv}}Wg !2 ! "1a,Bj.>.>r.B,B#BC - ;,&& }},,V\\^,DLLVT}},,\T\\TXTaTa,bll<>!++Aq1<<>!));;;;ff[)./Gr)   )rJ   rP   r   rz   r   ry   r   r~   rw   r   r   r   r   r|   r{   r   FN)T       )Nr   )NNNNF)r:   r;   r<   r=   r   intr   staticmethodr   r   r8   r>   r?   r@   s   @r'   ru   ru      sc     %* $	 , , :	 ,  ,D -  - ^( [ [r)   ru   c                   N   ^  \ rS rSrSS\S-  4U 4S jjjr     SS jrSrU =r$ )	Pop2PianoLayerSelfAttentioniN  Nrw   c                    > [         TU ]  5         [        XUS9U l        [	        UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g )Nrz   rw   rk   )r   r   ru   SelfAttentionr   rJ   rn   ro   r   rN   rO   rP   r   s       r'   r   $Pop2PianoLayerSelfAttention.__init__O  sQ    /W`
 -V^^AZAZ[zz&"5"56r)   c           	          U R                  U5      nU R                  UUUUUUS9n	XR                  U	S   5      -   nU4U	SS  -   n
U
$ )N)r   r   r   	use_cacher   r   r   )ro   r   rP   )r#   r6   attention_maskr   r   r   r   r   normed_hidden_statesattention_outputr   s              r'   r8   #Pop2PianoLayerSelfAttention.forwardW  sn      $}=-- '+/ . 
 &5Ea5H(II "%5ab%99r)   )r   rP   ro   r   )NNNFF	r:   r;   r<   r=   r   r   r8   r>   r?   r@   s   @r'   r   r   N  s4    7SSWZ 7 7  r)   r   c                   L   ^  \ rS rSrSS\S-  4U 4S jjjr    SS jrSrU =r$ )	Pop2PianoLayerCrossAttentionip  Nrw   c                    > [         TU ]  5         [        USUS9U l        [	        UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g )NFr   rk   )r   r   ru   EncDecAttentionr   rJ   rn   ro   r   rN   rO   rP   )r#   rD   rw   r&   s      r'   r   %Pop2PianoLayerCrossAttention.__init__q  sO    1&V[gpq,V^^AZAZ[zz&"5"56r)   c           	          U R                  U5      nU R                  UUUUUUS9n	XR                  U	S   5      -   n
U
4U	SS  -   nU$ )N)r   r   r   r   r   r   r   )ro   r   rP   )r#   r6   r   r   r   r   r   r   r   r   layer_outputr   s               r'   r8   $Pop2PianoLayerCrossAttention.forwardw  sm      $}=// -'+/ 0 
 %||4DQ4G'HH/$4QR$88r)   )r   rP   ro   rV   )NNNFr   r@   s   @r'   r   r   p  s0    7#* 7 7  r)   r   c                   V   ^  \ rS rSrSS\S-  4U 4S jjjr         SS jrSrU =r$ )	Pop2PianoBlocki  Nrw   c                 l  > [         TU ]  5         UR                  U l        [        R                  " 5       U l        U R
                  R                  [        XUS95        U R                  (       a"  U R
                  R                  [        XS95        U R
                  R                  [        U5      5        g )Nr   )rw   )
r   r   ry   r   
ModuleListlayerappendr   r   rh   r   s       r'   r   Pop2PianoBlock.__init__  s     ++]]_


'[d	

 ??JJ:6WX

*623r)   c           	         U R                   S   " UUUUUU	S9nUS   nUSS  nUR                  [        R                  :X  a  [        R                  " [        R
                  " U5      R                  5       [        R                  " UR                  5      R                  S-
  [        R                  " UR                  5      R                  5      n[        R                  " X* US9nU R                  =(       a    US LnU(       a  U R                   S   " UUUUUU	S9nUS   nUR                  [        R                  :X  a  [        R                  " [        R
                  " U5      R                  5       [        R                  " UR                  5      R                  S-
  [        R                  " UR                  5      R                  5      n[        R                  " X* US9nUUSS  -   nU R                   S   " U5      nUR                  [        R                  :X  a  [        R                  " [        R
                  " U5      R                  5       [        R                  " UR                  5      R                  S-
  [        R                  " UR                  5      R                  5      n[        R                  " X* US9nU4nUU-   $ )Nr   )r   r   r   r   r   r   i  )r   max)r   r   r   r   r   r,   )r   r3   r   r4   r   isinfanyfinfor  clampry   )r#   r6   r   r   encoder_hidden_statesencoder_attention_maskencoder_decoder_position_biasr   r   r   return_dictr   self_attention_outputsattention_outputsclamp_valuedo_cross_attentioncross_attention_outputsr   s                     r'   r8   Pop2PianoBlock.forward  sM    "&A)'+/"
 /q12126 %--/++M*..0M//044t;M//044K
 "KK<[YM!__R1Fd1R&*jjm!65; /"3'# 4A6M ""emm3#kkKK.224KK 3 34884?KK 3 3488
 !&M|Q\ ] !24KAB4O O 

2}5 %--/++M*..0M//044t;M//044K
 "KK<[YM " ''	
r)   )ry   r   r   )	NNNNNNFFTr   r@   s   @r'   r   r     sB    4SSWZ 4 4" "#&*J
 J
r)   r   c                   p    \ rS rSr% \\S'   SrSrSrSr	S/r
S/r\R                  " 5       S	 5       rS
 rSrg)Pop2PianoPreTrainedModeli  rD   transformer)audioTFr   rM   c                    U R                   R                  n[        U[        5      (       a%  [        R
                  " UR                  US-  5        g[        U[        5      (       a.  [        R                  " UR                  R                  SUS-  S9  g[        U[        5      (       am  [        R                  " UR                  R                  SUS-  S9  [        US5      (       a.  [        R                  " UR                  R                  SUS-  S9  gg[        U[        5      (       GaA  [        R                  " UR                  R                  SX R                   R                   S-  -  S9  [        UR                  S5      (       aA  UR                  R"                  b*  [        R$                  " UR                  R"                  5        [        R                  " UR&                  R                  SX R                   R(                  S-  -  S9  [        UR&                  S5      (       aC  UR&                  R"                  b+  [        R$                  " UR&                  R"                  5        ggg[        U[*        5      (       Ga  [        R                  " UR,                  R                  SX R                   R                   S-  -  S9  [        UR,                  S5      (       aA  UR,                  R"                  b*  [        R$                  " UR,                  R"                  5        [        R                  " UR.                  R                  SX R                   R                   S-  -  S9  [        UR.                  S5      (       aA  UR.                  R"                  b*  [        R$                  " UR.                  R"                  5        [        R                  " UR&                  R                  SX R                   R(                  S-  -  S9  [        UR&                  S5      (       aC  UR&                  R"                  b+  [        R$                  " UR&                  R"                  5        ggg[        U[0        5      (       GaF  U R                   R                   nU R                   R2                  nU R                   R4                  n[        R                  " UR6                  R                  SX#U-  S-  -  S9  [        R                  " UR8                  R                  SX#S-  -  S9  [        R                  " UR:                  R                  SX#S-  -  S9  [        R                  " UR<                  R                  SX%U-  S-  -  S9  UR>                  (       a0  [        R                  " UR@                  R                  SX#S-  -  S9  ggg)zInitialize the weights      ?        )r1   stdlm_head      rH   N)!rD   initializer_factorrW   r   init	constant_r!   Pop2PianoConcatEmbeddingToMelnormal_	embedding!Pop2PianoForConditionalGenerationsharedhasattrr  rB   rL   rJ   rH   zeros_rM   rK   r]   r`   ra   ru   r}   r   r   r   r   r   rz   r   )r#   modulefactorrJ   r~   r   s         r'   _init_weights&Pop2PianoPreTrainedModel._init_weights  s    //f011NN6==&3,7 =>>LL))00sM ABBLL--CVc\Jvy))V^^22&3,O * 677LL))KKDWDW\`C`9abvyy&))fiinn.HFIINN+LL))KKDTDTY]C]9^_vyy&))fiinn.HFIINN+ /I) ;<<LL++#6kkFYFY^bEb;cdv{{F++0@0@0LFKK,,-LL++#6kkFYFY^bEb;cdv{{F++0@0@0LFKK,,-LL))KKDTDTY]C]9^_vyy&))fiinn.HFIINN+ /I) 233kk))G!%!1!1kk++GLLsM_C_dhBh8ijLLs4-8PQLLs4-8PQLLsM_C_dhBh8ij11V;;BBRXim\mRno 2 4r)   c                 :   U R                   R                  nU R                   R                  nUc  [        S5      eUR	                  UR
                  5      nUSS S24   R                  5       USSS 24'   X$S'   Uc  [        S5      eUR                  US:H  U5        U$ )Nzoself.model.config.decoder_start_token_id has to be defined. In Pop2Piano it is usually set to the pad_token_id..r,   r   ).r   z1self.model.config.pad_token_id has to be defined.)rD   decoder_start_token_idpad_token_id
ValueError	new_zerosr   r   masked_fill_)r#   	input_idsr*  r+  shifted_input_idss        r'   _shift_right%Pop2PianoPreTrainedModel._shift_right  s    !%!C!C{{//!) B  &//	@%.sCRCx%8%>%>%@#qr'"$:&!PQQ&&'8D'@,O  r)    N)r:   r;   r<   r=   r   __annotations__base_model_prefixoutput_modalitiessupports_gradient_checkpointing_can_compile_fullgraph_no_split_modules_keep_in_fp32_modulesr   no_gradr&  r1  r>   r3  r)   r'   r  r    sQ    %"&*#")*!F
]]_%p %pN!r)   r  c                   L   ^  \ rS rSrU 4S jrS r          SS jrSrU =r$ )Pop2PianoStacki4  c                   > [         TU ]  U5        [        R                  " UR                  UR
                  5      U l        UR                  U l        [        R                  " [        UR                  5       Vs/ s H  n[        U[        US:H  5      US9PM     sn5      U l        [        UR
                  UR                  S9U l        [        R"                  " UR$                  5      U l        U R)                  5         SU l        g s  snf )Nr   r   rk   F)r   r   r   r   
vocab_sizerJ   embed_tokensry   r   range
num_layersr   boolblockr   rn   final_layer_normrN   rO   rP   	post_initr   )r#   rD   ir&   s      r'   r   Pop2PianoStack.__init__6  s     LL):):FNNK ++]] v0011A v4Q<[\]1

 !36>>vG`G` azz&"5"56 	&+#s   9!Dc                     Xl         g rV   )r@  r#   new_embeddingss     r'   set_input_embeddings#Pop2PianoStack.set_input_embeddingsJ  s    *r)   c                 	   Ub  UOU R                   R                  nUb  UOU R                   R                  nU	b  U	OU R                   R                  n	U
b  U
OU R                   R                  n
Ub*  Ub'  U R
                  (       a  SOSn[        SU SU S35      eUb&  UR                  5       nUR                  SUS   5      nO>Ub  UR                  5       S S nO'U R
                  (       a  SOSn[        SU SU S	35      eU R                  (       a/  U R                  (       a  U(       a  [        R                  S
5        SnUc)  U R                  c  [        S5      eU R                  U5      nUu  pUSL a   U R
                  (       d  [        SU  S35      eU R
                  (       ah  U(       a`  Uc]  U R                   R                  (       a/  [        [!        U R                   S9[!        U R                   S95      nO'[!        U R                   S9nOU R
                  (       d  S nUb  UR#                  5       OSnUc4  [%        5       (       d%  UU-   n[&        R(                  " UUUR*                  S9nU R                   R
                  (       a  [-        U R                   UUUS9nOVUS S 2S S S S 24   nUR/                  UR0                  S9nSU-
  [&        R2                  " UR0                  5      R4                  -  nU R
                  (       aO  UbL  UR                  5       u  nnnUU4nUc  [&        R(                  " UUR*                  S9nU R7                  U5      nOS nU	(       a  SOS nU(       a  SOS nU(       a  U R
                  (       a  SOS nS nS nU R9                  U5      n[;        U R<                  5       H{  u  nnU	(       a  UU4-   nU" UUUUUUUUUS9	n U S   nU S   nU R
                  (       a  Ub  U U(       a  SOS   nU(       d  MV  UU S   4-   nU R
                  (       d  Mr  UU S   4-   nM}     U R?                  U5      nU R9                  U5      nU	(       a  UU4-   nU
(       d  [A        S UUUUU4 5       5      $ [C        UUUUUS9$ )Ndecoder_ zYou cannot specify both zinput_ids and zinputs_embeds at the same timer,   zYou have to specify either zinput_ids or inputs_embedszZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fz<You have to initialize the model with valid token embeddingsTz)`use_cache` can only be set to `True` if z is used as a decoder)rD   r   r   )rD   rQ  r   r   )r3   r  r3  )r   r   r   r   r   r+      c              3   0   #    U  H  nUc  M  Uv   M     g 7frV   r3  ).0r   s     r'   	<genexpr>)Pop2PianoStack.forward.<locals>.<genexpr>  s"      
A  s   	)last_hidden_stater   r6   
attentionscross_attentions)"rD   r   r   output_hidden_statesr	  ry   r,  sizer   r   r   r   r   r@  is_encoder_decoderr   r
   r   r   r   r    r   r   r.   r3   r  r   invert_attention_maskrP   	enumeraterD  rE  tupler   )!r#   r/  r   r  r  rQ  r   r   r   r[  r	  r   err_msg_prefixr   
batch_size
seq_lengthpast_key_values_lengthmask_seq_lengthr   encoder_batch_sizeencoder_sequence_length_encoder_hidden_shapeencoder_extended_attention_maskall_hidden_statesall_attentionsall_cross_attentionsr   r  r6   rG  layer_modulelayer_outputss!                                    r'   r8   Pop2PianoStack.forwardM  s    "+!6IDKK<Q<Q	1B1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY ]%>+/??ZN*>*:.HXXvw  "#..*K!r;r?;I&',,.s3K+/??ZN:>:J-XfWggtuvv&&4==##p "	   ( !_`` --i8M!,
?? #LTFRg!hii??_4;;11&9$DKK8,dkk:Z'O '3$++&FO #OETE`!?!?!Afg!*B*D*D4zAO"ZZ
OML`L`aN;;!!,{{+- /	K )D$)9:K%..}/B/B.CK,M<O<O0P0T0TTK ??4@=R=W=W=Y: 7$68O#P %-).4HQ^QeQe)f&.2.H.HI_.`+.2+"6BD0d&7DOOrRV(,%]3(4OA|#$58H$H!(%/- /#"3
M *!,M
 *!,M#8#D0=CTaZ[0\-  !/=3C2E!E???+?=QRCSBU+U(9  5< --m<]3   1]4D D 
 "#%"(
 
 
 9+++%1
 	
r)   )rD  rP   r@  rE  r   ry   )
NNNNNNNNNN)	r:   r;   r<   r=   r   rL  r8   r>   r?   r@   s   @r'   r=  r=  4  s9    ,(+
 "#!^
 ^
r)   r=  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )r  i  z'Embedding Matrix for `composer` tokens.c                 ~   > [         TU ]  5         [        R                  " UR                  UR
                  S9U l        g )N)num_embeddingsembedding_dim)r   r   r   r   composer_vocab_sizerJ   r  rS   s     r'   r   &Pop2PianoConcatEmbeddingToMel.__init__  s-    V5O5O_e_m_mnr)   c                 z    X#-
  nU R                  U5      R                  S5      n[        R                  " XQ/SS9nU$ )Nr   r   )r  r   r   cat)r#   featureindex_valueembedding_offsetindex_shiftedcomposer_embeddingrQ  s          r'   r8   %Pop2PianoConcatEmbeddingToMel.forward  s>    #6!^^M:DDQG		#5"?QGr)   )r  )	r:   r;   r<   r=   __doc__r   r8   r>   r?   r@   s   @r'   r  r    s    1o r)   r  zA
    Pop2Piano Model with a `language modeling` head on top.
    )custom_introc            !         ^  \ rS rSrSSS.rS\4U 4S jjrS rS r SS	\	R                  S
\S\S\	R                  S-  4S jjr\              S S\	R                  S-  S\	R                  S-  S\	R                  S-  S\	R                   S-  S\\\	R$                        S-  S\S-  S\	R                  S-  S	\	R                  S-  S\	R                  S-  S\	R                  S-  S\S-  S\S-  S\S-  S\S-  S\\	R                     \-  4S jj5       r\	R.                  " 5          S!U 4S jj5       rS\	R$                  4S jrSrU =r$ )"r   i  zshared.weight)zencoder.embed_tokens.weightzdecoder.embed_tokens.weightrD   c                 4  > [         TU ]  U5        Xl        UR                  U l        [
        R                  " UR                  UR                  5      U l        [        U5      U l
        [        R                  " U5      nSUl        SUl        [        U5      U l        [        R                  " U5      nSUl        UR"                  Ul        [        U5      U l        [
        R(                  " UR                  UR                  SS9U l        U R-                  5         g )NFTrG   )r   r   rD   rJ   	model_dimr   r   r?  r!  r  mel_conditionercopydeepcopyry   r   r=  encodernum_decoder_layersrB  decoderrI   r  rF  )r#   rD   encoder_configdecoder_configr&   s       r'   r   *Pop2PianoForConditionalGeneration.__init__  s     ll6#4#4fnnE<VDv.$)!#( %n5v.$(!$*$=$=!%n5yy1B1BO 	r)   c                     U R                   $ rV   )r!  )r#   s    r'   get_input_embeddings6Pop2PianoForConditionalGeneration.get_input_embeddings   s    {{r)   c                 |    Xl         U R                  R                  U5        U R                  R                  U5        g rV   )r!  r  rL  r  rJ  s     r'   rL  6Pop2PianoForConditionalGeneration.set_input_embeddings#  s+    $)).9)).9r)   Ninput_featurescomposergeneration_configr   c                    UR                   nX%;  a(  [        S[        UR                  5       5       SU 35      eXR   n[        R
                  " X`R                  S9nUR                  UR                  S   5      n[        UR                  5       5      nU R                  UUUS9nUbK  SXSS2S4   R                  5       ) '   [        R                  " USS2S4   R                  SS	5      U/S	S
9nX4$ US4$ )ak  
This method is used to concatenate mel conditioner tokens at the front of the input_features in order to
control the type of MIDI token generated by the model.

Args:
    input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        input features extracted from the feature extractor.
    composer (`str`):
        composer token which determines the type of MIDI tokens to be generated.
    generation_config (`~generation.GenerationConfig`):
        The generation is used to get the composer-feature_token pair.
    attention_mask (``, *optional*):
        For batched generation `input_features` are padded to have the same shape across all examples.
        `attention_mask` helps to determine which areas were padded and which were not.
        - 1 for tokens that are **not padded**,
        - 0 for tokens that are **padded**.
zPlease choose a composer from z. Composer received - rR  r   )ry  rz  r{  Nr  r,   r   )axis)composer_to_feature_tokenr,  listr   r   tensorr   repeatr   r   r   r  rC  concatenater   )r#   r  r  r  r   r  composer_valuer{  s           r'   get_mel_conditioner_outputs=Pop2PianoForConditionalGeneration.get_mel_conditioner_outputs(  s   0 %6$O$O!406O6T6T6V1W0XXnownxy  3<n[[I'..~/C/CA/FG8??AB--"&- . 

 %;>N1a4055778 #..q!t0D0I0I"a0PR`/ahijN!11t##r)   r/  decoder_input_idsdecoder_attention_maskencoder_outputsr   rQ  decoder_inputs_embedslabelsr   r   r[  r	  returnc                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [        S5      eUb  Uc  UnUc  U R	                  UUUUUUS9nORU(       aK  [        U[        5      (       d6  [        US   [        U5      S:  a  US   OS[        U5      S:  a  US   OSS9nUS   nU
b  Uc  U	c  U R                  U
5      nU R                  UUU	UUUUUUUS9
nUS   nU R                   R                  (       a  UU R                  S	-  -  nU R                  U5      nSnU
b@  [        S
S9nU" UR                  SUR                  S5      5      U
R                  S5      5      nU(       d  U4USS -   U-   nUb  U4U-   $ U$ [!        UUUR"                  UR$                  UR&                  UR(                  UR*                  UR$                  UR&                  S9	$ )a  
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
    Indices of input sequence tokens in the vocabulary. Pop2Piano is a model with relative position embeddings
    so you should be able to pad the inputs on both the right and the left. Indices can be obtained using
    [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for detail.
    [What are input IDs?](../glossary#input-ids) To know more on how to prepare `input_ids` for pretraining
    take a look a [Pop2Piano Training](./Pop2Piano#training).
decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Indices of decoder input sequence tokens in the vocabulary. Indices can be obtained using
    [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and [`PreTrainedTokenizer.__call__`] for details.
    [What are decoder input IDs?](../glossary#decoder-input-ids) Pop2Piano uses the `pad_token_id` as the
    starting token for `decoder_input_ids` generation. If `past_key_values` is used, optionally only the last
    `decoder_input_ids` have to be input (see `past_key_values`). To know more on how to prepare
decoder_attention_mask (`torch.BoolTensor` of shape `(batch_size, target_sequence_length)`, *optional*):
    Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also
    be used by default.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[-100, 0, ...,
    config.vocab_size - 1]`. All labels set to `-100` are ignored (masked), the loss is only computed for
    labels in `[0, ..., config.vocab_size]`
NzSBoth `inputs_embeds` and `input_features` received! Please provide only one of them)r/  r   rQ  r   r[  r	  r   r   r+   )rX  r6   rY  )
r/  r   rQ  r   r  r  r   r   r[  r	  r  r)  )ignore_indexr,   )	losslogitsr   decoder_hidden_statesdecoder_attentionsrZ  encoder_last_hidden_stater  encoder_attentions)rD   r   r	  r,  r  rW   r   lenr1  r  tie_word_embeddingsr  r  r   r   r\  r   r   r6   rY  rZ  rX  )r#   r/  r   r  r  r  r   rQ  r  r  r  r   r   r[  r	  r   r6   decoder_outputssequence_output	lm_logitsr  loss_fctoutputs                          r'   r8   )Pop2PianoForConditionalGeneration.forwardY  s3   P "+!6IDKK<Q<Q	%0%<k$++BYBY$)Crss'M,A*M ""ll#-+"3%9' + O O_!M!M-"1!"4474H14Loa0RV14_1E1I?1-tO (*"3";@U@] $ 1 1& 9 ,,'1/+"/#1/!5# ' 
 *!,;;**-1EFOLL1	'T:HINN2y~~b/ABFKKPROTD\OAB$77/IF)-)9TGf$EvE+;;"1"?"?.99,==&5&G&G"1"?"?.99

 
	
r)   c                   > Uc  U R                   nUR                  " S	0 UD6  [        US5      (       d  [        S5      e[	        UR
                  5      U R                  R                  :w  a9  [        SU R                  R                   S[	        UR
                  5       S35      eU R                  UUUUS9u  p[        TU ](  " S	SUUUS.UD6$ )
aV  
Generates token ids for midi outputs.

<Tip warning={true}>

Most generation-controlling parameters are set in `generation_config` which, if not passed, will be set to the
model's default generation configuration. You can override any `generation_config` by passing the corresponding
parameters to generate(), e.g. `.generate(inputs, num_beams=4, do_sample=True)`. For an overview of generation
strategies and code examples, check out the [following guide](./generation_strategies).

</Tip>

Parameters:
    input_features (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
        This is the featurized version of audio generated by `Pop2PianoFeatureExtractor`.
    attention_mask:
        For batched generation `input_features` are padded to have the same shape across all examples.
        `attention_mask` helps to determine which areas were padded and which were not.
        - 1 for tokens that are **not padded**,
        - 0 for tokens that are **padded**.
    composer (`str`, *optional*, defaults to `"composer1"`):
        This value is passed to `Pop2PianoConcatEmbeddingToMel` to generate different embeddings for each
        `"composer"`. Please make sure that the composer value is present in `composer_to_feature_token` in
        `generation_config`. For an example please see
        https://huggingface.co/sweetcocoa/pop2piano/blob/main/generation_config.json .
    generation_config (`~generation.GenerationConfig`, *optional*):
        The generation configuration to be used as base parametrization for the generation call. `**kwargs`
        passed to generate matching the attributes of `generation_config` will override them. If
        `generation_config` is not provided, the default will be used, which had the following loading
        priority: 1) from the `generation_config.json` model file, if it exists; 2) from the model
        configuration. Please note that unspecified parameters will inherit [`~generation.GenerationConfig`]'s
        default values, whose documentation should be checked to parameterize generation.
    kwargs:
        Ad hoc parametrization of `generate_config` and/or additional model-specific kwargs that will be
        forwarded to the `forward` function of the model. If the model is an encoder-decoder model, encoder
        specific kwargs should not be prefixed and decoder specific kwargs should be prefixed with *decoder_*.
Return:
    [`~utils.ModelOutput`] or `torch.LongTensor`: A [`~utils.ModelOutput`] (if `return_dict_in_generate=True`
    or when `config.return_dict_in_generate=True`) or a `torch.FloatTensor`.
        Since Pop2Piano is an encoder-decoder model (`model.config.is_encoder_decoder=True`), the possible
        [`~utils.ModelOutput`] types are:
            - [`~generation.GenerateEncoderDecoderOutput`],
            - [`~generation.GenerateBeamEncoderDecoderOutput`]
Nr  z`composer_to_feature_token` was not found! Please refer to https://huggingface.co/sweetcocoa/pop2piano/blob/main/generation_config.jsonand parse a dict like that.ztconfig.composer_vocab_size must be same as the number of keys in generation_config.composer_to_feature_token! Found z vs .)r  r   r  r  )inputsrQ  r   r  r3  )r  r   r"  r,  r  r  rD   ru  r  r   generate)r#   r  r   r  r  r   r&   s         r'   r  *Pop2PianoForConditionalGeneration.generate  s    l $ $ 6 6  *6* (*EFF.   ::;t{{?^?^^889cBSBmBm>n=oopr  *.)I)I))/	 *J *
& w 
()/	

 
 	
r)   c                 $    U R                  U5      $ rV   )r1  )r#   r  s     r'   %prepare_decoder_input_ids_from_labelsGPop2PianoForConditionalGeneration.prepare_decoder_input_ids_from_labels%  s      ((r)   )rD   r  r  r  r  r  r!  rV   )NNNNNNNNNNNNNN)N	composer1N)r:   r;   r<   r=   _tied_weights_keysr   r   r  rL  r   FloatTensorstrr   r  r   
LongTensor
BoolTensorr`  rX   r	   rC  r   r8   r;  r  r  r>   r?   r@   s   @r'   r   r     s    (7'6
 2: 48/$))/$ /$ ,	/$
 ))D0/$b  .23759:>=A(,2637:>*.!%)-,0#'o
##d*o
 ))D0o
 !++d2	o

 !& 0 04 7o
 uU\\23d:o
 o
 ((4/o
 ))D0o
  %0047o
   4'o
 $;o
  $;o
 #Tko
 D[o
" 
u  	!O	3#o
 o
b ]]_ W
 W
r)ELL ) )r)   r   )5r  r  r   r   r   torch.nnr   transformers.generationr   rP  r   r  activationsr   cache_utilsr	   r
   r   
generationr   masking_utilsr   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   utilsr   r   r   configuration_pop2pianor   
get_loggerr:   r   Moduler   rB   r]   rh   ru   r   r   r   r  r=  r  r   __all__r3  r)   r'   <module>r     sR        % 4 & ! C C ) / 9 k k - F F 4 
		H	%+ +4RYY .")) <ryy & F")) D299 @Y
/ Y
x D! D! D!Nw
- w
tBII  
e)(@/ e)
e)P	 /0J
Kr)   