
    Z j                     N   S r SSKJr  SSKrSSKrSSKJr  SSKJrJ	r	J
r
  SSKJr  SSKJr  SS	KJrJrJrJrJrJr  SS
KJr  SSKJrJrJr  SSKJr  \R>                  " \ 5      r!Sr" " S S\RF                  5      r$ " S S\RF                  5      r%S\RL                  S\'S\'S\RL                  4S jr( " S S\RF                  5      r) " S S\RF                  5      r* " S S\RF                  5      r+ " S S\RF                  5      r, SFS \RL                  S!\'S"\'S#\-S$\-S\RL                  4S% jjr. " S& S'\RF                  5      r/ " S( S)\RF                  5      r0\ " S* S+\5      5       r1 " S, S-\RF                  5      r2\" S.S/9\ " S0 S1\5      5       5       r3\" S2S/9 " S3 S4\15      5       r4\ " S5 S6\15      5       r5\" S7S/9 " S8 S9\15      5       r6\ " S: S;\15      5       r7\" S<S/9 " S= S>\15      5       r8\ " S? S@\15      5       r9\ " SA SB\15      5       r:\ " SC SD\15      5       r;/ SEQr<g)Gz!PyTorch Funnel Transformer model.    )	dataclassN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FN)BaseModelOutputMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)ModelOutputauto_docstringlogging   )FunnelConfigg    .Ac                      ^  \ rS rSrS\SS4U 4S jjr S
S\R                  S-  S\R                  S-  S\R                  4S jjrS	r	U =r
$ )FunnelEmbeddings,   configreturnNc                 :  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l        g )N)padding_idxeps)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddings	LayerNormd_modellayer_norm_eps
layer_normDropouthidden_dropoutdropoutselfr   	__class__s     {/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/funnel/modeling_funnel.pyr!   FunnelEmbeddings.__init__-   sh    !||F,=,=v?Q?Q_e_r_rs,,v~~6;P;PQzz&"7"78    	input_idsinputs_embedsc                 r    Uc  U R                  U5      nU R                  U5      nU R                  U5      nU$ N)r&   r*   r-   )r/   r4   r5   
embeddingss       r1   forwardFunnelEmbeddings.forward3   s<       00;M__]3
\\*-
r3   )r-   r*   r&   NN__name__
__module____qualname____firstlineno__r   r!   torchTensorr9   __static_attributes____classcell__r0   s   @r1   r   r   ,   sV    9| 9 9 [_,DILLSWDW	 r3   r   c                     ^  \ rS rSr% SrSr\\S'   S\SS4U 4S jjr	  S"S	\
R                  S
\
R                  S-  S\
R                  S-  S\\
R                     4S jjrS\
R                  S\
R                  4S jrS\S\
R                  S\
R                   S\\
R                     \\\
R                        -  4S jrS\
R                  S\4S jrS#S\
R                  S\S\S\
R                  4S jjrS\
R                  \\
R                     -  \\
R                     -  S\\\   -  \\   -  S\
R                  4S jr S$S\
R                  \\
R                     -  \\
R                     -  S\S\S\
R                  4S jjrS\\
R                     S\\
R                  \\
R                     4   4S jrS\\
R                     S\\
R                     4S  jrS!rU =r$ )%FunnelAttentionStructure=   z6
Contains helpers for `FunnelRelMultiheadAttention `.
   cls_token_type_idr   r   Nc                    > [         TU ]  5         Xl        [        R                  " UR
                  5      U l        [        R                  " UR
                  5      U l        S U l        g r7   )	r    r!   r   r   r+   r,   sin_dropoutcos_dropoutpooling_multr.   s     r1   r!   !FunnelAttentionStructure.__init__D   sK    ::f&;&;<::f&;&;< !r3   r5   attention_masktoken_type_idsc                 h   SU l         UR                  S5      =U l        nU R                  XAR                  UR
                  5      nUb  U R                  U5      OSnU R                  R                  (       a7  [        R                  R                  UR                  US-
  US-
  /5      S5      OSnXVX'4$ )zCReturns the attention inputs associated to the inputs of the model.r   N)r   r   r   r   )rN   sizeseq_lenget_position_embedsdtypedevicetoken_type_ids_to_matr   separate_clsr   
functionalpadnew_ones)r/   r5   rP   rQ   rT   position_embedstoken_type_matcls_masks           r1   init_attention_inputs.FunnelAttentionStructure.init_attention_inputsM   s     !.!3!3A!66w227<O<OQ^QeQefGUGa33NCgk {{'' MMm44gk7Q;5OPR^_ 	
  JJr3   c                 ~    USS2SS2S4   USS2S4   :H  nXR                   :H  nUSS2SS2S4   USS2S4   -  nXB-  $ )z-Convert `token_type_ids` to `token_type_mat`.N)rJ   )r/   rQ   r^   cls_idscls_mats        r1   rX   .FunnelAttentionStructure.token_type_ids_to_mata   sU    '1d
3~ag7NN $:$::!Q*%4(88''r3   rT   rV   rW   c                 R   U R                   R                  nU R                   R                  S:X  Ga6  [        R                  " SUS[        R
                  US9R                  U5      n[        R                  " SUS-  S[        R
                  US9R                  U5      nSSXdS-  -  -  -  nUSS2S4   US   -  n[        R                  " U5      n	U R                  U	5      n
[        R                  " U5      nU R                  U5      n[        R                  " X/S	S
9n[        R                  " X/S	S
9n[        R                  " X/S	S
9n[        R                  " U	* U/S	S
9nXUU4$ [        R                  " SUS-  S[        R
                  US9R                  U5      nSSXdS-  -  -  -  n[        R                  " U* S-  US-  S[        R
                  US9R                  U5      nUS-  nUSS2S4   US   -  nU R                  [        R                  " U5      5      n	U R                  [        R                  " U5      5      n[        R                  " X/S	S
9n[        R                  " SU[        R
                  US9R                  U5      nUn/ n[        SU R                   R                  5       H  nUS:X  a  SnOqU R                  UU5      nSUS-
  -  nU R                  UUUSS9nUSS2S4   U-   nUR!                  UR#                  S5      U5      n[        R$                  " USU5      nUnSU-  nU R                  UU5      nUSS2S4   U-   nUR!                  UR#                  S5      U5      n[        R$                  " USU5      nUR'                  UU/5        M     U$ )a  
Create and cache inputs related to relative position encoding. Those are very different depending on whether we
are using the factorized or the relative shift attention:

For the factorized attention, it returns the matrices (phi, pi, psi, omega) used in the paper, appendix A.2.2,
final formula.

For the relative shift attention, it returns all possible vectors R used in the paper, appendix A.2.1, final
formula.

Paper link: https://huggingface.co/papers/2006.03236

factorizedr         ?rV   rW   rI   r   i'  Ndim)shift)r   r(   attention_typerA   arangeint64tosinrL   cosrM   catrange
num_blocksstride_pool_posrelative_posexpandrS   gatherappend)r/   rT   rV   rW   r(   pos_seqfreq_seqinv_freqsinusoid	sin_embedsin_embed_d	cos_embedcos_embed_dphipsipiomega
rel_pos_idzero_offset	pos_embedpos
pooled_posposition_embeds_listblock_indexposition_embeds_poolingstriderel_posposition_embeds_no_poolings                               r1   rU   ,FunnelAttentionStructure.get_position_embedsi   sh    ++%%;;%%5 ll1gs%++fUXXY^_G||Aw!|STZ[^^_deHEhQ,&?@AHq$w'(4.8H		(+I**95K		(+I**95K))[6B?C))Y2;CK52>BII	z952>ES%(( ||Aw!|STZ[^^_deHEhQ,&?@AHwhlGaKEKK`fgjjkpqJ!A+K!!T'*Xd^;H((8)<=I((8)<=I		9"8bAI,,q'VLOOPUVCJ#% $Q(>(>? !#.2+!%!5!5c;!GJ ;?3F"//VZq/QG%ag.<G%nnW\\!_gFG.3ll9a.Q+ !K++C8!!T'*[8!..a'B-2\\)Q-P*$++-GI`,ab9  @: ('r3   pos_idr   c                     U R                   R                  (       a\  UR                  SU-  * S-   /5      nU R                   R                  (       a  USS OUSS n[        R
                  " X4SSS2   /S5      $ USSS2   $ )zU
Pool `pos_id` while keeping the cls token separate (if `config.separate_cls=True`).
rI   r   rj   Nr   )r   rY   
new_tensortruncate_seqrA   rt   )r/   r   r   cls_pospooled_pos_ids        r1   rw   (FunnelAttentionStructure.stride_pool_pos   s~     ;;##
 ''1k>):Q)>(?@G,0KK,D,DF1RL&QRQS*M99gSqS'9:A>>#A#;r3   r   r   rm   c                     Uc  UnUS   US   -
  nU[        U5      -  nXVU-  -   nUS   US   -
  n[        R                  " XxS-
  U* [        R                  UR                  S9$ )zF
Build the relative positional vector between `pos` and `pooled_pos`.
r   rj   r   ri   )lenrA   ro   longrW   )	r/   r   r   r   rm   	ref_point
num_removemax_distmin_dists	            r1   rx   %FunnelAttentionStructure.relative_pos   st     JqMCF*	S_,
F22a=3r7*||HlVG5::VYV`V`aar3   tensoraxisc                   ^ ^ Uc  g[        T[        [        45      (       a  T H  nT R                  X5      nM     U$ [        U[        [        45      (       a  [	        U5      " UU 4S jU 5       5      $ TUR
                  -  mT R                  R                  (       a(  T R                  R                  (       a  [        SSS5      O[        SSS5      n[        [        S5      /T-  U/-   5      nT R                  R                  (       a?  [        [        S5      /T-  [        SS5      /-   5      n[        R                  " X   U/TS9nX   $ )zD
Perform pooling by stride slicing the tensor along the given axis.
Nc              3   H   >#    U  H  nTR                  UT5      v   M     g 7fr7   )stride_pool).0xr   r/   s     r1   	<genexpr>7FunnelAttentionStructure.stride_pool.<locals>.<genexpr>   s!     J6a 0 0D 9 96s   "rj   rI   r   )r   )
isinstancelisttupler   typendimr   rY   r   slicerA   rt   )r/   r   r   ax
axis_slice	enc_slice	cls_slices   ` `    r1   r   $FunnelAttentionStructure.stride_pool   s     > dT5M**))&5 M fudm,,<J6JJJ 	 #'++":":t{{?W?WE$A]bcgimop]q 	 5;-$.*=>	;;##uT{md2eD!n5EEFIYY 16:FF  r3   modec                   ^ ^^^ Tc  g[        T[        [        45      (       a  [        T5      " UU UU4S jT 5       5      $ T R                  R
                  (       aH  T R                  R                  (       a  TSS2SS24   OTn[        R                  " TSS2SS24   U/SS9mTR                  nUS:X  a  TSS2SSS2S4   mOUS:X  a  TSS2SSS2SS24   mTS4mTS:X  a!  [        R                  R                  TTTS	S
9mO[TS:X  a!  [        R                  R                  TTTS	S
9mO4TS:X  a#  [        R                  R                  T* TTS	S
9* mO[        S5      eUS:X  a  TSS2SSS2S4   $ US:X  a	  TSS2S4   $ T$ )z3Apply 1D pooling to a tensor of size [B x T (x H)].Nc              3   F   >#    U  H  nTR                  TTTS 9v   M     g7f))r   r   N)pool_tensor)r   r   r   r/   r   r   s     r1   r   7FunnelAttentionStructure.pool_tensor.<locals>.<genexpr>   s&     c\bWX 0 0d6 0 R\bs   !rj   r   rk   rI   r   meanT)r   	ceil_modemaxminz0The supported modes are 'mean', 'max' and 'min'.r   )r   r   r   r   r   rY   r   rA   rt   r   r   rZ   
avg_pool2d
max_pool2dNotImplementedError)r/   r   r   r   suffixr   s   ````  r1   r   $FunnelAttentionStructure.pool_tensor   s|    > fudm,,<c\bccc;;##'+{{'?'?VAssF^VFYYq"1"uv6A>F{{19AtQ,-FQYAtQM*F!6>]]--ffVW[-\FU]]]--ffVW[-\FU]mm..wvY].^^F%&XYY19!Q1*%%QY!Q$<r3   attention_inputsc                    Uu  p4pVU R                   R                  (       a}  U R                   R                  S:X  a  U R                  USS S5      USS -   nU R                  US5      nU R                  US5      nU R	                  XR                   R
                  S9nOU =R                  S-  sl        U R                   R                  S:X  a  U R                  US5      nU R                  USS/5      nU R                  USS/5      nU R	                  USS9nU R	                  XR                   R
                  S9nX4XV4nX4$ )zTPool `output` and the proper parts of `attention_inputs` before the attention layer.rg   NrI   r   r   r   r   )r   pool_q_onlyrn   r   r   pooling_typerN   )r/   outputr   r]   r^   rP   r_   s          r1   pre_attention_pooling.FunnelAttentionStructure.pre_attention_pooling  s>    EUA;;""{{))\9"&"2"2?2A3F"J_]^]_M`"`!--na@N''!4H%%f;;3K3K%LF"{{))\9"&"2"2?A"F!--nq!fEN''1a&9H!--n5-IN%%f;;3K3K%LF+^V''r3   c                 J   Uu  p#pEU R                   R                  (       a~  U =R                  S-  sl        U R                   R                  S:X  a  USS U R	                  USS S5      -   nU R	                  US5      nU R	                  US5      nU R                  USS9nX#XE4nU$ )zFPool the proper parts of `attention_inputs` after the attention layer.rI   rg   Nr   r   r   r   )r   r   rN   rn   r   r   )r/   r   r]   r^   rP   r_   s         r1   post_attention_pooling/FunnelAttentionStructure.post_attention_pooling3  s    DTA;;"""{{))\9"1"1"58H8HYZY[I\^_8`"`!--na@N''!4H!--n5-IN+^Vr3   )r   rM   rN   rT   rL   r;   Nr   )r   rI   )r=   r>   r?   r@   __doc__rJ   int__annotations__r   r!   rA   rB   r   r`   rX   rV   rW   r   rU   rw   rx   r   strr   r   r   rC   rD   rE   s   @r1   rG   rG   =   s=    s!| ! ! /3.2	K||K t+K t+	K
 
u||	K((ELL (U\\ (N(N(#(;;N(8=N(	u||	tD$67	7N(`ell  b bc bSV b_d_k_k b!uU\\22T%,,5GG! E#Jc*! 
	!D rs$llU5<<%884;MM$UX$kn$	$L((-ell(;(	u||U5<<00	1(, uU\\7J  uUZUaUaOb    r3   rG   positional_attncontext_lenrm   r   c                     U R                   u  p4pV[        R                  " XXFU/5      n U S S 2S S 2US 2S S 24   n [        R                  " XXEXb-
  /5      n U SS U24   n U $ )N.)shaperA   reshape)r   r   rm   
batch_sizen_headrT   max_rel_lens          r1   _relative_shift_gatherr   A  sp    />/D/D,J mmO&W^5_`O%aEFAo6OmmO&S^Sf5ghO%c<K<&78Or3   c                      ^  \ rS rSrS\S\SS4U 4S jjrSS jrSS jr SS	\	R                  S
\	R                  S\	R                  S\\	R                     S\S\\	R                  S4   4S jjrSrU =r$ )FunnelRelMultiheadAttentioniQ  r   r   r   Nc                 F  > [         TU ]  5         Xl        X l        UR                  UR
                  UR                  pTn[        R                  " UR                  5      U l	        [        R                  " UR                  5      U l
        [        R                  " X4U-  SS9U l        [        R                  " X4U-  5      U l        [        R                  " X4U-  5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " X4U/5      5      U l        [        R                  " [         R"                  " XE/5      5      U l        [        R                  " [         R"                  " SXE/5      5      U l        [        R                  " XE-  U5      U l        [        R0                  " X1R2                  S9U l        SUS-  -  U l        g )NF)biasrI   r   rh   g      ?)r    r!   r   r   r(   r   d_headr   r+   r,   attention_dropoutLinearq_headk_headv_head	ParameterrA   zerosr_w_biasr_r_biasr_kernelr_s_bias	seg_embed	post_projr'   r)   r*   scale)r/   r   r   r(   r   r   r0   s         r1   r!   $FunnelRelMultiheadAttention.__init__R  sf   &"(..&-- jj)>)>?!#F,D,D!Eii&uEii&9ii&9U[[&1A%BCU[[&1A%BCU[['61J%KLU[[&1A%BCekk1f2E&FG6?G<,,w4I4IJFCK(
r3   c                 z   U R                   R                  S:X  a  Uu  pVpxU R                  U R                  -  n	U R                  n
[
        R                  " SX)-   U
5      nXSS2S4   -  nXSS2S4   -  n[
        R                  " SX5      [
        R                  " SX5      -   nOUR                  S   U:w  a  SOSnXR                     US-
     nU R                  U R                  -  nU R                  n
[
        R                  " SUU
5      n[
        R                  " SUU-   U5      n[        XU5      nUb  X-  nU$ )	z5Relative attention score for the positional encodingsrg   zbinh,dnh->bindNzbind,jd->bnijr   rI   ztd,dnh->tnhzbinh,tnh->bnit)
r   rn   r   r   r   rA   einsumr   r   r   )r/   r]   r   r   r_   r   r   r   r   uw_rq_r_attentionq_r_attention_1q_r_attention_2r   rm   rvr_heads                      r1   relative_positional_attention9FunnelRelMultiheadAttention.relative_positional_attentioni  s7    ;;%%5 #2CS

*A--C "LL)96:sKM+!T'l:O+Dk9O $ll?OQTYT`T`U O  aK7AQE   0 01%!)<A

*A--C \\-C8F#ll+;VaZPO4_SXYO'Or3   c                    Uc  gUR                   u  pEnU R                  U R                  -  n[        R                  " SX'-   U R
                  5      nUSS2S4   R                  XBR                   S   XV/5      n[        R                  " USSS9u  p[        R                  " XR                  UR                   5      U	R                  UR                   5      5      nUb  X-  nU$ )z/Relative attention score for the token_type_idsNr   zbind,snd->bnisrI   r   rj   rk   )	r   r   r   rA   r   r   ry   splitwhere)r/   r^   r   r_   r   rT   r   r   token_type_biasdiff_token_typesame_token_typetoken_type_attns               r1   relative_token_type_attention9FunnelRelMultiheadAttention.relative_token_type_attention  s    !+9+?+?(
[ ==4::-  ,,'79JDNN['4077\\RS_V]8kl+0;;r+R(++22>3G3GH/J`J`aoauauJv
 'Or3   querykeyvaluer   output_attentions.c                    Uu  pgpUR                   u  pnUR                   S   nU R                  R                  U R                  R                  pU R	                  U5      R                  XX5      nU R                  U5      R                  XX5      nU R                  U5      R                  XX5      nUU R                  -  nU R                  U R                  -  n[        R                  " SUU-   U5      nU R                  UUX5      nU R                  UUU	5      nUU-   U-   nUR                  nUR                  5       nUb%  U[         SUS S 2S S 4   R                  5       -
  -  -
  n[        R"                  " USUS9nU R%                  U5      n[        R                  " SUU5      nU R'                  UR)                  XX-  5      5      nU R+                  U5      nU R-                  UU-   5      nU(       a  UU4$ U4$ )Nr   zbind,bjnd->bnijrj   )rl   rV   zbnij,bjnd->bind)r   r   r   r   r   viewr   r   r   r   rA   r   r   r  rV   floatINFsoftmaxr   r   r   r,   r*   )r/   r  r  r	  r   r
  r]   r^   rP   r_   r   rT   _r   r   r   r   r   r   r   content_scorer   r  
attn_scorerV   	attn_probattn_vecattn_outr   s                                r1   r9   #FunnelRelMultiheadAttention.forward  s    EUA!&
Qiil++T[[-?-? U#((fMS!&&zOU#((&Q$**$==4::-%68I6R<<_fVal<<^VU]^ #_4F
   %%'
%#cQ41N1T1T1V-V&WWJMM*"EB	**95	 << 19fE >>("2"2:"XY&&x0!12&7	"FfYFr3   )r   r   r   r,   r   r*   r   r   r   r   r   r   r   r   r   r7   F)r=   r>   r?   r@   r   r   r!   r   r  rA   rB   r   boolr9   rC   rD   rE   s   @r1   r   r   Q  s    )| )# )$ ).(T< #(3G||3G \\3G ||	3G
  -3G  3G 
u||S 	!3G 3Gr3   r   c                   n   ^  \ rS rSrS\SS4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	FunnelPositionwiseFFNi  r   r   Nc                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     U l	        [        R                  " UR                  5      U l        [        R                  " UR
                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  UR                   5      U l        g r7   )r    r!   r   r   r(   d_innerlinear_1r
   
hidden_actactivation_functionr+   activation_dropoutlinear_2r,   r-   r'   r)   r*   r.   s     r1   r!   FunnelPositionwiseFFN.__init__  s    		&..&..A#)&*;*;#< "$**V-F-F"G		&..&..Azz&"7"78,,v~~v7L7LMr3   hiddenc                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU R                  X-   5      $ r7   )r  r  r   r!  r-   r*   )r/   r#  hs      r1   r9   FunnelPositionwiseFFN.forward  s\    MM&!$$Q'##A&MM!LLOvz**r3   )r   r  r-   r*   r  r!  r<   rE   s   @r1   r  r    s9    N| N N+ell +u|| + +r3   r  c                      ^  \ rS rSrS\S\SS4U 4S jjr SS\R                  S\R                  S	\R                  S
\	S\
4
S jjrSrU =r$ )FunnelLayeri  r   r   r   Nc                 b   > [         TU ]  5         [        X5      U l        [	        U5      U l        g r7   )r    r!   r   	attentionr  ffn)r/   r   r   r0   s      r1   r!   FunnelLayer.__init__  s&    4VI(0r3   r  r  r	  r
  c                 l    U R                  XX4US9nU R                  US   5      nU(       a  XvS   4$ U4$ )Nr
  r   r   r*  r+  )r/   r  r  r	  r   r
  attnr   s           r1   r9   FunnelLayer.forward  sA     ~~e%Uf~g$q'"$5Q DF9Dr3   r/  r  )r=   r>   r?   r@   r   r   r!   rA   rB   r  r   r9   rC   rD   rE   s   @r1   r(  r(    so    1| 1# 1$ 1 #(
E||
E \\
E ||	
E  
E 

E 
Er3   r(  c                      ^  \ rS rSrS\SS4U 4S jjr     SS\R                  S\R                  S-  S\R                  S-  S	\S
\S\S\	\
-  4S jjrSrU =r$ )FunnelEncoderi  r   r   Nc                 ^  > [         TU ]  5         Xl        [        U5      U l        [
        R                  " [        UR                  5       VVVs/ s H>  u  p#[
        R                  " [        U5       Vs/ s H  n[        X5      PM     sn5      PM@     snnn5      U l        g s  snf s  snnnf r7   )r    r!   r   rG   attention_structurer   
ModuleList	enumerateblock_sizesru   r(  blocks)r/   r   r   
block_sizer  r0   s        r1   r!   FunnelEncoder.__init__  s    #;F#C mm 099K9K/L/L+K zIZ[IZA{6?IZ[\/L
[s   %B(4B#	B(#B(r5   rP   rQ   r
  output_hidden_statesreturn_dictc                    UR                  U5      nU R                  R                  UUUS9nUnU(       a  U4OS n	U(       a  SOS n
[        U R                  5       GH?  u  pUR                  S5      U R                  R                  (       a  SOS:  nU=(       a    US:  nU(       a  U R                  R                  X5      u  p[        U5       H  u  nn[        U R                  R                  U   5       H  nUS:H  =(       a    US:H  =(       a    UnU(       a$  WnU R                  R                  (       a  UOU=nnOU=n=nnU" UUUXtS9nUS   nU(       a  U R                  R                  U5      nU(       a  U
USS  -   n
U(       d  M  X4-   n	M     M     GMB     U(       d  [        S XU
4 5       5      $ [        XU
S9$ )	NrP   rQ    r   rI   r   r.  c              3   .   #    U  H  oc  M  Uv   M     g 7fr7   r@  r   r   s     r1   r   (FunnelEncoder.forward.<locals>.<genexpr>B       a$Oq$O   	last_hidden_statehidden_states
attentions)type_asr5  r`   r7  r9  rS   r   rY   r   ru   block_repeatsr   r   r   r   )r/   r5   rP   rQ   r
  r<  r=  r   r#  all_hidden_statesall_attentionsr   blockpooling_flagpooled_hiddenlayer_indexlayerrepeat_index
do_poolingr  r  r	  layer_outputs                          r1   r9   FunnelEncoder.forward  s    (//>33II)) J 

 0D],$0d"+DKK"8K!;;q>$++2J2JQPQRL';K!OL262J2J2`2`3/ '0&6"U$)$++*C*CK*P$QL".!"3!\+:J!\P\J! -040G0Gf]Ze.444e#(U<L#rL)!_F!+/+C+C+Z+Z[k+l(()7,qr:J)J++,=	,I) %R '7 #92 aV$Oaaaesttr3   )r5  r9  r   NNFFTr=   r>   r?   r@   r   r!   rA   rB   r  r   r   r9   rC   rD   rE   s   @r1   r3  r3    s    	
| 	
 	
 /3.2"'%* 0u||0u t+0u t+	0u
  0u #0u 0u 
	 0u 0ur3   r3  r   r   
target_lenrY   r   c           	      L   US:X  a  U $ U(       a  U SS2SS24   nU SS2SS24   n [         R                  " XSS9nU(       aW  U(       a)  [        R                  R	                  USSSUS-
  SS45      nUSS2SUS-
  24   n[         R
                  " WU/SS9nU$ USS2SU24   nU$ )zs
Upsample tensor `x` to match `target_len` by repeating the tokens `stride` time on the sequence length dimension.
r   N)repeatsrl   r   rk   )rA   repeat_interleaver   rZ   r[   rt   )r   r   rY  rY   r   clsr   s          r1   upsampler^  F  s     {2A2haeH$$QA>F]]&&v1a!Q/JKF+Z!^++,C=a0 M ;J;'Mr3   c                      ^  \ rS rSrS\SS4U 4S jjr     SS\R                  S\R                  S\R                  S-  S	\R                  S-  S
\S\S\S\	\
-  4S jjrSrU =r$ )FunnelDecoderi\  r   r   Nc           	         > [         TU ]  5         Xl        [        U5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[        US5      PM     sn5      U l
        g s  snf )Nr   )r    r!   r   rG   r5  r   r6  ru   num_decoder_layersr(  layers)r/   r   r  r0   s      r1   r!   FunnelDecoder.__init__]  sV    #;F#C mmU6KdKdEe$fEe[%;Ee$fg$fs   A0final_hiddenfirst_block_hiddenrP   rQ   r
  r<  r=  c           	         [        US[        U R                  R                  5      S-
  -  UR                  S   U R                  R
                  U R                  R                  S9nX-   n	U(       a  U	4OS n
U(       a  SOS nU R                  R                  U	UUS9nU R                   H,  nU" XXUS9nUS   n	U(       a  XSS  -   nU(       d  M'  X4-   n
M.     U(       d  [        S XU4 5       5      $ [        XUS	9$ )
NrI   r   )r   rY  rY   r   r@  r?  r.  r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr7   r@  rB  s     r1   r   (FunnelDecoder.forward.<locals>.<genexpr>  rD  rE  rF  )r^  r   r   r8  r   rY   r   r5  r`   rc  r   r   )r/   re  rf  rP   rQ   r
  r<  r=  upsampled_hiddenr#  rL  rM  r   rR  rU  s                  r1   r9   FunnelDecoder.forwardc  s    $T[[4459:)//21111
 "6)=VI40d33II)) J 
 [[E ]noL!!_F !/qr2B!B##$5	$A! ! aV$Oaaaesttr3   )r5  r   rc  rW  rX  rE   s   @r1   r`  r`  \  s    h| h h /3.2"'%* 'ull'u "LL'u t+	'u
 t+'u  'u #'u 'u 
	 'u 'ur3   r`  c                   r   ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
FunnelDiscriminatorPredictionsi  zEPrediction module for the discriminator, made up of two dense layers.r   r   Nc                    > [         TU ]  5         Xl        [        R                  " UR
                  UR
                  5      U l        [        R                  " UR
                  S5      U l        g r   )r    r!   r   r   r   r(   densedense_predictionr.   s     r1   r!   'FunnelDiscriminatorPredictions.__init__  sD    YYv~~v~~>
 "		&..! <r3   discriminator_hidden_statesc                     U R                  U5      n[        U R                  R                     " U5      nU R	                  U5      R                  S5      nU$ )Nrj   )ro  r
   r   r  rp  squeeze)r/   rr  rH  logitss       r1   r9   &FunnelDiscriminatorPredictions.forward  sJ    

#>?t{{556}E&&}5==bAr3   )r   ro  rp  )r=   r>   r?   r@   r   r   r!   rA   rB   r9   rC   rD   rE   s   @r1   rm  rm    s9    O=| = =5<< ELL  r3   rm  c                   R    \ rS rSr% \\S'   Sr\R                  " 5       S 5       r	Sr
g)FunnelPreTrainedModeli  r   funnelc                 h   UR                   R                  nUR                  S5      S:w  a  [        USS 5      b  U R                  R
                  c=  UR                  R                  u  p4[        R                  " S[        XC-   5      -  5      nOU R                  R
                  n[        R                  " UR                  US9  [        USS 5      b"  [        R                  " UR                  S5        g g US:X  Ga   [        R                  " UR                   U R                  R"                  S	9  [        R                  " UR$                  U R                  R"                  S	9  [        R                  " UR&                  U R                  R"                  S	9  [        R                  " UR(                  U R                  R"                  S	9  [        R                  " UR*                  U R                  R"                  S	9  g US
:X  a  U R                  R
                  c  SOU R                  R
                  n[        R                  " UR,                  R                  US9  UR,                  R.                  bB  [        R0                  " UR,                  R                  UR,                  R.                     5        g g g )Nr   rj   weightrh   )stdr   g        r   )br   )r0   r=   findgetattrr   initializer_stdr{  r   npsqrtr  initnormal_	constant_r   uniform_r   initializer_ranger   r   r   r   r&   r   zeros_)r/   module	classnamefan_outfan_inr|  s         r1   _init_weights#FunnelPreTrainedModel._init_weights  s   $$--	>>(#r)vx.:;;..6&,mm&9&9OG''#f.>(?"?@C++55CV]]4vvt,8v{{C0 977MM&//T[[-J-JKMM&//T[[-J-JKMM&//T[[-J-JKMM&//T[[-J-JKMM&**dkk.K.KL,,44<#$++B]B]CLL//66C@%%11=F2299&:P:P:\:\]^ > -r3   r@  N)r=   r>   r?   r@   r   r   base_model_prefixrA   no_gradr  rC   r@  r3   r1   rx  rx    s(     
]]__ _r3   rx  c                   r   ^  \ rS rSrS\S\SS4U 4S jjrS\R                  S\R                  4S jr	S	r
U =r$ )
FunnelClassificationHeadi  r   n_labelsr   Nc                   > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        [        R                  " UR                  U5      U l	        g r7   )
r    r!   r   r   r(   linear_hiddenr+   r,   r-   
linear_out)r/   r   r  r0   s      r1   r!   !FunnelClassificationHead.__init__  sU    YYv~~v~~Fzz&"7"78))FNNH=r3   r#  c                     U R                  U5      n[        R                  " U5      nU R                  U5      nU R	                  U5      $ r7   )r  rA   tanhr-   r  )r/   r#  s     r1   r9    FunnelClassificationHead.forward  s=    ##F+F#f%v&&r3   )r-   r  r  )r=   r>   r?   r@   r   r   r!   rA   rB   r9   rC   rD   rE   s   @r1   r  r    s=    >| >s >t >'ell 'u|| ' 'r3   r  z2
    Output type of [`FunnelForPreTraining`].
    )custom_introc                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\\R                     S-  \S'   Sr\\R                     S-  \S'   Srg)	FunnelForPreTrainingOutputi  a  
loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
    Total loss of the ELECTRA-style objective.
logits (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
    Prediction scores of the head (scores for each token before SoftMax).
Nlossru  rH  rI  r@  )r=   r>   r?   r@   r   r  rA   FloatTensorr   ru  rH  r   rI  rC   r@  r3   r1   r  r    sg     &*D%

d
")'+FE$+59M5**+d2926Je''(4/6r3   r  z
    The base Funnel Transformer Model transformer outputting raw hidden-states without upsampling head (also called
    decoder) or any task-specific head on top.
    c                   b  ^  \ rS rSrS\SS4U 4S jjrS\R                  4S jrS\R                  SS4S jr	\
        SS	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\S-  S\S-  S\S-  S\\-  4S jj5       rSrU =r$ )FunnelBaseModeli  r   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r7   )r    r!   r   r8   r3  encoder	post_initr.   s     r1   r!   FunnelBaseModel.__init__  s4     *62$V, 	r3   c                 .    U R                   R                  $ r7   r8   r&   r/   s    r1   get_input_embeddings$FunnelBaseModel.get_input_embeddings      ...r3   new_embeddingsc                 $    XR                   l        g r7   r  r/   r  s     r1   set_input_embeddings$FunnelBaseModel.set_input_embeddings      *8'r3   r4   rP   rQ   position_idsr5   r
  r<  r=  c	           	      P   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n
O"Ub  UR                  5       S S n
O[	        S5      eUb  UR                  OUR                  nUc  [        R                  " XS9nUc$  [        R                  " U
[        R                  US9nU R                  XS9nU R                  UUUUUUS9nU$ )NDYou cannot specify both input_ids and inputs_embeds at the same timerj   5You have to specify either input_ids or inputs_embedsrW   ri   r5   rP   rQ   r
  r<  r=  )r   r
  r<  r=  
ValueError%warn_if_padding_and_no_attention_maskrS   rW   rA   onesr   r   r8   r  )r/   r4   rP   rQ   r  r5   r
  r<  r=  kwargsinput_shaperW   encoder_outputss                r1   r9   FunnelBaseModel.forward  s5    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN	O,,))/!5# ' 
 r3   )r8   r  NNNNNNNNr=   r>   r?   r@   r   r!   r   r"   r  r  r   rA   rB   r  r   r   r9   rC   rD   rE   s   @r1   r  r    s   |  /bll /92<< 9D 9  *..2.2,0-1)-,0#'.<<$&. t+. t+	.
 llT). ||d*.  $;. #Tk. D[. 
	 . .r3   r  c                   B  ^  \ rS rSrS\SS4U 4S jjrS\R                  4S jrS\R                  SS4S jr	\
       SS	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\S-  S\S-  S\S-  S\\-  4S jj5       rSrU =r$ )FunnelModeli&  r   r   Nc                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        U5      U l        U R                  5         g r7   )
r    r!   r   r   r8   r3  r  r`  decoderr  r.   s     r1   r!   FunnelModel.__init__(  sE     *62$V,$V, 	r3   c                 .    U R                   R                  $ r7   r  r  s    r1   r   FunnelModel.get_input_embeddings2  r  r3   r  c                 $    XR                   l        g r7   r  r  s     r1   r   FunnelModel.set_input_embeddings5  r  r3   r4   rP   rQ   r5   r
  r<  r=  c           
         Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUb  Ub  [	        S5      eUb"  U R                  X5        UR                  5       n	O"Ub  UR                  5       S S n	O[	        S5      eUb  UR                  OUR                  n
Uc  [        R                  " XS9nUc$  [        R                  " U	[        R                  U
S9nU R                  XS9nU R                  UUUUSUS9nU R                  US	   US
   U R                   R                  S	      UUUUUS9nU(       d<  S	nUS	   4nU(       a  US
-  nXS
   X   -   4-   nU(       a  US
-  nXS   X   -   4-   nU$ [!        US	   U(       a  UR"                  UR"                  -   OS U(       a  UR$                  UR$                  -   S9$ S S9$ )Nr  rj   r  r  ri   r  Tr  r   r   )re  rf  rP   rQ   r
  r<  r=  rI   rF  )r   r
  r<  r=  r  r  rS   rW   rA   r  r   r   r8   r  r  r8  r   rH  rI  )r/   r4   rP   rQ   r5   r
  r<  r=  r  r  rW   r  decoder_outputsidxoutputss                  r1   r9   FunnelModel.forward8  s1    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY ]%>cdd"66yQ#..*K&',,.s3KTUU%.%:!!@T@T!"ZZCN!"[[EJJvVN	O,,))/!%# ' 
 ,,(+.q1$++2I2I!2LM))/!5# ' 
 C&q)+G#q!Q%7/:N%N$PP q!Q%7/:N%N$PPN-a0# +88?;X;XXTe22_5O5OO
 	

 lp
 	
r3   )r   r  r8   r  )NNNNNNNr  rE   s   @r1   r  r  &  s    |  /bll /92<< 9D 9  *..2.2-1)-,0#'H
<<$&H
 t+H
 t+	H

 ||d*H
  $;H
 #TkH
 D[H
 
	 H
 H
r3   r  z
    Funnel Transformer model with a binary classification head on top as used during pretraining for identifying
    generated tokens.
    c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\	S-  S\	S-  S\	S-  S\
\-  4S jj5       rSrU =r$ )FunnelForPreTrainingi  r   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        U R                  5         g r7   )r    r!   r  ry  rm  discriminator_predictionsr  r.   s     r1   r!   FunnelForPreTraining.__init__  s3     !&))G)O&r3   r4   rP   rQ   r5   labelsr
  r<  r=  c	           
      f   Ub  UOU R                   R                  nU R                  UUUUUUUS9n
U
S   nU R                  U5      nSnUb  [        R
                  " 5       nUb`  UR                  SUR                  S   5      S:H  nUR                  SUR                  S   5      U   nX_   nU" UUR                  5       5      nO4U" UR                  SUR                  S   5      UR                  5       5      nU(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the ELECTRA-style loss. Input should be a sequence of tokens (see `input_ids`
    docstring) Indices should be in `[0, 1]`:

    - 0 indicates the token is an original token,
    - 1 indicates the token was replaced.

Examples:

```python
>>> from transformers import AutoTokenizer, FunnelForPreTraining
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("funnel-transformer/small")
>>> model = FunnelForPreTraining.from_pretrained("funnel-transformer/small")

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> logits = model(**inputs).logits
```NrP   rQ   r5   r
  r<  r=  r   rj   r   r  ru  rH  rI  )r   r=  ry  r  r   r   r  r   r  r  rH  rI  )r/   r4   rP   rQ   r5   r  r
  r<  r=  r  rr  discriminator_sequence_outputru  r  loss_fctactive_lossactive_logitsactive_labelsr   s                      r1   r9   FunnelForPreTraining.forward  sh   B &1%<k$++BYBY&*kk))'/!5# '2 '
# )DA(F%//0MN++-H),11"6S6Y6YZ[6\]abb &B0M0S0STU0V WXc d & 3}/B/B/DEB0M0S0STU0V WY_YeYeYghY!<QR!@@F)-)9TGf$EvE)5CC2==	
 	
r3   )r  ry  r  )r=   r>   r?   r@   r   r!   r   rA   rB   r  r   r  r9   rC   rD   rE   s   @r1   r  r    s    |    *..2.2-1&*)-,0#'C
<<$&C
 t+C
 t+	C

 ||d*C
 t#C
  $;C
 #TkC
 D[C
 
+	+C
 C
r3   r  c                   j  ^  \ rS rSrSS0rS\SS4U 4S jjrS\R                  4S jr	S	\R                  SS4S
 jr\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\S-  S\S-  S\S-  S\\-  4S jj5       rSrU =r$ )FunnelForMaskedLMi  zlm_head.weightz(funnel.embeddings.word_embeddings.weightr   r   Nc                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  UR                  5      U l        U R                  5         g r7   )
r    r!   r  ry  r   r   r(   r#   lm_headr  r.   s     r1   r!   FunnelForMaskedLM.__init__  sD     !&)yy1B1BC 	r3   c                     U R                   $ r7   r  r  s    r1   get_output_embeddings'FunnelForMaskedLM.get_output_embeddings  s    ||r3   r  c                     Xl         g r7   r  r  s     r1   set_output_embeddings'FunnelForMaskedLM.set_output_embeddings  s    %r3   r4   rP   rQ   r5   r  r
  r<  r=  c	           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9n
U
S   nU R                  U5      nSnUbF  [	        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nU(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
Nr  r   rj   r   r  )
r   r=  ry  r  r   r  r#   r   rH  rI  )r/   r4   rP   rQ   r5   r  r
  r<  r=  r  r  rG  prediction_logitsmasked_lm_lossr  r   s                   r1   r9   FunnelForMaskedLM.forward  s    & &1%<k$++BYBY++))'/!5#  
 $AJ LL):;')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r3   )ry  r  r  )r=   r>   r?   r@   _tied_weights_keysr   r!   r   r   r  r"   r  r   rA   rB   r  r   r   r9   rC   rD   rE   s   @r1   r  r    s   *,VW|  ryy &BLL &T &  *..2.2-1&*)-,0#'/
<<$&/
 t+/
 t+	/

 ||d*/
 t#/
  $;/
 #Tk/
 D[/
 
	/
 /
r3   r  z
    Funnel Transformer Model with a sequence classification/regression head on top (two linear layer on top of the
    first timestep of the last hidden state) e.g. for GLUE tasks.
    c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\	S-  S\	S-  S\	S-  S\
\-  4S jj5       rSrU =r$ )FunnelForSequenceClassificationi   r   r   Nc                    > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        [        XR                  5      U l        U R                  5         g r7   )	r    r!   
num_labelsr   r  ry  r  
classifierr  r.   s     r1   r!   (FunnelForSequenceClassification.__init__'  sJ      ++%f-26;L;LMr3   r4   rP   rQ   r5   r  r
  r<  r=  c	           
      >   Ub  UOU R                   R                  nU R                  UUUUUUUS9n
U
S   nUSS2S4   nU R                  U5      nSnUGb  U R                   R                  c  U R
                  S:X  a  SU R                   l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                   R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                   R                  S:X  a  [        5       nU" X5      nU(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                   U
R"                  S	9$ )
ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationrj   r  )r   r=  ry  r  problem_typer  rV   rA   r   r   r   rt  r   r  r   r   rH  rI  )r/   r4   rP   rQ   r5   r  r
  r<  r=  r  r  rG  pooled_outputru  r  r  r   s                    r1   r9   'FunnelForSequenceClassification.forward1  s   & &1%<k$++BYBY++))'/!5#  
 $AJ)!Q$//{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r3   )r  r   ry  r  r  )r=   r>   r?   r@   r   r!   r   rA   rB   r  r   r   r9   rC   rD   rE   s   @r1   r  r     s    |    *..2.2-1&*)-,0#'B
<<$&B
 t+B
 t+	B

 ||d*B
 t#B
  $;B
 #TkB
 D[B
 
)	)B
 B
r3   r  c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\	S-  S\	S-  S\	S-  S\
\-  4S jj5       rSrU =r$ )FunnelForMultipleChoiceiw  r   r   Nc                    > [         TU ]  U5        [        U5      U l        [	        US5      U l        U R                  5         g r   )r    r!   r  ry  r  r  r  r.   s     r1   r!    FunnelForMultipleChoice.__init__y  s4     %f-261=r3   r4   rP   rQ   r5   r  r
  r<  r=  c	           
         Ub  UOU R                   R                  nUb  UR                  S   OUR                  S   n
Ub!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb!  UR                  SUR	                  S5      5      OSnUb1  UR                  SUR	                  S5      UR	                  S5      5      OSnU R                  UUUUUUUS9nUS   nUSS2S4   nU R                  U5      nUR                  SU
5      nSnUb  [        5       nU" X5      nU(       d  U4USS -   nUb  U4U-   $ U$ [        UUUR                  UR                  S9$ )a"  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr   rj   r  r   r  )r   r=  r   r  rS   ry  r  r   r   rH  rI  )r/   r4   rP   rQ   r5   r  r
  r<  r=  r  num_choicesr  rG  r  ru  reshaped_logitsr  r  r   s                      r1   r9   FunnelForMultipleChoice.forward  s   & &1%<k$++BYBY,5,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImq ( r=#5#5b#9=;M;Mb;QR 	 ++))'/!5#  
 $AJ)!Q$// ++b+6')HO4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r3   )r  ry  r  )r=   r>   r?   r@   r   r!   r   rA   rB   r  r   r   r9   rC   rD   rE   s   @r1   r  r  w  s    |    *..2.2-1&*)-,0#';
<<$&;
 t+;
 t+	;

 ||d*;
 t#;
  $;;
 #Tk;
 D[;
 
*	*;
 ;
r3   r  c                     ^  \ rS rSrS\SS4U 4S jjr\        SS\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\	S-  S\	S-  S\	S-  S\
\-  4S jj5       rSrU =r$ )FunnelForTokenClassificationi  r   r   Nc                 0  > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r7   )r    r!   r  r  ry  r   r+   r,   r-   r   r$   r  r  r.   s     r1   r!   %FunnelForTokenClassification.__init__  si      ++!&)zz&"7"78))F$6$68I8IJ 	r3   r4   rP   rQ   r5   r  r
  r<  r=  c	           
         Ub  UOU R                   R                  nU R                  UUUUUUUS9n
U
S   nU R                  U5      nU R	                  U5      nSnUb<  [        5       nU" UR                  SU R                  5      UR                  S5      5      nU(       d  U4U
SS -   nUb  U4U-   $ U$ [        UUU
R                  U
R                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Nr  r   rj   r   r  )r   r=  ry  r-   r  r   r  r  r   rH  rI  )r/   r4   rP   rQ   r5   r  r
  r<  r=  r  r  rG  ru  r  r  r   s                   r1   r9   $FunnelForTokenClassification.forward  s    " &1%<k$++BYBY++))'/!5#  
 $AJ LL):;!23')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r3   )r  r-   ry  r  r  )r=   r>   r?   r@   r   r!   r   rA   rB   r  r   r   r9   rC   rD   rE   s   @r1   r  r    s    	| 	 	  *..2.2-1&*)-,0#'.
<<$&.
 t+.
 t+	.

 ||d*.
 t#.
  $;.
 #Tk.
 D[.
 
&	&.
 .
r3   r  c                   :  ^  \ rS rSrS\SS4U 4S jjr\         SS\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\	S-  S\	S-  S\	S-  S\
\-  4S jj5       rSrU =r$ )FunnelForQuestionAnsweringi  r   r   Nc                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r7   )
r    r!   r  r  ry  r   r   r$   
qa_outputsr  r.   s     r1   r!   #FunnelForQuestionAnswering.__init__  sS      ++!&)))F$6$68I8IJ 	r3   r4   rP   rQ   r5   start_positionsend_positionsr
  r<  r=  c
           
         U	b  U	OU R                   R                  n	U R                  UUUUUUU	S9nUS   nU R                  U5      nUR	                  SSS9u  pUR                  S5      R                  5       nUR                  S5      R                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  nU	(       d  X4USS  -   nUb  U4U-   $ U$ [        UUUUR                  UR                  S9$ )	Nr  r   r   rj   rk   )ignore_indexrI   )r  start_logits
end_logitsrH  rI  )r   r=  ry  r  r   rt  
contiguousr   rS   squezeclampr   r   rH  rI  )r/   r4   rP   rQ   r5   r  r  r
  r<  r=  r  r  rG  ru  r  r  
total_lossignored_indexr  
start_lossend_lossr   s                         r1   r9   "FunnelForQuestionAnswering.forward  s    &1%<k$++BYBY++))'/!5#  
 $AJ!23#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"8"8"<=%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J"/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r3   )ry  r  r  )	NNNNNNNNN)r=   r>   r?   r@   r   r!   r   rA   rB   r  r   r   r9   rC   rD   rE   s   @r1   r  r    s    |    *..2.2-1/3-1)-,0#';
<<$&;
 t+;
 t+	;

 ||d*;
 ,;
 ||d*;
  $;;
 #Tk;
 D[;
 
-	-;
 ;
r3   r  )	r  r  r  r  r  r  r  r  rx  )TF)=r   dataclassesr   numpyr  rA   r   torch.nnr   r   r    r	   r  activationsr
   modeling_outputsr   r   r   r   r   r   modeling_utilsr   utilsr   r   r   configuration_funnelr   
get_loggerr=   loggerr  Moduler   rG   rB   r   r   r   r  r(  r3  r  r^  r`  rm  rx  r  r  r  r  r  r  r  r  r  r  __all__r@  r3   r1   <module>r,     s   ( !    A A & !  . 9 9 . 
		H	% 
ryy "A ryy A HELL s SV [`[g[g  MG")) MG`+BII +&E")) E&<uBII <u@ di|| .1AE\`
\\,.uBII .ubRYY   _O _ _<'ryy ' 
 7 7 7 ?+ ??D Z
' Z
 Z
z M
0 M
M
` B
- B
 B
J N
&; N
N
b E
3 E
 E
P ;
#8 ;
 ;
| G
!6 G
 G
T
r3   