
    Z j                     8   S r SSKrSSKJr  SSKrSSKJr  SSKJrJrJ	r	  SSK
Jr  SSKJrJr  SS	KJr  SS
KJrJrJrJrJrJr  SSKJr  SSKJr  SSKJr  SSKJ r J!r!J"r"J#r#  SSK$J%r%  SSK&J'r'  SSK(J)r)  \#RT                  " \+5      r, " S S\RZ                  5      r. " S S\RZ                  5      r/ " S S\RZ                  5      r0 " S S\RZ                  5      r1 " S S\RZ                  5      r2 " S S\RZ                  5      r3 " S S \RZ                  5      r4 " S! S"\RZ                  5      r5 " S# S$\5      r6\! " S% S&\5      5       r7 " S' S(\RZ                  5      r8 " S) S*\RZ                  5      r9 " S+ S,\RZ                  5      r:\! " S- S.\75      5       r; " S/ S0\RZ                  5      r<\! " S1 S2\75      5       r= " S3 S4\RZ                  5      r>\!" S5S69 " S7 S8\75      5       r?\! " S9 S:\75      5       r@\! " S; S<\75      5       rA\! " S= S>\75      5       rB/ S?QrCg)@zPyTorch ConvBERT model.    N)Callable)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FNget_activation)GradientCheckpointingLayer)"BaseModelOutputWithCrossAttentionsMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)Unpack)apply_chunking_to_forward)TransformersKwargsauto_docstringcan_return_tuplelogging)merge_with_config_defaults)capture_outputs   )ConvBertConfigc                      ^  \ rS rSrSrU 4S jr    SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  4
S
 jjr	Sr
U =r$ )ConvBertEmbeddings3   zGConstruct the embeddings from word, position and token_type embeddings.c                 
  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        U R#                  S[$        R&                  " UR                  5      R)                  S5      SS9  U R#                  S[$        R*                  " U R,                  R/                  5       [$        R0                  S9SS9  g )	N)padding_idxepsposition_idsr   F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizeembedding_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandzerosr%   sizelongselfconfig	__class__s     /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/convbert/modeling_convbert.pyr-   ConvBertEmbeddings.__init__6   s   !||F,=,=v?T?Tbhbubuv#%<<0N0NPVPePe#f %'\\&2H2H&J_J_%`"f&;&;AVAVWzz&"<"<=ELL)G)GHOOPWXej 	 	
 	ekk$*;*;*@*@*B%**Ubg 	 	
    N	input_idsr)   r%   inputs_embedsreturnc                 >   Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUcv  [        U S5      (       a-  U R                  S S 2S U24   nUR	                  US   U5      nUnO8[
        R                  " U[
        R                  U R                  R                  S9nUc  U R                  U5      nU R                  U5      n	U R                  U5      n
XI-   U
-   nU R                  U5      nU R                  U5      nU$ )Nr'   r   r)   r   r+   device)rA   r%   hasattrr)   r?   r=   r@   rB   rO   r2   r4   r6   r7   r;   )rD   rJ   r)   r%   rK   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr4   r6   
embeddingss               rG   forwardConvBertEmbeddings.forwardF   s,     #..*K',,.s3K ^
,,Q^<L
 !t-..*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M"66|D $ : :> J"8;PP
^^J/
\\*-
rI   )r7   r;   r4   r6   r2   )NNNN)__name__
__module____qualname____firstlineno____doc__r-   r=   
LongTensorFloatTensorrV   __static_attributes____classcell__rF   s   @rG   r   r   3   s    Q
$ .2260426$##d*$ ((4/$ &&-	$
 ((4/$ 
		$ $rI   r   c                   f   ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jrSr	U =r
$ )SeparableConv1Dm   zSThis class implements separable convolution, i.e. a depthwise and a pointwise layerc           	        > [         TU ]  5         [        R                  " UUUUUS-  SS9U l        [        R                  " X#SSS9U l        [        R                  " [        R                  " US5      5      U l	        U R                  R                  R                  R                  SUR                  S9  U R
                  R                  R                  R                  SUR                  S9  g )N   F)kernel_sizegroupspaddingbiasr   )rg   rj           meanstd)r,   r-   r   Conv1d	depthwise	pointwise	Parameterr=   r@   rj   weightdatanormal_initializer_range)rD   rE   input_filtersoutput_filtersrg   kwargsrF   s         rG   r-   SeparableConv1D.__init__p   s    # 1$
 =aV[\LL^Q!?@	""**9Q9Q*R""**9Q9Q*RrI   hidden_statesrL   c                 f    U R                  U5      nU R                  U5      nX R                  -  nU$ N)rp   rq   rj   )rD   r{   xs      rG   rV   SeparableConv1D.forward   s.    NN=)NN1	YYrI   )rj   rp   rq   rX   rY   rZ   r[   r\   r-   r=   TensorrV   r_   r`   ra   s   @rG   rc   rc   m   s,    ]S U\\ ell  rI   rc   c                      ^  \ rS rSrU 4S jr  SS\R                  S\R                  S-  S\R                  S-  S\\	   S\
\R                  \R                  4   4
S	 jjrS
rU =r$ )ConvBertSelfAttention   c                 ~  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eUR                  UR                  -  nUS:  a  UR                  U l        SU l        OX l        UR                  U l        UR                  U l        UR                  U R                  -  S:w  a  [        S5      eUR                  U R                  -  S-  U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        XR                  U R                  U R                  5      U l        [        R                  " U R                  U R                  U R                  -  5      U l        [        R                  " UR                  U R                  5      U l        [        R&                  " U R                  S/[)        U R                  S-
  S-  5      S/S	9U l        [        R,                  " UR.                  5      U l        g )
Nr   r0   zThe hidden size (z6) is not a multiple of the number of attention heads ()r   z6hidden_size should be divisible by num_attention_headsrf   )rg   ri   )r,   r-   hidden_sizenum_attention_headsrP   
ValueError
head_ratioconv_kernel_sizeattention_head_sizeall_head_sizer   Linearquerykeyvaluerc   key_conv_attn_layerconv_kernel_layerconv_out_layerUnfoldintunfoldr9   attention_probs_dropout_probr;   )rD   rE   new_num_attention_headsrF   s      rG   r-   ConvBertSelfAttention.__init__   s=    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 #)"<"<@Q@Q"Q"Q&$88DO'(D$'>$$//DO & 7 7 8 88A=UVV$*$6$6$:R:R$RWX#X !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
#2&&(:(:D<Q<Q$
  "$4+=+=t?W?WZ^ZoZo?o!p ii(:(:D<N<NOii..2S$BWBWZ[B[_`A`=acd<e
 zz&"E"EFrI   Nr{   attention_maskencoder_hidden_statesry   rL   c                    UR                   S S n/ UQSPU R                  P7nUb#  U R                  U5      nU R                  U5      nO"U R                  U5      nU R                  U5      nU R	                  UR                  SS5      5      n	U	R                  SS5      n	U R                  U5      n
U
R                  U5      R                  SS5      nUR                  U5      R                  SS5      nUR                  U5      R                  SS5      n[        R                  " X5      nU R                  U5      n[        R                  " USU R                  S/5      n[        R                  " USS9nU R                  U5      n[        R                  " UUS   SU R                  /5      nUR                  SS5      R!                  5       R#                  S5      n[$        R&                  R)                  UU R                  S/SU R                  S-
  S-  S/SS9nUR                  SS5      R                  US   SU R                  U R                  5      n[        R                  " USU R                  U R                  /5      n[        R*                  " UU5      n[        R                  " USU R                  /5      n[        R*                  " XR                  SS5      5      nU[,        R.                  " U R                  5      -  nUb  UU-   n[$        R&                  R                  USS9nU R1                  U5      n[        R*                  " UU5      nUR3                  SSSS5      R!                  5       n[        R                  " UUS   SU R4                  U R                  /5      n[        R6                  " UU/S5      nUR9                  5       S S U R4                  U R                  -  S-  4-   nUR                  " U6 nUU4$ )	Nr'   r   rf   dimr   )rg   dilationri   strider   )shaper   r   r   r   	transposer   viewr=   multiplyr   reshaper   softmaxr   r   
contiguous	unsqueezer   
functionalr   matmulmathsqrtr;   permuter   catrA   )rD   r{   r   r   ry   rQ   hidden_shapemixed_key_layermixed_value_layermixed_key_conv_attn_layermixed_query_layerquery_layer	key_layervalue_layerconv_attn_layerr   r   attention_scoresattention_probscontext_layerconv_outnew_context_layer_shapes                         rG   rV   ConvBertSelfAttention.forward   s    $))#2.CCbC$*B*BC !,"hh'<=O $

+@ A"hh}5O $

= 9$($<$<]=T=TUVXY=Z$[!$=$G$G1$M! JJ}5',,\:DDQJ#((6@@AF	',,\:DDQJ..)BV 22?C!MM*;b$BWBWYZ=[\!MM*;C,,];~ADL^L^7_`'11!Q7BBDNNrR--..2++a/A5q9 . 
 (11!Q7??NB 2 2D4I4I
 ~D<T<TVZVkVk7lmn6GH~D<N<N7OP !<<5H5HR5PQ+dii8P8P.QQ%/.@ --//0@b/I ,,7_kB%--aAq9DDF==[^R1I1I4KcKcd
 		=(";Q? #0"4"4"6s";$$t'?'??!C?
 #
 &**,CDo--rI   )r   r   r   r   r   r;   r   r   r   r   r   r   r   NN)rX   rY   rZ   r[   r-   r=   r   r^   r   r   tuplerV   r_   r`   ra   s   @rG   r   r      s    %GT 4859	O.||O. ))D0O.  %||d2	O.
 +,O. 
u||U\\)	*O. O.rI   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )ConvBertSelfOutputi  c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr#   )r,   r-   r   r   r   denser7   r8   r9   r:   r;   rC   s     rG   r-   ConvBertSelfOutput.__init__  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rI   r{   input_tensorrL   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r}   r   r;   r7   rD   r{   r   s      rG   rV   ConvBertSelfOutput.forward  5    

=1]3}'CDrI   r7   r   r;   
rX   rY   rZ   r[   r-   r=   r   rV   r_   r`   ra   s   @rG   r   r     s6    >U\\  RWR^R^  rI   r   c                      ^  \ rS rSrU 4S jr  SS\R                  S\R                  S-  S\R                  S-  S\\	   S\R                  4
S	 jjr
S
rU =r$ )ConvBertAttentioni  c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r}   )r,   r-   r   rD   r   outputrC   s     rG   r-   ConvBertAttention.__init__  s&    )&1	(0rI   Nr{   r   r   ry   rL   c                 X    U R                   " UU4SU0UD6u  pVU R                  XQ5      nU$ )Nr   )rD   r   )rD   r{   r   r   ry   r   _attention_outputs           rG   rV   ConvBertAttention.forward  sF      99
 #8
 	
  ;;}DrI   )r   rD   r   )rX   rY   rZ   r[   r-   r=   r   r^   r   r   rV   r_   r`   ra   s   @rG   r   r     sl    1 4859	 ||  ))D0   %||d2	 
 +,  
   rI   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )GroupedLinearLayeri&  c                   > [         TU ]  5         Xl        X l        X0l        U R                  U R                  -  U l        U R                  U R                  -  U l        [        R                  " [        R                  " U R                  U R
                  U R                  5      5      U l        [        R                  " [        R                  " U5      5      U l        g r}   )r,   r-   
input_sizeoutput_size
num_groupsgroup_in_dimgroup_out_dimr   rr   r=   emptyrs   rj   )rD   r   r   r   rF   s       rG   r-   GroupedLinearLayer.__init__'  s    $&$ OOt>!--@ll5;;t@Q@QSWSeSe#fgLL[!9:	rI   r{   rL   c                    [        UR                  5       5      S   n[        R                  " USU R                  U R
                  /5      nUR                  SSS5      n[        R                  " X0R                  5      nUR                  SSS5      n[        R                  " X2SU R                  /5      nX0R                  -   nU$ )Nr   r'   r   rf   )listrA   r=   r   r   r   r   r   rs   r   rj   )rD   r{   
batch_sizer~   s       rG   rV   GroupedLinearLayer.forward1  s    -,,./2
MM-"doot?P?P)QRIIaALLKK(IIaAMM!"d.>.>?@		MrI   )rj   r   r   r   r   r   rs   r   ra   s   @rG   r   r   &  s(    ;U\\ ell  rI   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )ConvBertIntermediatei<  c                   > [         TU ]  5         UR                  S:X  a1  [        R                  " UR
                  UR                  5      U l        O.[        UR
                  UR                  UR                  S9U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g )Nr   r   r   r   )r,   r-   r   r   r   r   intermediate_sizer   r   
isinstance
hidden_actstrr
   intermediate_act_fnrC   s     rG   r-   ConvBertIntermediate.__init__=  s    !6#5#5v7O7OPDJ+!--6;S;S`f`q`qDJ f''--'-f.?.?'@D$'-'8'8D$rI   r{   rL   c                 J    U R                  U5      nU R                  U5      nU$ r}   r   r   rD   r{   s     rG   rV   ConvBertIntermediate.forwardJ  s&    

=100?rI   r   r   ra   s   @rG   r   r   <  s(    9U\\ ell  rI   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )ConvBertOutputiP  c                   > [         TU ]  5         UR                  S:X  a1  [        R                  " UR
                  UR                  5      U l        O.[        UR
                  UR                  UR                  S9U l        [        R                  " UR                  UR                  S9U l	        [        R                  " UR                  5      U l        g )Nr   r   r#   )r,   r-   r   r   r   r   r   r   r   r7   r8   r9   r:   r;   rC   s     rG   r-   ConvBertOutput.__init__Q  s    !6#;#;V=O=OPDJ+!33ASAS`f`q`qDJ f&8&8f>S>STzz&"<"<=rI   r{   r   rL   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r}   r   r   s      rG   rV   ConvBertOutput.forward\  r   rI   r   r   ra   s   @rG   r   r   P  s6    	>U\\  RWR^R^  rI   r   c                      ^  \ rS rSrU 4S jr   SS\R                  S\R                  S-  S\R                  S-  S\R                  S-  S\\	   S	\R                  4S
 jjr
S rSrU =r$ )ConvBertLayeric  c                 v  > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        UR                  U l        UR                  U l        U R                  (       a/  U R                  (       d  [        U  S35      e[	        U5      U l	        [        U5      U l        [        U5      U l        g )Nr   z> should be used as a decoder model if cross attention is added)r,   r-   chunk_size_feed_forwardseq_len_dimr   	attention
is_decoderadd_cross_attention	TypeErrorcrossattentionr   intermediater   r   rC   s     rG   r-   ConvBertLayer.__init__d  s    '-'E'E$*62 ++#)#=#= ##??4&(f ghh"3F";D08$V,rI   Nr{   r   r   encoder_attention_maskry   rL   c                    U R                   " UU40 UD6nU R                  (       a9  Ub6  [        U S5      (       d  [        SU  S35      eU R                  " UU4SU0UD6n[        U R                  U R                  U R                  U5      nU$ )Nr  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r   r  rP   AttributeErrorr  r   feed_forward_chunkr   r   )rD   r{   r   r   r  ry   r   layer_outputs           rG   rV   ConvBertLayer.forwardr  s      >>
 
 ??4@4!122$=dV DD D   $22 &  '<  	  1##T%A%A4CSCSUe
 rI   c                 J    U R                  U5      nU R                  X!5      nU$ r}   )r  r   )rD   r   intermediate_outputr  s       rG   r
   ConvBertLayer.feed_forward_chunk  s)    "//0@A{{#6IrI   )r  r   r   r  r  r  r   r   NNN)rX   rY   rZ   r[   r-   r=   r   r^   r   r   rV   r
  r_   r`   ra   s   @rG   r   r   c  s    -" 48596:|| ))D0  %||d2	
 !&t 3 +, 
@ rI   r   c                   n   ^  \ rS rSr% \\S'   SrSr\\	S.r
\R                  " 5       U 4S j5       rSrU =r$ )ConvBertPreTrainedModeli  rE   convbertT)r{   
attentionsc                   > [         TU ]  U5        [        U[        5      (       a!  [        R
                  " UR                  5        g[        U[        5      (       aU  [        R                  " UR                  SU R                  R                  S9  [        R
                  " UR                  5        g[        U[        5      (       a|  [        R                  " UR                  [        R                   " UR                  R"                  S   5      R%                  S5      5        [        R
                  " UR&                  5        gg)zInitialize the weightsrk   rl   r'   r&   N)r,   _init_weightsr   rc   initzeros_rj   r   ru   rs   rE   rv   r   copy_r%   r=   r>   r   r?   r)   )rD   modulerF   s     rG   r  %ConvBertPreTrainedModel._init_weights  s     	f%fo..KK$ 233LLSdkk6S6STKK$ 233JJv**ELL9L9L9R9RSU9V,W,^,^_f,ghKK--. 4rI    )rX   rY   rZ   r[   r   __annotations__base_model_prefixsupports_gradient_checkpointingr   r   _can_record_outputsr=   no_gradr  r_   r`   ra   s   @rG   r  r    s=    "&*#&+
 ]]_
/ 
/rI   r  c                      ^  \ rS rSrU 4S jr   SS\R                  S\R                  S-  S\R                  S-  S\R                  S-  S\4
S	 jjr	S
r
U =r$ )ConvBertEncoderi  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r,   r-   rE   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)rD   rE   r   rF   s      rG   r-   ConvBertEncoder.__init__  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A&Nr{   r   r   r  rL   c                 T    U R                    H  nU" UU4UUS.UD6nM     [        US9$ )N)r   r  )last_hidden_state)r(  r   )rD   r{   r   r   r  ry   layer_modules          rG   rV   ConvBertEncoder.forward  sK     !JJL( '<'=	
 M ' 2+
 	
rI   )rE   r)  r(  r  )rX   rY   rZ   r[   r-   r=   r   r^   r   rV   r_   r`   ra   s   @rG   r#  r#    sn    , 48596:
||
 ))D0
  %||d2	

 !&t 3
 
,
 
rI   r#  c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )ConvBertPredictionHeadTransformi  c                 p  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        UR                  [        5      (       a  [        UR                     U l
        OUR                  U l
        [        R                  " UR                  UR                  S9U l        g r   )r,   r-   r   r   r   r   r   r   r   r
   transform_act_fnr7   r8   rC   s     rG   r-   (ConvBertPredictionHeadTransform.__init__  s~    YYv1163E3EF
f''--$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrI   r{   rL   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r}   )r   r2  r7   r   s     rG   rV   'ConvBertPredictionHeadTransform.forward  s4    

=1--m<}5rI   )r7   r   r2  r   ra   s   @rG   r0  r0    s)    UU\\ ell  rI   r0  c                      ^  \ rS rSrSrS\4U 4S jjr SS\R                  S\R                  S-  S\R                  4S	 jjr
S
rU =r$ )ConvBertSequenceSummaryi  a  
Compute a single vector summary of a sequence hidden states.

Args:
    config ([`ConvBertConfig`]):
        The config used by the model. Relevant arguments in the config class of the model are (refer to the actual
        config class of your model for the default values it uses):

        - **summary_type** (`str`) -- The method to use to make this summary. Accepted values are:

            - `"last"` -- Take the last token hidden state (like XLNet)
            - `"first"` -- Take the first token hidden state (like Bert)
            - `"mean"` -- Take the mean of all tokens hidden states
            - `"cls_index"` -- Supply a Tensor of classification token position (GPT/GPT-2)
            - `"attn"` -- Not implemented now, use multi-head attention

        - **summary_use_proj** (`bool`) -- Add a projection after the vector extraction.
        - **summary_proj_to_labels** (`bool`) -- If `True`, the projection outputs to `config.num_labels` classes
          (otherwise to `config.hidden_size`).
        - **summary_activation** (`Optional[str]`) -- Set to `"tanh"` to add a tanh activation to the output,
          another string or `None` will add no activation.
        - **summary_first_dropout** (`float`) -- Optional dropout probability before the projection and activation.
        - **summary_last_dropout** (`float`)-- Optional dropout probability after the projection and activation.
rE   c                   > [         TU ]  5         [        USS5      U l        U R                  S:X  a  [        e[
        R                  " 5       U l        [        US5      (       a  UR                  (       aq  [        US5      (       a.  UR                  (       a  UR                  S:  a  UR                  nOUR                  n[
        R                  " UR                  U5      U l        [        USS 5      nU(       a  [        U5      O[
        R                  " 5       U l        [
        R                  " 5       U l        [        US5      (       a5  UR"                  S:  a%  [
        R$                  " UR"                  5      U l        [
        R                  " 5       U l        [        US	5      (       a7  UR(                  S:  a&  [
        R$                  " UR(                  5      U l        g g g )
Nsummary_typelastattnsummary_use_projsummary_proj_to_labelsr   summary_activationsummary_first_dropoutsummary_last_dropout)r,   r-   getattrr9  NotImplementedErrorr   IdentitysummaryrP   r<  r=  
num_labelsr   r   r   
activationfirst_dropoutr?  r9   last_dropoutr@  )rD   rE   num_classesactivation_stringrF   s       rG   r-    ConvBertSequenceSummary.__init__  sa   #FNFC& &%{{}6-..63J3Jv788V=Z=Z_e_p_pst_t$//$0099V%7%7EDL#F,@$GIZN3D$E`b`k`k`m[[]62338T8TWX8X!#F,H,H!IDKKM6122v7R7RUV7V "

6+F+F GD 8W2rI   Nr{   	cls_indexrL   c                    U R                   S:X  a  USS2S4   nGOU R                   S:X  a  USS2S4   nGOU R                   S:X  a  UR                  SS9nOU R                   S	:X  a  Uc?  [        R                  " US
SS2SS24   UR                  S   S-
  [        R
                  S9nOXUR                  S5      R                  S5      nUR                  SUR                  5       S-
  -  UR                  S5      4-   5      nUR                  SU5      R                  S5      nOU R                   S:X  a  [        eU R                  W5      nU R                  U5      nU R                  U5      nU R!                  U5      nU$ )a#  
Compute a single vector summary of a sequence hidden states.

Args:
    hidden_states (`torch.FloatTensor` of shape `[batch_size, seq_len, hidden_size]`):
        The hidden states of the last layer.
    cls_index (`torch.LongTensor` of shape `[batch_size]` or `[batch_size, ...]` where ... are optional leading dimensions of `hidden_states`, *optional*):
        Used if `summary_type == "cls_index"` and takes the last token of the sequence as classification token.

Returns:
    `torch.FloatTensor`: The summary of the sequence hidden states.
r:  Nr'   firstr   rm   r   r   rL  .r   r*   )r'   r;  )r9  rm   r=   	full_liker   rB   r   r?   r   rA   gathersqueezerB  rG  rD  rF  rH  )rD   r{   rL  r   s       rG   rV   ConvBertSequenceSummary.forward  sn    &"1b5)F')"1a4(F&("''A'.F+- !OO!#rr1*-!''+a/**	 &//3==bA	%,,Uimmo6I-JmN`N`acNdMf-fg	"))"i8@@DF&(%%##F+f%(""6*rI   )rF  rG  rH  rD  r9  r}   )rX   rY   rZ   r[   r\   r   r-   r=   r^   r]   rV   r_   r`   ra   s   @rG   r7  r7    sV    2H~ H< VZ)"..);@;K;Kd;R)			) )rI   r7  c                     ^  \ rS rSrU 4S jrS rS r\\\	     SS\
R                  S-  S\
R                  S-  S\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\\   S\4S jj5       5       5       rSrU =r$ )ConvBertModeliB  c                 $  > [         TU ]  U5        [        U5      U l        UR                  UR
                  :w  a0  [        R                  " UR                  UR
                  5      U l        [        U5      U l
        Xl        U R                  5         g r}   )r,   r-   r   rU   r0   r   r   r   embeddings_projectr#  encoderrE   	post_initrC   s     rG   r-   ConvBertModel.__init__D  sj     ,V4  F$6$66&(ii0E0EvGYGY&ZD#&v.rI   c                 .    U R                   R                  $ r}   rU   r2   rD   s    rG   get_input_embeddings"ConvBertModel.get_input_embeddingsP  s    ...rI   c                 $    XR                   l        g r}   r[  )rD   r   s     rG   set_input_embeddings"ConvBertModel.set_input_embeddingsS  s    */'rI   NrJ   r   r)   r%   rK   ry   rL   c                    Ub  Ub  [        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       S S nO[        S5      eUu  pUb  UR                  OUR                  n
Uc  [        R
                  " XzS9nUcr  [        U R                  S5      (       a3  U R                  R                  S S 2S U	24   nUR                  X5      nUnO$[        R                  " U[        R                  U
S9nU R                  X'5      nU R                  XX5S9n[        U S5      (       a  U R                  U5      nU R                  " U4S	U0UD6nU$ )
NzDYou cannot specify both input_ids and inputs_embeds at the same timer'   z5You have to specify either input_ids or inputs_embeds)rO   r)   rN   )rJ   r%   r)   rK   rV  r   )r   %warn_if_padding_and_no_attention_maskrA   rO   r=   onesrP   rU   r)   r?   r@   rB   get_extended_attention_maskrV  rW  )rD   rJ   r   r)   r%   rK   ry   rQ   r   rR   rO   rS   rT   extended_attention_maskr{   encoder_outputss                   rG   rV   ConvBertModel.forwardV  sm     ]%>cdd"66yQ#..*K&',,.s3KTUU!,
%.%:!!@T@T!"ZZCN!t(899*.//*H*HKZK*X'3J3Q3QR\3i0!A!&[

SY!Z"&"B"B>"_> ( 
 4-.. 33MBM>Bll?
2?
 ?
 rI   )rE   rU   rV  rW  )NNNNN)rX   rY   rZ   r[   r-   r]  r`  r   r   r   r=   r]   r^   r   r   r   rV   r_   r`   ra   s   @rG   rT  rT  B  s    
/0   .237260426/##d*/ ))D0/ ((4/	/
 &&-/ ((4// +,/ 
,/    /rI   rT  c                   f   ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jrSr	U =r
$ )ConvBertGeneratorPredictionsi  zAPrediction module for the generator, made up of two dense layers.c                    > [         TU ]  5         [        S5      U l        [        R
                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  5      U l
        g )Ngelur#   )r,   r-   r   rF  r   r7   r0   r8   r   r   r   rC   s     rG   r-   %ConvBertGeneratorPredictions.__init__  sV    (0f&;&;AVAVWYYv1163H3HI
rI   generator_hidden_statesrL   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r}   )r   rF  r7   )rD   rn  r{   s      rG   rV   $ConvBertGeneratorPredictions.forward  s3    

#:;6}5rI   )r7   rF  r   )rX   rY   rZ   r[   r\   r-   r=   r^   rV   r_   r`   ra   s   @rG   rj  rj    s0    KJu/@/@ UEVEV  rI   rj  c                   2  ^  \ rS rSrSS0rU 4S jrS rS r\\	      SS\
R                  S-  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\\   S\\-  4S jj5       5       rSrU =r$ )ConvBertForMaskedLMi  zgenerator_lm_head.weightz*convbert.embeddings.word_embeddings.weightc                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        [        R                  " UR                  UR                  5      U l
        U R                  5         g r}   )r,   r-   rT  r  rj  generator_predictionsr   r   r0   r/   generator_lm_headrX  rC   s     rG   r-   ConvBertForMaskedLM.__init__  sR     %f-%A&%I"!#6+@+@&BSBS!TrI   c                     U R                   $ r}   ru  r\  s    rG   get_output_embeddings)ConvBertForMaskedLM.get_output_embeddings  s    %%%rI   c                     Xl         g r}   rx  )rD   r2   s     rG   set_output_embeddings)ConvBertForMaskedLM.set_output_embeddings  s    !0rI   NrJ   r   r)   r%   rK   labelsry   rL   c                 l   U R                   " U4UUUUS.UD6nUS   n	U R                  U	5      n
U R                  U
5      n
SnUbQ  [        R                  " 5       nU" U
R                  SU R                  R                  5      UR                  S5      5      n[        UU
UR                  UR                  S9$ )az  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
r   r)   r%   rK   r   Nr'   losslogitsr{   r  )r  rt  ru  r   r   r   rE   r/   r   r{   r  )rD   rJ   r   r)   r%   rK   r~  ry   rn  generator_sequence_outputprediction_scoresr  loss_fcts                rG   rV   ConvBertForMaskedLM.forward  s    $ GKmmG
))%'G
 G
 %<A$>! 667PQ 223DE**,H-222t{{7M7MNPVP[P[\^P_`D$1??.99	
 	
rI   )r  ru  rt  NNNNNN)rX   rY   rZ   r[   _tied_weights_keysr-   ry  r|  r   r   r=   r]   r^   r   r   r   r   rV   r_   r`   ra   s   @rG   rr  rr    s    46bc&1  .237260426*.(
##d*(
 ))D0(
 ((4/	(

 &&-(
 ((4/(
   4'(
 +,(
 
	(
  (
rI   rr  c                   f   ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jrSr	U =r
$ )ConvBertClassificationHeadi  z-Head for sentence-level classification tasks.c                 n  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        UR                  b  UR                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        Xl        g r}   )r,   r-   r   r   r   r   classifier_dropoutr:   r9   r;   rE  out_projrE   rD   rE   r  rF   s      rG   r-   #ConvBertClassificationHead.__init__  s    YYv1163E3EF
)/)B)B)NF%%TZTnTn 	 zz"45		&"4"4f6G6GHrI   r{   rL   c                     US S 2SS S 24   nU R                  U5      nU R                  U5      n[        U R                  R                     " U5      nU R                  U5      nU R                  U5      nU$ )Nr   )r;   r   r
   rE   r   r  )rD   r{   ry   r~   s       rG   rV   "ConvBertClassificationHead.forward  se    !Q'"LLOJJqM4;;))*1-LLOMM!rI   )rE   r   r;   r  r   ra   s   @rG   r  r    s+    7	U\\   rI   r  z
    ConvBERT Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    )custom_introc                     ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\
\   S\\-  4S jj5       5       rSrU =r$ )!ConvBertForSequenceClassificationi  c                    > [         TU ]  U5        UR                  U l        Xl        [	        U5      U l        [        U5      U l        U R                  5         g r}   )	r,   r-   rE  rE   rT  r  r  
classifierrX  rC   s     rG   r-   *ConvBertForSequenceClassification.__init__  sF      ++%f-4V< 	rI   NrJ   r   r)   r%   rK   r~  ry   rL   c                    U R                   " U4UUUUS.UD6nUS   n	U R                  U	5      n
SnUGb  U R                  R                  c  U R                  S:X  a  SU R                  l        OoU R                  S:  aN  UR
                  [        R                  :X  d  UR
                  [        R                  :X  a  SU R                  l        OSU R                  l        U R                  R                  S:X  aI  [        5       nU R                  S:X  a&  U" U
R                  5       UR                  5       5      nOU" X5      nOU R                  R                  S:X  a=  [        5       nU" U
R                  SU R                  5      UR                  S5      5      nO,U R                  R                  S:X  a  [        5       nU" X5      n[        UU
UR                  UR                   S	9$ )
ae  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
r  r   Nr   
regressionsingle_label_classificationmulti_label_classificationr'   r  )r  r  rE   problem_typerE  r+   r=   rB   r   r   rQ  r   r   r   r   r{   r  rD   rJ   r   r)   r%   rK   r~  ry   outputssequence_outputr  r  r  s                rG   rV   )ConvBertForSequenceClassification.forward  s   $ 7;mm7
))%'7
 7
 "!*1{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
rI   )r  rE   r  rE  r  )rX   rY   rZ   r[   r-   r   r   r=   r]   r^   r   r   r   r   rV   r_   r`   ra   s   @rG   r  r    s      .237260426*.8
##d*8
 ))D08
 ((4/	8

 &&-8
 ((4/8
   4'8
 +,8
 
)	)8
  8
rI   r  c                     ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\
\   S\\-  4S jj5       5       rSrU =r$ )ConvBertForMultipleChoiceiD  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        [        R                  " UR                  S5      U l	        U R                  5         g )Nr   )r,   r-   rT  r  r7  sequence_summaryr   r   r   r  rX  rC   s     rG   r-   "ConvBertForMultipleChoice.__init__F  sM     %f- 7 ?))F$6$6: 	rI   NrJ   r   r)   r%   rK   r~  ry   rL   c                    Ub  UR                   S   OUR                   S   nUb!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb1  UR                  SUR                  S5      UR                  S5      5      OSnU R                  " U4UUUUS.UD6n	U	S   n
U R	                  U
5      nU R                  U5      nUR                  SU5      nSnUb  [        5       nU" X5      n[        UUU	R                  U	R                  S9$ )a  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:


    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr   r'   r   r  r   r  )
r   r   rA   r  r  r  r   r   r{   r  )rD   rJ   r   r)   r%   rK   r~  ry   num_choicesr  r  pooled_outputr  reshaped_logitsr  r  s                   rG   rV   !ConvBertForMultipleChoice.forwardP  s   V -6,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 7;mm7
))%'7
 7
 "!*--o>/ ++b+6')HO4D("!//))	
 	
rI   )r  r  r  r  )rX   rY   rZ   r[   r-   r   r   r=   r]   r^   r   r   r   r   rV   r_   r`   ra   s   @rG   r  r  D  s      .237260426*.N
##d*N
 ))D0N
 ((4/	N

 &&-N
 ((4/N
   4'N
 +,N
 
*	*N
  N
rI   r  c                     ^  \ rS rSrU 4S jr\\      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\
\   S\\-  4S jj5       5       rSrU =r$ )ConvBertForTokenClassificationi  c                 f  > [         TU ]  U5        UR                  U l        [        U5      U l        UR
                  b  UR
                  OUR                  n[        R                  " U5      U l	        [        R                  " UR                  UR                  5      U l        U R                  5         g r}   )r,   r-   rE  rT  r  r  r:   r   r9   r;   r   r   r  rX  r  s      rG   r-   'ConvBertForTokenClassification.__init__  s      ++%f-)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rI   NrJ   r   r)   r%   rK   r~  ry   rL   c                 B   U R                   " U4UUUUS.UD6nUS   n	U R                  U	5      n	U R                  U	5      n
SnUb<  [        5       nU" U
R	                  SU R
                  5      UR	                  S5      5      n[        UU
UR                  UR                  S9$ )z
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
r  r   Nr'   r  )	r  r;   r  r   r   rE  r   r{   r  r  s                rG   rV   &ConvBertForTokenClassification.forward  s      7;mm7
))%'7
 7
 "!*,,71')HFKKDOO<fkk"oND$!//))	
 	
rI   )r  r  r;   rE  r  )rX   rY   rZ   r[   r-   r   r   r=   r]   r^   r   r   r   r   rV   r_   r`   ra   s   @rG   r  r    s      .237260426*.&
##d*&
 ))D0&
 ((4/	&

 &&-&
 ((4/&
   4'&
 +,&
 
&	&&
  &
rI   r  c                   8  ^  \ rS rSrU 4S jr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\
\   S\4S jj5       5       rSrU =r$ )ConvBertForQuestionAnsweringi  c                    > [         TU ]  U5        UR                  U l        [        U5      U l        [
        R                  " UR                  UR                  5      U l        U R                  5         g r}   )
r,   r-   rE  rT  r  r   r   r   
qa_outputsrX  rC   s     rG   r-   %ConvBertForQuestionAnswering.__init__  sS      ++%f-))F$6$68I8IJ 	rI   NrJ   r   r)   r%   rK   start_positionsend_positionsry   rL   c                    U R                   " U4UUUUS.UD6n	U	S   n
U R                  U
5      nUR                  SSS9u  pUR                  S5      R	                  5       nUR                  S5      R	                  5       nS nUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S-  n[        UUUU	R                  U	R                  S9$ )	Nr  r   r   r'   r   )ignore_indexrf   )r  start_logits
end_logitsr{   r  )r  r  splitrQ  r   lenrA   clampr   r   r{   r  )rD   rJ   r   r)   r%   rK   r  r  ry   r  r  r  r  r  
total_lossignored_indexr  
start_lossend_losss                      rG   rV   $ConvBertForQuestionAnswering.forward  su    7;mm7
))%'7
 7
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J+%!!//))
 	
rI   )r  rE  r  )NNNNNNN)rX   rY   rZ   r[   r-   r   r   r=   r]   r^   r   r   r   rV   r_   r`   ra   s   @rG   r  r    s      .23726042637152
##d*2
 ))D02
 ((4/	2

 &&-2
 ((4/2
 ))D02
 ''$.2
 +,2
 
&2
  2
rI   r  )rr  r  r  r  r  r   rT  r  )Dr\   r   collections.abcr   r=   r   torch.nnr   r   r    r	   r  activationsr
   r   modeling_layersr   modeling_outputsr   r   r   r   r   r   modeling_utilsr   processing_utilsr   pytorch_utilsr   utilsr   r   r   r   utils.genericr   utils.output_capturingr   configuration_convbertr   
get_loggerrX   loggerModuler   rc   r   r   r   r   r   r   r   r  r#  r0  r7  rT  rj  rr  r  r  r  r  r  __all__r  rI   rG   <module>r     s)     $   A A & 1 9  . & 6  8 5 2 
		H	%7 7tbii 4w.BII w.t  		  . ,299 (RYY &2. 2j /o / /.
bii 
:bii $`bii `F E+ E EP299 $ =
1 =
 =
@ 0 E
(? E
E
P [
 7 [
 [
| 7
%< 7
 7
t ?
#: ?
 ?
D	rI   