
    Z j                        S SK Jr  S SKrS SKrS SKJr  S SKJr  SSKJ	r
  SSKJr  SSKJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJrJrJr  SSKJrJrJr  SSKJ r   SSK!J"r"J#r#J$r$  SSK%J&r&  \$RN                  " \(5      r) " S S\RT                  5      r+ " S S\RT                  5      r, " S S\5      r- " S S\5      r. " S S\5      r/ " S S\RT                  5      r0 " S S\RT                  5      r1  SIS \RT                  S!\Rd                  S"\Rd                  S#\Rd                  S$\Rd                  S-  S%\3S-  S&\3S'\ \"   4S( jjr4 " S) S*\RT                  5      r5 " S+ S,\RT                  5      r6 " S- S.\5      r7 " S/ S0\RT                  5      r8 " S1 S2\RT                  5      r9 " S3 S4\5      r: " S5 S6\RT                  5      r;\# " S7 S8\5      5       r<  SJS9\=\>\>4   S:\3S;\>S$\R~                  S-  S<\>S=\R                  4S> jjrA\# " S? S@\<5      5       rBSrC\#" SASB9 " SC SD\<5      5       rD\#" SESB9 " SF SG\<5      5       rE/ SHQrFg)K    )CallableN)CrossEntropyLoss   )initialization)ACT2FN)is_deepspeed_zero3_enabled)is_fsdp_managed_module)create_bidirectional_mask)FlashAttentionKwargs)GradientCheckpointingLayer)BaseModelOutputCausalLMOutputSequenceClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel*get_torch_context_manager_or_global_device)Unpack)TransformersKwargsauto_docstringlogging   )HubertConfigc                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertPositionalConvEmbedding-   c                 2  > [         TU ]  5         [        R                  " UR                  UR                  UR
                  UR
                  S-  UR                  S9U l        S U l        UR                  (       a'  [        R                  " UR                  5      U l        GO[        R                  R                  n[        [        R                  R                  S5      (       a$  [        R                  R                  R                  n[        5       (       Ga%  SS KnUR"                  R%                  U R                  R&                  SS9   U" U R                  SSS9U l        S S S 5        [        U R                  S5      (       aU  U R                  R                  R&                  R(                  nU R                  R                  R&                  R*                  nO,U R                  R,                  nU R                  R.                  nUR"                  R1                  X5        UR"                  R1                  X5        OU" U R                  SSS9U l        [3        UR
                  5      U l        [6        UR8                     U l        g ! , (       d  f       GN,= f)	N   )kernel_sizepaddinggroupsweight_normr   modifier_rankweight)namedimparametrizations)super__init__nnConv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupsconv
batch_normconv_pos_batch_normBatchNorm1dutilsr!   hasattrr'   r   	deepspeedzeroGatheredParametersr$   	original0	original1weight_gweight_vregister_external_parameterHubertSamePadLayerr   r   feat_extract_activation
activation)selfconfigr!   r5   r:   r;   	__class__s         {/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/hubert/modeling_hubert.pyr)   &HubertPositionalConvEmbedding.__init__.   s   II6622a777
	 %% nnV-?-?@DO((..Krxx00-@@ hh77CC)++ ^^66tyy7G7GWX6Y +DIIH! LDI Z499&899#yy99@@JJH#yy99@@JJH#yy11H#yy11H::4J::4J'		aH	)&*H*HI !?!?@ ZYs   
J
Jc                     UR                  SS5      nU R                  b  U R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nUR                  SS5      nU$ )Nr   r   )	transposer0   r/   r   r?   r@   hidden_statess     rC   forward%HubertPositionalConvEmbedding.forwardS   sn    %//15??& OOM:M		-0]36%//15    )r?   r0   r/   r   __name__
__module____qualname____firstlineno__r)   rI   __static_attributes____classcell__rB   s   @rC   r   r   -   s    #AJ	 	rK   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )r=   _   c                 R   > [         TU ]  5         US-  S:X  a  SU l        g SU l        g )Nr   r   r   )r(   r)   num_pad_remove)r@   r-   rB   s     rC   r)   HubertSamePadLayer.__init__`   s)    #:Q#>!#CarK   c                 X    U R                   S:  a  US S 2S S 2S U R                   * 24   nU$ Nr   rW   rG   s     rC   rI   HubertSamePadLayer.forwardd   s6    ")!Q0F43F3F2F0F*FGMrK   r[   rL   rS   s   @rC   r=   r=   _   s    K rK   r=   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )HubertNoLayerNormConvLayerj   c                 b  > [         TU ]  5         US:  a  UR                  US-
     OSU l        UR                  U   U l        [
        R                  " U R                  U R                  UR                  U   UR                  U   UR                  S9U l
        [        UR                     U l        g )Nr   r   r   stridebias)r(   r)   conv_dimin_conv_dimout_conv_dimr*   r+   conv_kernelconv_stride	conv_biasr/   r   r>   r?   r@   rA   layer_idrB   s      rC   r)   #HubertNoLayerNormConvLayer.__init__k   s    <DqL6??8a<8a"OOH5II**84%%h/!!
	 !!?!?@rK   c                 J    U R                  U5      nU R                  U5      nU$ N)r/   r?   rG   s     rC   rI   "HubertNoLayerNormConvLayer.forwardy   s$    		-06rK   )r?   r/   re   rf   r   rL   rS   s   @rC   r^   r^   j   s    A rK   r^   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )HubertLayerNormConvLayer   c                   > [         TU ]  5         US:  a  UR                  US-
     OSU l        UR                  U   U l        [
        R                  " U R                  U R                  UR                  U   UR                  U   UR                  S9U l
        [
        R                  " U R                  SS9U l        [        UR                     U l        g )Nr   r   ra   T)elementwise_affine)r(   r)   rd   re   rf   r*   r+   rg   rh   ri   r/   	LayerNorm
layer_normr   r>   r?   rj   s      rC   r)   !HubertLayerNormConvLayer.__init__   s    <DqL6??8a<8a"OOH5II**84%%h/!!
	 ,,t'8'8TR !?!?@rK   c                     U R                  U5      nUR                  SS5      nU R                  U5      nUR                  SS5      nU R                  U5      nU$ )N)r/   rF   rw   r?   rG   s     rC   rI    HubertLayerNormConvLayer.forward   sV    		-0%//B76%//B76rK   r?   r/   re   rw   rf   rp   rL   rS   s   @rC   rr   rr      s    A rK   rr   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )HubertGroupNormConvLayer   c                   > [         TU ]  5         US:  a  UR                  US-
     OSU l        UR                  U   U l        [
        R                  " U R                  U R                  UR                  U   UR                  U   UR                  S9U l
        [        UR                     U l        [
        R                  " U R                  U R                  SS9U l        g )Nr   r   ra   T)
num_groupsnum_channelsaffine)r(   r)   rd   re   rf   r*   r+   rg   rh   ri   r/   r   r>   r?   	GroupNormrw   rj   s      rC   r)   !HubertGroupNormConvLayer.__init__   s    <DqL6??8a<8a"OOH5II**84%%h/!!
	 !!?!?@,,$2C2CRVRcRclpqrK   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ rn   )r/   rw   r?   rG   s     rC   rI    HubertGroupNormConvLayer.forward   s2    		-066rK   r}   rp   rL   rS   s   @rC   r   r      s    r  rK   r   c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )HubertFeatureEncoder   z.Construct the features from raw audio waveformc           	        > [         TU ]  5         UR                  S:X  a@  [        USS9/[	        UR
                  S-
  5       Vs/ s H  n[        XS-   S9PM     sn-   nOVUR                  S:X  a-  [	        UR
                  5       Vs/ s H  n[        XS9PM     nnO[        SUR                   S35      e[        R                  " U5      U l        SU l        S	U l        g s  snf s  snf )
Ngroupr   )rk   r   layerz`config.feat_extract_norm` is z), but has to be one of ['group', 'layer']FT)r(   r)   feat_extract_normr   rangenum_feat_extract_layersr^   rr   
ValueErrorr*   
ModuleListconv_layersgradient_checkpointing_requires_grad)r@   rA   ir   rB   s       rC   r)   HubertFeatureEncoder.__init__   s    ##w.3FQGHLQRXRpRpstRtLuLLuq*6EBLuL K %%0QVW]WuWuQvwQvA3FGQvKwK01I1I0JJst  ==5&+#"L xs   C C%c                 N    U R                  5        H
  nSUl        M     SU l        g NF)
parametersrequires_gradr   r@   params     rC   _freeze_parameters'HubertFeatureEncoder._freeze_parameters   s#    __&E"'E '#rK   c                     US S 2S 4   nU R                   (       a  U R                  (       a  SUl        U R                   H  nU" U5      nM     U$ )NT)r   trainingr   r   )r@   input_valuesrH   
conv_layers       rC   rI   HubertFeatureEncoder.forward   sK    $QW- 4==*.M'**J&}5M + rK   )r   r   r   )
rM   rN   rO   rP   __doc__r)   r   rI   rQ   rR   rS   s   @rC   r   r      s    8#"$

 
rK   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertFeatureProjection   c                 x  > [         TU ]  5         UR                  U l        U R                  (       a1  [        R                  " UR
                  S   UR                  S9U l        [        R                  " UR
                  S   UR                  5      U l
        [        R                  " UR                  5      U l        g )Nr{   eps)r(   r)   feat_proj_layer_normr*   rv   rd   layer_norm_epsrw   Linearr,   
projectionDropoutfeat_proj_dropoutdropoutr@   rA   rB   s     rC   r)    HubertFeatureProjection.__init__   s}    $*$?$?!$$ ll6??2+>FDYDYZDO))FOOB$79K9KLzz&":":;rK   c                     U R                   (       a  U R                  U5      nU R                  U5      nU R                  U5      nU$ rn   )r   rw   r   r   rG   s     rC   rI   HubertFeatureProjection.forward   s;    $$ OOM:M6]3rK   )r   r   rw   r   rL   rS   s   @rC   r   r      s    < rK   r   modulequerykeyvalueattention_maskscalingr   kwargsc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  nUb  X-   n[        R
                  R                  USS9n[        R
                  R                  XU R                  S9n[        R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr{         r   r   r&   )pr   r   )
sizetorchmatmulrF   r*   
functionalsoftmaxr   r   
contiguous)
r   r   r   r   r   r   r   r   attn_weightsattn_outputs
             rC   eager_attention_forwardr      s     **R.D( <<}}Q':;gEL!#4==((2(>L==((6??([L,,|3K''1-88:K$$rK   c                   :  ^  \ rS rSrSr     SS\S\S\S\S\S	\S
\S-  4U 4S jjjr	   SS\
R                  S\
R                  S-  S\
R                  S-  S\S-  S\\   S\\
R                  \
R                  S-  \\
R                     S-  4   4S jjrSrU =r$ )HubertAttentioni  z=Multi-headed attention from 'Attention Is All You Need' paperN	embed_dim	num_headsr   
is_decoderrc   	is_causalrA   c                   > [         TU ]  5         Xl        X l        X0l        X-  U l        Xpl        U R
                  U-  U R                  :w  a  [        SU R                   SU S35      eU R
                  S-  U l        X@l	        X`l
        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).r   )rc   )r(   r)   r   r   r   head_dimrA   r   r   r   r   r*   r   k_projv_projq_projout_proj)	r@   r   r   r   r   rc   r   rA   rB   s	           rC   r)   HubertAttention.__init__	  s     	""!.MMI%$..8MdnnM]$YKr3  }}d*$"ii	4@ii	4@ii	4@		)TBrK   rH   key_value_statesr   output_attentionsr   returnc                    USLnUR                   SS n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      n	U(       a  UOUn
/ U
R                   SS QSPU R                  P7nU R                  U
5      R                  U5      R	                  SS5      nU R                  U
5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      nU" U U	UUU4U R                  (       d  SOU R                  U R                  US.UD6u  nnUR                  " / UQSP76 R!                  5       nU R#                  U5      nUUS4$ )z#Input shape: Batch x Time x ChannelNr{   r   r           )r   r   r   )shaper   r   viewrF   r   r   r   get_interfacerA   _attn_implementationr   r   r   r   reshaper   r   )r@   rH   r   r   r   r   is_cross_attentioninput_shapehidden_shapequery_statescurrent_stateskv_shape
key_statesvalue_statesattention_interfacer   r   s                    rC   rI   HubertAttention.forward(  s    .T9 $))#2.88b8$--8 {{=166|DNNqRST-?)]B^))#2.BBDMMB[[055h?II!QO
{{>277AKKAqQ(?(M(MKK,,.E)
 %8
%
  $}}C$,,LL/
%
 
%
!\ "));;;;FFHmmK0L$..rK   )rA   r   r   r   r   r   r   r   r   r   r   r   )r   FTFN)NNF)rM   rN   rO   rP   r   intfloatboolr   r)   r   Tensorr   r   tuplerI   rQ   rR   rS   s   @rC   r   r     s
   G  &*CC C 	C
 C C C t#C CD 15.2).0/||0/  ,,-0/ t+	0/
  $;0/ -.0/ 
u||U\\D0%2E2LL	M0/ 0/rK   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )HubertFeedForwardi[  c                   > [         TU ]  5         [        R                  " UR                  5      U l        [        R                  " UR                  UR                  5      U l	        [        UR                  [        5      (       a  [        UR                     U l        OUR                  U l        [        R                  " UR                  UR                  5      U l        [        R                  " UR                   5      U l        g rn   )r(   r)   r*   r   activation_dropoutintermediate_dropoutr   r,   intermediate_sizeintermediate_dense
isinstance
hidden_actstrr   intermediate_act_fnoutput_densehidden_dropoutoutput_dropoutr   s     rC   r)   HubertFeedForward.__init__\  s    $&JJv/H/H$I!"$))F,>,>@X@X"Yf''--'-f.?.?'@D$'-'8'8D$IIf&>&>@R@RS jj)>)>?rK   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU$ rn   )r   r   r   r   r   rG   s     rC   rI   HubertFeedForward.forwardi  sX    //>00?11-@))-8++M:rK   )r   r   r   r   r   rL   rS   s   @rC   r   r   [  s    @ rK   r   c                   2   ^  \ rS rSrU 4S jrSS jrSrU =r$ )HubertEncoderLayeris  c                   > [         TU ]  5         [        UR                  UR                  UR
                  SUS9U l        [        R                  " UR                  5      U l
        [        R                  " UR                  UR                  S9U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        g )NFr   r   r   r   rA   r   )r(   r)   r   r,   num_attention_headsattention_dropout	attentionr*   r   r   r   rv   r   rw   r   feed_forwardfinal_layer_normr   s     rC   r)   HubertEncoderLayer.__init__t  s    (((00,,
 zz&"7"78,,v'9'9v?T?TU-f5 "V-?-?VEZEZ [rK   c                     UnU R                  XUS9u  pnU R                  U5      nXA-   nU R                  U5      nXR                  U5      -   nU R	                  U5      nU4nU(       a  Xu4-  nU$ Nr   r   )r	  r   rw   r
  r  r@   rH   r   r   attn_residualr   _outputss           rC   rI   HubertEncoderLayer.forward  s    %)-L] *8 *
&Q ]3%56%(9(9-(HH--m< "&GrK   )r	  r   r
  r  rw   r   rL   rS   s   @rC   r  r  s  s    \ rK   r  c                      ^  \ rS rSrU 4S jr    SS\R                  S\R                  S-  S\S\S\4
S	 jjr	S
r
U =r$ )HubertEncoderi  c                   > [         TU ]  5         Xl        [        U5      U l        [
        R                  " UR                  UR                  S9U l	        [
        R                  " UR                  5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[!        U5      PM     sn5      U l        SU l        g s  snf Nr   F)r(   r)   rA   r   pos_conv_embedr*   rv   r,   r   rw   r   r   r   r   r   num_hidden_layersr  layersr   r@   rA   r  rB   s      rC   r)   HubertEncoder.__init__  s    ;FC,,v'9'9v?T?TUzz&"7"78mmvOgOgIh$iIhA%7%?Ih$ij&+# %j    C	NrH   r   r   output_hidden_statesreturn_dictc                 ,   U(       a  SOS nU(       a  SOS nUb4  UR                  S5      R                  SSUR                  S   5      nSX) '   [        U R                  UUS9nU R                  U5      n	XR                  UR                  5      -   nU R                  U5      nU R                  U5      n[        5       =(       d    [        U 5      n
U R                   H  nU(       a  Xa4-   n[        R                  " / 5      nU R                  =(       a    XR                  R                   :  nU(       a  U
(       a  U" XUS9nUS   nU(       a  SnU(       d  M|  UWS   4-   nM     U(       a  Xa4-   nU(       d  [#        S	 XU4 5       5      $ [%        UUUS
9$ )N r{   r   r   r   rA   inputs_embedsr   r  NNc              3   .   #    U  H  oc  M  Uv   M     g 7frn   r"  .0vs     rC   	<genexpr>(HubertEncoder.forward.<locals>.<genexpr>       m$[q$[   	last_hidden_staterH   
attentions)	unsqueezerepeatr   r
   rA   r  todevicerw   r   r   r	   r  r   randr   	layerdropr   r   r@   rH   r   r   r  r   all_hidden_statesall_self_attentionsexpand_attention_maskposition_embeddingssynced_gpusr   dropout_probabilityskip_the_layerlayer_outputss                  rC   rI   HubertEncoder.forward  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M012;;')
 #11-@%(>(>}?S?S(TT6]302R6LT6R[[E#$58H$H! #(**R.!]]Z/B[[EZEZ/ZN![ %!Te! !.a 0 ,  &9]1=M<O&O#' !*   14D Dm]GZ$[mmm++*
 	
rK   rA   r   r   rw   r  r  NFFT)rM   rN   rO   rP   r)   r   tensorr   r   rI   rQ   rR   rS   s   @rC   r  r    s]    , /3"'%* ;
||;
 t+;
  	;

 #;
 ;
 ;
rK   r  c                   J   ^  \ rS rSrU 4S jrS\R                  4S jrSrU =r	$ )HubertAttnAdapterLayeri  c                   > [         TU ]  5         UR                  U l        UR                  U l        [        R                  " U R
                  5      U l        [        R                  " U R
                  U R                  5      U l
        [        R                  " 5       U l        [        R                  " U R                  U R
                  5      U l        g)z
Implements adapter modules directly with 3D tensor weight as parameters and without using ModuleList to speed
up training throughput.
N)r(   r)   adapter_attn_dim	input_dimr,   
hidden_dimr*   rv   normr   linear_1ReLUact_fnlinear_2r   s     rC   r)   HubertAttnAdapterLayer.__init__  s    
 	00 ,,LL1			$//4>>Bggi		$..$//BrK   rH   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ rn   )rJ  rK  rM  rN  rG   s     rC   rI   HubertAttnAdapterLayer.forward  s@    		-0m4M2m4rK   )rM  rI  rH  rK  rN  rJ  )
rM   rN   rO   rP   r)   r   FloatTensorrI   rQ   rR   rS   s   @rC   rE  rE    s     CU%6%6  rK   rE  c                   t   ^  \ rS rSrU 4S jr  S	S\R                  S\R                  S-  S\4S jjrSr	U =r
$ )
!HubertEncoderLayerStableLayerNormi  c                   > [         TU ]  5         [        UR                  UR                  UR
                  SUS9U l        [        R                  " UR                  5      U l
        [        R                  " UR                  UR                  S9U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        [#        USS 5      b  [%        U5      U l        g S U l        g )NFr  r   rG  )r(   r)   r   r,   r  r  r	  r*   r   r   r   rv   r   rw   r   r
  r  getattrrE  adapter_layerr   s     rC   r)   *HubertEncoderLayerStableLayerNorm.__init__  s    (((00,,
 zz&"7"78,,v'9'9v?T?TU-f5 "V-?-?VEZEZ [6-t4@!7!?D!%DrK   NrH   r   r   c                    UnU R                  U5      nU R                  XUS9u  pnU R                  U5      nXA-   nXR                  U R	                  U5      5      -   nU R
                  b  XR                  U5      -   nU4nU(       a  Xu4-  nU$ r  )rw   r	  r   r
  r  rW  r  s           rC   rI   )HubertEncoderLayerStableLayerNorm.forward  s     &6)-L] *8 *
&Q ]3%5%(9(9$:O:OP]:^(__)),>,>},MMM "&GrK   )rW  r	  r   r
  r  rw   r   )rM   rN   rO   rP   r)   r   r   r   rI   rQ   rR   rS   s   @rC   rT  rT    sC    &, /3"'	|| t+  	 rK   rT  c                   :   ^  \ rS rSrU 4S jr    SS jrSrU =r$ )HubertEncoderStableLayerNormi&  c                   > [         TU ]  5         Xl        [        U5      U l        [
        R                  " UR                  UR                  S9U l	        [
        R                  " UR                  5      U l        [
        R                  " [        UR                  5       Vs/ s H  n[!        U5      PM     sn5      U l        SU l        g s  snf r  )r(   r)   rA   r   r  r*   rv   r,   r   rw   r   r   r   r   r   r  rT  r  r   r  s      rC   r)   %HubertEncoderStableLayerNorm.__init__'  s    ;FC,,v'9'9v?T?TUzz&"7"78mm@EfF^F^@_`@_1.v6@_`
 ',# ar  c                    U(       a  SOS nU(       a  SOS nUb4  UR                  S5      R                  SSUR                  S   5      nSX) '   [        U R                  UUS9nU R                  U5      n	X-   nU R                  U5      n[        5       =(       d    [        U 5      n
U R                   H  nU(       a  Xa4-   n[        R                  " / 5      nU R                  =(       a    XR                  R                  :  nU(       a  U
(       a  U" XUS9nUS   nU(       a  SnU(       d  M|  UWS   4-   nM     U R                  U5      nU(       a  Xa4-   nU(       d  [        S	 XU4 5       5      $ [!        UUUS
9$ )Nr"  r{   r   r   r   r#  r  r%  c              3   .   #    U  H  oc  M  Uv   M     g 7frn   r"  r'  s     rC   r*  7HubertEncoderStableLayerNorm.forward.<locals>.<genexpr>j  r,  r-  r.  )r1  r2  r   r
   rA   r  r   r   r	   r  r   r5  r   r6  rw   r   r   r7  s                  rC   rI   $HubertEncoderStableLayerNorm.forward2  s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!45M012;;')
 #11-@%;]302R6LT6R[[E#$58H$H! #(**R.!]]Z/B[[EZEZ/ZN![ !&!Te! !.a 0 ,  &9]1=M<O&O#) !, 6 14D Dm]GZ$[mmm++*
 	
rK   rA  rB  rL   rS   s   @rC   r\  r\  &  s     	, "=
 =
rK   r\  c                       \ rS rSr% \\S'   SrSrSrSS/r	Sr
SrSrSr\R                  " 5       S	 5       rS
\R"                  \-  4S jrS\S\R"                  4S jrSrg)HubertPreTrainedModelir  rA   hubertr   audior  ParametrizedConv1dTc                    [        U[        R                  5      (       ac  [        R                  " UR
                  SU R                  R                  S9  UR                  b!  [        R                  " UR                  5        gg[        U[        R                  [        R                  [        R                  45      (       a  [        R                  " UR                  5        [        R                  " UR
                  5        [        USS5      ba  [        R                  " UR                  5        [        R                  " UR                   5        [        R                  " UR"                  5        gg[        U[        R$                  5      (       Ga,  ['        5       (       a  SSKn[+        US5      (       ak  [+        US5      (       aZ  UR,                  R/                  UR0                  UR2                  /SS9   [        R4                  " UR
                  5        SSS5        OnUR,                  R/                  UR
                  SS9   [        R4                  " UR
                  5        SSS5        O [        R4                  " UR
                  5        UR                  b!  [        R                  " UR                  5        gg[        U[6        5      (       a3  [+        US	5      (       a!  [        R8                  " UR:                  5        gg[        U[<        5      (       aN  [+        US
5      (       a<  [        R>                  " UR@                  SU R                  RB                  S-   -  5        ggg! , (       d  f       N= f! , (       d  f       N= f)zInitialize the weightsr   )meanstdNrunning_meanr   r;   r:   r"   masked_spec_embedlayer_weightsg      ?r   )"r   r*   r   initnormal_r$   rA   initializer_rangerc   zeros_rv   r   r2   ones_rV  rk  running_varnum_batches_trackedr+   r   r5   r4   r6   r7   r;   r:   kaiming_normal_HubertModeluniform_rl  HubertForSequenceClassification	constant_rm  r  )r@   r   r5   s      rC   _init_weights#HubertPreTrainedModel._init_weights~  sV    fbii((LLSdkk6S6ST{{&FKK( 'r||R^^ LMMKK$JJv}}%v~t4@F//0

6--.F667 A 		**)++ 6:..76:3N3N"::FOOV__;]mn:o,,V]]; po #::6==XY:Z,,V]]; [Z $$V]]3{{&FKK( ',,v233f667 4 ?@@v//v33SDKK<Y<Y\]<]5^_ 0 A po [Zs   6!M)!M:)
M7:
Ninput_lengthsc                     S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )z8
Computes the output length of the convolutional layers
c                 8    [         R                  " X-
  USS9S-   $ )Nfloor)rounding_moder   )r   div)input_lengthr   rb   s      rC   _conv_out_lengthPHubertPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length  s      99\7wWZ[[[rK   )ziprA   rg   rh   )r@   r|  r  r   rb   s        rC    _get_feat_extract_output_lengths6HubertPreTrainedModel._get_feat_extract_output_lengths  sG    
	\
 $'t{{'>'>@W@W#XK,]PM $Y rK   feature_vector_lengthr   c                    U R                  UR                  S5      5      R                  [        R                  5      nUR
                  S   n[        R                  " XA4UR                  UR                  S9nSU[        R                  " UR
                  S   UR                  S9US-
  4'   UR                  S/5      R                  S5      R                  S/5      R                  5       nU$ )Nr{   r   )dtyper4  r   )r4  )r  sumr3  r   longr   zerosr  r4  arangeflipcumsumr   )r@   r  r   output_lengths
batch_sizes        rC   "_get_feature_vector_attention_mask8HubertPreTrainedModel._get_feature_vector_attention_mask  s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
/~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOrK   r"  N)rM   rN   rO   rP   r   __annotations__base_model_prefixmain_input_nameinput_modalities_no_split_modulessupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr   no_gradrz  
LongTensorr   r  r  rQ   r"  rK   rC   rd  rd  r  s     $O-/CD&*#N
]]_!` !`Fe>N>NQT>T 
 
]b]m]m 
rK   rd  r   	mask_probmask_length	min_masksr   c           	        ^^^^^ U u  nmTS:  a  [        S5      eTT:  a  [        ST ST S35      e[        R                  R                  S5      R	                  5       mUUUUU4S jnUb-  UR                  5       R                  S5      R                  5       O[        U5       Vs/ s H  nTPM     snn[        R                  " UT4[        S	9n	/ n
U" T5      nUS
:X  a  U	$ U H  nU" U5      n[        R                  R                  [        R                  " UTS-
  -
  5      USS9n[        U5      S
:X  a  TS-
  nOUS
   n[        R                  " U[        R                  " X-
  [        R                   S	9U-  /5      nU
R#                  U5        M     [        R$                  " U
5      n
[        R&                  " U
SS2SS2S4   X[T45      n
U
R)                  X[T-  5      n
[        R                  " T5      SSSS24   n[        R&                  " UX[T45      R)                  X[T-  5      nU
U-   n
U
R+                  5       TS-
  :  a  TS-
  XTS-
  :  '   [        R,                  " XSS5        U	$ s  snf )a2  
Computes random mask spans for a given shape. Used to implement [SpecAugment: A Simple Data Augmentation Method for
ASR](https://huggingface.co/papers/1904.08779). Note that this method is not optimized to run on TPU and should be run on
CPU as part of the preprocessing during training.

Args:
    shape: The shape for which to compute masks. This should be of a tuple of size 2 where
           the first element is the batch size and the second element is the length of the axis to span.
    mask_prob:  The percentage of the whole axis (between 0 and 1) which will be masked. The number of
                independently generated mask spans of length `mask_length` is computed by
                `mask_prob*shape[1]/mask_length`. Note that due to overlaps, `mask_prob` is an upper bound and the
                actual percentage will be smaller.
    mask_length: size of the mask
    min_masks: minimum number of masked spans
    attention_mask: A (right-padded) attention mask which independently shortens the feature axis of
                    each batch dimension.
r   z&`mask_length` has to be bigger than 0.zO`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: z and `sequence_length`: `c                    > [        TU -  T-  T-   5      n[        UT5      nUT-  T:  a  TT-  nU TS-
  -
  U:  a  [        U TS-
  -
  S5      nU$ )z;Given input length, compute how many spans should be maskedr   r   )r   max)r  num_masked_spanepsilonr  r  r  sequence_lengths     rC   compute_num_masked_span6_compute_mask_indices.<locals>.compute_num_masked_span  so    i,6DwNOoy9 [(?:-<O ;?+o=!,+/"BAFOrK   Nr{   r  r   F)replace)r   nprandomr5  itemdetachr  tolistr   r  r   choicer  lenconcatenateonesint32appendarraybroadcast_tor   r  put_along_axis)r   r  r  r   r  r  r  r  r|  spec_aug_maskspec_aug_mask_idxsmax_num_masked_spanr  r  spec_aug_mask_idxdummy_mask_idxoffsetsr  r  s    `` `            @@rC   _compute_mask_indicesr    s   0 #(JQABB_$]^i]j&&7q:
 	
 iinnQ$$&G $ % 	##B'..0',Z'89'8!o'89  HHj/:$GM1/Ba%1,? II,,IIlkAo67RW - 
  !Q& -q0N.q1NNN(;(MUWU]U] ^ao op
 	!!"34/ &2 "45 1a:&+(V ,33JVa@ab ii$T4]3Goog
'UV^^+5G ,g5 /A"55GVYZGZ!0CCD mB?w :s   (I0c                   :  ^  \ rS rSrS\4U 4S jjr  SS\R                  S\R                  S-  S\R                  S-  4S jjr	\
     SS	\R                  S-  S\R                  S-  S\R                  S-  S
\S-  S\S-  S\S-  S\\-  4S jj5       rSrU =r$ )rv  i5  rA   c                   > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        UR                  S:  d  UR                  S:  aG  [        R                  " [        R                  " UR                  5      R                  5       5      U l        UR                   (       a  [#        U5      U l        O['        U5      U l        U R)                  5         g Nr   )r(   r)   rA   r   feature_extractorr   feature_projectionmask_time_probmask_feature_probr*   	Parameterr   r   r,   rw  rl  do_stable_layer_normr\  encoderr  	post_initr   s     rC   r)   HubertModel.__init__7  s     !5f!="9&"A   3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"&&7?DL(0DL 	rK   NrH   mask_time_indicesr   c                    [        U R                  SS5      (       d  U$ UR                  5       u  pEnUb(  U R                  R	                  UR
                  5      X'   OU R                  R                  S:  a  U R                  (       a  [        XE4U R                  R                  U R                  R                  UU R                  R                  S9n[        R                  " X!R                  [        R                  S9nU R                  R	                  UR
                  5      X'   U R                  R                  S:  a  U R                  (       a  [        XF4U R                  R                  U R                  R                   U R                  R"                  S9n[        R                  " XqR                  [        R                  S9nUSS2S4   R%                  SUS5      nSX'   U$ )	z
Masks extracted features along time axis and/or along feature axis according to
[SpecAugment](https://huggingface.co/papers/1904.08779).
apply_spec_augmentTNr   )r  r  r   r  )r4  r  )r  r  r  r{   )rV  rA   r   rl  r3  r  r  r   r  mask_time_lengthmask_time_min_masksr   rC  r4  r   r  mask_feature_lengthmask_feature_min_masksexpand)r@   rH   r  r   r  r  r,   mask_feature_indicess           rC   _mask_hidden_statesHubertModel._mask_hidden_statesI  s    t{{$8$??   4A3E3E3G0
[(/3/E/E/H/HI\I\/]M,[[''!+ 5-++44 KK88-++99! !&->G[G[chcmcm n/3/E/E/H/HI\I\/]M,;;((1,#8)++77 KK;;++<<	$  $)<<0DMaMainisis#t #74#@#G#GO]_#` 23M/rK   r   r   r  r   r   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nUR                  SS5      nUb  U R                  UR                  S   U5      nU R                  U5      n	U R                  XS9n	U R                  U	UUUUS9n
U
S   n	U(       d	  U	4U
SS -   $ [        U	U
R                  U
R                  S9$ )a  
mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
    masked extracted features in *config.proj_codevector_dim* space.

Example:

```python
>>> from transformers import AutoProcessor, HubertModel
>>> from datasets import load_dataset

>>> processor = AutoProcessor.from_pretrained("facebook/hubert-large-ls960-ft")
>>> model = HubertModel.from_pretrained("facebook/hubert-large-ls960-ft")


>>> def map_to_array(example):
...     example["speech"] = example["audio"]["array"]
...     return example


>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)

>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values  # Batch size 1
>>> hidden_states = model(input_values).last_hidden_state
```Nr   r   )r  r   r   r  r   r   r.  )rA   r   r  r   r  rF   r  r   r  r  r  r   rH   r0  )r@   r   r   r  r   r  r   r   extract_featuresrH   encoder_outputss              rC   rI   HubertModel.forwardw  s)   J 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY11,?+55a;%!DDEUE[E[\]E^`noN//0@A000d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
rK   )rA   r  r  r  rl  r%  NNNNN)rM   rN   rO   rP   r   r)   r   rR  r  r  r   r   r   r   r   rI   rQ   rR   rS   s   @rC   rv  rv  5  s    | * 7;26	,((, !,,t3, ((4/	,\  /36:)-,0#'E
llT)E
 t+E
 !,,t3	E

  $;E
 #TkE
 D[E
 
	 E
 E
rK   rv  zn
    Hubert Model with a `language modeling` head on top for Connectionist Temporal Classification (CTC).
    )custom_introc                      ^  \ rS rSrSS\S-  4U 4S jjjrS rS rS r\	     SS\
R                  S-  S	\
R                  S-  S
\S-  S\S-  S\S-  S\
R                  S-  S\\-  4S jj5       rSrU =r$ )HubertForCTCi  Ntarget_langc                   > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  5      U l        X l        UR                  c  [        SU R                   S35      e[        US5      (       a  UR                  (       a  UR                  OUR                  n[        R                   " X1R                  5      U l        U R%                  5         g)a  
target_lang (`str`, *optional*):
    Language id of adapter weights. Adapter weights are stored in the format adapter.<lang>.safetensors or
    adapter.<lang>.bin. Only relevant when using an instance of [`HubertForCTC`] with adapters. Uses 'eng' by
    default.
NzYou are trying to instantiate z with a configuration that does not define the vocabulary size of the language model head. Please instantiate the model as follows: `HubertForCTC.from_pretrained(..., vocab_size=vocab_size)`. or define `vocab_size` of your model's configuration.add_adapter)r(   r)   rv  re  r*   r   final_dropoutr   r  
vocab_sizer   rB   r4   r  output_hidden_sizer,   r   lm_headr  )r@   rA   r  r  rB   s       rC   r)   HubertForCTC.__init__  s     	 !&)zz&"6"67&$00@ AH H  *1)G)GFL^L^F%%djdvdv 	 yy!35F5FG 	rK   c                 @   [        5       [        R                  " S5      :X  a  gU R                  nUb'  [	        U R
                  SS5      c  [        SU S35      eUc.  [	        U R
                  SS5      b  [        R                  S5        gUb  U R                  USS9  gg)	a  
This method overwrites [`~PreTrainedModel.tie_weights`] so that adapter weights can be correctly loaded when
passing `target_lang=...` to `from_pretrained(...)`.

This method is **not** supposed to be called by the user and is prone to be changed in the future.
metaNrG  zCannot pass `target_lang`: z- if `config.adapter_attn_dim` is not defined.z)By default `target_lang` is set to 'eng'.T)
force_load)
r   r   r4  r  rV  rA   r   loggerinfoload_adapter)r@   r   r  s      rC   tie_weightsHubertForCTC.tie_weights  s     675<<;OO &&"wt{{<NPT'U']:;-Gtuvv WT[[:Ld%S%_KKCD$kd; %rK   c                 L    U R                   R                  R                  5         gz
Calling this function will disable the gradient computation for the feature encoder so that its parameter will
not be updated during training.
Nre  r  r   r@   s    rC   freeze_feature_encoder#HubertForCTC.freeze_feature_encoder      
 	%%88:rK   c                 T    U R                   R                  5        H
  nSUl        M     gz
Calling this function will disable the gradient computation for the base model so that its parameters will not
be updated during training. Only the classification head will be updated.
FNre  r   r   r   s     rC   freeze_base_modelHubertForCTC.freeze_base_model  #    
 [[++-E"'E .rK   r   r   r   r  r   labelsr   c                    Ub  UOU R                   R                  nUbJ  UR                  5       U R                   R                  :  a"  [	        SU R                   R                   35      eU R                  UUUUUS9nUS   n	U R                  U	5      n	U R                  U	5      n
SnUGbX  Ub  UO"[        R                  " U[        R                  S9nU R                  UR                  S5      5      R                  [        R                  5      nUS:  nUR                  S5      nUR                  U5      n[        R                   R#                  U
S[        R$                  S9R'                  SS5      n[        R(                  R*                  R-                  S	S
9   [        R                   R/                  UUUUU R                   R0                  U R                   R2                  U R                   R4                  S9nSSS5        U(       d  U
4U[6        S -   nUb  U4U-   $ U$ [9        XUR:                  UR<                  S9$ ! , (       d  f       NL= f)a  
labels (`torch.LongTensor` of shape `(batch_size, target_length)`, *optional*):
    Labels for connectionist temporal classification. Note that `target_length` has to be smaller or equal to
    the sequence length of the output logits. Indices are selected in `[-100, 0, ..., config.vocab_size - 1]`.
    All labels set to `-100` are ignored (masked), the loss is only computed for labels in `[0, ...,
    config.vocab_size - 1]`.
Nz$Label values must be <= vocab_size: r  r   r  r{   )r&   r  r   F)enabled)blank	reductionzero_infinitylosslogitsrH   r0  )rA   r   r  r  r   re  r   r  r   	ones_liker  r  r  r3  masked_selectr*   r   log_softmaxfloat32rF   backendscudnnflagsctc_losspad_token_idctc_loss_reductionctc_zero_infinity_HIDDEN_STATES_START_POSITIONr   rH   r0  )r@   r   r   r   r  r   r   r   r  rH   r  r  r|  labels_masktarget_lengthsflattened_targets	log_probsoutputs                     rC   rI   HubertForCTC.forward  s   $ &1%<k$++BYBY&**,$++2H2H"HCDKKDZDZC[\]]++)/!5#  
  
]3m, #1"<%//R^fkfpfpBq  !AA.BTBTUWBXY\\]b]g]ghM !A+K(__R0N & 4 4[ A 11&b1V``abdefI%%++E+:}}--%!"++22"kk<<"&++"?"? .  ; Y)F)G!HHF)-)9TGf$EvEG4I4IV]VhVh
 	
 ;:s   A H??
I)r   re  r  r  rn   r  )rM   rN   rO   rP   r   r)   r  r  r  r   r   r   r   r   r   rI   rQ   rR   rS   s   @rC   r  r    s    C$J  :<0;(  /3)-,0#'&*E
llT)E
 t+E
  $;	E

 #TkE
 D[E
 t#E
 
	E
 E
rK   r  z
    Hubert Model with a sequence classification head on top (a linear layer over the pooled output) for tasks like
    SUPERB Keyword Spotting.
    c                      ^  \ rS rSrU 4S jrS rS r\     SS\R                  S-  S\R                  S-  S\
S-  S	\
S-  S
\
S-  S\R                  S-  S\\-  4S jj5       rSrU =r$ )rx  iV  c                 "  > [         TU ]  U5        [        US5      (       a  UR                  (       a  [	        S5      e[        U5      U l        UR                  S-   nUR                  (       a2  [        R                  " [        R                  " U5      U-  5      U l        [        R                  " UR                  UR                   5      U l        [        R                  " UR                   UR$                  5      U l        U R)                  5         g )Nr  z]Sequence classification does not support the use of Hubert adapters (config.add_adapter=True)r   )r(   r)   r4   r  r   rv  re  r  use_weighted_layer_sumr*   r  r   r  rm  r   r,   classifier_proj_size	projector
num_labels
classifierr  )r@   rA   
num_layersrB   s      rC   r)   (HubertForSequenceClassification.__init__]  s     6=))f.@.@o  "&)--1
((!#ejj.Dz.Q!RD6#5#5v7R7RS))F$?$?ARARS 	rK   c                 L    U R                   R                  R                  5         gr  r  r  s    rC   r  6HubertForSequenceClassification.freeze_feature_encodern  r  rK   c                 T    U R                   R                  5        H
  nSUl        M     gr  r  r   s     rC   r  1HubertForSequenceClassification.freeze_base_modelu  r  rK   Nr   r   r   r  r   r   r   c                 0   Ub  UOU R                   R                  nU R                   R                  (       a  SOUnU R                  UUUUUS9nU R                   R                  (       ai  U[           n	[
        R                  " U	SS9n	[        R                  R                  U R                  SS9n
XR                  SSS5      -  R                  SS9n	OUS   n	U R                  U	5      n	Uc  U	R                  SS9nOU R                  U	R                   S   U5      nUR#                  S5      R%                  SSU	R                   S   5      nS	X) '   U	R                  SS9UR                  SS9R                  SS5      -  nU R'                  U5      nSnUbF  [)        5       nU" UR                  SU R                   R*                  5      UR                  S5      5      nU(       d  U4U[        S -   nUb  U4U-   $ U$ [-        UUUR.                  UR0                  S
9$ )a  
input_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
    Float values of input raw speech waveform. Values can be obtained by loading a `.flac` or `.wav` audio file
    into an array of type `list[float]`, a `numpy.ndarray` or a `torch.Tensor`, *e.g.* via the torchcodec library
    (`pip install torchcodec`) or the soundfile library (`pip install soundfile`).
    To prepare the array into `input_values`, the [`AutoProcessor`] should be used for padding and conversion
    into a tensor of type `torch.FloatTensor`. See [`HubertProcessor.__call__`] for details.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
NTr  r   r   r{   r   r   r   r  )rA   r   r  re  r  r   stackr*   r   r   rm  r   r  r  ri  r  r   r1  r2  r!  r   r   r   rH   r0  )r@   r   r   r   r  r   r   r   r  rH   norm_weightspooled_outputpadding_maskexpand_padding_maskr  r  loss_fctr  s                     rC   rI   'HubertForSequenceClassification.forward}  s   0 &1%<k$++BYBY'+{{'I'ItOc++)/!5#  
 ;;--#$ABM!KK1=M==001C1C0LL*->->r1a-HHMMRSMTM#AJM}5!)..1.5MBB=CVCVWXCY[ijL"."8"8"<"C"CAq-J]J]^_J`"a25M./)--!-4|7G7GA7G7N7S7STVXY7ZZM/')HFKKDKK,B,BCV[[QS_UDY)F)G!HHF)-)9TGf$EvE'!//))	
 	
rK   )r!  re  rm  r  r  )rM   rN   rO   rP   r)   r  r  r   r   r   r   r   r   rI   rQ   rR   rS   s   @rC   rx  rx  V  s    ";(  /3)-,0#'&*C
llT)C
 t+C
  $;	C

 #TkC
 D[C
 t#C
 
)	)C
 C
rK   rx  )r  rx  rv  rd  r  rZ   )Gcollections.abcr   numpyr  r   torch.nnr*   r    r   rn  activationsr   integrations.deepspeedr   integrations.fsdpr	   masking_utilsr
   modeling_flash_attention_utilsr   modeling_layersr   modeling_outputsr   r   r   modeling_utilsr   r   r   processing_utilsr   r3   r   r   r   configuration_hubertr   
get_loggerrM   r  Moduler   r=   r^   rr   r   r   r   r   r   r   r   r   r  r  rE  rT  r\  rd  r   r   r  ndarrayr  rv  r  r  rx  __all__r"  rK   rC   <module>rB     s  * %    % & ! @ 7 6 B 9 Y Y r r & @ @ . 
		H	%/BII /d !; *9 69 0#299 #Lbii 0 !%II%<<% 
% <<	%
 LL4'% T\% % '(%8R/bii R/j		 0!3 !HE
BII E
PRYY 2+(B +\I
299 I
X HO H H^ /3tc?tt t $$t+	t
 t ZZtn G
' G
 G
T !"  
K
( K

K
\ e
&; e
e
P frK   