
    Z jG                     j   S r SSKrSSKrSSKJr  SSKJr  SSKJr  SSK	J
r
  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJrJrJrJrJrJrJrJrJ r   SSK!J"r"  Sr# " S S\5      r$ " S S\5      r% " S S\5      r& " S S\RN                  5      r( " S S\5      r) " S S\RN                  5      r* " S S\5      r+ " S S \5      r, " S! S"\5      r- " S# S$\5      r. " S% S&\RN                  5      r/\ " S' S(\5      5       r0\ " S) S*\05      5       r1 " S+ S,\5      r2 " S- S.\5      r3/ S/Qr4g)0zPyTorch SEW model.    N)nn   )initialization)ACT2FN)is_deepspeed_zero3_enabled)is_fsdp_managed_module)BaseModelOutput)PreTrainedModel)auto_docstring)is_flash_attention_requested   )Wav2Vec2AttentionWav2Vec2EncoderLayerWav2Vec2FeatureEncoderWav2Vec2FeedForwardWav2Vec2ForCTC!Wav2Vec2ForSequenceClassificationWav2Vec2GroupNormConvLayerWav2Vec2LayerNormConvLayerWav2Vec2NoLayerNormConvLayerWav2Vec2SamePadLayer_compute_mask_indices   )	SEWConfigc                       \ rS rSrSrg)SEWNoLayerNormConvLayer0    N__name__
__module____qualname____firstlineno____static_attributes__r       t/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/sew/modular_sew.pyr   r   0       r%   r   c                       \ rS rSrSrg)SEWLayerNormConvLayer4   r   Nr   r   r%   r&   r)   r)   4   r'   r%   r)   c                       \ rS rSrSrg)SEWGroupNormConvLayer8   r   Nr   r   r%   r&   r,   r,   8   r'   r%   r,   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )SEWPositionalConvEmbedding<   c           	        > [         TU ]  5         [        R                  " UR                  UR                  UR
                  UR
                  S-  UR                  UR                  S9U l        [        R                  R                  n[        [        R                  R                  S5      (       a$  [        R                  R                  R                  n[        5       (       Ga%  SS KnUR                  R!                  U R                  R"                  SS9   U" U R                  SSS9U l        S S S 5        [        U R                  S5      (       aU  U R                  R                  R"                  R$                  nU R                  R                  R"                  R&                  nO,U R                  R(                  nU R                  R*                  nUR                  R-                  X5        UR                  R-                  X5        OU" U R                  SSS9U l        [/        UR
                  5      U l        [2        UR4                     U l        g ! , (       d  f       GN,= f)	Nr   )kernel_sizepaddinggroupsstrideweight_normr   modifier_rankweight)namedimparametrizations)super__init__r   Conv1dhidden_sizenum_conv_pos_embeddingsnum_conv_pos_embedding_groupssqueeze_factorconvutilsr6   hasattrr<   r   	deepspeedzeroGatheredParametersr9   	original0	original1weight_gweight_vregister_external_parameterSEWSamePadLayerr3   r   feat_extract_activation
activation)selfconfigr6   rG   rL   rM   	__class__s         r&   r>   #SEWPositionalConvEmbedding.__init__=   s   II6622a777((
	 hh**288,,m<<((33??K%''224993C3CST2U'		aH	 Vtyy"4559955<<FF9955<<FF99--99--NN66tFNN66tF#DIIH!DDI&v'E'EF !?!?@ VUs   I
I"c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ N)rD   r3   rQ   )rR   hidden_statess     r&   forward"SEWPositionalConvEmbedding.forward_   s2    		-0]36r%   )rQ   rD   r3   r    r!   r"   r#   r>   rY   r$   __classcell__rT   s   @r&   r/   r/   <   s     AD r%   r/   c                       \ rS rSrSrg)rO   g   r   Nr   r   r%   r&   rO   rO   g   r'   r%   rO   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )SEWUpsamplingk   c                    > [         TU ]  5         [        R                  " UR                  UR                  UR
                  -  5      U l        [        UR                     U l	        UR
                  U l        g rW   )
r=   r>   r   Linearr@   rC   
projectionr   rP   rQ   rR   rS   rT   s     r&   r>   SEWUpsampling.__init__l   sW    ))F$6$68J8JVMbMb8bc !?!?@$33r%   c                 &   U R                  U5      nU R                  U5      nU R                  S:  a^  UR                  5       u  p#nX0R                  -  nX@R                  -  nUR	                  X#U R                  U5      nUR	                  X%U5      nU$ )Nr   )re   rQ   rC   sizereshape)rR   rX   bszsrc_lensrc_embed_dimtgt_lentgt_embed_dims          r&   rY   SEWUpsampling.forwardr   s    66"*7*<*<*>'C- 3 33G)-@-@@M)11#@S@SUbcM)11#NMr%   )rQ   re   rC   r[   r]   s   @r&   ra   ra   k   s    4 r%   ra   c                       \ rS rSrSrg)SEWFeatureEncoder   r   Nr   r   r%   r&   rr   rr      r'   r%   rr   c                       \ rS rSrSrg)SEWAttention   r   Nr   r   r%   r&   ru   ru      r'   r%   ru   c                       \ rS rSrSrg)SEWFeedForward   r   Nr   r   r%   r&   rx   rx      r'   r%   rx   c                       \ rS rSrSrg)SEWEncoderLayer   r   Nr   r   r%   r&   r{   r{      r'   r%   r{   c                   :   ^  \ rS rSrU 4S jr    SS jrSrU =r$ )
SEWEncoder   c                   > [         TU ]  5         Xl        [        U5      U l        [
        R                  " UR                  UR                  5      U l        [
        R                  " UR                  UR                  S9U l        [
        R                  " UR                  5      U l        [
        R                   " [#        UR$                  5       Vs/ s H  n['        U5      PM     sn5      U l        [+        U5      U l        SU l        g s  snf )NepsF)r=   r>   rS   r/   pos_conv_embedr   	AvgPool1drC   pool	LayerNormr@   layer_norm_eps
layer_normDropouthidden_dropoutdropout
ModuleListrangenum_hidden_layersr{   layersra   upsamplegradient_checkpointing)rR   rS   _rT   s      r&   r>   SEWEncoder.__init__   s    8@LL!6!68M8MN	,,v'9'9v?T?TUzz&"7"78mmeFLdLdFe$fFe_V%<Fe$fg%f-&+# %gs   D	c           	         U(       a  SOS nU(       a  SOS nUGb  UR                  S5      R                  SSUR                  S   5      n[        U R                  5      (       a  SX) '   Ub  SU;   a  UOS nGO_SX) '   UR                  5       R                  S5      n	XR                  R                  -  n
UR                  S   U R                  R                  -  n[        R                  " SXR                  S9R                  SS5      R                  U
R                  S   S5      nXR                  SS5      :  R                  5       nSUS S 2S S S S 24   R                  UR                  S	9-
  nU[        R                  " UR                  5      R                   -  nUR                  UR                  S   SUR                  S   UR                  S   5      nUR                  S   nUR#                  SS5      nU R%                  U5      nU R'                  U5      n[!        UR)                  S5      UR)                  S5      5      nUS
S U24   US
S U24   -   nUR#                  SS5      nU R+                  U5      nU R-                  U5      n[/        5       =(       d    [1        U 5      nU R2                   H  nU(       a  Xa4-   n[        R4                  " / 5      nU R6                  =(       a    UU R                  R8                  :  nU(       a  U(       a  U" XUS9nUS   nU(       a  SnU(       d  M}  UWS   4-   nM     U(       a  Xa4-   nU R;                  U5      nUR                  S   U:  a3  [<        R>                  RA                  USSSXR                  S   -
  45      nU(       d  [C        S XU4 5       5      $ [E        UUUS9$ )Nr   r   r           r   deviceg      ?)dtype.)attention_maskoutput_attentionsNNc              3   .   #    U  H  oc  M  Uv   M     g 7frW   r   ).0vs     r&   	<genexpr>%SEWEncoder.forward.<locals>.<genexpr>   s     m$[q$[s   	last_hidden_staterX   
attentions)#	unsqueezerepeatshaper   rS   longsumrC   torcharanger   viewexpandtor   finfomin	transposer   r   ri   r   r   r   r   r   randtraining	layerdropr   r   
functionalpadtupler	   )rR   rX   r   r   output_hidden_statesreturn_dictall_hidden_statesall_self_attentionsexpand_attention_maskinput_lengthsoutput_lengthsmax_encoder_lengthattention_idsn_input_timestepsposition_embeddingspooled_hidden_states
min_lengthsynced_gpuslayerdropout_probabilityskip_the_layerlayer_outputss                         r&   rY   SEWEncoder.forward   s    #7BD$5b4%$2$<$<R$@$G$G1mNaNabcNd$e!+DKK888;454B4NSTXfSfmq 9<45!/!4!4!6 ; ;B ?!.++2L2L!L%2%8%8%;t{{?Y?Y%Y"LL$6?T?TUT!R[VN003R8 
 #02E2Eb!2L"L!R!R!T "%~atQ6F'G'J'JQ^QdQd'J'e!e!/%++m>Q>Q2R2V2V!V!/!6!6"((+Q0D0DR0H.J^J^_aJb" *//2%//15"11-@#yy7,11"57K7P7PQS7TU
,S+:+-=>ATUXZe[eZeUeAff%//156]302R6LT6R[[E#$58H$H! #(**R.!]]Z/BT[[EZEZ/ZN![ %!Te! !.a 0 ,  &9]1=M<O&O#' !*   14D Dm4q!$55MM--maAGX[n[nop[qGq=rsMm]GZ$[mmm++*
 	
r%   )rS   r   r   r   r   r   r   r   )NFFTr[   r]   s   @r&   r~   r~      s"    	, "W
 W
r%   r~   c                       \ rS rSr% \\S'   SrSrSrSr	Sr
SrSr\R                  " 5       S 5       rS	\R                   \-  4S
 jrS\S\R                   4S jrSrg)SEWPreTrainedModel   rS   sewinput_valuesaudioTFc           
      0   [        U[        5      (       a  [        R                  " UR                  R
                  SS[        R                  " SUR                  R                  S   UR                  R                  -  -  5      -  S9  [        R                  " UR                  R                  S5        GO[        U[        R                  5      (       a6  [        R                  " UR
                  SU R                  R                  S9  GO[        U[        R                   [        R"                  45      (       aB  [        R$                  " UR                  5        [        R&                  " UR
                  5        GO[        U[        R(                  5      (       a  [+        5       (       a  SSKn[/        US5      (       ak  [/        US5      (       aZ  UR0                  R3                  UR4                  UR6                  /SS	9   [        R8                  " UR
                  5        SSS5        OnUR0                  R3                  UR
                  SS	9   [        R8                  " UR
                  5        SSS5        O [        R8                  " UR
                  5        [        U[        R                  [        R(                  45      (       a/  UR                  b!  [        R$                  " UR                  5        ggg! , (       d  f       Nm= f! , (       d  f       N~= f)
zInitialize the weightsr   r   r   )meanstdr   NrM   rL   r7   )
isinstancer/   initnormal_rD   r9   mathsqrtr2   in_channels	constant_biasr   rd   rS   initializer_ranger   	GroupNormzeros_ones_r?   r   rG   rF   rH   rI   rM   rL   kaiming_normal_)rR   modulerG   s      r&   _init_weights SEWPreTrainedModel._init_weights  s    f899LL""		!v{{'>'>q'AFKKD[D['["\]]
 NN6;;++Q/		**LLSdkk6S6STr|| <==KK$JJv}}%		**)++ 6:..76:3N3N"::FOOV__;]mn:o,,V]]; po #::6==XY:Z,,V]]; [Z $$V]]3fryy"))455&++:QKK$ ;R5 po [Zs   ?!K6!L6
L
Lr   c                     S n[        U R                  R                  U R                  R                  5       H  u  p4U" XU5      nM     U$ )z8
Computes the output length of the convolutional layers
c                 8    [         R                  " X-
  USS9S-   $ )Nfloor)rounding_moder   )r   div)input_lengthr2   r5   s      r&   _conv_out_lengthMSEWPreTrainedModel._get_feat_extract_output_lengths.<locals>._conv_out_length&  s      99\7wWZ[[[r%   )ziprS   conv_kernelconv_stride)rR   r   r   r2   r5   s        r&    _get_feat_extract_output_lengths3SEWPreTrainedModel._get_feat_extract_output_lengths!  sG    
	\
 $'t{{'>'>@W@W#XK,]PM $Y r%   feature_vector_lengthr   c                    U R                  UR                  S5      5      R                  [        R                  5      nUR
                  S   n[        R                  " XA4UR                  UR                  S9nSU[        R                  " UR
                  S   UR                  S9US-
  4'   UR                  S/5      R                  S5      R                  S/5      R                  5       nU$ )Nr   r   )r   r   r   r   )r   r   r   r   r   r   zerosr   r   r   flipcumsumbool)rR   r   r   r   
batch_sizes        r&   "_get_feature_vector_attention_mask5SEWPreTrainedModel._get_feature_vector_attention_mask0  s    >>~?Q?QRT?UVYYZ_ZdZde#))!,
/~7K7KTbTiTi
 uv^%9%9!%<^EZEZ[]kno]opq',,bT299"=BBB4HMMOr%   r   N)r    r!   r"   r#   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr   no_gradr   
LongTensorintr   r   r$   r   r%   r&   r   r      sz    $O&*#N
]]_% %<e>N>NQT>T 
 
]b]m]m 
r%   r   c                   :  ^  \ rS rSrS\4U 4S jjr  SS\R                  S\R                  S-  S\R                  S-  4S jjr	\
     SS	\R                  S-  S\R                  S-  S\R                  S-  S
\S-  S\S-  S\S-  S\\-  4S jj5       rSrU =r$ )SEWModeli=  rS   c                   > [         TU ]  U5        Xl        [        U5      U l        [
        R                  " UR                  S   UR                  S9U l	        UR                  S   UR                  :g  U l        U R                  (       a3  [
        R                  " UR                  S   UR                  5      U l        [
        R                  " UR                  5      U l        UR"                  S:  d  UR$                  S:  aG  [
        R&                  " [(        R*                  " UR                  5      R-                  5       5      U l        [1        U5      U l        U R5                  5         g )Nr   r   r   )r=   r>   rS   rr   feature_extractorr   r   conv_dimr   r   r@   project_featuresrd   feature_projectionr   feat_proj_dropoutfeature_dropoutmask_time_probmask_feature_prob	Parameterr   Tensoruniform_masked_spec_embedr~   encoder	post_initrf   s     r&   r>   SEWModel.__init__?  s     !26!:,,vr':@U@UV & 3v7I7I I  &(ii0CVEWEW&XD#!zz&*B*BC  3&&*B*BS*H%'\\%,,v?Q?Q2R2[2[2]%^D"!&) 	r%   NrX   mask_time_indicesr   c                    [        U R                  SS5      (       d  U$ UR                  5       u  pEnUb(  U R                  R	                  UR
                  5      X'   OU R                  R                  S:  a  U R                  (       a  [        XE4U R                  R                  U R                  R                  UU R                  R                  S9n[        R                  " X!R                  [        R                  S9nU R                  R	                  UR
                  5      X'   U R                  R                  S:  a  U R                  (       a  [        XF4U R                  R                  U R                  R                   U R                  R"                  S9n[        R                  " XqR                  [        R                  S9nUSS2S4   R%                  SUS5      nSX'   U$ )	z
Masks extracted features along time axis and/or along feature axis according to
[SpecAugment](https://huggingface.co/papers/1904.08779).
apply_spec_augmentTNr   )	mask_probmask_lengthr   	min_masks)r   r   )r  r  r  r   )getattrrS   ri   r  r   r   r  r   r   mask_time_lengthmask_time_min_masksr   tensorr   r   r  mask_feature_lengthmask_feature_min_masksr   )rR   rX   r  r   r   sequence_lengthr@   mask_feature_indicess           r&   _mask_hidden_statesSEWModel._mask_hidden_statesS  s    t{{$8$??   4A3E3E3G0
[(/3/E/E/H/HI\I\/]M,[[''!+ 5-++44 KK88-++99! !&->G[G[chcmcm n/3/E/E/H/HI\I\/]M,;;((1,#8)++77 KK;;++<<	$  $)<<0DMaMainisis#t #74#@#G#GO]_#` 23M/r%   r   r   r   r   returnc                 b   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nUR                  SS5      nU R                  U5      nU R                  (       a  U R                  U5      nU R                  U5      n	Ub  U R                  U	R                  S   U5      nU R                  XS9n	U R                  U	UUUUS9n
U
S   n	U(       d	  U	4U
SS -   $ [        U	U
R                  U
R                   S9$ )a  
mask_time_indices (`torch.BoolTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Indices to mask extracted features for contrastive loss. When in training mode, model learns to predict
    masked extracted features in *config.proj_codevector_dim* space.
Nr   r   )r  )r   r   r   r   r   r   )rS   r   r   r   r  r   r   r
  r  r  r   r   r%  r  r	   rX   r   )rR   r   r   r  r   r   r   kwargsextract_featuresrX   encoder_outputss              r&   rY   SEWModel.forward  sR     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY11,?+55a;??+;<  #667GH,,-=>%!DD]EXEXYZE[]klN000d,,)/!5# ' 
 (*!#oab&999+)77&11
 	
r%   )rS   r  r  r  r  r   r  r
  r   )NNNNN)r    r!   r"   r#   r   r>   r   FloatTensorr  r%  r   r  r   r   r	   rY   r$   r\   r]   s   @r&   r  r  =  s    y . 7;26	,((, !,,t3, ((4/	,\  /36:)-,0#'4
llT)4
 t+4
 !,,t3	4

  $;4
 #Tk4
 D[4
 
	 4
 4
r%   r  c                       \ rS rSrSrg)	SEWForCTCi  r   Nr   r   r%   r&   r/  r/    r'   r%   r/  c                       \ rS rSrSrg)SEWForSequenceClassificationi  r   Nr   r   r%   r&   r1  r1    r'   r%   r1  )r/  r1  r  r   )5__doc__r   r   r    r   r   activationsr   integrations.deepspeedr   integrations.fsdpr   modeling_outputsr	   modeling_utilsr
   rE   r   utils.genericr   wav2vec2.modeling_wav2vec2r   r   r   r   r   r   r   r   r   r   r   configuration_sewr   _HIDDEN_STATES_START_POSITIONr   r)   r,   Moduler/   rO   ra   rr   ru   rx   r{   r~   r   r  r/  r1  __all__r   r%   r&   <module>r?     sH       & ! @ 7 / - # 9    ) !" 	: 		6 		6 	( (V	* 	BII ,	. 		$ 		( 		* 	c
 c
L B B BJ x
! x
 x
v	 		#D 	 Zr%   