
    Z j6              
           S SK r S SKrS SKJr  S SKrSSKJrJr  SSK	J
r
  SSKJr  SSKJrJrJr  \R"                  " \5      rS\R(                  S	\S
\S\S\R(                  4
S jr " S S\
5      rS/rg)    N)Sequence   )mel_filter_bankwindow_function)SequenceFeatureExtractor)BatchFeature)PaddingStrategy
TensorTypeloggingarray	dimensionsizestepreturnc                    U R                   S:w  a  [        S5      eUS:w  a  XR                   S-
  :w  a  [        S5      eU R                  u  pEXR-
  U-  S-   nUS::  a"  [        R                  " USU4U R
                  S9$ XFU4nU R                  S   U R                  S   U-  U R                  S   4n[        R                  R                  R                  XUS9$ )	zNA basic NumPy equivalent of PyTorch's unfold for 2D arrays along the last dim.   zFThis unfold implementation currently supports 2D arrays (batch, time).   zFThis unfold implementation only supports unfolding the last dimension.r   dtype)shapestrides)
ndim
ValueErrorr   npzerosr   r   libstride_tricks
as_strided)	r   r   r   r   
batch_sizeoriginal_length
num_framesoutput_shapeoutput_stridess	            څ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/gemma4/feature_extraction_gemma4.py_unfoldr&      s    zzQabbB9

Q6abb"'++J!(T1A5JQxxQ-U[[AAD1LmmA&a(84(?qAQRN66**5n*]]    c            "         ^  \ rS rSrSrSS/r                S#S\S\S\S	\S
\S\S\S\S\S\S\S\S\S\S\	\   S-  S\	\   S-  4 U 4S jjjr
S\R                  S\R                  S\\R                  \R                  4   4S jr      S$S\R                  \\   -  \\R                     -  \\\      -  S\\-  \-  S\S-  S\S\S-  S \\-  S-  S	\S-  S\4S! jjrS"rU =r$ )%Gemma4AudioFeatureExtractor1   a	  An audio feature extractor Universal Speech Models https://huggingface.co/papers/2303.01037.

Args:
    feature_size (`int`, *optional*, defaults to 128):
        The feature dimension of the extracted features.
    sampling_rate (`int`, *optional*, defaults to 16000):
        The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).
    padding_value (`float`, *optional*, defaults to 0.0):
        Padding value used to pad the audio. Should correspond to silences.
    return_attention_mask (`bool`, *optional*, defaults to `True`):
        Whether to return the attention mask for the generated MEL spectrograms.
    frame_length_ms (`float`, *optional*, defaults to 20.0):
        The length of a frame in milliseconds.
    hop_length_ms (`float`, *optional*, defaults to 10.0):
        Length of the overlapping windows for the STFT used to obtain the Mel Frequency coefficients.
    min_frequency (`float`, *optional*, defaults to 0.0):
        The minimum frequency (in Hz) for the Mel filterbank.
    max_frequency (`float`, *optional*, defaults to 8000.0):
        The maximum frequency (in Hz) for the Mel filterbank.
    preemphasis (`float`, *optional*, defaults to 0.0):
        The preemphasis coefficient.
    preemphasis_htk_flavor (`bool`, *optional*, defaults to `True`):
        Whether to use HTK-style preemphasis.
    fft_overdrive (`bool`, *optional*, defaults to `False`):
        Whether to use FFT overdrive.
    dither (`float`, *optional*, defaults to 0.0):
        Adds dithering. In other words, adds a small Gaussian noise to each frame.
        E.g. use 0.0001 to add dithering with a normal distribution centered
        around 0.0 with standard deviation 0.0001 (assuming [-1,+1] range of raw_speech).
        The value 0.0 means no dithering.
        Dithering has similar effect as `spectrogram(mel_floor=...)`. It reduces
        the high log_mel_fbank values for signals with hard-zero sections,
        when VAD cutoff is present in the signal.
    input_scale_factor (`float`, *optional*, defaults to 1.0):
        Scaling factor applied to the input waveform.
    mel_floor (`float`, *optional*, defaults to 0.001):
        Minimum value for Mel spectrograms to avoid log(0).
    per_bin_mean (`Optional[Sequence[float]]`, *optional*):
        Mean values for per-bin normalization.
    per_bin_stddev (`Optional[Sequence[float]]`, *optional*):
        Standard deviation values for per-bin normalization.
input_featuresinput_features_maskNfeature_sizesampling_ratepadding_valuereturn_attention_maskframe_length_mshop_length_msmin_frequencymax_frequencypreemphasispreemphasis_htk_flavorfft_overdriveditherinput_scale_factor	mel_floorper_bin_meanper_bin_stddevc                 $  > [         TU ]  " S	UUUUS.UD6  Xpl        Xl        Xl        Xl        Xl        Xl        Xl        [        [        X%-  S-  5      5      U l        [        [        X&-  S-  5      5      U l        [        R                  " U[        R                  S9U l        S["        R$                  " ["        R&                  " U R                  5      5      -  nU R                  (       a  US-  nUU l        [+        U R                  5      R-                  [        R.                  5      U l        [2        R4                  " 5          [2        R6                  " S5        [9        U R(                  S-  S-   UUUU R:                  S SS9U l        S S S 5        Ub-  [        R                  " U5      R?                  SSU5      U l         OS U l         Ub-  [        R                  " U5      R?                  SSU5      U l!        g S U l!        g ! , (       d  f       N}= f)
N)r-   r.   r/   r0   g     @@r   r   ignorer   htk)num_frequency_binsnum_mel_filtersr3   r4   r.   norm	mel_scale )"super__init__r3   r4   r5   r6   r7   r8   r9   introundframe_length
hop_lengthr   r   float64r:   mathceillog2
fft_lengthr   astypefloat32windowwarningscatch_warningssimplefilterr   r.   mel_filtersreshaper;   r<   )selfr-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   kwargsrO   	__class__s                      r%   rF   $Gemma4AudioFeatureExtractor.__init___   s   ( 	 	
%''"7		

 	
 +*&&<#*"4m&E&N OPeM$AF$JKL)2::>$))DIId.?.?$@AA
!OJ$ &d&7&78??

K $$&!!(+.#'??a#7!#; ,++"00 D ' # " 6 > >q!\ RD $D%"$((>":"B"B1a"VD"&D) '&s   AH
Hwaveformattention_maskr   c                    UR                   S:X  a  [        R                  " USS9nU R                  S:  aO  XR                  [        R                  R
                  " UR                  6 R                  UR                  5      -  -   nU R                  S:w  a  XR                  -  nU R                  S-  n[        R                  " USUS44SS	9n[        R                  " X#S4SSS
9nU R                  S-   n[        USX@R                  S9nU R                  S:  a  U R                  (       aP  USSS24   SU R                  -
  -  nUSSS24   U R                  USSS24   -  -
  n[        R                   " Xg/SS9nO*USSS24   U R                  USSS24   -  -
  nO	USSS24   nXR"                  -  n[        R$                  R'                  XR(                  SS9n	[        R*                  " U	5      n
[        R,                  " XR.                  5      n[        R0                  " XR2                  -   5      nU R4                  b  XR4                  -
  nU R6                  b  XR6                  -  nUR9                  S5      nUR                  S   n[        R:                  " U5      U R                  -  U-   S-
  nX/   R                  [<        5      nUU4$ ) r   r   )axis              ?r   )r   r   constant)mode)rd   constant_valuesr   )r   r   r   .N)nr`   )r   r   expand_dimsr8   randomrandnr   rP   r   r9   rI   padr&   rJ   r5   r6   concatenaterR   fftrfftrO   absmatmulrV   logr:   r;   r<   squeezearangebool)rX   r\   r]   pad_leftframe_size_for_unfoldframes_to_processfirst_in_framerest_in_frameframesstftmagnitude_specmel_speclog_mel_specmel_spectrogramnum_mel_framesframe_end_indicesmasks                    r%   _extract_spectrogram0Gemma4AudioFeatureExtractor._extract_spectrogram   s   ==A~~hQ7H;;++		0P0W0WX`XfXf0g"ggH""c)"9"99H $$)66(Vh]$;*M1J`ab $ 1 1A 5 $HAV]l]lmc!**!237!;sTEUEU?U!V 1#qt) <t?O?ORcdgiljlildlRm?m m(GbQ*373d6F6FIZ[^`cac`c[cId6dd&sCRCx0F ++%vv{{6__2{>99^-=-=>vvh78('*;*;;L*'*=*==L&..q1(..q1
 IIn5GJ__bcc077=$$r'   
raw_speechpadding
max_length
truncationpad_to_multiple_ofreturn_tensorsc           	         [        U[        R                  5      =(       a    [        UR                  5      S:  n	[        U[
        5      =(       a#    [        US   [        R                  [
        45      n
U	=(       d    U
nU(       a2  U Vs/ s H$  n[        R                  " U/5      R                  PM&     nnO<U(       d5  [        U[        R                  5      (       d  [        R                  " U5      nU(       d  [        R                  " U/5      /nU R                  [        SU05      UUUUUS9n/ n/ n[        UR                  UR                  5       Hd  u  nnU R                  UR                  U5      u  nnUR                  UR                  [        R                   5      5        UR                  U5        Mf     [        X5       VVs/ s H  u  nnUUS   -  PM     nnn[        XS.US9$ s  snf s  snnf )an  Creates a batch of MEL spectrograms from the provided raw speech.

This implementation uses a different algorithm for windowing and preemphasis compared to the built-in
`transformers.audio_utils.spectrogram()` function that _will_ result in different outputs. Consider this
carefully when selecting an audio feature extractor, especially with pre-trained models.

Args:
    raw_speech:
        The audio for which MEL spectrograms are created.
    padding (`Union[bool, str, PaddingStrategy]`, *optional*, defaults to `"longest"`):
        The padding strategy to use for batches of audio with different lengths.
    max_length (`int`, *optional*, defaults to 480000):
        If provided, defines the maximum length of the audio to allow. Audio longer than this will be
        truncated if `truncation=True`.
    truncation (`bool`, *optional*, defaults to `True`):
        Whether or not to truncate audio above `max_length`.
    pad_to_multiple_of (`int`, *optional*, defaults to 128):
        When padding, pad to a multiple of this value. The default value is defined for optimal TPU support.
    return_tensors (`Union[str, TensorType]`, *optional*, defaults to `None`):
        The type of tensors to return (e.g., NumPy, or Torch).
    return_attention_mask (`bool`, *optional*, defaults to `True`):
        Whether to return the attention mask for the generated MEL spectrograms.
r   r   r+   )r   r   r   r   r0   ).N)r+   r,   )tensor_type)
isinstancer   ndarraylenr   r   asarrayTrk   r   zipr+   r]   r   appendrP   rQ   )rX   r   r   r   r   r   r   r0   rY   is_batched_numpyis_batched_sequence
is_batchedrsbatched_speechprepared_speechprepared_speech_maskspeechr   s                     r%   __call__$Gemma4AudioFeatureExtractor.__call__   s   F &j"**=[#jFVFVBWZ[B[(X>t:jYZm^`^h^hjr]sCt%<)<
7ABz"**bT*,,zJBJJz2::$F$FJ/J**j\23J*J78!!1"7 " 
 ! = =~?\?\]LFD44VXXtDLFD""6==#<= ''- ^
 ILOHrsHr6DO3Hrs.\&
 	
3 C. ts   
+G0G5)r8   rO   r7   rI   rJ   r9   r4   rV   r:   r3   r;   r<   r5   r6   rR   )   i>  ra   Tg      4@g      $@ra   g     @@ra   TFra   rb   gMbP?NN)longesti S Tr   NT)__name__
__module____qualname____firstlineno____doc__model_input_namesrG   floatrt   r   rF   r   r   tupler   liststrr	   r
   r   r   __static_attributes____classcell__)rZ   s   @r%   r)   r)   1   s   )V *+@A  #"&*!%#"% '+#$'/315#H'H' H' 	H'
  $H' H' H' H' H' H' !%H' H' H' "H' H'  uo,!H'" !$.#H' H'T8%RZZ 8% 8%X]^`^h^hjljtjt^tXu 8%z 1:!(),26-1D
JJe,tBJJ/??$tE{BSSD
 o-D
 $J	D

 D
  $JD
 j(4/D
  $d{D
 
D
 D
r'   r)   )rL   rS   collections.abcr   numpyr   audio_utilsr   r   !feature_extraction_sequence_utilsr   feature_extraction_utilsr   utilsr	   r
   r   
get_loggerr   loggerr   rG   r&   r)   __all__rD   r'   r%   <module>r      s      $  ; I 4 9 9 
		H	%^2:: ^# ^S ^ ^

 ^&v
": v
r )
)r'   