
    Z j1                         S SK rS SKrSSKJr  SSKJr  SSKJr  SSK	J
r
Jr  SSKJr  \R                  " \5      rS\S	\S
\S\S\S\R&                  4S jr\" SS9 " S S\5      5       rS/rg)    N   )hertz_to_mel)SequenceFeatureExtractor)BatchFeature)
TensorTypelogging)requiresnum_mel_binsnum_spectrogram_binssample_ratelower_edge_hertzupper_edge_hertzreturnc                 b   [         R                  nSnUS-  n[         R                  " SXUS9US n	[        U	SS9SS2[         R                  4   n
[         R                  " [        USS9[        USS9U S-   US9nUSS	 [         R                  SS24   USS
 [         R                  SS24   USS [         R                  SS24   pnX-
  X-
  -  nX-
  X-
  -  n[         R
                  " S[         R                  " UU5      5      n[         R                  " UUS/SS//5      R                  U5      $ )z.NumPy-port of the JAX mel weight matrix logic.   g       @        )dtypeNkaldi)	mel_scale   r   )	npfloat64linspacer   newaxismaximumminimumpadastype)r
   r   r   r   r   r   internal_dtypebands_to_zeronyquist_hertzlinear_frequenciesspectrogram_bins_meledgeslower_edge_mel
center_melupper_edge_mellower_slopesupper_slopesmel_weights_matrixs                     ځ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/lasr/feature_extraction_lasr.pylinear_to_mel_weight_matrixr.      s<    ZZN M#%MS-Ucderest'(:gNqRTR\R\}]KK%9%9q	E 	cr
2::q=!aBJJM"ab	"**a-  !/N )9j>YZL"9n>YZLCL,)OP66$q'9Aq6&BCJJ5QQ    )torch)backendsc                   .  ^  \ rS rSrSrSS/r      SU 4S jjrSS jr          SS\R                  \
\   -  \
\R                     -  \
\
\      -  S	\S
\S-  S\\-  S-  S\S-  S\S-  S\S-  S\S-  S\S-  S\S-  S\S-  S\4S jjrSrU =r$ )LasrFeatureExtractorB   a  
Constructs a LASR feature extractor.

This feature extractor inherits from [`~feature_extraction_sequence_utils.SequenceFeatureExtractor`] which contains
most of the main methods. Users should refer to this superclass for more information regarding those methods.

This class extracts mel-filter bank features from raw speech using a custom numpy implementation of the `Short Time
Fourier Transform` which should match pytorch's `torch.stft` equivalent.

Args:
        feature_size (`int`, *optional*, defaults to 128):
            The feature dimension of the extracted features.
        sampling_rate (`int`, *optional*, defaults to 16000):
            The sampling rate at which the audio files should be digitalized expressed in hertz (Hz).
        hop_length (`int`, *optional*, defaults to 160):
            Length of the overlapping windows for the STFT used to obtain the Mel Frequency coefficients.
        n_fft (`int`, *optional*, defaults to 512):
            Size of the Fourier transform.
        win_length (`int`, *optional*, defaults to 400):
            The window length for the STFT computation.
        padding_value (`float`, *optional*, defaults to 0.0):
            Padding value used to pad the audio. Should correspond to silences.
input_featuresattention_maskc                    > [         TU ]  " SXUS.UD6  X0l        X@l        XPl        [
        R                  " [        UUS-  S-   USS[        R                  S95      U l
        g )N)feature_sizesampling_ratepadding_valuer   r   g     @_@g     L@)r
   r   r   r   r   r    )super__init__
hop_lengthn_fft
win_lengthr0   
from_numpyr.   r   r   mel_filters)	selfr8   r9   r>   r?   r@   r:   kwargs	__class__s	           r-   r=   LasrFeatureExtractor.__init__^   sf     	wl_lwpvw$
$ ++')%*aZ!^)!&!'jj	
r/   c                    [         R                  " U R                  SU[         R                  S9nUR	                  [         R                  5      nUR                  SU R                  U R                  5      n[         R                  R                  X4-  U R                  S9n[         R                  " U5      S-  nU R                  R	                  U5      n[         R                  " Xg-  SS9n[         R                  " U5      nU$ )NF)periodicdevicer   r   )nr   gh㈵>)min)r0   hann_windowr@   r   tounfoldr>   fftrfftr?   absrB   clamplog)	rC   waveformrI   windowframesstft
power_specrB   mel_specs	            r-   _torch_extract_fbank_features2LasrFeatureExtractor._torch_extract_fbank_featuresx   s    ""4??U6Y^YfYfg;;u}}-
 T__dooFyy~~fo~<YYt_)
 &&))&1;;z7TB99X&r/   N
raw_speech
truncationpad_to_multiple_ofreturn_tensorsreturn_attention_maskpadding
max_lengthr9   do_normalizerI   return_token_timestampsr   c                 $   UbP  XR                   :w  a@  [        SU R                  R                   SU R                    SU R                    SU S3	5      eO-[        R                  SU R                  R                   S35        [        U[        R                  5      (       a  [        R                  " U5      nO[        U[        [        45      (       ae  [        US	   [        [        R                  45      (       a'  U Vs/ s H  n[        R                  " U5      PM     nnO[        R                  " U5      n[        U[        R                  5      =(       a    [        UR                  5      S
:  nU(       aW  [        UR                  5      S:  a>  [        R                  SU R                  R                   S35        UR!                  S5      n[        U[        [        45      nU(       ab  U H\  n[        UR                  5      S
:  d  M  [        R                  SU R                  R                   S35        UR!                  S5      nM^     U(       d  U(       a6  U Vs/ s H(  oSS2S4   R#                  [        R$                  5      PM*     nnO'USS2S4   R#                  [        R$                  5      /n['        SU05      nU R)                  UUUUUUSS9nUR*                  R-                  S5      nU R/                  UU
5      nSUR#                  [        R$                  5      0nU(       aO  UR0                  SS2U R2                  S
-
  SU R4                  24   nUR#                  [        R6                  5      US'   ['        UUS9$ s  snf s  snf )a  
Main method to featurize and prepare for the model one or several sequence(s). Implementation uses PyTorch for
the STFT computation if available, otherwise a slower NumPy based one.

Args:
    raw_speech (`np.ndarray`, `list[float]`, `list[np.ndarray]`, `list[list[float]]`):
        The sequence or batch of sequences to be padded. Each sequence can be a numpy array, a list of float
        values, a list of numpy arrays or a list of list of float values. Must be mono channel audio, not
        stereo, i.e. single float per timestep.
    truncation (`bool`, *optional*, default to `True`):
        Activates truncation to cut input sequences longer than *max_length* to *max_length*.
    pad_to_multiple_of (`int`, *optional*, defaults to None):
        If set will pad the sequence to a multiple of the provided value.

        This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
        `>= 7.5` (Volta), or on TPUs which benefit from having sequence lengths be a multiple of 128.
    return_attention_mask (`bool`, *optional*):
        Whether to return the attention mask. If left to the default, will return the attention mask according
        to the specific feature_extractor's default.

        [What are attention masks?](../glossary#attention-mask)

        <Tip>

        For Parakeet models, `attention_mask` should always be passed for batched inference, to avoid subtle
        bugs.

        </Tip>

    return_tensors (`str` or [`~utils.TensorType`], *optional*):
        If set, will return tensors instead of list of python integers. Acceptable values are:

        - `'tf'`: Return TensorFlow `tf.constant` objects.
        - `'pt'`: Return PyTorch `torch.Tensor` objects.
        - `'np'`: Return Numpy `np.ndarray` objects.
    sampling_rate (`int`, *optional*):
        The sampling rate at which the `raw_speech` input was sampled. It is strongly recommended to pass
        `sampling_rate` at the forward call to prevent silent errors and allow automatic speech recognition
        pipeline.
    padding_value (`float`, *optional*, defaults to 0.0):
        The value that is used to fill the padding values / vectors.
    do_normalize (`bool`, *optional*, defaults to `False`):
        Whether or not to zero-mean unit-variance normalize the input. Normalizing can help to significantly
        improve the performance of the model.
    device (`str`, *optional*, defaults to `'cpu'`):
        Specifies the device for computation of the log-mel spectrogram of audio signals in the
        `_torch_extract_fbank_features` method. (e.g., "cpu", "cuda")
    return_token_timestamps (`bool`, *optional*, defaults to `None`):
        Deprecated. Use `return_attention_mask` instead from which the number of frames can be inferred.

        Whether or not to return the number of frames of the input raw_speech.
        These num_frames can be used by the model to compute word level timestamps.
Nz3The model corresponding to this feature extractor: z& was trained using a sampling rate of zI. Please make sure that the provided `raw_speech` input was sampled with z	 and not .zDIt is strongly recommended to pass the `sampling_rate` argument to `zN()`. Failing to do so can result in silent errors that might be hard to debug.r   r   r   z2Only mono-channel audio is supported for input to z;. We will take the mean of the channels to convert to mono.r   r5   pt)ra   rb   r]   r^   r`   r_   r6   )datatensor_type)r9   
ValueErrorrE   __name__loggerwarning
isinstancer   ndarrayr0   tensorlisttupleTensorlenshapemeanrM   float32r   r   r5   squeezerZ   r6   r@   r>   bool)rC   r\   r]   r^   r_   r`   ra   rb   r9   rc   rI   rd   rD   speechis_batched_torchis_batched_sequencebatched_speechpadded_inputsr5   rh   r6   s                        r-   __call__LasrFeatureExtractor.__call__   s,   H $ 2 22 I$..JaJaIb c))-););(< =))-););(<Im_TUW  3 NNVW[WeWeWnWnVo p\ \ j"**--j1J
T5M22*Q-$

);<<AKLvell62
L
"\\*5
%j%,,?]C
HXHXDY\]D]J$4$4 5 9NNDT^^E\E\D] ^L L $,J(dE]C$v||$q(NNLT^^MdMdLe fT T $[[_F % 2JTU*D/,,U]];*JUJ$QW-00?@J%'7&DE!!1"7 ! 
 '55==bA;;NFSn//>
 !*99!T__q=P=cTXTcTc=c:cdN%3%6%6uzz%BD!">BB_ M. Vs   > N/N)r>   rB   r?   r@   )   i>     i   i  r   )cpu)
FNNNlongestNNNr   N)rk   
__module____qualname____firstlineno____doc__model_input_namesr=   rZ   r   ro   rq   floatry   intstrr   r   r   __static_attributes____classcell__)rE   s   @r-   r3   r3   B   sC   0 *+;< 
4, !)-26-1'!%$($("/3ECJJe,tBJJ/??$tE{BSSEC EC  $J	EC
 j(4/EC  $d{EC tEC $JEC TzEC TkEC d
EC "&EC 
EC ECr/   r3   )numpyr   r0   audio_utilsr   !feature_extraction_sequence_utilsr   feature_extraction_utilsr   utilsr   r   utils.import_utilsr	   
get_loggerrk   rl   r   r   ro   r.   r3   __all__r;   r/   r-   <module>r      s      ' I 4 ( * 
		H	%"R"R"R "R 	"R
 "R ZZ"RJ 
:MC3 MC MC` "
"r/   