
    Z jg                         S r SSKrSSKrSSKJr  SSKJr  SSKJ	r	  SSK
Jr  \" SS	9\ " S
 S\5      5       5       rS/rg)zMimi model configuration    N)strict   )PreTrainedConfig)RopeParameters)auto_docstringzkyutai/mimi)
checkpointc                     ^  \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\S'   Sr\\   S-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S '   S!r\\S"'   S#r\\S$'   Sr\\S%'   Sr\\S&'   Sr \\S''   S(r!\\S)'   Sr"\\S*'   S(r#\\S+'   S(r$\\S,'   Sr%\S-  \S-'   S.r&\\S/'   S0r'\\S1'   S2r(\\S3'   S4r)\\S5'   S#r*\\S6'   S#r+\\S7'   Sr,\-\.-  S-  \S8'   S9r/\\S:'   S;r0\\-  \S<'   S=r1\\S>'   S#r2\\S?'   Sr3\\S@'   U 4SA jr4SB r5\6SC\4SD j5       r7\6SC\4SE j5       r8\6SC\4SF j5       r9\6SC\4SG j5       r:SHr;U =r<$ )I
MimiConfig   a  
audio_channels (`int`, *optional*, defaults to 1):
    Number of channels in the audio data. Either 1 for mono or 2 for stereo.
num_filters (`int`, *optional*, defaults to 64):
    Number of convolution kernels of first `MimiConv1d` down sampling layer.
num_residual_layers (`int`,  *optional*, defaults to 1):
    Number of residual layers.
upsampling_ratios (`Sequence[int]`, *optional*):
    Kernel size and stride ratios. The encoder uses downsampling ratios instead of upsampling ratios, hence it
    will use the ratios in the reverse order to the ones specified here that must match the decoder order.
    If not specified, will defaults to `[8, 6, 5, 4]`
last_kernel_size (`int`, *optional*, defaults to 3):
    Kernel size for the last convolution layer.
residual_kernel_size (`int`, *optional*, defaults to 3):
    Kernel size for the residual layers.
dilation_growth_rate (`int`, *optional*, defaults to 2):
    How much to increase the dilation with each layer.
use_causal_conv (`bool`, *optional*, defaults to `True`):
    Whether to use fully causal convolution.
pad_mode (`str`, *optional*, defaults to `"constant"`):
    Padding mode for the convolutions.
compress (`int`, *optional*, defaults to 2):
    Reduced dimensionality in residual branches.
trim_right_ratio (`float`, *optional*, defaults to 1.0):
    Ratio for trimming at the right of the transposed convolution under the `use_causal_conv = True` setup. If
    equal to 1.0, it means that all the trimming is done at the right.
num_quantizers (`int`, *optional*, defaults to 32):
    Number of quantizer channels, or codebooks, in the quantizer.
use_conv_shortcut (`bool`, *optional*, defaults to `False`):
    Whether to use a convolutional layer as the 'skip' connection in the `MimiResnetBlock` block. If False,
    an identity function will be used, giving a generic residual connection.
vector_quantization_hidden_dimension (`int`, *optional*, defaults to 256):
    Intermediate representation dimension in the residual vector quantization space.
num_semantic_quantizers (`int`, *optional*, defaults to 1):
    Number of semantic quantizer channels, or codebooks, in the semantic quantizer. Must be lower than `num_quantizers`.
upsample_groups (`int`, *optional*, defaults to 512):
    If `frame_rate!=encodec_frame_rate`, indicates the number of groups used in the upsampling operation to go from one rate to another.
use_streaming (`bool`, *optional*, defaults to `False`):
    Whether to use streaming mode. If `True`, the model encode method will return the padding cache that can be used in a subsequent call to the encode method.

Example:

```python
>>> from transformers import MimiModel, MimiConfig

>>> # Initializing a "kyutai/mimi" style configuration
>>> configuration = MimiConfig()

>>> # Initializing a model (with random weights) from the "kyutai/mimi" style configuration
>>> model = MimiModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```mimii]  sampling_rate   audio_channelsi   hidden_size@   num_filtersnum_residual_layersNupsampling_ratios   kernel_sizer   last_kernel_sizeresidual_kernel_size   dilation_growth_rateTuse_causal_convconstantpad_modecompressg      ?trim_right_ratioi   codebook_size   codebook_dim    num_quantizersFuse_conv_shortcut$vector_quantization_hidden_dimensionnum_semantic_quantizersupsample_groups   num_hidden_layersintermediate_sizenum_attention_headsnum_key_value_headshead_dimgelu
hidden_acti@  max_position_embeddingsg{Gz?initializer_rangegh㈵>norm_eps	use_cacheuse_streamingrope_parameters   sliding_windowg        attention_dropoutg{Gz?layer_scale_initial_scaleattention_biastie_word_embeddingsc                 V  > U R                   (       a  U R                   O/ SQU l         U R                  b  U R                  OU R                  U l        U R                  =(       d    U R                  U R                  -  U l        UR                  SS 5      U l        [        TU ]   " S0 UD6  g )N)r)            
frame_rate )	r   r"   r   r.   r,   pop_frame_ratesuper__post_init__)selfkwargs	__class__s     |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/mimi/configuration_mimi.pyrF   MimiConfig.__post_init__}   s    ;?;Q;Q!7!7Wc151B1B1ND--TXTdTdU)9)9T=U=U)U "::lD9''    c                     U R                   U R                  :  a&  [        SU R                   SU R                    S35      eg)zOPart of `@strict`-powered validation. Validates the architecture of the config.zVThe number of semantic quantizers should be lower than the total number of quantizers z, but is currently .N)r'   r$   
ValueErrorrG   s    rJ   validate_architecture MimiConfig.validate_architecture   sg    ''4+>+>>himi|i|h}  ~Q  RV  Rn  Rn  Qo  op  q  ?rL   returnc                     [         R                  " U R                  5      n[        R                  " U R
                  U-  5      $ N)npprodr   mathceilr   )rG   
hop_lengths     rJ   encodec_frame_rateMimiConfig.encodec_frame_rate   s0    WWT334
yy++j899rL   c                     U R                   $ rU   )r$   rP   s    rJ   num_codebooksMimiConfig.num_codebooks   s     """rL   c                    S/n[        U R                  5       H  n[        U R                  5       Hu  n[	        U R
                  [        5      (       a  [        U R
                  5      OSnUR                  S/US-   -  5        U R                  (       d  Md  UR                  S5        Mw     UR                  U5        M     UR                  S5        UR                  S5        [        R                  " U5      $ )Nr   r   )reversedr   ranger   
isinstancer   listlenextendr%   appendrX   rW   )rG   stridesratiojlen_kernel_sizess        rJ   
frame_sizeMimiConfig.frame_size   s     # d445E4334EOPTPiPikoEpEp3t'@'@#Avw s&6&:;<)))NN1%	 5 NN5! 6 	q 	qyy!!rL   c                 f    U R                   b  U R                   $ U R                  U R                  -  $ rU   )rD   r   rl   rP   s    rJ   rA   MimiConfig.frame_rate   s1     '###!!DOO33rL   )rD   r"   r.   r   )=__name__
__module____qualname____firstlineno____doc__
model_typer   int__annotations__r   r   r   r   r   rd   r   r   r   r   r   boolr   strr   r   floatr    r"   r$   r%   r&   r'   r(   r*   r+   r,   r-   r.   r0   r1   r2   r3   r4   r5   r6   r   dictr8   r9   r:   r;   r<   rF   rQ   propertyr[   r^   rl   rA   __static_attributes____classcell__)rI   s   @rJ   r
   r
      s/   5n JM3NCKK  *.tCy4'.Kc !#! !#! OT HcHc!e!M3L#NC#t#03(#3#$S$OSs!s!    HcDjJ#'S'#u#HeItM448O^d*T18NC%(us{('+u+ ND  $$( :C : : #s # # "C " ". 4E 4 4rL   r
   )rt   rX   numpyrV   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__rB   rL   rJ   <module>r      sR       . 3 1 # =)Y4! Y4  *Y4x .rL   