
    Z j1!                         S r SSKJr  SSKJr  SSKJr  SSKJr  SSK	J
r
  \" S	S
9\ " S S\5      5       5       r\" S	S
9\ " S S\5      5       5       rSS/rg)zMoshi model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstring   )
AutoConfigzkmhf/hf-moshiko)
checkpointc                     ^  \ rS rSr% SrSrS/rSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\	S'   Sr\\-  \	S'   S r\\	S!'   S"r\\	S#'   Sr\\	S$'   S%r\\	S&'   Sr\S-  \	S''   Sr\S-  \	S('   Sr \\!\   -  S-  \	S)'   U 4S* jr"S+ r#S,r$U =r%$ )-MoshiDepthConfig   a  
input_size (`int`, *optional*, defaults to 4096):
    Dimensionality of the input hidden states. Used to connect the main decoder to the depth decoder.
audio_vocab_size (`int`, *optional*, defaults to 2048):
    Vocabulary size of the audio part of model. Defines the number of different tokens that can be
    represented by the `audio_codes` passed when calling the Moshi models.
ffn_dim (`int`, *optional*, defaults to 5632):
    Dimensionality of the "intermediate" (often named feed-forward) layer in the depth decoder block. Must be even.

Example:

```python
>>> from transformers import (
...     MoshiDepthConfig,
...     MoshiDepthDecoder,
... )

>>> configuration = MoshiDepthConfig()

>>> # Initializing a MoshiDepthDecoder (with random weights) from the kmhf/hf-moshiko style configuration
>>> model = MoshiDepthDecoder(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```moshi_depthpast_key_values }  
vocab_sizei   hidden_size   
input_size   num_hidden_layers   num_attention_headsNnum_key_value_headsi   audio_vocab_size	   max_position_embeddingssilu
hidden_acthead_dim{Gz?initializer_rangeT	use_cache   sliding_window        attention_dropouti   ffn_dim:0yE>rms_norm_epsnum_codebooksFtie_word_embeddingspad_token_idbos_token_ideos_token_idc                    > U R                   b  U R                   OU R                  U l         U R                  =(       d    U R                  U R                  -  U l        [        TU ]  " S0 UD6  g )N )r   r   r   r   super__post_init__)selfkwargs	__class__s     ~/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/moshi/configuration_moshi.pyr2   MoshiDepthConfig.__post_init__N   s[    (,(@(@(LD$$RVRjRj 	  U)9)9T=U=U)U''    c                 \    U R                   S-  S:X  a  [        SU R                    S35      eg)OPart of `@strict`-powered validation. Validates the architecture of the config.r      	`ffn_dim=` must be even.N)r'   
ValueErrorr3   s    r6   validate_architecture&MoshiDepthConfig.validate_architectureU   s0    <<!q yoFGG !r8   )r   r   )&__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   r   r   r   strr   r!   floatr"   boolr$   r&   r'   r)   r*   r+   r,   r-   r.   listr2   r@   __static_attributes____classcell__r5   s   @r6   r   r      s$   4 J#4"5JKJs!!&*t* c #$S$JHcDj#u#ItNC%(us{(GSL%M3 %%#L#*##L#*#+/L#S	/D(/(H Hr8   r   c                     ^  \ rS rSr% SrSrS/r\\S.r	Sr
\\S'   Sr\\S	'   S
r\\S'   S
r\\S'   Sr\S-  \S'   Sr\S-  \S'   Sr\\S'   Sr\\-  S-  \S'   Sr\\S'   Sr\S-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\-  \S'   Sr\\S'   Sr \\S '   S!r!\\S"'   S#r"\\S$'   Sr#\S-  \S%'   Sr$\S-  \S&'   Sr%\\&\   -  S-  \S''   Sr'\\(-  S-  \S('   Sr)\\(-  S-  \S)'   U 4S* jr*S+ r+\,S, 5       r-\.S(\(4S- j5       r/S.r0U =r1$ )/MoshiConfig[   a  
audio_vocab_size (`int`, *optional*):
    Vocabulary size of the audio part of model. Defines the number of different tokens that can be
    represented by the `audio_codes` passed when calling the Moshi models.
ffn_dim (`int`, *optional*, defaults to 22528):
    Dimensionality of the "intermediate" (often named feed-forward) layer in the main decoder block. Must be even.
audio_encoder_config (`PreTrainedConfig | dict`, *optional*):
    Configuration for the audio encoder.
depth_decoder_config (`PreTrainedConfig | dict`, *optional*):
    Configuration for the depth decoder.

Example:

```python
>>> from transformers import (
...     MoshiConfig,
...     MoshiForConditionalGeneration,
... )

>>> configuration = MoshiConfig()

>>> # Initializing a MoshiForConditionalGeneration (with random weights) from the kmhf/hf-moshiko style configuration
>>> model = MoshiForConditionalGeneration(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config

>>> # Saving the model, including its configuration
>>> model.save_pretrained("kmhf/hf-moshiko")

>>> # loading model and config from pretrained folder
>>> moshi_config = MoshiConfig.from_pretrained("kmhf/hf-moshiko")
>>> model = MoshiForConditionalGeneration.from_pretrained("kmhf/hf-moshiko", config=moshi_config)
```moshir   )audio_encoder_configdepth_decoder_configr   r   r   r       r   r   Nr   r   i  r   rope_parametersr   r   r   r    r!   Tr"   r$   r%   r&   i X  r'   r(   r)   r#   r*   Fr+   r,   r-   r.   rV   rW   c                   > U R                   b  U R                   OU R                  U l         U R                  =(       d    U R                  U R                  -  U l        [	        U R
                  [        5      (       aC  U R
                  R                  SS5      n[        R                  " U40 U R
                  D6U l        O(U R
                  c  [        R                  " S5      U l        U R                  c  U R
                  R                  OU R                  U l
        [	        U R                  [        5      (       ac  U R                  R                  U R                  U R                  U R                  U R                  S.5        [!        S0 U R                  D6U l        OU R                  c  [!        5       U l        ["        TU ]H  " S0 UD6  g )NrG   mimi)r   r   r   r*   r0   )r   r   r   r   
isinstancerV   dictpopr	   	for_modelr   codebook_sizerW   updater   r*   r   r1   r2   )r3   r4   audio_encoder_model_typer5   s      r6   r2   MoshiConfig.__post_init__   ss   (,(@(@(LD$$RVRjRj 	  U)9)9T=U=U)Ud//66'+'@'@'D'D\SY'Z$(2(<(<=U(sY]YrYr(sD%&&.(2(<(<V(DD% 8<7L7L7TD%%33Z^ZoZo 	 d//66%%,,(,(=(="&"2"2"&//%)%7%7	 )9(U4;T;T(UD%&&.(8(:D%''r8   c                    U R                   S-  S:X  a  [        SU R                    S35      eU R                  U R                  R                  :  a0  [        SU R                   SU R                  R                   S35      eg)	r:   r   r;   r<   r=   z`num_codebooks=zX` is greater than the maximum number of codebooks that the audio encoder can deal with (z). Please lower it.N)r'   r>   r*   rV   r?   s    r6   r@   !MoshiConfig.validate_architecture   s    <<!q yoFGG 9 9 G GG!$"4"4!5  6N  OS  Oh  Oh  Ov  Ov  Nw  wJ  K  Hr8   c                 .    U R                   R                  $ )N)rV   sampling_rater?   s    r6   rg   MoshiConfig.sampling_rate   s    ((666r8   c                 2    U " SSUR                  5       0UD6$ )z
Instantiate a [`MoshiConfig`] (or a derived class) from an audio encoder configuration.

Returns:
    [`MoshiConfig`]: An instance of a configuration object
rV   r0   )to_dict)clsrV   r4   s      r6   from_audio_encoder_config%MoshiConfig.from_audio_encoder_config   s*      
!5!=!=!?

 	
r8   )rV   r   rW   r   r   )2rB   rC   rD   rE   rF   rG   rH   r	   r   sub_configsr   rI   rJ   r   r   r   r   r   r   rY   r   r]   r   rK   r   r!   rL   r"   rM   r$   r&   r'   r)   r*   r+   r,   r-   r.   rN   rV   r   rW   r2   r@   propertyrg   classmethodrl   rO   rP   rQ   s   @r6   rS   rS   [   s   !F J#4"5+5O_`KJKs!!&*t*#'cDj'#'S'48O^d*T18JHcDj#u#ItNC%(us{(GSL%M3 %%#L#*##L#*#+/L#S	/D(/;?$!11D8?;?$!11D8?(< 7 7 
.
 
r8   rS   N)rF   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   auto.configuration_autor	   r   rS   __all__r0   r8   r6   <module>rw      s      . 3 1 # 0 ,->H' >H  .>HB ,-|
" |
  .|
~ ,
-r8   