
    Z j-                     b   S r SSKJr  SSKJr  SSKJr  SSKJrJ	r	  \	R                  " \5      r\" SS9\ " S	 S
\5      5       5       r\" SS9\ " S S\5      5       5       r\" SS9\ " S S\5      5       5       r\" SS9\ " S S\5      5       5       r\" SS9\ " S S\5      5       5       r/ SQrg)zBlt model configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringloggingzitazap/blt-1b-hf)
checkpointc                   >  ^  \ rS rSr% SrSrSrSr\\	S'   Sr
\S-  \	S	'   S
r\S-  \	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\-  S-  \	S'   Sr\\	S'   Sr\\-  S-  \	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S '   U 4S! jrS"rU =r$ )#BltLocalEncoderConfig   F  
cross_attn_all_layers (`bool`, *optional*, defaults to `True`):
    Whether all attention layers have cross attention.
cross_attn_k (`int`, *optional*, defaults to 2):
    Number of cross-attention heads used in the model.
hidden_size_global (`int`, *int*, defaults to 2048):
    Hidden size of the global transformer layer.
blt_local_encoder    A  
vocab_sizeFNcross_attn_all_layers   cross_attn_k   hidden_size_global   hidden_size   num_attention_headsnum_key_value_heads   num_hidden_layersh㈵>rms_norm_eps        dropout `  max_position_embeddingsrope_parameterssilu
hidden_actintermediate_size{Gz?initializer_rangec                    > U R                   =(       d    U R                  U l         U R                  =(       d    [        SU R                  -  S-  5      U l        SU l        [        TU ]  " S0 UD6  g N   r   F )r   r   r'   intr   tie_word_embeddingssuper__post_init__selfkwargs	__class__s     z/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/blt/configuration_blt.pyr1   #BltLocalEncoderConfig.__post_init__9   s]    #'#;#;#Wt?W?W !%!7!7!X3q4CSCS?SVW?W;X#( ''    )r'   r   r/   ) __name__
__module____qualname____firstlineno____doc__
model_typedefault_thetar   r.   __annotations__r   boolr   r   r   r   r   r   r   floatr!   r#   r$   r   dictr&   strr'   r)   r1   __static_attributes____classcell__r5   s   @r6   r   r      s     %JMJ).4$;. L#* %)d
)K!!&*t*sL%"%GUS[4%#(S(48O^d*T18J$(sTz(#u#( (r8   r   c                     ^  \ rS rSr% SrSrSrSr\\	S'   Sr
\S-  \	S	'   S
r\S-  \	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\-  S-  \	S'   Sr\\	S'   Sr\\-  S-  \	S'   Sr\\	S'   Sr\\	S'   S r\\	S!'   Sr\S-  \	S"'   Sr\S-  \	S#'   Sr\\ \   -  S-  \	S$'   S%r!\\	S&'   U 4S' jr"S(r#U =r$$ ))BltLocalDecoderConfig@   r   blt_local_decoderr   r   r   TNr   r   r   r   r   r   r   r   r   r   	   r   r   r   r    r!   r"   r#   r$   r%   r&   i   r'   r(   r)   pad_token_idbos_token_ideos_token_idFr/   c                   > U R                   =(       d    U R                  U l         U R                  U R                  -  U l        U R                  =(       d    [        SU R                  -  S-  5      U l        SU l        [        TU ]   " S0 UD6  g r+   	r   r   r   head_dimr'   r.   r/   r0   r1   r2   s     r6   r1   #BltLocalDecoderConfig.__post_init__c   su    #'#;#;#Wt?W?W ((D,D,DD!%!7!7!X3q4CSCS?SVW?W;X#( ''r8   rR   r'   r   r/   )%r9   r:   r;   r<   r=   r>   r?   r   r.   r@   r   rA   r   r   r   r   r   r   r   rB   r!   r#   r$   r   rC   r&   rD   r'   r)   rM   rN   rO   listr/   r1   rE   rF   rG   s   @r6   rI   rI   @   s    %JMJ)-4$;- L#* %)d
)K!!&*t*sL%"%GUS[4%#(S(48O^d*T18J!s!#u##L#*##L#*#+/L#S	/D(/ %%( (r8   rI   c                      ^  \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\S-  \S	'   S
r\\S'   Sr\\S'   Sr\\-  S-  \S'   Sr\\S'   Sr\\-  S-  \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   U 4S jrSrU =r$ )BltGlobalTransformerConfigk   blt_global_transformerr   r   r   r   r   Nr      r   r   r   r    r!      r#   r$   r%   r&   i   r'   r(   r)   Fr/   c                   > U R                   =(       d    U R                  U l         U R                  U R                  -  U l        U R                  =(       d    [        SU R                  -  S-  5      U l        SU l        [        TU ]   " S0 UD6  g r+   rQ   r2   s     r6   r1   (BltGlobalTransformerConfig.__post_init__~   su    #'#;#;#Wt?W?W ((D,D,DD!%!7!7!X3q4CSCS?SVW?W;X#( ''r8   rT   )r9   r:   r;   r<   r>   r?   r   r.   r@   r   r   r   r   rB   r!   r#   r$   r   rC   r&   rD   r'   r)   r/   rA   r1   rE   rF   rG   s   @r6   rW   rW   k   s     *JMK!!&*t*sL%"%GUS[4%#'S'48O^d*T18J!s!#u# %%( (r8   rW   c                      ^  \ rS rSr% SrSr\\S'   Sr\\S'   Sr	\\S'   S	r
\\S
'   Sr\S-  \S'   Sr\\S'   Sr\\S'   Sr\\-  S-  \S'   Sr\\S'   Sr\\-  S-  \S'   Sr\\S'   Sr\\S'   U 4S jrSrU =r$ )BltPatcherConfig   blt_patcherr   r   i   r      r      r   Nr   i    r#   r   r   r    r!   r   r'   r$   r(   r)   Fr/   c                 ,  > U R                   =(       d    U R                  U l         U R                  U R                  -  U l        U R                  =(       d    [        SU R                  -  S-  5      U l        SU l        SU l        [        TU ]$  " S0 UD6  g )Nr,   r   Fr%   r-   )
r   r   r   rR   r'   r.   r/   r&   r0   r1   r2   s     r6   r1   BltPatcherConfig.__post_init__   s|    #'#;#;#Wt?W?W ((D,D,DD!%!7!7!X3q4CSCS?SVW?W;X#(  ''r8   )rR   r&   r'   r   r/   )r9   r:   r;   r<   r>   r   r.   r@   r   r   r   r   r#   r   rB   r!   r'   r$   r   rC   r)   r/   rA   r1   rE   rF   rG   s   @r6   r_   r_      s     JJKs!!&*t*#'S'L%"%GUS[4%!s!48O^d*T18#u# %%( (r8   r_   c                     ^  \ rS rSr% SrSrS/rSr\\	\
\S.rSr\\S'   S	r\\S
'   Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\\   S-  \S'   Sr\S-  \S'   Sr\S-  \S'   Sr\ \!-  S-  \S'   Sr"\ \!-  S-  \S'   Sr#\ \!-  S-  \S'   Sr$\ \!-  S-  \S '   S!r%\\S"'   Sr&\S-  \S#'   Sr'\S-  \S$'   Sr(\\\   -  S-  \S%'   S&r)\\S''   Sr*\+\ -  S-  \S('   U 4S) jr,S*r-U =r.$ )+	BltConfig   a  
patch_in_forward (`bool`, *optional*, defaults to `True`):
    Whether to perform patching during the forward pass.
patch_size (`int`, *optional*, defaults to 4):
    Size of the patches used in the patching mechanism.
patching_mode (`str`, *optional*, defaults to `"entropy"`):
    The mode used for patching, such as entropy-based patching.
patching_threshold (`float`, *optional*, defaults to 1.34):
    Threshold value used for determining when to apply patches.
patching_batch_size (`int`, *optional*, defaults to 1):
    Batch size used during the patching process.
max_patch_length (`int`, *optional*):
    Maximum length of patches that can be generated.
cross_attn_k (`int`, *optional*, defaults to 2):
    Number of cross-attention heads used in the model.
encoder_hash_byte_group_size (`list`, *optional*):
    List of byte group sizes used in the encoder hash function.
encoder_hash_byte_group_vocab (`int`, *optional*, defaults to 500002):
    Vocabulary size for the encoder hash byte groups.
encoder_hash_byte_group_nb_functions (`int`, *optional*, defaults to 1):
    Number of hash functions used in the encoder byte grouping.
patcher_config (`BltPatcherConfig`, *optional*):
    Configuration for the patcher component of the model.
global_config (`BltGlobalTransformerConfig`, *optional*):
    Configuration for the global transformer component of the model.

Example:
```python
>>> from transformers import BltModel, BltConfig

>>> # Initializing a Blt configuration
>>> configuration = BltConfig()

>>> # Initializing a model from the configuration
>>> model = BltModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```bltpast_key_valuesr   )patcher_configencoder_configdecoder_configglobal_configr   r   r[   r#   TNpatch_in_forward   
patch_sizeentropypatching_modeg   ]?patching_thresholdr   patching_batch_sizemax_patch_lengthr   r   encoder_hash_byte_group_sizei" encoder_hash_byte_group_vocab$encoder_hash_byte_group_nb_functionsrk   rl   rm   rn   Fr/   rM   rN   rO   r(   r)   r$   c                   > U R                   =(       d    / SQU l         U R                  c.  [        U R                  S9U l        [        R                  S5        O_[        U R                  [        5      (       a@  U R                  R                  SU R                  5        [        S0 U R                  D6U l        U R                  c.  [        U R                  S9U l	        [        R                  S5        O_[        U R                  [        5      (       a@  U R                  R                  SU R                  5        [        S0 U R                  D6U l	        U R                  c.  [        U R                  S9U l        [        R                  S5        O_[        U R                  [        5      (       a@  U R                  R                  SU R                  5        [        S0 U R                  D6U l        U R                  c.  [        U R                  S9U l        [        R                  S5        O_[        U R                  [        5      (       a@  U R                  R                  SU R                  5        [        S0 U R                  D6U l        U R                  R                  U R                   -  nX R                  R                  :w  a  UOS U R                  l        [$        TU ]L  " S0 UD6  g )	N)r   rp            r,   )r)   z8patcher_config is None, using default Blt patcher configr)   z8encoder_config is None, using default Blt encoder configz8decoder_config is None, using default Blt decoder configz6global_config is None, using default Blt global configr-   )rw   rk   r_   r)   loggerinfo
isinstancerC   
setdefaultrl   r   rm   rI   rn   rW   r   r   encoder_cross_output_sizer0   r1   )r3   r4   r   r5   s      r6   r1   BltConfig.__post_init__   s8   ,0,M,M,cQc) &"2TE[E["\DKKRS++T22**+>@V@VW"2"IT5H5H"ID&"7$J`J`"aDKKRS++T22**+>@V@VW"7"N$:M:M"ND&"7$J`J`"aDKKRS++T22**+>@V@VW"7"N$:M:M"ND%!;dNdNd!eDKKPQ**D11))*=t?U?UV!;!Qd>P>P!QD %)$7$7$C$CdFWFW$W!)BFXFXFdFd)d%jn 	4 	''r8   )rm   rl   rw   rn   rk   )/r9   r:   r;   r<   r=   r>   keys_to_ignore_at_inferencer?   r_   r   rI   rW   sub_configsr   r.   r@   r#   ro   rA   rq   rs   rD   rt   rB   ru   rv   r   rw   rU   rx   ry   rk   rC   r   rl   rm   rn   r/   rM   rN   rO   r)   r$   r   r1   rE   rF   rG   s   @r6   rg   rg      s   &P J#4"5M*//3	K J#'S'$(dTk(Jd
 )M3:)'88&'t'#'cDj' L#* 59 $s)d"2906!3:678(#*859ND++d2959ND++d2959ND++d2948M4**T18 %%#L#*##L#*#+/L#S	/D(/#u#48O^d*T18&( &(r8   rg   )rg   r_   r   rI   rW   N)r=   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r   
get_loggerr9   r~   r   rI   rW   r_   rg   __all__r-   r8   r6   <module>r      s    . 3 1 , 
		H	% -.!(, !(  /!(H -.&(, &(  /&(R -.(!1 (  /(4 -.(' (  /(4 -.p(  p(  /p(fr8   