"""Bloom configuration"""

from huggingface_hub.dataclasses import strict

from ...configuration_utils import PreTrainedConfig
from ...utils import auto_docstring

@auto_docstring(checkpoint="bigscience/bloom")
@strict(accept_kwargs=True)
class BloomConfig(PreTrainedConfig):
    r"""
    apply_residual_connection_post_layernorm (`bool`, *optional*, defaults to `False`):
        If enabled, use the layer norm of the hidden states as the residual in the transformer blocks.
    slow_but_exact (`bool`, *optional*, defaults to `False`):
        Experimental feature. Whether to use the slow but exact implementation of the attention mechanism.
        While merging the TP rank tensors, the results may differ slightly between the model trained on
        Megatron and our model because of slicing operations. Please refer to [this
        issue](https://github.com/pytorch/pytorch/issues/76232). Enabling this feature yields more accurate
        results but slows down inference. This will probably be resolved once the main model has been
        fine-tuned with TP_rank=1.

    Example:

    ```python
    >>> from transformers import BloomConfig, BloomModel

    >>> # Initializing a Bloom configuration
    >>> configuration = BloomConfig()

    >>> # Initializing a model (with random weights) from the configuration
    >>> model = BloomModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
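
    >>> # (Illustrative) overriding the default shape, e.g. a bloom-560m-scale configuration
    >>> configuration_560m = BloomConfig(hidden_size=1024, n_layer=24, n_head=16)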
    ```"""

    model_type = "bloom"
    keys_to_ignore_at_inference = ["past_key_values"]
    attribute_map = {
        "num_hidden_layers": "n_layer",
        "num_attention_heads": "n_head",
    }

    vocab_size: int = 250880
    hidden_size: int = 64
    n_layer: int = 2
    n_head: int = 8
    layer_norm_epsilon: float = 1e-5
    initializer_range: float = 0.02
    use_cache: bool = True
    bos_token_id: int | None = 1
    eos_token_id: int | list[int] | None = 2
    pad_token_id: int | None = 3
    apply_residual_connection_post_layernorm: bool = False
    hidden_dropout: float | int = 0.0
    attention_dropout: float | int = 0.0
    pretraining_tp: int = 1  # TP rank used when training with Megatron
    slow_but_exact: bool = False
    tie_word_embeddings: bool = True

    def __post_init__(self, **kwargs):
        # Backward compatibility: `n_embed` is a legacy alias for `hidden_size`.
        n_embed = kwargs.pop("n_embed", None)
        self.hidden_size = self.hidden_size if n_embed is None else n_embed
        super().__post_init__(**kwargs)


__all__ = ["BloomConfig"]