
    Z j=                     p    S r SSKJr  SSKJr  SSKJr  SSKJr  \" SS9\ " S	 S
\5      5       5       r	S
/r
g)zFalcon configuration    )strict   )PreTrainedConfig)RopeParameters)auto_docstringztiiuae/falcon-7b)
checkpointc                   &  ^  \ rS rSr% SrSrS/rSr\\	S'   Sr
\\	S'   S	r\\	S
'   Sr\\	S'   Sr\S-  \	S'   Sr\S-  \	S'   Sr\\	S'   Sr\\	S'   Sr\\-  S-  \	S'   Sr\\-  S-  \	S'   Sr\S-  \	S'   Sr\S-  \	S'   Sr\S-  \	S'   Sr\S-  \	S'   Sr\S-  \	S'   Sr\S-  \	S'   Sr\\	S '   Sr\\-  S-  \	S!'   S"r\S-  \	S#'   S"r \\!\   -  S-  \	S$'   Sr"\S-  \	S%'   Sr#\S-  \	S&'   S'r$\%S-  \	S('   Sr&\\	S)'   U 4S* jr'\(S+ 5       r)\(S, 5       r*S-r+U =r,$ ).FalconConfig   a  
num_ln_in_parallel_attn (`int`, *optional*):
    Set to 2 if separate layer norms are to be used for the MLP and the attention output when using parallel
    attention, otherwise, 1.
alibi (`bool`, *optional*, defaults to `False`):
    Whether to use ALiBi positional biases during self-attention.
new_decoder_architecture (`bool`, *optional*, defaults to `False`):
    Whether to use the new (Falcon-40B) decoder architecture. If `True`, the `multi_query` and `parallel_attn`
    arguments are ignored, as the new decoder always uses parallel attention.
multi_query (`bool`, *optional*, defaults to `True`):
    Whether to use multi-query attention in the decoder. Ignored when `new_decoder_architecture` is `True`.
parallel_attn (`bool`, *optional*, defaults to `True`):
    Whether to compute attention in parallel with the feedforward layer. If False, they are consecutive
    instead, as in the original Transformer architecture. Ignored when `new_decoder_architecture` is `True`.
bias (`bool`, *optional*, defaults to `False`):
    Whether to use bias on Linear layers.
ffn_hidden_size (`int`, *optional*):
    The hidden size of the feedforward layer in the Transformer decoder.
    defaults to 4x hidden dim
activation (`str`, *optional*, defaults to `"gelu"`):
    The activation function used in the feedforward layer.

Example:

```python
>>> from transformers import FalconModel, FalconConfig

>>> # Initializing a small (2-layer) Falcon configuration
>>> configuration = FalconConfig(num_hidden_layers=2)

>>> # Initializing a model from the small configuration
>>> model = FalconModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```falconpast_key_valuesi   
vocab_sizei  hidden_size    num_hidden_layersG   num_attention_headsNnum_ln_in_parallel_attngh㈵>layer_norm_epsilong{Gz?initializer_rangeT	use_cacheg        hidden_dropoutattention_dropoutnum_kv_headsFalibinew_decoder_architecturemulti_queryparallel_attnbiasi   max_position_embeddingsrope_parameters   bos_token_ideos_token_idpad_token_idffn_hidden_sizegelu
activationtie_word_embeddingsc                   > UR                  SS 5      nUc  U R                  OUU l        U R                  c  U R                  OU R                  U l        U R                  c  U R                  S-  U l        [
        TU ]  " S0 UD6  g )Nn_embed    )popr   r   r   r&   super__post_init__)selfkwargsr+   	__class__s      ڀ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/falcon/configuration_falcon.pyr0   FalconConfig.__post_init__[   sx    **Y-/64++G8<8I8I8QD44W[WhWh'#'#3#3a#7D ''    c                 4    U R                   U R                  -  $ N)r   r   r1   s    r4   head_dimFalconConfig.head_dime   s    4#;#;;;r6   c                 $    U R                   (       + $ r8   )r   r9   s    r4   rotaryFalconConfig.rotaryi   s    ::~r6   )r&   r   r   )-__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferencer   int__annotations__r   r   r   r   r   floatr   r   boolr   r   r   r   r   r   r   r   r    r!   r   dictr#   r$   listr%   r&   r(   strr)   r0   propertyr:   r=   __static_attributes____classcell__)r3   s   @r4   r
   r
      s   #J J#4"5JKs!!*.S4Z.'++#u#It),NECK$&,,/us{T)/#L#*#E4$;,1dTk1#K#!%M4$;%D$+#'S'48O^d*T18!L#*!+-L#S	/D(-#L#*#"&OS4Z&#Jd
# $$( < <  r6   r
   N)rC   huggingface_hub.dataclassesr   configuration_utilsr   modeling_rope_utilsr   utilsr   r
   __all__r-   r6   r4   <module>rU      sN     . 3 1 # -.R# R  /Rj 
r6   