
    Z j                     d    S r SSKJr  SSKJr  SSKJr  \" SS9\ " S S	\5      5       5       rS	/rg
)zOpenAI GPT-2 configuration    )strict   )PreTrainedConfig)auto_docstringzopenai-community/gpt2)
checkpointc                      \ rS rSr% SrSrS/rSSSSS	.rS
r\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	\
S'   Sr\	S-  \
S'   Sr\\
S'   Sr\\	-  \
S'   Sr\\	-  \
S'   Sr\\	-  \
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\\
S'   Sr\S-  \
S'   Sr\\
S '   Sr\\	-  \
S!'   Sr\\
S"'   Sr\\
S#'   S$r \	S-  \
S%'   S$r!\	\"\	   -  S-  \
S&'   Sr#\	S-  \
S''   S(r$\\
S)'   S(r%\\
S*'   S(r&\\
S+'   Sr'\\
S,'   S-r(g).
GPT2Config   a  
summary_type (`string`, *optional*, defaults to `"cls_index"`):
    Argument used when doing sequence summary, used in the models [`GPT2DoubleHeadsModel`].
    Has to be one of the following options:
        - `"last"`: Take the last token hidden state (like XLNet).
        - `"first"`: Take the first token hidden state (like BERT).
        - `"mean"`: Take the mean of all tokens hidden states.
        - `"cls_index"`: Supply a Tensor of classification token position (like GPT/GPT-2).
        - `"attn"`: Not implemented now, use multi-head attention.
summary_use_proj (`bool`, *optional*, defaults to `True`):
    Argument used when doing sequence summary, used in the models [`GPT2DoubleHeadsModel`].
    Whether or not to add a projection after the vector extraction.
summary_activation (`str`, *optional*):
    Argument used when doing sequence summary. Used in for the multiple choice head in
    [`GPT2DoubleHeadsModel`].
    Pass `"tanh"` for a tanh activation to the output, any other value will result in no activation.
summary_proj_to_labels (`bool`, *optional*, defaults to `True`):
    Argument used when doing sequence summary, used in the models [`GPT2DoubleHeadsModel`].
    Whether the projection outputs should have `config.num_labels` or `config.hidden_size` classes.
summary_first_dropout (`float`, *optional*, defaults to 0.1):
    Argument used when doing sequence summary, used in the models [`GPT2DoubleHeadsModel`].
    The dropout ratio to be used after the projection and activation.
scale_attn_by_inverse_layer_idx (`bool`, *optional*, defaults to `False`):
    Whether to additionally scale attention weights by `1 / layer_idx + 1`.
reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`):
    Whether to scale keys (K) prior to computing attention (dot-product) and upcast attention
    dot-product/softmax to float() when training with mixed precision.

Example:

```python
>>> from transformers import GPT2Config, GPT2Model

>>> # Initializing a GPT2 configuration
>>> configuration = GPT2Config()

>>> # Initializing a model (with random weights) from the configuration
>>> model = GPT2Model(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```gpt2past_key_valuesn_embdn_positionsn_headn_layer)hidden_sizemax_position_embeddingsnum_attention_headsnum_hidden_layersiQ  
vocab_sizei   i      Nn_innergelu_newactivation_functiong?resid_pdrop
embd_pdrop
attn_pdropgh㈵>layer_norm_epsilong{Gz?initializer_range	cls_indexsummary_typeTsummary_use_projsummary_activationsummary_proj_to_labelssummary_first_dropoutscale_attn_weights	use_cacheiP  bos_token_ideos_token_idpad_token_idFscale_attn_by_inverse_layer_idxreorder_and_upcast_attnadd_cross_attentiontie_word_embeddings ))__name__
__module____qualname____firstlineno____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr   int__annotations__r   r   r   r   r   r   strr   floatr   r   r   r   r    r!   boolr"   r#   r$   r%   r&   r'   r(   listr)   r*   r+   r,   r-   __static_attributes__r.       |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/gpt2/configuration_gpt2.pyr	   r	      sj   )V J#4"5#0'&	M JKFCGSFCGS4Z))"K"!J!!J! $$#u##L##!d!%)d
)#'D'),53;,##It$L#*$+0L#S	/D(0#L#*#,1#T1$)T) %% $$r>   r	   N)	r3   huggingface_hub.dataclassesr   configuration_utilsr   utilsr   r	   __all__r.   r>   r?   <module>rD      sJ    ! . 3 # 23N%! N%  4N%b .r>   