
    Z j                      S SK Jr  S SKrS SKrS SKJr  S SKJr  S SKJ	r	  S SK
Jr  S SKJrJrJrJr  S SKrS SKrSS	KJrJrJr  SS
KJr  \	" S5      R5                  5       S-  r/ SQrSSSSSSSS.rSS1rSS1r/ SQr Sr!SSSSS S!S"S#S$S%S&.
r"\RF                  " S'5      r$\RF                  " S(5      r%\RF                  " S)5      r&\RF                  " S*5      r'\RF                  " S+\RP                  5      r)\RF                  " S,5      r*\RF                  " S-5      r+\RF                  " S.\RP                  \RX                  -  5      r-\RF                  " S/5      r.\RF                  " S05      r/\RF                  " S15      r0Sq1 " S2 S35      r2 " S4 S55      r3 " S6 S75      r4 " S8 S95      r5 " S: S;5      r6 " S< S=5      r7 " S> S?5      r81 S@kr9SASBSC.r:SD r;SkSE jr<SkSF jr=SG r>SH r?SlSI jr@SmSJ jrASK rBSnSL jrCSoSM jrDSpSN jrESqSO jrF\" SPSQ9SrSR j5       rGSS rHSsST jrISU rJSV rKStSuSW jjrLSX rMSY rN StSZ jrOS[ rPS\ rQS] rR\S" 1 S^k5      rTSqUSvS_ jrVS` rWSa rXSb rYSc rZSwSd jr[Se r\Sf r]      SxSg jr^SySh jr_StSSSSi.Sj jjr`g)z    )annotationsN)Mapping)	lru_cache)Path)	UnionType)ClassVarUnionget_args
get_origin   )MODELS_TO_PIPELINE#PIPELINE_TASKS_TO_SAMPLE_DOCSTRINGSPT_SAMPLE_DOCSTRINGS)ModelOutputsrctransformers)zconfiguration_*.pyzmodeling_*.pyztokenization_*.pyzprocessing_*.pyzimage_processing_pil_*.pyzimage_processing_*.pyzfeature_extractor_*.py)image_processing_autoIMAGE_PROCESSOR_MAPPING_NAMES)tokenization_autoTOKENIZER_MAPPING_NAMES)video_processing_autoVIDEO_PROCESSOR_MAPPING_NAMES)feature_extraction_autoFEATURE_EXTRACTOR_MAPPING_NAMES)processing_autoPROCESSOR_MAPPING_NAMES)configuration_autoCONFIG_MAPPING_NAMES)modeling_autoMODEL_MAPPING_NAMES)image_processor_classtokenizer_classvideo_processor_classfeature_extractor_classprocessor_classconfig_classmodel_class
preprocess__call__BaseImageProcessorProcessorMixin)
TextKwargsImagesKwargsVideosKwargsAudioKwargsz
, *kwargs*OpenAIGPTConfigXCLIPConfigKosmos2ConfigKosmos2_5ConfigDonutSwinConfig	EsmConfigParakeetCTCConfigOpenAIPrivacyFilterConfigLasrCTCConfigWav2Vec2Config)
openaizx-clipkosmos2z	kosmos2-5donutesmfoldparakeetzprivacy-filterlasrzwav2vec2-with-lmz*\[(.+?)\]\((https://huggingface\.co/.+?)\)z#(?m)^([ \t]*)(?=Example|Return|```)z(?m)^([ \t]*)(?=Return)z(?m)^([ \t]*)(?=Example|```)z(?:Args:)(\n.*)?(\n)?$z(of shape\s*(?:`.*?`|\(.*?\)))z(defaults to \s*[^)]*)zQ^\s{0,0}(\w+)\s*\(\s*([^, \)]*)(\s*.*?)\s*\)\s*:\s*((?:(?!\n^\s{0,0}\w+\s*\().)*)zForwardRef\('([\w.]+)'\)zOptional\[(.*?)\]z{(.*?)}c                     \ rS rSrSSS.rSSS.rSSS.rSSS.rSSS.rS	SS.r	S
SS.r
SSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSrg)ImageProcessorArgsi   z
class ImageProcessorArgs:
    images = {
        "description": "Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If "
        "passing in images with pixel values between 0 and 1, set `do_rescale=False`.",
        "shape": None,
    }
    videos = {
        "description": "Video to preprocess. Expects a single or batch of videos with pixel values ranging from 0 to 255. If "
        "passing in videos with pixel values between 0 and 1, set `do_rescale=False`.",
        "shape": None,
    }
    do_resize = {"description": "Whether to resize the image.", "shape": None}
    size = {"description": "Describes the maximum input dimensions to the model.", "shape": None}
    size_divisor = {"description": "The size by which to make sure both the height and width can be divided.", "shape": None}
    default_to_square = {"description": "Whether to default to a square image when resizing, if size is an int.", "shape": None}
    resample = {
        "description": "Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`. Only "
        "has an effect if `do_resize` is set to `True`.",
        "shape": None,
    }
    do_center_crop = {"description": "Whether to center crop the image.", "shape": None}
    crop_size = {"description": "Size of the output image after applying `center_crop`.", "shape": None}
    do_pad = {
        "description": "Whether to pad the image. Padding is done either to the largest size in the batch or to a fixed square "
        "size per image. The exact padding strategy depends on the model.",
        "shape": None,
    }
    pad_size = {
        "description": 'The size in `{"height": int, "width": int}` to pad the images to. Must be larger than any image size '
        "provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest height and width in "
        "the batch. Applied only when `do_pad=True`.",
        "shape": None,
    }
    do_rescale = {"description": "Whether to rescale the image.", "shape": None}
    rescale_factor = {"description": "Rescale factor to rescale the image by if `do_rescale` is set to `True`.", "shape": None}
    do_normalize = {"description": "Whether to normalize the image.", "shape": None}
    image_mean = {
        "description": "Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`.",
        "shape": None,
    }
    image_std = {
        "description": "Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to `True`.",
        "shape": None,
    }
    do_convert_rgb = {"description": "Whether to convert the image to RGB.", "shape": None}
    return_tensors = {
        "description": "Returns stacked tensors if set to `'pt'`, otherwise returns a list of tensors.",
        "shape": None,
    }
    data_format = {
        "description": "Only `ChannelDimension.FIRST` is supported. Added for compatibility with slow processors.",
        "shape": None,
    }
    input_data_format = {
        "description": """
            The channel dimension format for the input image. If unset, the channel dimension format is inferred
            from the input image. Can be one of:
            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        """,
        "shape": None,
    }
    device = {
        "description": "The device to process the images on. If unset, the device is inferred from the input images.",
        "shape": None,
    }
    disable_grouping = {
        "description": "Whether to disable grouping of images by size to process them individually and not in batches. If None, "
        "will be set to True if the images are on CPU, and False otherwise. This choice is based on empirical observations, as "
        "detailed here: https://github.com/huggingface/transformers/pull/38157",
        "shape": None,
    }
    image_seq_length = {
        "description": "The number of image tokens to be used for each image in the input. Added for backward compatibility but "
        "this should be set as a processor attribute in future models.",
        "shape": None,
    }
    __kwargs__ = {
        "description": "Additional image preprocessing options. Model-specific kwargs are listed above; see the TypedDict class "
        "for the complete list of supported arguments.",
        "shape": None,
    }
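# --- Illustrative sketch (assumption, not original module code): rendering one "Args:"
# entry from the table above. The real module also resolves types and defaults via the
# regexes at the top of the file; this shows only the basic shape of the output, with
# `<type>` as a stand-in for the resolved type string.
def _render_arg_example(name: str, entry: dict) -> str:
    shape = f" {entry['shape']}" if entry.get("shape") else ""
    description = " ".join(entry["description"].split())
    return f"{name} (`<type>`{shape}, *optional*):\n    {description}"


# Example: _render_arg_example("do_resize", ImageProcessorArgs.do_resize)
# -> "do_resize (`<type>`, *optional*):\n    Whether to resize the image."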
S.rSSS.rSSS.r	SS0r
SS0rSS0rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rS SS.rS!SS.rS"SS.rS#SS.rS$SS.rS%SS.rS&S'S.rS(S)S.rS(S)S.rS*SS.rS+S,S.rS-S.S.r S/SS.r!S0r"g)1ProcessorArgsi#  z2
    The image processor is a required input.
    z{image_processor_class})rD   typez,
    The tokenizer is a required input.
    z{tokenizer_class}z2
    The video processor is a required input.
    z{video_processor_class}z2
    The audio processor is a required input.
    z{audio_processor_class}z4
    The feature extractor is a required input.
    z{feature_extractor_class}z\
    A Jinja template to convert lists of messages in a chat into a tokenizable string.
    strrD   z
    The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
    (pretokenized string). If you pass a pretokenized input, set `is_split_into_words=True` to avoid ambiguity with batched inputs.
    a
  
    The audio or batch of audios to be prepared. Each audio can be a NumPy array or PyTorch tensor.
    In case of a NumPy array/PyTorch tensor, each audio should be of shape (C, T), where C is a number of channels,
    and T is the sample length of the audio.
    z
    If set, will return tensors of a particular framework. Acceptable values are:

    - `'pt'`: Return PyTorch `torch.Tensor` objects.
    - `'np'`: Return NumPy `np.ndarray` objects.
    NrC   aG  
    Whether or not to add special tokens when encoding the sequences. This will use the underlying
    [`PretrainedTokenizerBase.build_inputs_with_special_tokens`] function, which defines which tokens are
    automatically added to the input ids. This is useful if you want to add `bos` or `eos` tokens
    automatically.
    boola  
    Activates and controls padding. Accepts the following values:

    - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
      sequence is provided).
    - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
      acceptable input length for the model if that argument is not provided.
    - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
      lengths).
    z'bool, str or [`~utils.PaddingStrategy`]a  
    Activates and controls truncation. Accepts the following values:

    - `True` or `'longest_first'`: Truncate to a maximum length specified with the argument `max_length` or
      to the maximum acceptable input length for the model if that argument is not provided. This will
      truncate token by token, removing a token from the longest sequence in the pair if a pair of
      sequences (or a batch of pairs) is provided.
    - `'only_first'`: Truncate to a maximum length specified with the argument `max_length` or to the
      maximum acceptable input length for the model if that argument is not provided. This will only
      truncate the first sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
    - `'only_second'`: Truncate to a maximum length specified with the argument `max_length` or to the
      maximum acceptable input length for the model if that argument is not provided. This will only
      truncate the second sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
    - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
      greater than the model maximum admissible input size).
    z<bool, str or [`~tokenization_utils_base.TruncationStrategy`]a  
    Controls the maximum length to use by one of the truncation/padding parameters.

    If left unset or set to `None`, this will use the predefined model maximum length if a maximum length
    is required by one of the truncation/padding parameters. If the model has no specific maximum input
    length (like XLNet) truncation/padding to a maximum length will be deactivated.
    intaZ  
    If set to a number along with `max_length`, the overflowing tokens returned when
    `return_overflowing_tokens=True` will contain some tokens from the end of the truncated sequence
    returned to provide some overlap between truncated and overflowing sequences. The value of this
    argument defines the number of overlapping tokens.
    z
    If set will pad the sequence to a multiple of the provided value. Requires `padding` to be activated.
    This is especially useful to enable using Tensor Cores on NVIDIA hardware with compute capability
    `>= 7.5` (Volta).
    z
    Whether to return token type IDs. If left to the default, will return the token type IDs according to
    the specific tokenizer's default, defined by the `return_outputs` attribute.

    [What are token type IDs?](../glossary#token-type-ids)
    a  
    Whether to return the attention mask. If left to the default, will return the attention mask according
    to the specific tokenizer's default, defined by the `return_outputs` attribute.

    [What are attention masks?](../glossary#attention-mask)
    z
    Whether or not to return overflowing token sequences. If a pair of sequences of input ids (or a batch
    of pairs) is provided with `truncation_strategy = longest_first` or `True`, an error is raised instead
    of returning overflowing tokens.
    zC
    Whether or not to return special tokens mask information.
    z
    Whether or not to return `(char_start, char_end)` for each token.

    This is only available on fast tokenizers inheriting from [`PreTrainedTokenizerFast`], if using
    Python's tokenizer, this method will raise `NotImplementedError`.
    zE
    Whether or not to return the lengths of the encoded inputs.
    z@
    Whether or not to print more information and warnings.
    z
    Optional second sequence to be encoded. This can be a string, a list of strings (tokenized string using
    the `tokenize` method) or a list of integers (tokenized string ids using the `convert_tokens_to_ids`
    method).
    zstr, list[str] or list[int]a  
    The sequence or batch of sequences to be encoded as target texts. Each sequence can be a string or a
    list of strings (pretokenized string). If you pass pretokenized input, set `is_split_into_words=True`
    to avoid ambiguity with batched inputs.
    z!str, list[str] or list[list[str]]a!  
    Whether or not the input is already pre-tokenized (e.g., split into words). If set to `True`, the
    tokenizer assumes the input is already split into words (for instance, by splitting it on whitespace)
    which it will tokenize. This is useful for NER or token classification.
    zd
    Word-level bounding boxes. Each bounding box should be normalized to be on a 0-1000 scale.
    z(list[list[int]] or list[list[list[int]]]zY
    Word-level integer labels (for token classification tasks such as FUNSD, CORD).
    zlist[int] or list[list[int]]z
    Additional processing options for each modality (text, images, videos, audio). Model-specific parameters
    are listed above; see the TypedDict class for the complete list of supported arguments.
    rF   )#rG   rH   rI   rJ   image_processor	tokenizervideo_processoraudio_processorfeature_extractorchat_templatetextaudioaudiosr\   add_special_tokenspadding
truncation
max_lengthstridepad_to_multiple_ofreturn_token_type_idsreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverbose	text_pairtext_targettext_pair_targetis_split_into_wordsboxesword_labelsrb   rc   rF   rd   re   rg   rg   #  s    *	O $	I *	O *	O ,	 	M 	 D 	 E 	 F N 	 :G  O#J* 	J F
   
 ! 	"  	M 	G
 .I
 4K
 4
  ;	E /	K Jrd   rg   c                     \ rS rSrSS0rSS0rSS0rSS0rSS0rSS0r	SS	0r
SS
0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0r SS 0r!SS!0r"SS"0r#SS#0r$SS$0r%SS%0r&SS&0r'SS'0r(SS(0r)SS)0r*SS0r+SS*0r,SS+0r-SS,0r.SS-0r/SS.0r0SS/0r1SS00r2SS10r3SS20r4SS30r5SS40r6SS50r7SS60r8SS70r9SS80r:SS90r;SS:0r<SS;0r=SS<0r>SS=0r?SS>0r@SS?0rASS@0rBSSA0rCSSB0rDSSC0rESSD0rFSSE0rGSSF0rHSSG0rISSH0rJSSI0rKSSJ0rLSSK0rMSSL0rNSSM0rOSSN0rPSSO0rQSSP0rRSSQ0rSSSR0rTSSS0rUSST0rVSSU0rWSSV0rXSSW0rYSSX0rZSSY0r[SSZ0r\SS[0r]SS\0r^SS]0r_SSZ0r`SS^0raSS_0rbSS`0rcSSa0rdSSb0reSSc0rfSSd0rgSSe0rhSSf0riSSg0rjSSh0rkSSi0rlSSj0rmSSk0rnSSl0roSSm0rpSSn0rqSSo0rrSSp0rsSSq0rtSSr0ruSSs0rvSSq0rwSSt0rxSSu0rySSv0rzSSr0r{SSw0r|SSx0r}SSy0r~SSs0rSSz0rSS{0rSS|0rSS}0rSS~0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0rSS0r\r\r\r\Yr\Yr\Yr\Yr\Yr\1r\r\r\r\&r\`r\`r\ar\ar\ar\\r\\r\'r\'r\r\r\r\r\r\?r\r\r\+r\*r\+r\+r\*r\*r\%r\$r\$r\%r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\GGr \HGr\IGr\JGr\_Gr\_Gr\DGr\(Gr\Gr\(Gr	\(Gr
\(Gr\qGr\qGr\^Gr\Gr\Gr\Gr\Gr\Gr\GrG\ Gr\Gr\Gr\Gr\Gr\Gr\Gr\Gr\TGrG\GrG\GrG\Gr \iGr!\iGr"\7Gr#\[Gr$\Gr%SGr&g)
class ConfigArgs:
    output_hidden_states = {"description": "Whether or not the model should return all hidden-states."}
    dtype = {
        "description": "The `dtype` of the weights. This attribute can be used to initialize the model to a non-default `dtype` "
        "(which is normally `float32`) and thus allow for optimal storage allocation. For example, if the saved model is "
        "`float16`, ideally we want to load it back using the minimal amount of memory needed to load `float16` weights."
    }
    chunk_size_feed_forward = {
        "description": "The chunk size of all feed forward layers in the residual attention blocks. A chunk size of `0` means "
        "that the feed forward layer is not chunked. A chunk size of n means that the feed forward layer processes `n` < "
        "sequence_length embeddings at a time. For more information on feed forward chunking, see "
        "[How does Feed Forward Chunking work?](../glossary.html#feed-forward-chunking)."
    }
    id2label = {"description": "A map from index (for instance prediction index, or target index) to label."}
    label2id = {"description": "A map from label to index for the model."}
    problem_type = {
        "description": 'Problem type for `XxxForSequenceClassification` models. Can be one of `"regression"`, '
        '`"single_label_classification"` or `"multi_label_classification"`.'
    }
    tokenizer_class = {"description": "The class name of model's tokenizer."}
    vocab_size = {
        "description": "Vocabulary size of the model. Defines the number of different tokens that can be represented by the "
        "`input_ids`."
    }
    hidden_size = {"description": "Dimension of the hidden representations."}
    intermediate_size = {"description": "Dimension of the MLP representations."}
    head_dim = {"description": "The attention head dimension. If None, it will default to hidden_size // num_attention_heads."}
    num_hidden_layers = {"description": "Number of hidden layers in the Transformer decoder."}
    num_attention_heads = {"description": "Number of attention heads for each attention layer in the Transformer decoder."}
    num_key_value_heads = {
        "description": "This is the number of key_value heads that should be used to implement Grouped Query Attention. If "
        "`num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if `num_key_value_heads=1` "
        "the model will use Multi Query Attention (MQA) otherwise GQA is used. When converting a multi-head checkpoint to a "
        "GQA checkpoint, each group key and value head should be constructed by meanpooling all the original heads within that "
        "group. For more details, check out [this paper](https://huggingface.co/papers/2305.13245). If it is not specified, "
        "will default to `num_attention_heads`."
    }
    hidden_act = {
        "description": 'The non-linear activation function (function or string) in the decoder. For example, `"gelu"`, '
        '`"relu"`, `"silu"`, etc.'
    }
    max_position_embeddings = {"description": "The maximum sequence length that this model might ever be used with."}
    initializer_range = {
        "description": "The standard deviation of the truncated_normal_initializer for initializing all weight matrices."
    }
    rms_norm_eps = {"description": "The epsilon used by the rms normalization layers."}
    use_cache = {
        "description": "Whether or not the model should return the last key/values attentions (not used by all models). Only "
        "relevant if `config.is_decoder=True` or when the model is a decoder-only generative model."
    }
    rope_parameters = {
        "description": "Dictionary containing the configuration parameters for the RoPE embeddings. The dictionary should "
        "contain a value for `rope_theta` and optionally parameters used for scaling in case you want to use RoPE with longer "
        "`max_position_embeddings`."
    }
    attention_bias = {
        "description": "Whether to use a bias in the query, key, value and output projection layers during self-attention."
    }
    mlp_bias = {"description": "Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers."}
    attention_dropout = {"description": "The dropout ratio for the attention probabilities."}
    pretraining_tp = {
        "description": "Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this "
        "document](https://huggingface.co/docs/transformers/main/perf_train_gpu_many#tensor-parallelism) to understand more "
        "about it. This value is necessary to ensure exact reproducibility of the pretraining results. Please refer to [this "
        "issue](https://github.com/pytorch/pytorch/issues/76232)."
    }
    pad_token_id = {"description": "Token id used for padding in the vocabulary."}
    eos_token_id = {"description": "Token id used for end-of-stream in the vocabulary."}
    bos_token_id = {"description": "Token id used for beginning-of-stream in the vocabulary."}
    sep_token_id = {"description": "Token id used for separator in the vocabulary."}
    cls_token_id = {"description": "Token id used for CLS in the vocabulary."}
    tie_word_embeddings = {"description": "Whether to tie weight embeddings according to model's `tied_weights_keys` mapping."}
    d_model = {"description": "Size of the encoder layers and the pooler layer."}
    d_kv = {
        "description": "Size of the key, query, value projections per attention head. The `inner_dim` of the projection layer "
        "will be defined as `num_heads * d_kv`."
    }
    num_decoder_layers = {
        "description": "Number of hidden layers in the Transformer decoder. Will use the same value as `num_layers` if not set."
    }
    num_encoder_layers = {
        "description": "Number of hidden layers in the Transformer encoder. Will use the same value as `num_layers` if not set."
    }
    dropout_rate = {"description": "The ratio for all dropout layers."}
    classifier_dropout = {"description": "The dropout ratio for classifier."}
    layer_norm_eps = {"description": "The epsilon used by the layer normalization layers."}
    initializer_factor = {
        "description": "A factor for initializing all weight matrices (should be kept to 1, used internally for initialization "
        "testing)."
    }
    encoder_attention_heads = {"description": "Number of attention heads for each attention layer in the Transformer encoder."}
    decoder_attention_heads = num_attention_heads
    decoder_ffn_dim = {"description": 'Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.'}
    encoder_ffn_dim = {"description": 'Dimensionality of the "intermediate" (often named feed-forward) layer in encoder.'}
    activation_dropout = {"description": "The dropout ratio for activations inside the fully connected layer."}
    encoder_layerdrop = {
        "description": "The LayerDrop probability for the encoder. See the [LayerDrop "
        "paper](https://huggingface.co/papers/1909.11556) for more details."
    }
    decoder_layerdrop = {
        "description": "The LayerDrop probability for the decoder. See the [LayerDrop "
        "paper](https://huggingface.co/papers/1909.11556) for more details."
    }
    scale_embedding = {"description": "Whether to scale embeddings by dividing by sqrt(d_model)."}
    forced_eos_token_id = {
        "description": "The id of the token to force as the last generated token when `max_length` is reached. Usually set to "
        "`eos_token_id`."
    }
    moe_intermediate_size = {"description": "Intermediate size of the routed expert MLPs."}
    num_experts = {"description": "Number of routed experts in MoE layers."}
    num_experts_per_tok = {
        "description": "Number of experts to route each token to. This is the top-k value for the token-choice routing."
    }
    num_shared_experts = {"description": "Number of shared experts that are always activated for all tokens."}
    layer_types = {
        "description": "A list that explicitly maps each layer index with its layer type. If not provided, it will be "
        "automatically generated based on config values."
    }
    norm_topk_prob = {"description": "Whether to normalize the weights of the routed experts."}
    topk_group = {
        "description": "Number of selected groups for each token (for each token, ensuring the selected experts is only within "
        "`topk_group` groups)."
    }
    qk_rope_head_dim = {"description": "Dimension of the query/key heads that use rotary position embeddings."}
    v_head_dim = {"description": "Dimension of the value heads."}
    qk_nope_head_dim = {"description": "Dimension of the query/key heads that don't use rotary position embeddings."}
    kv_lora_rank = {"description": "Rank of the LoRA matrices for key and value projections."}
    q_lora_rank = {"description": "Rank of the LoRA matrices for query projections."}
    routed_scaling_factor = {"description": "Scaling factor for routed experts."}
    n_routed_experts = {"description": "Number of routed experts."}
    n_shared_experts = {"description": "Number of shared experts."}
    vision_config = {"description": "The config object or dictionary of the vision backbone."}
    text_config = {"description": "The config object or dictionary of the text backbone."}
    projector_hidden_act = {"description": "The activation function used by the multimodal projector."}
    vision_feature_select_strategy = {
        "description": "The feature selection strategy used to select the vision feature from the vision backbone."
    }
    vision_feature_layer = {
        "description": "The index of the layer to select the vision feature. If multiple indices are provided, the vision "
        "feature of the corresponding indices will be concatenated to form the vision features."
    }
    multimodal_projector_bias = {"description": "Whether to use bias in the multimodal projector."}
    image_token_id = {"description": "The image token index used as a placeholder for input images."}
    video_token_id = {"description": "The video token index used as a placeholder for input videos."}
    audio_token_id = {"description": "The audio token index used as a placeholder for input audio."}
    image_seq_length = {"description": "Sequence length of one image embedding."}
    video_seq_length = {"description": "Sequence length of one video embedding."}
    add_cross_attention = {"description": "Whether cross-attention layers should be added to the model."}
    is_decoder = {"description": "Whether the model is used as a decoder or not. If `False`, the model is used as an encoder."}
    sliding_window = {"description": "Sliding window attention window size. If `None`, no sliding window is applied."}
    use_sliding_window = {"description": "Whether to use sliding window attention."}
    shared_expert_intermediate_size = {"description": "Intermediate size of the shared expert MLPs."}
    decoder_sparse_step = {
        "description": "The frequency of adding a sparse MoE layer. The default is 1, which means all decoder layers are "
        "sparse MoE."
    }
    output_router_logits = {
        "description": "Whether or not the router logits should be returned by the model. Enabling this will also allow the "
        "model to output the auxiliary loss, including load balancing loss and router z-loss."
    }
    router_aux_loss_coef = {
        "description": "Auxiliary load balancing loss coefficient. Used to penalize uneven expert routing in MoE models."
    }
    out_indices = {
        "description": "Indices of the intermediate hidden states (feature maps) to return from the backbone. Each index "
        "corresponds to one stage of the model."
    }
    out_features = {
        "description": 'Names of the intermediate hidden states (feature maps) to return from the backbone. One of `"stem"`, '
        '`"stage1"`, `"stage2"`, etc.'
    }
    image_size = {"description": "The size (resolution) of each image."}
    patch_size = {"description": "The size (resolution) of each patch."}
    num_channels = {"description": "The number of input channels."}
    num_mel_bins = {
        "description": "Number of mel features used per input frame. Should correspond to the value used in the "
        "`AutoFeatureExtractor` class."
    }
    sampling_rate = {
        "description": "The sampling rate at which the audio files should be digitalized expressed in hertz (Hz)."
    }
    hidden_dropout = {
        "description": "The dropout probability for all fully connected layers in the embeddings, encoder, and pooler."
    }
    mlp_ratio = {"description": "Ratio of the MLP hidden dim to the embedding dim."}
    qkv_bias = {"description": "Whether to add a bias to the queries, keys and values."}
    n_embd = {"description": "Dimensionality of the embeddings and hidden states."}
    embd_pdrop = {"description": "The dropout ratio for the embeddings."}
    resid_pdrop = embd_pdrop
    clip_qkv = {"description": "If not `None`, cap the absolute value of the query, key, and value tensors to this value."}
    type_vocab_size = {"description": "The vocabulary size of the `token_type_ids`."}
    audio_config = {"description": "The config object or dictionary of the audio backbone."}
    layerdrop = {
        "description": "The LayerDrop probability. See the [LayerDrop paper](https://huggingface.co/papers/1909.11556) for "
        "more details."
    }
    expert_capacity = {
        "description": "The number of tokens that each expert can process. If `None`, `expert_capacity` will be set to "
        "`(sequence_length / num_experts) * capacity_factor`."
    }
    decoder_start_token_id = {
        "description": "If an encoder-decoder model starts decoding with a different token than `bos`, the id of that token."
    }
    is_encoder_decoder = {"description": "Whether the model is used as an encoder/decoder or not."}
    num_codebooks = {"description": "The number of parallel codebooks used by the model."}
    codebook_dim = {"description": "Dimensionality of each codebook embedding vector."}
    hidden_sizes = {"description": "Dimensionality (hidden size) at each stage of the model."}
    depths = {"description": "Depth of each layer in the Transformer."}
    patch_sizes = {"description": "Patch size at each stage of the model."}
    strides = {"description": "Stride at each stage of the model."}
    router_jitter_noise = {
        "description": "Amount of noise to add to the router logits during training for better load balancing."
    }
    num_local_experts = {
        "description": "Number of local experts on each device. `num_experts` should be divisible by `num_local_experts`."
    }
    qk_layernorm = {"description": "Whether to use query-key normalization in the attention."}
    backbone_config = {"description": "The configuration of the backbone model."}
    no_object_weight = {"description": "Relative classification weight of the no-object class in the object detection loss."}
    class_weight = {"description": "Relative weight of the classification error in the Hungarian matching cost."}
    mask_weight = {"description": "Relative weight of the focal loss in the panoptic segmentation loss."}
    dice_weight = {"description": "Relative weight of the dice loss in the panoptic segmentation loss."}
    class_cost = class_weight
    bbox_cost = {"description": "Relative weight of the L1 bounding box error in the Hungarian matching cost."}
    giou_cost = {"description": "Relative weight of the generalized IoU loss in the Hungarian matching cost."}
    focal_alpha = {"description": "Alpha parameter in the focal loss."}
    mask_loss_coefficient = mask_weight
    giou_loss_coefficient = {"description": "Relative weight of the generalized IoU loss in the panoptic segmentation loss."}
    bbox_loss_coefficient = {"description": "Relative weight of the L1 bounding box loss in the panoptic segmentation loss."}
    cls_loss_coefficient = {"description": "Relative weight of the classification loss in the panoptic segmentation loss."}
    dice_loss_coefficient = dice_weight
    semantic_loss_ignore_index = {
        "description": "The index that is ignored by the loss function of the semantic segmentation model."
    }
    projection_dim = {"description": "Dimensionality of text and vision projection layers."}
    logit_scale_init_value = {"description": "The initial value of the *logit_scale* parameter."}
    num_dense_layers = {
        "description": "Number of initial dense layers before MoE layers begin. Layers with index < num_dense_layers will use "
        "standard dense MLPs instead of MoE."
    }
    drop_path_rate = {"description": "Drop path rate for the patch fusion."}
    vq_config = {"description": "Configuration dict of the vector quantize module."}
    num_embeddings = {"description": "Number of codebook embeddings."}
    double_latent = {"description": "Whether to use double z channels."}
    latent_channels = {"description": "Number of channels for the latent space."}
    qformer_config = {"description": "Configuration dict of the Q-Former module."}
    conv_kernel_size = {"description": "The size of the convolutional kernel."}
    output_stride = {"description": "The ratio between the spatial resolution of the input and output feature maps."}
    depth_multiplier = {
        "description": 'Shrinks or expands the number of channels in each layer. This is sometimes also called "alpha" or '
        '"width multiplier".'
    }
    use_absolute_position_embeddings = {"description": "Whether to use absolute position embeddings."}
    use_relative_position_bias = {"description": "Whether to use relative position bias in the self-attention layers."}
    layer_scale_init_value = {
        "description": "Scale to use in the self-attention layers. 0.1 for base, 1e-6 for large. Set 0 to disable layer scale."
    }
    vlm_config = {"description": "The config object or dictionary of the vision-language backbone."}
    init_xavier_std = {
        "description": "The scaling factor used for the Xavier initialization of the cross-attention weights."
    }
    auxiliary_loss = {"description": "Whether auxiliary decoding losses (losses at each decoder layer) are to be used."}
    encoder_config = {"description": "The config object or dictionary of the encoder backbone."}
    decoder_config = {"description": "The config object or dictionary of the decoder backbone."}
    embedding_multiplier = {
        "description": "Scaling factor applied to the word embeddings. Used to scale the embeddings relative to the hidden "
        "size."
    }
    logits_scaling = {
        "description": "Scaling factor applied to the output logits before computing the probability distribution."
    }
    residual_multiplier = {"description": "Scaling factor applied to the residual connections."}
    attention_multiplier = {"description": "Scaling factor applied to the attention weights."}
    classifier_activation = {"description": "The activation function for the classification head."}
    return_dict = {"description": "Whether to return a `ModelOutput` (dataclass) instead of a plain tuple."}
    router_z_loss_coef = {
        "description": "Coefficient for the router z-loss, which penalizes large router logits to improve training stability."
    }
    final_logit_softcapping = {
        "description": "Soft-capping value applied to the final logits before computing the probability distribution. Logits "
        "are scaled by `tanh(logit / cap) * cap`."
    }
    cross_attention_hidden_size = {
        "description": "Hidden size of the encoder outputs projected into the cross-attention key/value space of the decoder. "
        "Used when the encoder and decoder have different hidden sizes."
    }
    input_dim = {"description": "Dimensionality of the input acoustic features (e.g., number of mel-filterbank channels)."}
    use_auxiliary_loss = {
        "description": "Whether to calculate loss using intermediate predictions from transformer decoder."
    }
    batch_norm_eps = {"description": "The epsilon used by the batch normalization layers."}
    max_window_layers = {
        "description": "The number of layers using full attention. The first `max_window_layers` layers will use full "
        "attention, while any additional layer afterwards will use SWA (Sliding Window Attention)."
    }
    ctc_loss_reduction = {
        "description": "Specifies the reduction to apply to the output of `torch.nn.CTCLoss`. Only relevant when training."
    }
    mask_feature_prob = {
        "description": "Percentage (between 0 and 1) of all feature vectors along the feature axis which will be masked. The "
        "masking procedure generates `mask_feature_prob*len(feature_axis)/mask_time_length` independent masks over the axis. "
        "If reasoning from the probability of each feature vector to be chosen as the start of the vector span to be masked, "
        "*mask_feature_prob* should be `prob_vector_start*mask_feature_length`. Note that overlap may decrease the actual "
        "percentage of masked vectors. This is only relevant if `apply_spec_augment` is `True`."
    }
    eos_coefficient = {
        "description": "Relative classification weight of the 'no-object' class in the object detection loss."
    }
    num_labels = {
        "description": "Number of labels to use in the last layer added to the model, typically for a classification task."
    }
    depth = {"description": "Number of Transformer layers in the vision encoder."}
    temporal_patch_size = {"description": "Temporal patch size used in the 3D patch embedding for video inputs."}
    spatial_merge_size = {
        "description": "The size of the spatial merge window used to reduce the number of visual tokens by merging neighboring "
        "patches."
    }
    vision_start_token_id = {
        "description": "Token ID that marks the start of a visual segment in the multimodal input sequence."
    }
    vision_end_token_id = {
        "description": "Token ID that marks the end of a visual segment in the multimodal input sequence."
    }
    mamba_n_heads = {"description": "The number of mamba heads used in the v2 implementation."}
    mamba_d_head = {"description": "Head embedding dimension size."}
    mamba_n_groups = {"description": "The number of the mamba groups used in the v2 implementation."}
    mamba_d_conv = {"description": "The size of the mamba convolution kernel."}
    mamba_expand = {
        "description": "Expanding factor (relative to hidden_size) used to determine the mamba intermediate size."
    }
    mamba_chunk_size = {"description": "The chunks in which to break the sequence when doing prefill/training."}
    mamba_conv_bias = {
        "description": "Flag indicating whether or not to use bias in the convolution layer of the mamba mixer block."
    }
    mamba_proj_bias = {
        "description": 'Flag indicating whether or not to use bias in the input and output projections (["in_proj", '
        '"out_proj"]) of the mamba mixer block.'
    }
    time_step_min = {"description": "Minimum `time_step` used to bound `dt_proj.bias`."}
    time_step_max = {"description": "Maximum `time_step` used to bound `dt_proj.bias`."}
    time_step_limit = {"description": "Accepted range of time step values for clamping."}
    expand_ratio = {"description": "Expand ratio to set the output dimensions for the expansion."}
    state_size = {"description": "Size of the SSM state (latent state dimension) in the Mamba layers."}
    time_step_rank = {
        "description": 'Rank of the delta (time step) projection. Can be `"auto"` to set it automatically.'
    }
    time_step_floor = {"description": "Minimum allowed value for the discrete time step delta after softplus activation."}
    time_step_scale = {"description": "Scale applied to the time step delta before discretization."}
    time_step_init_scheme = {
        "description": 'Initialization scheme for the time step delta. Can be `"random"` or `"uniform"`.'
    }
    mamba_d_ssm = {"description": "Inner state size of the SSM (state-space model) in the Mamba layers of FalconH1."}
    mamba_norm_before_gate = {
        "description": "Whether to apply normalization before the gating mechanism in the Mamba mixer."
    }
    mamba_rms_norm = {
        "description": "Whether to use RMS normalization in the Mamba layers (as opposed to standard LayerNorm)."
    }

    # Aliases: alternative names that various model configs use for the canonical
    # hyperparameters above; each alias points at the matching entry.
    mamba_d_state = state_size
    mamba_num_heads = mamba_n_heads
    mamba_head_dim = mamba_d_head
    num_input_channels = num_channels
    audio_channels = num_channels
    input_channels = num_channels
    in_channels = num_channels
    in_chans = num_channels
    scale_attn_weights = attention_multiplier
    attention_probs_dropout_prob = attention_dropout
    attn_pdrop = attention_dropout
    attn_dropout = attention_dropout
    dropout = dropout_rate
    resid_dropout = hidden_dropout
    residual_dropout = hidden_dropout
    emb_pdrop = embd_pdrop
    embed_dropout = embd_pdrop
    embedding_dropout = embd_pdrop
    hidden_dropout_prob = hidden_dropout
    hidden_dropout_rate = hidden_dropout
    classifier_dropout_prob = classifier_dropout
    classifier_dropout_rate = classifier_dropout
    dropout_prob = dropout_rate
    dropout_p = dropout_rate
    decoder_attention_dropout = attention_dropout
    decoder_dropout = dropout_rate
    encoder_dropout = dropout_rate
    route_scale = routed_scaling_factor
    activation_function = hidden_act
    hidden_dim = hidden_size
    num_decoder_attention_heads = decoder_attention_heads
    num_encoder_attention_heads = encoder_attention_heads
    decoder_num_heads = decoder_attention_heads
    decoder_num_attention_heads = decoder_attention_heads
    encoder_num_heads = encoder_attention_heads
    encoder_num_attention_heads = encoder_attention_heads
    encoder_layers = num_encoder_layers
    decoder_layers = num_decoder_layers
    decoder_num_layers = num_decoder_layers
    encoder_num_layers = num_encoder_layers
    d_ff = intermediate_size
    dim_ff = intermediate_size
    n_inner = intermediate_size
    decoder_intermediate_size = decoder_ffn_dim
    num_kv_heads = num_key_value_heads
    num_layers = num_hidden_layers
    n_layers = num_hidden_layers
    n_layer = num_hidden_layers
    layers = num_hidden_layers
    encoder_num_hidden_layers = num_encoder_layers
    decoder_num_hidden_layers = num_decoder_layers
    num_heads = num_attention_heads
    n_heads = num_attention_heads
    n_head = num_attention_heads
    hidden_activation = hidden_act
    activation = hidden_act
    mlp_hidden_act = hidden_act
    d_head = head_dim
    dim_head = head_dim
    d_inner = intermediate_size
    ffn_dim = intermediate_size
    attention_heads = num_attention_heads
    n_positions = max_position_embeddings
    init_std = initializer_range
    initializer_std = initializer_range
    projector_bias = multimodal_projector_bias
    image_token_index = image_token_id
    video_token_index = video_token_id
    audio_token_index = audio_token_id
    embedding_size = hidden_size
    embed_dim = hidden_size
    projection_hidden_act = projector_hidden_act
    layer_norm_epsilon = layer_norm_eps
    rms_norm = rms_norm_eps
    norm_eps = layer_norm_eps
    eps = layer_norm_eps
    norm_epsilon = layer_norm_eps
    qk_layernorms = qk_layernorm
    use_qk_norm = qk_layernorm
    use_qkv_bias = qkv_bias
    decoder_hidden_act = hidden_act
    decoder_hidden_dim = hidden_size
    decoder_hidden_size = hidden_size
    encoder_hidden_dim = hidden_size
    encoder_hidden_size = hidden_size
    layer_scale_initial_scale = layer_scale_init_value
    multi_modal_projector_bias = multimodal_projector_bias
    projector_hidden_size = projection_dim
    projection_size = projection_dim
    kernel_size = conv_kernel_size
    conv_kernel = conv_kernel_size
    use_absolute_embeddings = use_absolute_position_embeddings
    use_abs_pos = use_absolute_position_embeddings
    use_rel_pos = use_relative_position_bias
    aux_loss_coef = router_aux_loss_coef
    embedding_dimension = hidden_size
    embedding_dim = hidden_size
    emb_dim = hidden_size
    n_codebooks = num_codebooks
    codebook_size = num_embeddings
    layers_block_type = layer_types
    sample_rate = sampling_rate
    text_vocab_size = vocab_size
SS.r	SSS.r
SSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSSS.rSS S.rS!S S.rS"SS.rS#SS.rS$SS.rS%S&S.r S'S(S.r!S)SS.r"S*SS.r#S+S,S.r$S-S.S.r%S/S0S.r&S1r'g)2	ModelArgsi.  a7  
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
    (of shape `(batch_size, sequence_length)`rC   aC  
    Calculate logits for the last `num_logits_to_keep` tokens. If `0`, calculate logits for all
    `input_ids` (special case). Only last token logits are needed for generation, and calculating them only for that
    token can save memory, which becomes pretty significant for long sequences or large vocabulary size.
    Na"  
    Indices of input sequence tokens in the vocabulary. Padding will be ignored by default.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
    a  
    Float values of input raw speech waveform. Values can be obtained by loading a `.flac` or `.wav` audio file
    into an array of type `list[float]`, a `numpy.ndarray` or a `torch.Tensor`, *e.g.* via the torchcodec library
    (`pip install torchcodec`) or the soundfile library (`pip install soundfile`).
    To prepare the array into `input_values`, the [`AutoProcessor`] should be used for padding and conversion
    into a tensor of type `torch.FloatTensor`. See [`{processor_class}.__call__`] for details.
    z
    Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.

    [What are attention masks?](../glossary#attention-mask)
    z
    Mask to avoid performing attention on certain token indices. By default, a causal mask will be used, to
    make sure the model can only look at previous inputs in order to predict the future.
    z/of shape `(batch_size, target_sequence_length)`z
    Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
    if the model is configured as a decoder.
    5of shape `(batch_size, sequence_length, hidden_size)`a,  
    Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in
    the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`:

    - 1 for tokens that are **not masked**,
    - 0 for tokens that are **masked**.
    a  
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0, 1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
    z
    Indices of input sequence tokens matching each modality. For example text (0), image (1), video (2).
    Multimodal token type ids can be obtained using [`AutoProcessor`]. See [`ProcessorMixin.__call__`] for details.

    z
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, config.n_positions - 1]`.

    [What are position IDs?](../glossary#position-ids)
    a  
    Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
    blocks) that can be used to speed up sequential decoding. This typically consists in the `past_key_values`
    returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`.

    Only [`~cache_utils.Cache`] instance is allowed as input, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).
    If no `past_key_values` are passed, [`~cache_utils.DynamicCache`] will be initialized by default.

    The model will output the same cache format that is fed as input.

    If `past_key_values` are used, the user is expected to input only unprocessed `input_ids` (those that don't
    have their past key value states given to this model) of shape `(batch_size, unprocessed_length)` instead of all `input_ids`
    of shape `(batch_size, sequence_length)`.
    a  
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
    model's internal embedding lookup matrix.
    a  
    Indices of decoder input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are decoder input IDs?](../glossary#decoder-input-ids)
    a(  
    Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded
    representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be
    input (see `past_key_values`). This is useful if you want more control over how to convert
    `decoder_input_ids` indices into associated vectors than the model's internal embedding lookup matrix.

    If `decoder_input_ids` and `decoder_inputs_embeds` are both unset, `decoder_inputs_embeds` takes the value
    of `inputs_embeds`.
    z<of shape `(batch_size, target_sequence_length, hidden_size)`z
    If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
    `past_key_values`).
    z
    Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
    tensors for more detail.
    z
    Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
    more detail.
    zU
    Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
    z9 input to the layer of shape `(batch, seq_len, embed_dim)zD
    Whether to interpolate the pre-trained position encodings.
    z
    Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`,
    with `head_dim` being the embedding dimension of each attention head.
    a  
    Model configuration class with all the parameters of the model. Initializing with a config file does not
    load the weights associated with the model, only the configuration. Check out the
    [`~PreTrainedModel.from_pretrained`] method to load the model weights.
    a  
    Labels for position (index) of the start of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
    zof shape `(batch_size,)`a  
    Labels for position (index) of the end of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
    aB  
    Tuple consists of (`last_hidden_state`, *optional*: `hidden_states`, *optional*: `attentions`)
    `last_hidden_state` of shape `(batch_size, sequence_length, hidden_size)`, *optional*) is a sequence of
    hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.
    z
    Whether or not to return the logits of all the routers. They are useful for computing the router loss, and
    should not be returned during inference.
    a  
    If an `int`, compute logits for the last `logits_to_keep` tokens. If `0`, calculate logits for all
    `input_ids` (special case). Only last token logits are needed for generation, and calculating them only for that
    token can save memory, which becomes pretty significant for long sequences or large vocabulary size.
    If a `torch.Tensor`, must be 1D corresponding to the indices to keep in the sequence length dimension.
    This is useful when using packed tensor format (single dimension for batch and sequence length).
    a  
    The tensors corresponding to the input images. Pixel values can be obtained using
    [`{image_processor_class}`]. See [`{image_processor_class}.__call__`] for details ([`{processor_class}`] uses
    [`{image_processor_class}`] for processing images).
    z=of shape `(batch_size, num_channels, image_size, image_size)`a  
    The tensors corresponding to the input video. Pixel values for videos can be obtained using
    [`{video_processor_class}`]. See [`{video_processor_class}.__call__`] for details ([`{processor_class}`] uses
    [`{video_processor_class}`] for processing videos).
    zIof shape `(batch_size, num_frames, num_channels, frame_size, frame_size)`r   z
    The feature selection strategy used to select the vision feature from the vision backbone.
    Can be one of `"default"` or `"full"`.
    zU
    The sizes of the images in the batch, being (height, width) for each image.
    zof shape `(batch_size, 2)`a  
    Mask to avoid performing attention on padding pixel values. Mask values selected in `[0, 1]`:

    - 1 for pixels that are real (i.e. **not masked**),
    - 0 for pixels that are padding (i.e. **masked**).

    [What are attention masks?](../glossary#attention-mask)
    &of shape `(batch_size, height, width)`a  
    The tensors corresponding to the input audio features. Audio features can be obtained using
    [`{feature_extractor_class}`]. See [`{feature_extractor_class}.__call__`] for details ([`{processor_class}`] uses
    [`{feature_extractor_class}`] for processing audios).
    z5of shape `(batch_size, sequence_length, feature_dim)`rF   )(rG   rH   rI   rJ   labelsnum_logits_to_keep	input_idsinput_valuesattention_maskdecoder_attention_maskencoder_hidden_statesencoder_attention_masktoken_type_idsmm_token_type_idsposition_idspast_key_valuesinputs_embedsdecoder_input_idsdecoder_inputs_embedsr   output_attentionsr   r  hidden_statesinterpolate_pos_encodingposition_embeddingsconfigstart_positionsend_positionsencoder_outputsr   logits_to_keeppixel_valuespixel_values_videosr   r   image_sizes
pixel_maskinput_featuresrc   rF   rd   re   r  r  .  sk   
 <F
  <
I <	L <
N C I <	 <
N
 <
 <L O&
 IM C
 P I   	K WM 	  
 F
 ,O
 ,M
 O  	N
 QL
 ]
  &" .	K :
J
 INrd   r  c                  b   \ rS rSrSSS.rSSSS.rS	SS
S.rSSSS.rSSS.rSSSS.r	SSS
S.r
SSSS.rSSS.rSSS
S.rSSSS.rSSSS.rSSSS.rSSSS.rSSSS.rSSS.rSSS.rSS S.rS!S S.rS"S#S.rS$S%S.rS&S'S.rS(S)S.rS*S+S.rS,S+S.rS-S.S.rS/S0S.rS1S2S.rS3S4S.r S5S6S.r!S7S8S.r"S9r#g):ModelOutputArgsi{  zQ
    Sequence of hidden-states at the output of the last layer of the model.
    r  rC   a  
    It is a [`~cache_utils.Cache`] instance. For more details, see our [kv cache guide](https://huggingface.co/docs/transformers/en/kv_cache).

    Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally if
    `config.is_encoder_decoder=True` in the cross-attention blocks) that can be used (see `past_key_values`
    input) to speed up sequential decoding.
    NzHreturned when `use_cache=True` is passed or when `config.use_cache=True`)rD   rE   additional_infoa:  
    Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
    one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

    Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    z^returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`a   
    Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`.

    Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
    heads.
    zXreturned when `output_attentions=True` is passed or when `config.output_attentions=True`zV
    Last layer hidden-state after a pooling operation on the spatial dimensions.
    z$of shape `(batch_size, hidden_size)`a)  
    Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`.

    Attentions weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
    weighted average in the cross-attention heads.
    a3  
    Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
    one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

    Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
    a  
    Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`.

    Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in the
    self-attention heads.
    z`
    Sequence of hidden-states at the output of the last layer of the encoder of the model.
    a3  
    Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
    one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

    Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
    a  
    Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
    sequence_length)`.

    Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in the
    self-attention heads.
    """,
        "shape": None,
        "additional_info": "returned when `output_attentions=True` is passed or when `config.output_attentions=True`",
    }
    router_logits = {
        "description": """
    Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, sequence_length, num_experts)`.

    Router logits of the model, useful to compute the auxiliary loss for Mixture of Experts models.
    """,
        "shape": None,
        "additional_info": "returned when `output_router_logits=True` is passed or when `config.add_router_probs=True`",
    }
    router_probs = {
        "description": """
    Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, sequence_length, num_experts)`.

    Raw router probabilities that are computed by MoE routers, these terms are used to compute the auxiliary
    loss and the z_loss for Mixture of Experts models.
    """,
        "shape": None,
        "additional_info": "returned when `output_router_probs=True` and `config.add_router_probs=True` is passed or when `config.output_router_probs=True`",
    }
    z_loss = {
        "description": """
    z_loss for the sparse modules.
    z"returned when `labels` is providedz*
    aux_loss for the sparse modules.
    """,
        "shape": None,
        "additional_info": "returned when `labels` is provided",
    }
    start_logits = {
        "description": """
    Span-start scores (before SoftMax).
    r  z+
    Span-end scores (before SoftMax).
    z%
    Feature maps of the stages.
    """,
        "shape": "of shape `(batch_size, num_channels, height, width)`",
    }
    reconstruction = {
        "description": """
    Reconstructed / completed images.
    """,
        "shape": "of shape `(batch_size, num_channels, height, width)`",
    }
    spectrogram = {
        "description": """
    The predicted spectrogram.
    """,
        "shape": "of shape `(batch_size, sequence_length, num_bins)`",
    }
    predicted_depth = {
        "description": """
    Predicted depth for each pixel.
    r  z6
    Sampled values from the chosen distribution.
    """,
        "shape": "of shape `(batch_size, num_samples, prediction_length)` or `(batch_size, num_samples, prediction_length, input_size)`",
    }
    params = {
        "description": """
    Parameters of the chosen distribution.
    """,
        "shape": "of shape `(batch_size, num_samples, num_params)`",
    }
    loc = {
        "description": """
    Shift values of each time series' context window which is used to give the model inputs of the same
    magnitude and then used to shift back to the original magnitude.
    """,
        "shape": "of shape `(batch_size,)` or `(batch_size, input_size)`",
    }
    scale = {
        "description": """
    Scaling values of each time series' context window which is used to give the model inputs of the same
    magnitude and then used to rescale back to the original magnitude.
    """,
        "shape": "of shape `(batch_size,)` or `(batch_size, input_size)`",
    }
    static_features = {
        "description": """
    Static features of each time series' in a batch which are copied to the covariates at inference time.
    """,
        "shape": "of shape `(batch_size, feature size)`",
    }
    embeddings = {
        "description": """
    Utterance embeddings used for vector similarity-based retrieval.
    """,
        "shape": "of shape `(batch_size, config.xvector_output_dim)`",
    }
    extract_features = {
        "description": """
    Sequence of extracted feature vectors of the last convolutional layer of the model.
    """,
        "shape": "of shape `(batch_size, sequence_length, conv_dim[-1])`",
    }
    projection_state = {
        "description": """
    Text embeddings before the projection layer, used to mimic the last hidden state of the teacher encoder.
    """,
        "shape": "of shape `(batch_size, config.project_dim)`",
    }
    image_hidden_states = {
        "description": """
    Image hidden states of the model produced by the vision encoder and after projecting the last hidden state.
    """,
        "shape": "of shape `(batch_size, num_images, sequence_length, hidden_size)`",
    }
    video_hidden_states = {
        "description": """
    Video hidden states of the model produced by the vision encoder and after projecting the last hidden state.
    """,
        "shape": "of shape `(batch_size * num_frames, num_images, sequence_length, hidden_size)`",
    }


class ClassDocstring:
    Config = """
    This is the configuration class to store the configuration of a {model_base_class}. It is used to instantiate a {model_name}
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the [{model_checkpoint}](https://huggingface.co/{model_checkpoint}) architecture.

    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.
    """
    PreTrainedModel = """
    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
    and behavior.
    """
    Model = """
    The bare {model_name} Model outputting raw hidden-states without any specific head on top.
    """
    ForPreTraining = """
    The {model_name} Model with a specified pretraining head on top.
    """
    Decoder = """
    The bare {model_name} Decoder outputting raw hidden-states without any specific head on top.
    """
    TextModel = """
    The bare {model_name} Text Model outputting raw hidden-states without any specific head on top.
    """
    ForSequenceClassification = """
    The {model_name} Model with a sequence classification/regression head on top e.g. for GLUE tasks.
    """
    ForQuestionAnswering = """
    The {model_name} transformer with a span classification head on top for extractive question-answering tasks like
    SQuAD (a linear layer on top of the hidden-states output to compute `span start logits` and `span end logits`).
    """
    ForMultipleChoice = """
    The {model_name} Model with a multiple choice classification head on top (a linear layer on top of the pooled output and a
    softmax) e.g. for RocStories/SWAG tasks.
    """
    ForMaskedLM = """
    The {model_name} Model with a `language modeling` head on top.
    """
    ForTokenClassification = """
    The {model_name} transformer with a token classification head on top (a linear layer on top of the hidden-states
    output) e.g. for Named-Entity-Recognition (NER) tasks.
    """
    ForConditionalGeneration = """
    The {model_name} Model for token generation conditioned on other modalities (e.g. image-text-to-text generation).
    """
    ForCausalLM = """
    The {model_name} Model for causal language modeling.
    """
    Backbone = """
    The {model_name} backbone.
    """
    ForImageClassification = """
    The {model_name} Model with an image classification head on top e.g. for ImageNet.
    """
    ForSemanticSegmentation = """
    The {model_name} Model with a semantic segmentation head on top e.g. for ADE20K, CityScapes.
    """
    ForAudioClassification = """
    The {model_name} Model with an audio classification head on top (a linear layer on top of the pooled
    output).
    """
    ForAudioFrameClassification = """
    The {model_name} Model with a frame classification head on top for tasks like Speaker Diarization.
    """
    ForPrediction = """
    The {model_name} Model with a distribution head on top for time-series forecasting.
    """
    WithProjection = """
    The {model_name} Model with a projection layer on top (a linear layer on top of the pooled output).
    """


class ClassAttrs:
    base_model_prefix = """
    A string indicating the attribute associated to the base model in derived classes of the same architecture adding modules on top of the base model.
    """
    supports_gradient_checkpointing = """
    Whether the model supports gradient checkpointing or not. Gradient checkpointing is a memory-saving technique that trades compute for memory, by storing only a subset of activations (checkpoints) and recomputing the activations that are not stored during the backward pass.
    """
    _no_split_modules = """
    Layers of modules that should not be split across devices should be added to `_no_split_modules`. This can be useful for modules that contain skip connections or other operations that are not compatible with splitting the module across devices. Setting this attribute will enable the use of `device_map="auto"` in the `from_pretrained` method.
    """
    _skip_keys_device_placement = """
    A list of keys to ignore when moving inputs or outputs between devices when using the `accelerate` library.
    """
    _supports_flash_attn = """
    Whether the model's attention implementation supports FlashAttention.
    """
    _supports_sdpa = """
    Whether the model's attention implementation supports SDPA (Scaled Dot Product Attention).
    """
    _supports_flex_attn = """
    Whether the model's attention implementation supports FlexAttention.
    """
    _can_compile_fullgraph = """
    Whether the model can `torch.compile` fullgraph without graph breaks. Models will auto-compile if this flag is set to `True`
    in inference, if a compilable cache is used.
    """
    _supports_attention_backend = """
    Whether the model supports attention interface functions. This flag signals that the model can be used as an efficient backend in TGI and vLLM.
    """
    _tied_weights_keys = """
    A list of `state_dict` keys that are potentially tied to another key in the state_dict.
    """


ARGS_TO_IGNORE = {"self", "args", "kwargs", "deprecated_arguments"}
ARGS_TO_RENAME = {"_out_features": "out_features", "_out_indices": "out_indices"}


def get_indent_level(func):
    return (len(func.__qualname__.split(".")) - 1) * 4


def equalize_indent(docstring, indent_level=0):
    """Adjust the indentation of a docstring to match the specified indent level."""
    prefix = " " * indent_level
    return "\n".join(prefix + line.strip() if line.strip() else "" for line in docstring.splitlines())


def set_min_indent(docstring, indent_level=0):
    """Adjust the indentation of a docstring to match the specified indent level."""
    lines = docstring.split("\n")
    min_indent = min((len(line) - len(line.lstrip()) for line in lines if line.strip()), default=0)
    prefix = " " * indent_level
    return "\n".join(prefix + line[min_indent:] if line.strip() else "" for line in lines)


def parse_shape(docstring):
    match = _re_shape.search(docstring)
    if match:
        return " " + match.group(1)
    return None


def parse_default(docstring):
    match = _re_default.search(docstring)
    if match:
        return " " + match.group(1)
    return None


def parse_docstring(docstring, max_indent_level=0, return_intro=False):
    """
Parse the docstring to extract the Args section and return it as a dictionary.
The docstring is expected to be in the format:
Args:
    arg1 (type):
        Description of arg1.
    arg2 (type):
        Description of arg2.

# This function will also return the remaining part of the docstring after the Args section.
Returns:/Example:
...
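    """
    # Editor's sketch of the expected return value (assumed from the description
    # above; the example docstring is hypothetical):
    #
    #   args, remainder = parse_docstring(
    #       "Args:\n    labels (torch.Tensor):\n        Target labels.\nExample:\n    ..."
    #   )
    #   # args      -> {"labels": {"type": "torch.Tensor", "description": "...", "optional": False, ...}}
    #   # remainder -> "Example:\n    ..."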
Nr   r  z"""r   zr"""r   z^\s{0,z;}(\w+)\s*\(\s*([^, \)]*)(\s*.*?)\s*\)\s*:\s*((?:(?!\n^\s{0,z}\w+\s*\().)*)      optionalr  z    )rh   rD   rF  rE   r1  r  )_re_example_or_returnr:  start_re_args_sectionr  r!  r(  r;  r"  r6  	_re_paramrecompileDOTALL	MULTILINEfinditerr>  rA  )r*  max_indent_levelreturn_intror=  remainder_docstring
args_matchdocstring_introargs_sectionr  param_pattern
param_name
param_typer  rF  rE   r1  param_descriptions                    re   parse_docstringrZ  9
  s/    "((3E'8o.	 !((3JO#$8j&6&6&89  &r*002e;"ii(=(=d(CCR(HIO  &q)//1V;?T?TUY?Z[\?]?c?c?ein?n"ii(=(=d(CAB(GHO  "b("O7A:##A&--d3yL$#))+u4yy!3!3D!9#2!>?$"((*f48J8J48PQR8S8Y8Y8[_d8dyy!3!3D!9!"!=>!,2LF q %MJJ+,,ijzi{  |K  L		BLL(M
 #++L9EQJQJ#kk!nO!_4H0E#O4G %A 4 4 6 '*; ;"$%6$7 8"0$"#2"F :( %"%88()<a@O;;&&rd   c                $   [        U 5      nUS:X  a   [        X5      U 4$ U Vs/ s H  n[	        X15      S   PM     nn[        U5      nU(       a  X$R                  S5         n XP4$ ! [         a    [        [        U 5      U5      U 4s $ f = fs  snf )z
Check if a "nested" type hint contains a specific target type,
return the first-level type containing the target_type if found.
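# Editor's note -- an illustrative call (assumed from the description above, not taken
# verbatim from the original source):
#   contains_type(Optional[list[ModelOutput]], ModelOutput)
# would be expected to return (True, list[ModelOutput]), i.e. the first generic level
# that contains the target type.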
rF   r   T)r
   
issubclass	Exceptionrh   contains_typeanyindex)	type_hinttarget_typer  argfound_type_tuple
found_types         re   r^  r^  
  s    
 IDrz	Gi5y@@ GKKdsc7:dK%&J//56	    	Gd9o{;YFF	GKs   A& B&!B
	B
c                   [         R                  " U 5      nUc  gUR                  [        R                  R
                  5      S   S:w  a  gUR                  [        R                  R
                  5      S   nUR                  [        R                  R
                  5      S   nSn[         H{  nUR                  S5      S   nSU;   a  UR                  S5      S   OSnUR                  U5      (       d  MK  UR                  U5      (       d  Mc  U[        U5      [        U5      *  n  O   U(       a*  X4:w  a%  SS	K
Jn  XH;   d  UR                  S
S5      U;   a  U$ U$ U$ )z6
Get the model name from the file path of the object.
NmodelsrC  *r   r   )!SPECIAL_MODEL_TYPE_TO_MODULE_NAME_-)inspectgetsourcefiler  ospathsepAUTODOC_FILES
startswithendswithr  +transformers.models.auto.configuration_autork  replace)	objrq  	file_name model_name_lowercase_from_foldermodel_name_lowercase_from_file	file_typerH  endrk  s	            re   get_model_namer~  
  s,   
   %D|zz"''++r"h.

277;;'+I'+zz"''++'>r'B$%)""	$Q'*-*:iooc"2&&&9+=+=c+B+B-6s5zSXI-N* # &*J*la +O-55c3?Cdd11//++rd   c                2   U R                   nU R                  5       nU(       d  g/ n/ nU H@  nU S3nUR                  SS5      nUR                  U5        UR                  SU S35        MB     U(       d  g[	        U5      S:X  a#  SUS	    3nUS	   n	US	   R                  S
S5      n
O[	        U5      S:X  aI  SUS	    SUS    3nUS	    SUS    3n	US	   R                  S
S5       SUS   R                  S
S5       3n
OSR                  S USS  5       5      SUS    3-   nSR                  USS 5      SUS    3-   n	U Vs/ s H  oR                  S
S5      PM     nnSR                  USS 5      SUS    3-   n
SU SU SU SU	 SU
 S3nU$ s  snf )z
Generate the intro docstring for a processor class based on its attributes.

Args:
    cls: Processor class to generate intro for

Returns:
    str: Generated intro text
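# Editor's illustration (hypothetical class names): for a processor whose attributes
# are ["image_processor", "tokenizer"], the generated intro would read roughly:
#   Constructs a DummyProcessor which wraps a [`DummyImageProcessor`] and a
#   [`DummyTokenizer`] into a single processor.
#   [`DummyProcessor`] offers all the functionalities of [`DummyImageProcessor`] and
#   [`DummyTokenizer`]. See their docstrings for more information.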
r   _classrl  r  z[`{z}`]r   a r   [`z[`~rD  z and a z and , c              3  ,   #    U  H
  nS U 3v   M     g7f)r  NrF   )r#  cs     re   r&  +generate_processor_intro.<locals>.<genexpr>
  s     #FobHos   NrC  z, and a z, and zConstructs a z which wraps z into a single processor.

[`z%`] offers all the functionalities of z
. See the
z for more information.
)rG   get_attributesrw  appendr  r(  )cls
class_name
attributes
componentscomponent_classesattr
class_attrattr_displaycomponents_textclasses_textclasses_text_shortr  classes_shortintros                 re   generate_processor_intror  
  s)    J ##%J JvV_
||C-,'  4
|4!89   :!z!}o.(+.q199$F	ZA	z!}oWZ]OD+A./u5Fq5I4JK #++D%89?PQR?S?[?[\`bg?h>ij 	 ))#Fj"o#FF8T^_aTbScIddyy!23B!78VDUVXDYCZ;[[9JK9JA4/9JK!YY}Sb'9:vmTVFWEX=YYj\6G H,3L> B  E L Ls   Fc                V   SSK Jn  0 nU  H  nU[        ;   d  M   [        [        U[        U   S   5      [        U   S   5      R	                  US5      nUb7  [        U[        [        -  5      (       a  US   b  US   OUS   nUb  UOUX4'   M  XCU'   M     U$ ! [
         a    Sn NRf = f)z>
Get the dictionary of placeholders for the given model name.
r   autor   NrC  )	transformers.modelsr  PLACEHOLDER_TO_AUTO_MODULEgetattrgetImportError
isinstancelisttuple)placeholders
model_nameauto_moduleplaceholders_dictplaceholderplace_holder_values         re   get_placeholders_dictr  
  s    
 8#44*%,K)CK)PQR)ST.{;A>& #j$' # "-0$,??2DR2H2T*2.ZlmnZo ' HZGe1Ckv!.1<+.% $(   *%)"*s   :BB('B(c                L   [        [        R                  U 5      5      nU(       d  U $ [        X!5      nUR	                  5        H]  u  pE[        U[        5      (       a(  US:X  a"  UR                  SUR                  SS5      5      nUc  MG  U R                  SU S3U5      n M_     U $ )z
Replaces placeholders such as {image_processor_class} in the docstring with the actual values,
deduced from the model name and the auto modules.
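# Editor's sketch (assumed behavior; "llava" is only an example): a docstring
# containing "{image_processor_class}" would, for model_name="llava", have the
# placeholder replaced with the value resolved from the auto mappings,
# e.g. "LlavaImageProcessor".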
r!   torchvisionpilN{})	set_re_placeholdersfindallr  itemsr  dictr  rw  )r*  r  r  r  r  values         re   format_args_docstringr    s     '//	:;L .lG/557eT""{6M'MIImUYYud-CDE"!))B{m2*>FI	 8
 rd   c                z    [        U [        [        -  5      (       a  [        [        U 5      5      $ U R                  $ r/  )r  r  r  _merge_args_dicts__dict__)args_classess    re   get_args_doc_from_sourcer  '  s/    ,u-- |!455   rd      )maxsizec                R    0 nU  H  nUR                  UR                  5        M      U$ )zQCached merger of args-doc dicts. The input classes are static so caching is safe.)updater  )args_classes_tupleresultr  s      re   r  r  -  s(     F!cll# "Mrd   c                    S nU R                   nU(       d  g [        R                  U5      nU H%  u  pEUR                  S5      nSU 3nXV:X  d  M"  Un  U$    U$ )N/zhttps://huggingface.co/)__doc___re_checkpointr  removesuffix)r&   
checkpointconfig_sourcecheckpoints	ckpt_name	ckpt_linkckpt_link_from_names          re    get_checkpoint_from_config_classr  6  st    J !((M ((7K !,	**3/	 !8	{C+"J !, rd   c                R    SnU R                   S:X  a  SU S3n[        X2S-   5      nU$ )Nr   forwardzThe [`a  `] forward method, overrides the `__call__` special method.

        <Tip>

        Although the recipe for forward pass needs to be defined within this function, one should call the [`Module`]
        instance afterwards instead of this since the former takes care of running the pre and post processing steps while
        the latter silently ignores them.

        </Tip>

        r  )rG   r,  )r  r  r+  intro_docstrings       re   add_intro_docstringr  O  s=    O}}	!%j\ 
2	 
 */!;KLrd   c                   SSK Jn  Ub  [        U5      nO[        U 5      nU(       a@  U[        [        U[        S   S   5      [        S   S   5      ;  a  UR                  SS5      nU R                  R                  S5      S   nUc  SnO. [        [        U[        S   S   5      [        S   S   5      U   nX4U4$ ! [         a)    U[        ;   a  [        U   n N#S	n[        S
U S35         N6f = f)z
Extract model information from a function or its parent class.

Args:
    func (`function`): The function to extract information from
    parent_class (`class`): Optional parent class of the function
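# Editor's illustration of the return value (hypothetical model): for a method such
# as LlamaModel.forward this would be roughly ("llama", "LlamaModel", "LlamaConfig"),
# i.e. (model_name_lowercase, class_name, config_class).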
r   r  Nr&   r   rl  rm  r  ModelConfigz[ERROR] Config not found for zS. You can manually add it to HARDCODED_CONFIG_FOR_MODELS in utils/auto_docstring.py)r  r  r~  r  r  rw  rI   r  KeyErrorHARDCODED_CONFIG_FOR_MODELSprint)r  parent_classr  model_name_lowercaser  r&   s         re   _get_model_infor  b  s+    8 -l;-d3  4G7GJK">215= !  4;;CE ""((-a0J #	"%?%OPQ%RS*>:1= #$L  \99  	#'BB:;OP,34H3I  J]  ^	s   -B> >C1C10C1c                ,   U [        S5      L d	  U [        L a  g[        U 5      n[        U 5      nUGb^  U(       GaV  [	        US5      (       a  [	        US5      (       a}  UR
                  nUS;   a  SnOBUR                  SS	5      R                  S
S5      R                  SS5      R                  SS5      nU(       a  U SUR                   3nOUR                  nO[        U5      nUS:X  a,  U Vs/ s H  n[        U5      PM     nnSR                  U5      $ US:X  GaA  U(       Ga9  [        US   5      nU[        L a  [        US   5      n/ n	U H>  nU[        S5      L a  U	R                  S5        M$  U	R                  [        U5      5        M@     SR                  U	5      n
USS  Vs/ s H  n[        U5      PM     nnU
/U-   nU SSR                  U5       S3$ U[        L ay  [        US   5      nU Vs/ s H  n[        U5      PM     n	nSSR                  U	5       S3n
USS  Vs/ s H  n[        U5      PM     nnU
/U-   nU SSR                  U5       S3$ U Vs/ s H  n[        U5      PM     nnU SSR                  U5       S3$ [	        U S5      (       a  [	        U S5      (       a  U R
                  nUS;   a  SnOBUR                  SS	5      R                  S
S5      R                  SS5      R                  SS5      nU(       a  U SU R                   3nU$ U R                  nU$ [        U 5      nSU;   a  [        R                  SU5      nUR                  S
S5      R                  SS5      nU$ s  snf s  snf s  snf s  snf s  snf )a  
Recursively format a type annotation object as a string, preserving generic type arguments.

This is an internal helper used by process_type_annotation for the type object path.

Args:
    type_hint: A type annotation object

Returns:
    str: Formatted type string
....NrH   rG   )typingtypesbuiltinsr   transformers.~typing.ztypes.	builtins.r  r    | 	Annotatedr   Noner   [r  ]Union[
ForwardRef\1)rh   Ellipsisr   r
   hasattrrH   rw  rG   ri   !_format_type_annotation_recursiver(  r   r  r	   _re_forward_refsub)ra  originr  module_name
origin_strrc  arg_strsfirst_arg_origin
union_args
union_strsformatted_unionremaining_argsall_args	type_nametype_strs                  re   r  r    s    DIh!6 	"FIDd 6<((WVZ-H-H ++K==   ''=WY+WXr*W["-	   +}Afoo->?
#__
VJ $JNO$39#>$HO::h'' $)$q'29,%d1g.

%Cd4j("))&1"))*KC*PQ	 &
 #(**Z"8TXYZY[T\!]T\S"CC"HT\!]+,~=$Qtyy':&;1==!U*%d1g.
PZ[PZ?DPZ
[$*499Z+@*A"CTXYZY[T\!]T\S"CC"HT\!]+,~=$Qtyy':&;1== GKKds5c:dKQtyy23155	L	)	)gi.L.L  **99K ##OS9B'2&b)	  &-q););(<=I  "**I y>8#&**5(;H##Ir2::8RH P& "^ \ "^
 Ls   /M=)N8N+N#Nc                   Sn[        U [        5      (       d  U b  U [        S5      L a  g[        U 5      [        L d  [        U 5      [
        L a|  [        U 5      n/ nU H1  nU[        S5      L a  SnM  [        U5      nUR                  U5        M3     U(       d  SU4$ [        U5      S:X  a  US   U4$ SS	R                  U5       S
3U4$ [        U 5      nXb4$ U nSU;   a  UR                  S5       Vs/ s H  oR                  5       PM     n	nSU	;   a  SnU	 Vs/ s H  oS:w  d  M
  UPM     n	nU	(       a  SR                  U	5      OSnSR                  UR                  S5      5      R                  SS5      R                  SS5      nGOSU;   d  SU;   d  SU;   d  SU;   a1  SR                  UR                  S5      5      R                  SS5      nOSU;   aC  UR                  SS5      R                  SS5      R                  SS5      R                  SS5      nOU(       aX  SU;   aR  UR                  S5      S   S   R                  5       (       d(  UR                  SS5      R                  SS5       SU 3nO"UR                  SS5      R                  SS5      nSU;   a  [        R!                  SU5      nSU;   a  ["        R!                  SU5      nSnXr4$ s  snf s  snf )a  
Unified function to process and format a parameter's type annotation.

This function intelligently handles both type objects (from inspect.Parameter.annotation)
and string representations of types. It will:
- Use type introspection when given a type object (preserves generic arguments)
- Parse string representations when that's all that's available
- Always return a formatted type string and optional flag

Handles various type representations including:
- Type objects with generics (e.g., list[str], Optional[int])
- Union types (both Union[X, Y] and X | Y syntax)
- Modern union syntax with | (e.g., "bool | None")
- Complex typing constructs (Union, Optional, Annotated, etc.)
- Generic types with brackets
- Class type strings
- Simple types and module paths

Args:
    type_input: Either a type annotation object or a string representation of a type
    param_name (`str | None`): The parameter name (used for legacy module path handling)

Returns:
    tuple[str, bool]: (formatted_type_string, is_optional)
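# Editor's sketch of expected outputs (assumed from the description above, not
# executed against this garbled body):
#   process_type_annotation(bool | None)     -> ("bool", True)
#   process_type_annotation("Optional[int]") -> ("int", True)
#   process_type_annotation(list[str])       -> ("list[str]", False)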
FN)r  TTr   r   r   r  r  r  r  r  r  r  r  r  r  z	Optional[r  z<class 'z'>r  rC  r  r  r  Optional)r  ri   rh   r   r	   r   r
   r  r  r  r(  r  r!  rw  isupperr  r  _re_optional)

type_inputrW  rF  subtypesout_strsubtypeformatted_typerX  ppartss
             re   process_type_annotationr    s   4 H j#&&tDz!9 j!U*j.D	.Q
+HG#d4j(#H!B7!K~. $ 8|#W"qz8++		' 2315x?? ;:F'' J 
$.$4$4U$;<$;q$;<U?H %51fQE5*/UZZ&R
WWZ--i89AA/SVW__`kmop
	Z	8z#9[J=VZ]akZkWWZ--i89AA/SVW
	z	! 4<<["MUUV`bdemmnrtvw 	 #+J4D4DS4I"4Ma4P4X4X4Z4Z&..DLLZY[\]]^_i^jkJ $++OSAII+WYZJ z!$((
;
 Z!%%eZ8
K = 6s   3K	K 'K c                    U R                   [        R                  R                  :X  a  g[	        U R                   5      u  pU R
                  [        R                  R                  La  SnX4$ )z
Process and format a parameter's type annotation from an inspect.Parameter object.

Args:
    param (`inspect.Parameter`): The parameter from the function signature

Returns:
    tuple[str, bool]: (formatted_type_string, is_optional)
)r   FT)
annotationrn  	Parameteremptyr  r1  )paramr  rF  s      re   _process_parameter_typer  h  s^     7,,222  7u7G7GHN }}G--333##rd   c                   SnSnSnSnSn	U(       a  SOSn
X;   ao  US:X  a  X   R                  SS5      c  X   S   (       a  X   S   nX   S   nX   R                  SS5      nU(       a  UOSnX   S   =(       d    Sn	X   S	    S
3nOjX;   ac  X    R                  SU5      nX    R                  SS5      nU(       a  SU-   OSnX    S	   nX    R                  SS5      n	U	(       a
  Xz-   S-   U	-   n	OSnX:XyXX4$ )a  
Get parameter documentation details from the appropriate source.
Tensor shape, optional status, and description are taken in priority from the custom docstring, if available.
The type is taken from the function signature first, then from the custom docstring if it is missing from the signature.

Args:
    param_name (`str`): Name of the parameter
    documented_params (`dict`): Dictionary of documented parameters (manually specified in the docstring)
    source_args_dict (`dict`): Default source args dictionary to use if not in documented_params
    param_type (`str`): Current parameter type (may be updated)
    optional (`bool`): Whether the parameter is optional (may be updated)
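# Editor's note on precedence (restating the description above with a hypothetical
# parameter): for "pixel_values", the shape, optional flag and description come from
# the hand-written docstring when present, and otherwise fall back to the default
# source args dict; the helper returns the resolved fields plus a flag indicating
# whether the parameter was manually documented.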
Nr   Tz, *optional*rh   r  rF  rE   rD   r  r  r  F)r  )rW  documented_paramssource_args_dictrX  rF  rD   rE   shape_stringis_documentedr  optional_strings              re   _get_parameter_infor    sB    KELMO)1orO& "!-11&$?K ,->?*6v>J$0<!-11'4@ %u2+78IJPb*6}EFbI		'%155fjI
 ,00$?&+sU{&2=A*6::;LdS*<tCoUO {aard   c	           
        Sn	[        X5      n
Uc7  U
(       a  [        [        [        [        /5      nO[        [        [        /5      n0 nU R
                  R                  5        GHB  u  pU[        ;   df  UR                  S5      (       dP  UR                  [        R                  R                  :X  d(  UR                  [        R                  R                  :X  a  Mx  Ub  X;  a  M  Ub  X;  a  M  [        R                  X5      n[!        U5      u  pSnUR"                  [        R$                  :w  a&  UR"                  b  S['        UR"                  5       S3n[)        XXnU5      u  nnnnnnU(       ar  US:X  a%  US:X  a  SU S3nOSUR+                  S5      S	    S3nSU;   a  UOSU S3nU(       a  U S
U U SU 3nOU S
U U U U SU 3nU	[-        UUS-   5      -  n	GM  U(       a  UOSUUU(       a  UOSUS.X'    Ub4  [        R.                  " U5      =(       d    UR0                  R2                  nOH[        R.                  " [        R4                  " U5      5      =(       d    UR0                  R2                  n UR;                  SU SUR<                   SU S35        GME     X4$ ! [6        [8        4 a    UR0                  R2                  n NUf = f)a  
Process all regular parameters (not kwargs parameters) from the function signature.

Args:
    sig (`inspect.Signature`): Function signature
    func (`function`): Function the parameters belong to
    class_name (`str`): Name of the class
    documented_params (`dict`): Dictionary of parameters that are already documented
    indent_level (`int`): Indentation level
    undocumented_parameters (`list`): List to append undocumented parameters to
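# Editor's sketch of the emitted block (formatting assumed): a signature parameter
#   pixel_values: torch.FloatTensor
# that is documented in the source args dicts would produce roughly
#   pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
#       Image tensors to be processed.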
r   rl  , defaults to ``r  r  z`]r  rC   ():   z<fill_type>z
    <fill_description>)rh   rF  rE   rD   r1  	[ERROR] `` is part of zZ's signature, but not documented. Make sure to add it to the docstring of the function in )_is_processor_classr  r  rA   rg   
parametersr  ARGS_TO_IGNORErt  kindrn  r  VAR_POSITIONALVAR_KEYWORDARGS_TO_RENAMEr  r  r1  _emptyri   r  r  r6  ro  __code__co_filenameunwrap	TypeErrorOSErrorr  rI   )sigr  r  r  r+  undocumented_parametersr  r  allowed_paramsr*  is_processormissing_argsrW  r
  rX  rF  param_defaultr  r  r  rD   r  param_docstring_source_files                           re   _process_regular_parametersr0    s   , I&t:L 7DVXe8fg7DV8WXL ^^113
 .($$S))zzW..===zzW..:::%**J %**J#''
?
  7u=
 ==GNN*u}}/H-c%--.@-ACMat+;b
^
O\?KQ^ X%##%j\!4J#%j&6&6s&;B&?%@!CJ (+j'8*Q>OJ%/L:,>OrR]Q^"_ "l"ZL>OP]^`al`mn   q  I '1
m$%.9{?Y((L$9+#*#8#8#F#c$--JcJcL#*#8#89M#N#kRVR_R_RkRkL $**J<}T5F5F4G  Hb  co  bp  pq  rI 4P "" w' 9#}}889s   6J.9AJ..&KKc                    SnUnSnU(       d=  X    H'  nUS:X  a  US-  nM  US:X  d  M  US-  nUS:X  d  M%  Sn  O   US-  nU(       d  M=  U$ )Nr   F(r   )TrF   )r5  line_endparenthesis_countsig_line_end	found_sigchars         re   find_sig_liner9    sq    LI'Ds{!Q&!!Q&!$) $I ( 	 i rd   c                   Ub2  SUR                   ;   =(       d    [        S UR                   5       5      $  [        R                  " U 5      nU(       d  gU R                  R                  S5      S   S:X  a  g[        R                  R                  U5      nUR                  S5      =(       a    UR                  S	5      $ ! [
         a     gf = f)
  
Check if a function belongs to an image processor class.

Uses two methods:
1. Check parent_class inheritance (if provided)
2. Check if the source file is named image_processing_*.py (fast and slow image
   processors), as opposed to processing_*.py (multimodal processors)

Args:
    func: The function to check
    parent_class: Optional parent class (if available)

Returns:
    bool: True if this is an image processor (inherits from BaseImageProcessor or is a
    fast image processor class), False otherwise
r*   c              3  @   #    U  H  nS UR                   ;   v   M     g7fr*   NrG   r#  bases     re   r&  ,_is_image_processor_class.<locals>.<genexpr>A  s      D
>Rd DMM1>R   Fr  r   DummyForTestImageProcessorFastTimage_processing_.pyrG   r_  __mro__rn  ro  r&  rI   r  rp  rq  basenamert  ru  r  r  source_filefilenames       re   _is_image_processor_classrL  /  s    " #|'<'<< 
 D
>J>R>RD
 A
 	
++D1  s#A&*JJww,H 23P8I8I%8PP     C   
CCc                   Ub2  SUR                   ;   =(       d    [        S UR                   5       5      $  [        R                  " U 5      nU(       d  gU R                  R                  S5      S   S:X  a  g[        R                  R                  U5      nUR                  S5      =(       a    UR                  S	5      $ ! [
         a     gf = f)
r;  r+   c              3  @   #    U  H  nS UR                   ;   v   M     g7f)r+   Nr>  r?  s     re   r&  &_is_processor_class.<locals>.<genexpr>l  s      @
:N$-:NrB  Fr  r   DummyProcessorForTestTprocessing_rE  rF  rI  s       re   r  r  Z  s    " <#8#88 
C @
:F:N:N@
 =
 	
++D1  s#A&*AAww,H }-J(2C2CE2JJ  rM  >   r,   r/   r-   r.   ProcessingKwargsc                    U [         ;   a  U $ SU ;   d  U R                  S5      (       a  gSU ;   a  gSU ;   d  U R                  S5      (       a  gSU ;   d  U R                  S5      (       a  gS	U ;   a  g	g
)zXMap kwargs class name to base using naming conventions. Returns base class name or None.ImageProcessorKwargsr-   ProcessorKwargsrS  VideoProcessorKwargsr.   AudioProcessorKwargsr/   r,   N)_BASIC_KWARGS_NAMESru  )cls_names    re    _get_base_kwargs_class_from_namer[    sz    &&)X->->~-N-NH$!)X->->~-N-N)X->->}-M-Mxrd   c                   U n [         R                  " U5      nSnU HR  n[        U[        5      (       d  M  U[        [
        4;  d  M,  [        USS5      S:X  a  [        USS5      S:X  a  MP  Un  O   Uc>  [        UR                  5      nUb&  [        c  SSK
JnJnJnJn	Jn
  UUU	U
US	.q	[        U   nUb  X1:X  a  U$ UnM  )
a  
Get the root/base TypedDict class by walking the inheritance chain.
For model-specific kwargs like ComplexProcessingKwargs(ProcessingKwargs), returns ProcessingKwargs.
For model-specific kwargs like DummyImageProcessorKwargs(ImagesKwargs), returns ImagesKwargs.

Compatibility: On Python < 3.12, non-generic TypedDict subclasses do not have __orig_bases__ set
(cpython#103699). We fall back to naming heuristics (e.g. *ImageProcessorKwargs -> ImagesKwargs).
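# Editor's illustration of the fallback (hypothetical kwargs class): on Python 3.11,
#   class DummyImageProcessorKwargs(ImagesKwargs): ...
# may expose no usable original bases, so
#   _get_base_kwargs_class_from_name("DummyImageProcessorKwargs")
# resolves the "*ImageProcessorKwargs" suffix to ImagesKwargs instead.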
NrG   r   	TypedDictrH   r  r   )r/   r-   rS  r,   r.   )r-   rS  r,   r.   r/   )typing_extensionsget_original_basesr  rh   r  objectr  r[  rG   _BASIC_KWARGS_CLASSEStransformers.processing_utilsr/   r-   rS  r,   r.   )r  currentbasesparentr@  	base_namer/   r-   rS  r,   r.   s              re   _get_base_kwargs_classrg    s     G
!44W=D$%%$tVn*D4R0K?GDR^`bDcgoDo  >89I9IJI$(0  )5,<&0(4'2-) /y9>V.NC rd   c                  ^ SnSnUR                   [        ;   nU(       d  Tb  [        U4S j[         5       5      nU(       d  Xg4$ [	        UT5      n	[        UT5      n
U	(       a  [        [        [        /5      nOFU
(       a  [        [        5      nO/[        SUR                    ST(       a  TR                   OS S35      eU R                  R                  5        VVs/ s H1  u  pUR                  [        R                  R                  :X  d  M/  UPM3     nnnU GH  nUR                   [        R                  R"                  :X  a  M.  [%        UR                   S5      (       a(  [%        UR                   R&                  S	   S
5      (       d  Ms  UR                   R&                  S	   R                   [(        ;  Ga  UR                   R&                  S	   R*                  nUb  [-        U5      S	   nUR                   R&                  S	   R.                  R                  5        GH>  u  nnU	(       Ga  UR1                  S5      (       Ga  Un[3        US
S5      nUcF  [%        US5      (       a5  [3        USS5      nU H"  nU[5        S5      Ld  M  Un[3        US
S5      n  O   U[(        ;   a  M  [%        US5      (       Ga8  [3        USS5      n0 nU(       a  [-        U5      S	   nU(       d  M  UR.                  R                  5        H  u  nnUU;  a  M  [7        UU5      u  nnSnTb$  [9        [3        TUS5      5      nUS:w  a  SU S3OSn[;        UU0 UU5      u  nnnn n!n"US:X  a)  [=        SU SU SUR>                  R@                   S35        SU;   a  UOSU S3nU (       a$  U[C        U SU [D         U  SU! 3US-   5      -  nM  U[C        U SU [D         U U U SU! 3	US-   5      -  nM     GM  GM  U(       a	  UU;  a  GM  [7        UU5      u  n#n$Sn%Tb$  [9        [3        TUS5      5      n%U%S:w  a  SU% S3OSn%[;        UX;U#U$5      u  n#n&n'n(n)n*U*(       a  U#S:X  aJ  [=        SU SUR                   R&                  S	   RF                   SUR>                  R@                   S35        SU#;   a  U#OSU# S3n#U((       a%  U[C        U SU# [D         U( SU) 3US-   5      -  nGM  U[C        U SU# [D         U' U& U% SU) 3	US-   5      -  nGM  URI                  SU SUR                   R&                  S	   RF                   SUR>                  R@                   S35        GMA     UR                   R&                  S	   n+[K        U+5      R                   n,URM                  S0 5      n-U-RM                  SS 5      n.[C        S!U, S"U. 3US-   5      nGM     Xg4$ s  snnf )#a  
Process **kwargs parameters if needed.

Args:
    sig (`inspect.Signature`): Function signature
    func (`function`): Function the parameters belong to
    parent_class (`class`): Parent class of the function
    documented_kwargs (`dict`): Dictionary of kwargs that are already documented
    indent_level (`int`): Indentation level
    undocumented_parameters (`list`): List to append undocumented parameters to

Returns:
    tuple[str, str]: (kwargs docstring, kwargs summary line to add after return_tensors)
r   Nc              3  d   >^#    U  H$  m[        U4S  jTR                   5       5      v   M&     g7f)c              3  B   >#    U  H  nTUR                   ;   v   M     g 7fr/  r>  )r#  r@  unroll_kwargs_classs     re   r&  7_process_kwargs_parameters.<locals>.<genexpr>.<genexpr>  s     VAU#t}}4AUs   N)r_  rG  )r#  rk  r  s    @re   r&  -_process_kwargs_parameters.<locals>.<genexpr>  s-      
'<# VAUAUVVV'<s   ,0z&Unrolling kwargs is not supported for z of r  z class__args__r   rG   _kwargs
__origin__rF   __annotations__r  r  r  u   🚨  for 	 in file  has no typer  r  r  [ERROR] r  r  zN, but not documented. Make sure to add it to the docstring of the function in r  rb   rD   zIAdditional keyword arguments. Model-specific parameters are listed above.z**kwargs ([`z`], *optional*):)'rG   UNROLL_KWARGS_METHODSr_  UNROLL_KWARGS_CLASSESr  rL  r  rA   rg   
ValueErrorr  r  r  rn  r  r   r  r	  r  rn  BASIC_KWARGS_TYPESr  rZ  rq  ru  r  rh   r  ri   r  r  r#  r$  r6  KWARGS_INDICATORrI   r  rg  r  )/r(  r  r  documented_kwargsr+  r)  r*  kwargs_summaryunroll_kwargsr+  is_image_processorr  rl  kwargs_paramkwargs_parameterskwarg_paramkwargs_documentationrW  param_type_annotationactual_typer  r  rc  nested_kwargs_docdocumented_nested_kwargsnested_param_namenested_param_typenested_param_type_strnested_optionalnested_param_defaultnested_optional_stringnested_shape_stringnested_additional_infonested_descriptionnested_is_documentedrX  rF  r-  r  r  r  rD   r  kwargs_annot_clskwargs_type_namekwargs_infokwargs_descriptions/     `                                            re   _process_kwargs_parametersr    s9    INMM%::M\5 
'<
 
 (( 't\:L24F 35G4WX	34FG4T]]O4amH]H]syGz  {A  B
 	
  #~~3355OA 1 1 = == 	5  
 )!!W%6%6%<%<<{--z::'""++A.
C
 C
 !!**1-66>PP#.#9#9#B#B1#E#M#M #/$34H$I!$L!5@5K5K5T5TUV5W5g5g5m5m5o1
1  J$7$7	$B$B
 #8K '(=z4 PI (W5JL-Y-Y&'<j"M#'C"$t*4.1,3CT,J	 %	 $( !$66  {,=>>,3KD,Q)350,7FGX7YZ[7\4  8$ EPD_D_DeDeDg@-/@08PP (E\ 13DFB1?
 460+77:7<Qbdf;g7h 4QeikQko6J5K1$Mqs !5 !4 1 8 " 5 /! 5 6 3 6 2 4  5: %&+,=+>eI;iX\XeXeXqXqWrr~$!" :=@U9U 5]^_t^uuv[w 2  6 )^'8&9<Q;RScRde{d||~  @R  S  %T$01$4." !"	
 !*^'8&9<Q;RScRdexdy  {Q  zR  Sg  Rh  hj  k}  j~  %$01$4." !"	c Ehn ! !$;L)L'>?TV`'a$
H !#+$'j"(M$NMJW[]J]om_A$FceM (
4EYcemn g
O\?KYf !!R'&zl%8N8N8W8WXY8Z8g8g7hhqrvrr  sL  sL  rM  MY  Z 03j/@*UVFWJ&!^)l"ZL9I8J?J[[]^i]jk(1,& 	
 "^)l"ZL9I8J<.YhXijwixxz  |G  {H  I(1,& 	
 ,22#J<}[=S=S=\=\]^=_=l=l<m  n|  }A  }J  }J  }V  }V  |W  WX  YG 6pP '11::1=12BCLL&**<<(__W
 (+,,<=O<PQ1
 )H $$Ss   .WWc                r   SnSnU R                   S:X  a  [        X5      nU R                   S:X  a  [        X5      nU(       d  U(       an  SU;  ah  U(       a  [        [        5      O[        [
        5      nUS   nUR                  SS5      nUS   n	SU;   a  UOSU S3nS	U S
U	 3n
U[        XS-   5      -  nU$ )a{  
Add return_tensors parameter documentation for processor __call__ methods if not already present.

Args:
    func (`function`): Function being processed
    parent_class (`class`): Parent class of the function
    docstring (`str`): Current docstring being built
    indent_level (`int`): Indentation level

Returns:
    str: Updated docstring with return_tensors if applicable
Fr)   r(   r\   rh   z`str` or [`~utils.TensorType`]rD   r  zreturn_tensors (z, *optional*):r  )rG   r  rL  r  rg   rA   r  r6  )r  r  r*  r+  is_processor_callis_image_processor_preprocessr  return_tensors_inforX  rD   r.  s              re    _add_return_tensors_to_docstringr    s     $)!}}
"/C}}$(A$(U% 	:@PXa@a ! %]3)*<= 	
 //?@(,,V5UV
)-8 $'*#4ZAj\:K
 -ZL{mT^OA5EFF	rd   c	                   SUS-   -  S-   n	/ n
0 n0 nU b  [        U 5      u  p[        UUUUUU
UUU5	      u  pX-  n	[        XX\Xj5      u  nnX-  n	[        X%X5      n	U	U-  n	[	        U
5      S:  a  [        SR                  U
5      5        U	$ )a  
Process the parameters section of the docstring.

Args:
    func_documentation (`str`): Existing function documentation (manually specified in the docstring)
    sig (`inspect.Signature`): Function signature
    func (`function`): Function the parameters belong to
    class_name (`str`): Name of the class the function belongs to
    model_name_lowercase (`str`): Lowercase model name
    parent_class (`class`): Parent class of the function (if any)
    indent_level (`int`): Indentation level
r  r  zArgs:
r   r  )rZ  r0  r  r  r  r  r(  )func_documentationr(  r  r  r  r  r+  r  r*  r*  r)  r  r{  r.  r,  kwargs_docstringr|  s                    re   _process_parameters_sectionr    s    0 |a'(94I  %0?@R0S- %@
%!O  I (B<L($n !I 1Y]I I "#a'dii/01rd   c           	        U R                   nUcB  [        U S5      (       a1  U R                  SS  H  nUR                   c  M  UR                   n  O   Uc!  U(       a  [        SU R                   S35      eg[        U5      u  pVU(       d   U(       a  [        SU R                   S35      e[        U 5      u  pvU(       a  SS	KJn  UR                  XqS
9n	OSU S3n	U(       a  U	S-  n	OU	S-  n	Sn
U(       Ga  UR                  5        GHm  u  pUR                  SS5      nUR                  SS5      R                  5       nUR                  SS5      nUR                  S5      (       a?  UR                  S5      (       d)  UR                  S5      nUS:w  a  XSUS-    -  nUUS-   S nUR                  S5      nU(       a  [        U5      u  pU(       a  SU SU SU SU 3nOSU SU SU 3nUR!                  S5      n/ n[#        U5       HA  u  nnUS:X  a  UR%                  U5        M  UR%                  SUR'                  5       -   5        MC     SR)                  U5      n[+        US5      nU
US-   -  n
GMp     X-   nU$ )a  
Prepare the return docstring from a ModelOutput class.

This is a robust replacement for the old _prepare_output_docstrings from doc.py,
using the same parsing and formatting methods as the rest of auto_docstring.

Args:
    output_type: The ModelOutput class to generate documentation for
    config_class (`str`): Config class for the model
    add_intro (`bool`): Whether to add the introduction text

Returns:
    str: Formatted return docstring
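# Editor's sketch of the rendered section (output type and field are illustrative):
#   Returns:
#       `BaseModelOutput`
#       - **last_hidden_state** (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`) --
#         Sequence of hidden-states at the output of the last layer of the model.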
NrG  r   zNo docstring found for `z_` or its parent classes. Make sure the ModelOutput class or one of its parents has a docstring.r   z@No `Args` or `Parameters` section is found in the docstring of `zK`. Make sure it has a docstring and contains either `Args` or `Parameters`.)PT_RETURN_INTRODUCTION)full_output_typer&   zReturns:
    `r  z:
r  rh   rD   r  rC  z- **z** (`z) -- z`) -- r   z  r  )r  r  rG  rx  rG   rZ  r  docr  formatr  r  r!  rt  ru  findr  	enumerater  r"  r(  r6  )output_typer&   	add_introoutput_docstringr@  r  rl  r  r  r  params_textrW  
param_inforX  rY  r  closing_backtick_idx
param_liner5  formatted_linesir$  
param_textparam_text_indentedr  s                            re   _prepare_return_docstringr    s    #** GK$C$C''+D||'#'<<  ,
 *;+?+?*@ AY Y   ++;<N{OcOcNd eW W
 	
 2+>/&--?O-k!"2!315UNETME K&7&=&=&?"J#3J *}b A G G I(nn->CO
 $$S))*2E2Ec2J2J'6';';C'@$'2-2L4H14L"MMJ&56JQ6N6P&QO $))#.J  7
 C
 #J<uZL/ARRWXiWjk
#J<uZLGXFYZ
 $$T*E O$U+46#**40 $**4$++-+?@ , ?3J #1Q"?.55Ka '@d  FMrd   c                   SnU b  [         R                  U 5      =nbt  [        R                  U 5      nU(       a3  XR                  5       UR                  5        nXR                  5       S n OXR                  5       S nSn [	        XCS-   5      nX@4$ UR
                  bm  UR
                  [        R                  :w  aO  [        UR
                  [        5      u  px[        XUS9nUR                  S5      (       d  SU-   n[	        XCS-   5      nX@4$ )a/  
Process the returns section of the docstring.

Args:
    func_documentation (`str`): Existing function documentation (manually specified in the docstring)
    sig (`inspect.Signature`): Function signature
    config_class (`str`): Config class for the model
    indent_level (`int`): Indentation level
r   Nr  )r  r  )
_re_returnr:  _re_examplerH  r6  return_annotationrn  r"  r^  r   r  rt  )	r  r(  r&   r+  return_docstringmatch_start	match_endr  r  s	            re   _process_returns_sectionr    s     %*:K:KL^:_+_;*l&&'9:	12C2C2E	HYZ!3OO4E4G!H12C2C2E2GH!#)*:1<LM // 
			*s/D/D/V'4S5J5JK'X$	45F`ij**400#&66)*:1<LM//rd   c           
        SSK Jn  Sn	U bA  [        R                  U 5      =n
(       a%  X
R	                  5       S n	S[        XS-   5      -   n	U	$ [        X5      (       a  U	$ UGcZ  UGbV  [        c<  [        R                  " SSR                  [        R                  " 5       5       S	35      q[        R                  U5      nUR                  R                  nU=nc   [        X   5      nUbd  Ub?  SnUR+                  5       n[        U   R-                  UUS
S
SSSS9n[        UUS-   5      n	U	$ [/        SU SUR0                   SU S35         U	$ [2         Hd  n [5        UR6                  U5      nUUR'                  5       ;   d  M0  [2        U   n[<        U   R-                  UUS
S
SSS9n[        UUS-   5      n	  U	$    U	$ ! [          a    U["        ;   a~  UR                  R$                  n["        U   nXR'                  5       ;   aL  UR)                  5        VVs/ s H  u  nnUU:X  d  M  UPM     Os  snnf snnS   nUU;   a  [        UU   5      n GNlf = f! [8        [:        4 a     GM  f = f)a  
Process the example section of the docstring.

Args:
    func_documentation (`str`): Existing function documentation (manually specified in the docstring)
    func (`function`): Function being processed
    parent_class (`class`): Parent class of the function
    class_name (`str`): Name of the class
    model_name_lowercase (`str`): Lowercase model name
    config_class (`str`): Config class for the model
    checkpoint: Checkpoint to use in examples
    indent_level (`int`): Indentation level
r   r  r   Nr  r  r2  |r3  r        z<mask>)r'   r  expected_outputexpected_lossqa_target_start_indexqa_target_end_indexmaskz [ERROR] No checkpoint found for r  zB. Please add a `checkpoint` arg to `auto_docstring` or add one in z's docstring)r'   r  r  r  r  r  )r  r  r  r:  rH  r6  r  _re_model_taskrK  rL  r(  r   keysr   CONFIG_MAPPINGr  r  r  r   valuesr  r;  r  r  rG   r   r  r   r  AttributeErrorr   )r  r  r  r  r  r&   r  r+  r  example_docstringr=  
model_taskr  checkpoint_exampler   config_class_namekvmodel_name_for_auto_configexample_annotationtaskname_model_list_for_taskmodel_list_for_taskpipeline_names                           re   _process_example_sectionr    s   " 8 %K4F4FGY4Z+Z5+Z.{{}? >2CTUEU#VVL I 
T	0	0  		"6"B!ZZ1SXX6J6O6O6Q-R,SST(UVN#**:6
$77FF #-,5%EnFj%k"  !!-%'"!'')%9$%?%F%F *1$)"'*,(*! &G &" %33E|VWGW$X!2 / 6zl!DMM?  SU  Vb  Uc  co  p. % -?(*1+2K2KMe*f' !4!;!;!==$67O$PM)L])[)b)b$.#5(-&+.0,. *c *& )77I<Z[K[(\%% -?$ k  '+FF+6+I+I+^+^((CDX(Y%(,G,G,II*>*D*D*F6*F$!Q!O`J`A*F662 6G1Q ./I J2.L $^4 s7   F? I?AIH0)H0/"III-,I-c                `   [         R                  " U 5      nU(       d  [        U 5      O
[        U5      n[        X5      u  pnU R                  nUb(  Ub%  S[        UR                  S5      S5      -   S-   U-   nO!Ub  S[        UR                  S5      S5      -   nUb8  [        X(S-   5      nUR                  5       R                  S5      (       d  US-  nO
[        X
US9nU[        UUU U
U	UUUU5	      -  n[        XX5      u  pX-  n[        UU UU
U	UUU5      nX-  n[        X5      nXl        U $ )z1
Wrapper that automatically generates docstring.
r  r   r  )r  r+  )rn  	signaturer  r  r  r6  r!  ru  r  r  r  r  r  )r  r  custom_introcustom_argsr  r  r*  r(  r+  r  r  r&   r  r*  r  r  s                   re   auto_method_docstringr    s|    

D
!C1=#D)CST`CaL 6ET5X2l#5#A!N;3D3DT3JA$NNQUUXjj		 !N;3D3DT3JA$NN "<1AB	 ))$//I'R^_	 ,
 
I ,D,( !I 1	 "I &iFI LKrd   c                
   SSK Jn  SnSnSnSnSn	Sn
SS U R                   5       ;   a1  [        U R                  XUS9R
                  R                  SS	5      n	GOS
S U R                   5       ;   aM  Sn[        U R                  U UU[        [        [        [        /5      S9R
                  R                  SS	5      n	GOSS U R                   5       ;   aJ  SnU R
                  nUc	  U(       a  Un[        U R                  U UU[        [        5      S9R
                  n
GO4[        S U R                   5       5      (       a1  Sn[        U R                  U UU[        [        5      S9R
                  n	OSS U R                   5       ;   a  SnU R
                  nUc	  U(       a  Un[        5       nU R                   H\  nUR                  S:X  a    OKU[        US0 5      R!                  5        VVs1 s H  u  p[#        U5      [$        Ld  M  UiM     snn-  nM^     U(       a  UOSn[        U R                  U UU[        [&        /5      US9R
                  n	[)        U 5      n[+        U 5      nU(       a>  SR-                  UR/                  S5       Vs/ s H  oR1                  5       PM     sn5      OSnUb  UR1                  5        S3OSnUb.   [        [        U[2        S   S   5      [2        S   S   5      U   nU(       a@  U[        [        U[2        S   S   5      [2        S   S   5      ;  a  UR                  SS5      n[8        R:                  " SSR-                  [<        R>                  RA                  5       5       S3U R                  5      nU/ :X  aP  UcM  U(       dF  U(       d?  U(       d8  [C        S U R                   S![<        R>                  RA                  5        S"35      eU/ :w  d   Uc  U(       d  U(       d  U(       d  U(       Ga  U(       a  US   OSnS#U0nUS$:X  a  URE                  UUS%.5        Ub(  [G        UU5      nURI                  S&5      (       d  US&-  nOU(       a+  [K        U 5      nU(       a  [G        UU5      n[M        UU5      nORU(       a"  S'nU(       a  [G        UU5      n[M        UU5      nO)Ub  Uc  SnO [        [<        U5      RN                  " S40 UD6n[Q        U5      (       a  [S        U U5      OSnUS:w  a5  SS( U R                   5       ;   a  U[S        [<        RT                   U5      -  nU	(       a  U[S        S&U	 3U5      -  nGOwU(       d  U(       GaN  UU
(       a  U
OS)-  n[        [        5      nU R
                  (       a  U R
                  OSn[W        U5      S   nU RX                  R!                  5        H  u  nn[[        UU5      u  nnSn[]        [        U US5      5      nUS:w  a  S*U S 3OSn[_        UUUUU5      u  nn n!n"n#n$U$(       d  MZ  US:X  a3  [a        S+U S,U Rb                   S-U Rd                  Rf                   S.35        S U;   a  UOS U S 3nU"(       a  U[S        U S/U U" S0U# 3US1-   5      -  nM  U[S        U S/U U! U  U S0U# 3US1-   5      -  nM     O[a        S2U R                   S335        WU l        U $ s  snnf s  snf ! [4         a     GN[6         a     GNf = f)5ze
Wrapper that automatically generates a docstring for classes based on their attributes and methods.
r   r  Fr   r  c              3  8   #    U  H  oR                   v   M     g 7fr/  r>  r#  xs     re   r&  'auto_class_docstring.<locals>.<genexpr>[  s     =AZZ   )r  r  r  zArgs:zParameters:r+   c              3  8   #    U  H  oR                   v   M     g 7fr/  r>  r  s     re   r&  r  _  s     >+Qjj+r  T)r  r  r  r  r   c              3  8   #    U  H  oR                   v   M     g 7fr/  r>  r  s     re   r&  r  h  s     ;{!::{r  Nc              3  @   #    U  H  nS UR                   ;   v   M     g7fr=  r>  r  s     re   r&  r  u  s     EA!QZZ/rB  PreTrainedConfigc              3  8   #    U  H  oR                   v   M     g 7fr/  r>  r  s     re   r&  r  ~  s     @Kq

Kr  rq  )r  r  r  r  r*  r  rl  r  r'   r   r&   rm  r2  r  z)$r  zE` is not registered in the auto doc. Here are the available classes: zv.
Add a `custom_intro` to the decorator if you want to use `auto_docstring` on a class not registered in the auto doc.r  r  )model_base_classmodel_checkpointr  z5Constructs a {image_processor_class} image processor.c              3  8   #    U  H  oR                   v   M     g 7fr/  r>  r  s     re   r&  r    s     >_S^azzS^r  z
Args:
r  ru  rr  rs  rt  r  r  r  z/You used `@auto_class_docstring` decorator on `zF` but this class is not part of the AutoMappings. Remove the decoratorrF   )4r  r  rG  r  __init__r  rw  r  r  rA   rg   r  r_  r  rG   r  r  r   r   r   r  r~  r(  r  titler  r  r  rK  r  r  r  r  rx  r  r,  ru  r  r  r  r  r6  r  rZ  rq  r  ri   r  r  rI   r#  r$  )%r  r  r  r  r  is_dataclassr+  	is_configr~  docstring_initdocstring_args	doc_classown_config_paramsancestorr  r  r*  r+  r  model_name_titler  nameformatting_kwargs	pre_blockr*  r  r{  rW  r  rX  rF  r-  r  r  r  rD   r  s%                                        re   auto_class_docstringr  N  s%   
 8LLINN===.LLsPZ

''''=1 	 
>#++>	>.LL#!5yBTVc6de
 ''''=1 	 
;s{{;	;KK	9#K.LL#!5oF
 ' 	 
EE	E	E!.LL#!56HI
 ' 	 
@CKK@	@	KK	9#K  EH  $66%h0A2FLLN"NdaR\]^R_goRoN"  $ /@*T.LL#!5zlC)
 ' 	 $C(L)#.Ymsxx4H4N4Ns4S T4Sq4S TUsw=M=Y*002359_c'		&%?%Nq%QR*=9!<  # $  4G7GJK">215= !  4;;CE::388N$;$;$@$@$BCDBGVDrzl*<]o~bcqczczcc  dB  cC CC C
 	
 rz\-ll^ptAwD)+;<8$$:J`j%kl#'lCI%%d++T!	05I+I|D	1)=QR	PI+I|D	1)=QR	%I5<<Q?PQIDG	NNNi[<@XZ	$$):>_SVS^S^>_)_>+I+I*J\ZZI"^,<(=|LLIY>{JI7H'*{{I /	 :1 =585H5H5N5N5P1
1'>?TV`'a$
H !# #GCR$@ AFSWYFY/- B_a (
4EGWYcemn g
O\?KYf !=!R'&zl%8H8H7ISVS_S_SkSkRllxy 03j/@*UVFWJ&!^)l"ZL8IK=Y(1,& 	
 "^)l"ZLFWXeWffhithuv(1,& 	5 6QB 	=cll^  LR  S	
 CKJm" !U  	 		s*   -[
	[
[#-[( (
\5	\\)r  r  r  c               :   ^^^ UUU4S jnU (       a  U" U 5      $ U$ )a  
Automatically generates comprehensive docstrings for model classes and methods in the Transformers library.

This decorator reduces boilerplate by automatically including standard argument descriptions while allowing
overrides to add new or custom arguments. It inspects function signatures, retrieves predefined docstrings
for common arguments (like `input_ids`, `attention_mask`, etc.), and generates complete documentation
including examples and return value descriptions.

For complete documentation and examples, read this [guide](https://huggingface.co/docs/transformers/auto_docstring).

Examples of usage:

    Basic usage (no parameters):
    ```python
    @auto_docstring
    class MyAwesomeModel(PreTrainedModel):
        def __init__(self, config, custom_parameter: int = 10):
            r'''
            custom_parameter (`int`, *optional*, defaults to 10):
                Description of the custom parameter for MyAwesomeModel.
            '''
            super().__init__(config)
            self.custom_parameter = custom_parameter
    ```

    Using `custom_intro` with a class:
    ```python
    @auto_docstring(
        custom_intro="This model implements a novel attention mechanism for improved performance."
    )
    class MySpecialModel(PreTrainedModel):
        def __init__(self, config, attention_type: str = "standard"):
            r'''
            attention_type (`str`, *optional*, defaults to "standard"):
                Type of attention mechanism to use.
            '''
            super().__init__(config)
    ```

    Using `custom_intro` with a method, and specify custom arguments and example directly in the docstring:
    ```python
    @auto_docstring(
        custom_intro="Performs forward pass with enhanced attention computation."
    )
    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
    ):
        r'''
        custom_parameter (`int`, *optional*, defaults to 10):
            Description of the custom parameter for MyAwesomeModel.

        Example:

        ```python
        >>> model = MyAwesomeModel(config)
        >>> model.forward(input_ids=torch.tensor([1, 2, 3]), attention_mask=torch.tensor([1, 1, 1]))
        ```
        '''
    ```

    Using `custom_args` to define reusable arguments:
    ```python
    VISION_ARGS = r'''
    pixel_values (`torch.FloatTensor`, *optional*):
        Pixel values of the input images.
    image_features (`torch.FloatTensor`, *optional*):
        Pre-computed image features for efficient processing.
    '''

    @auto_docstring(custom_args=VISION_ARGS)
    def encode_images(self, pixel_values=None, image_features=None):
        # ... method implementation
    ```

    Combining `custom_intro` and `custom_args`:
    ```python
    MULTIMODAL_ARGS = r'''
    vision_features (`torch.FloatTensor`, *optional*):
        Pre-extracted vision features from the vision encoder.
    fusion_strategy (`str`, *optional*, defaults to "concat"):
        Strategy for fusing text and vision modalities.
    '''

    @auto_docstring(
        custom_intro="Processes multimodal inputs combining text and vision.",
        custom_args=MULTIMODAL_ARGS
    )
    def forward(
        self,
        input_ids,
        attention_mask=None,
        vision_features=None,
        fusion_strategy="concat"
    ):
        # ... multimodal processing
    ```

    Using with ModelOutput classes:
    ```python
    @dataclass
    @auto_docstring(
        custom_intro="Custom model outputs with additional fields."
    )
    class MyModelOutput(ImageClassifierOutput):
        r'''
        loss (`torch.FloatTensor`, *optional*):
            The loss of the model.
        custom_field (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*):
            A custom output field specific to this model.
        '''

        # Standard fields like hidden_states, logits, attentions etc. can be automatically documented
        # However, given that the loss docstring is often different per model, you should document it above
        loss: Optional[torch.FloatTensor] = None
        logits: Optional[torch.FloatTensor] = None
        hidden_states: Optional[tuple[torch.FloatTensor, ...]] = None
        attentions: Optional[tuple[torch.FloatTensor, ...]] = None
        custom_field: Optional[torch.FloatTensor] = None
    ```
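
    Pinning the `checkpoint` used in generated examples (a minimal sketch; `my-org/my-model`
    is a placeholder repo id, not a real checkpoint):
    ```python
    @auto_docstring(checkpoint="my-org/my-model")
    def forward(self, input_ids, attention_mask=None):
        # Generated usage examples will reference "my-org/my-model" instead of a
        # checkpoint inferred from the model's config class.
        ...
    ```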

Args:
    custom_intro (`str`, *optional*):
        Custom introduction text to add to the docstring. This replaces the default
        introduction text generated by the decorator before the Args section. Use this to describe what
        makes your model or method special.
    custom_args (`str`, *optional*):
        Custom argument documentation in docstring format. This allows you to define
        argument descriptions once and reuse them across multiple methods. The format should follow the
        standard docstring convention: `arg_name (`type`, *optional*, defaults to `value`): Description.`
    checkpoint (`str`, *optional*):
        Checkpoint name to use in examples within the docstring. This is typically
        automatically inferred from the model configuration class, but can be overridden if needed for
        custom examples.

Note:
    - Standard arguments (`input_ids`, `attention_mask`, `pixel_values`, etc.) are automatically documented
      from predefined descriptions and should not be redefined unless their behavior differs in your model
      (see the sketch after this list).
    - New or custom arguments should be documented in the method's docstring using the `r''' '''` block
      or passed via the `custom_args` parameter.
    - For model classes, the decorator derives parameter descriptions from the `__init__` method's signature
      and docstring.
    - Return value documentation is automatically generated for methods that return ModelOutput subclasses.
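
    As noted above, a standard argument may be re-documented when its behavior differs
    (a minimal sketch; the shape and description are illustrative only):
    ```python
    @auto_docstring
    def forward(self, input_ids, attention_mask=None):
        r'''
        input_ids (`torch.LongTensor` of shape `(batch_size, 2 * sequence_length)`):
            Interleaved pairs of token ids; differs from the standard `input_ids` layout.
        '''
        ...
    ```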
    """

    def auto_docstring_decorator(obj):
        # A dotted __qualname__ ("Class.method") means `obj` is a method; bare classes
        # go through auto_class_docstring instead (reconstructed from the recoverable
        # dispatch fragments; the attribute name is an assumption).
        if len(obj.__qualname__.split(".")) > 1:
            return auto_method_docstring(
                obj, custom_intro=custom_intro, custom_args=custom_args, checkpoint=checkpoint
            )
        return auto_class_docstring(obj, custom_intro=custom_intro, custom_args=custom_args, checkpoint=checkpoint)

    # Support both bare `@auto_docstring` and parametrized `@auto_docstring(...)` usage.
    if obj:
        return auto_docstring_decorator(obj)
    return auto_docstring_decorator