
    Z j                        S r SSKrSSKJr  SSKJrJrJr  SSKJr	  SSK
JrJrJr  SSKJrJrJrJrJrJrJrJr  SSKJr  SS	KJr  SS
KJrJrJr  SSKJ r J!r!  SSK"J#r#  SSK$J%r%J&r&J'r'J(r(J)r)J*r*J+r+J,r,J-r-J.r.J/r/J0r0J1r1J2r2J3r3J4r4J5r5  SSK6J7r7  \Rp                  " \95      r: " S S\&5      r; " S S\55      r< " S S\%5      r= " S S\05      r> " S S\45      r? " S S\25      r@ " S S\'5      rA\ " S S \5      5       rB " S! S"\35      rC " S# S$\,5      rD " S% S&\+5      rE " S' S(\15      rF " S) S*\(5      rG " S+ S,\*5      rH " S- S.\.5      rI " S/ S0\)5      rJ " S1 S2\/5      rK " S3 S4\-5      rL/ S5QrMg)6zPyTorch ERNIE model.    N)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)CacheDynamicCacheEncoderDecoderCache),BaseModelOutputWithPoolingAndCrossAttentions!CausalLMOutputWithCrossAttentionsMaskedLMOutputMultipleChoiceModelOutputNextSentencePredictorOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging)can_return_tuplemerge_with_config_defaults)capture_outputs   )BertCrossAttentionBertEmbeddingsBertEncoderBertForMaskedLMBertForMultipleChoiceBertForNextSentencePredictionBertForPreTrainingBertForPreTrainingOutputBertForQuestionAnsweringBertForSequenceClassificationBertForTokenClassification	BertLayerBertLMHeadModelBertLMPredictionHead	BertModel
BertPoolerBertSelfAttention   )ErnieConfigc                      ^  \ rS rSrSrU 4S jr      SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\	S\R                  4S jjrSrU =r$ )ErnieEmbeddings>   zGConstruct the embeddings from word, position and token_type embeddings.c                    > [         TU ]  U5        UR                  U l        UR                  (       a1  [        R                  " UR
                  UR                  5      U l        g g )N)super__init__use_task_idnn	Embeddingtask_type_vocab_sizehidden_sizetask_type_embeddings)selfconfig	__class__s     x/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/ernie/modular_ernie.pyr4   ErnieEmbeddings.__init__A   sL     !--(*V5P5PRXRdRd(eD%     N	input_idstoken_type_idstask_type_idsposition_idsinputs_embedspast_key_values_lengthreturnc                    Ub  UR                  5       nOUR                  5       S S nUu  pUc  U R                  S S 2XiU-   24   nUc  [        U S5      (       aQ  U R                  R	                  UR
                  S   S5      n
[        R                  " U
SUS9n
U
R	                  X5      nO8[        R                  " U[        R                  U R                  R                  S9nUc  U R                  U5      nU R                  U5      nUR                  UR                  5      nX[-   nU R                  U5      nX-   nU R                  (       aP  Uc8  [        R                  " U[        R                  U R                  R                  S9nU R!                  U5      nX-  nU R#                  U5      nU R%                  U5      nU$ )NrB   r   r-   )dimindex)dtypedevice)sizerD   hasattrrB   expandshapetorchgatherzeroslongrM   word_embeddingstoken_type_embeddingstoposition_embeddingsr5   r:   	LayerNormdropout)r;   rA   rB   rC   rD   rE   rF   input_shape
batch_size
seq_lengthbuffered_token_type_idsrW   
embeddingsrY   r:   s                  r>   forwardErnieEmbeddings.forwardH   s     #..*K',,.s3K!,
,,Q0FVlIl0l-lmL
 !t-..*.*=*=*D*D\EWEWXYEZ\^*_'*/,,7NTU]i*j'!8!?!?
!W!&[

SWSdSdSkSk!l  00;M $ : :> J &(()>)E)EF":
"66|D5
 $ %KuzzRVRcRcRjRj k#'#<#<]#K .J^^J/
\\*-
r@   )r:   r5   )NNNNNr   )__name__
__module____qualname____firstlineno____doc__r4   rR   
LongTensorFloatTensorintTensorra   __static_attributes____classcell__r=   s   @r>   r0   r0   >   s    Qf .226150426&'3##d*3 ((4/3 ''$.	3
 &&-3 ((4/3 !$3 
3 3r@   r0   c                       \ rS rSrSrg)ErnieSelfAttention~    Nrc   rd   re   rf   rl   rr   r@   r>   rp   rp   ~       r@   rp   c                       \ rS rSrSrg)ErnieCrossAttention   rr   Nrs   rr   r@   r>   rv   rv      rt   r@   rv   c                       \ rS rSrSrg)
ErnieLayer   rr   Nrs   rr   r@   r>   ry   ry      rt   r@   ry   c                       \ rS rSrSrg)ErniePooler   rr   Nrs   rr   r@   r>   r|   r|      rt   r@   r|   c                       \ rS rSrSrg)ErnieLMPredictionHead   rr   Nrs   rr   r@   r>   r   r      rt   r@   r   c                       \ rS rSrSrg)ErnieEncoder   rr   Nrs   rr   r@   r>   r   r      rt   r@   r   c                   x   ^  \ rS rSr\rSrSrSrSr	Sr
Sr\\\S.r\R"                  " 5       U 4S j5       rSrU =r$ )ErniePreTrainedModel   ernieT)hidden_states
attentionscross_attentionsc                   > [         TU ]  U5        [        U[        5      (       a!  [        R
                  " UR                  5        g[        U[        5      (       a|  [        R                  " UR                  [        R                  " UR                  R                  S   5      R                  S5      5        [        R
                  " UR                  5        gg)zInitialize the weightsrI   )r-   rI   N)r3   _init_weights
isinstancer   initzeros_biasr0   copy_rD   rR   arangerQ   rP   rB   )r;   moduler=   s     r>   r   "ErniePreTrainedModel._init_weights   s     	f%f344KK$00JJv**ELL9L9L9R9RSU9V,W,^,^_f,ghKK--. 1r@   rr   )rc   rd   re   rf   r.   config_classbase_model_prefixsupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attn_supports_attention_backendry   rp   rv   _can_record_outputsrR   no_gradr   rl   rm   rn   s   @r>   r   r      sV    L&*#N"&#(/ ]]_/ /r@   r   c                     ^  \ rS rSrS/rSU 4S jjr\\\          SS\	R                  S-  S\	R                  S-  S\	R                  S-  S\	R                  S-  S	\	R                  S-  S
\	R                  S-  S\	R                  S-  S\	R                  S-  S\S-  S\S-  S\\   S\\	R                     \-  4S jj5       5       5       rSrU =r$ )
ErnieModel   ry   c                    > [         TU ]  X5        Xl        SU l        [	        U5      U l        [        U5      U l        U(       a  [        U5      OS U l	        U R                  5         g )NF)r3   r4   r<   gradient_checkpointingr0   r`   r   encoderr|   pooler	post_init)r;   r<   add_pooling_layerr=   s      r>   r4   ErnieModel.__init__   sQ    &&+#)&1#F+->k&)D 	r@   NrA   attention_maskrB   rC   rD   rE   encoder_hidden_statesencoder_attention_maskpast_key_values	use_cachekwargsrG   c           
         USL USL-  (       a  [        S5      eU R                  R                  (       a  U
b  U
OU R                  R                  n
OSn
U
(       ab  U	c_  Uc  U R                  R                  (       a.  [        [        U R                  S9[        U R                  S95      O[        U R                  S9n	U	b  U	R                  5       OSnU R                  UUUUUUS9nU R                  UUUUU	S9u  p(U R                  " U4UUUU	U
US.UD6nUS   nU R                  b  U R                  U5      OSn[        UUUR                  S	9$ )
  
task_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Task type embedding is a special embedding to represent the characteristic of different tasks, such as
    word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
    assign a `task_type_id` to each task and the `task_type_id` is in the range `[0,
    config.task_type_vocab_size-1]
Nz:You must specify exactly one of input_ids or inputs_embedsF)r<   r   )rA   rD   rB   rC   rE   rF   )r   r   embedding_outputr   r   )r   r   r   r   r   rD   )last_hidden_statepooler_outputr   )
ValueErrorr<   
is_decoderr   is_encoder_decoderr
   r	   get_seq_lengthr`   _create_attention_masksr   r   r   r   )r;   rA   r   rB   rC   rD   rE   r   r   r   r   r   rF   r   encoder_outputssequence_outputpooled_outputs                    r>   ra   ErnieModel.forward   sy   0 -t";<YZZ;;!!%.%:	@U@UII0 )48V8V $L$DlZ^ZeZeFfg!5  FUE`!?!?!Afg??%)''#9 + 
 261M1M)#9-"7+ 2N 2
. ,,	
)"7#9+%	
 	
 *!,8<8OO4UY;-'+;;
 	
r@   )r<   r`   r   r   r   )T)
NNNNNNNNNN)rc   rd   re   rf   _no_split_modulesr4   r   r   r   rR   rk   r   boolr   r   tupler   ra   rl   rm   rn   s   @r>   r   r      s=   %   *..2.2-1,0-1596:(,!%H
<<$&H
 t+H
 t+	H

 ||d*H
 llT)H
 ||d*H
  %||d2H
 !&t 3H
 H
 $;H
 +,H
 
u||	K	KH
    H
r@   r   c                       \ rS rSrSrg)ErnieForPreTrainingOutputi  rr   Nrs   rr   r@   r>   r   r     rt   r@   r   c                   n   \ rS rSrSSS.r\\        SS\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\	\
   S\\R                     \-  4S jj5       5       rSrg)ErnieForPreTrainingi  cls.predictions.bias'ernie.embeddings.word_embeddings.weightzcls.predictions.decoder.biaszcls.predictions.decoder.weightNrA   r   rB   rC   rD   rE   labelsnext_sentence_labelr   rG   c	           
         U R                   " U4UUUUUSS.U	D6n
U
SS u  pU R                  X5      u  pSnUbv  Ubs  [        5       nU" UR                  SU R                  R
                  5      UR                  S5      5      nU" UR                  SS5      UR                  S5      5      nUU-   n[        UUUU
R                  U
R                  S9$ )aj  
task_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Task type embedding is a special embedding to represent the characteristic of different tasks, such as
    word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
    assign a `task_type_id` to each task and the `task_type_id` is in the range `[0,
    config.task_type_vocab_size-1]
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked),
    the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
next_sentence_label (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the next sequence prediction (classification) loss. Input should be a sequence
    pair (see `input_ids` docstring) Indices should be in `[0, 1]`:

    - 0 indicates sequence B is a continuation of sequence A,
    - 1 indicates sequence B is a random sequence.

Example:

```python
>>> from transformers import AutoTokenizer, ErnieForPreTraining
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-1.0-base-zh")
>>> model = ErnieForPreTraining.from_pretrained("nghuyong/ernie-1.0-base-zh")

>>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
>>> outputs = model(**inputs)

>>> prediction_logits = outputs.prediction_logits
>>> seq_relationship_logits = outputs.seq_relationship_logits
```
Tr   rB   rC   rD   rE   return_dictNr   rI   )lossprediction_logitsseq_relationship_logitsr   r   )	r   clsr   viewr<   
vocab_sizer   r   r   )r;   rA   r   rB   rC   rD   rE   r   r   r   outputsr   r   prediction_scoresseq_relationship_score
total_lossloss_fctmasked_lm_lossnext_sentence_losss                      r>   ra   ErnieForPreTraining.forward  s    ^ **	
))'%'	
 	
 *1!&48HH_4\1
"5"A')H%&7&<&<RAWAW&XZ`ZeZefhZijN!)*@*E*Eb!*LNaNfNfgiNj!k'*<<J(/$:!//))
 	
r@   rr   NNNNNNNN)rc   rd   re   rf   _tied_weights_keysr   r   rR   rk   r   r   r   r   ra   rl   rr   r@   r>   r   r     s   (>*S
  *..2.2-1,0-1&*37H
<<$&H
 t+H
 t+	H

 ||d*H
 llT)H
 ||d*H
 t#H
 #\\D0H
 +,H
 
u||	8	8H
  H
r@   r   c                       \ rS rSr\\            SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\\R                     S-  S\	S-  S\
\R                  -  S\\   S\\R                     \-  4S jj5       5       rSrg)ErnieForCausalLMie  NrA   r   rB   rC   rD   rE   r   r   r   r   r   logits_to_keepr   rG   c                    U	b  SnU R                   " U4UUUUUUUU
USS.
UD6nUR                  n[        U[        5      (       a  [	        U* S5      OUnU R                  USS2USS24   5      nSnU	b)  U R                  " SUXR                  R                  S.UD6n[        UUUR                  UR                  UR                  UR                  S9$ )a@  
task_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Task type embedding is a special embedding to represent the characteristic of different tasks, such as
    word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
    assign a `task_type_id` to each task and the `task_type_id` is in the range `[0,
    config.task_type_vocab_size-1]
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
    `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are
    ignored (masked), the loss is only computed for the tokens with labels n `[0, ..., config.vocab_size]`
NFT)
r   rB   rC   rD   rE   r   r   r   r   r   )logitsr   r   )r   r   r   r   r   r   rr   )r   r   r   rj   slicer   loss_functionr<   r   r   r   r   r   r   )r;   rA   r   rB   rC   rD   rE   r   r   r   r   r   r   r   r   r   slice_indicesr   r   s                      r>   ra   ErnieForCausalLM.forwardf  s    : I@D

A
))'%'"7#9+A
 A
  118B>SV8W8W~ot4]k-=!(;<=%%pVF{{OeOepiopD0#33!//))$55
 	
r@   rr   )NNNNNNNNNNNr   )rc   rd   re   rf   r   r   rR   rk   listr   rj   r   r   r   r   ra   rl   rr   r@   r>   r   r   e  sM    *..2.2-1,0-1596:&*59!%-.=
<<$&=
 t+=
 t+	=

 ||d*=
 llT)=
 ||d*=
  %||d2=
 !&t 3=
 t#=
 ell+d2=
 $;=
 ell*=
 +,=
 
u||	@	@=
  =
r@   r   c                      \ rS rSrSSS.r\\         SS\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\	\
   S\\R                     \-  4S jj5       5       rSrg)ErnieForMaskedLMi  r   r   r   NrA   r   rB   rC   rD   rE   r   r   r   r   rG   c
                 <   U R                   " U4UUUUUUUSS.U
D6nUS   nU R                  U5      nSnU	bF  [        5       nU" UR                  SU R                  R
                  5      U	R                  S5      5      n[        UUUR                  UR                  S9$ )a#  
task_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Task type embedding is a special embedding to represent the characteristic of different tasks, such as
    word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
    assign a `task_type_id` to each task and the `task_type_id` is in the range `[0,
    config.task_type_vocab_size-1]
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
    config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
    loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
T)r   rB   rC   rD   rE   r   r   r   r   NrI   r   r   r   r   )	r   r   r   r   r<   r   r   r   r   )r;   rA   r   rB   rC   rD   rE   r   r   r   r   r   r   r   r   r   s                   r>   ra   ErnieForMaskedLM.forward  s    4 **
))'%'"7#9
 
 "!* HH_5')H%&7&<&<RAWAW&XZ`ZeZefhZijN$!//))	
 	
r@   rr   )	NNNNNNNNN)rc   rd   re   rf   r   r   r   rR   rk   r   r   r   r   ra   rl   rr   r@   r>   r   r     s   (>*S
  *..2.2-1,0-1596:&*2
<<$&2
 t+2
 t+	2

 ||d*2
 llT)2
 ||d*2
  %||d22
 !&t 32
 t#2
 +,2
 
u||	~	-2
  2
r@   r   c                   D   \ rS rSr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\\	   S\
\R                     \-  4S jj5       5       rSrg)ErnieForNextSentencePredictioni  NrA   r   rB   rC   rD   rE   r   r   rG   c           
         U R                   " U4UUUUUSS.UD6n	U	S   n
U R                  U
5      nSnUb2  [        5       nU" UR                  SS5      UR                  S5      5      n[	        UUU	R
                  U	R                  S9$ )a  
task_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Task type embedding is a special embedding to represent the characteristic of different tasks, such as
    word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
    assign a `task_type_id` to each task and the `task_type_id` is in the range `[0,
    config.task_type_vocab_size-1]
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the next sequence prediction (classification) loss. Input should be a sequence pair
    (see `input_ids` docstring). Indices should be in `[0, 1]`:

    - 0 indicates sequence B is a continuation of sequence A,
    - 1 indicates sequence B is a random sequence.

Example:

```python
>>> from transformers import AutoTokenizer, ErnieForNextSentencePrediction
>>> import torch

>>> tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-1.0-base-zh")
>>> model = ErnieForNextSentencePrediction.from_pretrained("nghuyong/ernie-1.0-base-zh")

>>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
>>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
>>> encoding = tokenizer(prompt, next_sentence, return_tensors="pt")

>>> outputs = model(**encoding, labels=torch.LongTensor([1]))
>>> logits = outputs.logits
>>> assert logits[0, 0] < logits[0, 1]  # next sentence was random
```
Tr   r-   NrI   r   r   )r   r   r   r   r   r   r   )r;   rA   r   rB   rC   rD   rE   r   r   r   r   seq_relationship_scoresr   r   s                 r>   ra   &ErnieForNextSentencePrediction.forward  s    Z **	
))'%'	
 	
  
"&((="9!')H!)*A*F*Fr1*Mv{{[]!_*#*!//))	
 	
r@   rr   NNNNNNN)rc   rd   re   rf   r   r   rR   rk   r   r   r   r   ra   rl   rr   r@   r>   r   r     s     *..2.2-1,0-1&*D
<<$&D
 t+D
 t+	D

 ||d*D
 llT)D
 ||d*D
 t#D
 +,D
 
u||	:	:D
  D
r@   r   c                   D   \ rS rSr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\\	   S\
\R                     \-  4S jj5       5       rSrg)ErnieForSequenceClassificationi/  NrA   r   rB   rC   rD   rE   r   r   rG   c           
         U R                   " U4UUUUUSS.UD6n	U	S   n
U R                  U
5      n
U R                  U
5      nSnUGb  U R                  R                  c  U R
                  S:X  a  SU R                  l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                  l        OSU R                  l        U R                  R                  S:X  aI  [        5       nU R
                  S:X  a&  U" UR                  5       UR                  5       5      nOU" X5      nOU R                  R                  S:X  a=  [        5       nU" UR                  SU R
                  5      UR                  S5      5      nO,U R                  R                  S:X  a  [        5       nU" X5      n[        UUU	R                   U	R"                  S	9$ )
a  
task_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Task type embedding is a special embedding to represent the characteristic of different tasks, such as
    word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
    assign a `task_type_id` to each task and the `task_type_id` is in the range `[0,
    config.task_type_vocab_size-1]
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Tr   r-   N
regressionsingle_label_classificationmulti_label_classificationrI   r   )r   r[   
classifierr<   problem_type
num_labelsrL   rR   rU   rj   r   squeezer   r   r   r   r   r   )r;   rA   r   rB   rC   rD   rE   r   r   r   r   r   r   r   s                 r>   ra   &ErnieForSequenceClassification.forward0  s   0 **	
))'%'	
 	
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
r@   rr   r   )rc   rd   re   rf   r   r   rR   rk   r   r   r   r   ra   rl   rr   r@   r>   r   r   /  s     *..2.2-1,0-1&*B
<<$&B
 t+B
 t+	B

 ||d*B
 llT)B
 ||d*B
 t#B
 +,B
 
u||	7	7B
  B
r@   r   c                   D   \ rS rSr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\\	   S\
\R                     \-  4S jj5       5       rSrg)ErnieForMultipleChoiceiw  NrA   r   rB   rC   rD   rE   r   r   rG   c           
         Ub  UR                   S   OUR                   S   n	Ub!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb!  UR                  SUR                  S5      5      OSnUb1  UR                  SUR                  S5      UR                  S5      5      OSnU R                  " U4UUUUUSS.UD6n
U
S   nU R	                  U5      nU R                  U5      nUR                  SU	5      nSnUb  [        5       nU" X5      n[        UUU
R                  U
R                  S9$ )aQ  
input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
task_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Task type embedding is a special embedding to represent the characteristic of different tasks, such as
    word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
    assign a `task_type_id` to each task and the `task_type_id` is in the range `[0,
    config.task_type_vocab_size-1]
position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
    model's internal embedding lookup matrix.
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
    num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
    `input_ids` above)
Nr-   rI   Tr   r   )
rQ   r   rN   r   r[   r   r   r   r   r   )r;   rA   r   rB   rC   rD   rE   r   r   num_choicesr   r   r   reshaped_logitsr   r   s                   r>   ra   ErnieForMultipleChoice.forwardx  s   ` -6,Aiooa(}GZGZ[\G]>G>SINN2y~~b'9:Y]	M[Mg,,R1D1DR1HImqM[Mg,,R1D1DR1HImqGSG_|((\->->r-BCei ( r=#5#5b#9=;M;Mb;QR 	 **	
))'%'	
 	
  
]3/ ++b+6')HO4D("!//))	
 	
r@   rr   r   )rc   rd   re   rf   r   r   rR   rk   r   r   r   r   ra   rl   rr   r@   r>   r   r   w  s     *..2.2-1,0-1&*U
<<$&U
 t+U
 t+	U

 ||d*U
 llT)U
 ||d*U
 t#U
 +,U
 
u||	8	8U
  U
r@   r   c                   D   \ rS rSr\\       SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\\	   S\
\R                     \-  4S jj5       5       rSrg)ErnieForTokenClassificationi  NrA   r   rB   rC   rD   rE   r   r   rG   c           
      F   U R                   " U4UUUUUSS.UD6n	U	S   n
U R                  U
5      n
U R                  U
5      nSnUb<  [        5       nU" UR	                  SU R
                  5      UR	                  S5      5      n[        UUU	R                  U	R                  S9$ )al  
task_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Task type embedding is a special embedding to represent the characteristic of different tasks, such as
    word-aware pre-training task, structure-aware pre-training task and semantic-aware pre-training task. We
    assign a `task_type_id` to each task and the `task_type_id` is in the range `[0,
    config.task_type_vocab_size-1]
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
Tr   r   NrI   r   )	r   r[   r   r   r   r   r   r   r   )r;   rA   r   rB   rC   rD   rE   r   r   r   r   r   r   r   s                 r>   ra   #ErnieForTokenClassification.forward  s    , **	
))'%'	
 	
 "!*,,71')HFKKDOO<fkk"oND$!//))	
 	
r@   rr   r   )rc   rd   re   rf   r   r   rR   rk   r   r   r   r   ra   rl   rr   r@   r>   r  r    s     *..2.2-1,0-1&*.
<<$&.
 t+.
 t+	.

 ||d*.
 llT).
 ||d*.
 t#.
 +,.
 
u||	4	4.
  .
r@   r  c                   d   \ rS rSr\\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\\	   S\
\R                     \-  4S jj5       5       rSrg)ErnieForQuestionAnsweringi  NrA   r   rB   rC   rD   rE   start_positionsend_positionsr   rG   c	           
         U R                   " U4UUUUUSS.U	D6n
U
S   nU R                  U5      nUR                  SSS9u  pUR                  S5      R	                  5       nUR                  S5      R	                  5       nSnUb  Ub  [        UR                  5       5      S:  a  UR                  S5      n[        UR                  5       5      S:  a  UR                  S5      nUR                  S5      nUR                  SU5      nUR                  SU5      n[        US9nU" X5      nU" X5      nUU-   S	-  n[        UUUU
R                  U
R                  S
9$ )r   Tr   r   r-   rI   )rJ   N)ignore_indexr   )r   start_logits
end_logitsr   r   )r   
qa_outputssplitr   
contiguouslenrN   clampr   r   r   r   )r;   rA   r   rB   rC   rD   rE   r  r	  r   r   r   r   r  r  r   ignored_indexr   
start_lossend_losss                       r>   ra   !ErnieForQuestionAnswering.forward  s{   * **	
))'%'	
 	
 "!*1#)<<r<#: #++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
:H$x/14J+%!!//))
 	
r@   rr   r   )rc   rd   re   rf   r   r   rR   rk   r   r   r   r   ra   rl   rr   r@   r>   r  r    s     *..2.2-1,0-1/3-1<
<<$&<
 t+<
 t+	<

 ||d*<
 llT)<
 ||d*<
 ,<
 ||d*<
 +,<
 
u||	;	;<
  <
r@   r  )
r   r   r   r   r   r  r   r  r   r   )Nrg   rR   torch.nnr6   r   r   r    r   r   cache_utilsr   r	   r
   modeling_outputsr   r   r   r   r   r   r   r   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   r   utils.output_capturingr   bert.modeling_bertr   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   configuration_ernier.   
get_loggerrc   loggerr0   rp   rv   ry   r|   r   r   r   r   r   r   r   r   r   r   r   r  r  __all__rr   r@   r>   <module>r%     s      A A & C C	 	 	 . & @ @ I 5    & - 
		H	%=n =@	* 		, 		 		* 		0 		; 	 /? / /2[
 [
|	 8 	P
, P
f@
 @
F:
 :
zG
%B G
TE
%B E
PX
2 X
v1
"< 1
h?
 8 ?
Dr@   