
    Z jLU                     8   S r SSKJr  SSKrSSKJrJr  SSKJr  SSKJ	r	  SSK
JrJrJr  S	S
KJr  SSKJr  \R$                  " \5      r\" SS9\ " S S\5      5       5       r\" SS9\ " S S\5      5       5       r\" SS9\ " S S\5      5       5       r\ " S S\	5      5       r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S \5      r\" S!S9 " S" S#\5      5       r\" S$S9 " S% S&\5      5       r\" S'S9 " S( S)\5      5       r / S*Qr!g)+z5PyTorch DPR model for Open Domain Question Answering.    )	dataclassN)Tensornn   )BaseModelOutputWithPooling)PreTrainedModel)ModelOutputauto_docstringlogging   )	BertModel   )	DPRConfigz6
    Class for outputs of [`DPRQuestionEncoder`].
    )custom_introc                       \ rS rSr% Sr\R                  \S'   Sr\	\R                  S4   S-  \S'   Sr
\	\R                  S4   S-  \S'   Srg)	DPRContextEncoderOutput(   a  
pooler_output (`torch.FloatTensor` of shape `(batch_size, embeddings_size)`):
    The DPR encoder outputs the *pooler_output* that corresponds to the context representation. Last layer
    hidden-state of the first token of the sequence (classification token) further processed by a Linear layer.
    This output is to be used to embed contexts for nearest neighbors queries with questions embeddings.
pooler_outputN.hidden_states
attentions __name__
__module____qualname____firstlineno____doc__torchFloatTensor__annotations__r   tupler   __static_attributes__r       u/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/dpr/modeling_dpr.pyr   r   (   S     $$$:>M5**C/047>7;Je'',-4;r#   r   c                       \ rS rSr% Sr\R                  \S'   Sr\	\R                  S4   S-  \S'   Sr
\	\R                  S4   S-  \S'   Srg)	DPRQuestionEncoderOutput;   a  
pooler_output (`torch.FloatTensor` of shape `(batch_size, embeddings_size)`):
    The DPR encoder outputs the *pooler_output* that corresponds to the question representation. Last layer
    hidden-state of the first token of the sequence (classification token) further processed by a Linear layer.
    This output is to be used to embed questions for nearest neighbors queries with context embeddings.
r   N.r   r   r   r   r   r#   r$   r'   r'   ;   r%   r#   r'   c                       \ rS rSr% Sr\R                  \S'   Sr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\\R                  S4   S-  \S'   Sr\\R                  S4   S-  \S	'   S
rg)DPRReaderOutputN   a  
start_logits (`torch.FloatTensor` of shape `(n_passages, sequence_length)`):
    Logits of the start index of the span for each passage.
end_logits (`torch.FloatTensor` of shape `(n_passages, sequence_length)`):
    Logits of the end index of the span for each passage.
relevance_logits (`torch.FloatTensor` of shape `(n_passages, )`):
    Outputs of the QA classifier of the DPRReader that corresponds to the scores of each passage to answer the
    question, compared to all the other passages.
start_logitsN
end_logitsrelevance_logits.r   r   r   )r   r   r   r   r   r   r   r    r-   r.   r   r!   r   r"   r   r#   r$   r*   r*   N   s~     ###+/J!!D(/15e''$.5:>M5**C/047>7;Je'',-4;r#   r*   c                       \ rS rSrSrSrg)DPRPreTrainedModelf   Tr   N)r   r   r   r   _supports_sdpar"   r   r#   r$   r0   r0   f   s    Nr#   r0   c                      ^  \ rS rSrSrS\4U 4S jjr      SS\S\S-  S\S-  S	\S-  S
\S\S\S\	\
\S4   -  4S jjr\S\4S j5       rSrU =r$ )


class DPREncoder(DPRPreTrainedModel):
    base_model_prefix = "bert_model"

    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.bert_model = BertModel(config, add_pooling_layer=False)
        if self.bert_model.config.hidden_size <= 0:
            raise ValueError("Encoder hidden_size can't be zero")
        self.projection_dim = config.projection_dim
        if self.projection_dim > 0:
            self.encode_proj = nn.Linear(self.bert_model.config.hidden_size, config.projection_dim)
        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor | None = None,
        token_type_ids: Tensor | None = None,
        inputs_embeds: Tensor | None = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = False,
    ) -> BaseModelOutputWithPooling | tuple[Tensor, ...]:
        outputs = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]
        # the representation is the final hidden state of the [CLS] token, optionally projected
        pooled_output = sequence_output[:, 0, :]

        if self.projection_dim > 0:
            pooled_output = self.encode_proj(pooled_output)

        if not return_dict:
            return (sequence_output, pooled_output) + outputs[2:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    @property
    def embeddings_size(self) -> int:
        if self.projection_dim > 0:
            return self.encode_proj.out_features
        return self.bert_model.config.hidden_size


class DPRSpanPredictor(DPRPreTrainedModel):
    base_model_prefix = "encoder"

    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.encoder = DPREncoder(config)
        self.qa_outputs = nn.Linear(self.encoder.embeddings_size, 2)
        self.qa_classifier = nn.Linear(self.encoder.embeddings_size, 1)
        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Tensor,
        inputs_embeds: Tensor | None = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = False,
    ) -> DPRReaderOutput | tuple[Tensor, ...]:
        # notations: N - number of questions in a batch, M - number of passages per question, L - sequence length
        n_passages, sequence_length = input_ids.size() if input_ids is not None else inputs_embeds.size()[:2]
        # feed encoder
        outputs = self.encoder(
            input_ids,
            attention_mask=attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = outputs[0]

        # compute logits
        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1).contiguous()
        end_logits = end_logits.squeeze(-1).contiguous()
        relevance_logits = self.qa_classifier(sequence_output[:, 0, :])

        # resize
        start_logits = start_logits.view(n_passages, sequence_length)
        end_logits = end_logits.view(n_passages, sequence_length)
        relevance_logits = relevance_logits.view(n_passages)

        if not return_dict:
            return (start_logits, end_logits, relevance_logits) + outputs[2:]

        return DPRReaderOutput(
            start_logits=start_logits,
            end_logits=end_logits,
            relevance_logits=relevance_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


class DPRPretrainedContextEncoder(DPRPreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config: DPRConfig
    base_model_prefix = "ctx_encoder"


class DPRPretrainedQuestionEncoder(DPRPreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config: DPRConfig
    base_model_prefix = "question_encoder"


class DPRPretrainedReader(DPRPreTrainedModel):
    """
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    """

    config: DPRConfig
    base_model_prefix = "span_predictor"
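

# Illustrative sketch, not part of the upstream DPR API: retrieval with the two encoder
# models defined below works by embedding questions and contexts separately and ranking
# contexts by the inner product of the resulting `pooler_output` vectors. The helper
# name below is hypothetical and only makes that scoring step concrete.
def _example_dpr_similarity(question_embeddings: Tensor, context_embeddings: Tensor) -> Tensor:
    """Score contexts against questions by dot product.

    `question_embeddings` has shape `(num_questions, embeddings_size)` and
    `context_embeddings` has shape `(num_contexts, embeddings_size)`; the result is a
    `(num_questions, num_contexts)` matrix where a higher score means more relevant.
    """
    return question_embeddings @ context_embeddings.T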


@auto_docstring(
    custom_intro="""
    The bare DPRContextEncoder transformer outputting pooler outputs as context representations.
    """
)
class DPRContextEncoder(DPRPretrainedContextEncoder):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.ctx_encoder = DPREncoder(config)
        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Tensor | None = None,
        attention_mask: Tensor | None = None,
        token_type_ids: Tensor | None = None,
        inputs_embeds: Tensor | None = None,
        output_attentions: bool | None = None,
        output_hidden_states: bool | None = None,
        return_dict: bool | None = None,
    ) -> DPRContextEncoderOutput | tuple[Tensor, ...]:
        r"""
        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. To match pretraining, DPR input sequence should be
            formatted with [CLS] and [SEP] tokens as follows:

            (a) For sequence pairs (for a pair title+text for example):

            ```
            tokens:         [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
            token_type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1
            ```

            (b) For single sequences (for a question for example):

            ```
            tokens:         [CLS] the dog is hairy . [SEP]
            token_type_ids:   0   0   0   0  0     0   0
            ```

            DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
            rather than the left.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        Examples:

        ```python
        >>> from transformers import DPRContextEncoder, DPRContextEncoderTokenizer

        >>> tokenizer = DPRContextEncoderTokenizer.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
        >>> model = DPRContextEncoder.from_pretrained("facebook/dpr-ctx_encoder-single-nq-base")
        >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
        >>> embeddings = model(input_ids).pooler_output
        ```
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = (
                torch.ones(input_shape, device=device)
                if input_ids is None
                else (input_ids != self.config.pad_token_id)
            )
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        outputs = self.ctx_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if not return_dict:
            return outputs[1:]
        return DPRContextEncoderOutput(
            pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions
        )


@auto_docstring(
    custom_intro="""
    The bare DPRQuestionEncoder transformer outputting pooler outputs as question representations.
    """
)
class DPRQuestionEncoder(DPRPretrainedQuestionEncoder):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.question_encoder = DPREncoder(config)
        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Tensor | None = None,
        attention_mask: Tensor | None = None,
        token_type_ids: Tensor | None = None,
        inputs_embeds: Tensor | None = None,
        output_attentions: bool | None = None,
        output_hidden_states: bool | None = None,
        return_dict: bool | None = None,
    ) -> DPRQuestionEncoderOutput | tuple[Tensor, ...]:
        r"""
        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. To match pretraining, DPR input sequence should be
            formatted with [CLS] and [SEP] tokens as follows:

            (a) For sequence pairs (for a pair title+text for example):

            ```
            tokens:         [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
            token_type_ids:   0   0  0    0    0     0       0   0   1  1  1  1   1   1
            ```

            (b) For single sequences (for a question for example):

            ```
            tokens:         [CLS] the dog is hairy . [SEP]
            token_type_ids:   0   0   0   0  0     0   0
            ```

            DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
            rather than the left.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        Examples:

        ```python
        >>> from transformers import DPRQuestionEncoder, DPRQuestionEncoderTokenizer

        >>> tokenizer = DPRQuestionEncoderTokenizer.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
        >>> model = DPRQuestionEncoder.from_pretrained("facebook/dpr-question_encoder-single-nq-base")
        >>> input_ids = tokenizer("Hello, is my dog cute ?", return_tensors="pt")["input_ids"]
        >>> embeddings = model(input_ids).pooler_output
        ```
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = (
                torch.ones(input_shape, device=device)
                if input_ids is None
                else (input_ids != self.config.pad_token_id)
            )
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        outputs = self.question_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        if not return_dict:
            return outputs[1:]
        return DPRQuestionEncoderOutput(
            pooler_output=outputs.pooler_output, hidden_states=outputs.hidden_states, attentions=outputs.attentions
        )


@auto_docstring(
    custom_intro="""
    The bare DPRReader transformer outputting span predictions.
    """
)
class DPRReader(DPRPretrainedReader):
    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.config = config
        self.span_predictor = DPRSpanPredictor(config)
        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        input_ids: Tensor | None = None,
        attention_mask: Tensor | None = None,
        inputs_embeds: Tensor | None = None,
        output_attentions: bool | None = None,
        output_hidden_states: bool | None = None,
        return_dict: bool | None = None,
    ) -> DPRReaderOutput | tuple[Tensor, ...]:
        r"""
        input_ids (`tuple[torch.LongTensor]` of shapes `(n_passages, sequence_length)`):
            Indices of input sequence tokens in the vocabulary. It has to be a sequence triplet with 1) the question,
            2) the passages titles and 3) the passages texts. To match pretraining, DPR `input_ids` sequence should
            be formatted with [CLS] and [SEP] with the format:

            `[CLS] <question token ids> [SEP] <titles ids> [SEP] <texts ids>`

            DPR is a model with absolute position embeddings so it's usually advised to pad the inputs on the right
            rather than the left.

            Indices can be obtained using [`DPRReaderTokenizer`]. See this class documentation for more details.

            [What are input IDs?](../glossary#input-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(n_passages, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.

        Examples:

        ```python
        >>> from transformers import DPRReader, DPRReaderTokenizer

        >>> tokenizer = DPRReaderTokenizer.from_pretrained("facebook/dpr-reader-single-nq-base")
        >>> model = DPRReader.from_pretrained("facebook/dpr-reader-single-nq-base")
        >>> encoded_inputs = tokenizer(
        ...     questions=["What is love ?"],
        ...     titles=["Haddaway"],
        ...     texts=["'What Is Love' is a song recorded by the artist Haddaway"],
        ...     return_tensors="pt",
        ... )
        >>> outputs = model(**encoded_inputs)
        >>> start_logits = outputs.start_logits
        >>> end_logits = outputs.end_logits
        >>> relevance_logits = outputs.relevance_logits
        ```
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
        elif input_ids is not None:
            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)

        return self.span_predictor(
            input_ids,
            attention_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )


__all__ = [
    "DPRContextEncoderOutput",
    "DPRPreTrainedModel",
    "DPRPretrainedContextEncoder",
    "DPRPretrainedQuestionEncoder",
    "DPRPretrainedReader",
    "DPRQuestionEncoderOutput",
    "DPRReaderOutput",
    "DPRContextEncoder",
    "DPRQuestionEncoder",
    "DPRReader",
]