
    Z j|                        S r SSKJr  SSKJr  SSKrSSKJr  SSKJr  SSK	J
r  SS	KJr  SS
KJr  SSKJrJrJr  SSKJrJr  SSKJr  SSKJr  SSKJrJrJrJrJ r   SSK!J"r"  SSK#J$r$  SSK%J&r&  \RN                  " \(5      r) " S S\RT                  5      r+ S>S\RT                  S\RX                  S\RX                  S\RX                  S\RX                  S-  S\-S\-4S jjr. " S S\RT                  5      r/ " S  S!\RT                  5      r0 " S" S#\RT                  5      r1 " S$ S%\RT                  5      r2 " S& S'\RT                  5      r3 " S( S)\5      r4 " S* S+\RT                  5      r5\ " S, S-\5      5       r6\ " S. S/\65      5       r7 " S0 S1\RT                  5      r8 " S2 S3\RT                  5      r9\ " S4 S5\65      5       r:\" S6S79\ " S8 S9\5      5       5       r;\" S:S79 " S; S<\65      5       r</ S=Qr=g)?zPyTorch Splinter model.    )Callable)	dataclassN)nn)CrossEntropyLoss   )initialization)ACT2FN)GradientCheckpointingLayer)BaseModelOutputModelOutputQuestionAnsweringModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)apply_chunking_to_forward)TransformersKwargsauto_docstringcan_return_tupleloggingtorch_compilable_check)merge_with_config_defaults)capture_outputs   )SplinterConfigc                      ^  \ rS rSrSrU 4S jr    SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\	4
S
 jjr
SrU =r$ )SplinterEmbeddings'   zGConstruct the embeddings from word, position and token_type embeddings.c                 v  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l	        [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        U R#                  S[$        R&                  " UR                  5      R)                  S5      SS9  g )N)padding_idxepsposition_idsr   F)
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_buffertorcharangeexpandselfconfig	__class__s     /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/splinter/modeling_splinter.pyr'   SplinterEmbeddings.__init__*   s    !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<= 	ELL)G)GHOOPWXej 	 	
    N	input_idstoken_type_idsr"   inputs_embedsreturnc                    Ub  UR                  5       nOUR                  5       S S nUS   nUc  U R                  S S 2S U24   nUc8  [        R                  " U[        R                  U R                  R
                  S9nUc  U R                  U5      nU R                  U5      nXG-   nU R                  U5      n	X-  nU R                  U5      nU R                  U5      nU$ )Nr$   r   dtypedevice)sizer"   r7   zeroslongrH   r,   r0   r.   r1   r5   )
r;   rA   rB   r"   rC   input_shape
seq_lengthr0   
embeddingsr.   s
             r>   forwardSplinterEmbeddings.forward8   s      #..*K',,.s3K ^
,,Q^<L!"[[EJJtO`O`OgOghN  00;M $ : :> J":
"66|D)
^^J/
\\*-
r@   )r1   r5   r.   r0   r,   )NNNN)__name__
__module____qualname____firstlineno____doc__r'   r7   
LongTensorFloatTensortuplerO   __static_attributes____classcell__r=   s   @r>   r   r   '   s    Q
  .2260426##d* ((4/ &&-	
 ((4/ 
 r@   r   modulequerykeyvalueattention_maskscalingr5   c                    [         R                  " XR                  SS5      5      U-  nUb  X-   n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )N   r   r$   )dimrG   )ptrainingr   )r7   matmul	transposer   
functionalsoftmaxfloat32torG   r5   rf   
contiguous)
r\   r]   r^   r_   r`   ra   r5   kwargsattn_weightsattn_outputs
             r>   eager_attention_forwardrq   Z   s     <<}}Q':;gEL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|3K''1-88:K$$r@   c                      ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
\R                  \R                  S-  4   4S jjrS	rU =r$ )SplinterSelfAttentionq   c                 6  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                  U R                  5      U l        [        R                  " UR                   5      U l        UR                   U l        U R                  S-  U l        g )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()g      )r&   r'   r*   num_attention_headshasattr
ValueErrorr<   intattention_head_sizeall_head_sizer   Linearr]   r^   r_   r3   attention_probs_dropout_probr5   attention_dropoutra   r:   s     r>   r'   SplinterSelfAttention.__init__r   sD    : ::a?PVXhHiHi#F$6$6#7 8 445Q8 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF!'!D!D//5r@   Nhidden_statesr`   rn   rD   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      n	U	" U UUUU4U R                  (       d  SOU R                  U R                  S.UD6u  pU
R                  " / UQSP76 R!                  5       n
X4$ )Nr$   r   rc           )r5   ra   )shaper|   r]   viewrh   r^   r_   r   get_interfacer<   _attn_implementationrq   rf   r   ra   reshaperm   )r;   r   r`   rn   rL   hidden_shapequery_states
key_statesvalue_statesattention_interfacerp   ro   s               r>   rO   SplinterSelfAttention.forward   s8    $))#2.CCbC$*B*BCzz-055lCMMaQRSXXm,11,?II!QO
zz-055lCMMaQRS(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFH((r@   )
r}   r   r|   r<   r5   r^   rx   r]   ra   r_   N)rQ   rR   rS   rT   r'   r7   TensorrW   r   r   rX   rO   rY   rZ   r[   s   @r>   rs   rs   q   si    60 48)||) ))D0) +,	)
 
u||U\\D00	1) )r@   rs   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )SplinterSelfOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  5      U l
        g Nr    )r&   r'   r   r~   r*   denser1   r2   r3   r4   r5   r:   s     r>   r'   SplinterSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r@   r   input_tensorrD   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r5   r1   r;   r   r   s      r>   rO   SplinterSelfOutput.forward   5    

=1]3}'CDr@   r1   r   r5   
rQ   rR   rS   rT   r'   r7   r   rO   rY   rZ   r[   s   @r>   r   r      6    >U\\  RWR^R^  r@   r   c            	          ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	rU =r$ )SplinterAttention   c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r   )r&   r'   rs   r;   r   outputr:   s     r>   r'   SplinterAttention.__init__   s&    )&1	(0r@   Nr   r`   rn   rD   c                 Z    UnU R                   " U4SU0UD6u  pU R                  X5      nU$ Nr`   )r;   r   )r;   r   r`   rn   residual_s         r>   rO   SplinterAttention.forward   sE     !99
)
 

 M<r@   )r   r;   r   )rQ   rR   rS   rT   r'   r7   r   rW   r   r   rO   rY   rZ   r[   s   @r>   r   r      sV    1 48|| ))D0 +,	
 
 r@   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )SplinterIntermediate   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r&   r'   r   r~   r*   intermediate_sizer   
isinstance
hidden_actstrr	   intermediate_act_fnr:   s     r>   r'   SplinterIntermediate.__init__   s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r@   r   rD   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r;   r   s     r>   rO   SplinterIntermediate.forward   s&    

=100?r@   r   r   r[   s   @r>   r   r      s(    9U\\ ell  r@   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )SplinterOutput   c                 (  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  S9U l        [        R                  " UR                  5      U l        g r   )r&   r'   r   r~   r   r*   r   r1   r2   r3   r4   r5   r:   s     r>   r'   SplinterOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r@   r   r   rD   c                 p    U R                  U5      nU R                  U5      nU R                  X-   5      nU$ r   r   r   s      r>   rO   SplinterOutput.forward   r   r@   r   r   r[   s   @r>   r   r      r   r@   r   c            	          ^  \ rS rSrU 4S jr SS\R                  S\R                  S-  S\\	   S\R                  4S jjr
S	 rS
rU =r$ )SplinterLayer   c                    > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        g )Nr   )
r&   r'   chunk_size_feed_forwardseq_len_dimr   	attentionr   intermediater   r   r:   s     r>   r'   SplinterLayer.__init__   sI    '-'E'E$*6208$V,r@   Nr   r`   rn   rD   c                     U R                   " U4SU0UD6n[        U R                  U R                  U R                  U5      nU$ r   )r   r   feed_forward_chunkr   r   )r;   r   r`   rn   s       r>   rO   SplinterLayer.forward   sW     
)
 
 2##T%A%A4CSCSUb
 r@   c                 J    U R                  U5      nU R                  X!5      nU$ r   )r   r   )r;   attention_outputintermediate_outputlayer_outputs       r>   r    SplinterLayer.feed_forward_chunk  s)    "//0@A{{#6Ir@   )r   r   r   r   r   r   )rQ   rR   rS   rT   r'   r7   r   rW   r   r   rO   r   rY   rZ   r[   s   @r>   r   r      s[    - 48|| ))D0 +,	
 
$ r@   r   c            	       |   ^  \ rS rSrU 4S jr S
S\R                  S\R                  S-  S\\	   S\
4S jjrS	rU =r$ )SplinterEncoderi  c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf )NF)
r&   r'   r<   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r;   r<   ir=   s      r>   r'   SplinterEncoder.__init__  sR    ]]5IaIaCb#cCbaM&$9Cb#cd
&+# $ds   A&Nr   r`   rn   rD   c                 N    U R                    H  nU" UU40 UD6nM     [        US9$ )Nlast_hidden_state)r   r   )r;   r   r`   rn   layer_modules        r>   rO   SplinterEncoder.forward  s>     !JJL( M ' +
 	
r@   )r<   r   r   r   )rQ   rR   rS   rT   r'   r7   r   rW   r   r   r   rO   rY   rZ   r[   s   @r>   r   r     sR    , 48
||
 ))D0
 +,	

 

 
r@   r   c                   F   ^  \ rS rSr% \\S'   SrSr\\	S.r
U 4S jrSrU =r$ )SplinterPreTrainedModeli(  r<   splinterT)r   
attentionsc                   > [         TU ]  U5        [        U[        5      (       a\  [        R
                  " UR                  [        R                  " UR                  R                  S   5      R                  S5      5        g g )Nr$   r#   )r&   _init_weightsr   r   initcopy_r"   r7   r8   r   r9   )r;   r\   r=   s     r>   r   %SplinterPreTrainedModel._init_weights2  s^    f%f011JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 2r@    )rQ   rR   rS   rT   r   __annotations__base_model_prefixsupports_gradient_checkpointingr   rs   _can_record_outputsr   rY   rZ   r[   s   @r>   r   r   (  s/    "&*#&+
i ir@   r   c                     ^  \ rS rSrSrU 4S jrS rS r\\	\
     SS\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\\   S\\-  4S jj5       5       5       rSrU =r$ )SplinterModeli8  a"  
The model is an encoder (with only self-attention) following the architecture described in [Attention is all you
need](https://huggingface.co/papers/1706.03762) by Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones,
Aidan N. Gomez, Lukasz Kaiser and Illia Polosukhin.
c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U R                  5         g r   )r&   r'   r<   r   rN   r   encoder	post_initr:   s     r>   r'   SplinterModel.__init__@  s9     ,V4&v. 	r@   c                 .    U R                   R                  $ r   rN   r,   )r;   s    r>   get_input_embeddings"SplinterModel.get_input_embeddingsJ  s    ...r@   c                 $    XR                   l        g r   r   )r;   r_   s     r>   set_input_embeddings"SplinterModel.set_input_embeddingsM  s    */'r@   NrA   r`   rB   r"   rC   rn   rD   c                    Ub  Ub  [        S5      eUb"  U R                  X5        UR                  5       nO"Ub  UR                  5       SS nO[        S5      eUu  pUb  UR                  OUR                  n
Uc  [        R
                  " X4U
S9nUc$  [        R                  " U[        R                  U
S9nU R                  X'5      nU R                  UUUUS9nU R                  " U4SU0UD6nUS	   n[        US
9$ )a  
token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
NzDYou cannot specify both input_ids and inputs_embeds at the same timer$   z5You have to specify either input_ids or inputs_embeds)rH   rF   )rA   r"   rB   rC   r`   r   r   )rz   %warn_if_padding_and_no_attention_maskrI   rH   r7   onesrJ   rK   get_extended_attention_maskrN   r   r   )r;   rA   r`   rB   r"   rC   rn   rL   
batch_sizerM   rH   extended_attention_maskembedding_outputencoder_outputssequence_outputs                  r>   rO   SplinterModel.forwardP  s,   6  ]%>cdd"66yQ#..*K&',,.s3KTUU!,
%.%:!!@T@T!"ZZ*)A6RN!"[[EJJvVN 150P0PQ_0m??%)'	 + 
 ,,
2
 

 *!,-
 	
r@   )r<   rN   r   )NNNNN)rQ   rR   rS   rT   rU   r'   r   r   r   r   r   r7   r   r   r   rX   r   rO   rY   rZ   r[   s   @r>   r   r   8  s    /0   *..2.2,0-1=
<<$&=
 t+=
 t+	=

 llT)=
 ||d*=
 +,=
 
	 =
    =
r@   r   c                   f   ^  \ rS rSrSU 4S jjrS\R                  S\R                  4S jrSrU =r	$ )SplinterFullyConnectedLayeri  c                    > [         TU ]  5         Xl        X l        [        R
                  " U R                  U R                  5      U l        [        U   U l        [        R                  " U R                  5      U l	        g r   )
r&   r'   	input_dim
output_dimr   r~   r   r	   act_fnr1   )r;   r	  r
  r   r=   s       r>   r'   $SplinterFullyConnectedLayer.__init__  sR    "$YYt~~t?
Z(doo6r@   inputsrD   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r  r1   )r;   r  r   s      r>   rO   #SplinterFullyConnectedLayer.forward  s2    

6*M2}5r@   )r1   r  r   r	  r
  )gelur   r[   s   @r>   r  r    s(    7ell u||  r@   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )QuestionAwareSpanSelectionHeadi  z^
Implementation of Question-Aware Span Selection (QASS) head, described in Splinter's paper:

c                   > [         TU ]  5         [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        UR                  UR                  5      U l        [        R                  " UR                  UR                  SS9U l
        [        R                  " UR                  UR                  SS9U l        g )NF)bias)r&   r'   r  r*   query_start_transformquery_end_transformstart_transformend_transformr   r~   start_classifierend_classifierr:   s     r>   r'   'QuestionAwareSpanSelectionHead.__init__  s    %@ASASU[UgUg%h"#>v?Q?QSYSeSe#f :6;M;MvOaOab89K9KVM_M_` "		&*<*<f>P>PW\ ] ii(:(:F<N<NUZ[r@   c                    UR                  5       u    p4UR                  S5      R                  SSU5      n[        R                  " USUS9nU R                  U5      nU R                  U5      nU R                  U5      n	U R                  U5      n
U R                  U5      nU	R                  SSS5      n	[        R                  " X5      nU R                  U5      nU
R                  SSS5      n
[        R                  " X5      nX4$ )Nr$   r   )rd   indexr   rc   )rI   	unsqueezerepeatr7   gatherr  r  r  r  r  permuterg   r  )r;   r  	positionsr   rd   r  gathered_repsquery_start_repsquery_end_reps
start_repsend_repsr   start_logits
end_logitss                 r>   rO   &QuestionAwareSpanSelectionHead.forward  s    KKM	1##B'..q!S9V%@55mD11-@))&1
%%f---.>?''1a0
||M>++N;##Aq!,\\-:
''r@   )r  r  r  r  r  r  )	rQ   rR   rS   rT   rU   r'   rO   rY   rZ   r[   s   @r>   r  r    s    
	\( (r@   r  c                   ^  ^  \ rS rSrU 4S jr\\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\
\   S\\-  4S jj5       5       rSrU =r$ )SplinterForQuestionAnsweringi  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  U l        U R                  5         g r   r&   r'   r   r   r  splinter_qassquestion_token_idr   r:   s     r>   r'   %SplinterForQuestionAnswering.__init__  C     %f-;FC!'!9!9 	r@   NrA   r`   rB   r"   rC   start_positionsend_positionsquestion_positionsrn   rD   c	                    Sn
Uc  UbB  [         R                  " [         R                  " XR                  5      R	                  5       SS9nOH[         R
                  " UR                  S5      [         R                  UR                  UR                  S9nUR                  S5      nSn
U R                  " U4UUUUS.U	D6nUS   nU R                  X5      u  pU
(       a!  UR                  S	5      UR                  S	5      pUbf  US	U-
  [         R                  " UR                  5      R                   -  -   nUS	U-
  [         R                  " UR                  5      R                   -  -   nSnUb  Ub  [#        UR                  5       5      S	:  a  UR                  S5      n[#        UR                  5       5      S	:  a  UR                  S5      nUR                  S	5      nUR%                  SU5        UR%                  SU5        ['        US
9nU" X5      nU" X5      nUU-   S-  n[)        UUUUR*                  UR,                  S9$ )aI  
token_type_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
    num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
    the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
    sequence_length)`.
FNr$   )rd   r   )rG   layoutrH   Tr`   rB   r"   rC   r   ignore_indexrc   lossr(  r)  r   r   )r7   argmaxeqr0  r{   rJ   rI   rK   r7  rH   r  r   r/  squeezefinforG   minlenclamp_r   r   r   r   )r;   rA   r`   rB   r"   rC   r3  r4  r5  rn   question_positions_were_none"question_position_for_each_exampleoutputsr  r(  r)  
total_lossignored_indexloss_fct
start_lossend_losss                        r>   rO   $SplinterForQuestionAnswering.forward  s7   D (-$%$5:\\XXi)?)?@EEGR62 6;[[!&&q)MDXDXanauau62 "D!M!Mb!Q+/(--
))%'
 
 "!*#'#5#5o#Z ''3';';A'>
@R@RST@U*%'1~+=\M_M_A`AdAd*ddL#q>'9U[[IYIY=Z=^=^&^^J
&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
:H$x/14J+%!!//))
 	
r@   r0  r   r/  NNNNNNNN)rQ   rR   rS   rT   r'   r   r   r7   r   rV   r   r   rX   r   rO   rY   rZ   r[   s   @r>   r,  r,    s     *..2.2,0-137156:W
<<$&W
 t+W
 t+	W

 llT)W
 ||d*W
 ))D0W
 ''$.W
 ",,t3W
 +,W
 
-	-W
  W
r@   r,  zB
    Class for outputs of Splinter as a span selection model.
    )custom_introc                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\R                  S-  \S'   Sr\\R                     S-  \S'   Sr\\R                     S-  \S'   S	rg)
SplinterForPreTrainingOutputi3  a  
loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when start and end positions are provided):
    Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
start_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Span-start scores (before SoftMax).
end_logits (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Span-end scores (before SoftMax).
Nr<  r(  r)  r   r   r   )rQ   rR   rS   rT   rU   r<  r7   rW   r   r(  r)  r   rX   r   rY   r   r@   r>   rQ  rQ  3  s|     &*D%

d
")-1L%##d*1+/J!!D(/59M5**+d2926Je''(4/6r@   rQ  z
    Splinter Model for the recurring span selection task as done during the pretraining. The difference to the QA task
    is that we do not have a question, but multiple question tokens that replace the occurrences of recurring spans
    instead.
    c                     ^  \ rS rSrU 4S jr\\        SS\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S	\R                  S-  S
\R                  S-  S\R                  S-  S\
\   S\\-  4S jj5       5       rS\R                  S\R                  4S jrSrU =r$ )SplinterForPreTrainingiJ  c                    > [         TU ]  U5        [        U5      U l        [	        U5      U l        UR                  U l        U R                  5         g r   r.  r:   s     r>   r'   SplinterForPreTraining.__init__R  r2  r@   NrA   r`   rB   r"   rC   r3  r4  r5  rn   rD   c	                    Uc  Ub  Ub  [        S5      eUc  Uc  [        S5      eUc  U R                  U5      nU R                  " U4UUUUS.U	D6n
U
S   nUR                  5       u  pnU R	                  X5      u  nnUR                  S5      nUb  UR                  S5      R                  UUU5      nUSU-
  [        R                  " UR                  5      R                  -  -   nUSU-
  [        R                  " UR                  5      R                  -  -   nSnUb  Ub  UR                  S[        SUS-
  5      5        UR                  S[        SUS-
  5      5        [        U R                  R                  S9nU" UR!                  UU-  U5      UR!                  UU-  5      5      nU" UR!                  UU-  U5      UR!                  UU-  5      5      nUU-   S-  n[#        UUUU
R$                  U
R&                  S	9$ )
a
  
input_ids (`torch.LongTensor` of shape `(batch_size, num_questions, sequence_length)`):
    Indices of input sequence tokens in the vocabulary.

    Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
    [`PreTrainedTokenizer.__call__`] for details.

    [What are input IDs?](../glossary#input-ids)
token_type_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
    Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
    1]`:

    - 0 corresponds to a *sentence A* token,
    - 1 corresponds to a *sentence B* token.

    [What are token type IDs?](../glossary#token-type-ids)
position_ids (`torch.LongTensor` of shape `batch_size, num_questions, sequence_length`, *optional*):
    Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
    config.max_position_embeddings - 1]`.

    [What are position IDs?](../glossary#position-ids)
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_questions, sequence_length, hidden_size)`, *optional*):
    Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
    is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
    model's internal embedding lookup matrix.
start_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    Labels for position (index) of the start of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
end_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    Labels for position (index) of the end of the labelled span for computing the token classification loss.
    Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
    are not taken into account for computing the loss.
question_positions (`torch.LongTensor` of shape `(batch_size, num_questions)`, *optional*):
    The positions of all question tokens. If given, start_logits and end_logits will be of shape `(batch_size,
    num_questions, sequence_length)`. If None, the first question token in each sequence in the batch will be
    the only one for which start_logits and end_logits are calculated and they will be of shape `(batch_size,
    sequence_length)`.
NzCquestion_positions must be specified in order to calculate the lossz?question_positions must be specified when inputs_embeds is usedr8  r   r   r9  rc   r;  )	TypeError_prepare_question_positionsr   rI   r/  r  r9   r7   r@  rG   rA  rC  maxr   r<   r+   r   rQ  r   r   )r;   rA   r`   rB   r"   rC   r3  r4  r5  rn   rF  r  r   sequence_lengthrd   r(  r)  num_questions attention_mask_for_each_questionrG  rI  rJ  rK  s                          r>   rO   SplinterForPreTraining.forward\  s=   j %/*E-Jcabb'I,=]^^'!%!A!A)!L--
))%'
 
 "!*+:+?+?+A(
S#'#5#5o#Z j*//2%/=/G/G/J/Q/QM?0, (1/O+OSXS^S^_k_q_qSrSvSv*vvL#q+K'Ku{{[e[k[kOlOpOp&ppJ
&=+D""1c!_q-@&AB  C?Q+>$?@ (T[[5M5MNH!!!*}"<oN$$Z-%?@J  
] :OL"":#=>H %x/14J+%!!//))
 	
r@   c                 6   [         R                  " XR                  R                  :H  5      u  p#[         R                  " U5      n[         R
                  " UR                  S5      UR                  5       4U R                  R                  [         R                  UR                  S9n[        UR                  S5      UR                  S5      :H  S5        [         R                  " U Vs/ s H  n[         R                  " U5      PM     sn5      nX5X'4'   U$ s  snf )Nr   rF   z?All samples in the batch must have at least one question token.)r7   wherer<   r0  bincountfullrI   rY  r+   rK   rH   r   catr8   )r;   rA   rowsflat_positionsr[  r"  ncolss           r>   rX  2SplinterForPreTraining._prepare_question_positions  s    ${{98U8U+UVt,JJ^^A 1 1 34KK$$**##	
	 	q!Y^^A%66M	
 yy=A=a%,,q/=AB .$* Bs   ( DrM  rN  )rQ   rR   rS   rT   r'   r   r   r7   r   rV   r   r   rX   rQ  rO   rX  rY   rZ   r[   s   @r>   rS  rS  J  s'     *..2.2,0-137156:n
<<$&n
 t+n
 t+	n

 llT)n
 ||d*n
 ))D0n
 ''$.n
 ",,t3n
 +,n
 
-	-n
  n
`U\\ ell  r@   rS  )r,  rS  r   r   r   )r   )>rU   collections.abcr   dataclassesr   r7   r   torch.nnr    r   r   activationsr	   modeling_layersr
   modeling_outputsr   r   r   modeling_utilsr   r   processing_utilsr   pytorch_utilsr   utilsr   r   r   r   r   utils.genericr   utils.output_capturingr   configuration_splinterr   
get_loggerrQ   loggerModuler   r   floatrq   rs   r   r   r   r   r   r   r   r   r  r  r,  rQ  rS  __all__r   r@   r>   <module>r{     s    $ !   % & ! 9 Z Z F & 6 j j 7 5 2 
		H	%/ /t %II%<<% 
% <<	%
 LL4'% % %.3)BII 3)n 		 .299  RYY . D
bii 
2 io i i W
+ W
 W
t")) $#(RYY #(L d
#: d
 d
N 
 7; 7 7" L4 LL^r@   