
    Z j2                         S SK r S SKrS SKrS SKJr  SSKJrJr  \R                  " \5      r	\ " S S5      5       r
\" SS9 " S	 S
5      5       r " S S5      r " S S\5      rg)    N)	dataclass   )is_torch_availableloggingc                   \    \ rS rSr% Sr\\S'   \\S'   Sr\S-  \S'   Sr\S-  \S'   S r	S	r
g)
InputExample   a  
A single training/test example for simple sequence classification.

Args:
    guid: Unique id for the example.
    text_a: string. The untokenized text of the first sequence. For single
        sequence tasks, only this sequence must be specified.
    text_b: (Optional) string. The untokenized text of the second sequence.
        Only must be specified for sequence pair tasks.
    label: (Optional) string. The label of the example. This should be
        specified for train and dev examples, but not for test examples.
guidtext_aNtext_blabelc                 Z    [         R                  " [        R                  " U 5      SS9S-   $ )*Serializes this instance to a JSON string.   )indent
jsondumpsdataclassesasdictselfs    s/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/data/processors/utils.pyto_json_stringInputExample.to_json_string/   s#    zz+,,T21=DD     )__name__
__module____qualname____firstlineno____doc__str__annotations__r   r   r   __static_attributes__r   r   r   r   r      s5     IKFC$JE3:Er   r   T)frozenc                   ~    \ rS rSr% Sr\\   \S'   Sr\\   S-  \S'   Sr	\\   S-  \S'   Sr
\\-  S-  \S'   S rS	rg)
InputFeatures4   a  
A single set of features of data. Property names are the same names as the corresponding inputs to a model.

Args:
    input_ids: Indices of input sequence tokens in the vocabulary.
    attention_mask: Mask to avoid performing attention on padding token indices.
        Mask values selected in `[0, 1]`: Usually `1` for tokens that are NOT MASKED, `0` for MASKED (padded)
        tokens.
    token_type_ids: (Optional) Segment token indices to indicate first and second
        portions of the inputs. Only some models use them.
    label: (Optional) Label corresponding to the input. Int for classification problems,
        float for regression problems.
	input_idsNattention_masktoken_type_idsr   c                 \    [         R                  " [        R                  " U 5      5      S-   $ )r   r   r   r   s    r   r   InputFeatures.to_json_stringI   s!    zz+,,T23d::r   r   )r   r    r!   r"   r#   listintr%   r,   r-   r   floatr   r&   r   r   r   r)   r)   4   sQ     Cy'+NDI$+'+NDI$+ $E3;$;r   r)   c                   P    \ rS rSrSrS rS rS rS rS r	S r
\SS
 j5       rSrg	)DataProcessorN   zEBase class for data converters for sequence classification data sets.c                     [        5       e)z
Gets an example from a dict.

Args:
    tensor_dict: Keys and values should match the corresponding Glue
        tensorflow_dataset examples.
NotImplementedError)r   tensor_dicts     r   get_example_from_tensor_dict*DataProcessor.get_example_from_tensor_dictQ   s     "##r   c                     [        5       e)z8Gets a collection of [`InputExample`] for the train set.r7   r   data_dirs     r   get_train_examples DataProcessor.get_train_examples[       !##r   c                     [        5       e)z6Gets a collection of [`InputExample`] for the dev set.r7   r=   s     r   get_dev_examplesDataProcessor.get_dev_examples_   rA   r   c                     [        5       e)z7Gets a collection of [`InputExample`] for the test set.r7   r=   s     r   get_test_examplesDataProcessor.get_test_examplesc   rA   r   c                     [        5       e)z*Gets the list of labels for this data set.r7   r   s    r   
get_labelsDataProcessor.get_labelsg   rA   r   c                     [        U R                  5       5      S:  a+  U R                  5       [        UR                  5         Ul        U$ )z
Some tensorflow_datasets datasets are not formatted the same way the GLUE datasets are. This method converts
examples to the correct format.
   )lenrI   r1   r   )r   examples     r   tfds_mapDataProcessor.tfds_mapk   s9    
 t !A% OO-c'--.@AGMr   Nc           	          [        USSS9 n[        [        R                  " USUS95      sSSS5        $ ! , (       d  f       g= f)z!Reads a tab separated value file.rz	utf-8-sig)encoding	)	delimiter	quotecharN)openr0   csvreader)cls
input_filerV   fs       r   	_read_tsvDataProcessor._read_tsvt   s3     *cK8A

1	JK 988s	   5
Ar   N)r   r    r!   r"   r#   r:   r?   rC   rF   rI   rO   classmethodr]   r&   r   r   r   r4   r4   N   s9    O$$$$$ L Lr   r4   c                       \ rS rSrSrSS jrS rS r\ SS j5       r	\SS j5       r
       SS	 jr SS
 jr     SS jrSrg)%SingleSentenceClassificationProcessor{   z@Generic processor for a single sentence classification data set.Nc                 L    Uc  / OUU l         Uc  / OUU l        X0l        X@l        g r_   )labelsexamplesmodeverbose)r   re   rf   rg   rh   s        r   __init__.SingleSentenceClassificationProcessor.__init__~   s'    "Nb&.H	r   c                 ,    [        U R                  5      $ r_   )rM   rf   r   s    r   __len__-SingleSentenceClassificationProcessor.__len__   s    4==!!r   c                     [        U[        5      (       a!  [        U R                  U R                  U   S9$ U R                  U   $ )N)re   rf   )
isinstanceslicerb   re   rf   )r   idxs     r   __getitem__1SingleSentenceClassificationProcessor.__getitem__   s<    c5!!8VZVcVcdgVhii}}S!!r   c                 B    U " S0 UD6nUR                  UUUUUUSSS9  U$ )NT)
split_namecolumn_labelcolumn_text	column_idskip_first_rowoverwrite_labelsoverwrite_examplesr   )add_examples_from_csv)	rZ   	file_nameru   rv   rw   rx   ry   kwargs	processors	            r   create_from_csv5SingleSentenceClassificationProcessor.create_from_csv   sB     M&M	''!%#)!# 	( 		
 r   c                 4    U " S0 UD6nUR                  XS9  U$ )N)re   r   )add_examples)rZ   texts_or_text_and_labelsre   r~   r   s        r   create_from_examples:SingleSentenceClassificationProcessor.create_from_examples   s%    M&M	7Gr   c	                 b   U R                  U5      n	U(       a  U	SS  n	/ n
/ n/ n[        U	5       Hm  u  pU
R                  X   5        UR                  X   5        Ub  UR                  X   5        MC  U(       a  U SU 3O
[        U5      nUR                  U5        Mo     U R	                  XXUS9$ )NrL   -)rz   r{   )r]   	enumerateappendr$   r   )r   r}   ru   rv   rw   rx   ry   rz   r{   linestextsre   idsiliner
   s                   r   r|   ;SingleSentenceClassificationProcessor.add_examples_from_csv   s     y)!"IE 'GALL*+MM$,-$

4?+.8*Qqc*c!f

4  (   3Vh ! 
 	
r   c           
      :   Ub;  [        U5      [        U5      :w  a#  [        S[        U5       S[        U5       35      eUb;  [        U5      [        U5      :w  a#  [        S[        U5       S[        U5       35      eUc  S /[        U5      -  nUc  S /[        U5      -  n/ n[        5       n[        XU5       HV  u  pn
[	        U[
        [        45      (       a  U	c  Uu  pOUnUR                  U	5        UR                  [        XS U	S95        MX     U(       a  X`l
        OU R                  R                  U5        U(       a  [        U5      U l        U R                  $ [        [        U R                  5      R                  U5      5      U l        U R                  $ )Nz(Text and labels have mismatched lengths z and z%Text and ids have mismatched lengths )r
   r   r   r   )rM   
ValueErrorsetzipro   tupler0   addr   r   rf   extendre   union)r   r   re   r   rz   r{   rf   added_labelstext_or_text_and_labelr   r
   texts               r   r   2SingleSentenceClassificationProcessor.add_examples   s    #&>"?3v;"N:3?W;X:YY^_bci_j^kl  ?s#;<CHDSIaEbDcchilmpiqhrstt;&3788C>Vc":;;Fu367OY\3]/"405$-@@U]4e-U#OOLdTYZ[ 4^ $MMM  * |,DK }} s4;;/55lCDDK}}r   c                 ^	   Uc  UR                   n[        U R                  5       VVs0 s H  u  pxX_M	     n	nn/ n
[        U R                  5       He  u  pUS-  S:X  a  [        R                  SU 35        UR                  UR                  S[        X!R                   5      S9nU
R                  U5        Mg     [        S U
 5       5      n/ n[        [        XR                  5      5       GH_  u  nu  pUS-  S:X  a.  [        R                  SU S	[        U R                  5       35        U(       a  S
OS/[        U5      -  nU[        U5      -
  nU(       a  U/U-  U-   nU(       a  SOS
/U-  U-   nOX/U-  -   nUU(       a  SOS
/U-  -   n[        U5      U:w  a  [        S[        U5       SU 35      e[        U5      U:w  a  [        S[        U5       SU 35      eU R                  S:X  a  XR                     nO;U R                  S:X  a  [!        UR                  5      nO[        U R                  5      eUS:  a  U R"                  (       a  [        R                  S5        [        R                  SUR$                   35        [        R                  SSR'                  U Vs/ s H  n[)        U5      PM     sn5       35        [        R                  SSR'                  U Vs/ s H  n[)        U5      PM     sn5       35        [        R                  SUR                   SU S35        UR                  [+        UUUS95        GMb     Uc  U$ US:X  Ga%  [-        5       (       d  [/        S5      eSSKnSSKJn  UR7                  U Vs/ s H  nUR8                  PM     snUR:                  S9n
UR7                  U Vs/ s H  nUR<                  PM     snUR:                  S9nU R                  S:X  a6  UR7                  U Vs/ s H  nUR                  PM     snUR:                  S9nOEU R                  S:X  a5  UR7                  U Vs/ s H  nUR                  PM     snUR                   S9nU" U
UW5      nU$ [        S5      es  snnf s  snf s  snf s  snf s  snf s  snf s  snf )au  
Convert examples in a list of `InputFeatures`

Args:
    tokenizer: Instance of a tokenizer that will tokenize the examples
    max_length: Maximum example length
    pad_on_left: If set to `True`, the examples will be padded on the left rather than on the right (default)
    pad_token: Padding token
    mask_padding_with_zero: If set to `True`, the attention mask will be filled by `1` for actual values
        and by `0` for padded values. If set to `False`, inverts it (`1` for padded values, `0` for actual
        values)

Returns:
    Will return a list of task-specific `InputFeatures` which can be fed to the model.

Ni'  r   zTokenizing example T)add_special_tokens
max_lengthc              3   8   #    U  H  n[        U5      v   M     g 7fr_   )rM   ).0r+   s     r   	<genexpr>ESingleSentenceClassificationProcessor.get_features.<locals>.<genexpr>  s     I=i3y>>=s   zWriting example /rL   zError with input length z vs classification
regression   z*** Example ***zguid: zinput_ids:  zattention_mask: zlabel: z (id = ))r+   r,   r   ptz8return_tensors set to 'pt' but PyTorch can't be imported)TensorDataset)dtypez)return_tensors should be `'pt'` or `None`)max_lenr   re   rf   loggerinfoencoder   minr   maxr   rM   r   rg   r   r2   rh   r
   joinr$   r)   r   RuntimeErrortorchtorch.utils.datar   tensorr+   longr,   )r   	tokenizerr   pad_on_left	pad_tokenmask_padding_with_zeroreturn_tensorsr   r   	label_mapall_input_idsex_indexrN   r+   batch_lengthfeaturesr,   padding_lengthxr   r   r\   all_attention_mask
all_labelsdatasets                            r   get_features2SingleSentenceClassificationProcessor.get_features   s/   2 "**J.7.DE.D(!UX.D	E!*4==!9H%1$1(<=!((#'z+<+<= ) I
   + ": I=II.7M==8Y.Z*H*y%1$.xj#dmm:L9MNO $:aqAC	NRN *C	N:N'[>9YF	(>1A"F"W[i!i%~)EF	!/9OAUV3WZh3h!i9~- #;C	N;K4P\~!^__>"l2 #;C<O;PPTUaTb!cddyy,,!--0l*gmm, ++!|-.fW\\N34k#((I3NIqCFI3N*O)PQR.sxx8XAQ8X/Y.Z[\ggmm_GE7!DEOOMIndijkG /[J !Ot#%''"#]^^6!LLx)Hx!!++x)HPUPZPZL[M!&.RAq/?/?.RZ_ZdZd!eyy,,"\\H*EHq177H*EUZZ\X
l*"\\H*EHq177H*EU[[\Y
#M3EzRGNHIIQ F` 4O8X *I.R*E*Es)   R'R(RRR R%R*)rf   re   rg   rh   )NNr   F) r   rL   NFr_   )r   r   rL   NFFF)NNFF)NFr   TN)r   r    r!   r"   r#   ri   rl   rr   r`   r   r   r|   r   r   r&   r   r   r   rb   rb   {   s    J""
 ej      
> kp#P #dJr   rb   )rX   r   r   r   utilsr   r   
get_loggerr   r   r   r)   r4   rb   r   r   r   <module>r      s        ! 0 
		H	% E E E0 $; ; ;2*L *LZOJM OJr   