
    Z jC                         S SK r S SKrS SKrSSKJr  SSKJrJrJ	r	  SSK
JrJrJrJrJr  \" 5       (       a  S SKrSSKJrJr   " S S	\5      r\" \" S
S95       " S S\5      5       rg)    N   )GenerationConfig)add_end_docstringsis_torch_availablerequires_backends   )ArgumentHandlerDatasetPipelinePipelineExceptionbuild_pipeline_init_args),MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES0MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING_NAMESc                   "    \ rS rSrSrSS jrSrg)%TableQuestionAnsweringArgumentHandler   z:
Handles arguments for the TableQuestionAnsweringPipeline
Nc                 T   [        U S5        SS KnUc  [        S5      eUGc/  [        U[        5      (       a(  UR                  S5      b  UR                  S5      b  U/nO[        U[        5      (       a  [        U5      S:  ay  [        S U 5       5      (       d  [        SS U 5        35      eUS   R                  S5      b  US   R                  S5      b  UnOz[        S	US   R                  5        S
35      e[        b  [        U[        5      (       d  [        U[        R                  5      (       a  U$ [        S[        U5       S35      eXS./nU HK  n[        US   UR                  5      (       a  M#  US   c  [        S5      eUR                  US   5      US'   MM     U$ )Npandasr   z(Keyword argument `table` cannot be None.querytablec              3   B   #    U  H  n[        U[        5      v   M     g 7fN)
isinstancedict.0ds     ڀ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/pipelines/table_question_answering.py	<genexpr>ATableQuestionAnsweringArgumentHandler.__call__.<locals>.<genexpr>-   s     >1:a..s   z:Keyword argument `table` should be a list of dict, but is c              3   8   #    U  H  n[        U5      v   M     g 7fr   )typer   s     r   r   r    /   s     UmglbcVZ[\V]V]gls   zIf keyword argument `table` is a list of dictionaries, each dictionary should have a `table` and `query` key, but only dictionary has keys z `table` and `query` keys.zZInvalid input. Keyword argument `table` should be either of type `dict` or `list`, but is ))r   r   zTable cannot be None.)r   r   
ValueErrorr   r   getlistlenallkeysr
   typesGeneratorTyper"   	DataFrame)selfr   r   kwargspdtqa_pipeline_inputstqa_pipeline_inputs          r   __call__.TableQuestionAnsweringArgumentHandler.__call__   s    	$)=GHH]%&&599W+=+IeiiX_N`Nl',g#E4((SZ!^>>>>$TUmglUmTno  8<<(4qg9N9Z*/'$JJOPQ(--/IZZtv  $E7)C)CzRWY^YlYlGmGm u+a) 
 .3#C"D"5092<<HH%g.6$%<==.0ll;Mg;V.W"7+ #6 #"     )NN)__name__
__module____qualname____firstlineno____doc__r2   __static_attributes__r5   r4   r   r   r      s    -#r4   r   T)has_tokenizerc                      ^  \ rS rSrSrSrSrSrSrSr	Sr
\" SS9r\" 5       4U 4S jjrS	 rS
 rU 4S jrSS jrSS jrSS jrS rSrU =r$ )TableQuestionAnsweringPipelineM   ah  
Table Question Answering pipeline using a `ModelForTableQuestionAnswering`. This pipeline is only available in
PyTorch.

Unless the model you're using explicitly sets these generation parameters in its configuration files
(`generation_config.json`), the following default values will be used:
- max_new_tokens: 256

Example:

```python
>>> from transformers import pipeline

>>> oracle = pipeline(model="google/tapas-base-finetuned-wtq")
>>> table = {
...     "Repository": ["Transformers", "Datasets", "Tokenizers"],
...     "Stars": ["36542", "4512", "3934"],
...     "Contributors": ["651", "77", "34"],
...     "Programming language": ["Python", "Python", "Rust, Python and NodeJS"],
... }
>>> oracle(query="How many stars does the transformers repository have?", table=table)
{'answer': 'AVERAGE > 36542', 'coordinates': [(0, 1)], 'cells': ['36542'], 'aggregator': 'AVERAGE'}
```

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This tabular question answering pipeline can currently be loaded from [`pipeline`] using the following task
identifier: `"table-question-answering"`.

The models that this pipeline can use are models that have been fine-tuned on a tabular question answering task.
See the up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=table-question-answering).
ztable,queryTF   )max_new_tokensc                   > [         TU ]  " S0 UD6  Xl        [        R                  " 5       nUR                  [        5        U R                  U5        [        U R                  R                  SS 5      =(       a!    [        U R                  R                  SS 5      U l        [        U R                  R                  S5      (       a  SU l        g S U l        g )Naggregation_labelsnum_aggregation_labelstapasr5   )super__init___args_parserr   copyupdater   check_model_typegetattrmodelconfig	aggregatehasattrr"   )r-   args_parserr.   mapping	__class__s       r   rG   'TableQuestionAnsweringPipeline.__init__}   s    "6"'BGGICDg& !2!24H$O 
T[JJ7U
  'tzz'8'8:NOOG	UY	r4   c                 &    U R                   " S0 UD6$ )Nr5   )rM   )r-   inputss     r   batch_inference.TableQuestionAnsweringPipeline.batch_inference   s    zz#F##r4   c           	      :   / n/ nSnUS   R                   S   nUS   R                  U R                  5      nUS   R                  U R                  5      nUS   R                  U R                  5      nSn	[        U5       GH=  n
UGb.  U	SS2S4   n[        R
                  " UR                  5       R                  5       5      nX   n	[        UR                   S   5       H  nU	SS2S4   R                  5       U   nU	SS2S4   R                  5       U   S-
  nU	SS2S4   R                  5       U   S-
  nUS:  d  M_  US:  d  Mg  US:X  d  Mo  [        XOU4   5      X'   M     [        R                  " U5      R                  [        R                  5      R                  U R                  5      U	SS2S4'   Xj   nXz   nX   n	U R                  UR                  S5      UR                  S5      U	R                  S5      S	9nUR                   nU R"                  (       a  UR%                  UR&                  5        UR%                  U5        [        R(                  R+                  US
9nUR,                  UR                  [        R.                  5      R                  UR,                  R                  5      -  n[0        R2                  " [4        5      n[7        UR9                  5       R                  5       5       H  u  nnU	SS2S4   R                  5       U   nU	SS2S4   R                  5       U   S-
  nU	SS2S4   R                  5       U   S-
  nUS:  d  Mb  US:  d  Mj  US:X  d  Mr  UUU4   R%                  U5        M     U Vs0 s H.  nU[        R:                  " UU   5      R=                  5       S:  _M0     nnGM@     [        R>                  " [A        U5      S5      nU R"                  (       d  U4$ U[        R>                  " [A        U5      S5      4$ s  snf )z
Inference used for models that need to process sequences in a sequential fashion, like the SQA models which
handle conversational query related to a table.
N	input_idsr   attention_masktoken_type_ids   r   r   )rZ   r[   r\   )logitsg      ?)!shapetodevicerangenp
zeros_likecpunumpytolistinttorch
from_numpyr"   longrM   	unsqueezer^   rO   appendlogits_aggregationdistributions	Bernoulliprobsfloat32collectionsdefaultdictr&   	enumeratesqueezearraymeancattuple)r-   rV   
all_logitsall_aggregationsprev_answers
batch_sizerZ   r[   r\   token_type_ids_exampleindexprev_labels_examplemodel_labelsi
segment_idcol_idrow_idinput_ids_exampleattention_mask_exampleoutputsr^   dist_per_tokenprobabilitiescoords_to_probspcolrowkeylogits_batchs                                r   sequential_inference3TableQuestionAnsweringPipeline.sequential_inference   s   
 
K(..q1
;'**4;;7	 0144T[[A 0144T[[A!%:&E '&<QT&B#!}}-@-D-D-F-L-L-NO)7)>&|11!45A!71!=!D!D!Fq!IJ3AqD9@@B1EIF3AqD9@@B1EIF{v{zQ*-lF;K.L*M 6 05/?/?/M/R/RSXS]S]/^/a/abfbmbm/n&q!t, ) 0%3%:"%3%:"jj+55a85??B5??B ! G
 ^^F~~ ''(B(BCf%"00::&:IN*003I3N3Nu}}3]3`3`$$++4 M *55d;O!-"7"7"9"@"@"BC13AqD9@@B1E
,QT299;A>B,QT299;A>B!8qZ1_#S#J/66q9 D YhhXgQTC/#*>!?!D!D!F!LLXgLhL] '` yyz!2A6&*nno<SXYiSjlmIn:oo	 is   5Pc                 v   > U R                   " U0 UD6n[        TU ]  " U40 UD6n[        U5      S:X  a  US   $ U$ )a2  
Answers queries according to a table. The pipeline accepts several types of inputs which are detailed below:

- `pipeline(table, query)`
- `pipeline(table, [query])`
- `pipeline(table=table, query=query)`
- `pipeline(table=table, query=[query])`
- `pipeline({"table": table, "query": query})`
- `pipeline({"table": table, "query": [query]})`
- `pipeline([{"table": table, "query": query}, {"table": table, "query": query}])`

The `table` argument should be a dict or a DataFrame built from that dict, containing the whole table:

Example:

```python
data = {
    "actors": ["brad pitt", "leonardo di caprio", "george clooney"],
    "age": ["56", "45", "59"],
    "number of movies": ["87", "53", "69"],
    "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
}
```

This dictionary can be passed in as such, or can be converted to a pandas DataFrame:

Example:

```python
import pandas as pd

table = pd.DataFrame.from_dict(data)
```

Args:
    table (`pd.DataFrame` or `Dict`):
        Pandas DataFrame or dictionary that will be converted to a DataFrame containing all the table values.
        See above for an example of dictionary.
    query (`str` or `list[str]`):
        Query or list of queries that will be sent to the model alongside the table.
    sequential (`bool`, *optional*, defaults to `False`):
        Whether to do inference sequentially or as a batch. Batching is faster, but models like SQA require the
        inference to be done sequentially to extract relations within sequences, given their conversational
        nature.
    padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`):
        Activates and controls padding. Accepts the following values:

        - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
          sequence if provided).
        - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
          acceptable input length for the model if that argument is not provided.
        - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
          lengths).

    truncation (`bool`, `str` or [`TapasTruncationStrategy`], *optional*, defaults to `False`):
        Activates and controls truncation. Accepts the following values:

        - `True` or `'drop_rows_to_fit'`: Truncate to a maximum length specified with the argument `max_length`
          or to the maximum acceptable input length for the model if that argument is not provided. This will
          truncate row by row, removing rows from the table.
        - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
          greater than the model maximum admissible input size).


Return:
    A dictionary or a list of dictionaries containing results: Each result is a dictionary with the following
    keys:

    - **answer** (`str`) -- The answer of the query given the table. If there is an aggregator, the answer will
      be preceded by `AGGREGATOR >`.
    - **coordinates** (`list[tuple[int, int]]`) -- Coordinates of the cells of the answers.
    - **cells** (`list[str]`) -- List of strings made up of the answer cell values.
    - **aggregator** (`str`) -- If the model has an aggregator, this returns the aggregator.
r   r   )rH   rF   r2   r'   )r-   argsr.   pipeline_inputsresultsrS   s        r   r2   'TableQuestionAnsweringPipeline.__call__   sJ    V ++T<V<'"?=f=w<11:r4   c                     0 nUb  X%S'   Ub  X5S'   0 nUb  XS'   [        U SS 5      b  U R                  US'   [        U SS 5      b  U R                  US'   U R                  US'   XV0 4$ )Npadding
truncation
sequentialassistant_modelassistant_tokenizer	tokenizer)rL   r   r   r   )r-   r   r   r   r.   preprocess_paramsforward_paramss          r   _sanitize_parameters3TableQuestionAnsweringPipeline._sanitize_parameters"  s    +2i(!.8l+!+5<(4*D1=040D0DN,-4.5A*...N;'484L4LN01 "44r4   c                     Uc  U R                   S:X  a  SnOSnUS   US   pTUR                  (       a  [        S5      eUb  US:X  a  [        S5      eU R                  XES	X2S
9nXFS'   U$ )NrE   drop_rows_to_fitdo_not_truncater   r   ztable is empty zquery is emptypt)return_tensorsr   r   )r"   emptyr$   r   )r-   pipeline_inputr   r   r   r   rV   s          r   
preprocess)TableQuestionAnsweringPipeline.preprocess5  sy    yyG#/
.
%g.w0Gu;;-..=ERK-..Tjjwr4   c                    UR                  S5      nU R                  S:X  a-  U(       a  U R                  " S0 UD6nOFU R                  " S0 UD6nO3SU;  a  U R                  US'   U R
                  R                  " S0 UDUD6nXUS.nU$ )Nr   rE   generation_config)model_inputsr   r   r5   )popr"   r   rW   r   rM   generate)r-   r   r   generate_kwargsr   r   model_outputss          r   _forward'TableQuestionAnsweringPipeline._forwardE  s      )9933ClC..>> #/97;7M7M 34jj))LLLOLG)5RYZr4   c                    US   nUS   nUS   nU R                   S:X  Ga  U R                  (       a  US S u  pVU R                  R                  X%U5      nUu  p[	        U	5       V
Vs0 s H(  u  pXR
                  R                  R                  U   _M*     nn
nU R
                  R                  R                  n[	        U	5       V
Vs0 s H  u  pX:w  d  M  XU
   S-   _M     nn
nO)US   nU R                  R                  X%5      nUS   n0 n0 n/ n[	        U5       H  u  nnU Vs/ s H  nUR                  U   PM     nnUR                  US5      nUR                  US5      nUS	R                  U5      -   UU Vs/ s H  nUR                  U   PM     snS
.nU(       a  UUS'   UR                  U5        M     [        W5      S:X  a!  [        SU R
                  R                  S5      eO,U R                  R!                  USS9 Vs/ s H  nSU0PM	     nn[        U5      S:  a  U$ US   $ s  snn
f s  snn
f s  snf s  snf s  snf )Nr   r   r   rE   r   z > r   r   z, )answercoordinatescells
aggregatorzTable question answeringzEmpty answerT)skip_special_tokensr   r   )r"   rO   r   convert_logits_to_predictionsru   rM   rN   rC   no_aggregation_label_indexiatr%   joinrm   r'   r   name_or_pathbatch_decode)r-   r   rV   r   r   r^   
logits_aggpredictionsanswer_coordinates_batchagg_predictionsr   predaggregatorsno_agg_label_indexaggregators_prefixanswersr   r   
coordinater   r   aggregator_prefixr   s                          r   postprocess*TableQuestionAnsweringPipeline.postprocessV  sc   ~.g&	*99~~%,Ra[""nnJJ6[ef<G9(\efu\vw\vQXQRq**"3"3"F"Ft"LL\vw%)ZZ%6%6%Q%Q"=F=W&=W'![_[u-A1~--=W # &" !"nnJJ6Z+6q>( %'"G&/0H&I"{ALM::.M(__UB7
$6$:$:5"$E!/$))E2BB#.FQRk
eii
3kR
 +5F<(v& 'J 6{a'(BDJJD[D[]kll   9=8S8ST[qu8S8vw8vf&)8vGwg,*w:
:? x& N S xs$   '/H/H5H5&H;=I 
	I)rH   rO   r"   )NNN)TN)F)r6   r7   r8   r9   r:   default_input_names_pipeline_calls_generate_load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   _default_generation_configr   rG   rW   r   r2   r   r   r   r   r;   __classcell__)rS   s   @r   r>   r>   M   sw     D (#O!#O!1" $I#J Z$ApFPd5& "(; (;r4   r>   )rs   r*   rf   rc   
generationr   utilsr   r   r   baser	   r
   r   r   r   ri   models.auto.modeling_autor   r   r   r>   r5   r4   r   <module>r      sn       ) 
 b a 2#O 2#j ,4@Ap;X p; Bp;r4   