
    Z jQ                     4    S SK r SSKJr   " S S\5      rS/rg)    N   )TokenizersBackendc                   `   ^  \ rS rSrSr   SS\\\   -  S\S\S-  S\S\4
U 4S	 jjjr	S
r
U =r$ )ParakeetTokenizer   a  
Inherits all methods from [`PreTrainedTokenizerFast`]. Users should refer to this superclass for more information regarding those methods,
except for `_decode` which is overridden to adapt it to CTC decoding:
1. Group consecutive tokens
2. Filter out the blank token
N	token_idsskip_special_tokensclean_up_tokenization_spacesgroup_tokensreturnc                   > [        U[        5      (       a  U/nU(       a(  [        R                  " U5       Vs/ s H  ofS   PM	     nnU Vs/ s H  owU R                  :w  d  M  UPM     nn[
        TU ]  " SUUUS.UD6$ s  snf s  snf )Nr   )r   r	   r
    )
isinstanceint	itertoolsgroupbypad_token_idsuper_decode)	selfr   r	   r
   r   kwargstoken_grouptoken	__class__s	           ڃ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/parakeet/tokenization_parakeet.pyr   ParakeetTokenizer._decode   s     i%%"I;D;L;LY;WX;WKQ;WIX )2P	ud>O>O5OU		Pw 
 3)E
 	
 	
 Y Qs   B B$Br   )FNT)__name__
__module____qualname____firstlineno____doc__r   listboolstrr   __static_attributes____classcell__)r   s   @r   r   r      sY     %*48!
c?
 "
 '+Tk	

 
 

 
    r   )r   tokenization_utils_tokenizersr   r   __all__r   r'   r   <module>r*      s%     >
) 
@ 
r'   