
    Z jxy                        S SK Jr  S SKJr  S SKJr  S SKrS SKJr  SSKJ	r	  SSK
JrJr  SS	KJr  SS
KJr  SSKJrJr  SSKJrJr  SSKJrJr  SSKJr  SSKJrJrJrJ r   SSK!J"r"J#r#  SSK$J%r%  SSK&J'r'J(r(   " S S\RR                  5      r* " S S\RR                  5      r+S r,\" S5      S<S j5       r-S\R\                  S\/S\R\                  4S jr0 S=S\RR                  S \R\                  S!\R\                  S"\R\                  S#\R\                  S-  S$\1S%\1S&\\   4S' jjr2\" \-5       " S( S)\RR                  5      5       r3 " S* S+\RR                  5      r4 " S, S-\RR                  5      r5 " S. S/\5      r6\ " S0 S1\5      5       r7\" S2S39 " S4 S5\75      5       r8\ " S6 S7\5      5       r9\" S8S39 " S9 S:\75      5       r:/ S;Qr;g)>    )Callable)	dataclass)OptionalN)nn   )ACT2FN)use_kernel_func_from_hubuse_kernelized_func)create_bidirectional_mask)GradientCheckpointingLayer)BaseModelOutputCausalLMOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstringcan_return_tuple)maybe_autocastmerge_with_config_defaults)capture_outputs   )LasrCTCConfigLasrEncoderConfigc                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )LasrEncoderSubsampling*   configc                 &  > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR
                  UR                  UR                  S9U l
        [        R                  " UR
                  UR                  UR                  UR                  S9U l        [        R                  " UR                  UR
                  5      U l        [        R                  " 5       U l        g )N)kernel_sizestride)super__init__r   Linearnum_mel_binshidden_sizedense_0Conv1dsubsampling_conv_kernel_sizesubsampling_conv_strideconv_0subsampling_conv_channelsconv_1dense_1ReLUact_fnselfr!   	__class__s     w/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/lasr/modeling_lasr.pyr&   LasrEncoderSubsampling.__init__+   s    yy!4!4f6H6HIii;;11	
 ii,,;;11	
 yy!A!A6CUCUVggi    input_featuresreturnc                 ,   U R                  U R                  U5      5      nUR                  SS5      nU R                  U R                  U5      5      nU R                  U R	                  U5      5      nUR                  SS5      nU R                  U5      $ )Nr      )r3   r*   	transposer.   r0   r1   )r5   r:   hidden_statess      r7   forwardLasrEncoderSubsampling.forward=   sz    DLL$@A%//15DKK$>?DKK$>?%//15||M**r9   )r3   r.   r0   r*   r1   )__name__
__module____qualname____firstlineno__r   r&   torchTensorr@   __static_attributes____classcell__r6   s   @r7   r   r   *   s0     0  $+ell +u|| + +r9   r   c                      ^  \ rS rSr% \R
                  \S'   SS\4U 4S jjjr\	   SS\S-  S\
S   S\S-  S	\S
\4   4S jj5       r\R                  " 5       \S 5       5       rSrU =r$ )LasrEncoderRotaryEmbeddingF   inv_freqNr!   c                   > [         TU ]  5         UR                  U l        UR                  U l        Xl        U R
                  R                  S   U l        U R                  nU R                  S:w  a  [        U R                     nU" U R
                  U5      u  o@l
        U R                  SUSS9  U R                  SUR                  5       SS9  g )N	rope_typedefaultrN   F)
persistentoriginal_inv_freq)r%   r&   max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenr!   rope_parametersrP   compute_default_rope_parametersr   attention_scalingregister_bufferclone)r5   r!   devicerope_init_fnrN   r6   s        r7   r&   #LasrEncoderRotaryEmbedding.__init__I   s    "("@"@$*$B$B!44[A!%!E!E>>Y&.t~~>L+7V+L((ZeD0(..2BuUr9   r\   ztorch.deviceseq_lenr;   ztorch.Tensorc           	         U R                   S   n[        U SS5      =(       d    U R                  U R                  -  nSnSU[        R
                  " SUS[        R                  S9R                  U[        R                  S9U-  -  -  nXe4$ )	aH  
Computes the inverse frequencies according to the original RoPE implementation
Args:
    config ([`~transformers.PreTrainedConfig`]):
        The model configuration.
    device (`torch.device`):
        The device to use for initialization of the inverse frequencies.
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
Returns:
    Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
    post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

rope_thetahead_dimNg      ?r   r=   dtype)r\   rd   )	rW   getattrr)   num_attention_headsrF   arangeint64tofloat)r!   r\   r_   basedimattention_factorrN   s          r7   rX   :LasrEncoderRotaryEmbedding.compute_default_rope_parametersY   s    & %%l3fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 ))r9   c                 L   U R                   S S S 2S 4   R                  5       R                  UR                  S   SS5      R	                  UR
                  5      nUS S 2S S S 24   R                  5       n[        UR
                  R                  [        5      (       a0  UR
                  R                  S:w  a  UR
                  R                  OSn[        USS9   UR                  5       UR                  5       -  R                  SS5      n[        R                  " Xf4SS	9nUR                  5       U R                  -  nUR                  5       U R                  -  n	S S S 5        WR	                  UR                   S
9W	R	                  UR                   S
94$ ! , (       d  f       N@= f)Nr   r   mpscpuF)device_typeenabledr=   rl   rc   )rN   rj   expandshaperi   r\   
isinstancetypestrr   r>   rF   catcosrY   sinrd   )
r5   xposition_idsinv_freq_expandedposition_ids_expandedrs   freqsembr|   r}   s
             r7   r@   "LasrEncoderRotaryEmbedding.forwardw   sN    !MM$4-8>>@GGHZHZ[\H]_acdehhijiqiqr ,QaZ 8 > > @'1!((--'E'E!((--[`J`ahhmmfkUC&,,.1F1L1L1NNYYZ[]^_E))UN3C'')d444C'')d444C	 D vvAGGv$cff177f&;;; DCs   BF
F#)rY   r!   rU   rV   rP   N)NNN)rB   rC   rD   rE   rF   rG   __annotations__r   r&   staticmethodr   inttuplerj   rX   no_gradr   r@   rH   rI   rJ   s   @r7   rL   rL   F   s    llV0 V V  +/+/"*!D(*(* t* 
~u$	%	* *: ]]_<  <r9   rL   c                     U SSU R                   S   S-  24   nU SU R                   S   S-  S24   n[        R                  " U* U4SS9$ )z*Rotates half the hidden dims of the input..Nrp   r=   ru   )rw   rF   r{   )r~   x1x2s      r7   rotate_halfr      sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r9   rotary_pos_embc                     UR                  U5      nUR                  U5      nX-  [        U 5      U-  -   nX-  [        U5      U-  -   nXV4$ )aI  Applies Rotary Position Embedding to the query and key tensors.

Args:
    q (`torch.Tensor`): The query tensor.
    k (`torch.Tensor`): The key tensor.
    cos (`torch.Tensor`): The cosine part of the rotary embedding.
    sin (`torch.Tensor`): The sine part of the rotary embedding.
    unsqueeze_dim (`int`, *optional*, defaults to 1):
        The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
        sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
        that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
        k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
        cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
        the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
Returns:
    `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
)	unsqueezer   )qkr|   r}   unsqueeze_dimq_embedk_embeds          r7   apply_rotary_pos_embr      sS    & --
&C
--
&Cw;q>C/0Gw;q>C/0Gr9   r?   n_repr;   c                     U R                   u  p#pEUS:X  a  U $ U SS2SS2SSS2SS24   R                  X#XU5      n U R                  X#U-  XE5      $ )z
This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
r   N)rw   rv   reshape)r?   r   batchnum_key_value_headsslenrb   s         r7   	repeat_kvr      s_    
 2?1D1D.Ez!!Qa"23::5W\dlmM  e(CTTTr9   modulequerykeyvalueattention_maskscalingdropoutkwargsc                    [        X R                  5      n[        X0R                  5      n	[        R                  " XR	                  SS5      5      U-  n
Ub  X-   n
[
        R                  R                  U
S[        R                  S9R                  UR                  5      n
[
        R                  R                  XU R                  S9n
[        R                  " X5      nUR	                  SS5      R                  5       nX4$ )Nr=   r   rp   rl   rd   ptrainingr   )r   num_key_value_groupsrF   matmulr>   r   
functionalsoftmaxfloat32ri   rd   r   r   
contiguous)r   r   r   r   r   r   r   r   
key_statesvalue_statesattn_weightsattn_outputs               r7   eager_attention_forwardr      s     3 ; ;<JU$?$?@L<<';';Aq'ABWLL!#4==((2U]](SVVW\WbWbcL==((6??([L,,|:K''1-88:K$$r9   c                      ^  \ rS rSrSrS\S\4U 4S jjr  SS\R                  S\
\R                  \R                  4   S-  S	\R                  S-  S
\\   S\
\R                  \R                  4   4
S jjrSrU =r$ )LasrEncoderAttention   z=Multi-headed attention from 'Attention Is All You Need' paperr!   	layer_idxc                 P  > [         TU ]  5         Xl        X l        [	        USUR
                  UR                  -  5      U l        UR                  UR                  -  U l	        U R                  S-  U l
        UR                  U l        SU l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR
                  UR                  U R                  -  UR                  S9U l        [        R                  " UR                  U R                  -  UR
                  UR                  S9U l        g )Nrb   g      Fbias)r%   r&   r!   r   re   r)   rf   rb   r   r   r   attention_dropout	is_causalr   r'   attention_biasq_projk_projv_projo_projr5   r!   r   r6   s      r7   r&   LasrEncoderAttention.__init__   sI   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r9   Nr?   position_embeddingsr   r   r;   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n	Uu  p[        XxX5      u  px[        R                  " U R                  R                  [        5      nU" U UUU	U4U R                  (       d  SOU R                  U R                  S.UD6u  pUR                   " / UQSP76 R#                  5       nU R%                  U5      nX4$ )Nrp   r   r=           )r   r   )rw   rb   r   viewr>   r   r   r   r   get_interfacer!   _attn_implementationr   r   r   r   r   r   r   )r5   r?   r   r   r   input_shapehidden_shapequery_statesr   r   r|   r}   attention_interfacer   r   s                  r7   r@   LasrEncoderAttention.forward   s\    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&#7RU#[ (?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
! "));;;;FFHkk+.((r9   )r   r!   rb   r   r   r   r   r   r   r   r   NN)rB   rC   rD   rE   __doc__r   r   r&   rF   rG   r   r   r   r@   rH   rI   rJ   s   @r7   r   r      s    G
0 
S 
4 IM.2	")||") #5<<#=>E") t+	")
 +,") 
u||U\\)	*") ")r9   r   c                   >   ^  \ rS rSrSS\4U 4S jjjrSS jrSrU =r$ )LasrEncoderConvolutionModulei  r!   c           
      F  > [         TU ]  5         UR                  nUc&  UR                  n[        [        USS5         U l        O#US   n[        UR                  SS5         U l        SU l        [        R                  " USU-  SSS	UR                  S
9U l        [        R                  " UUUSU R                  UUR                  S9U l        [        R                  " UR                  UR                  S9U l        [        R                  " X3SSS	UR                  S
9U l        g)z
Args:
    config (LasrEncoderConfig): Configuration for the model.
    module_config (dict): Configuration for the module (e.g., encoder or decoder).
N
hidden_actsilur#   
activationsamer=   r   r   )r#   r$   paddingr   )r$   r   groupsr   )momentum)r%   r&   r)   conv_kernel_sizer   re   r   getr   r   r+   convolution_biaspointwise_conv1depthwise_convBatchNorm1dbatch_norm_momentumnormpointwise_conv2)r5   r!   module_configchannelsr#   r6   s        r7   r&   %LasrEncoderConvolutionModule.__init__  s    	%%  11K$WV\6%JKDO'6K$]%6%6|V%LMDO!yya(l!QVMdMd 
 !iiLL((
 NN6#5#5@Z@Z[	!yyAaI`I` 
r9   c                    UR                  SS5      nU R                  U5      n[        R                  R	                  USS9nUb`  UR
                  [        R                  :X  a  [        R                  " U) SS9nO[        R                  " US:H  ) SS9nUR                  US5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nUR                  SS5      $ )a!  
Compute convolution module.

Args:
    hidden_states (`torch.Tensor` of shape `(batch, time, channels)`): Input tensor.
    attention_mask (`torch.Tensor` of shape `(batch, 1, time, time)`): Attention mask.

Returns:
    `torch.Tensor`: Output tensor of shape `(batch, time, channels)`.

r   r=   ru   r   )r>   r   r   r   glurd   rF   boolallmasked_fillr   r   r   r   )r5   r?   r   all_masked_rowss       r7   r@   $LasrEncoderConvolutionModule.forward0  s     &//15 ,,];))-Q)? %##uzz1"'))^O"C"'))n.C,D!"L)55osKM ++M:		-06,,];&&q!,,r9   )r   r   r   r   r   r   r   	rB   rC   rD   rE   r   r&   r@   rH   rI   rJ   s   @r7   r   r     s      
0  
  
D"- "-r9   r   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )LasrEncoderFeedForwardiU  r!   c                 X  > [         TU ]  5         [        R                  " UR                  UR
                  UR                  S9U l        [        UR                     U l
        [        R                  " UR
                  UR                  UR                  S9U l        UR                  U l        g )Nr   )r%   r&   r   r'   r)   intermediate_sizer   linear1r   r   r   linear2activation_dropoutr4   s     r7   r&   LasrEncoderFeedForward.__init__V  s|    yy!3!3V5M5MTZTiTij !2!23yy!9!96;M;MTZTiTij"(";";r9   c                     U R                  U R                  U5      5      n[        R                  R	                  XR
                  U R                  S9nU R                  U5      nU$ )Nr   )r   r   r   r   r   r   r   r   )r5   r?   s     r7   r@   LasrEncoderFeedForward.forward]  sS    ](CD--m?V?Vaeanan-o]3r9   )r   r   r   r   r   rJ   s   @r7   r   r   U  s    <0 < r9   r   c                      ^  \ rS rSrS\S\4U 4S jjr  SS\R                  S\R                  S-  S\R                  S-  S	\	\
   S
\R                  4
S jjrSrU =r$ )LasrEncoderBlockid  r!   r   c                   > [         TU ]  5         SU l        [        U5      U l        [        X5      U l        [        U5      U l        [        U5      U l	        [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        [        R                  " UR                  UR                  SS9U l        UR&                  U l        UR(                  U l        g )NFr   )r%   r&   gradient_checkpointingr   feed_forward1r   	self_attnr   convfeed_forward2r   	LayerNormr)   layer_norm_epsnorm_feed_forward1norm_self_att	norm_convnorm_feed_forward2norm_outfeed_forward_residual_weightsconv_residual_weightsr   s      r7   r&   LasrEncoderBlock.__init__e  s   &+#3F;-f@08	3F;"$,,v/A/A6CXCX_d"e\\&*<*<f>S>SZ_`f&8&8&:O:OV[\"$,,v/A/A6CXCX_d"eV%7%79N9NUZ[-3-Q-Q*%+%A%A"r9   Nr?   r   r   r   r;   c                 &   UnU R                  U R                  U5      5      nU R                  S   U-  U R                  S   U-  -   nU R                  U5      nU R                  " SUUUS.UD6u  pxX-   nU R                  U R                  U5      US9n	U R                  S   U-  U R                  S   U	-  -   nUnU R                  U R                  U5      5      nU R                  S   U-  U R                  S   U-  -   nU R                  U5      nU$ )Nr   r   )r?   r   r   )r    )r   r  r  r  r  r  r  r  r  r	  r
  )
r5   r?   r   r   r   residualnormalized_hidden_statesr   _conv_outputs
             r7   r@   LasrEncoderBlock.forwardw  sD    !**4+B+B=+QR..q1H<t?a?abc?dgt?tt 	 $(#5#5m#D  
2) 3
 	
 &3ii} =ni]2215EHbHbcdHehsHss **4+B+B=+QR..q1H<t?a?abc?dgt?tt 	 m4r9   )r  r  r   r  r  r   r  r  r	  r
  r  r  r   )rB   rC   rD   rE   r   r   r&   rF   rG   r   r   r@   rH   rI   rJ   s   @r7   r   r   d  s|    B0 BS B* /337	!||! t+! #\\D0	!
 +,! 
! !r9   r   c                      ^  \ rS rSr% \\S'   SrSrSrSr	S/r
SrSrSrSrSrSr\\S	.r\R*                  " 5       U 4S
 j5       rS\R.                  4S jrSS\R.                  S\S-  4S jjrSrU =r$ )LasrPreTrainedModeli  r!   modelr:   audioTr   F)r?   
attentionsc                 $   > [         TU ]  U5        g r   )r%   _init_weights)r5   r   r6   s     r7   r  !LasrPreTrainedModel._init_weights  s    f%r9   input_lengthsc                     [        U R                  [        5      (       a  U R                  R                  OU R                  nUR                  nUR
                  nSn[        U5       H  nX-
  U-  S-   nM     U$ )Nr=   r   )rx   r!   r   encoder_configr,   r-   range)r5   r  r  r#   r$   
num_layersr  s          r7   _get_subsampling_output_length2LasrPreTrainedModel._get_subsampling_output_length  sn    7A$++}7]7]33cgcncn$AA77
z"A*8VCaGM # r9   Nr   target_lengthc                     U R                  UR                  S5      5      nUb  UOUR                  5       n[        R                  " XAR
                  S9USS2S4   :  nU$ )z
Convert the input attention mask to its subsampled form. `target_length` sets the desired output length, useful
when the attention mask length differs from `sum(-1).max()` (i.e., when the longest sequence in the batch is padded)
rp   Nr\   )r"  summaxrF   rg   r\   )r5   r   r$  output_lengths
max_lengths        r7   _get_output_attention_mask.LasrPreTrainedModel._get_output_attention_mask  sa    
 <<^=O=OPR=ST&3&?]^EWEWEY
j9N9NOR`abdhahRiir9   r  r   )rB   rC   rD   rE   r   r   base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_no_split_modules_supports_flat_attention_mask_supports_sdpa_supports_flex_attn_supports_flash_attn_can_compile_fullgraph_supports_attention_backendr   r   _can_record_outputsrF   r   r  rG   r"  r   r+  rH   rI   rJ   s   @r7   r  r    s    &O&*#+,$(!N !!"&)*
 ]]_& &	ELL 		 	VY\`V` 	 	r9   r  zh
    The LasrEncoder model, based on the Conformer architecture](https://arxiv.org/abs/2005.08100).
    )custom_introc                      ^  \ rS rSr% \\S'   SrS\4U 4S jjr\\	\
\ SS\R                  S\R                  S-  S\\   S	\4S
 jj5       5       5       5       rSrU =r$ )LasrEncoderi  r!   encoderc           	        > [         TU ]  U5        SU l        UR                  U l        UR                  U l        UR
                  U l        [        U5      U l        [        U5      U l	        [        R                  " [        UR                  5       Vs/ s H  n[        X5      PM     sn5      U l        [        R                   " UR"                  UR$                  SS9U l        U R)                  5         g s  snf )NF)epsr   )r%   r&   r   r   dropout_positions	layerdropr   
subsamplerrL   
rotary_embr   
ModuleListr   num_hidden_layersr   layersr  r)   r  out_norm	post_initr   s      r7   r&   LasrEncoder.__init__  s     &+#~~!'!9!9))084V<mmBGH`H`BabBaYf0Bab
 V%7%7V=R=RY^_	 cs   C3Nr:   r   r   r;   c                 L   U R                  U5      nU R                  U[        R                  " UR                  S   UR
                  S9R                  S5      5      u  pV[        R                  R                  X@R                  U R                  S9n[        R                  R                  XPR                  U R                  S9n[        R                  R                  X`R                  U R                  S9nUb  U R                  X$R                  S   S9n[        U R                  UUS9nU R                   HS  nSnU R                  (       a'  [        R                   " / 5      n	XR"                  :  a  SnU(       a  MF  U" U4UXV4S	.UD6nMU     U R%                  U5      n['        US
9$ )aR  
Example:

```python
>>> from transformers import AutoProcessor, LasrEncoder
>>> from datasets import load_dataset, Audio

>>> model_id = TODO
>>> processor = AutoProcessor.from_pretrained(model_id)
>>> encoder = ParakeetEncoder.from_pretrained(model_id)

>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))

>>> inputs = processor(ds[0]["audio"]["array"])
>>> encoder_outputs = encoder(**inputs)

>>> print(encoder_outputs.last_hidden_state.shape)
```
r   r&  r   r   r$  )r!   inputs_embedsr   FT)r   r   )last_hidden_state)rA  rB  rF   rg   rw   r\   r   r   r   r   r   r?  r+  r   r!   rE  randr@  rF  r   )
r5   r:   r   r   r?   r|   r}   encoder_layerto_dropdropout_probabilitys
             r7   r@   LasrEncoder.forward  st   > 7??5<<(;(;A(>}G[G[\ffghi
 --m||VZVcVc-dmm##C+A+ADMM#Zmm##C+A+ADMM#Z%!<<^[n[nop[q<rN2;;')
 "[[MG}}&+jjn#&7"G7 -!!#1),
! 	! )  m4??r9   )r   r?  r   r@  rE  rF  rB  rA  r   )rB   rC   rD   rE   r   r   r-  r&   r   r   r   r   rF   rG   r   r   r   r@   rH   rI   rJ   s   @r7   r;  r;    s     !0 "  /3?@?@ t+?@ +,	?@
 
?@     ?@r9   r;  c                       \ rS rSr% Sr\R                  \S'   Sr\	\R                     S-  \S'   Sr\	\	\R                        S-  \S'   Sr\	\	\R                        S-  \S'   Srg)	LasrGenerateOutputi-  a(  
Outputs of Lasr models.

Args:
    sequences (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
        The generated sequences. The second dimension (sequence_length) is either equal to `max_length` or shorter
        if all batches finished early due to the `eos_token_id`.
    logits (`tuple(torch.FloatTensor)` *optional*, returned when `output_logits=True`):
        Unprocessed prediction scores of the language modeling head (scores for each vocabulary token before SoftMax)
        at each generation step. Tuple of `torch.FloatTensor` with up to `max_new_tokens` elements (one element for
        each generated token), with each tensor of shape `(batch_size, config.vocab_size)`.
    attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True`):
        Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
        `torch.FloatTensor` of shape `(batch_size, num_heads, generated_length, sequence_length)`.
    hidden_states (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_hidden_states=True`):
        Tuple (one element for each generated token) of tuples (one element for each layer of the decoder) of
        `torch.FloatTensor` of shape `(batch_size, generated_length, hidden_size)`.
	sequencesNlogitsr  r?   r  )rB   rC   rD   rE   r   rF   
LongTensorr   rU  r   FloatTensorr  r?   rH   r  r9   r7   rS  rS  -  sm    & .2FE%##$t+29=JeE--./$6=<@M5u0012T9@r9   rS  zO
    Lasr Encoder with a Connectionist Temporal Classification (CTC) head.
    c                   `  ^  \ rS rSr% \\S'   S\4U 4S jjr\\  SS\	R                  S\	R                  S-  S\	R                  S-  S\\   S	\4
S
 jj5       5       r\	R                  " 5         SS\	R                  S\	R                  S-  S\S\\   S	\\	R$                  -  4
S jj5       rSrU =r$ )
LasrForCTCiH  r!   c                    > [         TU ]  U5        [        UR                  5      U l        [
        R                  " UR                  R                  UR                  SS9U l	        U R                  5         g )Nr   )r#   )r%   r&   r;  r  r<  r   r+   r)   
vocab_sizectc_headrG  r4   s     r7   r&   LasrForCTC.__init__P  sS     "6#8#89		&"7"7"C"CVEVEVdefr9   Nr:   r   labelsr   r;   c                    U R                   " SUUS.UD6nUR                  nU R                  UR                  SS5      5      R                  SS5      nSnUGbN  Ub  UO"[        R
                  " U[        R                  S9nU R                  UR                  S5      5      n	X0R                  R                  :g  n
U
R                  S5      nUR                  U
5      n[        R                  R                  US[        R                  S9R                  SS5      n[        R                   R"                  R%                  S	S
9   [        R                  R'                  UUU	UU R                  R                  U R                  R(                  U R                  R*                  S9nSSS5        [-        UUUR.                  UR0                  S9$ ! , (       d  f       N.= f)aJ  
Example:

```python
>>> from transformers import AutoProcessor, LasrForCTC
>>> from datasets import load_dataset, Audio

>>> model_id = "nvidia/lasr-ctc-1.1b"
>>> processor = AutoProcessor.from_pretrained(model_id)
>>> model = LasrForCTC.from_pretrained(model_id)

>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))

>>> inputs = processor(ds[0]["audio"]["array"], text=ds[0]["text"])
>>> outputs = model(**inputs)

>>> print(outputs.loss)
```r:   r   r   r=   Nrc   rp   r   r   F)rt   )blank	reductionzero_infinity)lossrU  r?   r  r  )r<  rL  r\  r>   rF   	ones_likelongr"  r'  r!   pad_token_idmasked_selectr   r   log_softmaxr   backendscudnnflagsctc_lossctc_loss_reductionctc_zero_infinityr   r?   r  )r5   r:   r   r^  r   encoder_outputsr?   rU  rd  r  labels_masktarget_lengthsflattened_targets	log_probss                 r7   r@   LasrForCTC.forwardX  s   : ,, 
))
 
 (99}66q!<=GG1M #1"<%//R`hmhrhrBs  !??@R@RSU@VWM !KK$<$<<K(__R0N & 4 4[ A 11&b1V``abdefI%%++E+:}}--%!"++22"kk<<"&++"?"? .  ; )77&11	
 	
 ;:s   ?A G
Greturn_dict_in_generatec                 >   SUS'   U R                   " S	UUS.UD6nUR                  R                  SS9nUb5  U R                  X&R                  S   S9nU R
                  R                  Xb) '   U(       a*  [        UUR                  UR                  UR                  S9$ U$ )
a  
Example:

```python
>>> from transformers import AutoProcessor, LasrForCTC
>>> from datasets import load_dataset, Audio

>>> model_id = TODO
>>> processor = AutoProcessor.from_pretrained(model_id)
>>> model = LasrForCTC.from_pretrained(model_id)

>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.cast_column("audio", Audio(sampling_rate=processor.feature_extractor.sampling_rate))

>>> inputs = processor(ds[0]["audio"]["array"], text=ds[0]["text"])
>>> predicted_ids = model.generate(**inputs)
>>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

>>> print(transcription)
```
Treturn_dictr`  rp   ru   r   rJ  )rT  rU  r  r?   r  )
r@   rU  argmaxr+  rw   r!   rg  rS  r  r?   )r5   r:   r   rv  r   outputsrT  s          r7   generateLasrForCTC.generate  s    : !%}"&,, #
))#
 #
 NN))b)1	 %!<<^[j[jkl[m<nN)-)A)AIo&"%#~~"--%33	  r9   )r\  r<  r   )NF)rB   rC   rD   rE   r   r   r&   r   r   rF   rG   r   r   r   r@   r   r   rS  rV  r{  rH   rI   rJ   s   @r7   rY  rY  H  s     }   /3&*	E
E
 t+E
 t#	E

 +,E
 
E
  E
N ]]_ /3(-	33 t+3 "&	3
 +,3 
e..	.3 3r9   rY  )rY  r;  r  )r   )r   )<collections.abcr   dataclassesr   typingr   rF   r   activationsr   integrationsr	   r
   masking_utilsr   modeling_layersr   modeling_outputsr   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   utils.genericr   r   utils.output_capturingr   configuration_lasrr   r   Moduler   rL   r   r   rG   r   r   rj   r   r   r   r   r   r  r;  rS  rY  __all__r  r9   r7   <module>r     s  * % !    ! I 6 9 ? K F & V V G 5 @+RYY +8>< ><B( *+ ,2	UU\\ 	U# 	U%,, 	U& %II%<<% 
% <<	%
 LL4'% % % '(%2 )*<)299 <) +<)~E-299 E-PRYY 41 4n ./ . .b 
X@% X@
X@v A A A4 
H$ H
HV ?r9   