
    Z j                         S SK r S SKrS SKrSSKJr  SSKJr  SSKJr  \R                  " \
5      r " S S5      r " S S	\5      rg)
    N   )TrainerCallback)PREFIX_CHECKPOINT_DIR)loggingc                   >    \ rS rSrS
S\4S jjrS rS rS rS r	Sr
g	)CheckpointManager   	kill_waitc                 8    Xl         SU l        SU l        X l        g)a  
Initialize the CheckpointManager for Just-In-Time checkpoint handling.

Args:
    trainer: The Trainer instance that will be used to save checkpoints when SIGTERM is received.
    kill_wait (`int`, *optional*, defaults to 3): Grace period to distinguish between SIGTERM and SIGKILL.
FN)traineris_checkpoint_requested_original_sigterm_handlerr
   )selfr   r
   s      t/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/trainer_jit_checkpoint.py__init__CheckpointManager.__init__   s     ',$)-&"    c                     [         R                   " [         R                  U R                  5      U l        [        R                  S5        g )Nz4JIT checkpoint signal handler registered for SIGTERM)signalSIGTERM_sigterm_handlerr   loggerinfor   s    r   setup_signal_handler&CheckpointManager.setup_signal_handler   s,    )/v~~tG\G\)]&JKr   c                     U R                   (       a  g [        R                  SU R                   S35        [        R
                  " U R                  U R                  5      R                  5         g )Nz4SIGTERM received, will request JIT checkpoint after s)r   r   r   r
   	threadingTimer_enable_checkpointstart)r   signumframes      r   r   "CheckpointManager._sigterm_handler   sK    ''J4>>JZZ[\](?(?@FFHr   c                 <    [         R                  S5        SU l        g )Nz/Kill wait period elapsed, requesting checkpointT)r   r   r   r   s    r   r!   $CheckpointManager._enable_checkpoint&   s    EF'+$r   c                     SU l         [        R                  S5        U R                  R                  R
                  n[        R                  SU 35        U R                  R                  S S9n[         SU 3n[        R                  R                  X#5      n[        R                  " USS9  [        R                  R                  X#S5      n[        US	5       nUR                  S
U S35        S S S 5        [        R                  SU 35        U R                  R                  U R                  R                  S S9  [        R                  R!                  U5      (       a+  [        R"                  " U5        [        R                  S5        [        R                  S5        g ! , (       d  f       N= f! [$         a  n[        R'                  SU 35        e S nAff = f)NFzStarting JIT checkpointing...zSaving JIT checkpoint at step )trial-T)exist_okzcheckpoint-is-incomplete.txtwzCheckpoint started at step z and in progress...z2Created checkpoint progress sentinel marker file: zSentinel marker file removedz/Immediate JIT checkpoint completed successfullyzFailed to save JIT checkpoint: )r   r   r   r   stateglobal_step_get_output_dirr   ospathjoinmakedirsopenwrite_save_checkpointmodelexistsremove	Exceptionerror)r   current_step
output_dircheckpoint_foldercheckpoint_pathsentinel_filefes           r   execute_jit_checkpoint(CheckpointManager.execute_jit_checkpoint*   sr   !	+0D(KK78<<--99LKK8GH55D5AJ#8"9<. I ggll:IO KK$7 GGLLHfgMmS)Q5l^CVWX *KKL]O\] LL))$,,*<*<D)I ww~~m,,		-(:;KKIJ *)  	LL:1#>?	s1   CF4 F#0B2F4 #
F1-F4 4
G>GG)r   r   r
   r   N)   )__name__
__module____qualname____firstlineno__intr   r   r   r!   rC   __static_attributes__ r   r   r   r      s$    #3 #LI,"r   r   c                   B    \ rS rSrSrS rS rS rS rS r	S r
S	 rS
rg)JITCheckpointCallbackO   a:  
Callback for Just-In-Time checkpointing on SIGTERM signals.

When SIGTERM is received, the checkpoint manager sets `is_checkpoint_requested=True`.
The callbacks detect this flag and set `control.should_training_stop=True`, which signals
the Trainer's training loop to exit gracefully after saving the checkpoint.
c                      S U l         S U l        g )N)r   jit_managerr   s    r   r   JITCheckpointCallback.__init__X   s    59r   c                     Xl         UR                  R                  (       a>  [        US9U l        U R                  R                  5         [        R                  S5        g g )N)r   zJIT checkpointing enabled)r   argsenable_jit_checkpointr   rQ   r   r   r   )r   r   s     r   set_trainer!JITCheckpointCallback.set_trainer\   sE    <<--0AD113KK34 .r   c                     U R                   (       a>  U R                   R                  (       a"  SUl        U R                   R                  5         g g g NTrQ   r   should_training_stoprC   r   rT   r-   controlkwargss        r   on_pre_optimizer_step+JITCheckpointCallback.on_pre_optimizer_stepc   <     0 0 H H+/G(335 !Ir   c                     U R                   (       a>  U R                   R                  (       a"  SUl        U R                   R                  5         g g g rY   rZ   r\   s        r   on_step_begin#JITCheckpointCallback.on_step_beginh   ra   r   c                     U R                   (       aE  U R                   R                  (       a)  SUl        SUl        U R                   R	                  5         g g g NFTrQ   r   should_saver[   rC   r\   s        r   on_step_end!JITCheckpointCallback.on_step_endm   D     0 0 H H"'G+/G(335 !Ir   c                     U R                   (       aE  U R                   R                  (       a)  SUl        SUl        U R                   R	                  5         g g g rf   rg   r\   s        r   on_epoch_end"JITCheckpointCallback.on_epoch_ends   rk   r   c                     U R                   (       ag  U R                   R                  bO  [        R                  " [        R                  U R                   R                  5        [        R                  S5        g g g )Nz;Restored original SIGTERM handler after training completion)rQ   r   r   r   r   r   r\   s        r   on_train_end"JITCheckpointCallback.on_train_endy   sP     0 0 J J VMM&..$*:*:*T*TUKKUV !Wr   )rQ   r   N)rF   rG   rH   rI   __doc__r   rV   r_   rc   ri   rm   rp   rK   rL   r   r   rN   rN   O   s+    :56
6
66Wr   rN   )r0   r   r   trainer_callbackr   trainer_utilsr   utilsr   
get_loggerrF   r   r   rN   rL   r   r   <module>rw      sB    	   - 0  
		H	%? ?D.WO .Wr   