
"""Adafactor optimizer implementation."""

import tensorflow.compat.v2 as tf

from tf_keras.src.optimizers import optimizer
from tf_keras.src.optimizers.schedules import learning_rate_schedule
from tf_keras.src.saving.object_registration import register_keras_serializable

# isort: off
from tensorflow.python.util.tf_export import keras_export


@register_keras_serializable()
@keras_export(
    "keras.optimizers.Adafactor",
    "keras.optimizers.experimental.Adafactor",
    v1=[],
)
class Adafactor(optimizer.Optimizer):
    """Optimizer that implements the Adafactor algorithm.

    Adafactor is commonly used in NLP tasks, and has the advantage
    of taking less memory because it only saves partial information of previous
    gradients.

    The default argument setup is based on the original paper (see reference).
    When gradients are of dimension > 2, Adafactor optimizer will delete the
    last 2 dimensions separately in its accumulator variables.

    Args:
        learning_rate: Initial value for the learning rate:
            either a floating point value,
            or a `tf.keras.optimizers.schedules.LearningRateSchedule` instance.
            Defaults to 0.001.
        beta_2_decay: float, defaults to -0.8. The decay rate of `beta_2`.
        epsilon_1: float, defaults to 1e-30. A small offset to keep denominator
            away from 0.
        epsilon_2: float, defaults to 1e-3. A small offset to avoid learning
            rate becoming too small by time.
        clip_threshold: float, defaults to 1.0. Clipping threshold. This is a
            part of Adafactor algorithm, independent from `clipnorm`,
            `clipvalue` and `global_clipnorm`.
        relative_step: bool, defaults to True. If `learning_rate` is a
            constant and `relative_step=True`, learning rate will be adjusted
            based on current iterations. This is a default learning rate decay
            in Adafactor.
        {{base_optimizer_keyword_args}}

    Reference:
        - [Shazeer, Noam et al., 2018](https://arxiv.org/abs/1804.04235).
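
    Usage (a minimal illustrative sketch; the variable and quadratic loss
    below are made-up examples, not part of the optimizer's API):

    ```python
    opt = tf.keras.optimizers.Adafactor(learning_rate=0.001)
    var = tf.Variable(10.0)
    loss = lambda: (var**2) / 2.0
    for _ in range(10):
        opt.minimize(loss, [var])
    ```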

    """

    def __init__(
        self,
        learning_rate=0.001,
        beta_2_decay=-0.8,
        epsilon_1=1e-30,
        epsilon_2=1e-3,
        clip_threshold=1.0,
        relative_step=True,
        weight_decay=None,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        use_ema=False,
        ema_momentum=0.99,
        ema_overwrite_frequency=None,
        jit_compile=True,
        name="Adafactor",
        **kwargs,
    ):
        super().__init__(
            name=name,
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            use_ema=use_ema,
            ema_momentum=ema_momentum,
            ema_overwrite_frequency=ema_overwrite_frequency,
            jit_compile=jit_compile,
            **kwargs,
        )
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.beta_2_decay = beta_2_decay
        self.epsilon_1 = epsilon_1
        self.epsilon_2 = epsilon_2
        self.clip_threshold = clip_threshold
        self.relative_step = relative_step

    def build(self, var_list):
        """Initialize optimizer variables.

        Adafactor optimizer has 3 types of variables: factored row
        accumulators (`_r`), factored column accumulators (`_c`), and
        velocities (`_v`).

        Args:
            var_list: list of model variables to build Adafactor variables
                on.
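
        For example, a variable of shape `(256, 128)` gets a row accumulator
        `r` of shape `(256,)`, a column accumulator `c` of shape `(128,)`,
        and a `v` that keeps the full `(256, 128)` shape.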
        """
        super().build(var_list)
        if hasattr(self, "_built") and self._built:
            return
        self._built = True
        self._r = []
        self._c = []
        self._v = []
        for var in var_list:
            if len(var.shape) < 2:
                # Don't factor if the variable has fewer than 2 dimensions,
                # but still register dummy variables as placeholders.
                self._r.append(tf.Variable(0, name=f"r/{var._shared_name}"))
                self._c.append(tf.Variable(0, name=f"c/{var._shared_name}"))
            else:
                # Always factor the last 2 dimensions.
                r_shape = var.shape[:-1]
                c_shape = var.shape[:-2] + var.shape[-1]
                self._r.append(
                    self.add_variable(
                        shape=r_shape,
                        dtype=var.dtype,
                        name=f"r/{var._shared_name}",
                    )
                )
                self._c.append(
                    self.add_variable(
                        shape=c_shape,
                        dtype=var.dtype,
                        name=f"c/{var._shared_name}",
                    )
                )
            self._v.append(
                self.add_variable_from_reference(
                    model_variable=var, variable_name="v"
                )
            )

    def _rms(self, x):
        return tf.sqrt(tf.reduce_mean(tf.square(x)))

    def update_step(self, gradient, variable):
        """Update step given gradient and the associated model variable."""
        lr = tf.cast(self.learning_rate, variable.dtype)
        epsilon_2 = tf.cast(self.epsilon_2, variable.dtype)
        one = tf.cast(1.0, variable.dtype)
        local_step = tf.cast(self.iterations + 1, variable.dtype)
        if (
            not isinstance(
                self._learning_rate,
                learning_rate_schedule.LearningRateSchedule,
            )
            and self.relative_step
        ):
            # If the learning rate is a constant and `relative_step=True`,
            # decay it with the iteration count. This is the default
            # learning rate decay in Adafactor.
            lr = tf.minimum(lr, tf.math.rsqrt(local_step))

        var_key = self._var_key(variable)
        r = self._r[self._index_dict[var_key]]
        c = self._c[self._index_dict[var_key]]
        v = self._v[self._index_dict[var_key]]

        rho_t = tf.minimum(lr, tf.math.rsqrt(local_step))
        alpha_t = tf.maximum(epsilon_2, self._rms(variable)) * rho_t
        regulated_grad_square = tf.square(gradient) + self.epsilon_1
        beta_2_t = 1 - tf.pow(local_step, self.beta_2_decay)

        if len(variable.shape) >= 2:
            # `r` deletes the last dimension of the gradient, so it is of
            # shape `gradient.shape[:-1]`.
            r.assign(
                beta_2_t * r
                + (1 - beta_2_t)
                * tf.reduce_mean(regulated_grad_square, axis=-1)
            )
            # `c` deletes the second-to-last dimension of the gradient, so
            # it is of shape `gradient.shape[:-2] + gradient.shape[-1]`.
            c.assign(
                beta_2_t * c
                + (1 - beta_2_t)
                * tf.reduce_mean(regulated_grad_square, axis=-2)
            )
            v.assign(
                tf.expand_dims(
                    r / tf.reduce_mean(r, axis=-1, keepdims=True), axis=-1
                )
                * tf.expand_dims(c, -2)
            )
        else:
            v.assign(beta_2_t * v + (1 - beta_2_t) * regulated_grad_square)

        # `convert_to_tensor` unifies dense and `IndexedSlices` gradients.
        u_t = tf.convert_to_tensor(gradient) * tf.math.rsqrt(v)
        u_t_hat = u_t / tf.maximum(one, (self._rms(u_t) / self.clip_threshold))
        variable.assign_add(-alpha_t * u_t_hat)

    def get_config(self):
        config = super().get_config()

        config.update(
            {
                "learning_rate": self._serialize_hyperparameter(
                    self._learning_rate
                ),
                "beta_2_decay": self.beta_2_decay,
                "epsilon_1": self.epsilon_1,
                "epsilon_2": self.epsilon_2,
                "clip_threshold": self.clip_threshold,
                "relative_step": self.relative_step,
            }
        )
        return config


Adafactor.__doc__ = Adafactor.__doc__.replace(
    "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args
)
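

# A minimal smoke-test sketch (illustrative only, not part of the library):
# run a few Adafactor steps on a made-up scalar quadratic and watch the
# variable drift toward the loss minimum at 0.
if __name__ == "__main__":
    opt = Adafactor(learning_rate=0.001)
    var = tf.Variable(10.0)  # Hypothetical variable for demonstration.
    for _ in range(100):
        with tf.GradientTape() as tape:
            loss = tf.square(var) / 2.0
        grads = tape.gradient(loss, [var])
        opt.apply_gradients(zip(grads, [var]))
    print("var after 100 Adafactor steps:", float(var.numpy()))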