
    0hd1                     z    d Z ddlmc mZ ddlmZ ddlmZ  edddg       G d d	ej                               Z
y)
z'Ftrl-proximal optimizer implementation.    N)optimizer_v2)keras_exportzkeras.optimizers.legacy.Ftrlzkeras.optimizers.Ftrl)v1c                   ^     e Zd ZdZ	 	 	 	 	 	 	 	 d fd	Zd Z fdZd	dZd	dZ fdZ	 xZ
S )
Ftrla  Optimizer that implements the FTRL algorithm.

    "Follow The Regularized Leader" (FTRL) is an optimization algorithm
    developed at Google for click-through rate prediction in the early 2010s. It
    is most suitable for shallow models with large and sparse feature spaces.
    The algorithm is described by
    [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf).
    The TF-Keras version has support for both online L2 regularization
    (the L2 regularization described in the paper
    above) and shrinkage-type L2 regularization
    (which is the addition of an L2 penalty to the loss function).

    Initialization:

    ```python
    n = 0
    sigma = 0
    z = 0
    ```

    Update rule for one variable `w`:

    ```python
    prev_n = n
    n = n + g ** 2
    sigma = (sqrt(n) - sqrt(prev_n)) / lr
    z = z + g - sigma * w
    if abs(z) < lambda_1:
      w = 0
    else:
      w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2)
    ```

    Notation:

    - `lr` is the learning rate
    - `g` is the gradient for the variable
    - `lambda_1` is the L1 regularization strength
    - `lambda_2` is the L2 regularization strength

    Check the documentation for the `l2_shrinkage_regularization_strength`
    parameter for more details when shrinkage is enabled, in which case gradient
    is replaced with a gradient with shrinkage.

    Args:
      learning_rate: A `Tensor`, floating point value, or a schedule that is a
        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
      learning_rate_power: A float value, must be less or equal to zero.
        Controls how the learning rate decreases during training. Use zero for
        a fixed learning rate.
      initial_accumulator_value: The starting value for accumulators.
        Only zero or positive values are allowed.
      l1_regularization_strength: A float value, must be greater than or
        equal to zero. Defaults to `0.0`.
      l2_regularization_strength: A float value, must be greater than or
        equal to zero. Defaults to `0.0`.
      name: Optional name prefix for the operations created when applying
        gradients.  Defaults to `"Ftrl"`.
      l2_shrinkage_regularization_strength: A float value, must be greater than
        or equal to zero. This differs from L2 above in that the L2 above is a
        stabilization penalty, whereas this L2 shrinkage is a magnitude penalty.
        When input is sparse shrinkage will only happen on the active weights.
      beta: A float value, representing the beta value from the paper.
        Defaults to `0.0`.
      **kwargs: keyword arguments. Allowed arguments are `clipvalue`,
        `clipnorm`, `global_clipnorm`.
        If `clipvalue` (float) is set, the gradient of each weight
        is clipped to be no higher than this value.
        If `clipnorm` (float) is set, the gradient of each weight
        is individually clipped so that its norm is no higher than this value.
        If `global_clipnorm` (float) is set the gradient of all weights is
        clipped so that their global norm is no higher than this value.

    Reference:
      - [McMahan et al., 2013](
        https://research.google.com/pubs/archive/41159.pdf)
    c	                    t        
|   |fi |	 |dk  rt        d| d      |dkD  rt        d| d      |dk  rt        d| d      |dk  rt        d| d      |dk  rt        d| d      | j                  d|       | j                  d	| j                         | j                  d
|       | j                  d|       | j                  d|       | j                  d|       || _        || _        y )N        z^`initial_accumulator_value` needs to be positive or zero. Received: initial_accumulator_value=.zR`learning_rate_power` needs to be negative or zero. Received: learning_rate_power=z``l1_regularization_strength` needs to be positive or zero. Received: l1_regularization_strength=z``l2_regularization_strength` needs to be positive or zero. Received: l2_regularization_strength=zt`l2_shrinkage_regularization_strength` needs to be positive or zero. Received: l2_shrinkage_regularization_strength=learning_ratedecaylearning_rate_powerl1_regularization_strengthl2_regularization_strengthbeta)super__init__
ValueError
_set_hyper_initial_decay_initial_accumulator_value%_l2_shrinkage_regularization_strength)selfr   r   initial_accumulator_valuer   r   name$l2_shrinkage_regularization_strengthr   kwargs	__class__s             ]/var/www/html/engine/venv/lib/python3.12/site-packages/tf_keras/src/optimizers/legacy/ftrl.pyr   zFtrl.__init__m   s`    	(($s*--F,GqJ 
 $'':&;1> 
 &+8-.a1 
 &+8-.a1 
 0#589<  	7!4!45-/BC(*D	
 	(*D	
 	%*C'0 	2    c                     |D ]r  }|j                   j                  }t        j                  j                  j                  | j                  |      }| j                  |d|       | j                  |d       t y )N)dtypeaccumulatorlinear)r!   
base_dtypetfcompatr   constant_initializerr   add_slot)r   var_listvarr!   inits        r   _create_slotszFtrl._create_slots   sh     	)CII((E99<<44//u 5 D MM#}d3MM#x(	)r   c                    t         |   |||       |||f   j                  t        t	        j
                  | j                  d|            t	        j
                  | j                  d|            t	        j
                  | j                  d|            t	        j
                  | j                  d|            t	        j                  | j                  |                   y )Nr   r   r   r   )r   r   r   r   r   )	r   _prepare_localupdatedictr%   identity
_get_hypercastr   )r   
var_device	var_dtypeapply_stater   s       r   r.   zFtrl._prepare_local   s    z9kBZ+,33$&KKOO$99E% ,.;;OO$@)L, ,.;;OO$@)L, [[!CD57WW>>	6	
r   c                    |j                   |j                  j                  }}|xs i j                  ||f      xs | j	                  ||      }|d   |d   d|d   z  z  z   }| j                  |d      }| j                  |d      }	| j                  dk  rYt        j                  j                  |j                  |j                  |	j                  ||d   |d   ||d	   | j                  
	      S t        j                  j                  |j                  |j                  |	j                  ||d   |d   ||d   |d	   | j                  
      S )Nr   r          @lr_tr"   r#   r	   r   r   )	r*   accumr#   gradlrl1l2lr_poweruse_lockingr   )
r*   r:   r#   r;   r<   r=   r>   l2_shrinkager?   r@   )devicer!   r$   get_fallback_apply_stateget_slotr   r%   raw_opsResourceApplyFtrlhandle_use_lockingResourceApplyFtrlV2)
r   r;   r*   r6   r4   r5   coefficients#adjusted_l2_regularization_strengthr:   r#   s
             r   _resource_apply_densezFtrl._resource_apply_dense   sn    #

CII,@,@I
#)r..#
 ?''
I> 	 /;(/
 C,v*>$>?/@+ c=1sH-55<:://JJll}}' <=6%&;< -- 0 
 
 ::11JJll}}' <=6): &&;< -- 2  r   c                    |j                   |j                  j                  }}|xs i j                  ||f      xs | j	                  ||      }|d   |d   d|d   z  z  z   }| j                  |d      }	| j                  |d      }
| j                  dk  rZt        j                  j                  |j                  |	j                  |
j                  |||d   |d   ||d	   | j                  

      S t        j                  j                  |j                  |	j                  |
j                  |||d   |d   ||d   |d	   | j                        S )Nr   r   r8   r9   r"   r#   r	   r   r   )
r*   r:   r#   r;   indicesr<   r=   r>   r?   r@   r   )r*   r:   r#   r;   rO   r<   r=   r>   rA   r?   r@   )rB   r!   r$   rC   rD   rE   r   r%   rF   ResourceSparseApplyFtrlrH   rI   ResourceSparseApplyFtrlV2)r   r;   r*   rO   r6   r4   r5   rK   rL   r:   r#   s              r   _resource_apply_sparsezFtrl._resource_apply_sparse   st    #

CII,@,@I
#)r..#
 ?''
I> 	 /;(/
 C,v*>$>?/@+ c=1sH-55<::55JJll}}' <=6%&;< -- 6   ::77JJll}}' <=6): &&;< -- 8  r   c                 *   t         |          }|j                  | j                  d      | j                  | j
                  | j                  d      | j                  d      | j                  d      | j                  d      | j                  d       |S )Nr   r   r   r   r   )r   r   r   r   r   r   r   r   )r   
get_configr/   _serialize_hyperparameterr   r   r   )r   configr   s     r   rT   zFtrl.get_config  s    #%!%!?!?#" ,,-1-L-L'+'E'E)( /3.L.L0/ /3.L.L0/ 66v>8<8b8b!	
( r   )gMbP?g      g?r	   r	   r   r	   r	   )N)__name__
__module____qualname____doc__r   r,   r.   rM   rR   rT   __classcell__)r   s   @r   r   r      sJ    
L`  "%#&#&-0:
x)
()V+Z r   r   )rZ   tensorflow.compat.v2r&   v2r%   tf_keras.src.optimizers.legacyr    tensorflow.python.util.tf_exportr   OptimizerV2r    r   r   <module>rb      sO    . " ! 7 : "!?@W<## W	Wr   