
    0h'                         d Z ddlmc mZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ  edddg	       G d
 dej                               Zy)zNadam optimizer implementation.    N)backend_config)optimizer_v2)learning_rate_schedule)keras_exportzkeras.optimizers.legacy.Nadamzkeras.optimizers.Nadam)v1c                   b     e Zd ZdZdZ	 	 	 	 	 d
 fd	Zd Zd Z fdZddZ	ddZ
 fd	Z xZS )Nadama
  Optimizer that implements the NAdam algorithm.
    Much like Adam is essentially RMSprop with momentum, Nadam is Adam with
    Nesterov momentum.

    Args:
      learning_rate: A Tensor or a floating point value.  The learning rate.
      beta_1: A float value or a constant float tensor. The exponential decay
        rate for the 1st moment estimates.
      beta_2: A float value or a constant float tensor. The exponential decay
        rate for the exponentially weighted infinity norm.
      epsilon: A small constant for numerical stability.
      name: Optional name for the operations created when applying gradients.
        Defaults to `"Nadam"`.
      **kwargs: keyword arguments. Allowed arguments are `clipvalue`,
        `clipnorm`, `global_clipnorm`.
        If `clipvalue` (float) is set, the gradient of each weight
        is clipped to be no higher than this value.
        If `clipnorm` (float) is set, the gradient of each weight
        is individually clipped so that its norm is no higher than this value.
        If `global_clipnorm` (float) is set the gradient of all weights is
        clipped so that their global norm is no higher than this value.

    Usage Example:
      >>> opt = tf.keras.optimizers.legacy.Nadam(learning_rate=0.2)
      >>> var1 = tf.Variable(10.0)
      >>> loss = lambda: (var1 ** 2) / 2.0
      >>> step_count = opt.minimize(loss, [var1]).numpy()
      >>> "{:.1f}".format(var1.numpy())
      9.8

    Reference:
      - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf).
    Tc                    |j                  dd      |d<   |j                  d|      }t        |t        j                        rt        d      t        |   |fi | | j                  d|j                  d|             | j                  d| j                         | j                  d|       | j                  d|       |xs t        j                         | _        d | _        y )	Nschedule_decaygMbp?decaylrzdThe Nadam optimizer does not support tf.keras.optimizers.LearningRateSchedules as the learning rate.learning_ratebeta_1beta_2)popget
isinstancer   LearningRateSchedule
ValueErrorsuper__init__
_set_hyper_initial_decayr   epsilon_m_cache)selfr   r   r   r   namekwargs	__class__s          ^/var/www/html/engine/venv/lib/python3.12/site-packages/tf_keras/src/optimizers/legacy/nadam.pyr   zNadam.__init__D   s     !**%5u=w

471FF
 !  	((D-)HI!4!45&)&):."8"8":    c                 d   |d   j                   j                  }| j                  Y| j                  dg |ddt        j
                  j                        | _        | j                  j                  | j                         |D ]  }| j                  |d        |D ]  }| j                  |d        y )Nr   momentum_cacheonesF)shapedtypeinitializer	trainableaggregationmv)
r&   
base_dtyper   
add_weighttfVariableAggregationONLY_FIRST_REPLICA_weightsappendadd_slot)r   var_list	var_dtypevars       r    _create_slotszNadam._create_slotsa   s    QK%%00	==  OO "22EE , DM MM  / 	$CMM#s#	$  	$CMM#s#	$r!   c                 4   t        j                  | j                  d|            }t        j                  | j                  d|            }t        j                  | j                  d|            }t        j                  | j                  dz   |      }t        j                  | j                  dz   |      }t        j                  d|      }	|ddt        j
                  |	| j                  |z        z  z
  z  }
|ddt        j
                  |	| j                  |z        z  z
  z  }t        j                  | j                  |      |
z  }|| j                  j                  u rSt        j                  t         j                  j                  j                  | j                  || j                  	            }||z  }t        || t        j                  | j                   |      |||
|d|z
  d|z
  d|
z
  d|z
  d|z
  dt        j
                  ||      z
  
      |||f<   y )Nr   r   r         gQ?g      ?g      ?use_locking)lr_tneg_lr_tr   beta_1_tbeta_2_tm_tm_t_1one_minus_beta_1_tone_minus_beta_2_tone_minus_m_tone_minus_m_schedule_newone_minus_m_schedule_nextv_t_prime_denominator)r.   identity
_get_hypercast
iterationspowr   _m_cache_readr   r&   compatr   assign_use_lockingdictconvert_to_tensorr   )r   
var_devicer5   apply_stater=   r?   r@   
local_step	next_step
decay_baserA   rB   m_schedule_newm_schedule_nexts                 r    _prepare_localzNadam._prepare_localu   s   {{4???IFG;;txCD;;txCDWWT__q0)<
GGDOOa/;	WWT9-
#
D,?,?*,LMNN
 #
D,?,?),KLMM
 !3!3Y?#E+++[[		##MM>t?P?P $ N
 )50/3U((yA 8| 8|)%(>%9&)O&;"%x(D"D0
Z+,r!   c                 j    t        j                  | j                        | _        t        |   |      S N)r.   rI   r   rN   r   _prepare)r   r4   r   s     r    r^   zNadam._prepare   s*      [[7w))r!   c                 F   |j                   |j                  j                  }}|xs i j                  ||f      xs | j	                  ||      }| j                  |d      }| j                  |d      }||d   z  }	|d   |z  |d   |z  z   }
t        j                  j                  j                  ||
| j                        }
|
|d   z  }|d   |z  |d	   t        j                  |      z  z   }t        j                  j                  j                  ||| j                        }||d
   z  }|d   |	z  |d   |z  z   }||d   |z  t        j                  |      |d   z   z  z
  }t        j                  j                  j                  ||| j                        j                  S )Nr*   r+   rF   r?   rC   r;   rG   r@   rD   rH   rE   rB   r=   r   )devicer&   r,   r   _fallback_apply_stateget_slotr.   rO   r   rP   rQ   squaresqrtop)r   gradr6   rU   rT   r5   coefficientsr*   r+   g_primerA   	m_t_primev_t	v_t_primem_t_barvar_ts                   r    _resource_apply_densezNadam._resource_apply_dense   s    #

CII,@,@I
#)r..#
 ?''
I> 	 MM#s#MM#s#&@AA$q(/0478 	 iill!!!Sd6G6G!H,'BCC	:&*\ .
IIdO.  iill!!!Sd6G6G!H,'>??	)G37#i/0 	 l6*W4GGIi!88
 
 yy||""34;L;L"MPPPr!   c                 6   |j                   |j                  j                  }}|xs i j                  ||f      xs | j	                  ||      }| j                  |d      }| j                  |d      }	||d   z  }
||d   z  }t        j                  j                  j                  |||d   z  | j                        }t        j                  |g      5  | j                  |||      }t        j                  ||      }d d d        |d   z  }|d   |
z  |d	   |z  z   }||z  |d
   z  }t        j                  j                  j                  |	|	|d   z  | j                        }t        j                  |g      5  | j                  |	||      }t        j                  ||      }d d d        |d   z  }t        j                  |      |d   z   }| j                  |||d   |z  |z        }t        j                  |||g S # 1 sw Y   xY w# 1 sw Y   jxY w)Nr*   r+   rF   rC   r?   r;   rG   rE   rB   rD   r@   rH   r   r>   )r`   r&   r,   r   ra   rb   r.   rO   r   rP   rQ   control_dependencies_resource_scatter_addgatherrd   group)r   rf   r6   indicesrU   rT   r5   rg   r*   r+   rh   m_scaled_g_valuesrA   	m_t_sliceri   rl   v_scaled_g_valuesrj   	v_t_slicerk   v_prime_sqrt_plus_eps
var_updates                         r    _resource_apply_sparsezNadam._resource_apply_sparse   sL    #

CII,@,@I
#)r..#
 ?''
I> 	 MM#s#MM#s#&@AA !<0D#EEiill!!q<
++9J9J " 
 $$cU+ 	0,,Q9JKC		#w/I	0 -H II	)G37#i/0 	 "D[L9M,NNiill!!q<
++9J9J " 
 $$cU+ 	0,,Q9JKC		#w/I	0 -D EE	 "	 2\)5L L//$w.1FF


 xx*gs3449	0 	0 	0 	0s   *H<*HHHc                     t         |          }|j                  | j                  d      | j                  | j                  d      | j                  d      | j
                  d       |S )Nr   r   r   )r   r   r   r   r   )r   
get_configupdate_serialize_hyperparameterr   r   )r   configr   s     r    r}   zNadam.get_config   sg    #%!%!?!?#" ,,88B88B<<
	
 r!   )gMbP?g?g+?gHz>r	   r]   )__name__
__module____qualname____doc___HAS_AGGREGATE_GRADr   r7   r[   r^   rn   r{   r}   __classcell__)r   s   @r    r	   r	      sM    
 D  :$('
R*Q<-5^ r!   r	   )r   tensorflow.compat.v2rO   v2r.   tf_keras.srcr   tf_keras.src.optimizers.legacyr   !tf_keras.src.optimizers.schedulesr    tensorflow.python.util.tf_exportr   OptimizerV2r	    r!   r    <module>r      sS    & ! ! ' 7 D : # "AB_L$$ _	_r!   