
"""Attention layer that can be used in sequence DNN/CNN models.

This file follows the terminology of https://arxiv.org/abs/1706.03762 Figure 2.
Attention is formed by three tensors: Query, Key and Value.
    N)BaseDenseAttention)keras_exportzkeras.layers.Attentionc                   >     e Zd ZdZd fd	Z fdZd Z fdZ xZS )	Attentiona   Dot-product attention layer, a.k.a. Luong-style attention.

    Inputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor
    of shape `[batch_size, Tv, dim]` and `key` tensor of shape
    `[batch_size, Tv, dim]`. The calculation follows the steps:

    1. Calculate scores with shape `[batch_size, Tq, Tv]` as a `query`-`key` dot
        product: `scores = tf.matmul(query, key, transpose_b=True)`.
    2. Use scores to calculate a distribution with shape
        `[batch_size, Tq, Tv]`: `distribution = tf.nn.softmax(scores)`.
    3. Use `distribution` to create a linear combination of `value` with
        shape `[batch_size, Tq, dim]`:
        `return tf.matmul(distribution, value)` (see the sketch below).
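
    In plain TensorFlow ops, the three steps above amount to the following
    minimal sketch (the batch size and the `Tq`, `Tv`, `dim` values are
    arbitrary and only illustrative):

    ```python
    import tensorflow as tf

    batch_size, Tq, Tv, dim = 2, 3, 4, 5
    query = tf.random.normal((batch_size, Tq, dim))
    key = tf.random.normal((batch_size, Tv, dim))
    value = tf.random.normal((batch_size, Tv, dim))

    # 1. Dot-product scores of shape [batch_size, Tq, Tv].
    scores = tf.matmul(query, key, transpose_b=True)
    # 2. Softmax over the last axis gives the attention distribution.
    distribution = tf.nn.softmax(scores)
    # 3. Linear combination of `value`, shape [batch_size, Tq, dim].
    output = tf.matmul(distribution, value)
    ```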

    Args:
        use_scale: If `True`, will create a scalar variable to scale the
            attention scores.
        dropout: Float between 0 and 1. Fraction of the units to drop for the
            attention scores. Defaults to 0.0.
        score_mode: Function to use to compute attention scores, one of
            `{"dot", "concat"}`. `"dot"` refers to the dot product between the
            query and key vectors. `"concat"` refers to the hyperbolic tangent
            of the concatenation of the query and key vectors.
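
    For instance, the constructor arguments above can be combined as follows
    (the specific values are only illustrative):

    ```python
    attention_layer = tf.keras.layers.Attention(
        use_scale=True,       # learn a scalar multiplier for the scores
        dropout=0.1,          # drop 10% of the attention weights in training
        score_mode="concat",  # tanh-based additive scoring instead of "dot"
    )
    ```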

    Call arguments:
        inputs: List of the following tensors:
            * query: Query `Tensor` of shape `[batch_size, Tq, dim]`.
            * value: Value `Tensor` of shape `[batch_size, Tv, dim]`.
            * key: Optional key `Tensor` of shape `[batch_size, Tv, dim]`. If
                not given, will use `value` for both `key` and `value`, which is
                the most common case.
        mask: List of the following tensors:
            * query_mask: A boolean mask `Tensor` of shape `[batch_size, Tq]`.
                If given, the output will be zero at the positions where
                `mask==False`.
            * value_mask: A boolean mask `Tensor` of shape `[batch_size, Tv]`.
                If given, will apply the mask such that values at positions
                where `mask==False` do not contribute to the result.
        return_attention_scores: bool, if `True`, returns the attention scores
            (after masking and softmax) as an additional output argument.
        training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (no dropout).
        use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds
            a mask such that position `i` cannot attend to positions `j > i`.
            This prevents the flow of information from the future towards the
            past.
            Defaults to `False`.
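
    A call combining these arguments might look like the following sketch
    (the tensors and masks are placeholders for the inputs described above):

    ```python
    output, attention_scores = attention_layer(
        [query, value],                 # `key` defaults to `value`
        mask=[query_mask, value_mask],  # boolean masks described above
        return_attention_scores=True,
        use_causal_mask=True,           # decoder self-attention masking
    )
    ```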

    Output:

        Attention outputs of shape `[batch_size, Tq, dim]`.
        [Optional] Attention scores after masking and softmax with shape
            `[batch_size, Tq, Tv]`.

    The meaning of `query`, `value` and `key` depends on the application. In the
    case of text similarity, for example, `query` is the sequence embeddings of
    the first piece of text and `value` is the sequence embeddings of the second
    piece of text. `key` is usually the same tensor as `value`.

    Here is a code example for using `Attention` in a CNN+Attention network:

    ```python
    # Variable-length int sequences.
    query_input = tf.keras.Input(shape=(None,), dtype='int32')
    value_input = tf.keras.Input(shape=(None,), dtype='int32')

    # Embedding lookup.
    token_embedding = tf.keras.layers.Embedding(input_dim=1000, output_dim=64)
    # Query embeddings of shape [batch_size, Tq, dimension].
    query_embeddings = token_embedding(query_input)
    # Value embeddings of shape [batch_size, Tv, dimension].
    value_embeddings = token_embedding(value_input)

    # CNN layer.
    cnn_layer = tf.keras.layers.Conv1D(
        filters=100,
        kernel_size=4,
        # Use 'same' padding so outputs have the same shape as inputs.
        padding='same')
    # Query encoding of shape [batch_size, Tq, filters].
    query_seq_encoding = cnn_layer(query_embeddings)
    # Value encoding of shape [batch_size, Tv, filters].
    value_seq_encoding = cnn_layer(value_embeddings)

    # Query-value attention of shape [batch_size, Tq, filters].
    query_value_attention_seq = tf.keras.layers.Attention()(
        [query_seq_encoding, value_seq_encoding])

    # Reduce over the sequence axis to produce encodings of shape
    # [batch_size, filters].
    query_encoding = tf.keras.layers.GlobalAveragePooling1D()(
        query_seq_encoding)
    query_value_attention = tf.keras.layers.GlobalAveragePooling1D()(
        query_value_attention_seq)

    # Concatenate query and document encodings to produce a DNN input layer.
    input_layer = tf.keras.layers.Concatenate()(
        [query_encoding, query_value_attention])

    # Add DNN layers, and create Model.
    # ...
    ```
    """

    def __init__(self, use_scale=False, score_mode="dot", **kwargs):
        super().__init__(**kwargs)
        self.use_scale = use_scale
        self.score_mode = score_mode
        if self.score_mode not in ["dot", "concat"]:
            raise ValueError(
                f"Received: score_mode={score_mode}. Acceptable values "
                'are: ["dot", "concat"]'
            )

    def build(self, input_shape):
        """Creates variable when `use_scale` is True or `score_mode` is
        `concat`."""
        if self.use_scale:
            # Scalar weight that rescales the attention scores.
            self.scale = self.add_weight(
                name="scale",
                shape=(),
                initializer="ones",
                dtype=self.dtype,
                trainable=True,
            )
        else:
            self.scale = None
        if self.score_mode == "concat":
            # Scalar weight applied to the tanh-based ("concat") scores.
            self.concat_score_weight = self.add_weight(
                name="concat_score_weight",
                shape=(),
                initializer="ones",
                dtype=self.dtype,
                trainable=True,
            )
        else:
            self.concat_score_weight = None
        super().build(input_shape)

    def _calculate_scores(self, query, key):
        """Calculates attention scores as a query-key dot product.

        Args:
            query: Query tensor of shape `[batch_size, Tq, dim]`.
            key: Key tensor of shape `[batch_size, Tv, dim]`.
        Returns:
            Tensor of shape `[batch_size, Tq, Tv]`.
        """
        if self.score_mode == "dot":
            scores = tf.matmul(query, key, transpose_b=True)
            if self.scale is not None:
                scores *= self.scale
        elif self.score_mode == "concat":
            # Reshape tensors to enable broadcasting.
            # Reshape into [batch_size, Tq, 1, dim].
            q_reshaped = tf.expand_dims(query, axis=-2)
            # Reshape into [batch_size, 1, Tv, dim].
            k_reshaped = tf.expand_dims(key, axis=-3)
            if self.scale is not None:
                scores = self.concat_score_weight * tf.reduce_sum(
                    tf.tanh(self.scale * (q_reshaped + k_reshaped)), axis=-1
                )
            else:
                scores = self.concat_score_weight * tf.reduce_sum(
                    tf.tanh(q_reshaped + k_reshaped), axis=-1
                )

        return scores

    def get_config(self):
        config = {"use_scale": self.use_scale, "score_mode": self.score_mode}
        base_config = super().get_config()
        return dict(list(base_config.items()) + list(config.items()))