
    cic>                       d dl mZ ddlmZmZ ddlmZ ddlmZ ed        Zed        Z	ej                  ed	               Zej                  e ej                  d
      d                      Zej                  e ej                  d      dDd                     Zej                  edEd              Zed        Zed        Zed        Zed        Zed        Zed        Zed        Zej                  e ej.                  ddd      dFd                     Zej                  e ej.                  dd      dGd                     Zed         Zed!        Zed"        Zed#        Zej                  e ej.                  d$dd      dFd%                     Zej                  e ej.                  d&d      dGd'                     Zed(        Z ed)        Z!ej                  e ej.                  d*d+,      dHdId-                     Z"ed.        Z#ej                  e ej.                  d/      dJd0                     Z$ed1        Z%ej                  e ej.                  d2      dEd3                     Z&ej                  e ejN                  d4d+,      dKdId5                     Z(ed6        Z)ej                  e ejN                  d7      dLd8                     Z*edMd9       Z+edNd:       Z,edOd;       Z-edPd<       Z.eddej^                  fdQd=       Z0edej^                  fdRd>       Z1edSdTd?       Z2edej^                  fdRd@       Z3edA        Z4ej                  edSdB              Z5edC        Z6y)U    )annotations   )jitconstexpr_function   )core)mathc                8    d}| }|dkD  r|dz  }|dz  }|dkD  r|S )Nr   r    )ilog2ns      R/var/www/html/engine/venv/lib/python3.12/site-packages/triton/language/standard.py_log2r   
   s5    D	A
a%	a	 a% K    c                &    | | dz
  z  dk(  xr | dk7  S Nr   r   r   )r   s    r   _is_power_of_twor      s    QKA(!q&(r   c                    | |z   dz
  |z  S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :type div: Block
    r   r   )xdivs     r   cdivr      s     GaKCr   sigmoidc                :    ddt        j                  |        z   z  S )Nr   )r	   exp)r   s    r   r   r   ,   s     DHHaRL !!r   softmaxNc                    |d}n|}| t        | ||      z
  }t        j                  |      }t        |||      }t        j                  |||      S )Nr   	keep_dims)maxr	   r   sumfdiv)r   dimr   ieee_rounding_dimznumdens           r   r   r   3   sT     { "	C49--A
((1+C
c49
-C99S#}--r   c                H    t        j                  | | j                  g|      S )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    )can_reorder)r   reshapenumel)r   r*   s     r   ravelr-   A   s     <<AGG9+>>r   c                    | |z  |z   }||z  }||z  }||z  }t        j                  ||z
  |      }||z  }|||z  z   }	||z  }
|	|
fS )a  
    Transforms the indices of a row-major `size_i * size_j` matrix into
    the indices of a column-major matrix for each group of `size_g` rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r   jsize_isize_jsize_gijsize_gjgroup_idoff_inew_inew_js              r   	swizzle2dr;   M   sm    , 
VaB voGW}HvE\\&5.&1F	gBBKE&LE%<r   c                0    t        j                  | d|      S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtypes     r   zerosr@   u   s     99UAu%%r   c                B    t        | j                  | j                        S )z
    Returns a tensor of zeros with the same shape and type as a given tensor.

    :param input: input tensor
    :type input: Tensor
    )r@   r>   r?   )inputs    r   
zeros_likerC      s     ekk**r   c                    |r| |k(  xr ||k  }nd}| |kD  xs |}t        j                  || |      }t        j                  |||      }||fS NFr   where)	value1index1value2index2tie_break_lefttiegtv_reti_rets	            r   _argmax_combinerQ      sY    26F?	&	CBJJr66*EJJr66*E%<r   c                     t        | |||d      S NTrQ   rH   rI   rJ   rK   s       r   _argmax_combine_tie_break_leftrV          66664@@r   c                     t        | |||d      S rE   rT   rU   s       r   _argmax_combine_tie_break_fastrY          66665AAr   c                .    t        j                  | |      S N)r   maximumabs     r   _elementwise_maxra          <<1r   r]   return_indicesreturn_indices_tie_break_left)return_indices_argtie_break_argc                z   t        j                  |       } |r<|rt        j                  | |t        |      S t        j                  | |t        |      S t        j
                  | j                  j                        t        j
                  d      k  rt        j
                  | j                  j                               r | j                  t         j                        } n@| j                  j                         sJ d       | j                  t         j                        } t        j                  | |t        |      S Nr       z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesrV   rY   	constexprr?   primitive_bitwidthis_floatingtofloat32is_intint32reducera   rB   axisrc   rd   r   s        r   r    r       s    
 --e4E(,,UD:Xdmnn,,UD:Xdmnn>>%++889DNN2<NN~~ekk5578.{{))+Q-QQ+,{{5$(8INNr   zmaximum indexrL   )rf   c                ,    t        | |d||      \  }}|S NT)rc   rd   r   )r    rB   ru   rL   r   _rets         r   argmaxr{      s!     5$tSamvwHQJr   c                    |r| |k(  xr ||k  }nd}| |k  xs |}t        j                  || |      }t        j                  |||      }||fS rE   rF   )	rH   rI   rJ   rK   rL   rM   lt	value_ret	index_rets	            r   _argmin_combiner      sZ    26F?	&	CB

2vv.I

2vv.Iir   c                     t        | |||d      S rS   r   rU   s       r   _argmin_combine_tie_break_leftr      rW   r   c                     t        | |||d      S rE   r   rU   s       r   _argmin_combine_tie_break_fastr      rZ   r   c                .    t        j                  | |      S r\   r/   r^   s     r   _elementwise_minr      rb   r   r0   c                T   t        j                  |       } |r<|rt        j                  | |t        |      S t        j                  | |t        |      S t        j
                  | j                  j                        dk  rt        j
                  | j                  j                               r | j                  t         j                        } n@| j                  j                         sJ d       | j                  t         j                        } t        j                  | |t        |      S rh   )r   rj   rk   r   r   rl   r?   rm   rn   ro   rp   rq   rr   rs   r   rt   s        r   minr      s    
 --e4E(,,UD:Xdmnn,,UD:Xdmnn>>%++889B>~~ekk5578.{{))+Q-QQ+,{{5$(8INNr   zminimum indexc                ,    t        | |d||      \  }}|S rw   )r   rx   s         r   argminr      s!     TQ_ktuFAsJr   c                    | |z   S r\   r   r^   s     r   _sum_combiner         q5Lr   c                    ||S d }| j                         r%| j                  dk  rt        j                  }|S d }|S | j	                         r!| j                  dk  rt        j
                  nd }|S )Nri   )is_int_signedint_bitwidthr   rr   is_int_unsigneduint32)in_dtyper?   	out_dtypes      r   _pick_sum_dtyper     s{     I"*"7"7""<DJJ	  CG	  
	!	!	##+#8#82#=DKK4	r   r!   r?   )	dtype_argc                    t        | j                  |      }|| j                  |      } t        j                  | |t
        |      S )Nr   )r   r?   ro   r   rs   r   )rB   ru   r   r?   r   s        r   r!   r!     s=    
 !0U CI#;;udLIFFr   c                    | |z  S r\   r   r^   s     r   _xor_combiner   &  r   r   zxor sumc                    t        j                  | j                  j                  j	                         d       t        j
                  | |t        |      S )Nz#xor_sum only supported for integersr   )r   static_asserttypescalarrq   rs   r   rB   ru   r   s      r   xor_sumr   .  s=     	uzz((//13XY;;udLIFFr   c                    | |z  S r\   r   )r   ys     r   _or_combiner   9  r   r   	reduce_orc                    t        j                  | j                  j                  j	                         d       t        j
                  | |t        |      S )Nz%reduce_or only supported for integersr   )r   r   r   r   rq   rs   r   r   s      r   r   r   >  s=     	uzz((//13Z[;;udK9EEr   cumsumc                    t        j                  |       } t        | j                  |      }|| j	                  |      } t        j
                  | |t        |      S r\   )r   rj   r   r?   ro   associative_scanr   )rB   ru   reverser?   r   s        r   r   r   I  sO     --e4E /U CI#  lGDDr   c                    | |z  S r\   r   r^   s     r   _prod_combiner   [  r   r   cumprodc                d    t        j                  |       } t        j                  | |t        |      S r\   )r   rj   r   r   )rB   ru   r   s      r   r   r   `  s+    
 --e4E  mWEEr   c                    t        j                  dd      }t        j                  |dg| |z
  dz
  z  dgz   dg|z  z         }|S )Nr   r   r   )r   aranger+   )n_dimsr1   ars      r   
_indicatorr   l  sH    	Q	B	b1#!a0A36!q@	ABIr   c                j   t        | j                        }t        j                  | j                  j
                  d      }| j                  |d      }|t        ||dz
  |z
  d      z  }|j                  | j                  d      }t        ||      }t        j                  | |kD  ||z  k7  ||       }	|	S )NTbitwidthsignedbitcastr   )
r   r,   r   get_int_dtyper?   rm   ro   r   r   rG   )
r   flipr   r   idtypeixiyr   is_rightrz   s
             r   _compare_and_swapr   s  s     #177^F )C)CDQF	
fd	#B	gb&1*q.$/	/B
aggt$A &!$H **a!e11a
8CJr   c                    |dk(  r t        t        | j                        |      }n|}t        j                  |      D ]  }t        | ||dz
  |z
        }  | S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   )r   r   r,   r   static_ranger   )r   stageorderr   r   s        r   _bitonic_merge_hypercuber     sY     z%.%0u% 6auqy1}56Hr   c                    t        j                  | dgt        | j                        z        }t	        |||      }t        j                  || j
                        } | S )Nr   )r   r+   r   r,   r   r>   )r   r   r   r   hs        r   _bitonic_merger     sH    QeAGGn,-A E51AQ AHr   c                   |t        | j                        dz
  n|}t        j                  |t        | j                        dz
  k(  d       t	        | j                  |         }||n
t	        |      }t	        | j
                        }t        j                  | dg|z        }t        j                  d|dz         D ]  }	t        ||	|	|k  rdn|      } t        j                  |dz   |dz         D ]d  }	|r&t        |t	        |j
                        dz
  |z
        n%t        |t	        |j
                        dz
  |z
        }t        |||	|k  rdn|      }f t        j                  || j                  dd d|z  gz         } | S )ai  
    Sorts a tensor along a specified dimension.

    :param x: The input tensor to be sorted.
    :type x: Tensor
    :param dim: The dimension along which to sort the tensor. If None, the tensor is sorted along the last dimension. Currently, only sorting along the last dimension is supported.
    :type dim: int, optional
    :param k: the number of top elements to select. If none, assume k = x.shape[dim]
    :type k: int, optional
    :param descending: If set to True, the tensor is sorted in descending order. If set to False, the tensor is sorted in ascending order.
    :type descending: bool, optional
    Nr   +only minor dimension is currently supportedr   )ru   )lenr>   r   r   r   r,   r+   r   r   r    r   )
r   kr#   
descendingr%   log_nlog_kr   r   r   s
             r   	sort_implr     sk    03{3qww<!+Dts177|a//1^_!!''$-0E%&YEE!HE"177^F 	Qf%A q%!), K$QE	1zJK
 uqy%!)4 O9CCqww!+e35QV[\]\c\cVdghVhkpVpIr$QAIq:NO
 	Q5z12AHr   c                    t        | ||      S )N)r#   r   r   )r   r#   r   s      r   sortr     s    QCJ77r   c                     t        | ||d      S )NT)r   r#   r   r   )r   r   r#   s      r   topkr     s    Q!66r   c                    |t        | j                        dz
  n|}t        j                  |t        | j                        dz
  k(  d       t	        | j                  d         }t        | |||      S )Nr   r   r   )r   r>   r   r   r   r   )r   r#   r   r%   r   s        r   bitonic_merger     sb     03{3qww<!+Dts177|a//1^_"1772;/F!VZ88r   c                L    | t        |      dz
  } | dk  r| t        |      z  } | S r   )r   )r#   r>   s     r   _get_flip_dimr     s.    
{%j1n
Qws5zJr   c                   t        j                  t        | j                         |k  xr |t        | j                        k         t	        || j                        }t        j                  t        | j                  |                t        | j                  |         }t        j                  | j                  j                  d      }t        j                  | j                  |d      | j                  d| dg|z  z   | j                  |dz   d z         }t        j                  |      D ]  }|t        |||z   d      z  } t        j                  || j                        j                  | j                  d      } | S )z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along
    :type dim: int
    Tr   r   Nr   r   )r   r   r   r>   r   r   r   r   r?   rm   r+   ro   r   r   )r   r#   r%   stepsr   r   r   s          r   r   r     s:    	AGG}+Bc!''l0BC(agg6D'67!!''$-0E )C)CDQFQTT&$T/$1#+1MPQPWPWX\_`X`XaPb1bcAu% +4!8T**+Q ##AGGT#:AHr   c                    t        j                  | |      }t        |j                        dk(  r|S t        j                  ||j                  dd d|j                  d   z  gz         S )a7  
    Interleaves the values of two tensors along their last dimension. The two tensors must have the same shape.
    Equivalent to `tl.join(a, b).reshape(a.shape[:-1] + [2 * a.shape[-1]])`

    :param a: The first input tensor.
    :type a: Tensor
    :param b: The second input tensor.
    :type b: Tensor
    r   Nr   )r   joinr   r>   r+   )r_   r`   cs      r   
interleaver     sY     			!QA
177|q
 ||Aqwws|q1772;.??@@r   )NFF)F)NFTF)TF)NFN)r?   core.constexprrE   )r   FN)r   F)r   r   r1   r   )r   r   )r   r   r   r   )r   r   r   r   r   r   )r   r   r#   r   r   r   )r#   r   r   r   r\   )r   r   r#   r   )7
__future__r   runtime.jitr   r    r   r	   r   r   _tensor_member_fnr   _add_math_1arg_docstrr   r   r-   r;   r@   rC   rQ   rV   rY   ra   _add_reduction_docstrr    r{   r   r   r   r   r   r   r   r   r!   r   r   r   r   _add_scan_docstrr   r   r   r   r   r   r   CONSTEXPR_0r   r   r   r   r   r   r   r   r   r   <module>r      sb   " 1  
   ) ) 	   	  I&" '  " I&. '  . ?  ? $ $N 	& 	& + +   A A B B   I:J*IKOK  O" O;KL M       A A B B   I:J*IKOK  O" O;KL M  
     EW5G 6  G   I&G '  G   K(F )  F x73	E 4  	E   y!F "  F    $  *   %)dhdtdt % %P "&TEUEU 8 8 7 7 +/dN^N^ 9 9     . A Ar   