
    `i                         d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZmZ d dlZd dlmZ d dlmZmZmZ g dZg dZeez   Zg dZed	gz   Zeez   Zed	gz   Zd
dgZdgez   dgz   ez   d	gz   Z e ee      h dz
        Z d Z!d Z"d Z#d Z$d Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0d Z1d2d ee   fd!Z2d3d"ejf                  d#eeejh                  f   fd$Z5d"e6d#ejn                  fd%Z8d#e6fd&Z9d' Z:d4d(Z;d4d)Z<ejz                  j}                   e;         e<       *      Z?d+e@d,e@fd-ZAd.eejh                  ej                  j                  j$                  f   d#ejh                  fd/ZDd3d0eee6      fd1ZEy)5    Nknobs)OptionalSetUnion)RandomState)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   >   r   r   r   c                  H    t         j                  j                  dd      dk(  S )NTRITON_INTERPRET01)osenvironget     R/var/www/html/engine/venv/lib/python3.12/site-packages/triton/_internal_testing.pyis_interpreterr%      s    ::>>,c2c99r#   c                  |    t               ry t        j                  j                  j                  j                         S N)r%   tritonruntimedriveractiveget_current_targetr"   r#   r$   r,   r,      s*    >>  ''::<<r#   c                  <    t               } | dS | j                  dk(  S )NFcudar,   backendtargets    r$   is_cudar3   $   s"    !FN5@&(@@r#   c                  b    t               xr$ t        j                  j                         d   dk\  S )Nr      r3   torchr.   get_device_capabilityr"   r#   r$   is_ampere_or_newerr9   )   &    9C99;A>!CCr#   c                  b    t               xr$ t        j                  j                         d   dk(  S )Nr   
   r6   r"   r#   r$   is_blackwellr=   -   s&    9D99;A>"DDr#   c                  b    t               xr$ t        j                  j                         d   dk\  S Nr   	   r6   r"   r#   r$   is_hopper_or_newerrA   1   r:   r#   c                  b    t               xr$ t        j                  j                         d   dk(  S r?   r6   r"   r#   r$   	is_hopperrC   5   r:   r#   c                  <    t               } | dS | j                  dk(  S )NFhipr/   r1   s    r$   is_hiprF   9   "    !FN5?%(??r#   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )NrE   gfx90ar,   r0   archr1   s    r$   is_hip_cdna2rL   >   1    !FU&..E"9UfkkX>UUr#   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )NrE   gfx942rJ   r1   s    r$   is_hip_cdna3rP   C   rM   r#   c                  b    t               } | d uxr  | j                  dk(  xr | j                  dk(  S )NrE   gfx950rJ   r1   s    r$   is_hip_cdna4rS   H   rM   r#   c                  `    t               } | d uxr | j                  dk(  xr d| j                  v S )NrE   gfx11rJ   r1   s    r$   is_hip_gfx11rV   M   1    !FT&..E"9Tg>TTr#   c                  `    t               } | d uxr | j                  dk(  xr d| j                  v S )NrE   gfx12rJ   r1   s    r$   is_hip_gfx12rZ   R   rW   r#   c                  F    t               xs t               xs
 t               S r'   )rL   rP   rS   r"   r#   r$   is_hip_cdnar\   W   s    >=\^=|~=r#   c                      t               rdS dS )Ni  i   )rS   r"   r#   r$   get_hip_lds_sizer^   [   s    !^6..r#   c                  <    t               } | dS | j                  dk(  S )NFxpur/   r1   s    r$   is_xpura   _   rG   r#   c                  H    t               } | dS t        | j                        S )N )r,   strrK   r1   s    r$   get_archre   d   s"    !F25S%55r#   rsc                 F   t        | t              r| f} |t        d      }|t        t        z   v rt        j                  t        t
        |            }||j                  nt        ||j                        }||j                  nt        ||j                        }t        t
        |      }|j                  ||| |      }d||dk(  <   |S |r)d|v r%|j                  dd| t
        j                        }|S |t        v r"|j                  dd|       j                  |      S |d	k(  rV|j                  dd|       j                  d
      j                  d      t        j                   d      z  j                  d
      S |dv r|j                  dd|       dkD  S t#        d|       )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
       )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strrf   lowhighrv   rj   xs           r$   numpy_randomr   i   s|   
 %		zb!J,,Y/0;eiiCUYY,? Luyyc$		.BI&JJsD%uJ5!q&		x9,JJr2uBGGJ4	l	"yyAu%,,Y77	j	 		!Q&--i8==hG"))T^J__eefopp	/	/yyAu%++^I;788r#   r   returnc                    | j                   j                  }|t        v r_|j                  d      }| j	                  t        t        |            }t        t        j                  ||      t        t        |            S |r3d|v r/t        t        j                  | |      t        t        |            S |dk(  r*|dk(  r%t        j                  | |      j                         S t        j                  | |      S )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicerl   r   r   )rj   namert   lstripr}   rw   ru   r
   r7   tensortlr   )r   r   dst_typetsigned_type_namex_signeds         r$   	to_tritonr      s     	
AK88C=88GB(89:5<<@'"a.QQH,u||Af=wr8?TUU	>h*4<<&1::<<||Af--r#   c                 <    t        j                  t        |    d       S r'   )r   	str_to_tyr   r   s    r$   str_to_triton_dtyper      s    <<215t<<r#   c                 $   t        | t        j                  j                        r| j                  S t        | t
        j                        r0t        j                  dt        |             }|j                  d      S t        dt        |              )Nz^torch\.(\w+)$rk   znot a triton or torch dtype: )rq   r(   languagerj   r   r7   rematchrd   group	TypeErrortype)rj   ms     r$   torch_dtype_namer      sh    %../zz	E5;;	'HH&E
3wwqz7U}EFFr#   c                    t        | t              rX| j                  j                         j	                         j                  t        t        t        | j                                    S t        | t        j                        rf| j                  t        j                  u r,| j                         j                         j	                         S | j                         j	                         S t        d|        )Nz Not a triton-compatible tensor: )rq   r	   basecpunumpyr}   rw   ru   r   rj   r7   Tensorr   float
ValueErrorr   s    r$   to_numpyr      s    !]#vvzz|!!#**727G7P+QRR	Au||	$77enn$557==?((**uuw}};A3?@@r#   c                 R   t               ryt               syt        j                  j                  j
                  }| rdnd}t        t        t        |j                  d                  }t        |      dk(  sJ |       t        j                  j                         d   dk\  xr ||k\  S )	NTF)   r   )r      .   r   r@   )r%   r3   r   nvidiaptxasversiontuplemaprr   splitlenr7   r.   r8   )
byval_onlycuda_versionmin_cuda_versioncuda_version_tuples       r$   supports_tmar      s    9<<%%--L",w's3(:(:3(?@A!"a';);;'::++-a0A5`:LP`:``r#   c                 
    | ryy)NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r"   )r   s    r$   tma_skip_msgr      s    f]r#   )reasonsizealignc                 N    t        j                  | t         j                  d      S )Nr.   )rj   r   )r7   emptyr   )r   r   _s      r$   default_alloc_fnr      s    ;;t5::f==r#   r   c                 z    t        | t        j                  j                  j                        r| j
                  S | S r'   )rq   r(   r)   jitr	   r   )r   s    r$   unwrap_tensorr      s*    !V^^''556vvHr#   skipped_attrc                 X  	 ddl m | 
t               } t        j                         j
                  j                         D ci c]1  \  }}t        |j                        r|j                  k7  r|| vr||3 c}}g j                  	fd}	fd}||fS c c}}w )Nr   r   c                     j                         D ]  \  } }t        | |j                         j                                |j                  j                         D ]W  }|j                  t        j                  v rj                  |j                  d       =j                  |j                         Y  d_        S )NF)raisingT)itemssetattrcopyresetknob_descriptorsvalueskeyr   r    delenvappendpropagate_env)r   knobsetknobenv_to_unsetr   	knobs_mapmonkeypatchs      r$   fresh_functionz)_fresh_knobs_impl.<locals>.fresh_function   s    &__. 	2MD'E4!5!5!7800779 288rzz)&&txx&? ''1	2	2 #r#   c                      j                         D ]  \  } }t        | |        j                          D ]&  }|t        j                  v st        j                  |= ( _        y r'   )r   r   undor   r    r   )r   r   kr   r   r   r   prev_propagate_envs      r$   reset_functionz)_fresh_knobs_impl.<locals>.reset_function   sf    &__. 	*MD'E4)	* 	 	"ABJJJJqM	" 1r#   )
r(   r   setpytestMonkeyPatch__dict__r   rq   
base_knobsr   )
r   r   r   r   r   r   r   r   r   r   s
        @@@@@r$   _fresh_knobs_implr      s    u$$&K #^^113D'gu//0W@P@P5PUYamUm 	gI L,,
	1 	1 >))Gs   	6B&)NNNr'   )F)Fr   r   r   ru   r7   r(   triton.languager   r   r   typingr   r   r   r   numpy.randomr   triton.runtime.jitr	   r
   r   rs   rt   integral_dtypesr{   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypessortedr   
tma_dtypesr%   r,   r3   r9   r=   rA   rC   rF   rL   rP   rS   rV   rZ   r\   r^   ra   re   r   ndarrayr   r   rd   rj   r   r   r   r   r   markskipifrequires_tmarr   r   r)   r   r   r   r"   r#   r$   <module>r      s   	 	      ' '  $ U U0
5{*0)ZL8 	<	', &6 x*$y0<?:,NC,-0NNO
:=A
DEDD@
V
V
V
U
U
>/@
6
9x'< 9<. .u]ELL=X7Y .&=3 =288 =Gs GA	a^ {{!!ln"4\^!L>3 >s >U5<<););)I)IIJ u|| +*HSX$6 +*r#   