
    ci^              	         d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
l m!Z! ddl"m"Z" ddl m#Z# d	dl$m%Z%m&Z&m'Z'm(Z( ddl)m*Z* d dl+m,Z, dZ-dZ. ed      Z/ G d dej`                        Z1d6dZ2 G d d      Z3i Z4g Z5d Z6d7dZ7 G d dee/         Z8d Z9d Z:d Z; G d  d!      Z<e G d" d#             Z=d$ Z> G d% d&e<e8e/         Z?ed8d'       Z@edddddddd(	 	 	 	 	 	 	 	 	 	 	 	 	 d9d)       Z@	 d:dddddddd(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d;d*Z@ G d+ d,      ZA G d- d.      ZBd/ ZCd0 ZD G d1 d2e<      ZE G d3 d4e<      ZFd5 ZGy)<    )annotationsdivisionN)defaultdict)	dataclass)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTupleTensorDescriptor)
ModuleType   )knobs   )driver)_async_compile)find_paths_ifget_iterable_pathtype_canonicalisation_dictcanonicalize_dtype)get_cache_key)get_cache_invalidating_env_varsztriton.languagez"triton.experimental.gluon.languageTc                  ~     e Zd ZdZd fdZed        Zd Zd ZddZ	d Z
d Zd	 Zd
 Zd Zd Zd Zd Zd Z xZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    c                    t         |           || _        t        j                  |j                  d            | _        || _        || _        h d| _	        t        t        ddh| _        i | _        d| _        y )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstancecopymathF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsGLUON_MODULETRITON_MODULEsupported_modulesused_global_valsvisiting_arg_default_value)selfr3   r8   r9   src	__class__s        L/var/www/html/engine/venv/lib/python3.12/site-packages/triton/runtime/jit.pyr2   zDependenciesFinder.__init__.   st    	nnSZZ%89 "*
& 	"
" TV*/'    c                6    | j                   j                         S N)r7   	hexdigestr@   s    rC   retzDependenciesFinder.retY   s    {{$$&&rD   c                    t        j                  |j                        ryt        |dd      }|j	                  t
              S )NT
__module__ )inspect	isbuiltinfuncr-   
startswithr<   )r@   noderO   modules       rC   _is_triton_builtinz%DependenciesFinder._is_triton_builtin]   s6    TYY'|R0  //rD   c                >   t        |t              sJ | j                  j                         |j                  j                         z  D ]_  }|\  }}| j                  |   \  }}|j                  |   \  }}||k7  s2t	        d| d| d| j
                   d|j                   d| d       | j                  j                  |j                         |j                  }|t        t        |dd            z  }| j                  j                  |j                  d	             y )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr$   )r.   JITCallabler>   keysRuntimeErrorr3   __name__update	cache_keystrr-   r7   r6   )r@   rO   kvar_name_v1v2func_keys           rC   _update_hashzDependenciesFinder._update_hashc   s/   $,,, &&++-0E0E0J0J0LL 	AKHa))!,EB))!,EBRx"&xjB4?OPTPYPY{Zmnrn{n{m|  }T  UW  TX  XO  P 	 	$$T%:%:;>>Cj%8998??734rD   c                   ddl m} |t        |      t        u ry t	        |dd      ry t	        |dd      dk(  ry t        |t              r| j                  |       y t        |      r*t        |t              st        ||      st        d|       | j                  ry |/t        j                  |      |f| j                  |t        |      f<   y )	Nr   	constexpr__triton_builtin__FrK   rL   ztriton.language.extra.libdevicez!Unsupported function referenced: )language.corerg   typer   r-   r.   rW   rd   callablerY   r?   r/   deepcopyr>   id)r@   valvar_dictr3   rg   s        rC   record_referencez#DependenciesFinder.record_referenceu   s    - ;$s)z13,e4 3b)-NNc;'c"C=C!6z#y?Y!B3%HII **;?==;Mx:XD!!4H"67rD   c                >    t        |j                        t        j                  u r|j                  S |j                   j
                  v ry  fd} ||j                        \  }}|j                   j                  v r|S  j                  |||j                         |S )Nc                    j                   j                  | d       }||j                   fS j                  j                  | d       }||j                  fS y)NNN)r8   getr9   )r3   rn   r@   s     rC   name_lookupz2DependenciesFinder.visit_Name.<locals>.name_lookup   sZ    ,,""4.CDLL((..$$T40CDNN**rD   )rj   ctxastStorerm   local_namesr:   rp   )r@   rQ   ru   rn   ro   s   `    rC   
visit_NamezDependenciesFinder.visit_Name   s    >SYY&77N77d&&&	 $DGG,X77d444Jc8TWW5
rD   c                ^    |j                   D cg c]  }| j                  |       c}S c c}w rF   )eltsvisit)r@   rQ   elts      rC   visit_TuplezDependenciesFinder.visit_Tuple   s$     ,0995C

3555s   *c                f   | j                  |j                        }t        |t        j                        r6| j                  |j                        }t        |t        j                        r6t        |dd      }||| j                  v ry t        ||j                        }| j                  |       |S )NrZ   rL   )	r}   valuer.   rw   	Attributer-   r=   attrrp   )r@   rQ   lhslhs_namerI   s        rC   visit_Attributez"DependenciesFinder.visit_Attribute   s    jj$cmm,**SYY'C cmm,3
B/;(d&<&<<c499%c"
rD   c                    |j                   j                   D ch c]  }|j                   c}| _        | j                  |       y c c}w rF   )argsargry   generic_visit)r@   rQ   r   s      rC   visit_FunctionDefz$DependenciesFinder.visit_FunctionDef   s4    /3yy~~>CGG>4  ?s   Ac                p     fd}t        j                  |j                  |j                  |j                  r|j                  gng |j
                        D ]  } j                  |         ||j                         |j                   j                  |j                          ||j                         y )Nc                    	 j                   rJ d_         | D ]  }|j                  |        	 d_         y # d_         w xY w)NTF)r?   r}   )defaultsexprr@   s     rC   visit_defaultsz:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sS    8::::26/$ )D'

4() 38/%/s   < < 	A)
	itertoolschainposonlyargsr   vararg
kwonlyargsr}   kw_defaultskwargr   )r@   rQ   r   r   s   `   rC   visit_argumentsz"DependenciesFinder.visit_arguments   s    	8 ??4#3#3TYYQUQ\Q\bdfjfufuv 	CJJsO	 	t''(::!JJtzz"t}}%rD   c                    | j                  |      }t        |t              r| xj                  t	        |      z  c_        y | j                  j                  |       y rF   )r}   r.   r)   ry   setadd)r@   rQ   targets      rC   visitAssnTargetz"DependenciesFinder.visitAssnTarget   sE     D!fd#F+  (rD   c                    t        |j                        dk7  rt        d      | j                  |j                  d          | j	                  |       y )Nr   z2Simultaneous multiple assignment is not supported.r   )r&   targets	TypeErrorr   r   r@   rQ   s     rC   visit_AssignzDependenciesFinder.visit_Assign   sG    t||!
 PQQT\\!_- 	4 rD   c                \    | j                  |j                         | j                  |       y rF   r   r   r   r   s     rC   visit_AnnAssignz"DependenciesFinder.visit_AnnAssign   $    T[[) 	4 rD   c                \    | j                  |j                         | j                  |       y rF   r   r   s     rC   	visit_ForzDependenciesFinder.visit_For   r   rD   )returnNoners   )rZ   rK   __qualname____doc__r2   propertyrI   rS   rd   rp   rz   r   r   r   r   r   r   r   r   __classcell__rB   s   @rC   r"   r"   "   s`    	)0V ' '05$ D06
	!
&@)!!!rD   r"   c                    dd l mc m} t        | t              r| j                         } | j                  d      r7| j                  d      } t        |       } | j                  d      sJ d| dd  z   S | j                  d      rdt        | d d       z   S | j                  d      rdt        | dd        z   S | j                  d      rt        | j                  d            S t        | |j                        rdt        | j                         S t        | |j                        r| j                  } n(t        | t              r| j                  } nt	        |       } t!        j"                  | j%                  d	d
      |       S )Nr   zconst const**kr   ztl._trL   )triton.language.corelanguagecorer.   r]   striprP   removeprefix_normalize_tyendswithpointer_type
element_tydtyper3   rj   rZ   r   rt   replace)tyr   s     rC   r   r     s?   ''"cXXZ==")Br"B==%%%"QR&= ;;sr#2w///==r!"v...== !788	B))	*=/011	B

	#WW	B	[[W%))"**T2*>CCrD   c                      e Zd ZdZ	 	 ddZed        Zedd       Zedd       Zed        Z	ed        Z
ed        Zed	        Zy
)KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.c                <    || _         || _        || _        || _        y rF   )num_paramdo_not_specializedo_not_specialize_on_alignment)r@   r   paramr   r   s        rC   r2   zKernelParam.__init__)  s     !2.L+rD   c                .    | j                   j                  S rF   )r   r3   rH   s    rC   r3   zKernelParam.name0  s    {{rD   c                    | j                   j                  r1| j                   j                  t        j                  j                  k(  ryt        | j                   j                        S )NrL   )r   
annotationrM   	Parameteremptyr   rH   s    rC   r   zKernelParam.annotation4  sD    {{%%)?)?7CTCTCZCZ)ZT[[3344rD   c                    | j                   }|j                  d      r|dd  }n|j                  d      r|dd  }|t        t        j                               v r| j                   S y)Nr   r   r   r   rL   )r   rP   r   r   values)r@   as     rC   annotation_typezKernelParam.annotation_type:  s]    OO<<!"A\\#!"A.55788??"rD   c                    d| j                   v S Nrg   )r   rH   s    rC   is_constexprzKernelParam.is_constexprE  s    doo--rD   c                r    | j                   ryd| j                  v xs | j                  j                  d      S )NFr   r   )r   r   rP   rH   s    rC   is_constzKernelParam.is_constI  s1    $//)MT__-G-G-MMrD   c                .    | j                   j                  S rF   )r   defaultrH   s    rC   r   zKernelParam.defaultO  s    {{"""rD   c                d    | j                   j                  t        j                  j                  k7  S rF   )r   r   rM   r   r   rH   s    rC   has_defaultzKernelParam.has_defaultS  s#    {{""g&7&7&=&===rD   N)r   r%   r   zinspect.Parameterr   boolr   r   r   r]   )rZ   rK   r   r   r2   r   r3   r   r   r   r   r   r   r    rD   rC   r   r   &  s    LM15M     5 5
   . . N N
 # # > >rD   r   c                8     ddl m ddlm d fd	S )Nr   rf   r   r   c                     yt         t              ryt         t              r7|r  d|      nd } dk(  r|ryd k  r	 dk  rd	|fS d
 k  r	 dk  rd|fS d|fS t         t              ryt	         d      rZ j
                  |f}t        j                  |d       }|!|d   rdndt        |d         z   }|t        |<   |r  d|      nd }||fS t         t              rd j                  fS t               rd fS t         t              rP D cg c]
  } |       }} fd}	 |	|D cg c]  }|d   	 c}      }
 |	|D cg c]  }|d   	 c}      }|
|fS t         t              rTt	         j                  d      sJ t         j                  j
                        }d| t         j                         dd fS t               rat	         j                  d      sJ t         j                  j
                        }d| t         j                         d j                   dd fS t#        dt%               z        c c}w c c}w c c}w )N)rg   N)u1Nr%   )alignr   )rg   r   i   ii32l            l    u64i64)fp32Ndata_ptrr   r   r   tensorrg   c                N    t        d      r t              |  S t        |       S )N_fields)hasattrrj   tuple)valsr   s    rC   <lambda>zAcreate_specialize_impl.<locals>.specialize_impl.<locals>.<lambda>  s&    '#y:Qid3i&6 W\]aWb rD   ztensordesc<>,zUnsupported type: %s)r.   r   r%   r*   r   r   	dtype2strrt   r   rW   r\   r   r   baser)   block_shapelayoutr   rj   )r   r   specialize_valuer   keydskresxspec
make_tupletysrX   innerGluonTensorDescriptorrg   specialize_extraspecialize_impls   `            rC   r   z/create_specialize_impl.<locals>.specialize_impla  s\   ;&T"S!?O"3U;UYCax,'SSI%5s|###"2s|#s|#U#!S*%99h'C--T*C{"1vt32DSV2LL!$	#BR"3>X\C:[)//Y'%%U#0341OA&4D4bJD1qad12CT2qt23D;-.388Z000&sxx~~6E!%coo)>(?qA4HH23388Z000&sxx~~6E!%coo)>(?qaPRVWW2T#Y>?? 512s   I3II)FTT)r   rg   'triton.experimental.gluon.nvidia.hopperr   )r   r   rg   r   s   `@@@rC   create_specialize_implr  \  s    $a-@ -@^ rD   c                    t        t              dk(  rt        j                  t        d              t        d   } || |      d   S )Nr   c                     y rF   r   )r`   kwargss     rC   r   zmangle_type.<locals>.<lambda>  s    rD   )r   )r&   specialize_impl_cacheappendr  )r   
specializer   s      rC   mangle_typer	    sA    
 !Q&$$%;<T%UV+A.O3<Q??rD   c                       e Zd ZU ded<   ddZy)KernelInterfacer    runc                      fdS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                 .     j                   | dd|S )NFgridwarmup)r  )r   r  r  r@   s     rC   r   z-KernelInterface.__getitem__.<locals>.<lambda>  s    xtxx$T%'YRX'Y rD   r   )r@   r  s   ``rC   __getitem__zKernelInterface.__getitem__  s     ZYrD   N)r   r    )rZ   rK   r   __annotations__r  r   rD   rC   r  r    s    	
FZrD   r  c           
        |j                         D ci c],  \  }}||j                  j                  dk(  rt        |      n|. }}}dd l}| ||j                         D cg c]  }t        |       c}t        |j                               |j                         D cg c]  }t        |       c}t        |j                               |j                  d}	|j                  |	      }
|
S c c}}w c c}w c c}w )Nr   r   )r3   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr   )
itemsrB   rZ   r]   jsonrX   r)   r   __dict__dumps)r3   r  	constantsattrsr  r   r   r  r   objserialized_objs              rC   serialize_specialization_datar#    s    enetetevwWaWZ\aEOO$<$<$Gc%jURwIw9QZQ_Q_Qa?bAQ?bY %**,0OQa0O_cdidpdpdr_s##CC
 ZZ_N x @c0Os   1C, C2C7c                   t        | j                        t        |      k(  sJ g }t        | j                  j                         |      D ]  \  }}|j                  r|j                  d| d       )|j                  rdnd}|j                  rdnd}|j                  rdnd}d| d| d| d| d	}	|j                  rt        |j                  t              r"|j                  dk(  s|j                  dd	 d
v rd}|r#|j                  d|j                   d|	 d       |j                  d|j                   d       |j                  |	        	 d }
ddj                  t        t        |
| j                  j                                     dgz          ddj                  | j                  j                         D cg c]
  }d| d|  c}       ddj                  |       d}| j                  j                         D ci c];  \  }}|j                   t"        j$                  j&                  urd| |j                   = }}}t(        |d<   t+        |j,                        |d<   t/        ||       |d   S c c}w c c}}w )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(, r   Nr   )fpbfFz("z",) + z[1:]z", None)c                x    | d   j                   t        j                  j                  u r| d   S | d    d| d    S )Nr   r   z	=default_)r   rM   r   r   )r   s    rC   r   z0create_function_from_signature.<locals>.<lambda>  sA    AaDLLG,=,=,C,CCAaD AaD6QZ[\]^[_Z`Ia rD   z
def dynamic_func(z	**optionsz):
    params = {'z': z}
    specialization = [r   z-]
    return params, specialization, options
default_rW   r   dynamic_func)r&   
parametersziprX   r   r  r   r   r   r   r.   r]   joinr)   mapr  r   rM   r   r   rW   r  get_arg_specializationexec)sigkparamsbackendspecializationr3   kpr   r  r   rI   r   	func_bodyr   func_namespaces                 rC   create_function_from_signaturer<    s    s~~#g,...N++-w7 0b??!!N4&":;!#v'H$&$8$8fJ!@@GfE$TF"XJbBugQOC!!b00#6))T1R5G5G5K|5[%*
"))Br/A/A.B&T*RS #))Br/A/A.B(*KL%%/'0, bC))DS#..*>*>*@!ABk]RST U		3>>;N;N;PQ4QtfCv.QRS Txx/0 1I >>//1D%== 1 1 7 77 4&5==(N  %0N=!(>w?]?](^N$% 	N# .))% R
s    I=A Jc                8    | j                    d| j                   S )N.)rK   r   fns    rC   get_full_namerA    s    mm_Aboo.//rD   c                  d    e Zd Zd Zd Zed        Zd Zed        Zd Z	d Z
d Z eee
	      Zy
)rW   c                   || _         t        j                  |      | _        	 t        j                  |      \  | _        | _        t        |      | _	        t        j                         | _        t        j                  dj                  | j                              }|t!        j"                  d|t         j$                        j'                         d  }|| _        d | _        i | _        |j.                  | _        |j0                  | _        |j2                  | _        |j4                  | _        |j6                  | _        y # t        $ r}t        d      |d }~ww xY w)Nz1@jit functions should be defined in a Python filerL   z^def\s+\w+\s*\()r@  rM   r  getsourcelinesraw_srcstarting_line_numberOSError
ValueErrorrA  _fn_name	threadingRLock
_hash_locktextwrapdedentr1  research	MULTILINEstart_srchashr>   r   rZ   r   __globals__rK   )r@   r@  erA   s       rC   r2   zJITCallable.__init__  s    **2.	Y6=6L6LR6P3DL$3 &b)#//+ oobggdll34")).R\\BHHJKL		 TV zzOO>>--7  	YPQWXX	Ys   "D= =	EEEc                n    | j                   t        j                  | j                        j                  z  S rF   )rU  rM   getclosurevarsr@  r9   rH   s    rC   get_capture_scopezJITCallable.get_capture_scope  s(    '"8"8"A"K"KKKrD   c                   | j                   5  | j                  | j                  cd d d        S d| j                   | _        t        j                  | j
                        j                  }t        | j                  | j                  || j                        }|j                  | j                                |j                  t        | j                        z   | _        t        t!        |j"                  j%                                     | _        ddlm} | xj                  t        | j"                  j%                         D cg c]  \  \  }}\  }}t+        ||      r||f c}}}      z  c_        t-        j.                  | j                  j1                  d            j3                         | _        d d d        | j                  S c c}}}w # 1 sw Y   | j                  S xY w)Nz
recursion:)r3   r8   r9   rA   r   rf   r$   )rL  rT  rI  rM   rX  r@  r9   r"   rU  rA   r}   parserI   r]   rF  dictsortedr>   r  r   rg   r.   r4   r5   r6   rG   )r@   r9   dependencies_finderrg   r3   r`   rn   s          rC   r\   zJITCallable.cache_key  s    __ 	Nyy$yy	N 	N
 %T]]O4DI..tww7AAI"4$--QUQaQamv9=#C%%djjl3+//#d6O6O2PPDI$(0C0T0T0Z0Z0\)]$^D!6II9=9N9N9T9T9V= ="5)4Xc1!+C!;  $Sk = > >I  tyy'7'7'@AKKMDI#	N$ yy	=	N$ yys$   GDG G
%AG
GG%c                   t        j                  | j                        }t        |t         j                        sJ t        |j                        dk(  sJ t        |j                  d   t         j                        sJ |S )Nr   r   )rw   r[  rS  r.   Moduler&   bodyFunctionDef)r@   trees     rC   r[  zJITCallable.parse2  s_    yy#$

+++499~"""$))A,888rD   c                    ddl m}  ||       S )Nr   )constexpr_type)r   re  )r@   re  s     rC   rj   zJITCallable.type9  s    7d##rD   c                     d| _         || _        y)a"  
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.

        Note that it is the callers responsibility to make sure any triton functions that call this function have the `.hash` value reset to None.
        N)rT  rS  )r@   new_srcs     rC   _unsafe_update_srczJITCallable._unsafe_update_src>  s     		rD   c                    t        d      )NzqCannot set attribute 'src' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorrH   s    rC   _set_srczJITCallable._set_srcH  s     ( ) 	)rD   c                    | j                   S rF   )rS  rH   s    rC   _get_srczJITCallable._get_srcM  s    yyrD   )fgetfsetN)rZ   rK   r   r2   rY  r   r\   r[  rj   rh  rk  rm  rA   r   rD   rC   rW   rW     sX     (DL  2 $ $)
 x
0CrD   rW   c                  ,    e Zd ZU ded<   ded<   ded<   y)JitFunctionInfor   rR   r]   r3   JITFunctionjit_functionN)rZ   rK   r   r  r   rD   rC   rq  rq  S  s    
IrD   rq  c                    t        |      t        |      f}| j                  |d       }||S t        |      t        |      z   }|| |<   |S rF   )r   r]   rt   )kernel_key_cacher8  r  r   r\   s        rC   compute_cache_keyrv  Z  sW     #g,
/C $$S$/IN#c'l2I%SrD   c                  r     e Zd Zd Z	 	 ddZd Zd Zd Zd Zd Z		 	 d fd	Z
d	 Zd
 Zd Zd Zd Z xZS )rr  c                     y)NFr   rH   s    rC   is_gluonzJITFunction.is_gluong  s    rD   c	                   |sy | j                   j                  }	| j                   j                  }
dj                  t	        | j
                  |d         D cg c]  \  }}|j                   d|  c}}      }|	 d|j                   d|j                   d|j                   d|j                   d|j                   d	| d
}t        | j                         }t        ||||d   ||      }||||j                  |j                  |j                  |j                  |j                  |j                  |||d} |||t        |
|	|       d|i||d      S c c}}w )Nr(  r   z: z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r%  r   )r  devicer  	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   F)r   reprr@  compileis_manual_warmupalready_compiled)r@  r   rK   r1  r0  paramsr3   r}  r~  r  r  r  rA  r#  r  rq  )r@   hookr   r  r|  r  r  r  r  r3   rR   r   r   	arg_reprsr  	full_namer  r  s                     rC   
_call_hookzJITFunction._call_hookj  s    ww####IIc$++WZ[\W]F^_%**Rt4_`	{7#4#4"5[AQAQ@RR_`g`r`r_s  tG  HO  H`  H`  Ga  a{  |C  |[  |[  {\  \^  _h  ^i  ij  k!$''*	;IyR[]def]gipruv #" **((!,, ' 8 8'.'F'F"..#6"
 vtT2C*6*&"
 	
+ `s   E
c                T    t        |      sJ | j                  j                  |       y)z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)rk   pre_run_hooksr  )r@   r  s     rC   add_pre_run_hookzJITFunction.add_pre_run_hook  s$    
 ~~!!$'rD   c                    ddl m}m}m}m} t
        j                  j                         } ||      }|| _        || _        || _        t        | j                  | j                  |      }i i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelr  	ASTSourcemake_backend)compilerr  r  r  r  r   activeget_current_targetr<  r  r  )r@   r  r  r  r  r   r7  binders           rC   create_binderzJITFunction.create_binder  sd     	PO113v&,"/WU2vw..rD   c           
        |j                  |      }| j                  D cg c]  }|j                   }}|D cg c]  }|d   	 }}t        ||      D 	
ci c]  \  }	}
|	|

 }}	}
d|vsJ d       d|vsJ d       d|vsJ d       |D ]#  }	|	|j                  vs|	|vst        d|	z         t        |d	       }|D ci c]&  }|t        t        |j                               |      ( }}|D cg c]  }|d
   	 }}t        |d       }|D 	ci c]  }	|	|j                  t        ||	              }}	||||fS c c}w c c}w c c}
}	w c c}w c c}w c c}	w )Nr   device_typez=device_type option is deprecated; current target will be usedr|  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                    |dk(  S r   r   )r`   rn   s     rC   r   z(JITFunction._pack_args.<locals>.<lambda>  s    3+;M rD   r   c                "    t        |t              S rF   )r.   r]   )r`   r   s     rC   r   z(JITFunction._pack_args.<locals>.<lambda>  s    Z35G rD   )parse_optionsr  r3   r0  r  KeyErrorr   r   r)   r   
parse_attr)r@   r7  r  
bound_argsr8  r  r   sigkeyssigvalsr^   vr  
constexprspathattrvalsr   s                   rC   
_pack_argszJITFunction._pack_args  s   ''/#';;/a166//!/0A1Q400(+GW(=>fq!QT>	>F*k,kk*v%a'aa%v%a'aa% 	YA(((Qg-=SVWWXX	Y #7,MN
[efSWd-d:3D3D3F.GNNf
f"01QAaD11h(GHPUV1G&&'81'EFFVV	:u44% 00> g1Vs"   D;E E +E1E#Ec                  |j                  d| j                        xs t        j                  j                  |d<   t        j
                  j                         }t        j
                  j                  |      }| j                  D ]
  } ||i |  | j                  |   \  }}	}
}} ||i |\  }}}t        |	||      }|j                  |d       }|4| j                  |||||      \  }}}}| j                  |||||||      }|y t               }| j                  j                         D ]6  \  \  }}\  }}|j                  ||      x}|k7  s$t!        d| d| d|        |s|J t#        |      r ||      }t%        |      }|d   }|dkD  r|d   nd}|dkD  r|d   nd}t'        |d      r|j)                         } |j*                  ||g|j-                          } |j.                  |||||j0                  |j2                  |t        j                  j4                  t        j                  j6                  g	|j-                           |S )	NdebugrU   z1 has changed since we compiled this kernel, from z to r   r   r   result)rt   r  r   runtimer   r  get_current_deviceget_current_streamr  device_cachesrv  r  _do_compileobjectr>   r  rY   rk   r&   r   r  launch_metadatar   r  functionpacked_metadatalaunch_enter_hooklaunch_exit_hook) r@   r  r  r   r  r|  r  r  kernel_cacheru  r   r7  r  r  r8  r  r   kernelr  r  r   not_presentr3   r`   rn   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  s                                    rC   r  zJITFunction.run  s    **Wdjj9PU]]=P=Pw 11311&9 && 	"D$!&!	" CGBTBTU[B\?& /5d.Ef.E+
NG 0.'J!!#t, >48OOGVU_aoDK5M1GY
E %%c9fj'SXZ`aF~ h.2.C.C.I.I.K 	q*IT1*\&**4==#E"&tf,]^a]bbfgmfnoq q	q
 ###~J'D	I!WF )AT!W1F )AT!W1Fvx(4f44T6XJDUDUDWXOFJJvvvvvH^H^`o}}668V8VnYcYjYjYlnrD   c                T    | j                   | j                  S | j                  |      S rF   )_reprrI  )r@   r`   s     rC   r  zJITFunction.repr  s"     $

 2t}}E

1ErD   c	           	        |r|ng }|r|ng }t         |   |       |j                  | _        || _        || _        || _        || _        || _        g | _	        t        | j                  j                  j                               D ]T  \  }	}
|	|v xs |
j                  |v }|	|v xs |
j                  |v }| j                  j                  t!        |	|
||             V t#        | j$                        | _        d | _        || _        || _        | j                  D cg c]  }|j                   c}| _        | j                  D cg c]  }|j0                  s|j2                   c}| _        g | _        y c c}w c c}w rF   )r1   r2   rK   rR   versionr   r   r  r  r  	enumerater  r/  r   r3   r  r   r   r  r  r  r  rV   	arg_namesr   r   r  r  )r@   r@  r  r   r   r  rV   r  r  ir   dnsdns_oaprB   s                 rC   r2   zJITFunction.__init__  sW   1B-Ki)Goq&mm!2.L+
.!$..";";"B"B"DE 	CHAu((KEJJ:K,KC88hEJJJh<hFKK{1eS&AB	C )););< 
  +/++6Q!&&6*.++HQ155H  	 7Hs   E2E7E7c               \     | j                   t        t        j                  |      |dd|S )NTr  )r  r2  
MockTensor
wrap_dtype)r@   r  r   r  s       rC   r  zJITFunction.warmup   s*    txxZ5J5JD1QT$\U[\\rD   c           	     L   dd l }dd lm} t        j                  j                         }|j                  |      }|d   | j                  k7  rt        d|d    d| j                         t        t        |d         }|d   }t        ||      D 	ci c]4  \  }}	||j                  j                  |	      r|j                  |	      n|	6 }
}}	t        t        |d         }|d   }t        t        ||            }t        |d	   j                               }|d
   j                         D 	ci c]#  \  }}	|t!        |	t"              rt        |	      n|	% }}}	|d   }| j$                  |   \  }}}}}|j'                  |      }| j)                  ||||
||d      S c c}	}w c c}	}w )Nr   r3   zSpecialization data is for z but trying to preload for r  r  r  r  r  r  r   T)r  )r  triton.languager   r   r  r  loadsrI  rY   r2  r   r0  r   is_dtyper\  r  r.   r)   r  r  r  )r@   r  r  tlr|  deserialized_objr  r  r   r   r  r  r  r   r  r  r`   r7  s                     rC   preloadzJITFunction.preload#  s   $113::&9:F#t}}4-.>v.F-GGbcgcpcpbqrt tE#3O#DE(9 "-?
U BHH$5$5e$<%%G

 
  0 >?
%l3
SZ01)+6<<>?	 /y9??A
U E4!8ueC
 
 u%"0081a!''0   
 	


s   9F,(F c           
     B     j                      \  }}	} j                  t        j                  j                  g      ry  j                         t        j                  j                         }
|
Ct               t        |	      } fd} f	d}|
j                  |||      }|S  j                  j                        }|<    j                  t        j                  j                  g       |S )Nc                 B    j                  j                         S )N)r   r  	_env_vars)r  r  )env_varsr  r@   rA   r   s   rC   async_compilez.JITFunction._do_compile.<locals>.async_compileS  s!    ||C@P@P\d|eerD   c           
     r   	 | <   j                  t        j                  j                  g	       y rF   )r  r   r  jit_post_compile_hook)
r  r   r  r|  r  r   r  r@   r  r  s
    rC   finalize_compilez1JITFunction._do_compile.<locals>.finalize_compileV  s9    $*S! C CS)U[]gip!&1rD   )r   r  )r  r  r   r  jit_cache_hookr  r   active_modert   r   r   submitr  r  r  )r@   r   r  r|  r  r  r   r  r`   r7  
async_moder\   r  r  r  r  r  rA   r   s   ````````       @@@@rC   r  zJITFunction._do_compileF  s    .2.@.@.H+a!??5==77iQ[]dglfmouvnnT9j%@#//335
!68H%c7GXFIf f1 1
  &&y-AQRF 	 \\#fg>N>N\OF &LOOEMM??iQWYcelotnu"$rD   c                    t        d      )Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rY   r@   r   r  s      rC   __call__zJITFunction.__call__c  s    WXXrD   c                P    d| j                    d| j                  j                   dS )NzJITFunction(:r%  )rR   r@  r   rH   s    rC   __repr__zJITFunction.__repr__f  s&    dkk]!DGG,@,@+ACCrD   )r   zbool | None)NNNNNNN)rZ   rK   r   ry  r  r  r  r  r  r  r2   r  r  r  r  r  r   r   s   @rC   rr  rr  e  s`    ,
 
,
\(/502hF mq;?" H]!
F:YDrD   rr  c                     y rF   r   r?  s    rC   jitr  o  s    rD   r  r  r  r   r   r  rV   c                     y rF   r   r  s          rC   r  r  t  s     rD   c               @    dfd}|  ||       S |S )a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    c           
         t        |       sJ t        j                  j                  rddlm}  ||       S t        |       S )Nr   )InterpretedFunction)r  r   r   r  rV   r  r  )rk   r   r  	interpretinterpreterr  rr  )	r@  r  r  r   r   r  rV   r  r  s	     rC   	decoratorzjit.<locals>.decorator  sj    ||==""8&r7N_Fdlq08tUdf f "3/M! /	 	rD   r@  r    r   zJITFunction[T]r   )	r@  r  r  r  r   r   r  rV   r  s	    ``````` rC   r  r    s&    : & 
~} rD   c                  N    e Zd ZdZed        ZddZd Zed        Zed        Z	y)	r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                l    | j                   j                  dk(  r| j                  dk(  rt        |       S | S )Nr   torch)rB   rZ   rK   r  )r   s    rC   r  zMockTensor.wrap_dtype  s.    ==!!W,71Jc?"
rD   Nc                *    |dg}|| _         || _        y )Nr   )r   shape)r@   r   r  s      rC   r2   zMockTensor.__init__  s    =CE

rD   c                    dg}| j                   dd  D ]  }|j                  |d   |z          t        t        |            S )Nr   r   )r  r  r   reversed)r@   stridessizes      rC   stridezMockTensor.stride  sG    #JJqrN 	/DNN72;-.	/Xg&''rD   c                      yNr   r   r   rD   rC   r   zMockTensor.data_ptr      rD   c                      yr  r   r   rD   rC   	ptr_rangezMockTensor.ptr_range  r  rD   rF   )
rZ   rK   r   r   staticmethodr  r2   r  r   r  r   rD   rC   r  r    sM    
  
(    rD   r  c                  J    e Zd Zd Zd Zd ZddZd Zd Zd Z	d Z
d	 Zd
 Zy)TensorWrapperc                    || _         || _        |j                  | _        |j                  | _        | j                  j                  | _        y rF   )r   r   datar|  r  )r@   r   r   s      rC   r2   zTensorWrapper.__init__  s5    
	II	kkYY__
rD   c                6    | j                   j                         S rF   )r   r   rH   s    rC   r   zTensorWrapper.data_ptr  s    yy!!##rD   c                4     | j                   j                  | S rF   )r   r  )r@   r   s     rC   r  zTensorWrapper.stride  s    tyy&&rD   c                <    d| j                    d| j                   dS )NzTensorWrapper[r{  r%  )r   r   rH   s    rC   __str__zTensorWrapper.__str__  s    

|2dii[::rD   c                6    | j                   j                         S rF   )r   element_sizerH   s    rC   r  zTensorWrapper.element_size  s    yy%%''rD   c                ^    t        | j                  j                         | j                        S rF   )r  r   cpur   rH   s    rC   r  zTensorWrapper.cpu  s    TYY]]_djj99rD   c                N    | j                   j                  |j                          y rF   )r   copy_)r@   others     rC   r  zTensorWrapper.copy_  s    		

#rD   c                ^    t        | j                  j                         | j                        S rF   )r  r   cloner   rH   s    rC   r  zTensorWrapper.clone  s    TYY__.

;;rD   c                `    t        | j                  j                  |      | j                        S rF   )r  r   tor   )r@   r|  s     rC   r  zTensorWrapper.to  s     TYY\\&14::>>rD   c                `    t        | j                  j                  |      | j                        S rF   )r  r   	new_emptyr   )r@   sizess     rC   r  zTensorWrapper.new_empty   s"    TYY007DDrD   Nr   )rZ   rK   r   r2   r   r  r
  r  r  r  r  r  r  r   rD   rC   r  r    s5    %$';(:$<?ErD   r  c                    t        | t              r;|| j                  j                  k(  r| j                  S t        | j                  |      S t	        | d      rt        | |      S t        dt        |        d      )Nr   zCannot reinterpret a r>  )r.   r  r   r   r   r   rj   )r   r   s     rC   reinterpretr    sk    &-(FKK%%%;; !e44		$VU++/V~Q?@@rD   c                L   | }t        |t              s|j                  }t        |t              s|j                  j                  j                  }|j
                  }t        |j                        D ].  \  }}|j                         j                  d      s&||z  } ||fS  ||fS )Nzdef )
r.   rW   r@  __code__co_filenamerF  r  rE  r   rP   )r@  base_fn	file_name
begin_lineidxlines         rC   get_jit_fn_file_liner#    s    G+.** +.

##//I--J w/ 	T::<""6*#Jj  	 j  rD   c                      e Zd Zd Zd Zy)BoundConstexprFunctionc                     || _         || _        y rF   )__self____func__)r@   instancer@  s      rC   r2   zBoundConstexprFunction.__init__'  s     rD   c                B     | j                   | j                  g|i |S rF   )r(  r'  r  s      rC   r  zBoundConstexprFunction.__call__+  s!    t}}T]]<T<V<<rD   N)rZ   rK   r   r2   r  r   rD   rC   r%  r%  %  s    =rD   r%  c                  0     e Zd Z fdZd ZdddZ xZS )ConstexprFunctionc                $    t         |   |       y rF   )r1   r2   )r@   r@  rB   s     rC   r2   zConstexprFunction.__init__1  s    rD   c                "    |t        ||       S | S rF   )r%  )r@   r!  objclasss      rC   __get__zConstexprFunction.__get__4  s    ?)#t44rD   N)	_semanticc                  ddl m}m} |D cg c]
  } ||       }}|j                         D ci c]  \  }}| ||       }}} | j                  |i |}	||	S t
        j                  j                  r|	S  ||	      S c c}w c c}}w )Nr   )_unwrap_if_constexprrg   )r   r3  rg   r  r@  r   r  r  )
r@   r1  r   r  r3  rg   r   r^   r  r   s
             rC   r  zConstexprFunction.__call__:  s    H156A$Q'66;A<<>J!Q!)!,,JJ dggt&v&J ==""J~ 7Js
   BB)rZ   rK   r   r2   r0  r  r   r   s   @rC   r,  r,  /  s     )-  rD   r,  c                    t        |       S )z
    Wraps an arbitrary Python function so that it can be called at
    compile-time on constexpr arguments in a Triton function and
    returns a constexpr result.
    )r,  r?  s    rC   constexpr_functionr5  M  s     R  rD   r   )Fr  )r  Optional[Callable]r  r6  r   Optional[Iterable[int | str]]r   r7  r  Optional[bool]rV   r8  r   zCallable[[T], JITFunction[T]]rF   )r@  zOptional[T]r  r6  r  r6  r   r7  r   r7  r  r8  rV   r8  r   z4Union[JITFunction[T], Callable[[T], JITFunction[T]]])H
__future__r   r   rw   r/   r4   rM   r   rJ  rO  rM  collectionsr   dataclassesr   	functoolsr   typingr   r	   r
   r   r   r   r   r   r   r   triton.tools.tensor_descriptorr   typesr   rL   r   r   r   _utilsr   r   r   r   cacher   triton._C.libtritonr   r<   r;   r    NodeVisitorr"   r   r   r   r  r  r	  r  r#  r<  rA  rW   rq  rv  rr  r  r  r  r  r#  r%  r,  r5  r   rD   rC   <module>rD     s>   , 
      	  # ! % d d d ;     e e   ?!3CLb! b!TD4/> />d 	 4n@	Zgaj 	Z	7*t0_1 _1D   BD+q1 BDT 
 
 
 #*.7;DH #
 
 (	

 5
 %B
 
 
 #
 

 4 #*.7;DH #44 	4
 (4 54 %B4 4 4 :4x B"E "EJA!$=[ = <!rD   