
    ci"                         d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlZd dlZe G d d             ZdZd	 Zd
efdZedk(  r e        yy)    N)ArgumentParser)	dataclass)Path)Listc                       e Zd ZU dZdZeed<   dZeed<   dZeed<   dZ	eed<   dZ
edz  ed<   d	Zeed
<   dZeed<   dZedz  ed<   dZedz  ed<   y)CompileArgsz@
    A class to contain arguments from command-line parser.
     pathkernel_name	signaturegridNtarget   	num_warps   
num_stagesout_nameout_path)__name__
__module____qualname____doc__r
   str__annotations__r   r   r   r   r   intr   r   r   r        N/var/www/html/engine/venv/lib/python3.12/site-packages/triton/tools/compile.pyr   r      sm     D#NKIsD#NFC$JIsJHcDj HdTk r   r   a  
Triton ahead-of-time compiler:

This program compiles the kernel with name `kernel-name` in the file at the
provided `path` into self-contained C source-code that embeds the `cubin`
data along with utilities to load, unload and launch the kernel.

signature is provided as a list of (optionally divisibility-hinted) types
or constexpr values, e.g.

`compile.py --kernel-name kernel --signature "*fp32:16, i32:16, 1024, i32" --out-name kernel /path/to/kernel.py`

will compile triton.JITFunction of name `kernel` inside the file `/path/to/kernel.py`.
Said kernel will be specialized such that argument 0, 1 are assumed to be multiple of 16,
and argument 2 is assumed to be a compile-time constant of value 1024, i.e. it won't be part of the generated prototype.

The resulting entry point will have signature

CUresult kernel_{specialization_suffix}(CUstream stream, unsigned gX, unsigned gY, unsigned gZ, float* arg0, int32_t arg1, int32_t arg2)

Different such specialized entry points can be combined using the `linker.py` script.

NOTE: when resolving the scope of /path/to/kernel.py, the file will be executed from within its parent directory with the python interpreter
used to run this `compile.py` script
c                  J   t        t              } | j                  dd       | j                  ddt        ddd	
       | j                  ddt        d d       | j                  ddt        dd       | j                  ddt        dd       | j                  ddt        d d       | j                  ddt
        d d       | j                  ddt        dd	        | j                  d!d"t        d#d	        | j                         }t        d$i t        |      }t        |       y )%N)descriptionr
   zTPath to Python source containing desired kernel in its scope. File will be executed.)helpz--kernel-namez-nr	   zName of the kernel to compileT)typedefaultr!   requiredz--targetz-tzThe target to compile towards, in format of '<backend>:<arch>:<warp-size>'; e.g., 'cuda:80:32', 'hip:gfx942:64'. Default to None, which means using current machine's GPU target)r"   r#   r!   z--num-warpsz-wr   z$Number of warps to launch the kernelz--num-stagesz-nsr   z/Number of stages (meta-parameter of the kernel)z
--out-namez-onz Out name for the compiled kernelz
--out-pathz-ozOut filenamez--signaturez-szSignature of the kernel)r"   r!   r$   z--gridz-gzLaunch grid of the kernelr   )
r   descadd_argumentr   r   r   
parse_argsr   varscompile_kernel)parsercli_argsargss      r   mainr-   9   s4   -F
s  u
CJi!%  '
DsDo  p t#qGmn
CN  P
e#tJlm
dt.Y
t#<U`de
$S7R]ab  "H(h(D4r   r,   c                    | j                   r| j                   n| j                  }| j                  r| j                  n
t        |      }t        | j                        }t
        j                  j                  dt        |j                               t        j                  j                  |j                  |      }t        j                  j                  |      }|j                  j                  |       t!        || j                        }| j"                  j%                  d      }t'        |      dk(  sJ t)        t+        d | j,                  j%                  d                  }dt.        t           fd}	d| j0                   d| j2                   }
 |	||
gz         }d	 }t5        |      D ci c]&  \  }}d
|v s|f ||j%                  d
      d         ( }}}|j7                         D ci c]  \  }}|	|| }}}t5        |      D ci c]  \  }}|j8                  |    ||       }}}|j7                         D ci c]  \  }}|	|| }}}|j7                         D ]   \  }}|dk(  s|||j8                  |d      <   " t5        |      D ci c]'  \  }}|j8                  |   |j%                  d
      d   ) }}}|D ]  }d||<   	 dj;                  |j=                         D cg c]  }t        |       c}      }|j7                         D cg c]  \  }}| d|  }}}|d| j0                   d| j2                   gz  }|j=                         D ]  }|dv rJ d|         |j7                         D ci c]  \  }}|dk(  s|ddgg }}}t>        j@                  jC                  ||||      }| jD                  r?t?        jF                  j@                  jH                  | jD                  j%                  d
       n1t>        jJ                  jL                  jN                  jQ                         }t>        j@                  jS                  |      }| j0                  | j2                  d}|jU                  |      }t?        jV                  |||jX                        }t!        |jZ                  dd      dkD  rt]        d      |jZ                  j^                  dkD  rt]        d      g }g } g }!g }"t5        |j8                        D ]  \  }}#|#|vrK|ja                  |#       | ja                  ||#          |!ja                  |#       |"ja                  ||#          U|jc                  |fd       dk(  sl|ja                  |#       | ja                  d        d}$t5        |j=                               D ]J  \  }}%|$t        |      z  }$|jc                  |fd       dk(  r|$dz  }$|jc                  |fd       dk(  sF|$dz  }$L dj;                  |||$g      }&|jd                  |jf                     }'t        ti        jj                  |'            d d! }(t>        jJ                  jL                  jN                  jl                  })i d"|&d#| j                  d$t'        |'      d%d&j;                  to        |(d d d    |(dd d          D *+cg c]  \  }*}+d'|* |+  c}+}*      dd&j;                  to        |!|"      D ,%cg c]  \  },}% |)|%       d(|,  c}%},      d)d&j;                  to        ||       D ,%cg c]  \  },}% |)|%       d(|,  c}%},      d*d&j;                  |!D -cg c]  }-d+|- 	 c}-d,gz   d-gz         d.t'        |!      d z   d/|d0|jZ                  jp                  d1| j0                  d2dj;                  ||
g      d3|d   d4|d   d5|d    d6d}.g }/|jr                  }0t        tt              j                  d7z  |0z  }1|1jw                  d8      D ]  }2|2jx                  }3|j{                  d9| d|$ |3       }4|4j}                  d:      5 }5|5j                   |2j                         j                  d;i |.       d d d        |/ja                  |4        |&|/fS c c}}w c c}}w c c}}w c c}}w c c}}w c c}w c c}}w c c}}w c c}+}*w c c}%},w c c}%},w c c}-w # 1 sw Y   fxY w)<Nr   ,r   c                 $    | j                  d      S )N )strip)ss    r   <lambda>z compile_kernel.<locals>.<lambda>_   s    1773< r   r   c                     t        j                         }|j                  dj                  |       j	                                |j                         d d S )Nr1      )hashlibsha256updatejoinencode	hexdigest)r   ms     r   hash_signaturez&compile_kernel.<locals>.hash_signaturea   s?    NN	)$++-.{{}Ra  r   warpsxstagesc                 v    	 t        |       }|S # t        $ r Y nw xY w	 t        |       }|S # t        $ r Y y w xY w)N)r   
ValueErrorfloat)r3   rets     r   	constexprz!compile_kernel.<locals>.constexpri   sO    	a&CJ 			(CJ 		s    	, 	88:r   rE   x=z
num_warps=znum_stages=)r      z#Only 1 and 16 are valid hints, got rI   ztt.divisibility)fn
constexprsr   attrs)r   r   )r   optionsglobal_scratch_sizezMAOT compiling kernels with global scratch requirements is not yet implementedzNAOT compiling kernels with profile scratch requirements is not yet implementedi32r	   cd_   r   triton_kernel_namebin_sizebin_dataz, 0xr1   full_signaturearg_pointers&z&global_scratchz&profile_scratchnum_argskernel_docstringsharedr   	algo_infogridXgridYgridZ_placeholderextraz	compile.*.wr   )Br   r   r   r   r
   sysinsertr   parent	importlibutilspec_from_file_locationstemmodule_from_specloaderexec_modulegetattrr   splitlenlistmapr   r   r   r   	enumerateitems	arg_namesr:   valuestritoncompiler	ASTSourcer   backends	GPUTargetruntimedriveractiveget_current_targetmake_backendparse_optionscompile__dict__metadataRuntimeErrorprofile_scratch_sizeappendgetasm
binary_extbinasciihexlifymap_python_to_cpp_typezipr^   backend__file__globsuffixwith_suffixopenwrite	read_textformat)6r,   r   r   arg_pathspecmodkernelr   r   r>   meta_sigsig_hashrE   ir3   hintskv	constantskeyvalue	const_sig
doc_stringhrL   srcr   r   kwargsrM   ccinforx   	arg_typesarg_names_not_1arg_types_not_1arg_namer   ty	func_namer   hex_	ty_to_cpprG   ynameargparamsoutput_filesbackend_nametemplate_dirtemplate_pathextoutput_filefps6                                                         r   r)   r)   P   s    $t}}43C3CH $t}}4>H DIIHHHOOAs8??+,>>11(--JD
..
)
)$
/CKKC S$**+F99??3Dt9>> S/1E1Ec1JKLI!$s) !
 t~~&gdoo->?Hi8*45H <EY;O\41aSVZ[S[aUIaggcl1o..\E\#kkm=daq}QT=E=?H?STtq!!!!$il2TIT"+//"3E$!Qq}AEIEkkm 8
UA:27If&&s1v./8 CLIBVW$!Q!!!$aggcl1o5WIW %$	#%)*:*:*<=Q#a&=>I)2):;AQCq*;J;Z/0K?P2QRRJ\\^ GG|FB1#FF|G6;kkmOdaqBwQ#R())OEO
//
#
#v)y`e
#
fC ;; __%%//1B1B31GH"NN1188KKM oo**62G>>IF##F+G^^C8H8HIFv 5q9A=jkk++a/kllIIOO !1!12 $89$X&Yx01""8,""9X#67YYud#q(X&U#$ F9++-. 2#a&99aUD!Q&cMF99aUD!R'cMF (Hf56I
**W''
(Cx$%a+D%%,,CCIyd.. 	CH 	DIIs4!9d14a4j7QRtq!A3qc{RS	
 	TYY#o_nJophdB9R=/4& 9pq 	$))sS\^gOh$i84	"av%>$ij 			"HQse9"HL]K^"^btau"uv 	C(1, 	J 	&//(( 	T^^ 	SXXy(34 	a 	a 	a  	!F$ L>>L>((72\AL%**;7 )""**Qxj&#+GHc" 	AbHH5],,.55??@	AK() l""q ]=TE X >;
 P` Sp$i"H"	A 	Asl   .b);b)/
b/:b/ b5
b;b;,c2cc	ccc?c9c$(c*0c//c8	__main__)r   r7   importlib.utilrj   rg   argparser   dataclassesr   pathlibr   typingr   rz   triton.backendsr   r%   r-   r)   r   r   r   r   <module>r      sk       
 # !     ! ! !6.~# ~#B zF r   