
    ci{R                       d dl mZ d dlZd dlZddlmZmZ ddlmZ ddlm	Z	 ddlm
Z
mZ ddlmZmZ dd	lmZ dd
lmZmZmZmZ ddlmZ ddlmZ d dlmZ d dlZd dlZd dlZd dlZd dl Z dZ!de!iZ"dZ#de#iZ$d Z% G d d      Z& G d d      Z' ejP                         d        Z)d Z*d$dZ+ G d d      Z,d%dZ-d&dZ. G d d      Z/ G d d e0      Z1d! Z2 G d" d#      Z3y)'    )annotationsN   )get_cache_invalidating_env_varsir)backends)Language)BaseBackend	GPUTarget)__version__knobs)OutOfResources)get_cache_managerget_dump_managerget_override_managerget_cache_key)driver)get_sass)Pathz=\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\)ptxz\.param\s+\.(\w+)c                    t        j                  d|       }t        j                  d|       }|yt        j                  dd|       } |dt        |j	                  d            z   S | S )Nz!tt\.ptr<([^,]+)ztt.nv_tma_desc = 1	nvTmaDescz {[^}]+} *   )researchsubconvert_type_reprgroup)xmatchtmas      R/var/www/html/engine/venv/lib/python3.12/site-packages/triton/compiler/compiler.pyr   r   '   sd     II)1-E
)))1
-C

{B"A&u{{1~666H    c                  *    e Zd ZdddZd ZddZd Zy)		ASTSourceNc                   || _         t        j                  | _        d| _        |j
                  | _        || _        t               | _	        |g|j                         D ]T  \  }}t        |t              r|j                  j                  |      fn|}t        |t              sJ || j                  |<   V |xs
 t               | _        | j                  j#                         D ]  }t        |t              rt%        d       y )NttirzSignature keys must be string)fnr   TRITONlanguageext__name__name	signaturedict	constantsitems
isinstancestr	arg_namesindextupleattrskeys	TypeError)selfr)   r/   
constexprsr8   kvs          r#   __init__zASTSource.__init__6   s     KK	"!"((* &11;As1CR\\''*-!!U+++$%q!& _df
$$& 	AAa% ?@@	Ar$   c           	        t        | j                  j                               D cg c]  \  }}|	 }}}d }dj                  t        | j                  j                               D cg c]  \  }} ||       c}}      }| j
                  j                   dt        | j                         d| d| }t        j                  |j                  d            j                         S c c}}w c c}}w )Nc                H    t        | d      r| j                  S t        |       S )N	cache_key)hasattrrB   r4   )r    s    r#   <lambda>z ASTSource.hash.<locals>.<lambda>I   s    71k+BAKK A r$   -utf-8)sortedr/   r2   joinr1   r)   rB   r4   r8   hashlibsha256encode	hexdigest)r;   r=   r>   
sorted_sigget_keyconstants_keykeys          r#   hashzASTSource.hashG   s    $*4>>+?+?+A$BCDAqaC
CN@T@T@V9W!XA'!*!XY""#1S_$5Qzl!M?S~~cjj12<<>>	 D!Xs   C**C0
c                >    ddl m}  || j                  | ||||      S )Nr   )ast_to_ttir)contextoptionscodegen_fns
module_map)code_generatorrS   r)   )r;   targetrU   rV   rW   rT   rS   s          r#   make_irzASTSource.make_irN   s$    /477D'7Xc&02 	2r$   c                    t               S N)r0   r;   s    r#   parse_optionszASTSource.parse_optionsS   s	    vr$   )NNreturnNonerY   r
   r-   
__module____qualname__r?   rQ   rZ   r^    r$   r#   r&   r&   4   s    A"?2
r$   r&   c                  &    e Zd Zd Zd ZddZd Zy)IRSourcec                   || _         t        |      }|j                  dd  | _        t        j
                  | _        |j                         | _        t        j                  |       |j                  |       | j                  dk(  rt        j                  t        | j                     | j                  t        j                        }|j                  d      | _        |j                  d      }t        j"                  t$        | j                     |      }t'        |      D ci c]  \  }}|t)        |       c}}| _        y t        j,                  | j                   |      | _        | j.                  j1                         }	d|	z   | _        | j.                  j3                  |	      }
| j.                  j5                  |
      }t'        |      D ci c]  \  }}||
 c}}| _        y c c}}w c c}}w )Nr   r   r   @)pathr   suffixr,   r   r*   r+   	read_textsrcr   load_dialectsr   r   prototype_pattern	MULTILINEr   r.   findallarg_type_pattern	enumerater   r/   parse_mlir_modulemoduleget_entry_func_nameget_functionget_function_signature)r;   rk   rT   backendr!   r/   typesr=   tyfn_namefuncOpfunc_tys               r#   r?   zIRSource.__init__Y   sn   	Dz;;qr? >>#
!g& 88uII/9488R\\READIAIJJ/99EEDMeDTU51ba!22!66UDN..tyy'BDKkk557GgDI[[--g6Fkk88@G1:71CD2aeDDN V Es   "G&G,c                z    t        j                  | j                  j                  d            j	                         S )NrF   )rI   rJ   rn   rK   rL   r]   s    r#   rQ   zIRSource.hashr   s'    ~~dhhoog67AACCr$   c                <    || j                   _        | j                   S r\   )rv   rT   )r;   rY   rU   rV   rW   rT   s         r#   rZ   zIRSource.make_iru   s    %{{r$   c                    | j                   dk(  r(| j                  j                  d      }|J d       d|iS t               S )Nttgirzttg.num-warpsz'Unable to parse ttg.num-warps attribute	num_warps)r,   rv   get_int_attrr0   )r;   r   s     r#   r^   zIRSource.parse_optionsy   sF    88w00AI(S*SS(++vr$   Nrb   rc   rf   r$   r#   rh   rh   W   s    E2Dr$   rh   c                Z    t         j                  j                  j                  |       d   S )Nmax_shared_mem)r   activeutilsget_device_properties)devices    r#   r   r      s#    ==44V<=MNNr$   c                    |dk(  s|dk(  rt        j                  | |      }||_        |S |dk(  s
|dk(  s|dk(  rt        |       j	                         S |dk(  s|dk(  rt        |       j                         S y )Nr(   r   llirr   amdgcncubinhsaco)r   ru   rT   r   rm   
read_bytes)	full_namer,   rT   rv   s       r#   parser      sx    
f}w%%i9 
f}uxI((**
g~I))++ (r$   c                ,   t         j                  j                  ry| j                  t	        | j                         | j
                  t	        | j
                         ddg}|D cg c]"  }|j                  dt        j                        $ }}| j                  g }4t        fd|D              s|j                         j                  4t        ||dd       D ]  \  }}||_         |sd| _	        yd|d   _        |d   | _	        yc c}w )	z
    Removes code_generator.py and related files from tracebacks.

    These are uninteresting to the user -- "just show me *my* code!"
    Nz"/triton/compiler/code_generator.pyz/ast.py/c              3     K   | ]6  }j                   j                  j                  j                  |      s3| 8 y wr\   )tb_framef_codeco_filenameendswith).0ftbs     r#   	<genexpr>z#filter_traceback.<locals>.<genexpr>   s/     V2;;+=+=+I+I+R+RST+U1Vs   4??r   r   )r   compilationfront_end_debugging	__cause__filter_traceback__context__replaceossep__traceback__anyappendtb_nextzip)e	BAD_FILESbad_fileframes	cur_frame
next_framer   s         @r#   r   r      s    ,,{{%}} ' 	-I @II8!!#rvv.III	
BF
.ViVVMM"ZZ .
 $'vvabz#: 'J&	' !r
 )! Js   ''Dc                  ,    e Zd ZddZddZddZddZy)	CompileTimerc                `    t        j                          | _        d | _        g | _        d | _        y r\   )timestartir_initialization_endlowering_stage_endsstore_results_endr]   s    r#   r?   zCompileTimer.__init__   s'     IIK
37"<> /3r$   c                6    t        j                          | _        y r\   )r   r   r]   s    r#   finished_ir_initializationz'CompileTimer.finished_ir_initialization   s    %)YY["r$   c                b    | j                   j                  |t        j                         f       y r\   )r   r   r   )r;   
stage_names     r#   stage_finishedzCompileTimer.stage_finished   s!      ''TYY[(ABr$   c                l   t        j                          }| j                  || _        n|| _        dd}g }| j                  }| j                  D ]!  \  }}|j	                  | |||      f       |}# t        j                   || j                  | j                        | ||| j                              S )Nc                *    |yt        || z
  dz        S )Nr   i@B )int)r   ends     r#   deltazCompileTimer.end.<locals>.delta   s    {ew.//r$   )ir_initializationlowering_stagesstore_results)r   floatr   zfloat | Noner`   r   )r   r   r   r   r   r   CompileTimesr   )r;   	timestampr   lowering_stage_durationsstage_startr   	stage_ends          r#   r   zCompileTimer.end   s    IIK	%%-)2D&%.D"	0
 $& 00%)%=%= 	$!J	$++Z{I9V,WX#K	$ !!#DJJ0J0JK4T-C-CD
 	
r$   Nr_   )r   r4   r`   ra   )r`   zknobs.CompileTimes)r-   rd   re   r?   r   r   r   rf   r$   r#   r   r      s    41C
r$   r   c                   t         j                  j                  }|r
t               }|t        j
                  j                         }t        |t              sJ d       t        |      }t        | t               }|r8t        | t              sJ d       t        j                         }t        | ||      } | j                         }	|j                  t!        |xs
 t!               fi |	      }|
t#               n|}
t%        | |||
      }t'        j(                  |j+                  d            j-                         }t/        |      }t         j                  j0                  }t         j                  j2                  }t         j                  j4                  }|rt7        | j9                               nd }|rt;        | j9                               nd }| j<                  d d }| d}|j?                  |      xs i }|jA                  |      }t         j                  jB                  }|sF|DtE        | ||      }|r3 || |jF                  jI                         |jK                         d       |S ||d	|jL                  |
}tN        |d
<   t!               }|jQ                  ||| jR                         tU        |jW                               jY                  | jZ                        }|r|dz  }t        | t              s:t        j                         }t        j\                  |       |j]                  |       |j_                  |      }|ja                         }	 | jc                  ||||      }|r'| d| jZ                   } |ji                  ||       || <   n| d} |ji                  ||       || <   t         j                  jj                  }!|r5|!r3|jm                  | jn                         tq        d| jn                          |rjs                          tU        |ju                               |d  D ]  \  }"}# |#||      }$| d|" } |6|jA                  dd       x}%rP|%jw                  d|"       r<ty        |%|"|      }$n.|j{                  |       x}&rtq        d|&        ty        |&|"|      }$|r|"dv r|ji                  |$|       || <   |7|ji                  |$|        |"dk(  r t}        |$      }'|ji                  |'|dz          |!|"k(  r0|j{                  |       }(|$jm                  |(       tq        d|(        |$}|sj                  |"        |ji                  t        j                  |t              |d      ||<   |j                  ||       t         j                  j                  s|j                          |r || ||jK                         d       tE        | ||      S # td        $ r}tg        |        d }~ww xY w)Nz target must be of GPUTarget typez'source must be either AST or a filepath)env_varsrF      .jsonT)rn   metadatametadata_grouptimes	cache_hit)rQ   rY   triton_versionr   .z.sourcezCreating new locations for ir_overridez
Overriding kernel with file )r   r   jsonr   z.sass)defaultF)binary)Fr   r   listenerr   r   r   get_current_targetr3   r
   make_backendr&   r4   r   rT   rh   r^   r0   r   r   rI   rJ   rK   rL   r   overridedump_irstore_binary_onlyr   rQ   r   r.   	get_groupgetalways_compileCompiledKernelr   _asdictr   __dict__r   
add_stagesr+   listr9   r6   r,   ro   get_codegen_implementationget_module_maprZ   	Exceptionr   put
use_ir_loccreate_location_snapshotrk   printr   r2   r   r   get_filer   r   r   dumpsvars	put_groupenable_asandisable_multithreading))rn   rY   rU   	_env_varscompilation_listenertimerrz   	ir_sourcerT   extra_optionsr   rP   rQ   fn_cache_managerenable_overrideenable_ir_dumpstore_only_binaryfn_override_managerfn_dump_manager	file_namemetadata_filenamer   metadata_pathr   resr   stagesfirst_stagerV   rW   rv   r   ir_filenamer   r,   
compile_irnext_moduler   r   sassir_full_names)                                            r#   compiler     s    ,,55~113fi(L*LL(6"GsI..I#s#N%NN#**,sGW-%%'M##D):DF$Lm$LMG4=4E.09H
Wg
AC>>#**W-.88:D(. ''00O&&..N));;>M.sxxz:SW6D&sxxz2$O
 #I$+U+%//0ABHbN"&&'89M&&55Nm7S.$7 --/-iik 
  

 	H "-HVFvw5v{{}%++CGG4Kq c8$**,
!g&44W=K'')JVWk:wO
 "1SWWI.&6&:&:6;&O{#"7+&6&:&:6;&O{#""--JZ''1+CHH:67((*/= &Z 2"1SE*&  (||M4@@@kFZFZ]^_b^c[dFe#Kg>-66{CCYC29+>?	38K!s.H'H*:*>*>{K*XN;'&[9g~,##D)g*=>+44[AL00>/~>?  %5&8 )9(<(<TZZZ^=_arDI )= )KN$%0.A ((&&( x^c^g^g^i',	. #~t44E  s   W 	W8'W33W8c                   t        j                         D cg c]*  }|j                  j                  |       s|j                  , }}t	        |      dk7  r't        t	        |       d| j                   d| d       |d   |       S c c}w )Nr   z! compatible backends for target (z) (z). There should only be one.r   )r   valuescompilersupports_targetlenRuntimeErrorrz   )rY   r    activess      r#   r   r   r  s    #+??#4[a

8R8RSY8Zqzz[G[
7|q7|n=fnn=MSQXPYYuvx 	x71:f	 \s
    BBc                      e Zd Zd Zd Zd Zy)LazyDictc                     || _         g | _        y r\   )dataextras)r;   r  s     r#   r?   zLazyDict.__init__|  s    	r$   c                    | j                   D ]  \  }}| j                   || z  | _         | j                   j                          | j                  S r\   )r  r  clearr;   funcargss      r#   r   zLazyDict.get  sG    ++ 	0JD$		D$K/DI	0yyr$   c                >    | j                   j                  ||f       y r\   )r  r   r  s      r#   addzLazyDict.add  s    D$<(r$   N)r-   rd   re   r?   r   r   rf   r$   r#   r  r  z  s    )r$   r  c                      e Zd Zd Zy)AsmDictc                T    |dk(  rt        | d         }nt        d|z        || |<   |S )Nr  r   zUnknown key: '%s')r   KeyError)r;   rP   values      r#   __missing__zAsmDict.__missing__  s6    &=T']+E.455S	r$   N)r-   rd   re   r&  rf   r$   r#   r"  r"    s    r$   r"  c                ,    t        j                  |       r\   )copydeepcopy)errr  kwargss      r#   _raise_errorr,    s    
--
r$   c                  4    e Zd Zd Zd Zed        Zd Zd Zy)r   c           	        ddl m} t        d |j                         D              }t	        j
                  |j                               }t        |d         |d<   |d   }t        |d   |d   |d         |d<    |d	t        t        |j                                           } |di || _        t        | j                  j                        }	|	j                  | j                        | _        || _        || _        | j                  j&                  | _        |j                         D 
cg c]"  \  }
}|
j)                  d
      rt+        |      $ }}
}|	j,                  }t/        |D ci c]B  }|j0                  dd  |j0                  dd  |k(  r|j3                         n|j                         D c}      | _        || _        | j4                  |   | _        d | _        d | _        d | _        y c c}}
w c c}w )Nr   )
namedtuplec              3  \   K   | ]$  \  }}|j                  d       st        |       & yw)r   N)r   r   )r   cps      r#   r   z*CompiledKernel.__init__.<locals>.<genexpr>  s$     `$!QAJJW^L_d1g`s   ,,cluster_dimsrY   rz   arch	warp_sizeKernelMetadatar   r   rf   ) collectionsr/  nextr2   r   loadsrm   r7   r
   rG   r   r9   r   r   rY   pack_metadatapacked_metadatarn   rQ   r.   r   r   
binary_extr"  rl   r   asmr   kernelrv   function_run)r;   rn   r   rQ   r/  r  r   rY   r6  rz   r1  r2  	asm_filesr<  files                  r#   r?   zCompiledKernel.__init__  s   *`.2F2F2H`a::m5578#(.)A#B (#&vi'8&.&Q\J]^#$4fT(--/=R6ST&22t}}334&44T]]C	MM&&	)7)=)=)?[AqzzRYGZT!W[	[''
!
 KKO$++ab/Z2OT__.UYUcUcUee
  -hhz* 	 \
s   *G-G-)AG3c                     j                   y  fd}t        j                  j                         }t        j                  j	                   j
                   j                         _        t        |      } j                  j                  |kD  r' |t         j                  j                  |d             t         j                  d      rX j                  j                  Bd} j                  j                  |kD  r' |t         j                  j                  |d             t        j                  j                  Ut        j                  j                   j                    j                    j"                   j$                   j&                         t        j                  j(                  j+                   j"                   j,                   j                  j                  |      \   _          _         _         _         _        t        j                  j5                         j6                  } j                  j8                  |z   j2                  kD  r4 |t         j                  j8                  |z   j2                  d             t        j                  j:                  Vt        j                  j;                   j                    j                    j"                   j$                   j&                         y y )Nc                p    t        j                  |       }t        j                  t        |      _        | r\   )r(  r)  	functoolspartialr,  r@  )r*  
cloned_errr;   s     r#   raise_z,CompiledKernel._init_handles.<locals>.raise_  s,     s+J!)),
CDIIr$   zshared memory	tmem_sizei   ztensor memorythreads)rv   r   r   get_current_devicelauncher_clsrn   r   r@  r   sharedr   rC   rI  r   runtimekernel_load_start_hookr?  r.   r   rQ   r   load_binaryr>  n_regsn_spillsn_max_threadsr   r5  r   kernel_load_end_hook)r;   rH  r   
max_sharedmax_tmem_sizer5  s   `     r#   _init_handleszCompiledKernel._init_handles  s   ;;"	 113MM..txxG	#F+
==*,>$--"6"6
OTU4==+.4==3J3J3VM}}&&6~dmm&=&=}o^_==//;MM00dmmTYYX\XkXkmqmvmvwU[UbUbUhUhUtUtIIt{{DMM$8$8&VBRT]DK@RMM446@@	==""Y.1C1CC>$--"9"9I"EtGYGY[def==--9MM..t{{DMM499VZViVikoktktu :r$   c                R    | j                   | j                          | j                   S r\   )r@  rW  r]   s    r#   runzCompiledKernel.run  s"    99 yyr$   c                   t         j                  j                  y | j                          t	        | j
                  | j                  |d      }t        | j                  t              r | j                  j                  j                  |S t        | j                  j                  j                  |      D ci c]  \  }}||
 }}}|j                  | j                  j                  j                  || j                  |f       |S c c}}w )N)r.   r?  stream)r   rN  launch_enter_hookrW  r  r.   r?  r3   rn   r&   r)   launch_metadatar   r5   r   r   )r;   gridr[  r  retr.   argarg_dicts           r#   r]  zCompiledKernel.launch_metadata  s    ==**2		t}}PVWX$((I.$((++2M2M2UJ/2488;;3H3H$/OP)$D#IPP++dDMM8-LM
 Qs   8Dc                <      j                          d d fd
}|S )N)r[  c                |   | =t         j                  j                         }t         j                  j                  |      }  j                  | g| } j
                  d   d   d   | j                  j                  |t        j                  j                  t        j                  j                  g	|  y )Nr   r   r   )r   r   rK  get_current_streamr]  rY  r?  r;  r   rN  r\  launch_exit_hook)r[  r  r   r]  r^  r;   s       r#   runnerz*CompiledKernel.__getitem__.<locals>.runner  s    ~99;99&A2d224G$GODHHT!Wd1gtAwtG[G[]l]]44emm6T6T]W[]r$   )rW  )r;   r^  rf  s   `` r#   __getitem__zCompiledKernel.__getitem__  s    !% 	] r$   N)	r-   rd   re   r?   rW  propertyrY  r]  rg  rf   r$   r#   r   r     s,    >#vJ  
	r$   r   )r   BaseException)NNN)rY   r
   r`   r	   )4
__future__r   rI   r   _C.libtritonr   r   r   backends.compilerr   r	   r
   r   r   r   runtime.autotunerr   runtime.cacher   r   r   r   runtime.driverr   tools.disasmr   pathlibr   r   rE  r   r   r(  ptx_prototype_patternrp   ptx_arg_type_patternrs   r   r&   rh   	lru_cacher   r   r   r   r  r   r  r0   r"  r,  r   rf   r$   r#   <module>ru     s    "   >  ( 6 ! . d d # #  	  	   Y 	   , 	 

   F' 'T O O,#$L$
 $
NQ5h) ) 
d 
b br$   