
    ci;                     `   d dl Z d dlmZmZmZmZ d dlZd dlmZ	 d dlm
Z d dlmZ d dlmZ d dlmZ d dlm
Z d dlmZ d d	lmZ d d
lmZmZ d dlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# dZ$ G d d      Z% G d d      Z&dee'e#jP                  f   de#jP                  fdZ)de'fdZ* G d de      Z+y)    N)DictOptionalUnionAny)ir)proton)amd)nvidia)passes)LazyDict)JITFunction)set_profile_allocatorNullAllocator)backends   )Hook   )set_instrumentation_onset_instrumentation_off)modec                   ,    e Zd Zd Zdededee   fdZy)CudaAllocatorc                     || _         y N)instrumentation_hook)selfr   s     _/var/www/html/engine/venv/lib/python3.12/site-packages/triton/profiler/hooks/instrumentation.py__init__zCudaAllocator.__init__   s
    $8!    size	alignmentstreamc                 F   || j                   j                  k7  r%t        d| d| j                   j                         ||z   dz
  |z  |z  }t        || j                   j                        }dd l}|j                  |f|j                  d      }|| j                   _        |S )NzAlignment mismatch: z != r   r   cudadtypedevice)	r   profile_buffer_alignmentRuntimeErrormaxprofile_buffer_sizetorchemptyuint8buffer)r   r    r!   r"   aligned_sizer,   r/   s          r   __call__zCudaAllocator.__call__   s    11JJJ&ykd6O6O6h6h5ijl ly(1,:YF
 <)B)B)V)VW 	l-U[[P+1!!(r   N)__name__
__module____qualname__r   intr   r1    r   r   r   r      s%    9S S (3- r   r   c                   @    e Zd Zdeeef   fdZdefdZdefdZd Z	y)Instrumentationir_mapc                     || _         y r   )manager)r   r9   s     r   r   zInstrumentation.__init__1   s	    r   r   c                 Z    || j                   v rt        d|       || j                   |<   y )NzIR already registered: )r;   r)   )r   r   funcs      r   registerzInstrumentation.register4   s/    !8=>>Rr   c                 n    | j                  |       || j                  v r | j                  |   |       y y r   )load_dialectsr;   )r   r   pmcontexts       r   patchzInstrumentation.patch9   s4    7#DLLR  r   c                 .    t        j                  |       y r   )triton_protonr@   )r   ctxs     r   r@   zInstrumentation.load_dialects>   s    ##C(r   N)
r2   r3   r4   r   strr   r   r>   rC   r@   r6   r   r   r8   r8   /   s3    tCH~  3  
! !
)r   r8   mode_objreturnc                   
 t        | t        j                        r| S | sd} | j                  d      }|d   }i }|dd  D ]/  }d|v r|j                  dd      \  }}|||<   "t	        d| d       |j                  dd	      |j                  d
d      |j                  dd      t        |j                  dd            |j                  dd      |j                  dd      |j                  dd      |j                  dd      d

fd} |dt        j                        
d<    |d
t        j                        
d
<    |dt        j                        
d<    |dt        j                        
d<    |dt        j                        
d<   t        
d         dkD  r0
d   j                  d      D cg c]  }|j                          c}ng }	|	D ]"  }|t        j                  vst	        d|        |	D cg c]  }t        j                  |    c}
d<   |dk(  rt        j                  di 
S |dk(  rt        j                   di 
S t	        d|        c c}w c c}w )Ndefault:r   r   =z#Malformed instrumentation option: ''metric_typecyclebuffer_typesharedbuffer_strategycircularbuffer_size0granularitywarpsampling_strategynonesampling_options optimizations)rO   rQ   rS   rU   rW   rY   r[   r]   c                 N    |    }|r||vrt        d|  d|       |r||   S |S )NzUnknown z: )
ValueError)opt_namemappingvalueoptionss      r   get_option_valuez)_interpret_mode.<locals>.get_option_value[   s@    !U')xzE7;<<!&wu~1E1r   ,zUnknown optimization: mmazUnknown mode: r6   )
isinstancer   InstrumentationModesplitr_   getr5   metric_typesbuffer_typesbuffer_strategiesgranularitiessampling_strategieslenstripr]   DefaultMMA)rH   parts	mode_nameoptsoptkeyvalrd   rb   valuesrc   s             @r   _interpret_moder{   B   s   (D445NN3EaIDQRy K#:yya(HCDIB3%qIJJK xxw7Q^`hHi88$5zBSVW[W_W_`morWsStxxv6TXXVikqMr HH%7<txxXgikOl	G2 .mT=N=NOGM-mT=N=NOGM!12CTE[E[!\G-mT=O=OPGM#34GIaIa#bG  EHP_H`DadeDeGO4::3?A {{} Akm  ?***5eW=>>? HNNe 2 25 9NGO I||&g&&	e	xx"'"">(455A
  Os   3I:I!c                      t         j                  j                  j                  j	                         j
                  } | dk(  ry| dk(  ryt        d|        )Nr$   r
   hipr	   zUnsupported backend: )tritonruntimedriveractiveget_current_targetbackendr)   )r   s    r   _get_backend_namer   x   sO    nn##**==?GGG&	E	27)<==r   c                      e Zd ZU dZeed<   dZeed<   dZeed<   dZ	e
e   ed<   dZeed	<   d
Zeed<   dedeej"                  f   fdZd Zd Zdedededeeef   deddfdZdefdZdeddfdZdeddfdZdeddfdZy)InstrumentationHookr   priorityactive_countFenable_host_bufferNhost_bufferr   r+      r(   rH   c                 `    t        |      | _        t        |       | _        d | _        i | _        y r   )r{   r   r   	allocatorr/   metadata_path)r   rH   s     r   r   zInstrumentationHook.__init__   s)    .=h.G	&t,79r   c                     t         j                  dkD  rt        d      t         xj                  dz  c_        t                t        j
                  j                  j                  j                         t        j
                  j                  j                  j                  j                        d   t                fdfdt        fdfdd	      t           j                  _        t!         j"                         t$        j&                   j(                  t+        j,                        fd
       }|t$        _        y )Nr   zFOnly one instance of the instrumentation hook can be active at a time.r   max_shared_memc                 x   t         j                  j                  j                   j                  v rdnd}t	        j
                  | j                   j                  j                   j                  j                   j                  j                   j                  j                   j                  j                   j                  j                   j                  j                  j                  |       t        j                   j#                  |        t         j                  j$                  j                   j                  v rt	        j&                  |        t	        j(                  |        t         j                  j*                  j                   j                  v rdk(  rt	        j,                  |        y y y )NFTr	   )r   OptimizeCLOCK32r]   rE   add_convert_proton_to_protongpurO   rY   r[   rW   rS   rQ   rU   r+   r(   triton_passescommonadd_cseSCHED_STORESadd_schedule_buffer_store!add_allocate_proton_shared_memorySCHED_BARRIERSadd_sched_barriers)rA   is_long_clkbackend_namer   r   s     r   to_llvmir_passesz6InstrumentationHook.activate.<locals>.to_llvmir_passes   s9   #'==#8#8DII<S<S#S%Y]K99"dii>S>SUYU^U^UpUp:>)):T:TVZV_V_VkVk:>)):S:SUYU^U^UjUj:>)):O:OQ_:>:R:RTXTqTq:EG   ((,}}))TYY-D-DD77;;;B?}}++tyy/F/FF<[`K`004 LaFr   c                 B   t        j                  |        dk(  rt        j                  |        y dk(  rit        j                  j
                  j                  j                  j                        d   j                  d      d   }t        j                  | |       y y )Nr
   r	   archrL   r   )rE   )add_allocate_proton_global_scratch_buffer%add_convert_proton_nvidia_gpu_to_llvmr~   r   r   r   utilsget_device_propertiesri   "add_convert_proton_amd_gpu_to_llvm)rA   r   r   r'   s     r   to_llvm_passesz4InstrumentationHook.activate.<locals>.to_llvm_passes   s    CCBGx'CCBG&~~,,3399OOPVWX^_eefijklm@@TJ 'r   c                      |       S r   r6   )rA   r   s    r   <lambda>z.InstrumentationHook.activate.<locals>.<lambda>   s    '+ r   c                      |       S r   r6   )rA   r   s    r   r   z.InstrumentationHook.activate.<locals>.<lambda>   s    ~b) r   )ttgpuir_to_llvmirllvmir_to_llvmc                 8    t              |d<    | g|i |S )Ninstrumentation_mode)rG   )r   argskwargsoriginal_modeoriginal_runs      r   instrumented_runz6InstrumentationHook.activate.<locals>.instrumented_run   s(    -0-?F)*6t6v66r   )r   r   r)   r   r~   r   r   r   get_current_devicer   r   r   r8   r   compilerinstrumentationr   r   r   runr   	functoolswraps)	r   r   r   r'   r   r   r   r   r   s	   ` @@@@@@@r   activatezInstrumentationHook.activate   s    ++a/ghh((A-( &&--@@B..55;;QQRXYZjk(*	5$	K ;J+)	K
 ;''7 	dnn-"				&	7 
'	7 +r   c                    t         j                  dk(  ry t         xj                  dz  c_        t               }i t        |   j                  _        t                t        t        j                  d      r#t        j                  j                  t        _	        t        t                      t         j                  rd t         _        d | _        y )Nr   r   __wrapped__)r   r   r   r   r   r   r   hasattrr   r   r   r   r   r   r   r/   )r   r   s     r   
deactivatezInstrumentationHook.deactivate   s    ++q0((A-((* ;=''7 	 ! ;??M2)oo99KO 	mo. 11.2+ r   modulefunctionnamemetadata_grouphashrI   c                    |sy t        d |j                         D        d       }t        d |j                         D        d       }|| j                  |<   |rt        j                         }t        j
                  |       t               }	|	dk(  rt        j
                  |       n|	dk(  rt        j
                  |       t        j
                  |       t        j                  ||      }||_        t        j                  |      }
t        j                  |      }t        j                  |||
||       y t        d|       )Nc              3   J   K   | ]  \  }}|j                  d       s|  yw)ttgirNendswith.0rx   paths      r   	<genexpr>z2InstrumentationHook.init_handle.<locals>.<genexpr>   s      `dV]H_`   ##c              3   J   K   | ]  \  }}|j                  d       s|  yw)jsonNr   r   s      r   r   z2InstrumentationHook.init_handle.<locals>.<genexpr>   s      eysDcll\bNdder   r
   r	   z+IR path not found in metadata for function )nextitemsr   	triton_irrB   r@   r   triton_nvidia
triton_amdrE   parse_mlir_moduleget_scope_id_namesget_scope_id_parents	libprotoninit_function_metadatar)   )r   r   r   r   r   r   ir_pathr   rB   r   scope_id_namesscope_id_parentss               r   init_handlezInstrumentationHook.init_handle   s    `n.B.B.D`bfgeN4H4H4Jegkl'48$'')G##G,,.Lx'++G4&((1''000'BF$FN*==fEN,AA&I,,Xt^M]_lm!LXJWXXr   c                 R    | j                   dS | j                   j                         S )Nr   )r/   data_ptr)r   s    r   	_data_ptrzInstrumentationHook._data_ptr  s#    KK'qCT[[-A-A-CCr   metadatac                 z   |j                   j                  d      }|j                   j                  d      }| j                  dn4| j                  j                         | j                  j	                         z  }t        j                  ||| j                         |       t        j                  rd t        _
        y y Nr   r"   r   )datarj   r/   element_sizenumelr   enter_instrumented_opr   r   r   r   r   r   r=   r"   
alloc_sizes        r   enterzInstrumentationHook.enter	  s    }}  ,""8,++-Q4;;3K3K3MPTP[P[PaPaPc3c
''dnn6F
S11.2+ 2r   c                    |j                   j                  d      }|j                   j                  d      }| j                  dn4| j                  j                         | j                  j	                         z  }t        j                  ||| j                         |       t        j                  r| j                  |       y y r   )r   rj   r/   r   r   r   exit_instrumented_opr   r   r   _populate_host_bufferr   s        r   exitzInstrumentationHook.exit  s    }}  ,""8,++-Q4;;3K3K3MPTP[P[PaPaPc3c
&&vtT^^5EzR11&&t, 2r   c           
         |r| j                   |   rdd l}dd l}dd l}dt        t
        t        f   dt        fd}| j                  dn4| j                  j                         | j                  j                         z  }| j                  j                  j                         j                  d      }i }t        | j                   |   d      5 }	|j!                  |	      }d d d         ||d         }
|d   }|d   }| j                  j"                  t$        j&                  j(                  k(  r|n
t+        |      }t        ||z        }| j                  j                  d	k(  xr1 | j                  j,                  t$        j.                  j0                  k(  }|rt3        |      D cg c]  }| }}n|D cg c]  }t        |       }}d
|dz  z   }d}|}|}t4        |||||
||||g
|} |j6                  dt+        |      z  g| }|j9                  ||z   |j:                  d      t<        _        t<        j>                  d | }|jA                  |jC                  tE        |      |j:                               t<        j>                  |d  jG                  | j                        }|jA                  | j                  jI                                y y y # 1 sw Y   xY wc c}w c c}w )Nr   targetrI   c                 (    | d   dk(  ry| d   dk(  ryy)Nr   r$   r   r}   r   r   r6   )r   s    r   encode_targetz@InstrumentationHook._populate_host_buffer.<locals>.encode_target   s%    )$.I&%/r   re   rprofile_scratch_size	num_warpsr\   (      Icpur%   )r&   )%r   r,   structr   r   rG   r   r5   r/   r   r   r   r[   rq   ri   openloadrY   rE   SAMPLING_STRATEGYNONErp   rW   GRANULARITYWARPrangeVERSIONpackr-   r.   r   r   copy_tensorlistview_asr   )r   r   r,   r   r   r   r   sampled_warpsr   filedevice_typescratch_mem_size
total_unituid_num	block_numis_all_warpsiuid_vecheader_sizeheader_offsetpayload_offsetpayload_sizeheader_valuesheader_bytesconfig_portiondata_portions                             r   r   z)InstrumentationHook._populate_host_buffer  s   **84d38n   #kk1t{{7O7O7QTXT_T_TeTeTg7gJ II66<<>DDSIMDd((2C8 'Dyy' (X7K#$:;k*J$(II$?$?=CbCbCgCg$gjmpnGJ)99:IB  9955;w		@U@UYfYrYrYwYw@wL&+J&78188+89a3q699w{*KM(N%L^\S^`iku ',3M '6;;sS-?'?P-PL.3kk+
:RZ_ZeZensk.t+0<<\kJN  d<.@!TU.::;<HPPQUQ\Q\]Lt{{01[ 58 ' 'V 99s   K$	K4K#K)r2   r3   r4   r   r5   __annotations__r   r   boolr   r   r   r+   r(   r   rG   r   rh   r   r   r   r   r   r   r   r   r   r   r6   r   r   r   r      s    HcL#$$!%K#%  $'c':tS$2J2J'J!K :9+v8Y# Y YC YQUVY[^V^Q_ Ygj Yos Y6D3 D3h 34 3-X -$ -N2c N2d N2r   r   ),r   typingr   r   r   r   r~   triton._C.libtritonr   r   r   rE   r	   r   r
   r   r   r   triton._C.libprotonr   triton.compilerr   triton.runtime.jitr   triton.runtime._allocationr   r   triton.backendsr   hookr   flagsr   r   r\   r   r  r   r8   rG   rh   r{   r   r   r6   r   r   <module>r%     s     - -  / 7 1 7 7 3 $ * K $  C   .) )&36eC)A)A$AB 36tG_G_ 36l>3 >f2$ f2r   