
    i1                     <   U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z	d dlZd dlZd dlZd dlZd dlmZmZ d dlmZmZmZmZmZ d dlmZ d dlZd dlmZmZmZ d dlmZ d dl m!Z! d dl"m#Z# d d	l$m%Z% d d
l&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2 d dl3m4Z4 d dl5m6Z6 d dl7m8Z8 ddl9m:Z:m;Z; ddl;m<Z< ddl=m>Z>m?Z?m@Z@mAZAmBZB ddlCmDZD  ej                  eF      ZGdaHeeIeJeKeLf         eMd<   daNeOeMd<   daPeeJeQeeK   f      eMd<    e'eFd      ZR e'eFd      ZSeIe   ZT e j                  ddd g      ZVg d!ZWej                  d"eOfd#       ZY	 	 d`d$eIe>   d%eOd&eeK   d"dfd'ZZd(eIe>   d"ej                  fd)Z\	 	 dad$eeT   d*eJeKeKf   d+eeK   d,eQd"df
d-Z]d*eJeKeKf   d"eJeKeVf   fd.Z^d/ej>                  jZ                  d(eTd"dfd0Z_ej                  d"ed   fd1       Zai abeJeKeJeKeIeK   f   f   eMd2<   i aceJeKeIeK   f   eMd3<   dadeeQ   eMd4<   i aeeJeKeKf   eMd5<   i afeJeKeIeK   f   eMd6<   d ageQeMd7<   dbd8Zhej                  d"ed   fd9       Zi G d: d;      Zj G d< d=      Zkd$eTd"dfd>Zld$eTd"dfd?Zmd@eIeeKdf      d"dfdAZnd$ee>   d"dfdBZodCeepeeqf      d"dfdDZrdbdEZsej                  d"ed   fdF       Ztej                   G dG dH             Zv ej                         ZxdIeeQ   dJeJeKef   d"eJeKeJeKeIeK   f   f   fdKZydLeJeKef   d"eJeKeJeKef   f   fdMZzd"eJeKef   fdNZ{d"eJeKeJeKeIeK   f   f   fdOZ|	 dcdPeee>   e<f   dQeKdReOd"eeQ   fdSZ}dTedUed"dfdVZ~dWeKd"efdXZddYdZed[eKf   d\ej                   j                  d]eJeKef   d^ee4   d"eKf
d_Zy)d    N)IteratorSequence)AnyCallableIOOptionalUnion)patch)
draw_graphget_aot_graph_nameget_graph_being_compiled)fx)save_graph_repro)get_debug_dir)utils)getArtifactLogger)trace_structured)signpost_event)GraphModule)_extract_tensor_metadataTensorMetadata)legalize_graph)FileLike)
OrderedSet)tree_map   )configir)ExternKernel)BaseSchedulerNodeFusedSchedulerNodeNopKernelSchedulerNode
OutputNodeSchedulerNode)VGRAPH_EXECUTION_ORDERFRECORD_GRAPH_EXECUTIONGRAPH_COMPILE_IDSir_pre_fusionir_post_fusionBufMetanamen_origin)dotz-Gnslimit=2z-Gnslimit1=2z-Gmaxiter=5000returnc                  0    t        j                  d      d uS )Nr.   )shutilwhich     O/var/www/html/engine/venv/lib/python3.12/site-packages/torch/_inductor/debug.pyhas_dotr6   >   s    <<d**r4   nodesprint_graphfnamec           
         t               st        j                  d       y|
t               }t	        |       }|j
                  D ]  }d|j                  vr|j                  d   j                  }t        |t              rt        |d   t              r|d   f}n|d   }d}t        |t        j                        r|j                  j                  }t        ||ddddd      }||j                  d<    |rt!        |       t#        i |      }t%        |       |j&                  j)                          t+        ||dt,        j.                  j0                         y)z$
    Draw a graph in fname.svg.
    z*draw_buffers() requires `graphviz` packageNfusion_metar   tensor_metaF)
clear_metadot_graph_shape)r6   logwarningr   create_fx_from_snodesr7   metagroup
isinstancetupleintr   ComputedBufferdatadtyper   printr   r   graphlintr   r   tracer>   )	r7   r8   r9   rK   noderC   rI   metadatagms	            r5   draw_buffersrQ   C   s"    9@A}(*!%(E ,		)		-(..eU#%(C(qa dB--.IIOOE!%dD$M#+		- !,$ e	R	B2HHMMO
EeV\\5Q5Qr4   snodesc                 
   dt         dt        dt        f   fd}t        j                  dg d      }i }i }t
        j                  j                         }d}g }d}| D ]  }	|	j                         rd}
|
}ns|	j                         rd	}
|
}n^t        |	t              rd
}
|
}nIt        |	t              rd}
|	j                  }n*t        |	t              rd}
|	j                  }nt        d      t
        j                   j"                  j%                  |	j'                         d      }|
 d| } ||      }i }t)        |	d      rd|	j+                         i}|j-                  |d|      }dt.        t0        t        f   dt2        ffd |	      r|j5                  |       |	j7                         }||_         |||	|
      |j:                  d<   |||<   |	j=                         D ]  }|||j7                         <    ||} | D ]  }	|	j7                         }|	j>                  j@                  }||   }g }|D ]{  }|j8                  |v r||j8                     }nD|jC                  |      5  |jE                  |j8                        }|||j8                  <   ddd       |k(  rk|j5                  |       } tG        |      |_$         |jK                  tM        |      dk(  r|d          |S tG        |             |S # 1 sw Y   ixY w)B
    Creates a FX Graph from a list of SchedulerNode objects.
    r,   r/   .c                 4    dt         dt        fd}| |_        |S )Nargsr/   c                       yNr   r3   )rV   s    r5   func1z;create_fx_from_snodes.<locals>.get_fake_func.<locals>.func1w   s    r4   )r   rF   __name__)r,   rY   s     r5   get_fake_funcz,create_fx_from_snodes.<locals>.get_fake_funcv   s#    	 	 	 r4   
FusionMeta)rC   snodetypeNexterntemplatenopcomputefusedzUnknown node typeoriginal_atenz: 
get_devicedevicer3   rV   kwargsr]   c                     t        | t              rt        fd| j                  D              S t        d | j	                         D              S )Nc              3   .   K   | ]  } |        y wNr3   ).0x	in_outputs     r5   	<genexpr>z;create_fx_from_snodes.<locals>.in_output.<locals>.<genexpr>   s     >A9Q<>s   c              3   r   K   | ]/  }|j                   D ]  }t        |j                  t                 1 y wrk   )usersrD   rN   r#   )rl   bufusers      r5   ro   z;create_fx_from_snodes.<locals>.in_output.<locals>.<genexpr>   s<      II  499j11s   57)rD   r!   anyrR   get_outputs)r]   rn   s    r5   rn   z(create_fx_from_snodes.<locals>.in_output   sF    %!34>>>>  ,,.  r4   r;   r   r   )'strr   rF   collections
namedtupletorchr   Graph	is_externis_templaterD   r"   r$   rC   r!   RuntimeError	_inductorr   get_fused_kernel_name	get_nodeshasattrre   call_functionr	   r    boolappendget_namer,   rB   ru   read_writesreadsinserting_beforeplaceholderrE   rV   outputlen)rR   r[   r\   buf_to_fx_nodenode_to_fx_noderK   
first_nodeoutputsrC   r]   	node_type
fused_name	func_name	node_funcrh   fx_noder,   rr   depsnew_argsdepdep_nodern   s                         @r5   rA   rA   q   s   
C HS#X$6  ''6PQJNOHHNNEJGE 2!?? IE "IE56IE}-!IKKE12IKKE233__**@@OO

 !kJ<0	!),	5,' 0 0 23F%%ib%H	U#46H#HI 	d 	 UNN7#~~&0y&I]# '$$& 	5C-4N3<<>*	5  Je2!j  '~~  &&!$' 		&Cxx>))#((3++J7 8$00:H/7N388,8 7"OOH%		& X#'& 
LLs7|q0EL 7<GnEL8 8s   &+K99Lnode_name_to_buf_nameparent_buf_name	n_originsc                 X   | y | D ]  }|j                         }|j                         }| t        |      dkD  rt        ||||n|       Et        |      dk(  r|d   |k(  sJ |j                  }||j
                  x|j
                  D ]  }|j                  }	|	|vs||n|||	<     y )Nr   r   )r   r   r   $update_orig_fx_node_name_to_buf_namerN   originsr,   )
r7   r   r   r   rN   buf_namechildren_nodesir_nodeorigin	node_names
             r5   r   r      s     } ==?)%#n*=*A0%+3
 ~&!+q0AT0III))?goo5oo 	FI  55 / 7H_ &i0	#r4   c                     i }| j                         D ]-  \  }}||vrt        |g      ||<   ||   j                  |       / i }| j                         D ]"  \  }}t        ||         }t	        ||      ||<   $ |S rk   )itemsr   addr   r+   )r   buf_name_to_n_noder   r   node_name_to_buf_metan_nodes         r5   get_node_name_to_buf_metar      s     4::< 8	8--+5yk+Bx(x(,,Y7	8 4::< E	8'12+28V+Di(E ! r4   rP   c                     i }t        ||       |yt        |      }| j                  j                  D ]9  }|j                  |v s|j                  |j                        |j                  d<   ; y)rT   Nbuf_meta)r   r   rK   r7   r,   getrB   )rP   rR   r   r   rN   s        r5   annotate_orig_fx_with_snodesr     sm     -/(1FG$56KL I99--$9$=$=dii$HDIIj!Ir4   c               #     K   t         j                  j                  dd      dk(  } dd l}t	        j
                  |j                  j                  j                        }t        j                         }| s	 d  |j                          y |j                  t        dd             t         j                  j                  t!               d      }t        j"                  |d       t	        j$                  t         j                  j                  |d	t'                d
            }|j)                  t        j*                         |j-                  t	        j.                  d             |j1                  |       	 d  |j3                  |       |j                          y # |j                          w xY w# |j3                  |       |j                          w xY ww)NTORCH_COMPILE_DEBUG01r   z*functorch.compile.config.debug_partitionerTtorchinductor)exist_okaot_z
_debug.log3[%(filename)s:%(lineno)d %(levelname)s] %(message)s)osenvironr   torch._functorch.aot_autogradlogging	getLogger
_functorchaot_autogradrZ   
contextlib	ExitStackcloseenter_contextr
   pathjoinr   makedirsFileHandlerr   setLevelDEBUGsetFormatter	Formatter
addHandlerremoveHandler)compile_debugry   r?   stackr   fhs         r5   enable_aot_loggingr     sZ    JJNN#8#>#EM(


E,,99BB
CC  "E	KKM 
JDQR77<<9DKKt$			
%'(
3	

B KKOOOP NN2"3 KKM0 	"s7   A1G4F 8C>G7F2 ;"GF//G2#GG _inductor_post_to_pre_grad_nodes._inductor_triton_kernel_to_post_grad_node_info_pre_grad_graph_id#_inductor_pre_grad_node_stack_trace_inductor_kernel_stack_trace(_inductor_kernel_provenance_debug_handlec                      da y rX   )r   r3   r4   r5   -reset_inductor_kernel_provenance_debug_handler   L  s    /0,r4   c               #     K   t         } t        j                         }t        j                         }t        j                         }t
        j                         }da i ai ai ai a	 d | a |a|a|a|ay# | a |a|a|a|aw xY ww)zzContext manager that resets provenance tracking globals upon entering
    and restores their original values when exiting.N)r   r   copyr   r   r   )original_pre_grad_graph_idoriginal_post_to_pre_grad_nodes-original_triton_kernel_to_post_grad_node_info+original_inductor_pre_grad_node_stack_trace$original_inductor_kernel_stack_traces        r5   reset_provenance_globalsr   Q  s      "4&F&K&K&M#6;;= 2 	,002 0 ,H+L+L+N( ')$572*,'#% 
 8+J(9 	7 (L$7 	, 8+J(9 	7 (L$7 	,s   A!B$A3 (B3A??Bc                   ^   e Zd Z ej                         Zededee   fd       Z	ddZ
deddfdZ	 dded	ed
ededee   f
dZej                   	 dded	ed
ededeee      f
d       ZdedefdZddZddZdededdfdZdeee      dee   dee   ddfdZddZdedeed      fdZy)DebugContextfolder_namer/   c                 6   t         j                  j                  xs
 t               }t        j
                  D ]`  }t        j                  j                  |d|  d|       }t        j                  j                  |      rIt        j                  |       |c S  y )Nr   .)r   rM   	debug_dirr   r   _counterr   r   r   existsr   )r   r   ndirnames       r5   create_debug_dirzDebugContext.create_debug_dir  s{    LL**=mo	&& 	Aggll-q$G
 77>>'*G$	 r4   Nc                 R    d | _         d | _        t        j                         | _        y rk   )_prof_pathr   r   _stack)selfs    r5   __init__zDebugContext.__init__  s     

 **,r4   new_pathc                    | j                   sy |j                  d      sJ |       ddlm} 	  || d      5  t        j
                  j                  |      rt        j                  |       t        j                  | j                   |       d d d        y # 1 sw Y   y xY w# t        $ r$ t        j                  d| j                   |       Y y w xY w)Nz.debugr   )FileLockz.lockz(Failed to copy debug files from %s to %s)r   endswithfilelockr   r   r   r   r1   rmtreecopytreeOSErrorr?   r@   )r   r   r   s      r5   r   zDebugContext.copy  s    zz  *4H4*%	XJe,- 677>>(+MM(+

H56 6 6  	KK:DJJ	s/   B" ABB" BB" B" "*CCfilename
write_moderV   rh   c                     | j                   sJ t        t        j                  j	                  | j                   |      |g|i |S rk   r   openr   r   r   )r   r   r   rV   rh   s        r5   fopenzDebugContext.fopen  s:     zzzBGGLLX6
TTTVTTr4   c              /      K   | j                   sJ t        t        j                  j	                  | j                   |      |g|i |5 }| d d d        y # 1 sw Y   y xY wwrk   r   )r   r   r   rV   rh   fs         r5   fopen_contextzDebugContext.fopen_context  sW      zzz"'',,tzz84jR4R6R 	VWG	 	 	s   AA#	A	A#A A#suffixc                 r    | j                   sJ t        j                  j                  | j                   |      S rk   )r   r   r   r   )r   r  s     r5   r   zDebugContext.filename  s'    zzzww||DJJ//r4   c                    t         j                  j                  dd l}| j                  sJ t
        j                  j                  | j                  t
        j                  j                  | j                         d      }|j                  |d      5 }|j                  | j                  t
        j                  j                  | j                               d d d        t         j                  j                  |       y y # 1 sw Y   *xY w)Nr   z.tar.gzzw:gz)arcname)r   rM   
upload_tartarfiler   r   r   r   basenamer   r   )r   r  tar_filetars       r5   r  zDebugContext.upload_tar  s    <<"".:::ww||

rww//

;<GDH h/ J3

BGG,<,<TZZ,HIJLL##H- /J Js   ADD
c                    t         j                  rjt        j                  d      j                  }j                  t        j                         dt        dd ffd}| j                  j                  ||       | j                  j                  t        j                  |              t         j                  j                  sy | j                  t!                     | _        t         j                  j$                  r | j'                  dt        j                         t         j                  j(                  r!| j'                  dt        j*                         y y )Nztorch._dynamolevelr/   c                 (    j                  |        y rk   )r   )r  r?   s    r5   reset_log_levelz/DebugContext.__enter__.<locals>.reset_log_level  s    U#r4   z	debug.logzinfo.log)r   debugr   r   r  r   r   r   r   callbackr   r%   set_debug_handlerrM   enabledr   r   r   	debug_log_setup_log_captureinfo_logINFO)r   
prev_levelr  r?   s      @r5   	__enter__zDebugContext.__enter__  s    <<##O4CJLL'$s $t $ KK  *=!!!"5"5d";<||##**+=+?@
<<!!##K?<<  ##J= !r4   r  c                    t        j                  d      }| j                  j                  | j	                  |            }t        j
                  |      }|j                  |       |j                  t        j                  d             |j                  |       |j                  t        |j                  |             | j                  j                  |j                  |       y )Nztorch._inductorr   )r   r   r   r   r   StreamHandlerr   r   r   r   minr  r  r   )r   r   r  r?   fdchs         r5   r  zDebugContext._setup_log_capture  s    
  12[[&&tzz(';<""2&
E
ST	
 	rSE*+S..3r4   exc_typeexc_valexc_tbc                 .   | j                   r*| j                   j                          | j                          | j                  r9| j	                          t
        j                  dt               | j                         | j                  j                          y )Nz%s debug trace: %s)
r   disable_save_profile_datar   r  r?   r@   r   r   r   )r   r  r   r!  s       r5   __exit__zDebugContext.__exit__  sa     ::JJ ##%::OOKK,.F.H$**Ur4   c                    | j                   sJ | j                   j                  | j                  d             | j                  d      5 }t	        j
                  | j                   |      }|j                          |j                  d       |j                  d       |j                  d       |j                  d       d d d        y # 1 sw Y   y xY w)Nzcompile.profzcompile.stats)streamcumtimed   tottime)	r   
dump_statsr   r   pstatsStats
strip_dirs
sort_statsprint_stats)r   r  statss      r5   r$  zDebugContext._save_profile_data  s    zzz

dmmN;<ZZ( 	#BLLB7EY'c"Y'c"	# 	# 	#s   
A6C		Cr,   ).Nc                 
   t         j                  j                  r0t        t         j                  |      r	 t        t	        |       |      S dt        dt        dd fd}|S # t
        $ r t        j                  dd       Y y w xY w)Nz Ignoring exception in debug codeTexc_inforV   rh   r/   c                       y rk   r3   rg   s     r5   ignoredz)DebugContext.__getattr__.<locals>.ignored  s    r4   )	r   rM   r  getattrDebugFormatter	Exceptionr?   r@   r   )r   r,   r6  s      r5   __getattr__zDebugContext.__getattr__	  sy    <<GFLL$$?~d3T::s c d  N  >Ns   A  BBr/   N)w)rZ   
__module____qualname__	itertoolscountr   staticmethodrv   r   r   r   r   r   r   r   r   contextmanagerr   r  r   r  r  rF   r  r^   BaseExceptionr%  r$  r   r:  r3   r4   r5   r   r   |  s   y Hc hsm  -
S T & UU U 	U
 U 
CU  		 	 		
 	 
"S'		 	0s 0s 0
.>.44 4 
	4 4./ -( 	
 
	# )1D(E r4   r   c                      e Zd ZdeddfdZdej                  j                  deej                     ddfdZ
dej                  j                  deej                     ddfdZd	eddfd
Zd	eddfdZed	edefd       Zd	eddfdZdej                  j                  d	eddfdZddededdfdZdedeej,                     dedef   dededee   ddfdZy)r8  handlerr/   Nc                 x    |j                   | _         |j                  | _        |j                  | _        || _        y rk   )r   r  r   rE  )r   rE  s     r5   r   zDebugFormatter.__init__  s/    ]]
$22((r4   rP   inputsc           
         | j                  d      5 }d }t        j                  j                  j                  j
                  rRt        j                  j                  j                  |      }t        j                  j                  |j                        }t        j                  j                  j                  j
                  }t        j                  j                  j                  ddd      5  t        |||d||       d d d        d d d        | j                  d      5 }|j                  |j!                  d             d d d        y # 1 sw Y   MxY w# 1 sw Y   QxY w# 1 sw Y   y xY w)Nzfx_graph_runnable.pyF)ztrace.enabledztrace.save_real_tensorsinductor)save_dirstable_hashzfx_graph_readable.pyprint_output)r   ry   r~   r   rM   save_real_tensors_subclasses
fake_utilstry_convert_fake_to_realr   r   r   r,   r
   r   writeprint_readable)r   rP   rG  r  rJ  rK  s         r5   fx_graphzDebugFormatter.fx_graph  s+   
 ZZ./ 	2H%%++==**55NNvV77??2773
  //0066HHK''--"'EJ 
 !% +
	, ZZ./ 	<2HHR&&E&:;	< 	<
 
	 	,	< 	<s0   CE/EE""E%E	EE"%E.c                     | j                  d      5 }|j                  |j                  d             d d d        y # 1 sw Y   y xY w)Nzfx_graph_transformed.pyFrL  )r   rR  rS  )r   rP   rG  r  s       r5   fx_graph_transformedz#DebugFormatter.fx_graph_transformed=  sB    
 ZZ12 	<bHHR&&E&:;	< 	< 	<s	   "=Ar7   c                     | j                  d      5 }|j                  | j                  |             d d d        y # 1 sw Y   y xY w)Nzir_pre_fusion.txtr   rR  	_write_irr   r7   r  s      r5   r)   zDebugFormatter.ir_pre_fusionE  s;    ZZ+, 	,HHT^^E*+	, 	, 	,	   !<Ac                     | j                  d      5 }|j                  | j                  |             d d d        y # 1 sw Y   y xY w)Nzir_post_fusion.txtrX  rZ  s      r5   r*   zDebugFormatter.ir_post_fusionI  s;    ZZ,- 	,HHT^^E*+	, 	, 	,r[  c                     t        j                         }| D ]2  }|j                  |j                                |j                  d       4 |j	                         S )Nz


)ioStringIOrR  	debug_strgetvalue)r7   rr   rN   s      r5   rY  zDebugFormatter._write_irM  sI    kkm 	 DIIdnn&'IIh	  ||~r4   c                 <    t        || j                  d             y )Nzgraph_diagram.svg)r9   )rQ   r   )r   r7   s     r5   graph_diagramzDebugFormatter.graph_diagramU  s    U$--0C"DEr4   c                     t        ||       t        || j                  d      dt        dt        j
                  j                         y )Nzorig_fx_graph_diagram.svgFT)r9   r=   progparse_stack_tracer>   )r   r   r   GRAPHVIZ_COMMAND_SCALABLEr   rM   r>   )r   rP   r7   s      r5   draw_orig_fx_graphz!DebugFormatter.draw_orig_fx_graphX  s<    
 	%R/-- ;<*""LL88	
r4   r   	extensionc                 T    t        j                  || j                  d|              y )Nzoutput_code.)r1   r   r   )r   r   ri  s      r5   output_codezDebugFormatter.output_codeg  s     Hdmml9+,FGHr4   r,   input_nodestimingsChoiceCallerelapseprecompile_elapseprescreening_elapsec           	      N   ddl m dt         j                  dt        t        t        f   ffd|t
        j                  j                         t
        j                  j                         |D cg c]
  } |       c}|||d}| j                  ddd	
      5 }	|j                         D ][  \  }
}t        |
j                               }|j                  |       ||d<   t        j                  ||	       |	j                  d       ] 	 d d d        y c c}w # 1 sw Y   y xY w)Nr   )FixedLayoutrN   r/   c           	          t        | d      r| j                  }nd}|t        |       j                  d}	 | j	                         }t        |      rd}	 t        |j                        } |j                  |j                  g t        j                  j                  j                  |j                         g t        j                  j                  j                  |j"                        |      }t%        |      |d<   nt%        |      |d<   	 t%        | j'                               |d<   	 t%        | j)                               |d	<   	 t%        t        j                  j                  j                  | j+                                     |d
<   	 t%        t        j                  j                  j                  | j-                                     |d<   	 t%        t        j                  j                  j                  | j/                                     |d<   t        | d      r9t        | j0                  t2        j4                        r | j0                        |d<   |S # t        $ rJ 	 t        j                  j                  j                  |j                  d      }n# t        $ r Y nw xY wY Cw xY w# t        $ r Y w xY w# t        $ r Y w xY w# t        $ r Y w xY w# t        $ r Y aw xY w# t        $ r Y -w xY w# t        $ r Y w xY w)Nr,    )r,   r^   r   )fallback)rI   sizestrideoffsetlayoutrI   rf   rx  rw  numelrH   )r   r,   r^   rZ   get_output_specrD   rF   ry  r9  r%   rK   sizevars	size_hintrf   rI   
size_hintsrw  rx  rv   	get_dtypere   
get_strideget_size	get_numelrH   r   IRNode)rN   r   	node_inforz  ry  static_layoutrs  build_node_infos         r5   r  z>DebugFormatter.log_autotuning_results.<locals>.build_node_infou  s   tV$ II		!T
++I--/fk2F!!$V]]!3 %0$llHqww//::6;;GHL!1!1!<!<V]]!KL%%M +.m*<Ih'*-f+Ih'%()9%:	'"&)$//*;&<	(#&)GG$$//0AB'	(#
$'(8(8(C(CDMMO(T$U	&!%()9)9)C)CDNNDT)U%V	'" tV$DIIryy)I$3DII$>	&!W % !!%&WW%5%5%?%? & &@ &F  ) ! !!"            s   J1 I *B$J1 K ,K 	AK! AK1 AL 	J.%5JJ.	J'$J.&J''J.*J1 -J..J1 1	J>=J>	KK	KK!	K.-K.1	K>=K>	LL)op_namecuda_device_namecuda_device_countrl  autotuning_timeprecompile_timeprescreening_timezautotuning_result_json_list.txtatzutf-8)encodingbenchmark_result
)r   rs  r  dictrv   ry   cudaget_device_namedevice_countr  r   	info_dictupdatejsondumprR  )r   r,   rl  rm  ro  rp  rq  rN   general_propertiesr  callertimer  rs  r  s                @@r5   log_autotuning_resultsz%DebugFormatter.log_autotuning_resultsj  s    	$:	")) :	S#X :	z  %

 : : <!&!8!8!:>IJdOD1J%0!4
 -tg   
 	 '  !1!1!34	  !3404	,-		)R(	 	 K
	 	s   2D
A/DD$)py)rZ   r=  r>  r   r   ry   r   r   listTensorrT  rV  SchedulerNodeListr)   r*   rA  rv   rY  rc  rh  rk  r   r  r  floatr   r  r3   r4   r5   r8  r8    s     <HH  < U\\"< 
	<<<HH  < U\\"< 
	<,#4 , ,,$5 ,$ , * s  F#4 F F
HH  
 !
 
	
IC IC I4 IXX "))_X ne+,	X
 X !X &e_X 
Xr4   r8  c                     t         j                  t        j                        r)t         j	                  dt
        j                  |              t        j                  j                  |        y )NzBEFORE FUSION
%s)
ir_pre_fusion_logisEnabledForr   r  infor8  rY  r%   r  r)   r7   s    r5   log_ir_pre_fusionr    sB    %%gll32N4L4LU4STGG% r4   c                     t         j                  t        j                        r)t         j	                  dt
        j                  |              t        j                  j                  |        y )NzAFTER FUSION
%s)
ir_post_fusion_logr  r   r  r  r8  rY  r%   r  r*   r  s    r5   log_ir_post_fusionr    sB    &&w||4 2N4L4LU4STGG5!r4   schedulec                 x     	 t        dd  fd       y # t        $ r t        j                  dd       Y y w xY w)Nartifactc                      dddS )Ninductor_collective_scheduler  r,   r  r3   r3   r4   r5   <lambda>z+_dump_collective_schedule.<locals>.<lambda>  s    6"! r4   c                       S rk   r3   r  s   r5   r  z+_dump_collective_schedule.<locals>.<lambda>  s    x r4   metadata_fn
payload_fnzAFailed to log inductor_collective_schedule via structured loggingTr3  )r   r9  r?   r  r  s   `r5   _dump_collective_scheduler    sE    
 (	
  
		O 	 	

s     99c           
          | D cg c]6  }t        t        |dd       x}t        j                        rt        |dd       8 }}|rt	        |       y y c c}w )NrN   python_kernel_name)rD   r7  r   _CollectiveKernelr  )r7   rN   opr  s       r5   log_collective_scheduler    s`     GD&$77b9M9MN 	($/H  !(+ s   ;Anode_runtimesc           	      x   	 t         j                  j                  j                  dt        t
        t              dt        t           ffd}dt        dt        t           fd}g | D ]  \  }}t        |j                  d|j                               }t        j                  |j                        rdnd}g }	 |j                         D ]  }|j                  }	|	j                         }
t!        |	j"                  t$        j&                        r|	j)                         nd	}|	j+                         }|j-                   ||
       ||       ||      d
        	 j-                  ||||d       	 t1        dd fd       y	# t.        $ r Y 7w xY w# t.        $ r t2        j5                  dd       Y y	w xY w)zDLog per-op runtime estimates and output tensor metadata for TLParse.rm   r/   c                 .    | t         |             S g S rk   )r  )rm   to_size_hintss    r5   to_listz,log_runtime_and_tensor_meta.<locals>.to_list  s    -.]4a()BBr4   rI   c                 D    | y t        |       }|j                  d      }|S )Nztorch.)rv   removeprefix)rI   ss     r5   dtype_to_strz1log_runtime_and_tensor_meta.<locals>.dtype_to_str  s&    }E
Ax(AHr4   r  
collectiverb   N)shaperx  rI   )r,   r^   estimated_runtime_nsr   r  c                      dddS )N inductor_runtime_and_tensor_metar  r  r3   r3   r4   r5   r  z-log_runtime_and_tensor_meta.<locals>.<lambda>&  s    :"! r4   c                      d iS )Nopsr3   )r  s   r5   r  z-log_runtime_and_tensor_meta.<locals>.<lambda>*  s    s| r4   r  z.Failed to log inductor_runtime_and_tensor_metaTr3  )r%   rK   r}  r  r   r   r   r  rv   r7  rN   r   r   is_collectiveru   maybe_get_sizerD   rz  r   Layoutr  maybe_get_dtyper   r9  r   r?   r  )r  r  r  r  
runtime_nsr,   op_typer   rr   irnoder  rx  rI   r  r  s                @@r5   log_runtime_and_tensor_metar    s   :S((33	Cx. 	C49 	C	 	 	 %'* !	MAz166#7FD&+&9&9!&&&AlyG -/G==? C XXF"113E &fmmRYY? ))+! 
 #224ENN%,U^&-fo%1%%8& JJ #,6&	5!	F 	 ,	
  (  S		BT	RSs7   B7F <BF,F 	FF FF  F98F9c                      t         sy	 t        dd d        y# t        $ r t        j	                  dd       Y yw xY w)	z:Emit a structured artifact with the graph execution order.Nr  c                      dddS )Ngraph_executionr  r  r3   r3   r4   r5   r  z%log_graph_execution.<locals>.<lambda>7  s    )"! r4   c                      dt         iS )Ngraph_execution_order)r&   r3   r4   r5   r  z%log_graph_execution.<locals>.<lambda>;  s     79NO r4   r  zFailed to log graph_executionTr3  )r&   r   r9  r?   r  r3   r4   r5   log_graph_executionr  0  sH     
B P	
  B		1D	ABs     ==c               #   l   K   g a i ada	 d t                dada day# t                dada daw xY ww)z5Record graph execution order and log it once on exit.TNF)r&   r(   r'   r  r3   r4   r5   $record_and_log_graph_execution_orderr  A  sU      !!!& $  	!& $ s   4 414c                   6    e Zd ZU eed<   ej                  ed<   y)TensorMetadataHoldertensor_metadatarf   N)rZ   r=  r>  r   __annotations__ry   rf   r3   r4   r5   r  r  Q  s    ##LLr4   r  pre_grad_graph_idpost_to_pre_grad_nodes_jsonc           	         i i d}t        |t              st        j                  d       |S t        | t              s|S t        j                  t              }t        j                  t              }	 dt        t        t        f   dt        fd}|j                         D ]<  \  }}t        |t              st        j                  d       |c S |D ]  } ||      s|c c S |j                  d      | k(  r.||d      j                  |       ||   j                  |d          |j                  d	g       D 	cg c]  }	|	|f }
}	|
sx|
j                         \  } ||      s|c c S |j                  d      | k(  r.||d      j                         |   j                  |d          |
j!                  fd
|j                  d	g       D               |
r
 ? dt        t        t        f   ddfd} ||        ||       ||dS c c}	w # t"        $ rd}t%        dddt        |      t'        j(                         d       t        j                  d|       t        j                  d|        |cY d}~S d}~ww xY w)zx
    Create bidirectional mappings between pre_grad graph nodes
    and post_grad graph code nodes, and vice versa.
    )	preToPost	postToPrezCProvenance tacking error: post_to_pre_grad_nodes_json is not a dictrN   r/   c                     t        | t              st        j                  d       yd| vsd| vsd| vrt        j                  d       yy)NzVProvenance tacking error: node provenance in post_to_pre_grad_nodes_json is not a dictFgraph_idr,   	from_nodezYProvenance tacking error: node provenance in post_to_pre_grad_nodes_json has wrong formatT)rD   r  r?   error)rN   s    r5   check_formatz8create_mapping_pre_post_grad_nodes.<locals>.check_formatv  sN    dD)		l %t);{RV?V		o r4   zIProvenance tacking error: post_to_pre_grad_nodes_json value is not a listr  r,   r  c              3   &   K   | ]  }|f 
 y wrk   r3   )rl   r   
parent_keys     r5   ro   z5create_mapping_pre_post_grad_nodes.<locals>.<genexpr>  s      !,-J!s   dNc                 J    | D ]  }t        | |         | |<    t        |       } y rk   r  r  r  keys     r5   convert_sets_to_listszAcreate_mapping_pre_post_grad_nodes.<locals>.convert_sets_to_lists  +     &af#&QAr4   rI  provenance_tracking_error"create_mapping_pre_post_grad_nodesfunction	error_msgstack_tracez post_to_pre_grad_nodes_json:  %szpre_grad_graph_id:  %s)rD   r  r?   r  rF   rw   defaultdictr   rv   r   r   r   r  r   r   popextendr9  r   	traceback
format_exc)r  r  empty_returnpre_to_postpost_to_prer  	outer_key
node_arrayrN   r   r   current_noder  er  s                 @r5   r  r  Z  s    5L
 148		WX'- "-"9"9*"EK"-"9"9*"EKD	tCH~ 	$ 	 &A%F%F%H 	!Izj$/		_ $#" #D)''88J'+<<V-11)<	*..tF|< 26+r1JKA!YKK/4yy{,L*'5++#''
37HH#L$89==jI#J/33L4HILL !1=1A1A+r1R!  	6	T#s(^ 	 	 	k*k*$$
 	
) L0   	'@ V(335	
 			46QR		*,=>sR   3AH H $AH :HH 
 H +A.H 3H H 	J AI;5J ;J triton_kernel_to_post_grad_jsonc           	      F   i i d}t        | t              st        j                  d       |S t	        j
                  t              }	 | j                         D ]I  \  }}t        |t              st        j                  d       |c S |D ]  }||   j                  |        K dt        t        t        f   ddfd} ||       | |dS # t        $ rN}t        dd	d
t        |      t        j                         d       t        j                  d|        |cY d}~S d}~ww xY w)zqCreate bidirectional mappings between triton kernel name and post_grad
    graph code nodes, and vice versa.
    )cppCodeToPostpostToCppCodezGProvenance tacking error: triton_kernel_to_post_grad_json is not a dictzMProvenance tacking error: triton_kernel_to_post_grad_json value is not a listr  r/   Nc                 J    | D ]  }t        | |         | |<    t        |       } y rk   r  r  s     r5   r  zFcreate_node_mapping_kernel_to_post_grad.<locals>.convert_sets_to_lists  r  r4   rI  r  "create_mapping_kernel_to_post_gradr  z$triton_kernel_to_post_grad_json:  %s)rD   r  r?   r  rw   r  r   r   r  r   rv   r   r9  r   r  r  )r  r   post_to_cpp_coder  r  	curr_noder  r  s           r5   'create_node_mapping_kernel_to_post_gradr    s:    /L
 5t<		U	
 '2'>'>z'J$%D%J%J%L 	;!Izj$/		c $#' ;	 +//	:;	;	T#s(^ 	 	 	./<-
 	
   	'@ V(335	
 			24S	
 s&   >C	 AC	 		D ADD D c            	         	 i } t         rrt        t              }i t        |} t        j
                  j                  r?t        j                  j                  dd      5 }t        j                  | |       d d d        d| d<   | S # 1 sw Y   xY w# t        $ r8}t        dddt        |      t        j                          d       i cY d }~S d }~ww xY w)	Nz/inductor_provenance_tracking_node_mappings.jsonr<  g       @versionrI  r  dump_inductor_provenance_infor  )r   r  r   r   r   rM   r  r%   r  r   r  r  r9  r   rv   r  r  )node_mappingnode_mapping_kernelr  r  s       r5   r  r    s    " (*"I>#2%L ||##WW]]Es 0IIlB/0 #&Y0 0   	'; V(335	
 	s6   AB B4B BB 	C-CCCc            	          	 t         j                  di       } t        t        j	                               t        t
        j	                               z  }i }|D ]p  }t
        j                  |g       }t               }|D ]#  }|j                  | j                  |g              % t        j                  |g       |t        |      d||<   r |S # t        $ r8}t        dddt        |      t        j                         d       i cY d}~S d}~ww xY w)zCreate kernel information JSONr  )stack_tracespost_grad_nodespre_grad_nodesrI  r  create_kernel_information_jsonr  N)r   r   r   r   keysr   r  r  r9  r   rv   r  r  )r  all_kernelsresultkernel_namer  r  	post_noder  s           r5   r  r    s   %
 7::;K !=!B!B!DE
:??AI
 
 & 	KLPPRO /9lN, F	%%kooi&DEF != @ @b Q#2"&~"6#F;	  
'< V(335	
 	
s   C	C 	D-DDDnode_scheduler  r{   c           	        
 	 ddl m}m} t        dz  ag }| dt         }|rt	        | t
              sJ t        j                  |g       
| j                  r,| j                  j                  }|
vr6
j                  |       n$
j                  
fd| j                  D               t        | j                               }nt	        | t              sJ t               }| D ]  }|||fvs
|j                   t        j                  |g       
|j#                  |j                   j                                
j                  
fd|j                   j                  D                t        |      }t$        j                  |g       j                  |       t        S # t&        $ r6}	t)        ddd	t+        |	      t-        j.                         d
       Y d}	~	yd}	~	ww xY w)z
    Set the mapping between `kernel_name` and the post_grad nodes in `node_schedule`.

    Returns a unique int debug handler for each call to this function.
    r   )DisableReductionEnableReduction:c              3   R   K   | ]  }|j                   vr|j                      y wrk   r,   rl   r   curr_node_infos     r5   ro   z:set_kernel_post_grad_provenance_tracing.<locals>.<genexpr>i  s)      &{{.8 KK&   $'Nc              3   R   K   | ]  }|j                   vr|j                      y wrk   r%  r&  s     r5   ro   z:set_kernel_post_grad_provenance_tracing.<locals>.<genexpr>{  s)      . &%{{.@ #KK.r(  rI  r  'set_kernel_post_grad_provenance_tracingr  )codegen.simd_kernel_featuresr!  r"  r   rD   r   r   
setdefaultorigin_noder,   r   r  r   r  get_stack_tracesr   rN   r  r   r9  r   rv   r  r  )r  r  r{   r!  r"  r  origin_node_namestack_traces_setr]   r  r'  s             @r5   r*  r*  G  s   =S 	1A50"$$Q'O&PQm\:::KVVRN ((#0#<#<#A#A #>9"))*:;%% &"/"7"7& 
   > > @ALmT2220:& 2B CCzz-JUU +R '
 )//

0K0K0MN&-- .*/***<*<.    01L$//R@GGU77  	'E V(335	
 s%   C%F )F 6B$F 	G$,GGrV   rh   c                     d}t         j                  j                  |      st        j                  |       dt        dt        fd}t        || |f      \  }}d}| d| dt        t               d}t        |d	      5 }t        j                  ||f|       d
d
d
       t        j                  t        j                        rd| d|d}	t        |	       y
y
# 1 sw Y   BxY w)z
    This function is used to save arguments for a compile_fx_inner function call
    to the file system.  Later on one can replay the compile_fx_inner call
    with the saved arguments using load_args_and_run_compile_fx_inner.
    z/tmp/inductor_saved_argsrm   r/   c                 x    t        | t        j                        rt        t	        |       | j
                        S | S )z
        Pickle FakeTensor will result in error:
        AttributeError: Can't pickle local object 'WeakValueDictionary.__init__.<locals>.remove'

        Convert all Tensor to metadata. This may also makes pickle faster.
        )rD   ry   r  r  r   rf   rm   s    r5   handle_tensorz5save_args_for_compile_fx_inner.<locals>.handle_tensor  s.     a&'(@(CQXXNNHr4   compile_fx_inner/_z.pklwbNz3
Arguments for a compile_fx_inner call is saved to z. To replay the call,
run the following:

from torch._inductor.debug import load_args_and_run_compile_fx_inner
load_args_and_run_compile_fx_inner(z
)
        )r   r   r   mkdirr   r   nextsave_args_cntr   pickler  r?   r  r   r   rJ   )
rV   rh   folderr4  args_to_savekwargs_to_savefn_namer   r  messages
             r5   save_args_for_compile_fx_innerrB    s     (F77>>&!

 
 
 $,MD&>#J L. GXQwiqm!4 5T:D	dD	 7Q\>2A67 &337& 9$ %)8 ,	 	g '7 7s   >CC!r   c                    ddl m} t        | d      5 }t        j                  |      \  }}d d d        dt
        dt
        fd}t        j                  j                  d      }|5  t        j                  d	d
      5  t        |f      \  }} ||i |cd d d        cd d d        S # 1 sw Y   ~xY w# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)Nr   )r5  rbrm   r/   c                 
   t        | t              rrt        j                  j                  j                  | j                  j                  | j                  j                  | j                  j                  | j                        S | S rk   )rD   r  ry   _dynamotestingrand_stridedr  r  rx  rI   rf   r3  s    r5   r4  z9load_args_and_run_compile_fx_inner.<locals>.handle_tensor  se    a-.==((55!!''!!((!!''	  Hr4   T)allow_non_fake_inputs	save_argsF)torch._inductor.compile_fxr5  r   r<  loadr   ry   rO  FakeTensorModer   r
   r   )r   r5  r  rV   rh   r4  	fake_modes          r5   "load_args_and_run_compile_fx_innerrO    s    ;	dD	 &Q{{1~f&	 	 	 !!00t0LI	 1FLLe4 1f~>f001 1 1& &1 1 1 1 1s/   B)&C
=B5	C
)B25B>	:C

C)package_pathfunc.exported_programinductor_configsrP  c                   ddl m} ddlm} ddlm} ddlm} |j                  j                  }|j                  d      }	t        |	t        j                  j                        sJ |j                  \  }
}	 |r$|j                  j                   dk(  r ||d	|
       |r|j                  j                   dk(  rt#        j$                  |	      }t#        j$                  |j                        }t#        j$                  |      } |||d   |d   |
      \  }}t'        |      }t        j(                  j)                  ||d      } | |j                  d      |||dd        | |	|
||||      S # |$ r(} ||dd|       t*        j-                  d       |d }~wt.        $ r2}|r)d}|j                  j                   dk(  rd} ||d	||       |d }~ww xY w)Nr   )AccuracyError)dump_to_minify)r   )_aoti_flatten_inputsF)check_guards   aot_inductor)options   r   )strictTaccuracy)rS  rP  load_and_runcheck_accuracy)rS  rP  r_  aot_inductor_accuracyminify)commandr[  zAccuracy failedrun)torch._dynamo.debug_utilsrU  torch._dynamo.repro.aotirV  torch._inductorr   rK  rW  rZ  dump_aoti_minifiermodulerD   ry   r   r   example_inputsrepro_levelr   deepcopyrE   exportr?   r@   r9  )rQ  rR  rS  rP  rU  rV  r   rW  use_minifierrP   rV   rh   gm_copyexample_inputs_copyconfig_copyflat_example_inputstuple_inputsflattened_epr  rc  s                       r5   aot_inductor_minifier_wrapperru    s    87&?&&99L		 	 e	 	4Bb%((..///#22LD&>F//;;q@ (
 F//;;q@ mmB'G"&--0@0O0O"P--(89K/C#A&#A&#	0, !!45L <<..wU.SL###7!,)!) -%%
 	
  #$		
 	%& G""..!3 (	 s%   7C7E/ /G4#FG#-GG)FNrX   r;  )F)rw   r   r   dataclasses	functoolsr^  r?  r  r   r   os.pathr<  r,  r1   r  collections.abcr   r   typingr   r   r   r   r	   unittest.mockr
   ry   functorch.compiler   r   r   r   torch._dynamo.repro.after_aotr   torch._dynamo.utilsr   rg  r   torch._loggingr   torch._logging._internalr   torch._utils_internalr   torch.fx.graph_moduler   torch.fx.passes.shape_propr   r   torch.fx.passes.tools_commonr   torch.typesr   torch.utils._ordered_setr   torch.utils._pytreer   ru  r   r   r   	schedulerr    r!   r"   r#   r$   virtualizedr%   r   rZ   r?   r&   r  r  rv   objectr  r'   r   r(   rF   r  r  r  rx   r+   rg  cacher6   rQ   rz   rA   r   r   r   rB  r   r   r   r   r   r   r   r   r   r   r8  r  r  r  r  rE   r  r  r  r  	dataclassr  r@  r;  r  r  r  r  r*  rB  rO  rm  ExportedProgramru  r3   r4   r5   <module>r     s        	    	      . 5 5   V V  : - ! , 5 0 - O 7   / (     g! <@ xT#v+%6 78 ?$  $8< 8Dhsm!345 <%h@ &x1AB I 
 +
 
 VZ,@
AT  + + + +!"++ C=+ 
	+\_$'8"9 _bhh _J &*	 %& S>  c]  	 
 
 F!S>!	#w,!"III 
I" %HTN % %V EG  $sDd3i,@'@"A FGI .S$s)^0D I$( HSM (68 #T#s(^ 857 d3S	>2 701 (# 11
 '
(4. '
 '
TY Yxj jZ!. !4 !"/ "D "
U39-=(> 
4 
"	,8,=#> 	,4 	,=Sxc5j8I/J =St =S@B" !htn ! !   
  	!^}^!%c3h^ 
#tCcN#
#$^B9%)#s(^9	#tCH~
9x#tCH~ #L'S$sDI~2F-F(G 'Z H"34lBCHH H c]	HV+# + + +\1S 1S 18 (,Q
38
Qll22Q 38n	Q
 8$Q 	Qr4   