
    i                       U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Zd dl
mZ d dlmZmZ d dlmZmZmZmZmZ d dlZd dlZd dlZd dlmZ d dlmc mZ d dlmZm Z  d dl!m"Z" d dl#m$Z% d dl&m'Z' d d	l(m)Z) d d
l*m+Z+ d dl,m-Z-m.Z. d dl/m0Z0m1Z1 d dl2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> ddl?m$Z$ ddl@mAZA ddlBmCZCmDZDmEZE ddlFmGZG ddlHmIZImJZJ ddlKmLZL ddlMmNZNmOZO ddlPmQZQmRZRmSZS erd dlTZTe$j                  ZVeWeXd<    ej                  eZ      Z[ej                  eXd<   ej                  j                  Z^ej                  j                  Z_e G d d             Z`e G d d             Zae G d d              Zbd!ej                  d"eWfd#Zdd$ej                  d"eWfd%Zfd$ej                  d"eWfd&Zgd!ej                  d"ehfd'Zi G d( d)      Zj ej       Zk	 dd*ej                  d+emej                     d,emej                     d-emeI   d.een   d"ej                  fd/Zod!ej                  d"eWfd0Zpd!ej                  d"eWfd1Zqd!ej                  d"eWfd2Zrd!ej                  d"eWfd3Zsd!ej                  d"eWfd4Ztd!ej                  d"eWfd5Zud!ej                  d"eWfd6Zvd!ej                  d"eWfd7Zwd!ej                  d"eWfd8Zxd!ej                  d"eWfd9Zyd:ej                  d"ezemej                     emej                     emeI   emeI   f   fd;Z{d<emej                     d=enfd>Z|d?eemej                     ezej                     f   d"ehfd@Z}	 	 ddAej2                  j                  d!ej2                  j                  dBe~dCe~fdDZdAej2                  j                  d!ej2                  j                  dEej2                  j                  dFej                   dGe~dHe~d"ej2                  j                  fdIZdJej                  d"e~fdKZd"emej                      fdLZd!ej2                  j                  d"eWfdMZd"ej                   fdNZdOej                   d"ezfdPZdAej2                  j                  d"dfdQZdAej2                  j                  d"dfdRZdSej                  dTej                  dUeenej                  f   d"dfdVZ	 dd<emej                     dSej                  dTej                  dWee<ej                        d"df
dXZddYd:ej                  d<emej                     dZemej                     d[ehdWee<ej                        d"ezej                  ej                  f   fd\Zddd]d:ej                  d^eemeh      dWee<ej                        d"ezej                  ej                  f   fd_Z ehd`      Zdaehd"ehfdbZd!ej                  d"ehfdcZdAej                  fddZej&                  de        Zdfeej                  ehf   d"emezej                  ehf      fdgZdhej                  d"ej                  fdiZdjej2                  j                  dkej2                  j                  dlej2                  j                  dmej2                  j                  dnej.                  doehdpej2                  j                  dqej2                  j                  fdrZd:ej                  djej                  dkej                  dsehd"ezej                  ej                  f   f
dtZd:ej                  d"dfduZd:ej                  d"dfdvZd:ej                  d"ej                  fdwZ	 dd*ej                  dxeadyebdzee<ej                        fd{Zd| Zd"e`fd}ZdAej                  fd~Zd*ej                  deme~   deme~   de~dxeademej                     d"eze~emeh   emeh   f   fdZd dlmZ dej                  dehd"ej                  fdZd Z	 dd*ej                  dxead"emej                     fdZd*ej2                  j                  d<emej2                  j                     fdZd Z	 dddd:ej                  d^eemeh      d"ezej                  ej                  f   fdZ	 	 	 	 	 ddej2                  j                  dendendeWdeeenemen   f      deWdeen   d"dfdZy)    Ndefaultdict)	dataclassreplace)AnyCallableOptionalTYPE_CHECKINGUnion)countersis_node_meta_valid)(create_structured_trace_for_min_cut_info)config)trace_structured)extract_tensor_metadata)BackwardState)is_sym_nodepy_sym_types)magic_methodsmethod_to_operator)find_symbol_binding_fx_nodesfree_symbolshint_intis_symbol_binding_fx_nodestatically_known_falsestatically_known_true)graph_drawer)
OrderedSet)CheckpointPolicy   )GraphInfoProvider)dp_knapsackgreedy_knapsackilp_knapsack)KnapsackEvaluator)	AOTOutputSavedForBackwardsAOTOutput)get_aot_graph_name)get_cuda_generator_meta_valis_with_effects)fx_graph_cseget_aten_targetraise_getitemsAOT_PARTITIONER_DEBUGlogc                      e Zd ZU dZee   ed<   ee   ed<   ee   ed<   ee   ed<   ee   ed<   dej                  fdZ	dej                  fd	Z
dej                  fd
Zdej                  fdZdej                  fdZy)OpTypesz8Class for keeping track of different operator categoriesfusible_opscompute_intensive_ops
random_opsview_opsrecomputable_opsnodec                 0    t        |      | j                  v S N)r,   r2   selfr7   s     W/var/www/html/engine/venv/lib/python3.12/site-packages/torch/_functorch/partitioners.py
is_fusiblezOpTypes.is_fusibleN   s    t$(8(888    c                 0    t        |      | j                  v S r9   )r,   r3   r:   s     r<   is_compute_intensivezOpTypes.is_compute_intensiveQ   s    t$(B(BBBr>   c                 0    t        |      | j                  v S r9   )r,   r4   r:   s     r<   	is_randomzOpTypes.is_randomT   s    t$77r>   c                 0    t        |      | j                  v S r9   )r,   r5   r:   s     r<   is_viewzOpTypes.is_viewW   s    t$55r>   c                 0    t        |      | j                  v S r9   )r,   r6   r:   s     r<   is_recomputablezOpTypes.is_recomputableZ   s    t$(=(===r>   N)__name__
__module____qualname____doc__r   r   __annotations__fxNoder=   r@   rB   rD   rF    r>   r<   r1   r1   D   s    BH%%%h//8$$"" **9rww 9C C8bgg 86BGG 6>BGG >r>   r1   c                      e Zd ZU eej
                     ed<   eej
                     ed<   eej
                     ed<   eej
                     ed<   eej
                  e	f   ed<   eej
                     ed<   e
j                  deej
                     fd       Zd	ej
                  defd
Zd	ej
                  defdZd	ej
                  defdZd	ej
                  de	fdZy)NodeInfoinputs_required_fw_nodesrequired_bw_nodesunclaimed_nodesfw_orderstatic_lifetime_input_nodesreturnc                 F     t        d  j                  D         fd      S )Nc              3       K   | ]  }|  y wr9   rN   .0ns     r<   	<genexpr>z-NodeInfo.required_fw_nodes.<locals>.<genexpr>m   s     01Q0s   c                 "    j                   |    S r9   )rU   )r\   r;   s    r<   <lambda>z,NodeInfo.required_fw_nodes.<locals>.<lambda>m   s    a@P r>   key)sortedrR   r;   s   `r<   required_fw_nodeszNodeInfo.required_fw_nodesj   s!    0//06P
 	
r>   r\   c                     || j                   v S r9   )rR   r;   r\   s     r<   is_required_fwzNodeInfo.is_required_fwp   s    D++++r>   c                     || j                   v S r9   )rS   rf   s     r<   is_required_bwzNodeInfo.is_required_bws   s    D****r>   c                     || j                   v S r9   )rT   rf   s     r<   is_unclaimedzNodeInfo.is_unclaimedv   s    D((((r>   c                 R    || j                   v sJ d| d       | j                  |   S )NNode z not in fw nodes!)rR   rU   rf   s     r<   get_fw_orderzNodeInfo.get_fw_ordery   s4    D+++IuQC7H-II+}}Qr>   N)rG   rH   rI   listrL   rM   rK   r   dictint	functoolscached_propertyrd   boolrg   ri   rk   rn   rN   r>   r<   rP   rP   ^   s     M"277++!"''**((277C<  !+BGG!44
4= 
 

, ,D ,+ +D +)bgg )$ ) bgg  #  r>   rP   c                   @    e Zd ZU eed<   eed<   eed<   eed<   eed<   y)MinCutOptionsban_if_used_far_apartban_if_long_fusible_chainsban_if_materialized_backwardban_if_not_in_allowlistban_if_reductionN)rG   rH   rI   rt   rK   rN   r>   r<   rv   rv   ~   s      $$"&&!!r>   rv   r7   rW   c                 z    | j                   j                  dd       t        j                  t        j                  fv S )N	recompute)metagetr   MUST_RECOMPUTEPREFER_RECOMPUTEr7   s    r<   must_recomputer      s5    99==d+''))0  r>   fx_gc                 T    | j                   j                  D ]  }t        |      s y y)NTF)graphnodesr   r   r7   s     r<   has_recomputable_opsr      s+    

   $ r>   c                     | j                   j                  D ]W  }t        |      st        |j                  d      s&t
        j                  j                  |j                  j                  v sW y y)NtagsTF)	r   r   r   hasattrtargettorchTagnondeterministic_seededr   r   s     r<   has_recomputable_rng_opsr      sU    

   4 V,		11T[[5E5EE r>   c                     t        | j                  d   t        j                  t        j                  f      ryt        | j                  d   t        j
                        sJ y)Nvalr       )
isinstancer~   r   SymIntSymBoolSymFloatr   s    r<   sym_node_sizer      sE    $))E"U\\5==$ABdii&777r>   c                       e Zd Zd Zy)InvalidNodeBasec                      y)NzInvalid NoderN   rc   s    r<   __repr__zInvalidNodeBase.__repr__   s    r>   N)rG   rH   rI   r   rN   r>   r<   r   r      s    r>   r   joint_graphrQ   outputsoutputs_descssubgraphc                    t        j                         }i |D ]3  }|j                  |j                        }|j                  |_        ||<   5 | j
                  D ]/  }t        |      r|dk7  r
t        |<   t        |      r|dk7  r
t        |<   8|v r=|j                  dk(  r
t        |<   V|j                  dk(  rt        j                  |j                  i |j                  }|D 	cg c]/  }	t        |	t         j                        rt        |	   t               1 }}	t#        |      r
t        |<   |j%                  |fd      |<   |j                  dk(  r|j%                  |fd      |<   |j                  dk(  s02 g }
|D ]s  }	t        |	t         j                        rF|	vrt'        d	|	 d
      t        |	   t               rJ d	|	 d       |
j)                  |	          c|
j)                  |	       u |j+                  t-        |
            }||j                  d<   |j/                          |j1                          |S c c}	w )a  
    Given a graph, extracts out a subgraph that takes the specified nodes as
    inputs and returns the specified outputs.

    This includes specifying non-placeholder nodes as inputs.

    The general strategy is to initialize all inputs with proxies as we
    encounter them, and trace through the graph, only keeping values which take
    in valid proxies. Then, all dead code is eliminated.
    backwardforwardplaceholdercall_functionc                     |    S r9   rN   xenvs    r<   r_   z4_extract_graph_with_inputs_outputs.<locals>.<lambda>       CF r>   get_attrc                     |    S r9   rN   r   s    r<   r_   z4_extract_graph_with_inputs_outputs.<locals>.<lambda>   r   r>   outputrm   z couldn't be found in envz was invalid, but is outputdesc)rL   Graphr   namer~   r   _must_be_in_backwardInvalidNode_must_be_in_forwardoppytreearg_tree_leavesargskwargsr   rM   r   any	node_copyRuntimeErrorappendr   tupleeliminate_dead_codelint)r   rQ   r   r   r   	new_graphr7   new_nodeall_argsr   output_valuesoutr   s               @r<   "_extract_graph_with_inputs_outputsr      sK   " 
I
C  ((3		D		 !! %(j*@#CIt$Y)>#CI3; WW%#CIWW'--tyyHDKKHH "a) 3q6?3H 
 8}'D	!++D2BCCIWW
"!++D2BCCIWW => M 	$a!|"U1#-F#GHH!#a&/: s56:   Q(  #	$ 

5/
0C$CHHV!!#NN;s   .4Ic                     | j                   dk(  xr3 dt        | j                        vxr t        |        xr t	        |        S Nr   tangents)r   strr   _is_bwd_seed_offset_is_fwd_seed_offsetr   s    r<   
_is_primalr      sK    =  	*c$++..	*#D))	* $D))	r>   c                 R    | j                   dk(  xr dt        | j                        v S r   r   r   r   r   s    r<   _is_tangentr     s$    77m#F
c$++6F(FFr>   c                     | j                   dk(  xr0 dt        | j                        v xs dt        | j                        v S )Nr   bwd_seedbwd_base_offsetr   r   s    r<   r   r   	  =    77m# c$++&&O*;s4;;?O*Or>   c                     | j                   dk(  xr0 dt        | j                        v xs dt        | j                        v S )Nr   fwd_seedfwd_base_offsetr   r   s    r<   r   r     r   r>   c                 v    | j                   dk(  xr) t        | j                  j                  d      t              S )Nr   r   )r   r   r~   r   r   r   s    r<   _is_backward_stater     s*    77m#W
499==3G(WWr>   c                 @    | j                   j                  dd       dk(  S )Npartitioner_tagis_backwardr~   r   r   s    r<   _has_tag_is_backwardr     s    99==*D1]BBr>   c                 @    | j                   j                  dd       dk(  S )Nr   must_be_in_forwardr   r   s    r<   _has_tag_must_be_in_forwardr     s    99==*D15IIIr>   c                 @    | j                   j                  dd       dk(  S )Nr   must_be_in_backwardr   r   s    r<   _has_tag_must_be_in_backwardr   !  s    99==*D15JJJr>   c                     t        |       S r9   )r   r   s    r<   r   r   %  s    &t,,r>   c                 L    t        |       xs t        |       xr t        |       S r9   )r   r   r*   r   s    r<   r   r   )  s&    '- T"<t'<r>   joint_modulec          	      l   t        j                  d | j                  j                  d      D         }t        j                  t	        t        | j                  j                  d                  j                  j                  dd gt        |      z              }|d | }||d  }|d | }||d  }||||fS )Nc              3   4   K   | ]  }|j                     y wr9   r   r[   r7   s     r<   r]   z+_extract_fwd_bwd_outputs.<locals>.<genexpr>3  s     	K$))	K   r   r   r   )	r   r   r   
find_nodesnextiterr~   r   len)r   num_fwd_outputsr   r   fwd_outputsbwd_outputsfwd_outputs_descsbwd_outputs_descss           r<   _extract_fwd_bwd_outputsr   /  s     $$	K 2 2 = = = J	KG **T,$$//8/<=>CCGGTFS\)	
M
 *?+K/*+K%&67%o&67%68IIIr>   saved_valuesr   c                 V    | D ]$  }|j                   |k(  s| j                  |        y  y r9   )r   remove)r   r   saved_values      r<   _remove_by_namer   A  s0    # t#,r>   fwd_module_outputsc                     t        |       }t        t        |       dz
  dd      D ]  }t        | |         r|dz   } |S  |S )Nr    )r   ranger   )r   idxis      r<   find_first_sym_noder  H  sX      
!C3)*Q.B7 -a01a%CJ	 Jr>   r   maxminc           	         | j                  |      5  | j                  t        j                  j                  j
                  j                  |f      }t        j                  j                  j
                  j                  |j                  d         |j                  d<   t        |j                  d         |j                  d<   d d d        | j                        5  | j                  t        j                  j                  j                  j                  |dgdf      }t        j                  j                  j                  j                  |j                  d   dgd      |j                  d<   t        |j                  d         |j                  d<   d d d        | j                        5  | j                  t        j                  j                  j                  j                  |t        j                  f      }t        j                  j                  j                  j                  |j                  d   t        j                        |j                  d<   t        |j                  d         |j                  d<   d d d        | j                        5  | j                  t        j                  j                  j                  j                  ||f      }t        j                  j                  j                  j                  |j                  d   |      |j                  d<   t        |j                  d         |j                  d<   d d d        | j                        5  | j                  t        j                  j                  j                  j                  |f      }t        j                  j                  j                  j                  |j                  d         |j                  d<   t        |j                  d         |j                  d<   d d d        | j                        5  | j                  t        j                  j                  j                  j                   ||f      }	t        j                  j                  j                  j!                  |j                  d   |      |	j                  d<   t        |	j                  d         |	j                  d<   d d d        | j                  	      5  | j                  t        j                  j                  j                  j                  |	t        j"                  fdt%        |j&                        z         }
t        j                  j                  j                  j                  |	j                  d   t        j"                        |
j                  d<   t        |
j                  d         |
j                  d<   d d d        |
S # 1 sw Y   1xY w# 1 sw Y   lxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   NxY w# 1 sw Y   
S xY w)Nr   r   tensor_metar   T
fp8_scale_r   r   )inserting_afterr   r   opsatenabsdefaultr~   r   amaxprimsconvert_element_typefloat64	clamp_min
reciprocalmulTensorfloat32r   r   )r   r7   r  r  abs_node	amax_nodeamax_64_nodeclamp_min_nodereciprocal_nodemul_node
scale_nodes              r<   calculate_quantization_scalingr  S  s     
		t	$ U&&IINN&& ' 
  %yy~~1199$))E:JKe'>x}}U?S'Tm$U 
		x	( W''IINN''RD$' ( 
	 !&		 3 3 ; ;MM% 2$!
	u )@	u@U(V	}%W 
		y	) 

**IIOO0088U]]+ + 
 $)99??#G#G#O#ONN5!5==$
%  ,Ce$,
-(

 
		|	, 

,,IINN$$,,$ - 
 &+YY^^%=%=%E%Ee$c&
E" .E&.
M*

 
		~	. 

--IINN%%-- " . 
 ',iinn&?&?&G&G&'
U# /F  '/
]+

 
			/ U&&IINN%%!3' ' 
  %yy~~1188  ' 
e (?x}}U?S'Tm$U 
		x	( 	Y((IIOO0088EMM*DII. ) 


 "'!E!E!M!MMM% %--"

 *AQVAW)X
&	Y IU UW W

 



 



 

U U	Y sZ   B3W4B9X0CXB5X(B3X(4B5X5C(Y4W>XXX%(X25X?Yr  
quant_typer  	clamp_maxc           	      	   | j                  |      5  | j                  t        j                  j                  j
                  j                  |t        j                  f      }t        j                  j                  j
                  j                  |j                  d   t        j                        |j                  d<   t        |j                  d         |j                  d<   d d d        | j                        5  | j                  t        j                  j                  j                  j                  ||f      }t        j                  j                  j                  j                  |j                  d   |j                  d         |j                  d<   t        |j                  d         |j                  d<   d d d        | j                        5  | j                  t        j                  j                  j                  j                  ||f      }t        j                  j                  j                  j                  |j                  d   |      |j                  d<   t        |j                  d         |j                  d<   d d d        | j                        5  | j                  t        j                  j                  j                  j                  ||f      }	t        j                  j                  j                  j                  |j                  d   |      |	j                  d<   t        |	j                  d         |	j                  d<   d d d        | j                  	      5  | j                  t        j                  j                  j
                  j                  |	|fdt        |j                         z         }
t        j                  j                  j
                  j                  |	j                  d   |      |
j                  d<   t        |
j                  d         |
j                  d<   d d d        |
S # 1 sw Y   hxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   
S xY w)Nr   r   r  
fp8_quant_r	  )r
  r   r   r  r  r  r  r  r~   r   r  r  r  r  r!  r   r   )r   r7   r  r   r  r!  target_node_32scaled_target_nodeclamp_min_scaled_nodeclamp_max_scaled_nodequant_activation_nodes              r<   perform_quantizationr)    s    
		z	* 

,,IIOO0088& - 
 &+YY__%I%I%Q%QIIeemm&
E" .E&.
M*

 
		~	. 

"00IINN%% *- 1 
 */););)B)B&
(>*
& 2I##E*2
.

 
		1	2 

 % 3 3IINN$$,,$i0 !4 !
 -2IINN,D,D,L,L##E*I-
""5) 5L!&&u-5
""=1

 
		4	5 

 % 3 3IINN$$,,'3 !4 !
 -2IINN,D,D,L,L!&&u-y-
""5) 5L!&&u-5
""=1

 
		4	5 
 % 3 3IIOO0088'4DII. !4 !
 IIOO0088%**51: 	""5)
 5L!&&u-5
""=1
 ! u

 



 



 



 


 ! s@   CQ	<CQB5Q#%B5Q03CQ=	QQ #Q-0Q:=Rtensorc                 R    | j                         }| j                         }||z  dz  S )z
    Calculate the size of a PyTorch tensor in megabytes (MB).

    Args:
        tensor (torch.Tensor): Input tensor

    Returns:
        float: Memory size in MB
    i   )numelelement_size)r*  num_elementsr-  s      r<   calculate_tensor_sizer/    s-     <<>L&&(L<'K88r>   c            	          t         j                  j                  j                  d   j	                  dd      } | j                  d      D cg c]$  }t        t         |j                  d      d         & } }| S c c}w )N!activation_quantization_aten_passallowed_dtypesztorch.bfloat16;.r   )r   	_inductorr   post_grad_fusion_optionsr   splitgetattr)r2  dtypes     r<   get_allowed_dtypesr:    sz    __++DD+	c
,-  ;I:N:Ns:S16u{{3'+,N  s   )A8c                 B   t               }t        |       r| j                  d   j                  |vryt        j
                  j                  j                  d   j                  dd      }t        | j                  d         }t        j
                  j                  j                  d   j                  dd      s||k\  S t        j
                  j                  j                  d   j                  dd      rt        ||k\        xs t        ||k\         S t        ||k\        S )Nr   Fr1  
size_in_mbd   skip_dynamo_guardsquantize_dynamic_shape)r:  r   r~   r9  r   r5  r   r6  r   r/  r   r   )r7   r2  size_thresholdr<  s       r<   should_quantizerA     s   ')Nd#tyy'7'='=^'S__++DD+	c,  'tyy'78J??!!::+	c
&' ^++ ??!!::/

#&
./ )n, J+J.,HIIJ
 )~)EFFr>   c                      t         j                  j                  j                  d   j	                  dd      } t        t         | j                  d      d         S )Nr1  r   ztorch.float8_e5m2r4  r   )r   r5  r   r6  r   r8  r7  )r   s    r<   get_quant_typerC    sN    ''@@+	c,+,  5***3/344r>   r9  c                 \    t        j                  |       }|j                  |j                  fS )z
    Calculate the range of values for a given torch.dtype.
    Args:
        dtype (torch.dtype): The input dtype.
    Returns:
        tuple: A tuple containing the minimum and maximum values.
    )r   finfor  r  )r9  infos     r<   calculate_rangerG  "  s%     ;;uD88TXXr>   c           
         | j                  d      d   }|j                  d   }t               }t        |      \  }}t	               }g g }}|D ]  }	|	j
                  j                  dd      s!t        j                  j                  j                  d   j                  dd      rMt        | |	|d	      }
t        | |	|
|||      }t        |
      s|j                  |
       n|j                  |
       n| j                  |	      5  | j!                  t        j"                  j$                  j&                  j(                  |	|fd
t+        |	j,                        z         }t        j"                  j$                  j&                  j)                  |	j
                  d   |      |j
                  d<   t/        |j
                  d         |j
                  d<   d d d        ||	<    |D 	cg c]  }	|	|v r||	   n|	 }}	t1        |      }||z   }|r|d | |z   ||d  z   }|j3                  dt5        |             t6        d   dxx   dz  cc<   y # 1 sw Y   yxY wc c}	w )Nr   r   r   saved_for_quantizationFr1  use_scalingT-q=r#  r	  r   r  inductor%activation_quantization_fwd_aten_passr    )r   r   rC  rG  rp   r~   r   r   r5  r   r6  r  r)  r   r   r
  r   r  r  r  r  r   r   r   r  
update_argr   r   )r   r   r   r   r  r!  node_to_quanttensor_scale_nodessym_scale_nodesr7   r  
quant_nodeoutput_updated_argsr  scale_nodess                  r<   quantize_activation_fwrU  .  sa   *1-F++a.K!J*:6IyFM*,b #-99==159%%>>3c-&' <4E
 24ZI
 #:.&--j9#**:6 **40 !&!4!4		<<DD"J/)C		N: "5 "J 		<<DD IIe,j OOE*
 6M".6JOOM2 #-M$G#-L LWCGt}4d$>  1
2C$6K%36I#$6OO 	 a234Z@AQFA9  s   CI#II	c           
      
  	 | j                   D cg c]  }|j                  dk(  s| }}d }|D ]~  }|j                  j                  dd      s!|j                  j	                  d       |j                  j	                  d      }t
        j                  j                  j                  d   j                  dd      r| j                  |      5  d|j                  j                  dd	      z   	t        	fd
|D              }d d d        | j                        5  | j                  t
        j                  j                  j                   j"                  ||f      }t
        j                  j                  j                   j#                  |j                  d   |      |j                  d<   t%        |j                  d         |j                  d<   d d d        | j                  |      5  | j                  t
        j                  j&                  j(                  j*                  ||f      }t
        j                  j&                  j(                  j+                  |j                  d   |j                  d         |j                  d<   t%        |j                  d         |j                  d<   d d d        | j                        5  | j                  t
        j                  j                  j                   j"                  ||f      }t
        j                  j                  j                   j#                  |j                  d   |      |j                  d<   t%        |j                  d         |j                  d<   d d d        n| j                  |      5  | j                  t
        j                  j                  j                   j"                  ||fdt-        |j                        z         }t
        j                  j                  j                   j#                  |j                  d   |      |j                  d<   t%        |j                  d         |j                  d<   d d d        t/        |j0                  j3                               D ]   }|k7  s	||k7  s|j5                  ||       "  t6        d   dxx   dz  cc<   y c c}w # 1 sw Y   xY w# 1 sw Y   	xY w# 1 sw Y   ;xY w# 1 sw Y   xY w# 1 sw Y   xY w)Nr   rI  Fdequant_typer1  rJ  r  r#   c              3   @   K   | ]  }|j                   k(  r|  y wr9   r   )r[   	bwd_input
scale_names     r<   r]   z)quantize_activation_bw.<locals>.<genexpr>x  s%      &%$>>Z7 "&s   r   r   r  dequant_r	  rL  %activation_quantization_bwd_aten_passr    )r   r   r~   r   popr   r5  r   r6  r
  r   r   r   r   r  r  r  r  r   r  divr  r   ro   userskeysreplace_input_withr   )
r   r7   	bw_inputsactivation_noderW  r  divided_target_node_32dequant_nodeuserr\  s
            @r<   quantize_activation_bwri  i  si   "'++J$M1IJIJO H@99==159IIMM2399==8L%%>>3c-'( **40 !-		0A0A,PR0S!SJ!% &)2& "J **:6 &+&9&9		<<DD"L1 ': 'O
 		<<DD IIe,l $((/
 ;R',,U3;O((7 **?; 
-2-@-@		**11-z: .A .* :?9K9K9R9R',,U3Z__U5K:*//6 00F0K0KE0RS +//>
 **+AB #(#6#6		<<DD4lC $7 $L
 		<<DD277> !%%e,
 8O$))%08L%%m4  **40 #(#6#6		<<DD"L1'#dii.8 $7 $L 		<<DD IIe,l !%%e,
 8O$))%08L%%m4 TZZ__./ @<'DO,C++D,?@MH@T Z@AQFAY K  
 
  sJ   SS4SB5S*CS%B5S2CS>S	S"	%S/	2S;	>T	
fwd_module
bwd_modulebwd_module_inputsc                     t        dd  fd       t         j                         t        dd  fd       t        dd fd        j                  j                  d	
      d   j                  d   }|D ]  }d|j
                  v s||j
                  j                  dd         }j                  j                  |      5  j                  j                  |j
                        }d d d        |j                  d   }j                  j                  |j                         d|j                  d<   ||j                  d<   |j                  |       j                  j                  |        t        j                  j                  j                   d   j#                  dd      rt%        j                  j                  d
            }|d   }	t'        |      D ]  }
t)        |
      r|
}	 n  j                  j                  d	
      d   j                  d   }|D ]  }d|j
                  v sj                  j                  |	      5  j                  j                  |j
                        }d d d        j                  j                  |j                         |}	 t+        j                         t        dd fd       y # 1 sw Y   xY w# 1 sw Y   gxY w)Nartifactc                      dddS )N,before_activation_quantization_fwd_aten_passstringr   encodingrN   rN   r>   r<   r_   z5perform_fp8_activation_quantization.<locals>.<lambda>      B 
 r>   c                  ,     j                  ddd      S NFT)print_outputinclude_strideinclude_deviceprint_readablerj  s   r<   r_   z5perform_fp8_activation_quantization.<locals>.<lambda>      :44tD 5 
 r>   metadata_fn
payload_fnc                      dddS )N+after_activation_quantization_fwd_aten_passrq  rr  rN   rN   r>   r<   r_   z5perform_fp8_activation_quantization.<locals>.<lambda>      A 
 r>   c                  ,     j                  ddd      S rv  rz  r|  s   r<   r_   z5perform_fp8_activation_quantization.<locals>.<lambda>  r}  r>   c                      dddS )N,before_activation_quantization_bwd_aten_passrq  rr  rN   rN   r>   r<   r_   z5perform_fp8_activation_quantization.<locals>.<lambda>  rt  r>   c                  ,     j                  ddd      S rv  rz  rk  s   r<   r_   z5perform_fp8_activation_quantization.<locals>.<lambda>  r}  r>   r   r   r   r#  rX  rZ  rW  TrI  r1  rJ  r   r   r  c                      dddS )N+after_activation_quantization_bwd_aten_passrq  rr  rN   rN   r>   r<   r_   z5perform_fp8_activation_quantization.<locals>.<lambda>  r  r>   c                  ,     j                  ddd      S rv  rz  r  s   r<   r_   z5perform_fp8_activation_quantization.<locals>.<lambda>  r}  r>   )r   rU  r   r   r   r   r   r
  r   r~   updatereplace_all_uses_with
erase_noder   r5  r   r6  r   ro   reversedr   ri  )rj  rk  rl  quant_fwd_module_outputsfwd_noder[  quant_bwd_inputrW  quant_bwd_module_inputsbwd_input_locbw_inputscaled_fwd_module_outputsscale_bwd_inputs   ``           r<   #perform_fp8_activation_quantizationr    s   
 

	 :++,

	 

	  *//::h:GJOOPQR, 
38==()(--*?*?b*QRI!!11)< S","2"2">">HMM">"RS$>>.9L  ''6=AO  !9:3?O  0++O<''	2
3 66+	c- #'z'7'7'B'Bm'B'T"U/3 !89 	Hx( (	
 %/$4$4$?$?8$?$LQ$O$T$TUV$W!1 	0Hx}},%%55mD W&0&6&6&B&B&B&VOW$$++HMM: /	0 :++,

	?S S0W Ws   'K'KK	K(	rV   c                 2   t         j                  j                  dd       	 y |r|D cg c]  }|j                   c}ng }| D ci c]  }|j                  | }}t        j
                  j                  j                  d   j                  dd      r)| D ci c]  }d|j                  vs|j                  |  }}|j                  j                  d      d   j                  d   }|j                  j                  d      D ci c]  }|j                  | }}d}	|D ]  }|j                  |v st        |      s|j                  |v r!t        j                  d	|j                         Md
|j                  d<   |j                  d   j                  |j                  d<   d
||j                     j                  d<   |j                  d   j                  ||j                     j                  d<   d
}	 |	rt        |||       y y c c}w c c}w c c}w c c}w )Nr1  exclude_primalsFprimalsr   r   r   r   z*Skipping quantization of static input %s: TrI  r   rW  )inductor_configr6  r   r   r   r5  r   r   r   r   rA  r/   debugr~   r9  r  )
r   rj  rk  rV   r7   static_input_namessaved_values_namesr   rl  should_perform_fp8_quants
             r<   enable_activation_quantizationr    s    	0044/	
 	
 	 '  ;;t; 
 7CCd$))T/CC66+	c
U#$ )5
 $	8RDIItO
 
 $))444A!DII!L$.$4$4$?$?=$?$Q 		4   %" 
,99**t/Dyy..		F		R26DII./(,		%(8(>(>DIIn%JNdii(--.FG@D		%@P@V@Vdii(--n='+$
,  +J
DUV  9 	< D
s   HH
H*HH)rV   saved_sym_nodesr   c                   t        | |      \  }}}}| j                  j                  d      }	g t        t        |	      }
g t        t
        |	      }g t        t        |	      }g t        t        |	      }g t        t        |	      }t        | j                  ||z   |z   |z   ||d      }t        j                  j                         }|j                  d      D ]  }|j                  s-t        ||j                         t        ||j                         <|rIt!        d |j                  D              r-t        ||j                         t        ||j                         t        |      st        ||j                         |rJ  t#               }g }g }|D ]C  }t%        |      }|r#|j'                  |       |j)                  |       3|j)                  |       E t+        | j                        }t-        j.                  |||      D ]]  }d|j0                  vrt3        |j0                  d         |z
  }t5        |d       D ]  }||vr|j)                  ||           ||z  }_ |j7                          |j9                  ||z          t        | j                  |
|z   ||z   |z   |t;        t=        |      t=        |      z         D cg c]  }t?        |       c}z   d	      }t        | j                  ||z   |z   |z   |z   ||d      }t@        jB                  jE                  | |      }t@        jB                  jE                  | |      }tG        ||||       ||fS c c}w )
Nr   r   r   r   c              3      K   | ]X  }|j                   t        j                  j                  j                  j
                  u xr t        |j                        d k(   Z ywr   N)r   r   r  _c10d_functionalwait_tensorr  r   ra  rZ   s     r<   r]   z+_extract_fwd_bwd_modules.<locals>.<genexpr>e  sS      )
  HH		22>>FFF "AGG!")
s   AA r   c                     | j                   S r9   rZ  )ss    r<   r_   z*_extract_fwd_bwd_modules.<locals>.<lambda>  s
    166 r>   r`   r   )$r   r   r   filterr   r   r   r   r   r   r   distributedis_availablera  r   r   allr   r   addr   r   	itertoolschainr~   r   rb   clearextendr   r   r'   rL   _lazy_graph_module_make_graph_moduler  )r   r   r  r   rV   r   r   r   r   placeholdersprimal_inputstangent_inputsfwd_seed_offset_inputsbwd_seed_offset_inputsbackward_state_inputs	bwd_graphdistributed_enabledr7   saved_symbolssaved_sym_nodes_bindingsaved_sym_nodes_derivedsymbolsymbol_bindingsnew_symbolsr  r  	fwd_graphrj  rk  s                                r<   _extract_fwd_bwd_modulesr  @  s    	!O CK/1B  %%00M0BL7fZ67M9vk<89NIv&9<HIIv&9<HIGf%7FG2,&7:PPI  ++88:$$$6 )zzL$))4OTYY7
 !S )
 ZZ)
 &

 L$))4OTYY7%L$))4(((')2 /9lM     1*40f%#**40#**401 3<3E3EFO 7~V %		!"499U#34}D)9: 	?A '#**?1+=>	? 	$%" 25LLM 3..l"_4 3|,s?/CCD
 'q)
	

 	
I 3
	
	 !	!  		 
 	
I &&99,	RJ&&99,	RJ"j*.I z!!/
s   M!)static_lifetime_input_indicesrV   r  c                f   t        |       rt        | |||      S t        t        t        | j
                  j                              }t        t        t        | j
                  j                              }||z   }t        | |      \  }}	}
}t        | j
                  |||
d      }t        d |j                  D              }g }g }| j
                  j                  D ]  }|j                  |vrt        |      r|j                  |       /d|j                  vrA|j                  dk(  r2|j                   }t#        d |D              sJ |j%                  |       ~|j                   D cg c]  }|j                  |vs| }}d|j                  v r$t#        d |D              r|j%                  |       |j                  |        t        t&        j)                  |      j+                               }t        t&        j)                  |      j+                               }t-        | ||||	      S c c}w )
a  
    Partitions the :attr:`joint_module` in a manner that closely resembles the
    behavior observed in the original ``.forward()`` and ``.backward()`` of the
    callable, i.e., the resulting forward graph contains those operators that
    are executed in the original ``.forward()`` callable passed to
    :func:`aot_function`.

    The default partitioner collects the operators that are between the forward
    inputs and the forward outputs. This helps in finding the tensors which have
    to be stashed for the backward pass. These stashed tensors become the output
    of the generated forward graph. The remaining operators are then placed in
    the backward graph.

    .. warning::
        This API is experimental and likely to change.

    Args:
        joint_module(fx.GraphModule): The joint forward and backward graph. This
            is the result of AOT Autograd tracing.

    Returns:
        Returns the generated forward and backward Fx graph modules.
    )r   r  r  r   c              3   T   K   | ]   }|j                   d k7  s|j                   " ywr   Nr   r   r   s     r<   r]   z$default_partition.<locals>.<genexpr>  s$      $$''X:M		$   ((r  r   c              3   V   K   | ]!  }|j                   t        j                  k(   # y wr9   )r   operatorgetitemr[   rh  s     r<   r]   z$default_partition.<locals>.<genexpr>  s     I4t{{h&6&66Is   ')c              3   2   K   | ]  }t        |        y wr9   r   rZ   s     r<   r]   z$default_partition.<locals>.<genexpr>   s      2#$A2   r  r   rV   )r   #min_cut_rematerialization_partitionro   r  r   r   r   r   r   r   r   r   r   r   r~   r   ra  r  r  rp   fromkeysrb  r  )r   _joint_inputsr   r  rV   r  r  rQ   r   r   r   r   forward_only_graphforward_node_namesr   r  r7   ra  r\   backward_usagess                       r<   default_partitionr    s   > L)2+*G	
 	
 
L,>,>,D,DEFM!&)<l>P>P>V>V"WX33F O CK/1B <FK1BI $ $066$  LO""(( *99..t ""4($))+?0JJJEI5IIII&  ::7I)IO  		)c 2(72 /  &&7##D);*< l388:;L4==9>>@AO#''$? )s   'H.;H.g    .Ar,  c                      | |j                   z  S r9   )itemsize)r,  r9  s     r<   _tensor_nbytesr    s    5>>!!r>   c                 V   dt         fdd| j                  v r| j                  d   }t        |t              ryt        |t        t
        f      rt        fd|D              S t        |t              r"t        fd|j                         D              S t        |t        j                        r |      S t        dt        |       d|        | j                  d	k(  s:| j                  t        j                  j                   j"                  j$                  u ry
t        d|  d      )NrW   c                     t        | t        j                        syt        t	        | j                         d      | j                        S )Nr      fallback)r   r   r  r  r   r,  r9  r   s    r<   object_nbytesz_size_of.<locals>.object_nbytes"  s1    !U\\*hqwwy4@!''JJr>   r   r    c              3   .   K   | ]  } |        y wr9   rN   )r[   r\   r  s     r<   r]   z_size_of.<locals>.<genexpr>/  s     5A}Q'5   c              3   4   K   | ]  \  }} |        y wr9   rN   )r[   _r\   r  s      r<   r]   z_size_of.<locals>.<genexpr>1  s     @DAq}Q'@   zUnknown metadata type z	 on node r   r   rm   zO didn't have `val` metadata; we should always have `val` metadata on the nodes.)rq   r~   r   r   ro   r   sumrp   itemsr   r  r   typer   r   r  r  _assert_scalarr  )r7   r   r  s     @r<   _size_ofr  !  s    KC K
 		iic<( dE]+5555T"@CIIK@@@U\\* %%3DI;ivNOOww*uyy~~/L/L/T/T T

vde r>   c           	      2   ddl m}  |t              }| j                  D ]3  }|j                  dk(  s||j
                  j                  xx   dz  cc<   5 t        j                  dt        |j                         t        j                  d      d             y )Nr   r   r   r    z%sTra   reverse)collectionsr   rq   r   r   r   rG   r/   rF  rb   r  r  
itemgetter)r   r   cntr7   s       r<   
_count_opsr  >  sr    '%c*C +77o%$$%*%+ HHT6#))+8+>+>q+A4PQr>   c                     g } t        t        j                  j                        D ]  }t	        t        j                  j                  |      }t        |t        j                  j                        sL|j                         D ]G  }t	        ||      }t        j                  j                  |j                  v s6| j                  |          | S r9   )dirr   r  r  r8  r   _opsOpOverloadPacket	overloadsr   	pointwiser   r   )r  	attr_nameopoverloadpacketoverloadop_overloads        r<   pointwise_opsr   H  s    
C( 
	"599>>9=*EJJ,G,GH(224 	H!"2H=Kyy""k&6&66

+,	
 Jr>   	depth_mapc                     | D ci c]7  }t        |t        j                  j                  j                        s2|||   9 }}t        |j                         t        j                  d      d      S c c}w )Nr    Tr  )	r   r   rL   r7   rM   rb   r  r  r  )r   r  arg
arg_depthss       r<   sort_depthsr  Z  sf    '+ #z#uxx}}?Q?Q/RYs^J  *""$(*=*=a*@$OOs
   3A2A2gmc                   
 t        j                         i 
| j                  j                  d      D ]  }j	                  |
fd      
|<    t        | j                  j                        D ci c]  \  }}||
 c}}
fd}t        t        t        | j                  j                              }d}t        j                  }|D ]#  }|j                  D ]  }|   |k  s|   }|} % || S t        | j                  j                        d|    D ]V  }|j                  dk(  s|j                  t        j                   j"                  j$                  j&                  k(  sO ||       X t        | j                  j                        |   d D ]
  } ||        t        j                   j)                  |       }	|	S c c}}w )a  
    This pass finds the first bwd node in the graph (by looking at users of
    tangents) and then reorders the graph by walking from this node to all the
    way to the end of the graph. At each op in this traversal, we insert this op
    in a new graph and try to bring only the relevant subgraph from the other
    non-bwd edges relevant for this op. This closely mimics the behavior of
    autograd engine.

    Why is this pass required in the first place?

    This is an artifact of how partitioners work today. The starting point of
    partitioner is a joint graph, which is fwd and then bwd graph. In the case
    of checkpointing, we keep portions of fwd graph in their original place in
    the joint graph, while obtaining a bwd graph. As a result, the resulting bwd
    graph has copies of recomputed fwd subgraphs followed by the original bwd
    graph. If we run this naively, this leads to bad memory footprint, because
    the fwd subgraphs are live for way longer duration than necessary. This pass
    reorders the operations such that we prioritize the ops for the original bwd
    graph while only realizing those ops from the fwd graph that are necessary
    at any given point in the graph.
    r   r   c                     |    S r9   rN   r   s    r<   r_   z5reordering_to_mimic_autograd_engine.<locals>.<lambda>}  s    A r>   c                 *   | g}t               }t        |      dkD  rH|j                         } | |v s| v r'|j                  |        || j                  z  }t        |      dkD  rHt        |fd      }|D ]  } j                  | fd      | <    y )Nr   c                     |    S r9   rN   )r\   orders    r<   r_   zSreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>  s    %( r>   r`   c                     |    S r9   rN   r   s    r<   r_   zSreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>  r   r>   )r   r   r_  r  all_input_nodesrb   r   )r7   	cur_nodesinsertable_nodesr   r   r  s      r<   insert_node_in_graphzAreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph  s    F	0:)nq ==?D''43;  & ---I )nq  ""28JK$ 	DD!++D2BCCI	Dr>   Nr   )rL   r   r   r   r   	enumerater   ro   r  r   mathinfra  r   r   r   r  r  copy_r  GraphModule)r  r7   r  r  r  first_node_in_bwdminimum_ordertangentrh  new_gmr   r   r  s             @@@r<   #reordering_to_mimic_autograd_enginer  a  s   . 
I"$C ##}#5 @''.>?D	@ )2"((..(AB93T3YBED$ &bhhnn=>NHHM! )MM 	)DT{]* %d$(!	))  	 RXX^^$%?u->'?@ '77o%$++9M9M9U9U*U &' RXX^^$U+<%=%?@ #T"# XX!!"i0FMY Cs   5G
	fw_module	bw_modulefw_nodebw_nodedevice	rng_countlast_fwd_inputlast_bwd_inputc                    |j                   }|J | j                  }	|j                  }
t        j                  j                  j
                  }| j                  j                  |      5  | j                  j                  d|       }t        |      |j                  d<   |}ddd       |j                  j                  |      5  |j                  j                  d|       }t        |      |j                  d<   |}ddd       t        |j                        }|d<   | j                  j                  |      5  |	j                  d||j                  g|j                  |      }ddd       |j                         |	j!                  |       t        |j                        }|d<   |
j#                  |      5  |
j                  d||j                  g|j                  |      }|j                  |       |
j!                  |       ddd       ||fS # 1 sw Y   zxY w# 1 sw Y   +xY w# 1 sw Y   xY w# 1 sw Y   ||fS xY w)a%  
    Note [CUDA Graph Safe RNG Functionalization]

    CUDA Graph capture doesn't work with get_rng_state and set_rng_state because these functions operate on CPU values,
    while CUDA Graph RNG capture uses on-device CUDA tensors. To solve this, we use graphsafe_set_state with a
    CUDA Generator registered to the CUDA Graph before capture begins. graphsafe_set_state updates the generator's pointer
    to reference a different GeneratorImpl, ensuring subsequent calls are correctly forwarded to the desired generator
    (and its cuda-tensor RNG state during graph capture).

    For each RNG operation's forward/backward pair:

    - We create two generators initialized with identical values
    - Each forward and backward call advances its respective generator equally
    - This keeps generators synchronized so forward and backward operations use matching RNG values

    When forward is called multiple times before backward (causing desynchronization):

    - We save the forward RNG state
    - We update the backward Generator's state before executing backward

    Before each CUDA Graph replay, replay_prologue updates captured RNG pointers with current states, ensuring backward Generator
    changes are reflected during replay.

    This function modifies both forward and backward computation graphs by:

    Creating RNG state placeholders for both passes
    Updating the forward node to use graph-safe RNG state
    Updating the backward node to use graph-safe RNG state

    For more details: https://github.com/pytorch/pytorch/issues/113541
    Nfwd_rng_state_r   bwd_rng_state_	rng_stater   r   r   )indexr   r   _prims	rng_primsgraphsafe_run_with_rng_stater
  r   r)   r~   rp   r   create_noder   r   r  r  inserting_before)r  r  r  r  r  r   r!  r"  
device_idxfw_graphbw_graphr+  fwd_rng_statebwd_rng_state	fw_kwargsfunctional_fw_node
bwd_kwargs
rng_outputs                     r<   %apply_graphsafe_rng_functionalizationr7    s2   R J!!!HH#(<<#9#9#V#V  
	(	(	8 '!33nYK4PQ$?
$K5!&' 
	(	(	8 '!33nYK4PQ$?
$K5!&	' W^^$I*Ik		(	(	1 
%11(..07<<0	 2 

 !!"45  gnn%J+J{		"	"7	+ %))(..07<<0	 * 

 	%%j1G$% >))M' '' '
 
% >))s1   (9H9H&:.H3=AH?H#&H03H<?Inum_sym_nodesc                   ' t        j                         }d }dt        t        j                     fd'dt        t        j                     fd} ||       } ||      } ||      }	i }
| j
                  j                  D ]  }t        |      st        |j                  d      s&t        j                  j                  |j                  j                  v sW||j                     }||j                     }|	|j                     }||d|
|<    t        j                  j                  j                   }t        j                  j                  j"                  }d }|j
                  j%                  d	      D ]  }d
|j                  v s|} n |t'        d      g }t)        t+        |j
                  j%                  d	                  }t)        t+        |j
                  j%                  d	                  }t-        'fd|
j/                         D              }|j1                  t        j                  d             t3        |      dkD  }t        j4                  j6                  }t6        j8                  xr* | xr% |j:                   xs |j<                  j>                  }tA        |
jC                               D ]%  \  }\  }}|d   }|d   } '|      }|j
                  }|j
                  }|r'|%|jD                  dk(  rtG        ||||||||      \  }}]|jI                  |      5  |jK                  d||j                  g|jL                  |jN                        }|jK                  dtP        jR                  |dfi       } ||      |jT                  d<   |jK                  dtP        jR                  |dfi       } tW        jV                  |jT                        | _*        |jY                  |        |j[                  |       |j]                  |       d d d        |jI                  |      5  dt)        |       }!|j_                  |!      }" ||      |"jT                  d<   d d d        |jI                  |      5  |jK                  d|"|j                  g|jL                  |jN                        } |jY                  |        |j[                  |       d d d        ( |rt)        ta        |j
                  j%                  d	                  }#|#jL                  d   }$t3        |$      |z
  }%|$d |% tc        |      z   |$|%d  z   }&|j
                  je                  |&       |j
                  j[                  |#       |jg                          |jg                          ||fS # 1 sw Y   xY w# 1 sw Y   LxY w# 1 sw Y   xY w)Nc                    i }| j                   j                  D ]i  }|j                  dk(  st        |j                  d      s*t
        j                  j                  |j                  j                  v s[|||j                  <   k |S )Nr   r   )
r   r   r   r   r   r   r   r   r   r   )gmodrandom_nodesr7   s      r<   get_rng_opsz*functionalize_rng_ops.<locals>.get_rng_ops&  sl    JJ$$ 	/D?*DKK0II559I9II*.TYY'	/ r>   rW   c                     d| j                   vry| j                   d   }t        |t              s|f}|D ]D  }t        |t        j                        s|j
                  j                  dk(  s8|j
                  c S  t        j
                  d      S )zV
        Check the example value of the node outputs to find the device type.
        r   Ncudacpu)r~   r   r   r   r  r  r  )r7   
candidates	candidates      r<   
get_devicez)functionalize_rng_ops.<locals>.get_device1  s     		!YYu%
*e,$J# 	,I)U\\2##((F2$+++	,
 ||E""r>   r  c                 (   ddl m}  |       }|J |5  | E| j                  dk(  r6|j                  t        j
                  j                               cd d d        S |j                  t	        j                               cd d d        S # 1 sw Y   y xY w)Nr   )detect_fake_moder?  )torch._guardsrE  r  from_tensorr   r?  get_rng_state)r  rE  	fake_modes      r<   get_sample_rng_statez3functionalize_rng_ops.<locals>.get_sample_rng_stateC  s    2$&	$$$ 	@!fkkV&; ,,UZZ-E-E-GH	@ 	@ (()<)<)>?	@ 	@ 	@s   >B"BBr   )fwdbwdr   r   r  zaCouldn't find tangent node in graph inputs. This is unexpected, please file a bug if you see thisc              3   4   K   | ]  } |d            yw)rK  NrN   )r[   	node_pairrC  s     r<   r]   z(functionalize_rng_ops.<locals>.<genexpr>o  s       )2
9U#$r  r@  r    rK  rL  r?  r   r'  r   r   rng_state_output_r   )4r  countr	   r   r  r   r   r   r   r   r   r   r   r   r)  r*  run_and_save_rng_staterun_with_rng_stater   r   r   r  r   valuesdiscardr   r5  r   graphsafe_rng_functionalizationfallback_randomtest_configs*graphsafe_rng_func_ignores_fallback_randomr  r  r  r7  r-  r,  r   r   r  r  r~   copyr  r  r   r   r   r   r   	recompile)(r   r  r  r8  uidr=  rJ  joint_graph_rng_opsfw_graph_rng_opsbw_graph_rng_opsrecomputable_rng_ops_mapr7   	base_noder  r  run_and_save_rngrR  bw_tangent_start_nodefw_rng_state_outputsr!  r"  devicesmulti_cuda_devices
ind_config'use_rng_graphsafe_rng_functionalizationr   rN  r  r/  r0  r4  stater6  
state_namebw_rng_state_nodefw_output_node
fw_outputssym_node_start_idxr   rC  s(                                          @r<   functionalize_rng_opsrn    s   2 //
C	#HU\\2 #$@Xell%; @ &l3"9-"9-!""(( 	S4 V,		11T[[5E5EE+DII6I&tyy1G&tyy1G:A'2R$Y/	S ||--DD//BB **m*< 		!$(! $o
 	
 (9??#=#=#=#OPQN(9??#=#=#=#OPQN 6N6U6U6W G OOELL'( W) ''J.. 	
""	
 *** R&&QQ , .7 &&(. I-)	)Iy E"E"G$???? 4"v%-R	.*NN **73 3%-%9%9#$!..87<<8">>	 &: &" !,,#$$,a0	 -  %9$@

5!%11#$$*  2 
 #'))GLL"9
--j9##G,$++E2;3@ **+@A M0c<
$,$8$8$D!0DV0L!&&u-M
 **73 	-%11#&+W^^KgllK">>	 2 
 --j9##G,	- 	-AI-\ d9??#=#=#=#JKL#((+
 _}<**+()*+,-. 	
 	w'"">2iA3 3@M M
	- 	-s&   &C.V-5V,;AV9V)	,V6	9W	c                    | j                   j                  D ]t  }t        |j                  t        j
                  j                        s2|j                  j                  dk(  sLt        |      rXt        j                  |j                  d<   v y)z
    By default, the partitioner is not allowed to recompute collectives
    unless they come from a user-annotated AC region.
    See Note [Recomputing collectives in the partitioner]
    r  r}   N)r   r   r   r   r   r  
OpOverload	namespacer   r   	MUST_SAVEr~   )r   r7   s     r<   force_save_collectivesrs    sh     ""(( @t{{EJJ$9$9:%%);;"4(%5%?%?DIIk"@r>   c                    t               }t        | j                  j                        D ]  }|j                  dk(  r|j
                  t        j                  j                  j                  j                  k(  }|rrt        |      r|j                  |j                  d          t        |      s|j                  d   |v st        j                   |j                  d   j"                  d<    y  y )Nr   r   r    r}   )r   r  r   r   r   r   r   r  r  r  r  r   r  r   r   r   rr  r~   )r   has_mutation_in_bwr7   is_copy_s       r<   force_save_bw_mutation_srcrw    s     5?L++112 77h;;%)).."6"6">">>+D1"&&tyy|4*40TYYq\EW5W1A1K1K		!!!+. !r>   c                    | j                   j                  D ]  }t        |      s|j                  D ]K  }t        |      s|j                  d   |j                  d   kD  s/t
        j                  |j                  d<   M |j                  j                  dd      st        d |j                  D              rt
        j                  |j                  d<    | S )a  
    If there are two consecutive checkpointed blocks with no operator in
    between, we would still want to stash the tensor at the boundary of
    checkpointed blocks. The following pass makes the last output node
    non-recomputable to allow for that.
    ac_graph_idr}   has_backward_hookFc              3   2   K   | ]  }t        |        y wr9   )r   r  s     r<   r]   z)cleanup_recompute_tags.<locals>.<genexpr>  s      E)-t$Er  )	r   r   r   ra  r~   r   rr  r   r   )r   r7   rh  s      r<   cleanup_recompute_tagsr|    s     ""(( D$

 H"4(		-0499]3KK-=-G-GDIIk*H yy}}0%8 E15E B& *:)C)C		+&7D8 r>   	node_infomin_cut_optionsdont_banc                   %&'()*+,-./0 
t               t               /t        rQt        d | j                  D              }|t        d /j                  D              z
  }t
        j                  d|       d &d '&'/fd(	 dd l}(/fd	**/fd
}(fd)dt        f)/fd}	|j                         .t               %%./fd}
| j                  D ]L  }|j                  dk(  r|j                  v rm|j                  vr0.j                  |j                   dz   dt"        j$                         `.j                  |j                   dz   dt"        j$                         t'        |      r0.j                  |j                   dz   dt"        j$                         t)        |      st+        |      r |
|       j-                  |      r ||      r |
|       d|j.                  vxr d|j.                  vxs8 d|j.                  v xr( t1        |j.                  d   t2        j4                         }t7        |      rt        t9        |            }nQ|r<t1        |j.                  j;                  d      t<              rdnt"        j$                  }n |	|j>                        }.j                  |j                   dz   |j                   dz   |       |j@                  D ]>  }.j                  |j                   dz   |j                   dz   t"        j$                         @ O dtB        tD        jF                     dtH        dtH        f(fd}jJ                  r(jL                  D ]  }|j@                  D cg c]$  }j-                  |      rjO                  |      & }}|j@                  D cg c]  }j-                  |      s| }}tQ        |      dkD  sw ||tS        |            }tU        |j@                        D ]x  }j-                  |      sjO                  |      |kD  s* (||      s4|%v r9t
        j                  d|jO                  |      ||jO                  |              |
|       z  jV                  r^t               }| j                  D ]D  }j-                  |      sjO                  |      |fg}jO                  |      }tQ        |      dkD  sJtY        jZ                  |      \  }}||v r,|j]                  |       jO                  |      |dz   kD  rNtQ        |      dk(  r@t
        j                  d||jO                  |      jO                  |              |
|       |j@                  D ]J  }j-                  |      s (||      s|%vs$tY        j^                  |jO                  |      |f       L tQ        |      dkD  rG 	 |ja                  .dd      \  }}|\  }-t               }.fd |D        D ]   \  0}|jo                  -0fd!|D               " t               }|D ](  \  } }!| d d" |!d d# k(  sJ | d d" }"|j]                  |"       * tq        |       +ts        | j                        D #ci c]  \  }#}||#
 c}}#,tu        +fd$|D        ,fd%&      }$|$%fS # t        $ r}t        d      |d }~ww xY wc c}w c c}w # tb        $ ri t
        j                  d       t
        j                  dje                  |jf                  jh                  jk                  .                   tm        .        w xY wc c}}#w )'Nc              3      K   | ]H  }|j                   d k(  r7t        |j                  d      r!t        |j                  j                         J yw)r   _overloadpacketN)r   r   r   r   r  r   s     r<   r]   z solve_min_cut.<locals>.<genexpr>7  sA      &
ww/)gdkkCT.U ++,&
s   AAc              3   2   K   | ]  }t        |        y wr9   )r   r[   r  s     r<   r]   z solve_min_cut.<locals>.<genexpr><  s      4
CF4
r  z&Ops banned from re-materialization: %sc                 D   |j                   t        j                  j                  j                  k7  ry|j
                  d   }t        j                  j                  j                  |      \  }}|D ].  }|j                  |   }| |u r yt        |t              s)| |v s. y yNFr   T)r   r   r  higher_orderauto_functionalizedr   _higher_order_opsauto_functionalizeget_mutable_argsr   r   ro   )ab
mutable_opmutable_arg_namesr  r   r  s          r<   !can_fuse_into_auto_functionalizedz8solve_min_cut.<locals>.can_fuse_into_auto_functionalizedA  s    88uyy--AAAVVAY
 ##66GG
S	
% 	 D((4.CCx#t$8	  r>   c                     |j                   t        j                  j                  j                  k7  ry|j
                  d   }|D ]  }|j
                  d   |   }| |u s y y)NFtensors_to_cloner   T)r   r   r  r   triton_kernel_wrapper_functionalr   )r  r  r  r   r  s        r<   .can_fuse_into_triton_kernel_wrapper_functionalzEsolve_min_cut.<locals>.can_fuse_into_triton_kernel_wrapper_functionalR  sb    88uyy--NNNHH%78% 	D((8$T*CCx	 r>   c                 b   t        |      t        j                  k(  ry | |      ry | |      ry| j                  t        j
                  u r>| j                  d   j                  t        j                  j                  j                  u ryj                  |       xr j                  |      S )NTr   F)r,   r  catr   r  r  r   r   r  r  r  r=   )r  r  r  r  op_typess     r<   r=   z!solve_min_cut.<locals>.is_fusible\  s     1),Q29!Q?HH(((q	  yy%%FFG
 ""1%@(*=*=a*@@r>   r   zANeed networkx installed to perform smart recomputation heuristicsc                 <   j                  |       ryt        | g      }t        |      dkD  ro|j                         }|j                  D ]A  }j                  |      s ||      s yj                  |      s1|j                  |       C t        |      dkD  royr  )rD   r   r   r_  ra  rg   r  )r7   r  currh  r=   r}  r  s       r<   is_materialized_backwardsz0solve_min_cut.<locals>.is_materialized_backwardsv  s    D!v&	)nq --/C		 ( //5jd>S##D)MM$'	( )nq  r>   c                 T   | j                   dk7  ry| j                  t        j                  k(  ry| j                  j                  dd       t        j                  k(  ryt        j                  rj                  |       ry| j                  t        j                  j                  t        j                  j                  fv ryj                  rj!                  |       s$yj#                  |       sj%                  |       ryj&                  r3 |       r+t(        j+                  d| t-        | j.                               y| j0                  dk  r| j0                  t        j2                  kD  ryj4                  r/t7        d | j8                  D              }t;        |       }|dz  |k  S y)	Nr   Fr}   Tzmaterialized backwards: %s %si  c              3   h   K   | ]*  }t        |t        j                        st        |       , y wr9   )r   rL   rM   r  r  s     r<   r]   zBsolve_min_cut.<locals>.should_ban_recomputation.<locals>.<genexpr>  s&      % !*Q2H%s   22r   )r   r   r  r  r~   r   r   rr  r   recompute_viewsrD   r  lift_fresh_copyr  
lift_freshrz   rF   rB   r@   ry   r/   r  r   ra  dist_from_bwmax_dist_from_bwr{   r  r   r  )r7   input_tensors_sizeoutput_sizer  r~  r  s      r<   should_ban_recomputationz/solve_min_cut.<locals>.should_ban_recomputation  sc   77o%;;(***99==d+/?/I/II!!h&6&6t&<;;4//779P9PQQ22++D1!!$'8+H+H+N 77<U=
 II5tU4::=NO t#(9(9F<S<S(S ++!$ %%)YY% " #4.K?%777r>   c                 f      j                   dk(  ryt         fd j                  D               S )Nr   Tc              3   0   K   | ]  } |        y wr9   rN   )r[   rh  r=   r7   s     r<   r]   z9solve_min_cut.<locals>.is_materialized.<locals>.<genexpr>  s     E$z$-Es   )r   r  ra  )r7   r=   s   `r<   is_materializedz&solve_min_cut.<locals>.is_materialized  s*    77m#E$**EEEEr>   rW   c           
         t         j                  r| |v ryt        |       }t         j                  r!j	                  |       rt
        j                  S t        | j                  d   t              r-t        | j                  d   t        j                        st        S t        |dt        t        | j                   d      d      z  z        } |       r|S |dz  S )Nr   r   g?r=  r       )r    treat_parameters_as_free_to_saver  r  rD   r  r  r   r~   r   r   r   INT_INFrq   r  r  r  )r7   rV   mem_szr  r  s      r<   get_node_weightz&solve_min_cut.<locals>.get_node_weight  s    3333$!!h&6&6t&< 88Odii&5dii.= Vsc#d.?.?*Eq&IIJK4 MA:r>   c                    j                  |       ry| v r\t        | j                  t        j                  j
                        xr | j                  j                  dk(  }t        j                  s|syt        |       ryd| j                  v r(t        | j                  d   t        j                        ryj                  |        j                  d| j                  dz   t        j                          y)NFr  r   source_incapacityT)rD   r   r   r   r  rp  rq  r   (unsafe_allow_optimization_of_collectivesr   r~   r   r  add_edger   r  r  )r7   is_collectivebanned_nodesr  nx_graphr  s     r<   ban_recomputation_if_allowedz3solve_min_cut.<locals>.ban_recomputation_if_allowed  s    D!8 4;;

(=(=> @KK))-??  >>m $DII*TYYu-=u~~"N
 	(DII$5Ir>   r   r  sinkr  _outr   r          start_nodes	max_rangec                    g }| D ]*  }t        j                  |
j                  |      |df       , t        |      dkD  rt        j                  |      \  }}}|s
j                  |      S |j
                  D ]_  }
j                  |      s
j                  |      |kD  r*
j                  |      | 	||      f}||vsJt        j                  ||       a t        |      dkD  r|S )z
        Finds the first unfusible node in the chain of nodes starting from
        `start_nodes` and returns its position.
        Tr   )heapqheappushrn   r   heappopra  rg   )r  r  sorted_nodesr\   r  r7   node_is_fusiblerh  r   r=   r}  s            r<   find_first_unfusiblez+solve_min_cut.<locals>.find_first_unfusible>  s    
 9; 	OANN<)*@*@*CQ)MN	O ,!#',}}\'B$At_" --d33

 
:++D1 --d3i? !..t4"4.6C
 ,.|S9
:	 ,!# r>   z1used above/below fusible %s:(%s) -> %s -> %s:(%s)r=  ztoo long %s %s %s %sr  z-Failed to compute min-cut on following graph:
c              3   ,   K   | ]  }||   f  y wr9   rN   )r[   r\   r  s     r<   r]   z solve_min_cut.<locals>.<genexpr>  s     8Q$8s   c              3   0   K   | ]  }|v s|f  y wr9   rN   )r[   vnon_reachableus     r<   r]   z solve_min_cut.<locals>.<genexpr>  s     Aa=.@q!fAs   		c              3   (   K   | ]	  }|     y wr9   rN   r[   r7   name_to_nodes     r<   r]   z solve_min_cut.<locals>.<genexpr>  s     2d	2s   c                     |    S r9   rN   )r   node_idxs    r<   r_   zsolve_min_cut.<locals>.<lambda>  s    (1+ r>   r`   );r   get_default_op_listr.   r   r6   r/   rF  networkxImportErrorr   floatDiGraphr   rS   rQ   r  r   r  r  r   r   r   rg   r~   r   r   r  r   r   r   r   rV   ra  ro   rL   rM   rq   rw   rd   rn   r   r  r   rx   r  r  r  r  minimum_cut	Exceptionjoin	readwriteedgelistgenerate_edgelistvisualize_min_cut_graphr  get_name_to_noder  rb   )1r   r}  r~  r  joint_module_opsops_ignorednxer  r  r  r7   is_non_tensor_nodeweightrh  r  	used_nodeordersfw_usersfirst_unfusible_usevisited
start_nodefusiblestart_orderr  r  	cut_value	partition	reachablecutsetnbrs	cut_nodesnode_innode_out	node_namer  r   r  r  r  r=   r  r  r  r  r  r  r  r  s1    ```                                 @@@@@@@@@@@@r<   solve_min_cutr  ,  s    <"$H% &
#))&
 

 ' 4
$554
 *
 
 	9;G"A&0dFe < zz|H(2L6 !! 3X77h9...9+++!!$))e"3Vdhh!O dii&0&488L$
 dii%/$((Kd248(.
 ##D).Ft.L(. "E}DII'EUtyy SDIIe4Dell)S%S 	 t=./F!$))--"6FDHH  %T9+P+PQF$))e+TYY-?&QJJ 	XDdii&0$))e2CdhhW	Xe3XL$rww- C C 4 ,,"44 	;I &OO++D1 &&t,F  "+I4L4LT4RH  6{Q&:8S[&Q#!)//2 ;D!006%2248;NN&y$7</$O%%229=/ %2248 5T:!;	;P 11'1|%++ !	VJ++J7''
3Z@2G $00<Kg,"w/3'>C  **3/+2CCG)HH."!..s3!..z: 15II VD!006&sD1 4w1G1G1Mt0TUV) g,"!	VF!~~h&I	9  )I}*4,F8i8 B4AdAAB ",I# !s|x},,,CRL	i !
 $K0L+4[5F5F+GHic4c	HH2	28ML %%]
  O
	R
R  @A2<<00BB8LMN)	& Is=   [ )[0;[5[58[: %]/	[-[(([-:A2],c                    dd l }dd l}|j                  j                  |       j	                         }|j                  |      d   }|j                         D ]c  }| |j                            |j                            d   }|j                  t        |             |t        d      k(  sS|j                  d       e t        j                  d       |j                  d       y )Nr   r  r  redz2Visualizing the failed graph to min_cut_failed.svgzmin_cut_failed.svg)r  pydotnx_pydotto_pydot	to_stringgraph_from_dot_data	get_edges
get_sourceget_destination	set_labelr   r  	set_colorr/   rF  	write_svg)r  r  r  
dot_format	dot_graphedger  s          r<   r  r    s    %%h/99;J))*5a8I##% "$//+,T-A-A-CDZPs6{#U5\!NN5!" HHAB,-r>   c                  x   g t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                   t         j"                  t         j$                  t         j&                  t         j(                  t         j*                  t         j,                  t         j.                  t         j0                  t         j2                  t         j4                  t         j6                  t         j8                  t         j:                  t         j<                  t         j>                  t         j@                  t         jB                  t         jD                  t         jF                  t         jH                  t         jJ                  t         jL                  t         jN                  t         jP                  t         jR                  t         jT                  t         jV                  t         jX                  t         jZ                  t         j\                  t         j^                  t         j`                  t         jb                  t         jd                  t         jf                  t         jh                  t         jj                  t         jl                  t         jn                  t         jp                  t         jr                  t         jt                  t         jv                  t         jx                  t         jz                  t         j|                  t         j~                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t        j                  t         j                  t         j                  t         j                  t         j                  } t         j                  t         j                  t         j                  g}|t         j                  t         j                  t         j                  t        j                  t         j                  t         j                  t         j                  t         j                  t         j                  g	z  }|}| g t        j                  t        j                  t         j                  t         j                  t         j                  t        j                  t        j                  t         j                  t         j                  t        j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t         j                  t        j                  t        j                  z  } | t         j                  t         j                  gz  } | |z  } | t               z  } | t         j                  gz  } | t        D cg c]  }t        |       c}z  } t        |       }t        t        dt        f      t         j                  t         j                  t         j                  g      }t         j                  t         j                  t         j                  t         j                   t         j                  t         j                  t         j                  t         j                  t         j
                  t         j                  t         j                  g}||z  }t        |t        |      |t        |      |      S c c}w )N.)r  r  subr`  atan2r  r  r  pow	remainderfmod__and____or____xor__
__lshift__
__rshift__eqnegegtleltr  bitwise_notceilfloorfracnegreluroundsilutruncr/   log10log1plog2lgammaexpexpm1erferfccosacoscoshsinasinsinhtanatantanhatanhsqrtrsqrtr  sigmoidsoftplus	thresholdthreshold_backwardclampwherelerpaddcmulgelugelu_backwardr  mean_grad_sum_to_sizesum_to_sizer  totype_asr  r  squeeze	unsqueezersub_to_copyaliasviewslicetr  broadcast_in_dimexpand
as_stridedpermuteselectr7  r  clone	full_likevarstd_unsafe_viewreshapebroadcast_tensorsscalar_tensorones	new_zerosr  arangetriuvar_meanisinfr   fullzerosempty
empty_likeargmaxmaximumiota'_low_memory_max_pool_offsets_to_indicesr(  gatherr   
zeros_liker   r   r   r   r   native_dropout	rand_like
randn_likemmconvolutionconvolution_backwardbmmaddmm#_scaled_dot_product_flash_attention'_scaled_dot_product_efficient_attention_flash_attention_forward_efficient_attention_forwardupsample_bilinear2d
_scaled_mmr1   )default_recomputable_opsrecomputable_view_opsr5   mr6   r4   r3   r2   s           r<   r  r    s   L0L0L0 	L0 	

	L0
 	L0 	L0 	L0 	L0 	L0 			L0 	L0 	L0 	L0 	L0 	L0  	!L0" 	#L0$ 	%L0& 	'L0( 	)L0* 	+L0, 	-L0. 	/L00 			1L02 	

3L04 			5L06 	7L08 			9L0: 	

;L0< 			=L0> 	

?L0@ 	AL0B 	

CL0D 	

EL0F 			GL0H 	IL0J 	KL0L 	

ML0N 	OL0P 			QL0R 	SL0T 			UL0V 			WL0X 	YL0Z 			[L0\ 			]L0^ 	_L0` 			aL0b 			cL0d 	

eL0f 			gL0h 	

iL0j 	kL0l 	mL0n 	oL0p 	qL0r 	sL0t 	

uL0v 	

wL0x 			yL0z 	{L0| 			}L0~ 	L0@ 	AL0B 			CL0D 	EL0F 	GL0H 			IL0J 	KL0L 	ML0N 	OL0P 	QL0R 	SL0T 			UL0V 	WL0Z "\\4>>4::F		




 
 %H $!		$!""$! 	

$! 		$!
 	$! 			$! 			$! 	$! 	$! 	$! 	$! 	$! 			$! 	$! 	

$!  	!$!" 	#$!$ 	%$!& 			'$!( 	)$!* 	+$!, 	-$!. 			/$!0 	1$!2 	

3$!4 	5$!6 			7$!8 	9$!: 	

;$!< 	

=$!> 	?$!@ 	A$!B 	C$!D 	

E$!F 	55G$! $L T[[ 99(/!   N1!3A!6 NN!":;HS#X./			dnndoo>J 	!!

0044%%))   #Z/K()8 + !Os   7d7c                 J    i }| j                   D ]  }|||j                  <    |S r9   )r   r   )r   r  r7   s      r<   r  r  {  s.    L '"&TYY'r>   memoryruntimes
max_memoryall_recomputable_banned_nodesc                    t         j                  }|dk(  rt        |||      S |dk(  rt        |||      S |dk(  rt	        |||      S |dk(  rZt
        j                  d       t        j                  | |||      }t	        ||t        |      j                  t        |            S t        |      r ||| |||      \  }}	d	||	fS t        d
|       )Ngreedyilpdpdynamic_memory_budget_dpzdynamic_memory_budget_dp is an experimental solver. It does not guarantee performance improvements. Additionally, it is not guaranteed to be stable.)r   r~   recorded_knapsack_input_memories recorded_knapsack_input_runtimes)graph_info_provider)knapsack_algomax_mem_budgetr  z,Not aware of memory budget knapsack solver: )r   activation_memory_budget_solverr#   r$   r"   r/   warningr!   inialize_from_graphr%   get_knee_point_memory_budgetcallabler   )
r   r{  r|  r}  r}  r~  SOLVERr  saved_node_idxrecomp_node_idxs
             r<   #_optimize_runtime_with_given_memoryr    s    33Fvx<<	5FHj99	468Z88	-	-?	

 0CC#*G-3-5	
 $7**)) + 	
 		
 
&	*0KY8U+
' ^_55I&RSSr>   no_dispatchr   r  c                     t        | j                        }fd}|D cg c]
  } ||       }}| j                         D cg c]
  } ||       }}| j                  ||      S c c}w c c}w )Nc                     t        |       S )Nr  )r   )dr  s    r<   realize_symbolz8_remove_symbols_without_guarding.<locals>.realize_symbol  s    H--r>   )stride)ro   shaper  new_empty_strided)r   r  r  r  r  r  s    `    r<    _remove_symbols_without_guardingr    sk    ME. )..1^A.E.)*4AnQ4F4uV44 /4s   A'A,c                 F   	 t         j                  }d }|dk(  ry|dk(  rat               5  ddlm} t        j                  | j                   j                  f      \  	|j                  	 fd      }|cd d d        S |dk(  rudd	l
m} t        j                  | j                   j                  f      \  	 |d
      5 }  j                  i 	 d d d        j                         }t        |d      S t        d|       # 1 sw Y   y xY w# 1 sw Y   ?xY w)Nc                 z   t        | t        j                        rAt        | j                  d   t        j
                        rt        | j                  d   d      S t        | t        j                        rAt        | j                  d   t        j                        rt        | j                  d   d      S t        | t        j                        r(t        | j                  d   t        j                        ryt        | t        j                        r(t        | j                  d   t        j                        ry| S )Nr   r  r        ?T)r   rL   rM   r~   r   r  r  r   r   r   r   r  s    r<   materialize_argz)estimate_runtime.<locals>.materialize_arg  s    a!j&M3AFF5MDQQ277#
166%=%,,(OAFF5MD99277#
166%=%..(Q277#
166%=%--(PHr>   testingr    profiler   )benchmarkerc                  (     j                    i S r9   )r   )r   r   r7   s   r<   r_   z"estimate_runtime.<locals>.<lambda>  s    ;4;;3O3O r>   flops)FlopCounterModeF)displayz Not aware of runtime estimator: )r   *activation_memory_budget_runtime_estimatorr  $torch._inductor.runtime.benchmarkingr  r   tree_mapr   r   benchmark_gputorch.utils.flop_counterr  r   get_total_flopsr  r   )
r7   RUNTIME_MODEr  r  msr  modecounted_flopsr   r   s
   `       @@r<   estimate_runtimer    s   DDL
 y 		"] 	H!???TYY<TULD&**+OPB	 	 
	 <DKK8PQfU+ 	)tDKK((	),,.=!$$=l^LMM#	 		) 	)s   ADDDD c                 P    !"#$%&'()* |dkD  s|dk  rt        d|       t        t        j                  t        j                  t        j
                  t        j                  t        j                        }t        j                  rt        |dddd      }|dk(  rj                  S t         |      \  }}|dk(  r|S dt        t        j                     dt        fd	! !j                        ( !|      &&(k  r|S &(fd
}dt        t        j                     f!&(fd"t        |ddd      }t         |      \  }} "|      |k  r|S t        |d      t               \  }	}
 "|	      |k  r|	S ddlm# t%        #fdj                  D              %dt$        t        j                     dt        t        j                     f#%fd} ||
      }|D cg c]1  }|j&                  j)                  dd      t*        j,                  k(  r|3 }}|D cg c]	  }||vs| }}t/        |t0        d       t3               dk(  rj                  |z   S  D cg c]  } |t1        |             c}' D cg c]  }t5        |       c}*ddlm)  ')*fd$t        j:                  r"$ *fd} |d       |d      g}|d   dd  |d   dd  k7  r|d   |d   fg}|r|j=                         \  }}|d   |d   z
  dk  r#|j?                  |       |j?                  |       F ||d   |d   z   dz        }|dd  |dd  k7  r|j?                  ||f       |dd  |dd  k7  r|j?                  ||f       |r|jA                          dd l!m"} |D cg c]  }|d   	 }}|D cg c]  }|d   	 }}|jG                  d       |jI                  ||d        tK        |      D ]"  \  }}|jM                  |d!|||   fd"d#d$%       $ |jO                  d&       |jQ                  d'       |jS                  d(       |jU                  d       |jW                         }|jY                          t[        j\                         }t        j^                  't        j^                  }t[        j`                  |d)       d*}tb        jd                  jg                         r?tb        jd                  ji                         r!d+tb        jd                  jk                          }tZ        jl                  jo                  |d,| d-tq                d.      }|js                  |       tt        jw                  d/|        $| 0      d   S c c}w c c}w c c}w c c}w c c}w c c}w )1Nr    r   zJThe valid ranges for memory budget are 0 <= m <= 1. The provided value is )rw   rx   ry   rz   r{   F)rw   rx   ry   rz   r   rW   c                 :    t        t        t        |             dz  S N    eA)r  mapr  )r   s    r<   estimate_activations_sizez:choose_saved_values_set.<locals>.estimate_activations_size	  s    3x./#55r>   c                     | dz  z
  z  S r  rN   )szmax_act_sizemin_act_sizes    r<   get_normalized_sizez4choose_saved_values_set.<locals>.get_normalized_size	  s    S\L899r>   activationsc                 &     |       z
  z
  z  S r9   rN   )r  r  r  r  s    r<   get_mem_ratioz.choose_saved_values_set.<locals>.get_mem_ratio	  s"    )+6E<'
 	
r>   )rw   rx   ry   )rz   )get_node_storagec              3   .   K   | ]  } |        y wr9   rN   )r[   r7   r  s     r<   r]   z*choose_saved_values_set.<locals>.<genexpr>5	  s     T4 0 6Tr  r  c                 r    | D cg c]&  }|j                   t        d      k  r |      vr|( c}S c c}w r  )r  rq   )r  r  r  input_storagess     r<   get_recomputable_banned_nodesz>choose_saved_values_set.<locals>.get_recomputable_banned_nodes7	  sD    
 "
 S)$Q'~= 
 	
 
s   +4r}   Tr  r  c           
      d           5  t        |t        | d      |      \  }}}d d d        t               }D ]  }	 |j                  |           |j                        sJ t        ||
|      \  }}	t        rt        |||       |fS # 1 sw Y   pxY w# t        $ r Y rw xY w)Nr   )r   r~  saved_node_idxsrecomputable_node_idxsexpected_runtimememories_banned_nodesruntimes_banned_nodesmin_cut_saved_values)	r  r  r   r  BaseExceptionissubsetr  r.   r   )memory_budgetr}  r   r  r  r  r  r  r   r  aggressive_optionsr~  r  r  r  s             r<   get_saved_values_knapsackz:choose_saved_values_set.<locals>.get_saved_values_knapsack_	  s    ] 	
 4%%M1%-	 &		 )3) 	C:3?@	   !>???'	
a !4'.K /'=!1&;&;%1	 ---Q	 	$ ! s   B B#B #	B/.B/c                 N     |       \  }}| t              |z
   |      fS )N)r}  r   )r  )r  r   r  r  r  r   r}  r  s      r<   estimate_for_budgetz4choose_saved_values_set.<locals>.estimate_for_budget	  s@    -FYK.*L* )*-==l+ r>   r  r  gMbP?r  )
      )figsizeo)markerz.4fzoffset points)r   r  center)
textcoordsxytexthazMemory Budgetz Runtime of Recomputed Componentsz:Pareto Frontier of Memory Budget vs. Recomputation Runtime)exist_okrX  _rank_memory_budget_paretor  z.svgz%Generated Pareto frontier curve at %s)r  r}  r   )<r   rv   r   ban_recompute_used_far_apart!ban_recompute_long_fusible_chains#ban_recompute_materialized_backwardban_recompute_not_in_allowlistban_recompute_reductionsaggressive_recomputationr   rQ   r  ro   rL   rM   r  torch._inductor.fx_utilsr  r   r~   r   r   rr  rb   r  r   r  torch.utils._mode_utilsr  visualize_memory_budget_paretor_  r   sortmatplotlib.pyplotpyplotfigureplotr  annotatexlabelylabeltitlegridgcfshowosgetcwdmemory_budget_pareto_dirmakedirsr   r  r  is_initializedget_rankpathr  r(   savefigr/   r  )+r   r}  r  r~  runtime_optimized_saved_valuesr  r  more_aggressive_optionsmore_aggressive_saved_values%aggressive_recomputation_saved_valuesr  r  recomputable_banned_nodesr  must_save_nodesr7   r  optionsbisectslhsrhsmidpltitemx_valuesy_valuestxtfigfig_dirrank_suffixfig_namer  r~  r  r  r  r  r  r  r  r  r  r  s+   ``                             @@@@@@@@@@@@r<   choose_saved_values_setr    s   
 qMA-XYfXgh
 	
 $$AA#)#K#K%+%O%O & E E88O &&!"'',).$)
 (5)%"A --6RWW 6% 6 -Y-=-=>L,-KLL|#--:
4= 

 &##(%*	 '4Y 7'# ! 12]B++  % ;HY 2;7)< :;mK449T9CSCSTTN
 )
	bgg
 !>l K +66::k5)-=-G-GG 	
O  -!0H! ! %+!x%! ()Q./112O-.HQK( ,I#' 4). ).V ,,	 	 's+-@-EF1:ab>WQZ^+
GAJ/0G";;=Sq6CF?T)NN3'NN3')3q6CF?a*?@qr7c!"g%NNC:.qr7c!"g%NNC:.  	'(/0DG00(/0DG00 	

7
#8C0  ) 	FAsLLs)hqk"*  	 	

?#

56		NOggi
))+**655GKK$/))+0A0A0P0P0R"5#4#4#=#=#?"@AK77<<+K=:L:N9OtT
 	H;XF %#yk	 	]
!^ 10s*   '6V
#	V-V&VV"V4V#c                    ddl m d }fd}t        j                  j	                         rct        j                  j                         rDt        j                  j                         dkD  r" ||       r ||       rt               5          5  |D cg c]  }|j                   c}g}t        t        j                  j                               D cg c]  }g  }}t        j                  j                  ||d          t        |       }g }	i }
t        |      D ]w  \  }}|D cg c]  }||   	 }}d}|D ]C  }t        |      }||z  }|t        j                  j                         k(  s5||
|j                  <   E ||
d<   |	j                  |       y t        j                   |	t        j                  j"                  j%                               }t        j                  j'                  |t        j                  j"                  j(                  j*                         t-        t        j.                  |      j1                               }d	| d
|
 t3        dd fd       ||   D cg c]  }||   	 }}d d d        d d d        |S |S c c}w c c}w c c}w c c}w # 1 sw Y   )xY w# 1 sw Y   |S xY w)Nr   )unset_fake_temporarilyc                     | j                   D ]K  }t        |j                  t        j                  j
                        s2|j                  j                  dv sK y y)N>   c10d_functionalr  TF)r   r   r   r   r  rp  rq  )r   r7   s     r<   has_collectivesz3_sync_decision_cross_ranks.<locals>.has_collectives	  sM    %% 	DUZZ22++''+RR		
 r>   c                     dj                  d | j                  D              }t        j                  |j	                  d            j                         }t        t        j                  j                               D cg c]  }d  c}t               5          5  t        j                  j                  |       d d d        d d d        t        fdD              S c c}w # 1 sw Y   *xY w# 1 sw Y   .xY w)N/c              3   4   K   | ]  }|j                     y wr9   rZ  )r[   r   s     r<   r]   zE_sync_decision_cross_ranks.<locals>.has_same_nodes.<locals>.<genexpr>	  s     >qAFF>r   zutf-8c              3   .   K   | ]  }d    |k(    ywr  rN   )r[   r   
all_inputss     r<   r]   zE_sync_decision_cross_ranks.<locals>.has_same_nodes.<locals>.<genexpr>	  s     :!:a=A%:r  )r  r   hashlibsha256encode	hexdigestr   r   r  get_world_sizer  all_gather_objectr  )r   node_strrQ   r  r  r  s       @r<   has_same_nodesz2_sync_decision_cross_ranks.<locals>.has_same_nodes	  s    
 88>K,=,=>> 89CCE$)%*;*;*J*J*L$MNqdN
] 	D24 	D//
FC	D 	D :z:::	 O	D 	D 	D 	Ds*    	C#C4!C(?C4(C1	-C44C=r    z
total size)r  r   zpicked_rank_idx=z, saved_nodes of current rank=rn  c                      dddS )N)aot_joint_graph_sync_decision_cross_ranksrq  rr  rN   rN   r>   r<   r_   z,_sync_decision_cross_ranks.<locals>.<lambda>
  s    G (% r>   c                       S r9   rN   )sync_decision_cross_ranks_strs   r<   r_   z,_sync_decision_cross_ranks.<locals>.<lambda>
  s    #@ r>   r~  )torch._subclasses.fake_tensorr  r   r  r  r  r  r  r   r   r   r  r  r  r  r   r*  distributed_c10d_get_object_coll_device
all_reduceReduceOpMAXrq   argminr  r   )r   r   r  r"  r   objectsr  saved_ops_names_all_ranksr  saved_sizessaved_ops_with_sizesr  saved_ops_namesop_namesaved_nodes
saved_sizer7   size_of_nodesaved_sizes_tensorpicked_rank_idxr\   r&  r  s                        @@r<   _sync_decision_cross_ranksr9  	  s    E; 	&&(,,.,,.2K(;'] *	24 *	(45156G!%"3"3"B"B"DE::% : //0I7ST:V+K8L%'K35 (12K(L 	/$_DST|G4TT
' GD#+D>L,.Je//88:::F,TYY7	G
 6@$\2"":.	/ "'((99QQS" (("u'8'8'I'I'R'R'V'V )  "%,,/A"B"G"G"IJO.>>OOm  oC  nD  -E) A *C?)S$%QL Q*	 *	X <W 6: U:Q*	 *	 *	X sb   J5J)J--J)	J#A	J),J
8:J)3D J)3J$?J)J5J))J2	.J55J?c                    d}|rdnd}t        t        | j                  j                  d                  }| j                  j                  dt        j
                  j                  j                        D ]  }t        | |j                  d   j                        }t        |t        j                        sBg }t        |j                  j                  d            D ]  \  }}	||	j                  v s| j                  j!                  |      5  | j                  j#                  | d|       }
|d	z  }|	j$                  d
   |
j$                  d
<   |
}|j'                  |
       ddd        |s| j                  j!                  |      5  | j                  j)                  dt        j
                  j                  j                  g |j                  |i       }|j+                  |d       ddd       |j$                  j-                  d      }|r8|\  }}g ||D cg c]  }|j$                  d
    c}}||fj$                  d<   | j                  j/                  |        | S # 1 sw Y   xY w# 1 sw Y   xY wc c}w )u  
    Graph-safe RNG lets torch.compile use CUDA Graphs for graphs with RNG ops.
    For graphs without HOPs, the partitioner adds placeholder nodes
    fwd_rng_state_* and bw_rng_state_* to the forward and backward graphs. At
    runtime, the AOTDispatcher retrieves these RNG states and passes them to the
    compiled graphs.

    This works well for no-HOP graphs. With HOPs, the partitioner runs
    recursively: it first partitions the HOP (producing forward/backward HOP
    subgraphs) and then stitches them back into the outer joint graph. For HOPs
    that contain RNG ops, the outer joint graph now includes HOP subgraph
    modules with extra RNG placeholders. We must thread these placeholders
    through the outer module partitioned forward and backward graphs—this
    function does exactly that. It collects the RNG placeholder nodes from the
    HOPs and creates corresponding placeholders in the outer forward and
    backward graphs.

    There is a catch: for a short period, the joint graph is in a “bad” state.
    The HOP subgraphs expect additional inputs (because of the new
    placeholders), but the outer graph call sites don't yet provide them. We
    can't fix this in the joint graph because the joint graph's input signature
    is fixed (primals, tangents). As a compromise, we keep the joint graph in
    somewhat of a bad state for some time and, once the outer forward and
    backward graphs are partitioned, insert the corresponding RNG placeholders
    and wire up the calls.
    r   r2  r1  r   r   r   )r   r   r  r    r   NT)propagate_metaeager_input_vals)r   r  r   r   r   r  r  invoke_subgraphr8  r   r   r   rL   r  r  r   r
  r   r~   r   r,  r  r   r  )moduler   r   
rng_string
last_inputhop_noder   new_rng_inputsr  placeholder_noder&  new_hop_node_with_fixed_args
eager_vals
eager_argseager_kwargsinpnew_eager_argss                    r<   thread_graphsafe_rng_from_hopsrJ  )
  su   8 I$/_Jhv||66-6HIJJLL++599#9#9#I#I ,  .2 68==#3#:#:;h/N)2))]);* 9%% !1!6!66  55jA 9$*LL$<$<)l!I;7%	 "Q	0@0E0Ee0L	u-%.
&--i89 99 \\11(; 	39<<3K3K'		..>>9(--9.9	40 224T 3 	 &]]../AB
/9,J&#&5CDc#((5/D&N
 '$M0556HI ''1].2` MI9 9	 	" Es    AI,A#I9 J,I69J	)r  c          	      
	   | j                   j                          | j                          | j                   }t        j                  rt        |      }|| _         | j                   }t        |       }t        |       }	|rt        |       } t        j                  st        |        t        |        fd}
|g } |
| |      }t        |j                        dk(  rt        | |||j                        S t!        | j                   j"                        D ]  }|j$                  dk(  rt'        d      |_        #|j+                  |      sd|_        <t'        d      |_        |j,                  D ]*  }t/        |j(                  |j(                  dz         |_        ,  t        j0                  }|j"                  D ]=  }t3        |j4                  j7                  dd      t8              s.|j4                  d   } n t;        |||	      }t        j<                  rt=        ||      }t?        tA        tB        |            }t?        tA        d
 |            }tE        | |||j                        \  }}|r|	rtG        | ||t        |            \  }}tI        |      }tK        |      }tK        |      }tM        |d      }tM        |d      }tN        rtQ        |D cg c]  }tS        |      tU        |      f c}      }tW        d |D              dz  }tX        j[                  d|       tX        j[                  d|       t]        d |j                   j"                  D              }t]        d |j                   j"                  D              }||z  }t_        t&              }|j                   j"                  D ]R  }|j`                  |v stc        |jd                  d      s)|tU        |jd                  jf                        xx   dz  cc<   T tX        j[                  dt        |      t        |      t        |             tQ        |ji                         tk        jl                  d      d      }tX        j[                  d|       ||fS c c}w )ax  
    Partitions the joint graph such that the backward recomputes the forward.
    Recomputing helps in trading off memory bandwidth with computation.

    To create the fwd and bwd graph, we copy the joint graph, manually set the
    outputs to just original forward or backward outputs. And then we run the
    resulting graphs through dead code elimination.

    .. warning::
        This API is experimental and likely to change.

    Args:
        joint_module(fx.GraphModule): The joint forward and backward graph. This
            is the result of AOT Autograd tracing.
        _joint_inputs: The inputs to the joint graph. This is unused.
        compiler: This option determines the default set of recomputable ops.
            Currently, there are two options: ``nvfuser`` and ``inductor``.
        recomputable_ops: This is an optional set of recomputable ops. If this
            is not None, then this set of ops will be used instead of the
            default set of ops.
        num_fwd_outputs: The number of outputs from the forward graph.

    Returns:
        Returns the generated forward and backward Fx graph modules.
    c                    t        | j                        t               | j                  j                  D ]m  }|j                  dk(  r d|j
                  v rj                  |       nt        |      rj                  |       |v sSj                  |j                         o t        t        t        | j                  j                              }t        t        t        | j                  j                              }||z   }t        |       \  }}}}	j                  d |D               t        | j                  |||d      }
t        fd|
j                  D              t        fd| j                  j                  D              }t        fdt!        |      D              }d	}i }| j                  j                  D ]  }|v s|||<   |d
z  } t#        ||||      S )Nr   r   r  c              3   F   K   | ]  }||j                   dk7  s|  y w)Nr   r   )r[   r  s     r<   r]   zNmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<genexpr>
  s$      !
am8HA!
s   !!!r   c              3   Z   K   | ]"  }|j                   d k7  r|j                      $ ywr  r  r  s     r<   r]   zNmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<genexpr>
  s.      <
ww(" #<
s   (+c              3   2   K   | ]  }|vr|vr|  y wr9   rN   )r[   r7   rS   rd   s     r<   r]   zNmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<genexpr>
  s*      :
,,=N1N :
s   c              3   2   K   | ]  \  }}|v s|  y wr9   rN   )r[   r  pr  s      r<   r]   zNmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<genexpr>
  s"      1
!Qa;X6XA1
s   r   r    )r  r   r   r   r   r   r  r   r  ra  ro   r  r   r   r   r   r  rP   )r   r  r7   r  r  rQ   r   r   r   r   r  rT   rV   fw_cntrU   r  rS   rd   r   s    `             @@@r<   classify_nodesz;min_cut_rematerialization_partition.<locals>.classify_nodes
  s   '(:(:;1; &&,, 	5Dww-'J$++,E!%%d+%d+!%%d+((!((4	5 VJ0B0B0H0HIJ!%&(:(:(@(@A"
 !77$\?S 	G["35F 	   !
"!
 	
 @5F	
 2< <
*00<
 2

 0: :
$**00:
 0

 '1 1
#M21
 '
#  &&,, 	D((!'!	 '
 	
r>   Nr   )r   r  rV   r   r  r    r  )r  c                     t        |        S r9   r  )r\   s    r<   r_   z5min_cut_rematerialization_partition.<locals>.<lambda>  s    [^); r>   r  F)r   Tc              3   2   K   | ]  }t        |        y wr9   )r  r  s     r<   r]   z6min_cut_rematerialization_partition.<locals>.<genexpr>,  s     'J'Jr  z'Theoretical Activations Stored: %.2f GBz,Theoretical Per Activation Storage Sizes: %sc              3   T   K   | ]   }|j                   d k(  s|j                   " ywr   Nr  r   s     r<   r]   z6min_cut_rematerialization_partition.<locals>.<genexpr>1  $      %
477o;UDII%
r  c              3   T   K   | ]   }|j                   d k(  s|j                   " ywrW  r  r   s     r<   r]   z6min_cut_rematerialization_partition.<locals>.<genexpr>4  rX  r  r  z# remat/fw/bw: %d/%d/%dr  zCount of Ops Rematerialized: %s)7r   r   rZ  r   cser+   r   r   r|  r  rs  rw  r   rS   r  rV   r  r   r   rq   r  rg   ra  r  activation_memory_budgetr   r~   r   r  r  r9  ro   r  r   r  rn  r  r-   rJ  r.   rb   r  r   r  r/   rF  r   r   r   r   r   r  r  r  r  )r   r  compilerr   r  r   	cse_graphr   graph_has_recomputable_opsgraph_has_recomputable_rng_opsrS  r}  r7   rh  r  r   r  r  r  r  sorted_sizestotal_activations_size_gbfw_module_nodesbw_module_nodesremat_nodescountsrematerialized_opss      `                       r<   r  r  {
  s   D **,D zz &	&$$K!5l!C%=l%K"!-l;::|,|,4
l %,(*%|-JKI
 9&&'1, +*G(1(M(M
 	
 ++112 R77h #CD))$/ !D #CD

 R$'(9(94;L;Lq;P$Q!RR 33M!! diimmOT:EB IIo6M +#L
 ((1+|L6+|<=O;\JKL 4''$-$I$IIy ")#8iC4H$ Iy 4I>I y)Iy)I.yeLI.ydKIlKSV4KL %('J\'J$JS$P!:<UV 	?N$ %
"+//"7"7%
 
 % %
"+//"7"7%
 
 &7!,S!1OO)) 	>DyyK'GDKKAR,Ss4;;6678A=8	> 	%  		
 $LLN 3 3A 6
 	24FGi= Ls   R tracedfnamefigname
clear_metaprogparse_stack_tracedot_graph_shapec                    |rWt        j                  | j                        }t        j                  | |      } | j                  j
                  D ]	  }i |_         t        j                  j                  |      \  }	}
|
sdt        j                  z   }
t        j                  d|	|
       t        j                  | |||      }|j!                         }t#        |d|
j%                  d      z         }|	 |
 }|	 ||       y  |||       y )Nr4  zWriting FX graph to file: %s%s)rl  rm  write_)rk  )rY  deepcopyr   rL   r  r   r~   r  r  splitextr   torch_compile_graph_formatr/   rF  r   FxGraphDrawerget_main_dot_graphr8  lstrip)rg  rh  ri  rj  rk  rl  rm  r   r7   baseextgr   write_methods                 r<   
draw_graphrz  J  s     MM&,,/		2LL&& 	DDI	  'ID#F555HH-tS9""+'		A 	
A1hC89LfSENE|UU&r>   r9   )g      @rK  )r    )rL  )fx_graphTNFN)rY  rr   r  r  r  loggingr  r  r  os.pathr  r   dataclassesr   r   typingr   r   r	   r
   r   r   torch._inductor.inductor_primstorch.distributedtorch.fxrL   torch.utils._pytreeutils_pytreer   torch._dynamo.utilsr   r   ;torch._functorch._activation_checkpointing.ac_logging_utilsr   torch._inductorr   r  torch._loggingr   r'  r   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r   r   r   torch.fx.passesr   torch.utils._ordered_setr   torch.utils.checkpointr   rX  -_activation_checkpointing.graph_info_providerr!   "_activation_checkpointing.knapsackr"   r#   r$   ,_activation_checkpointing.knapsack_evaluatorr%   _aot_autograd.descriptorsr&   r'   _aot_autograd.logging_utilsr(   _aot_autograd.utilsr)   r*   compile_utilsr+   r,   r-   sympydebug_partitionerr.   rt   rK   	getLoggerrG   r/   Loggerr  r  r  r1   rP   rv   rM   r   r  r   r   rq   r   r   r   r   ro   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r9  r)  r  r/  r:  rA  rC  rG  rU  ri  rp   r  r  r  r  r  r  r  r  cacher   r  r  r  r7  rn  rs  rw  r|  r  r  r  r  r  r  r  r  r  r  r9  rJ  r  rz  rN   r>   r<   <module>r     s
           	  # * @ @  %   $ $ < 6 + A ? H L  ) / 3  L 
 L L ; M H H  %66 t 6'g''1W^^ 1yy~~		 > > >2      >    T r~~ $ 2>> d  C  
  #JJMJ "'']J 	?	J
 smJ XXJZRWW  Gbgg G$ Gbgg $ bgg $ XRWW X XCrww C4 CJbgg J$ JKrww K4 K-bgg -$ -rww 4 J..J
4=$rww-i$y/IJJ$$rww- s d277mU277^;< 	J88>>J
((--J 
J 
	JZB!88>>B!
((--B! B! 	B!
 B! B! XX]]B!J9%,, 95 9"D- G%((-- GD G45 5	5;; 	5 	8G%((.. 8GT 8GvMG%((.. MGT MG`UUU CL)U 
	Ux BF	,Wrww-,W,W ,W "**RWW*=!>	,W
 
,Wj BFx"..x"rww-x" "'']x"
 x" "**RWW*=!>x" 2>>2>>)*x"@ :>AE\..\
 $,DI#6\ "**RWW*=!>\ 2>>2>>)*\~ c("# " "277 s :Rbhh R  "Pbggsl!3 PU277C<=P8Q PJBNN Jr~~ JZZ*xx##Z*xx##Z* XX]]Z* XX]]	Z*
 LLZ* Z* HHMMZ* HHMMZ*zR ..R ~~R  ~~R  	R 
 2>>2>>)*R j@ @D @R^^  .# #BNN #T /3	S&S&S& #S& z"''*+	S&l."eW ePBHH +T+TK+T 5k+T 	+T
 +T $(=+T 5$s)T#Y&'+T\ 05 5 5 5$NT o	o	o	 
"'']	o	dNN/3EHHMM/BNbOj L  :>L ..L  $,DI#6L  2>>2>>)*L d ,0#%)'HH  '' ' 	'
 5d3i(
)' ' c]' 
'r>   