
    i"                        d Z ddlZddlZddlZddlZddlZddlmZmZ ddl	m
Z
mZ ddlmZmZmZmZmZ ddlZddlmc mZ ddlmZ ddlmZmZmZmZmZ ddlm Z m!Z! dd	l"m#Z#m$Z$m%Z%m&Z& dd
l'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z. ddl/m0Z0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z=m>Z>m?Z?m@Z@mAZAmBZBmCZC ddlDmEZEmFZF ddlGmHZHmIZI ddlJmKZKmLZL ddlMmNZN ddlOmPZP erddlQmRZR dZS e0eTd      ZU e0eTd      ZVdeWfdZXdeWfdZYdeej                     deej                     fd Z[d!eded"ef   fd#Z\ G d$ d%      Z] G d& d'      Z^ G d( d)      Z_ e^       Z`g d*Za eNeeej                  eg      Zc ej                         aed+efdefd,Zg G d- d.      Zhd/aid/ajd/akd/aldamej                  	 	 dDd0ed"ef   d1eWd2eWde
d3   fd4       Zoej                  de
d3   fd5       ZpdEd6Zqd7eej                     d8eeL   d9eeL   d:eLd;eeL   d<eeL   d=eLderej                     fd>Zsd?eeW   d@ej                  dAeeej                        dBej                  dereej                        f
dCZty)Fa  
Provides functionality for compiling PyTorch's autograd (automatic differentiation) system.

This module implements compiled autograd, which traces and optimizes backward pass
computations at runtime. The key components are:

- AutogradCompilerInstance: Traces and compiles autograd graphs using FX
- Context managers (_enable/_disable): Control when compiled autograd is active
- Utility functions: Support graph manipulation, tensor operations, and hooks

Compiled autograd can significantly improve backward pass performance by removing
Python overhead and enabling additional optimizations. It works by capturing
backward computations into an FX graph that can be compiled and optimized,
while maintaining the same semantics as eager mode autograd.
    N)Counterdefaultdict)	GeneratorSequence)AnyCallableOptionalTYPE_CHECKINGUnion)enable_python_dispatcher)call_accumulate_gradcall_backward	call_hookFakeCompiledAutogradEngineunwrap_maybe_dynamic_intGetItemSourceLocalSource)countersget_chromium_event_loggerlazy_format_graph_codeset_locals_to_steal)AutogradLazyBackwardCompileInfo%CachedAutogradLazyBackwardCompileInfo)compile_contextCompileContext	CompileIdSource)getArtifactLoggertrace_structuredclone_preserve_strides)FakeTensorMode)
FakeTensor)GraphModule)BackwardState)	decomposedisable_autocast_cachedisable_proxy_modes_tracingfetch_object_proxyProxyTorchDispatchModePythonKeyTracertrack_tensor_tree)
DimDynamicShapeEnv)preserve_node_metaset_stack_trace)FloatLikeTypeIntLikeType)
OrderedSet)CapturedTraceback)Proxya  You can turn off compiled autograd by either:
1. Moving the unsupported autograd call outside of the torch.compile'd region.
2. Wrapping the unsupported autograd call in the torch._dynamo.compiled_autograd._disable() context manager.
3. Setting torch._dynamo.config.compiled_autograd=False for the torch.compile call containing the unsupported autograd call.
4. Setting torch._dynamo.config.compiled_autograd=False at the start of the program.compiled_autogradcompiled_autograd_verbosereturnc                  h    t         j                  j                  j                  j	                  d      S )Nr8   )torch_logging	_internal	log_stateis_artifact_enabled     Y/var/www/html/engine/venv/lib/python3.12/site-packages/torch/_dynamo/compiled_autograd.py snapshot_verbose_logging_enabledrC   T   s(    >>##--AA# rA   c                  ^    t         j                  j                  j                  j                  S N)r;   	_inductorconfigtriton
cudagraphsr@   rA   rB   snapshot_cudagraph_enabledrJ   Z   s    ??!!((333rA   xc                      | t        |       S | S rE   r!   )rK   s    rB   maybe_clonerM   ^   s    }%a((HrA   CompiledFunction.c                 \   t        | j                  t              r| j                  j                  S t        | j                  t              rLt
        j                  j                  j                         5  | j                  j                         cd d d        S t        d      # 1 sw Y   y xY w)NzEUnexpected Lazy Backward Compilation Info Type. Please file an issue.)
isinstance_lazy_backward_infor   	bw_moduler   r;   _subclassesfake_tensorunset_fake_temporarilybw_module_fnAssertionError)rN   s    rB   extract_bw_modulerX   d   s    ,,.M  33===	,,.S
 **AAC 	G#77DDF	G 	G S
 	
	G 	Gs   3B""B+c                       e Zd ZdeddfdZdej                  j                  ddfdZde	ej                     ddfdZd	e	ej                     ddfd
Zy)
NaNCheckeraccumulate_gradr9   Nc                 <    || _         g | _        i | _        g | _        y rE   )r[   params_indicesparams_to_checkoutput_names)selfr[   s     rB   __init__zNaNChecker.__init__   s"    .)+8:')rA   graphc                 d   t        t        |j                              }|j                  dt              }|j                  d      d   j
                  d   }| j                  t        |      k(  r| j                  | k(  sJ |D ]  }|j
                  d   }|j                  t        j                  k(  r.|j
                  d   |u rt        |j
                  d   t              sJ | j                  j                  |j
                  d           |D cg c]  }|j                   c}| _        y c c}w )Ncall_functionoptargetoutputrf   r      )nextiternodes
find_nodesr   argsr[   boolrg   operatorgetitemrP   intr]   appendnamer_   )r`   rb   inputs_nodeacc_grad_nodesoutput_nodesnode
param_nodes          rB   prep_with_graphzNaNChecker.prep_with_graph   s&   4,-))'; * 
 ''8'4Q7<<Q?##t(
 
""<'78	9 9 # 
	;D1J
 !!X%5%55OOA&+5zq1378 &&zq'9:
	; 4@@4TYY@@s   D-inputsc                     | j                   sy | j                  D ]W  }||   j                  }|.t        j                  |      j                         rJ d| d       ||   | j                  d| d<   Y y )Nz9Compiled autograd running under anomaly mode with inputs[zD] already having NaN gradient. This is not supported. {TURN_OFF_MSG}zinputs[])r[   r]   gradr;   isnananyr^   )r`   r|   idxr   s       rB   prep_with_inputszNaNChecker.prep_with_inputs   s    ## && 	AC#;##D ;;t,002 OPSu UQ Q2
 6<C[D  73%q!12	ArA   outc                 6   | j                   r|rJ g }| j                  j                         D ]R  \  }}|j                  J t	        j
                  |j                        j                         sB|j                  |       T |rt        ddj                  |       d      y g }t        |      D ]G  \  }}t	        j
                  |      j                         s*|j                  | j                  |          I |rt        ddj                  |       d      y )Nz9Compiled Autograd returned NaN gradients for parameters: ,.z;Compiled Autograd returned NaN gradients for output nodes: )r[   r^   itemsr   r;   r   r   rt   RuntimeErrorjoin	enumerater_   )r`   r   
nan_params
inputs_strparam	nan_gradsir   s           rB   checkzNaNChecker.check   s!   N7$&J%)%9%9%?%?%A 2!
Ezz---;;uzz*..0%%j12
 "OPSPXPXYcPdOeefg   $&I$S> ;4;;t$((*$$T%6%6q%9:; "QRURZRZ[dReQffgh  rA   )__name__
__module____qualname__rp   ra   r;   fxGraphr{   tupleTensorr   r   r@   rA   rB   rZ   rZ      sn    * * *AUXX^^ A A0AuU\\': At A u||,  rA   rZ   c            
       J    e Zd ZddZdededef   dededef
d	Zdedefd
Z	y)OpNamespacer9   Nc                 "    t               | _        y rE   )r   custom_function_name_counterr`   s    rB   ra   zOpNamespace.__init__   s    :A))rA   ru   fn.is_custom_functionis_traceablec                    |r1d|z   }| j                   |   }| j                   |xx   dz  cc<   | | }t        | |      rJ t        |||      |r,t        | |t        j
                  j                               |S t        j
                  j                  dt        dt        dt        ffd       }t        | ||       |S )NCppNoderj   ro   kwargsr9   c                       | i |S rE   r@   )ro   r   results     rB   run_non_traceable_cpp_in_eagerz7OpNamespace.add.<locals>.run_non_traceable_cpp_in_eager   s    t.v..rA   )	r   hasattrOpsetattrr;   _dynamoallow_in_graphdisabler   )r`   ru   r   r   r   countr   r   s          @rB   addzOpNamespace.add   s     t#D55d;E--d3q83VE7#D4&&&D"01D$ < <V DE  ]]""/c /S /S / #/ D$ >?rA   c                     t        | |      S rE   )getattr)r`   ru   s     rB   getzOpNamespace.get   s    tT""rA   r9   N)
r   r   r   ra   strr   r   rp   r   r   r@   rA   rB   r   r      sY    D S#X !	
  
6# # #rA   r   c                   N    e Zd Zdededef   deddfdZded	edefd
ZdefdZ	y)r   ru   r   .r   r9   Nc                 <    || _         || _        || _        d| _        y )Nz#torch._dynamo.compiled_autograd.ops)r   r   r   r   )r`   ru   r   r   s       rB   ra   zOp.__init__   s!     "4?rA   ro   r   c                 &     | j                   |i |S rE   )r   )r`   ro   r   s      rB   __call__zOp.__call__   s    tww'''rA   c                 :    | j                   dz   | j                  z   S )Nr   )r   r   r   s    rB   __repr__zOp.__repr__   s    $t}}44rA   )
r   r   r   r   r   r   rp   ra   r   r   r@   rA   rB   r   r      s[    @@%c3h/@EI@	@(c (S (S (5# 5rA   r   )r|   sizesscalarshookspacked_datacompiled_autograd_idc           	      B    t        t        t        | d d                   S )N)r   frame_idframe_compile_id)r   r   r   )r   s    rB   make_compile_contextr     s(    %9!%	
 rA   c                   l   e Zd Zdedef   ddfdZdej                  dee	   de
fdZed	ed
edefd       Zdeej                     dee   deeeef      deeeeef         dededeeeej                     ee   ee   f   fdZdee   ddfdZdee   deej                     deej                     dededee   dee   fdZdee   dee   deej                     dedej8                  j:                  j<                  dee   deeej                     df   fdZdee   dee   d ee   d!ed"ee   d#ee   d$edeej                     fd%Z d&ee   d'ej                  d(ee   d)ej                  deej                     f
d*Z!dej                  fd+Z"d,ed-edef   d.ed/edef
d0Z#d,ed1ee   d2ed3ee   deej                     f
d4Z$d-edef   d2ed3ee   deej                     fd5Z%d6ed7ee   d2ed3ee   deej                     f
d8Z&d9ed:edej                  fd;Z'd<ej                  d=ej                  d>eddfd?Z(d@edef   d2edAedejR                  jT                  fdBZ+dCedDedej                  fdEZ,deej                     dCedFedeej                     fdGZ-deej                     dCedFedeej                     fdHZ.dee   dCedeej                     fdIZ/d7eej                     deej                     dCedeej                     fdJZ0dKej                  dCedeej                     fdLZ1dMejR                  jd                  dee   fdNZ3dOedefdPZ4djdQZ5de6e   fdRZ7dSede8fdTZ9d7edeedef   ef   fdUZ:ed2ee   deejR                  jv                     fdV       Z<edOejR                  jv                  defdW       Z=djdXZ>djdYZ?djdZZ@djd[ZAdjd\ZBdjd]ZCdjd^ZDd_edefd`ZE	 dkdaee   dbedeeeeef         dee   fdcZFddedeGfdeZHdfedgedheej8                  j                     ddfdiZJy)lAutogradCompilerInstancecompiler_fn.r9   Nc                 4   || _         t        j                         | _        | j                  j                  | _        t               | _        t        dd| j                        | _        t               | _
        t        | j                  d      | _        d | _        y )NT)allow_fallback_kernelsallow_non_fake_inputs	shape_envsymbolic)r   
contextlib	ExitStackstackcloser/   r   r#   fake_tensor_moder,   	fx_tracerr+   
proxy_modehooks_proxy)r`   r   s     rB   ra   z!AutogradCompilerInstance.__init__  st    &))+
ZZ%%
! .#'"&nn!

 )*0L,0rA   rK   sourcec                 t    t        |t        j                        sJ | j                  j	                  ||      S )N)r   )rP   r;   r   r   from_tensor)r`   rK   r   s      rB   	wrap_fakez"AutogradCompilerInstance.wrap_fake,  s2    !U\\***$$0060BBrA   ru   r   c                 ,    t        t        |       |      S rE   r   )ru   r   s     rB   r   zAutogradCompilerInstance.source0  s    [.44rA   r|   r   r   originsr[   
check_nansc                 "    t         d   dxx   dz  cc<   t        t               _        t	        t
               _        t         j                         _         j                  j                          |rt        |      nd  _        t        j                          _        t               j!                  d j                  d j                  id       t"        j$                  j'                          j(                  _        t"        j,                  j/                  t0               j(                  _        i  j(                  _        i  _         fdt8        D        \  } _         _         _         _          jB                  jE                  tG                      |\  }}	}
 jB                  jE                  tI                      |d	   }	 tK        |      D ]*  \  }} jM                  | jO                  d
|            ||<   , 	  jY                  |||       tK        |      D cg c]@  \  }} jZ                  j]                  | jO                  d|      t^        j`                        B }}}tc        te        |            D cg c]  } j:                  |    }}tK        |      D ]J  \  }} j(                  jg                  dth        ||   fi       ||<   ||    j6                  |jj                  <   L  jY                  |||	      }tK        |      D ]  \  }} jO                  d|      }tm        |t
              r/ jZ                  j]                  ||t^        j`                        ||<   Wtm        |tn              rL jZ                  jq                   jZ                  js                  ||t^        j`                        ||      ||<   tu        dtU        |              jY                  | j<                  |
       tK        |      D ]+  \  }} j<                  |    j6                  |jj                  <   -  jB                  jE                  tw        i               jB                  jE                   jx                          jB                  jE                   jz                          jB                  jE                  t}                       jx                  jZ                  J  jx                  jZ                  } jB                  jE                  t"        j,                  j~                  j                  j                  |             t        t        j                               |||fS # tP        $ r$}tS        dtU        |       dtV               |d }~ww xY wc c}}w c c}w )Nr7   capturesrj   graph_idTlog_pt2_compile_event)
tracer_clsc              3   Z   K   | ]"  }j                   j                  d |di        $ yw)placeholderr@   N)r   create_proxy).0ru   r`   s     rB   	<genexpr>z9AutogradCompilerInstance.begin_capture.<locals>.<genexpr>T  s.      
 NN''tRD
s   (+r   r|   zFound tensor of type z,, which is not supported by FakeTensorMode. r   rd   r   )r   dynamic_dim)hintr   zUnexpected scalar type: )Er   rk   COMPILE_COUNTERidr   rs   aot_id_counterr   r   	__enter__rZ   nan_checkertimetime_nsstart_time_nsr   log_event_startr;   nnModuler   rootr   r   r,   rb   tensor_attrssymnode_proxy_lookup_graph_placeholderssizes_proxyscalars_proxyr   packed_data_proxyr   enter_contextr0   r   r   r   r   	ExceptionNotImplementedErrortypeTURN_OFF_MSGbind_objects_to_proxiesr   $create_unspecified_symint_and_symbolr.   DYNAMICrangelenr   r   ry   rP   floatcreate_symfloatnodecreate_unspecified_symbolrW   r'   r   r   r(   experimentalsymbolic_shapes_suppress_guardsr   r   current_compile_id)r`   r|   r   r   r   r[   r   
args_proxyinputs_originssizes_originsscalars_originsrK   r   eval	sym_sizesr   proxiessymintr   symvalenvs   `                     rB   begin_capturez&AutogradCompilerInstance.begin_capture4  s    	$%j1Q61'.9#.>3DGG<&&(:D:o6$!\\^!#33!"&	 	4 	
 $hhoo/$xx~~~I&(#$&!
+
	
" 	

  !3!569@6 	

  !9!;< 1I	#F+ LQ"nnQHc0JKsL 	$$VZH &e,
 S NN??GS)""
	 
 16c)n0EF14##A&FF"9- 	@IAv44(	GAJ 6=QZD%%fkk2	@ ..y'=Q!'* 	LHC[[C0F#s##~~RR&& 
 C'#~~AANN<<%$.$6$6 = 
 !  B   %%?cKK'	L( 	$$Wd.@.@/R"7+ 	KIAv595G5G5JD%%fkk2	K 	

  2/

  !6!67

  1

  !7!9:$$..:::##--

  HH!!11BB3G	
 1134	
 	
C  	%'Qy0\]i\jk	
 Gs%   8U AV=V	VU>>Vcompile_reasonsc                 2    sJ t        dd fd       y )Nartifactc                      dddS )N!compiled_autograd_compile_reasonsjsonru   encodingr@   r@   rA   rB   <lambda>z>AutogradCompilerInstance.log_compile_reasons.<locals>.<lambda>  s    ;"! rA   c                       S rE   r@   )r  s   rB   r  z>AutogradCompilerInstance.log_compile_reasons.<locals>.<lambda>  s     rA   metadata_fn
payload_fn)r    )r`   r  s    `rB   log_compile_reasonsz,AutogradCompilerInstance.log_compile_reasons  s"      /	
rA   pinputspsaved_tensorssaved_tensorspctxctxmaybe_backward_state_idxc           	          j                         D cg c]  } j                  |       }}j                  }	t        |	      |	j                  |	j
                  |	j                  ~	t        j                         r(j                  D ]  }
|
j                  st        d       t        j                  j                  dt        t        j                     dt        t            dt        t"           dt"        ffd       } j$                  j'                  d|||g|i       d | j(                  |   dt*        t        j                     f fd	} |       }d
t"        dt,        dt        t"           dt        j                  f fd}t        j.                  j0                  j2                  j5                  ||      }t7        j8                   j                  |      }|S c c}w )Nz@torch.compile does not currently support higher order gradients.ctx_saved_tensorsctx_symints	flat_argsr9   c                 v    t        j                  j                  j                  j                  | |g| }|S rE   )r;   
_functorch_aot_autogradruntime_wrappers_backward_prologue_functional)r+  r,  r-  r   maybe_subclass_metadatametadatas       rB   call_aot_bwd_prologuezOAutogradCompilerInstance.proxy_call_aot_backward.<locals>.call_aot_bwd_prologue  sD     ""00AA__!'	
 C JrA   rd   kindrg   ro   r   c            	      F	   dt         j                  j                  dt        fd}  | j                        }t        |t        d u      z
        D cg c]  }|   	 }}j                         }t        |      t        j                        k(  sJ |D cg c]  }j                  |       }}||d t        |       |j                         d}i d }t              j                     rdj                      z  j                  xx   dz  cc<   dt        dt        ffd}	j                  j                  D ]  }
|
j                  d	k(  r1||   j                  } |	|
j                         |_        ||
<   |dz  }D|
j                  d
k(  rt        |
j"                        dk(  sJ |
j"                  d   D cg c]U  }t%        |t         j                  j&                        r-t         j                  j)                  |   j*                        n|W }}|
j                  dk(  r|
j,                  }j*                  j/                  |      }t1        j*                  j2                  |t5        |             j*                  j7                  d|di       } |	|
j                         |_        ||
<   x|
j                  dk(  r|
j,                  t         j8                  j:                  j<                  j>                  k(  r3t         j8                  j:                  j@                  j>                  |
_        j*                  j                  jC                  |
fd      } |	|
j                         |_        ||
<   <|
j                  dk(  r|
j,                  }j*                  j/                  |      }t1        j*                  j2                  |t5        |             j*                  j                  jC                  |
fd      }||_        ||
<   tE        d       |J dt         jF                  fd}|D cg c]/  }t%        |t         j                  j(                        r |       n|1 }}jI                  ||       |S c c}w c c}w c c}w c c}w )Nrb   r9   c                 Z    d}| j                   D ]  }|j                  dk(  r|dz  } |S  |S )Nr   r   rj   )rm   rf   )rb   num_argsry   s      rB   
num_inputszkAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.num_inputs   sA    !KK Dww-/ A   rA   r   _rj   	node_namec                     d d|  S )Naotr<  r@   )r=  deduped_aot_ids    rB   make_uniquezlAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.make_unique&  s    ^,Ai[99rA   r   rh   get_attrr@   rd   c                     |    S rE   r@   nvalue_remaps    rB   r  ziAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.<lambda>F      A rA   call_modulec                     |    S rE   r@   rD  s    rB   r  ziAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.<lambda>O  rG  rA   zshouldn't get herec                  t    t               5  t        j                  ddddd      cd d d        S # 1 sw Y   y xY w)Nr   {   r)   r;   zerosr@   rA   rB   dummyzfAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.dummyZ  s1    02 8 ;;q!Q378 8 8s   .7)%r;   r   r   rs   rb   r  _get_compiled_autograd_symintsr  symintsto_proxyrt   r   r   rm   rf   ry   ru   ro   rP   Noder6   r   rg   get_fresh_qualnamer   r   r   create_nodeopsatenviewdefaultreshape	node_copyrW   r   r   )r;  r:  r   	pall_argsrP  r  psymintsargs_idxpoutputsrA  ry   phrE  ru   qualnamer   rN  ooutputsr@  rF  aot_idrR   r(  pbackward_statepgradsr`   s                      @@rB   copy_paste_aot_backward_graphzWAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph  s    %((..  S   ")//2H#(Ct8S4T)T#Uq	I  88:Gw<3s{{#333329:Qa(:H:(0InG%*  1 HK7;H ![N""6*Ad&9&9&&A%B"CC'1,':s :s : "-- *?77m+"8,11B)$))4BG(*K%MHWW(tyy>Q...
 "&1	   &a7 {1~t~~F H   WW
*;;D#~~@@FHDNN//79d;ST!^^77
HbRTUF"-dii"8FK(.K%WW/{{eiinn&9&9&A&AA ',iinn&<&<&D&D!^^11;;6F #.dii"8FK(.K%WW-;;D#~~@@FHDNN//79d;ST!^^11;;6F %-FM(.K%()=>>U*?X '''85<< 8
 JRDE:a8a?G  (((;Nk ;> Xs   RRAR4Rsubclass_meta
is_runtimeunwrapped_argsc                 D    t         j                  j                  dt        dt        f fd       }t	        j
                  j                  |      }j                  j                  d|t        |      i       }j                         }j                  |g|g       |S )Nri  r9   c                  *    j                  |       S )N)rh  )creation_fn)ri  rh  rg  s    rB   make_subclasszkAutogradCompilerInstance.proxy_call_aot_backward.<locals>.proxy_subclass_constructor.<locals>.make_subclassi  s    $00J0WWrA   rd   r6  )r;   r   r   r   pytreetree_maprQ  r   r   r   allocate_dummyr   )rg  rh  ri  rm  punwrapped_argspoutputrh   r`   s   ``     rB   proxy_subclass_constructorzTAutogradCompilerInstance.proxy_call_aot_backward.<locals>.proxy_subclass_constructorf  s     ]]))Xs Xs X *X %oodmm^LOnn11$$?+	 2 G ((*F((&G9=MrA   )make_subclass_override)rO  rQ  _forward_clsrX   r4  r3  _aot_idr;   is_grad_enabledoutput_inforequires_gradr   r   r   r   r   r3   r   r   r   r   listrp   r/  r0  r1  _backward_epilogue_functionalrn  ro  )r`   r$  r%  r&  r'  r(  r)  r  r\  rN   output_alias_infor5  rf  rb  rs  resultspresultsrc  rR   r3  r4  rd  re  s   `    `           @@@@@@rB   proxy_call_aot_backwardz0AutogradCompilerInstance.proxy_call_aot_backward  s   ( /2.P.P.RSDMM!$SS ++%&67	#,,"2"J"J!))  "%-%9%9 !$22&Z  
	%	%	'5	!+.	 !	 		 
&	 ,, ( 
  - 	
 #/"../GHOc	tELL/A c	 c	J 01		,0	BJ3-	\\	( ""00AA__##=	 ` 
 ??4==':e Ts   G0output_metadatasbackward_idxc           
      x   | j                   J | j                   |   }| j                  |      }| j                  |      }	t        |j                  d      r| j	                  ||	||||      }
n(| j
                  j                  dt        ||	g|i       }
|
J t               5  g }t        |      D ]M  \  }}||
|   |j                  d        |\  }}}}|j                  t        j                  ||||             O | j                  ||
       d d d        t        |      S # 1 sw Y   t              S xY w)Nrv  rd   r6  )sizedtypelayoutdevice)r   rQ  r   ru  r  r   r   r   r)   r   rt   r;   emptyr   r   )r`   r|   r  r&  r  r(  r)  r'  r$  r%  r  grad_insr   output_metadatar  r  r  r  s                     rB   proxy_call_backwardz,AutogradCompilerInstance.proxy_call_backward  sf    +++---'}53##Y/22(G nn11$$" 
  2 	G """(* 	<57H(12B(C $_"*gcl.BOOD).=+tKKTvfU ((7;	< X	< Xs   #A0D&&D9
base_sizesbase_stridesbase_storage_offset
view_sizesview_stridesview_storage_offsetc           	          || j                  |      | j                  |      | j                  |      | j                  |      | j                  |      | j                  |      f}| j                  t        |d gdz        S )N   )rQ  
proxy_callcopy_slices_prologue)	r`   r|   r  r  r  r  r  r  ro   s	            rB   call_copy_slices_prologuez2AutogradCompilerInstance.call_copy_slices_prologue  st     MM*%MM,'MM-.MM*%MM,'MM-.
 3TD6A:FFrA   needs_input_gradr   res
grad_slicec                 R    | j                  t        ||||fd gt        |      z        S rE   )r  copy_slices_epiloguer  )r`   r  r   r  r  s        rB   call_copy_slices_epiloguez2AutogradCompilerInstance.call_copy_slices_epilogue  s4      vsJ7FS)**
 	
rA   c                 p    t               5  t        j                  ddg      cd d d        S # 1 sw Y   y xY w)Nr   i[rL  r   s    rB   rp  z'AutogradCompilerInstance.allocate_dummy  s-    (* 	/;;9~.	/ 	/ 	/s   ,5fn_namer   r   r   c                 2    t         j                  ||||      S )zBinds ops.fn_name = fn)rU  r   )r`   r  r   r   r   s        rB   bind_functionz&AutogradCompilerInstance.bind_function  s     www$6EErA   gradsro   r  c                 Z    t         j                  |      }| j                  ||g||      S )z:Proxies a call to ops.fn_name(grads, *args) into the graph)rU  r   r  )r`   r  r  ro   r  rf   s         rB   apply_functionalz)AutogradCompilerInstance.apply_functional  s+     WWWrE>D>?CCrA   c                 `    t        j                  |      \  }}t        j                   fd|      } j                  j	                  d||i       }|D cg c]  } j                          }} j                  |t        t        |            D 	cg c]  }	||	   	 c}	       |S c c}w c c}	w )z*Proxies a call to fn(*args) into the graphc                 &    j                  |       S rE   rQ  )r  r`   s    rB   r  z5AutogradCompilerInstance.proxy_call.<locals>.<lambda>  s    t}}Q/? rA   rd   ro   r   )	rn  tree_flattenro  r   r   rp  r   r  r  )
r`   r   ro   r  r-  r<  
proxy_args	proxy_outr   r   s
   `         rB   r  z#AutogradCompilerInstance.proxy_call  s     **40	1__%?F
NN//Rj 0 
	 2AAA$%%'AA$$VE#f+DV-Wqil-WX B-Ws   B&B+
r<  rb  c                    t         j                  d      }t        j                  | j                  |g|      }| j
                  j                  d||i       }t        |      t        |      k(  sJ | j                  ||       |S )zEProxies a call to ops.validate_outputs(outputs, *args) into the graphvalidate_outputsrd   r  )	rU  r   rn  ro  rQ  r   r   r  r   )r`   r<  rb  ro   r  rf   r  new_proxy_outputss           rB   r  z)AutogradCompilerInstance.validate_outputs  s     WW'(__T]]W4Dt4DE
 NN77Rj 8 
 ?#s7|333$$W.?@rA   old_varnew_varc                     | j                  |      }| j                  |      }| j                  j                  dt        j                  ||fi       }| j                         }| j                  |g|g       |S Nrd   r  )rQ  r   r   r;   r   rp  r   )r`   r  r  old_var_proxynew_var_proxyr  r   s          rB   
accumulatez#AutogradCompilerInstance.accumulate  ss    g.g.NN//UYYm]-KTV 0 
	 $$&$$fX	{;rA   variabler   has_post_hooksc                     | j                   j                  dt        | j                  |      | j                  |      |fi        y r  )r   r   r   rQ  )r`   r  r   r  s       rB   r[   z(AutogradCompilerInstance.accumulate_grad  sF     	## h'd#
  	$ 		
rA   hookr   c                     | j                   j                  dt        |g|D cg c]  }| j                  |       c}|      S c c}w Nrd   )r   r   r   rQ  )r`   r  ro   r   rK   s        rB   proxy_call_hookz(AutogradCompilerInstance.proxy_call_hook$  sO     ~~**,01q$--"1 
 	

 2s   Ahook_iddata_idc                     | j                   J | j                   |   }| j                  |   }| j                  ||d      }| j                         }| j	                  |g|g       |S )Nunpack_hook	hook_type)r   r   r  rp  r   )r`   r  r  r  dataproxyr   s          rB   r  z$AutogradCompilerInstance.unpack_hook1  s{    +++(%%g.$$# % 

 !!#$$cUUG4
rA   r   c                     | j                   J | j                   |   }| j                  |||   d      }t               5  t        ||         ||<   | j	                  ||   g|g       d d d        |S # 1 sw Y   |S xY w)Ntensor_pre_hookr  r   r  r)   rM   r   )r`   r|   r  r   r  r  s         rB   r  z(AutogradCompilerInstance.tensor_pre_hook>  s     +++($$1I' % 

 )* 	?#F1I.F1I((&)ug>	? 	? s   )A22A<c           	      R   | j                   j                  dt        j                  j                  j
                  j                  || j                  ||         fi       }t               5  t        ||         ||<   | j                  ||   g|g       d d d        |S # 1 sw Y   |S xY wr  )r   r   r;   _Cr   r7   call_cpp_tensor_pre_hooksrQ  r)   rM   r   )r`   r|   r  r   r  s        rB   cpp_tensor_pre_hookz,AutogradCompilerInstance.cpp_tensor_pre_hookM  s     ++HH..HHdmmF1I./	
 )* 	?#F1I.F1I((&)ug>	? 	? s   ))BB&c                    | j                   J | j                   |   }| j                  ||d      }t               5  |D cg c]  }t        |       }}| j	                  ||       d d d        |S c c}w # 1 sw Y   |S xY w)Npre_hookr  r  )r`   r|   r  r  r  rK   s         rB   r  z!AutogradCompilerInstance.pre_hook[  s    +++(&&  ' 

 )* 	:.45k!n5F5((9	:  6	: s   A6A1A61A66B c                    | j                   J | j                   |   }| j                  |||d      }t               5  |D cg c]  }t        |       }}| j	                  ||       d d d        |S c c}w # 1 sw Y   |S xY w)N	post_hookr  r  )r`   rb  r|   r  r  r  rK   s          rB   r  z"AutogradCompilerInstance.post_hookh  s     +++(&&!	 ' 
 )* 	;/67!{1~7G7((':	;  8	; s   A7A2A72A77Binputc                    t        |t        j                        sJ | j                  J | j                  |   }| j	                  ||d      }t               5  t        |      g}| j                  ||g       d d d        |S # 1 sw Y   S xY w)Npost_acc_grad_hookr  )rP   r;   r   r   r  r)   rM   r   )r`   r  r  r  r  r  s         rB   r  z+AutogradCompilerInstance.post_acc_grad_hookx  s     %...+++($$* % 

 )* 	7u%&C((ug6	7 
	7 
s    BBrb   c                    i }d}t        |j                        }|d   j                  dk(  sJ |d   }t        |j                  j	                               }t        t              }||   |d   k(  sJ |t        |      z   dz
  }||   |d   k(  sJ t        |      D ]  \  }	}
|s)|
j                  d   j                  j                  dk(  rd}1|
j                  d   j                  j                  d	k(  }t        |
j                  d   j                               dk(  }|s|st        |
j                  j	                               }t        d
 |D              s|
||	<    |rn|j                         D ]B  }
t        j                  d|
       |
j                  d   j!                         |
j                  d<   D t        |j	                               S g S )NFr   r|   rj   r  cudaTcpuc              3     K   | ]  }t        |j                  t        j                  j                        xr |j                  j
                  d v xs3 t        |j                  t              xr |j                  j                     yw))primsrV  N)rP   rg   r;   _ops
OpOverload	namespacer   r   r   users     rB   r   zDAutogradCompilerInstance.move_graph_nodes_to_cuda.<locals>.<genexpr>  sw      
  #4;;

0E0EF G KK115FF
 #4;;3 ? $ > >>
s   BB	zMoving node %s from cpu to cuda)rz  rm   rg   userskeysr  r   r   metar  r   r  allvaluesverbose_logdebugr  )r`   rb   to_movehas_cuda_inputsrm   r|   inputs_usersfirst_getitem_idxlast_getitem_idxr   ry   is_cpu	is_scalar
node_userss                 rB   move_graph_nodes_to_cudaz1AutogradCompilerInstance.move_graph_nodes_to_cuda  s   ,.U[[!Qx(***qFLL--/0 34&'<?:::,s</@@1D%&,r*:::: . 	&GAt"tyy'7'>'>'C'Cv'M"&YYu%,,11U:FDIIe,11349I)!$**//"34
 
 !+
 
 "&GAJ-	&4 ( ;!!"CTJ#'99U#3#8#8#:		% ;
 ''	rA   ry   c                 @   t        |t        j                  j                        xry |j                  dk(  xrh |j
                  t        j                  j                  j                  j                  t        j                  j                  j                  j                  fv S r  )rP   r;   r   rR  rf   rg   rU  rV  sym_sizers   	sym_numelrX  )r`   ry   s     rB   is_sym_nodez$AutogradCompilerInstance.is_sym_node  sn    tUXX]]+ O?*O		''++UYY^^-E-E-M-MNO	
rA   c                 t   t               t        | j                  j                  j	                  d            D ].  \  }}j                  |j                  j                                0 t        t              dz
  k(  sJ dt        j                  j                  dt        ffd}t        | j                  j                  j                        }| j                  j                  j                  |       t        | j                  j                  j                        }t         j#                  d||z
         y )Nr   ri   rj   ry   r9   c                 p    | v s!| j                   dk(  r| j                  t        v ry| j                         S )Nrd   T)rf   rg   _impure_targets	is_impure)ry   unpack_nodess    rB   r  z/AutogradCompilerInstance.dce.<locals>.is_impure  s2    |#?*t{{o/M>>##rA   zDCE removed %d nodes)r4   r   r   rb   rn   updater  r  r  r   r;   r   rR  rp   rm   eliminate_dead_coder  r  )r`   r   ry   r  beforeafterr  s         @rB   dcezAutogradCompilerInstance.dce  s    
 3=, !5!5!@!@M!@!RS 	3GAt

 12	3C+,q0000	$EHHMM 	$d 	$ T^^))//000;DNN((../0&5.ArA   c                    g }g }t        | j                  j                  j                        }t	        |       t	        |      }|j
                  dk(  sJ |j                  j                         D ]P  }|j                  t        j                  k(  sJ |j                  r|j                  |       @|j                  |       R t               }|D ]  }t        |j                  t              sJ |j                  d   |k(  sJ t        |j                  d   t               sJ t#        |      }|j%                  |j                  d          |j                  d   |f|_         |D ]'  }	| j                  j                  j'                  |	       ) |S )Nr   r   rj   )rl   r   rb   rm   rk   ru   r  r  rg   rq   rr   rt   setrP   ro   r   rs   r  r   
erase_node)
r`   
used_sizesunused_sizesit
sizes_nodegetitem_nodeused_sizes_idxusednext_size_idxunuseds
             rB   remove_unused_sizesz,AutogradCompilerInstance.remove_unused_sizes  sh   
 $..&&,,-R"X
')))&,,113 	2L&&(*:*::::!!!!,/ ##L1	2 $'5 	6Ddii///99Q<:---diilC000/Mtyy|,1}5DI	6 # 	4FNN  ++F3	4 rA   r   c                 l    t        | j                  j                  | j                  j                  |      S rE   )r%   r   r   rb   )r`   r   s     rB   create_graph_modulez,AutogradCompilerInstance.create_graph_module  s%    4>>..0D0DbIIrA   c                 :     j                   j                  dt        j                  di         j                  j                           j                   j                  dd j                   j                   j                  |            fi        g t               r% j                   j                   j                         j                   j                  j                  D ]%  }dD ]  }||j                  v s|j                  |=   ' t        dd  fd        j                           j!                           j#                           j%                           j'                           j)                           j+                           j-                           j.                  r/ j.                  j1                   j                   j                          j3                          j5                  d	 j6                         t9        d
g       t;        dddd      }t<        j?                  d|       t@        jC                  d|       t        dfd       dtD        dtF        f   d
tF        dtF        dtF        dtF        dtF        dtH        tF        tF        f   f fd}tK               jM                  dtO        jP                         d j6                  i jR                  d        jT                  jW                  d d d        | jY                        fS )Nrd   r@   rh   )tensor_metaexample_valuer  r  c                      dddS )N&compiled_autograd_graph_pre_reorderingstringr  r@   r@   rA   rB   r  z6AutogradCompilerInstance.end_capture.<locals>.<lambda>  s    @$! rA   c                      t         j                  j                   j                  j                  d j                   d      j                  d      S )NCompiledAutogradPreReorderingFprint_output)r%   r   r   rb   r   print_readabler   s   rB   r  z6AutogradCompilerInstance.end_capture.<locals>.<lambda>!  sG    {##$$"477)=9  n%n0	 rA   r   r  r|   zCompiled autograd graphT)include_deviceinclude_stridecoloredz%scompiled_autograd_graphc                  (     j                  d      S )NFr  )r  )rb   s   rB   r  z6AutogradCompilerInstance.end_capture.<locals>.<lambda>L  s    u333G rA   )r"  compiled_fn.r   r   r   packed_inputsr9   c           	         	 da j                  rj                  j                  |       g }t        |      D ]i  \  }}|v s|dkD  rI|j	                  t        j                  d|             t
        j                  j                  |d   d       Y|j	                  |       k D ](  }	||	   j                         j                  d      ||	<   * t               5  t        j                        5   | |||||      }
j                  rj                  j                  |
       |
cd d d        cd d d        da S # 1 sw Y   nxY wd d d        da y # 1 sw Y   da y xY w# da w xY w)NTr   r  rj   )non_blockingF)in_compiled_autograd_regionr   r   r   rt   r;   r  r   maybe_mark_dynamic
pin_memoryr  _disabler   r   r   )r  r|   r   r   r   r  filtered_sizesr   integerr   r   runtime_inputs_to_mover`   r  s              rB   runtime_wrapperz=AutogradCompilerInstance.end_capture.<locals>.runtime_wrapperO  sf   4.2+##$$55f=!#$-e$4 ;LCn,"Q;*11%++a2IJ!MM<<^B=OQRS*11':; 0 OA &q	 4 4 6 ; ; ; NF1IO Z !5dgg!> %C ''((..s3   /4+   /4+ /4+e+sO   A E! BE! E25D<'	E0	E! <E	EE! EE! E! !E%r7   r   r   )-r   r   r   _exec_final_callbacks_stubr   r   rT  
create_argrQ  rJ   r  rb   rm   r  r    delay_unpack_hook_nodesreorder_tensor_pre_hook_nodes'reorder_pre_hook_nodes_to_schedule_asapreorder_accumulate_grad_nodes%reorder_pre_hook_nodes_to_mimic_eager reorder_post_acc_grad_hook_nodesreorder_post_hook_nodesr  r   r{   r  r  r   r   r   compiled_autograd_loginfor  r  r   r   r   r   log_event_endr   r   r   r   __exit__r   )	r`   rb  ry   fieldlazy_graph_coder%  rb   r$  r  s	   `     @@@rB   end_capturez$AutogradCompilerInstance.end_capture  s   ##&AA		
 	

""^^&&t}}W'=>@		
 -/%'%)%B%B4>>CWCW%X"
 NN((.. 	)D@ )DII%		%()	)
 	1	
 	$$&**,446**,224--/$$& 	
,,T^^-A-AB 113((+;DGG9)EFEH:.0%
 	""49$0%G	

$	4!#s(+$	4$	4 $	4 	$	4
 $	4 $	4 38_$	4L 	"#11LLN!"& 	2 	
 	%%dD$7 0 0 777rA   c                 z    | D cg c]*  }t        |      t        j                  j                  u s)|, }}|S c c}w rE   )r   r;   r   rR  )ro   rE  rm   s      rB   get_all_nodesz&AutogradCompilerInstance.get_all_nodes  s4     !=qDGuxx}}$<== >s   *88c                     | j                   dk(  sH| j                   dk(  r:| j                  t        j                  k(  r| j                  d   j                   dk(  ryy)Nr   rd   r   TF)rf   rg   rq   rr   ro   )ry   s    rB   is_placeholderz'AutogradCompilerInstance.is_placeholder  sF    77m#GG&x///		!=0rA   c                    | j                   j                  j                  dt              D ]  }|j                  d   |j                  d   }}d}|j
                  t        j                  k(  r|}|j                  d   }t        ||g      }||j                  usm| j                  |      r|j                  |       ||j                  |        y)a  
        Usage of AOTAutograd causes all the accumulate_grad_ nodes to get pushed to the end of
        the graph.  This differs from eager mode, which schedules them as soon as possible. This
        pass attempts to reorder the graph to mimic eager behavior.
        rd   re   r   rj   N)r   rb   rn   r   ro   rg   rq   rr   maxprevr9  rt   )r`   ry   rz   	grad_noder  args         rB   r+  z6AutogradCompilerInstance.reorder_accumulate_grad_nodes  s     NN((33'; 4 
 	-D %)IIaL$))A,	JL8#3#33((--a0	z9-.C$))#D,?,?,D

4 +JJ|,	-rA   c                     | j                   j                  j                  dt              D ]H  }|j                  j                  dd      dk7  r#t        |j                        }|j                  |       J y)zp
        We can delay unpack hooks until they are needed, even later than in the eager autograd engine.
        rd   re   r  Nr  )	r   rb   rn   r   r   r   minr  prepend)r`   ry   
first_users      rB   r(  z0AutogradCompilerInstance.delay_unpack_hook_nodes  si     NN((33y 4 
 	%D {{{D1]BTZZJt$	%rA   c                 f   | j                   j                  j                  dt              D ]  }|j                  j                  dd      dk7  r#|j                  d   }|j                  d   }||j                  usP| j                  |      rb|j                  |       |j                  |        y)a  
        Usage of AOTAutograd causes all the tensor_pre_hook nodes to get pushed
        to the end of the graph. This differs from eager mode, which schedules
        them as soon as possible. This pass attempts to reorder the graph to
        mimic eager behavior.
        rd   re   r  Nr  r   rj   )
r   rb   rn   r   r   r   ro   r<  r9  rt   )r`   ry   r  
input_nodes       rB   r)  z6AutogradCompilerInstance.reorder_tensor_pre_hook_nodes  s     NN((33y 4 
 	*D {{{D15FF99Q<L1J*43F3Fz3R!!,/##D)	*rA   c                    | j                   j                  j                  dt              D ]Y  }|j                  j                  dd      dk7  r$|j                  d   }| j                  |j                  d         }g }g }|g}|D ]p  }|j                  dk(  s|j                  t        j                  k(  s1|j                  |j                  d          |j                  |       |j                  |       r t        ||      D ]'  \  }}	|j                  |       |j                  |	       ) t        |      }
|
|j                   us| j#                  |
      r1|
j                  |       |D ]  }|j                  |        \ y)a  
        In this function, we schedule the pre hooks as soon as possible. This
        does not match eager behavior (schedule pre hook right before its
        registered node), but it can make acc grad be scheduled properly when
        the pre hooks are registered to them. After reordering acc grad node, we
        will reorder the pre hooks again to mimic eager behavior.
        rd   re   r  Nr  r   rj   )r   rb   rn   r   r   r   ro   r7  rf   rg   rq   rr   rt   zipremover;  r<  r9  )r`   ry   r  input_nodes	to_remove	to_append
hook_blockrE  abr>  s              rB   r*  z@AutogradCompilerInstance.reorder_pre_hook_nodes_to_schedule_asap  sh    NN((33y 4 
 	+D {{{D1Z?99Q<L,,TYYq\:KIIJ  )44?*qxx8;K;K/K$$QVVAY/$$Q'%%a(	)
 Iy1 &1""1%""1%& k"C$))#D,?,?,D

<(# +A ''*+3	+rA   c                    g }| j                   j                  j                  dt              D ]3  }|j                  j                  dd      dk7  r#|j                  |       5 t        |      D ]  }|j                  d   }t        |j                  j                               }t        |      dk(  rDt        d |D              sJ t        t        |d   j                  j                                     }||j                  us|j!                  |       |j!                  |       |D ]  }|j!                  |         y)a%  
        Usage of AOTAutograd causes all the pre_hook nodes to get pushed to the
        end of the graph. This differs from eager mode, which schedules them
        right before their registered node execution. This pass attempts to
        reorder the graph to mimic eager behavior.
        rd   re   r  Nr  r   c              3   x   K   | ]2  }|j                   d k(  xr |j                  t        j                  k(   4 yw)rd   N)rf   rg   rq   rr   r  s     rB   r   zQAutogradCompilerInstance.reorder_pre_hook_nodes_to_mimic_eager.<locals>.<genexpr>   s8       ?*Nt{{h>N>N/NNs   8:)r   rb   rn   r   r   r   rt   reversedro   rz  r  r  r  r  rk   rl   rA  )r`   	pre_hooksry   hook_getitem_noder  registered_noderr   s          rB   r,  z>AutogradCompilerInstance.reorder_pre_hook_nodes_to_mimic_eager  s1    	NN((33y 4 
 	#D {{{D1Z?T"	# Y' 	5D $		!*+E5zQ  !    #4a(;(;(=#>?Odii/''(9:''-$ 5G#++G45#	5rA   c                    g }| j                   j                  j                  dt              D ]3  }|j                  j                  dd      dk7  r#|j                  |       5 t        |      D ]  }|j                  d   }|j                  d   }d}t        |j                  j                               D ])  }|j                  dk(  s|j                  t        k(  s'|} n |J d       |j                  |       |j                  |        y)	a  
        Usage of AOTAutograd causes all the post_acc_grad_hook nodes to get
        pushed to the end of the graph. This differs from eager mode, which
        schedules them as soon as possible. This pass attempts to reorder the
        graph to mimic eager behavior.
        rd   re   r  Nr  r   rj   z8post_acc_grad_hook must have corresponding acc grad node)r   rb   rn   r   r   r   rt   rP  ro   rz  r  r  rf   rg   r   )r`   post_acc_grad_hooksry   r  rz   acc_grad_noderE  s          rB   r-  z9AutogradCompilerInstance.reorder_post_acc_grad_hook_nodes  s    !NN((33y 4 
 	-D {{{D15II&&t,	- 01 	&D99Q<L1J !M***//12 44?*qxx;O/O$%M
 !, J,
   .%#	&rA   c           	      R   g }| j                   j                  j                  dt              D ]3  j                  j                  dd      dk7  r#|j                         5 t        |      D ]  j                  d   }j                  d   }j                  d   }t        |      dkD  r@g }|j                  t        |             |D ]=  }|j                  fd	t        |j                  j                               D               ? t        |      }|j                  dk(  r|j                   t"        k(  r|j                  d   }d}	t        |j                  j                               D ]H  }
|
j                  dk(  s|
j                   t        k(  s'|
j                  j                  dd      d
k(  sG|
}	J |	$|	j                  |       |j                         p|j$                  us| j'                  |      r|j                  |       |j                          y)a  
        Usage of AOTAutograd causes all the post_hook nodes to get pushed to the
        end of the graph. This differs from eager mode, which schedules them as
        soon as possible. This pass attempts to reorder the graph to mimic eager
        behavior.
        rd   re   r  Nr  r   rj      c              3      K   | ]G  }|j                   d k(  r2|j                  t        k(  rj                  j	                  dd      dk(  s| I yw)rd   r  Nr  )rf   rg   r   r   r   )r   r  ry   s     rB   r   zCAutogradCompilerInstance.reorder_post_hook_nodes.<locals>.<genexpr>J  sH      -?2 KK94 KKOOK>+M -s   AAr  )r   rb   rn   r   r   r   rt   rP  ro   r  extendrz  r  r  r;  rf   rg   r   r<  r9  )r`   
post_hooksr  rx   rH  input_nodes_and_usersrD  r>  rz   post_acc_grad_hook_noderE  ry   s              @rB   r.  z0AutogradCompilerInstance.reorder_post_hook_nodes0  s    
NN((33y 4 
 	$D {{{D1[@d#	$ Z( (	*D99Q<L99Q<L))A,K< 1$$&!!((k):;) 	
%,, - $Z%5%5%:%:%< =- 	 +,Cvv(SZZ;O-O XXa[
*.'j..3356 4A/HH	1HHLLd;?SS23/4 +6+22<@ ''-$))#D,?,?,D

<(##D)Q(	*rA   tc                 0    |y t        |t              r|D cg c]  } j                  |       c}S t        |t              rt         fd|D              S t        |t        j
                  t        j                  f      r j                  |j                     S t        |t        j                        s|S t         j                  |      }t        |t        j                  j                  j                  j                        sJ |j                   S c c}w )Nc              3   @   K   | ]  }j                  |        y wrE   r  )r   rK   r`   s     rB   r   z4AutogradCompilerInstance.to_proxy.<locals>.<genexpr>o  s     5aq)5s   )rP   rz  rQ  r   r;   SymIntSymFloatr   ry   r   r*   r   r   r  proxy_tensor_ProxyTensorr  )r`   r^  rK   rc  s   `   rB   rQ  z!AutogradCompilerInstance.to_proxyi  s    9a./0DMM!$00a51555a%,,78,,QVV44!U\\*H)$..!<,(=(=(J(J(W(WXXX!!! 1s   Dobjectsr  c                    t        |t        j                  j                        r|rft	        |      t	        |      k(  sJ g }t        t	        |            D ]1  }||   \  }}| j                  ||d        |j                  ||          3 |}n$t        t	        |            D cg c]  }||   	 }}t	        |      t	        |      k(  sJ t        ||d | j                         |S c c}w N)constanttracer)
rP   r;   r   r6   r  r  set_node_originrt   r-   r   )r`   re  r  r   bound_proxiesr   nodecall_indexr=  s           rB   r   z0AutogradCompilerInstance.bind_objects_to_proxiesy  s     guxx~~.7|s7|333 "s7|, 5A07
-NI((NDI!((45 (/4S\/BC!71:CC7|s7|+++'7T$..Q	 Ds   #C%indexc                     | j                   J | j                   |   }t               }t        ||d | j                         |S rg  )r   r&   r-   r   )r`   rm  r  bw_states       rB   bind_backward_statez,AutogradCompilerInstance.bind_backward_state  sB    +++  ' ?(EDPrA   r=  rl  pyobjc                    d}|;|j                   }t        |d      r#|j                  t        d      |j                  }| | d| d}t        j                         j                         d   }|j                  d|      }t        |       y )N rv  zThis compiled backward function was saved by AOTAutogradCache, which does not support
                    compiled autograd. Please turn off AOTAutogradCache using `TORCHINDUCTOR_AUTOGRAD_CACHE=0`.z (NodeCall )r  z:raw_stack_trace = CapturedTraceback.extract().format()[-1])
ru  r   rQ   r   rv  r5   extractformatreplacer1   )	r`   r=  rl  rq  maybe_aot_idforward_clsnew_coderaw_stack_tracenew_stack_traces	            rB   rj  z(AutogradCompilerInstance.set_node_origin  s     ,,K{I.22:&s   +22[k.9IK+335<<>rB)11H(
 	(rA   r   rE   )Kr   r   r   r   r   ra   r;   r   r	   r   r$   r   staticmethodr   r   r   rz  rs   r   r  r   rp   r3   r2   r  r#  r   r  autogradfunctionBackwardCFunctionr  r  r  rp  r  r  r  r  r  r[   r   r6   r  r  r  r  r  r  r  r   r  r  r  r  r  r%   r  r5  rR  r7  r9  r+  r(  r)  r*  r,  r-  r.  rQ  r   r&   rp  Functionrj  r@   rA   rB   r   r     sR   1HS#X$6 14 1C5<< C&1A Cj C 5S 5s 5} 5 5w
U\\"w
 Cyw
 eCJ'(	w

 d5c?+,w
 w
 w
 
sD&[(94;NN	Ow
r
c
 

F#F !.F  -	F
 F F #+3-F 
#FP11 #3-1  -	1
 1 ^^$$661 #+3-1 
x%s*	+1fGG SMG sm	G
 !G SMG smG !G 
%,,	G*
"4.
 
 c]	

 LL
 
%,,	
/ /
FF S#XF !	F
 F 
F	D	D }	D 		D
 "#	D 
%,,		D38$,/BJ3-	%,,	'}47JRSV-	%,,	#   

,1LL
JN
	

S#X&
/2
>A
	
3   5<<(36;>	ell	5<<(36;>	ell	x} s tELL?Q ELL)3;ELL3ITW	ell	 \\,/	ell	*/ehhnn /c /b
 
 
B, SX  DJc Jk J|83 |85#s(1CS1H+I |8| HSM d588==.A  
 UXX]] t  -*%*("+H!5F"&H7*r"# "# "( 48	#  $uS#X/0	
 
#,  )) ) //0	)
 
)rA   r   Fr   dynamicignore_active_disable_ctx)NNNc              #     K   |st         rd  y |rt        |      t        u sJ ddlm} |j
                  j                  dk(  r
da	 d  day t        j                  j                         rddlm} t        j                  j                  j                  j!                  t#        j$                  t&        |       |      \  }}t)               r7t        j                  j                  j                  j+                  t,               dat0        }t0        dz  a	 t        j2                  j5                  d      5  d  d d d        |sdat        j                  j                  j                  j!                  ||       t0        dz  at0        |k(  sJ d       y # daw xY w# 1 sw Y   bxY w# |sdat        j                  j                  j                  j!                  ||       t0        dz  at0        |k(  sJ d       w xY ww)	Nr   )
eval_frameforce_eagerTF)cudagraph_treesrj   zINested Compiled Autograd Contexts must return before their parent context)active_disable_ctxr   rp   torch._dynamor  _stancestance%compiled_autograd_enabled_force_eagerr;   r  is_availabletorch._inductorr  r  r   r7   set_autograd_compiler	functoolspartialr   rC   set_verbose_loggerr  compiled_autograd_enableddepthr~  set_multithreading_enabled)r   r  r  r  r  prior_compilerprior_dynamicprior_depths           rB   _enabler    s    4 %);=D(((,$$5 591>8=5 zz&&(;
   22HH!!":KH' 01  22EEkR(,%KQJE^^>>uE  &05-  22HH"M 
+ _+= 9>5*  &05-  22HH"M 
+ _+sQ   AG;F CG;F% 3F8F%  AG;FG;F"F% %AG88G;c               #     K   t         j                  j                  j                  j	                  d d      \  } }dat        sda	 d  | rdadat         j                  j                  j                  j	                  | |       y # | rdadat         j                  j                  j                  j	                  | |       w xY ww)NFT)r;   r  r   r7   r  r  r  )r  r  s     rB   r!  r!  
  s     
 	**@@uM !&!
(,%"**@@M	
 (,%"**@@M	
s   ACB 	;C<C  Cc                  r   da t        rJ t        j                  j                  j
                  j                  d d       t        j                  j                  j
                  j                  d        t        j                  j                  j
                  j                          t        j                         ay )NF)r  r  r;   r  r   r7   r  r  clear_cache	itertoolsr   r   r@   rA   rB   resetr  !  sw     %***	HH&&<<T5I	HH&&99$?	HH&&224oo'OrA   r|   r  r  r  r  r  r  c                     | d   }|j                  ||      }|J |j                  |       ||z
  }	|j                  |||	      }
||
|
j                  t        j
                        gS )Nr   )memory_format)new_empty_stridedcopy_
as_stridedcloner;   contiguous_format)r|   r  r  r  r  r  r  r   r   offsetr  s              rB   r  r  .  sw     !9D##J=F
LL #66F"":|VDJJ
 0 0u?V?V 0 WXXrA   r  r   r  r  c                     d gt        |       z  }t        t        |             D ];  }| |   s	||   |dk(  r ||   }|J |j                  |       |||<   4||   ||<   = |S )Nr   )r  r  r  )r  r   r  r  grad_inputsr   to_copys          rB   r  r  B  s     26=M9N0NK3'() 
(A1v~Ava&***  )!'A!$QA
( rA   )TTr   )u__doc__r   r  r  rq   r   collectionsr   r   collections.abcr   r   typingr   r   r	   r
   r   r;   torch.utils._pytreeutils_pytreern  torch._dispatch.pythonr   torch._dynamo.external_utilsr   r   r   r   r   torch._dynamo.sourcer   r   torch._dynamo.utilsr   r   r   r   /torch._functorch._aot_autograd.runtime_wrappersr   r   torch._guardsr   r   r   r   torch._loggingr   r    torch._prims_commonr"   torch._subclassesr#   torch._subclasses.fake_tensorr$   torch.fxr%   %torch.fx.experimental._backward_stater&   "torch.fx.experimental.proxy_tensorr'   r(   r)   r*   r+   r,   r-   %torch.fx.experimental.symbolic_shapesr.   r/   torch.fx.tracebackr0   r1   torch.typesr2   r3   torch.utils._ordered_setr4   torch.utils._tracebackr5   torch.fx.proxyr6   r   r   r/  r  rp   rC   rJ   r   rM   rX   rZ   r   r   rU  r   r&  r  r   r   rs   r   r   r  r  r  r  r  contextmanagerr  r!  r  rz  r  r  r@   rA   rB   <module>r     s         , / @ @  $ $ ;  <  M L > 6 , 4   ?   G B 2 / 4 $X *(4GH *EF$ 4D 48ELL) hu||.D 
 
c0B 
6G G^ #  #F5 5  m M "==	 ")//#	s 	s 	O) O)f$ "  ). % $  	  &*I#s(#II  $I  	I IX 
),- 
 
,(YU\\"Y%Y ;'Y %	Y
 %Y ;'Y %Y 
%,,Y(tnLL 
(5<<(	) 	
 
(5<<
 !rA   