
    i                   >   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZmZmZmZmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z" d dl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* d d	l+m,Z, d dl-Z-d d
l-m.Z.m/Z/m0Z0 d dl1m2c m3c m4Z5 d dl6m7c m8Z9 d dl:Z;d dl<Z;d dl=m8c m>Z? d dl@mAZA d dlBmCZC d dlDmEZE d dlFmGZG d dlHmIZI d dlJmKZKmLZLmMZMmNZNmOZO d dlPmQZQ d dlRmSZSmTZTmUZUmVZVmWZWmXZXmYZYmZZZ d dl[m\Z\ d dl]m^Z^ d dl_m`Z`maZambZb d dlcmdZd ddlemfZfmgZg ddlhmiZimjZjmkZkmlZlmmZm ddlgmnZnmoZompZpmqZqmrZr ddlsmtZt ddlumvZvmwZwmxZxmyZy ddlzm{Z{ ddl|m}Z}m~Z~ ddl8mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ dd lmZmZmZ e r+d d!lmZ d d"lRmZ d d#l[mZ dd$lmZ dd%lmZ dd&lmZ dd'l8mZ neZd(ed)<   	 d dlZejL                  Zd*Z e'd,      Z e!d-      Z e!d.      Z e!d/      Ze"ee.f   Zd(ed0<   e"eee.f   Zd(ed1<   e"e;jd                  jf                  e;jd                  jh                  f   Zd(ed2<    ejl                  e      Z ej*                  e	jr                  d34      Ze;j*                  jt                  Z e ejv                  d5d6            Z e ejv                  d7d8            Z	 e"ed9eed9f   d:d;eee"eeed9f   d9d:d;f         f   Zd(ed<<   d*d=Z ej                  d*>       G d? d@             ZÐd+dAZĐd,dBZŐd-dCZƐd-dDZ	 	 	 	 	 	 d.dEZg dFZg dGZ	 d/	 	 	 	 	 d0dHZːd1dIZ	 d/	 	 	 	 	 d0dJZed2d3dK       Zed2d4dL       Z	 d2	 	 	 	 	 d5dMZ	 	 	 	 d6dNZ	 	 	 	 d7dOZАd8dPZѐd8dQZ	 	 	 	 	 	 d9dRZ	 	 	 	 	 	 	 	 d:dSZ	 	 	 	 	 	 d;dTZՐd<dUZ֐d=dVZ G dW d;      Z ed+>       G dX dY             Ze G dZ d[eث             Zڐd>d\Ze G d] d^eګ             Ze G d_ d`eܫ             Z eda       edb       edc       edd       ede       edf      dgZdhedi<   	 d2	 	 	 	 	 	 	 d?djZe G dk dleګ             Zd e/d       f	 	 	 	 	 	 	 d@dmZeee.   ee.   gef   Zd(edn<    G do dpe      Z G dq dre      Z G ds dte      Ze G du dveګ             Ze G dw dxe             Ze G dy dzeګ             ZdAd{ZdAd|Z	 	 	 	 	 dB	 	 	 	 	 	 	 	 	 	 	 	 	 dCd}Z	 	 	 	 	 	 dDd~ZdEdZe G d deث             Ze G d de             Ze G d de             Ze G d de             Ze G d de             Ze G d de             Ze G d de             Ze G d de             Z G d de      Ze G d deث             Ze G d de             Ze G d de             Z	 	 	 	 	 	 dFdZdGdZ G d d      Ze G d de             Z G d de      Z G d de      Z G d de      Z  G d de      Z G d de      Ze G d de             Z G d de      Z ed+>       G d deej             Z ed+>       G d dee٫             Z G d de      Z G d de      Z G d de      Z	e G d deث             Z
e G d deث             Z ed+>       G d de             Z G d de      Z G d de      Ze"eeeeee"eeeef      f   Z G d d      Z G d dÐe      Z G dĄ dŐe      Z G dƄ dǐe      Z G dȄ dɐe      Z G dʄ dːe      Z	 	 	 	 dHd̄Z ed+>       G d̈́ dΐe             Z G dτ dАe      Z G dф dҐe      Z ed+>       G dӄ dԐe             Z ed+>       G dՄ d֐e             Z G dׄ dؐe      Z G dل dڐe      Z G dۄ dܐe      Z  G d݄ dސe      Z! G d߄ de!      Z" G d de!      Z# G d de      Z$ G d de      Z% G d de      Z& G d de      Z' G d de      Z( G d de(      Z) G d de      Z* G d de      Z+ G d de      Z, G d de      Z- G d de      Z. G d de      Z/ G d de      Z0 ed+>       G d d             Z1 G d d e      Z2 ed+>       G d de2             Z3 G d de2      Z4e G d de             Z5 G d de      Z6ej                   G d	 d
eث             Z7 G d d9e7      Z8 G d de7      Z9 ed+>       G d deث             Z:dIdZ; ed+>       G d de             Z< ed+>       G d de             Z=	 	 	 	 dJdZ> ed+>       G d de             Z? G d de2      Z@ G d deث      ZAe G d deA             ZBe G d deA             ZC G d  d!e2      ZD G d" d#eD      ZE G d$ d%eD      ZF G d& d'eD      ZGdKd(ZHdKd)ZIy# e$ r dZd+ZY w xY w(L      )annotationsN)	Container	GeneratorIterableIteratorSequence)AbstractContextManagernullcontext)Enum)partial)AnyCallablecastClassVarLiteralOptionaloverloadSupportsFloatSupportsIntTYPE_CHECKINGTypeVarUnion)assert_neverNeveroverride	ParamSpecSelf	TypeAliasTypeIs)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metricsget_free_symbols)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)&_remove_effect_token_unbacked_bindingscompute_unbacked_bindingsfree_symbolsfree_unbacked_symbolsrebind_unbackedresolve_unbacked_bindingsShapeEnvSymTypes)Node
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureCodegenSymbolget_scheduling_for_deviceindex_prevent_reorderingKernel)Depextract_free_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningdo_bench_using_profilingdtype_from_sizeget_dtype_sizeget_kernel_metadataGPU_ALIGN_BYTESir_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_substensor_is_aligned)opsOpsValueV)FakeScriptObject)SympyBoolean)Argument)CUDATemplate)PythonWrapperCodegen)GraphLowering)IndentedBufferr   ro   TF_P_T_U_V_IntLike_NumLike_OpOverloadsz  prefixTORCH_AUTOTUNE_WARMUP   TORCH_AUTOTUNE_REPd   	TensorBoxr#   IRNode_NodeOrNodesc                .    t        | t        t        f      S N)
isinstanceintr"   xs    L/var/www/html/engine/venv/lib/python3.12/site-packages/torch/_inductor/ir.py
_is_staticr      s    a#w((    )frozenc                  J    e Zd ZU ded<   ded<   ded<   ded<   d	ed
<   ded<   y)GraphPartitionSignatureOrderedSet[sympy.Symbol]symbol_inputsz5dict[str, Union[IRNode, sympy.Expr, TorchBindObject]]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationboolskip_cudagraph	list[str]constant_namesN__name__
__module____qualname____annotations__ r   r   r   r      s/     ,+ GF (' r   r   c                "    dfd |        y )Nc                   | y t        | t        t        f      r| D ]
  } |        y t        | t              r| j	                         D ]
  } |        y t        | t
        t        t        t        t        j                  j                  j                  t        t        t        t         f	      sJ dt#        |        d       y )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])r   listtupledictvalues
ExpandViewDynamicScalarAssertScalarr   sympylogicboolalgBooleanr!   r   EffectfulKernelShapeAsConstantBuffertype)nodesnode_check_tensorboxs     r   r   z%validate_ir.<locals>._check_tensorbox   s     =e}- ' &'t$ ' &' ! KK''//#)
  e%jk r   )r   Optional[_NodeOrNodes]returnNoner   )node_or_nodesr   s    @r   validate_irr      s    < ]#r   c                T     t         t              sJ t                      d fd}|S )Nc                 0     t        t              | i |S r   )getattrri   )argskwargsnames     r   fnzops_wrapper.<locals>.fn  s    !wsD!42622r   )r   objectr   r   r   rj   )r   strr   )r   r   s   ` r   ops_wrapperr     s(    dC ,$t*, 3 Ir   c           
     b    t        t        | t        t        |                         dfd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w r   lenrange)indexi	inv_orders     r   reindexz inverse_reorder.<locals>.reindex  s?    5zS^+++-23u:->?il#???   Ar   Sequence[_T]r   r   )r   zipr   r   )orderr   r   s     @r   inverse_reorderr     s*    Sc%j 123I@ Nr   c                     d fd}|S )Nc                    t        |       t              k(  sJ t        t        |             D cg c]
  }| |       c}S c c}w r   r   )r   r   r   s     r   r   zsame_reorder.<locals>.reindex&  s>    5zSZ''').s5z):;AeAh;;;r   r   r   )r   r   s   ` r   same_reorderr   %  s    < Nr   c                     d fd}|S )Nc                       |             S r   r   )r   reindex1reindex2s    r   r   z fuse_reindexing.<locals>.reindex1  s    ((r   )r   r   r   zSequence[_V]r   )r   r   r   s   `` r   fuse_reindexingr   -  s    ) Nr   )   r      r?   )   r   r   r   r?   c                `    |t        d | D              rt        |       }|S t        ||       }|S )z1
    Convert strides to fill order (argsort)
    c              3  \   K   | ]$  }t        |t        t        j                  f       & y wr   )r   r   r   r"   .0ss     r   	<genexpr>z!get_fill_order.<locals>.<genexpr>A  s      Q
1sEMM.B CQ   *,)allrT   rU   )seq	shape_env
sorted_idxs      r   get_fill_orderr   ;  s<     CQSQQ$+CL
  !C0
r   c                    t        |       D ci c]  \  }}||
 }}}t        t        |             D cg c]  }||   	 }}|S c c}}w c c}w )z
    Convert stride order to fill order
    For channel last format,

    stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
    )	enumerater   r   )r   idxposlookupr   
fill_orders         r   stride_order2fill_orderr   I  sR     (1'7883c3h8F8%*3u:%67&)7J7 97s
   AAc                    t        | |      }t        t        |             D cg c]  }d }}t        |      D ]
  \  }}|||<    |S c c}w )z)
    Convert strides to stride order
    r   )r   r   r   r   )r   r   r   _outr   elems          r   get_stride_orderr   U  sV     !/sI >JCHo
&1
&C
&Z( 4D	J 's   	Ac                     y r   r   r   guard_shapes     r   ir_node_to_tensorr   b  s    KNr   c                     y r   r   r   s     r   r   r   f  s    LOr   c                   | y |s%t         j                  j                  j                  }nt        }| j                         D cg c]
  } ||       }}t        |       r.| j                         j                  D cg c]
  } ||       }}nt        j                  |      }| j                         }| j                         }t        |      }t        |      }t         j                  j                  j                  j                         5  t!        j"                  ||||      j%                         }d d d        |S c c}w c c}w # 1 sw Y   S xY w)N)sizestridedtypedevice)rk   graphsizevars	size_hintr$   get_sizeis_storage_and_layout
get_layoutr   FlexibleLayoutcontiguous_strides	get_dtype
get_devicerY   r   suppress_guardstorchempty_stridedzero_)	r   r   shape_fnr   r   r   r   r   ts	            r   r   r   j  s    	y 77##--!".AHQK.D.Q'(||~'<'<=!(1+==2248KKME\\^F"4(D$V,F	
			#	#	3	3	5 fE&

%' 	
 H / > Hs   D<;E
(EEc                0    t        | t              r| sd gS | S r   )r   r   values    r   may_convert_to_optionalr    s     %u vLr   c                    t        | t              s| | S t        | t        j                        r| j                  S t        | t
        t        f      rt        | j                               S t        d|  dt	        |       j                   d       y )Nzget_device_type(: ))r   r   r  r   r   r   
OutputSpecget_device_typer  r   r   r   s    r   r  r    sq     !SQY	Au||	$vv	A
+	,q||~..#A3ba)9)9(:!<=r   c                    t        |       }|dv rt        t        | d      dk(  ryy|t        |      x}yddlm} t        |t              sJ t        |             t        ||      S )N)cpucuda_backendtritonTFr?   )TritonScheduling)	r  r   r@   rD   codegen.tritonr  r   r   
issubclass)r   r   device_schedulingr  s       r   	is_tritonr    sy    QF  6fXX./8;!:6!BBK0'.G5F0GG.')9::r   c                    t        |       dk(  S )Nr  )r  r   s    r   is_cpur!    s    1&&r   c           	          t         t              rB j                         2t         j	                               st         j                               ryt         fdt        t         j	                               dz
        D              }t        j                  j                  j                   j	                         d         dk(  xs= t        j                  j                  j                   j                         d         dk  }|xr |S )NFc              3     K   | ]D  }t         j                  j                  j                  j	                         |         z  d k(   F yw)r   N)rk   r   r   size_hint_or_throw
get_stride)r   r   	alignmentr   s     r   r   z2is_aligned_realized_tensor_hint.<locals>.<genexpr>  sD       
			,	,Q\\^A->	?)	KPQQs   A
Ar?   )r   r   maybe_get_strider3   r%  r   r   r   r   rk   r   r   r$  )r   r&  aligned_stridesaligned_last_dims   ``  r   is_aligned_realized_tensor_hintr+    s    
 q&!' 0 . s1<<>*Q./ O 	
++ALLN2,>?1D 	F77..qzz|B/?@AE  //r   c                   t        |      t        |       k(  rt        |       t        |      k(  sJ t        || |      D ]  \  }}}t        j                  j                  j                  |d      r2t        j                  j                  j                  ||      r]t        j                  j                  j                  |      t        j                  j                  j                  |      k(  r y y)zP
    Returns true if the strides are equal, ignoring dimensions of size 1 .
    r?   FT)r   r   rk   r   r   statically_known_leqstatically_known_equalssymbolic_hint)strides1strides2shapedims1s2s         r   significant_strides_equalr6    s     u:X&3x=CM+III5(H5 	R7700a8ww77
''""0048H8H8V8V9
 
 	 r   c                Z   t        |       s| S t        d t        || j                               D              r| S t	        || j                         | j                               s| S t        |       \  }}g |j                  }t        | j                               D ]8  \  }}t        j                  j                  j                  |d      s1||   ||<   : t        |j                  |j                  |j                   ||j"                  |j$                        }t'        t)        ||            S )a  
    Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
    dimensions - size 0 or 1 - will be updated.

    If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
    c              3  v   K   | ]1  \  }}t         j                  j                  j                  ||       3 y wr   rk   r   r   r.  r   r4  r5  s      r   r   z2try_match_insignificant_strides.<locals>.<genexpr>  s3      B 	
00R8   79r?   datalayout)r  r   r   r%  r6  r   as_storage_and_layoutr   r   rk   r   r   r-  FixedLayoutr   r   r   offset	is_pinnedr   ReinterpretView)tensorstridesstorage
old_layout
new_strider   r   
new_layouts           r   try_match_insignificant_stridesrJ    s    !(
 '6#4#4#67  $Wf.?.?.A6??CTU/7GZ%:$$%J&//+, '17700A6#AJJqM' J _'*EFFr   c                    | j                   j                  d      d   }t        |j                        D cg c]  \  }}|	 c}}|j                  d<   ddlm}  ||        y c c}}w )Noutput)opr   user_visible_output_idxs)record_original_output_strides)r   
find_nodesr   r   metatorch._inductor.compile_fxrO  )gmoutput_noder   r   rO  s        r   gm_original_output_stridesrU  
  sa    ((%%%215K#K$4$454Q4K/0 J"2&4s   A$c                    t               }| D ]>  }|t        |j                         d      z  }|t        |j                         d      z  }@ t	        |      S )NFunbacked_only)r:   r)   r   r%  r   )inputssym_varsinps      r   get_symbolic_inputsr\    sW    !+H L$S\\^5II$S^^%5UKKL >r   c                     e Zd ZU dZ e       Zded<    ej                  d      Z	ded<    ej                  d      Z
ded	<    ej                  d      Zd
ed<   eej                  dGd              ZedHd       ZdIdZdJdZdKdZdLdZdMdZdNdZdKdZdOdPdZ	 dQ	 	 	 	 	 	 	 dRdZdSdZdTdZdUdZdVdZdWdZdXdZdYdZ dZdZ!d[dZ"e#d\d        Z$d]d!Z%dYd"Z&d^d#Z'd_d`d%Z(dad&Z)dbd'Z*dYd(Z+dcd)Z,ddd*Z-ded+Z.d[d,Z/dfd-Z0d^d.Z1dYd/Z2d_dgd0Z3dhd1Z4dJd2Z5did3Z6dJd4Z7	 dj	 	 	 	 	 dkd5Z8dld6Z9dmd7Z:	 dj	 	 	 	 	 dnd8Z;dod9Z<dpd:Z=dqd;Z>drd<Z?	 dj	 	 	 dsd=Z@d^d>ZAdZd?ZBdYd@ZCdYdAZDdtdBZEdudCZFdfdDZGdudEZHeIr
e#dSdF       ZJy$y$)vr   zBase class for all intermediate representation (IR) nodes in TorchInductor.

    Note:
        This is an abstract base class. Most methods raise NotImplementedError
        and must be overridden by concrete subclasses.
    zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[list[str]]	tracebackOptional[torch.fx.Node]origin_nodec              #     K   t         j                  }|| z  t         _        	 d  |t         _        y # |t         _        w xY wwr   )r   r^  )ra  olds     r   current_originszIRNode.current_origins-  s7      %%"%-	*&)F#cF#s   A2 A?Ac                L    t        | t        t        t        t        t
        f      S r   )r   ComputedBufferInputsKernelInputBufferrC  TemplateBuffer)r   s    r   is_realized_nodezIRNode.is_realized_node7  s&    	
 		
r   c                2    t         j                  | ||       y r   )r   __setattr__)selfattrr  s      r   _post_init_setattrzIRNode._post_init_setattrD  s     	4u-r   c                    t        | j                        }| j                  d|       | j                  dt        j                  rt        j                         nd        | j                  dd        y )Nra  rc  re  )r:   r^  rs  r@   debug_ir_tracebackrc  format_stack)rq  ra  s     r   __post_init__zIRNode.__post_init__J  s[    T223	73V5N5N//1TX	
 	t4r   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wr   r   r   deps     r   r   z(IRNode.get_read_names.<locals>.<genexpr>S       ?s#((?   r:   	get_readsrq  s    r   get_read_nameszIRNode.get_read_namesR      ?dnn.>???r   c                    | j                   S r   )rc  r  s    r   get_tracebackzIRNode.get_tracebackU  s    ~~r   c                    | j                   S r   re  r  s    r   get_origin_nodezIRNode.get_origin_nodeX      r   c                     y r   r   r  s    r   get_defining_opzIRNode.get_defining_op[      r   c                t   t               }| j                  }t        | t              r(| j	                         }| j
                  rt        |g      }|D ]  }t        |d      r(|j                  r|j                  |j                         7t        j                  j                  j                  j                  di       j                  |j                  g       }t        |t              s|D ]J  }t        j                  j                  j                   j                  |d       }|s:|j                  |       L  |S )Nstack_trace	postToPre)r:   ra  r   ExternKernelr  re  hasattrr  addr  	_inductordebug _inductor_post_to_pre_grad_nodesgetr   r   #_inductor_pre_grad_node_stack_trace)rq  stack_tracesra  re  r   pre_grad_nodes	node_namer  s           r   get_stack_traceszIRNode.get_stack_traces^  s    )3,,dL)..0K$k]3 	6Dt]+0@0@  !1!12 OO))JJNN#Rc$))R( 
 ".$7!/ 6I--QQUU%t  
 #$((56	6( r   c                   dt        | dd       }|rt        |      dkD  r|d d  d}| j                         s|gS g }| j                         D ]8  }|j                  d       ||j	                  d      z  }|j                  d	       : |g|z   S )
Nzorigins=ra   @   =   z...zstack_traces = {
})r   r   r  appendsplit)rq  shortenra  stack_trace_strr  s        r   common_reprzIRNode.common_repr}  s    WT9b9:;s7|b( "c*G$$&9002 	(K""#56{0066O""3'	( y?**r   c                $   t        |      t        | j                  |            z   }t        t        t        |            }|r5t	        dj                  |            }t        |       j                   d| dS t        |       j                   d| dS )Nz,
z(
z
)(r  )r   r  mapr   indentjoinr   r   )rq  linesr  	multiline	new_liness        r   
str_helperzIRNode.str_helper  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44r   c                    | j                   S r   r   r  s    r   r  zIRNode.get_dtype      zzr   c                B    	 | j                         S # t        $ r Y y w xY wr   )r  NotImplementedErrorr  s    r   maybe_get_dtypezIRNode.maybe_get_dtype  s&    	>>##" 		    	c                2    t        dt        |        d      )Nz#get_layout() is not implemented by !r  r   r  s    r   r  zIRNode.get_layout  s    !$GT
|ST"UVVr   c                B    	 | j                         S # t        $ r Y y w xY wr   )r  r  r  s    r   maybe_get_layoutzIRNode.maybe_get_layout  &    	??$$" 		r  c                "    | j                         S r   )r  r  s    r   get_output_speczIRNode.get_output_spec  s      r   c                B    	 | j                         S # t        $ r Y y w xY wr   )r  r  r  s    r   maybe_get_output_speczIRNode.maybe_get_output_spec  s(    	''))" 		r  c                >    t        | j                         t              S )z4True for single tensor output (excludes MultiOutput))r   r  Layoutr  s    r   has_tensor_outputzIRNode.has_tensor_output  s    $446??r   c                2    t        dt        |        d      )Nz!get_size() is not implemented by r  r  r  s    r   r   zIRNode.get_size  s    !$Ed4j\QR"STTr   c                B    	 | j                         S # t        $ r Y y w xY wr   )r   r  r  s    r   maybe_get_sizezIRNode.maybe_get_size  %    	==?"" 		r  c                "    | j                         S r   r   r  s    r   r2  zIRNode.shape  s    }}r   c                4    t        | j                               S r   )rf   r   r  s    r   	get_numelzIRNode.get_numel  s    T]]_--r   c                    t         j                  j                  j                  t	        j
                  | j                         d            S Nr   rk   r   r   statically_known_truer   Eqr  r  s    r   is_zero_elementszIRNode.is_zero_elements  0    ww55ehht~~?OQR6STTr   c                0    t        dt        |              )a)  
        If the IRNode refers to data which has not been materialized (e.g.,
        it is a Pointwise/Reduction that could potentially have more
        compute fused into it), realize the IRNode into physical memory,
        ending the possibility of fusing into it, but allowing, e.g., multiple
        users to access the data without having to recompute.

        Check StorageBox.realize for a particularly notable implementation.

        TODO(ezyang): I think, in principle, every IRNode should have an
        implementation of this, and most of the time no-op is OK, but you
        really do have to audit each IRNode for this, so for now, raise
        an error if it's not implemented.  Note that some code in graph.py
        will catch this thrown error and suppress it with a warning.
        zrealize NYI on r  r  s    r   realizezIRNode.realize  s      "ODJ<"@AAr   Nc                0    t        dt        |              )Nzcodegen_reference NYI on r  rq  writers     r   codegen_referencezIRNode.codegen_reference  s    !$=d4j\"JKKr   c                     y r   r   r  s    r   r  zIRNode.get_device  r  r   c                .    | j                         }|J |S r   )r  rq  r   s     r   get_device_or_errorzIRNode.get_device_or_error  s    "!!!r   c                     yNFr   r  s    r   has_exceeded_max_readszIRNode.has_exceeded_max_reads      r   c                >    t        t        |       j                        r   r  r   r   r  s    r   make_loaderzIRNode.make_loader      !$t*"5"566r   c                >    t        t        |       j                        r   r  r  s    r   make_indexerzIRNode.make_indexer  r  r   c                >    t        t        |       j                        r   r  r  s    r   r%  zIRNode.get_stride  r  r   c                B    	 | j                         S # t        $ r Y y w xY wr   )r%  r  r  s    r   r(  zIRNode.maybe_get_stride  r  r  c                >    t        t        |       j                        r   r  r  s    r   get_namezIRNode.get_name  r  r   c                B    	 | j                         S # t        $ r Y y w xY wr   )r  r  r  s    r   maybe_get_namezIRNode.maybe_get_name  r  r  c                v    	 | j                         t        j                  j                  v S # t        $ r Y yw xY wr  )r  rk   r   graph_inputsr  r  s    r   is_input_bufferzIRNode.is_input_buffer  s4    	==?agg&:&:::" 		s   ), 	88c                     yr  r   rq  	thresholds     r   has_large_inner_fnzIRNode.has_large_inner_fn  r  r   c                     y r   r   rq  userss     r   
mark_reusezIRNode.mark_reuse
      r   c                     y r   r   r  s    r   realize_hintzIRNode.realize_hint  r  r   c                >    t        t        |       j                        r   r  r  s    r   unwrap_viewzIRNode.unwrap_view  r  r   c                >    t        t        |       j                        r   r  r  s    r   freeze_layoutzIRNode.freeze_layout  r  r   c                >    t        t        |       j                        r   r  rq  r   allow_paddings      r   freeze_layout_with_stride_orderz&IRNode.freeze_layout_with_stride_order       "$t*"5"566r   c                >    t        t        |       j                        r   r  rq  r   s     r   freeze_layout_with_fill_orderz$IRNode.freeze_layout_with_fill_order  r  r   c                >    t        t        |       j                        r   r  rq  r   s     r   freeze_layout_with_same_orderz$IRNode.freeze_layout_with_same_order  r  r   c                >    t        t        |       j                        r   r  rq  exact_stridesr  s      r    freeze_layout_with_exact_stridesz'IRNode.freeze_layout_with_exact_strides!  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_read_writeszIRNode.get_read_writes&  r  r   c                6    | j                         j                  S r   r  readsr  s    r   r  zIRNode.get_reads)      ##%+++r   c                4    t        | j                               S r   )r   r  r  s    r   	num_readszIRNode.num_reads,  s    4>>#$$r   c                >    t        t        |       j                        r   r  r  s    r   get_storage_numelzIRNode.get_storage_numel/  r  r   c                >    t        t        |       j                        r   r  rq  rX  s     r   get_free_symbol_useszIRNode.get_free_symbol_uses2  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_reduction_typezIRNode.get_reduction_type7  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_reduction_sizezIRNode.get_reduction_size:  r  r   c                     yr  r   r  s    r   	is_externzIRNode.is_extern=  r  r   c                     yr  r   r  s    r   is_no_opzIRNode.is_no_op@  r  r   c                >    t        t        |       j                        r   r  r  s     r   constant_to_devicezIRNode.constant_to_deviceC  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_mutation_nameszIRNode.get_mutation_namesF  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_operation_namezIRNode.get_operation_nameI  r  r   c                >    t        t        |       j                        r   r  r  s    r   get_inputs_that_alias_outputz#IRNode.get_inputs_that_alias_outputL  r  r   c                     y r   r   r  s    r   r   zIRNode.dtypeQ  s    (+r   )ra  zOrderedSet[Node]r   zGenerator[None, None, None]r   r   r   r   )rr  r   r  r   r   r   r   r   r   OrderedSet[str])r   rb  r   rd  r   zOptional[Operation]T)r  r   r   Sequence[str])TT)r  zSequence[object]r  r   r  r   r   r   r   torch.dtype)r   zOptional[torch.dtype]r   r  )r   zOptional[Layout]r   r  )r   zOptional[OutputSpec]r   r   r   Sequence[Expr])r   Optional[Sequence[_IntLike]])r   z.Union[_IntLike, sympy.Rel, Sequence[_IntLike]]r   r!   r   Optional[str]r   r  zOptional[IndentedBuffer]r   r   r   Optional[torch.device]r   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   r  Optional[int]r   r   r  r   r   r   r   r   Fr   Sequence[int]r  r   r   r   r   rQ  r   r   r   rI  r   r   r  rI  r  r   r   r   r   dependencies.ReadWritesr   zOrderedSet[Dep]r   r   r   rw   rX  r   r   r   r   rC  r   r   r   r3  )Kr   r   r   __doc__r:   r^  r   dataclassesfieldra  rc  re  staticmethod
contextlibcontextmanagerrh  rn  rs  rw  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  propertyr2  r  r  r  r  r  r  r  r  r  r%  r(  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r  r   r"  r$  r&  r(  r*  r   r   r   r   r   r   r     sN    3=,/>  1{00e<G_<%6[%6%6E%BI"B+<;+<+<%+HK(H*  * 

 

.5@ >+  PT	5%	504	5HL	5		5W!@U  .UB$L
777777 ;@7"7377	7
77 HM7/7@D7	7
7,%7 %*7!7	!7
777777 	+ 
+ r   c                      e Zd ZddZddZddZddZddZddZddZ	ddZ
dd	Zdd
ZddZddZddZ	 d	 	 	 ddZddZy) 	Operationc                    d | _         y r   operation_namer  s    r   rw  zOperation.__post_init__W  s
    -1r   c                    t         r   r  r  s    r   r  zOperation.get_deviceZ      !!r   c                6    t        | d      sJ | j                  S Nre  )r  re  r  s    r   r  zOperation.get_origin_node]  s    t]+++r   c                6    t        | d      sJ | j                  S )Nra  )r  ra  r  s    r   get_originszOperation.get_originsa  s    tY'''||r   c                6    | j                   J | j                   S r   rg  r  s    r   r(  zOperation.get_operation_namee  s     ""..."""r   c                     yr  r   r  s    r   r   zOperation.is_externi  r  r   c                     yr  r   r  s    r   r"  zOperation.is_no_opl  r  r   c                    t         r   rj  r  s    r   r  zOperation.get_read_writeso  rk  r   c                &    || j                         v S r   )r  )rq  r   s     r   
is_user_ofzOperation.is_user_ofr  s    t**,,,r   c                B    t        d | j                         D              S )Nc              3  4   K   | ]  }|j                     y wr   rz  r{  s     r   r   z+Operation.get_read_names.<locals>.<genexpr>v  r}  r~  r  r  s    r   r  zOperation.get_read_namesu  r  r   c                6    | j                         j                  S r   r  r  s    r   r  zOperation.get_readsx  r  r   c                    t         r   rj  r  s    r   get_outputszOperation.get_outputs{  rk  r   c                    t               S r   r9   r  s    r   get_unbacked_symbol_defsz"Operation.get_unbacked_symbol_defs~  
    |r   c                    t               S )a  
        When unbacked_only=True:
        Returns the unbacked symbols which are required to be in scope in
        order to successfully perform codegen for this buffer.  For example,
        a buffer that corresponds to an extern kernel call that takes i0 as
        an argument would return {i0} here.  This is used to generate necessary
        dependencies that ensure we actually bind i0 in codegen before you
        try to use it.

        Note that this is NOT transitive; in particular, if this buffer takes
        in as input another buffer with dynamic shape (e.g., (i0,)), we will
        not report it here, because you will already have a dependency
        on that buffer, which will eventually have a dependency on i0 if
        necessary.

        When unbacked_only=False:
        Similar to `unbacked_only=True` but including all free symbols
        instead of only free unbacked symbols.
        r9   r  s     r   r  zOperation.get_free_symbol_uses  s    , |r   c                     y)z
        Gets extra global memory size needed by this buffer.
        Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
        r   r   r  s    r   get_workspace_sizezOperation.get_workspace_size  s    
 r   Nr-  r@  r0  )r   r`  rJ  r8  rU  )r   r   r   r   r.  rW  r   list[Buffer]r   r   rO  rZ  rX  )r   r   r   rw  r  r  ro  r(  r   r"  r  ru  r  r  rz  r|  r  r  r   r   r   re  re  U  sc    2" #"-@," %*!	!0r   re  c                  H    e Zd ZU ded<   ded<   ded<   ded<   	 d	 	 	 dd	Zd d
Zd! fdZd"dZeZd#dZ	d$dZ
d%dZd%dZe	 	 	 	 	 	 d&d       Zeej"                  fd'd       Zed(d       Zd)dZed"d       Zd*d+dZdd,dZd-dZd.dZd/dZd%dZd0dZd1dZ xZS )2LoopsrC  r   r5  r   Callable[..., Any]inner_fnrI  rangesc                     t               j                  g fd| j                  D        | j                         S )Nc              3  6   K   | ]  }t        |        y wr   r(   r   erX  s     r   r   z-Loops.get_free_symbol_uses.<locals>.<genexpr>  s     FQq-0F   )r:   unionr  inner_fn_free_symbolsr  s    `r   r  zLoops.get_free_symbol_uses  s@     "z|!! 
F$++F
&&}5
 	
r   c                   | j                  d| j                  j                   dt        | j                        | j                         g|D cg c]  }| dt        | |        c}z   d| j                  gz         S c c}w )N'=origin_node=)r  r   r   r   r   inner_fn_strr   re  )rq  namesr   s      r   _to_strzLoops._to_str  s    DKK$$%Q'DJJ!!#
 <AA4$qt,-.AB d..1234
 	
 Bs   A?
c                "    t         |           y r   )superrw  rq  	__class__s    r   rw  zLoops.__post_init__  s    r   c                $    | j                  d      S )Nr  r  r  s    r   __str__zLoops.__str__  s    ||K((r   c                    | j                   S r   r   r  s    r   r  zLoops.get_device      {{r   c                    | j                   S r   r  r  s    r   r  zLoops.get_origin_node  r  r   c                    | j                   S r   r  r  s    r   r   zLoops.get_size  r  r   c                    | j                   S r   r  r  s    r   get_pointwise_sizezLoops.get_pointwise_size  r  r   c                    |j                  dd       }|j                  dd       } | |i |}|j                  d|       |j                  d|xs |j                         t        j	                  |      S )Nre  rc  )poprs  rc  r   create)clsr   r   re  tbrs         r   r  zLoops.create  so     jj5ZZT*   	
]K8	["*;<""r   c                    t        |       D cg c]0  \  }}|dk(  rt        j                  j                  nt	        ||      2 c}}S c c}}w Nr?   )r   r   SZerore   )r  r{   nr   s       r   _indexzLoops._index  sH     "&)
1 FEGGLL(Fvq(QQ
 	
 
s   5Ac                `   t        t        j                               }t        j                  |      5  t	        j
                  t        dd      5   | j                  | j                           |j                         cd d d        cd d d        S # 1 sw Y   nxY wd d d        y # 1 sw Y   y xY wNallow_indexingT)
rM   rk   MockHandlerset_ops_handlerr    r   r  r  inner_fn_argsgetvalue)rq  	opcounters     r   inner_fn_opcountzLoops.inner_fn_opcount  s     1	i(	(LL)94@	( DMM4--/0%%'	( 	( 	( 	( 	( 	( 	(s#   B$-B<	B$B	B$$B-c                :    | j                  | j                        fS r   )r  r  r  s    r   r  zLoops.inner_fn_args  s    DKK(**r   c                r    t        j                  j                  | j                  g| j	                          S r   )rk   KernelFormatterHandlerir_to_stringr  r  r  s    r   r  zLoops.inner_fn_str  s3    ''44MM
 ..0
 	
r   c                x    |d}t        |t        j                        }| j                         j                  |kD  S r  )maxr@   realize_opcount_thresholdr  num_opsr  s     r   r  zLoops.has_large_inner_fn  s9    I	6#C#CD	$$&..::r   c                h    | j                  | j                        }t        | j                  ||      S NrW  )r  r  rH   r  )rq  rX  r   s      r   r  zLoops.inner_fn_free_symbols  s'    DKK(#DMM5VVr   c                |   t        j                  t        dd      5  | j                         rJt	        | j                         | j                         | j                               j                  cd d d        S t	        | j                         | j                               j                  cd d d        S # 1 sw Y   y xY wr  )	r    r   r  r  rJ   r  r   r  r  r  s    r   r  zLoops.get_reads  s    \\.*:DA 	&&(*$$&MMO++- %	 	 +$$&MMO %	 	 	s   AB271B22B;c                H    t        | j                         j                        S r   )r:   r  read_buffersr  s    r   r  zLoops.get_read_names  s    $//1>>??r   c                H    t        | j                         j                        S r   )r   r  r  r  s    r   r  zLoops.num_reads  s    4((*7788r   c                2    t        dt        |        d      )Nz+get_reduction_size() is not implemented by r  r  r  s    r   r  zLoops.get_reduction_size      !9$t*QG
 	
r   c                2    t        dt        |        d      )Nz+get_reduction_type() is not implemented by r  r  r  s    r   r  zLoops.get_reduction_type  r  r   c                2    t        dt        |        d      )Nz+constant_to_device() is not implemented by r  r  r  s     r   r$  zLoops.constant_to_device  r  r   rO  rZ  )r  r3  r   r   r-  rJ  r@  r0  r9  )r   r   r   r   r   'Union[TensorBox, ShapeAsConstantBuffer])r  rI  r{   r>   r   r:  )r   rN   r   zSequence[Sequence[_IntLike]]r   rK  rX  r   r   OrderedSet[Symbol]rW  r.  rX  r=  r[  ) r   r   r   r   r  r  rw  r  __repr__r  r  r   r  classmethodr  r`  r>   INDEXr  rV   r  r  r  r  r  r  r  r  r  r  r$  __classcell__r  s   @r   r  r    s       %*
!
	!
	
 ) H  ###&#	0# # :>** 
 
 ( (+ 
 

;W@9




r   r  c                   |j                   rt        j                  t        d      |      S t        j                  d|      S )Nnanr   )is_floating_pointri   constantfloat)r   r   s     r   nop_loader_fnr  %  s1    ||E%L%00||Au%%r   c                  D    e Zd ZddZddZd	dZ	 	 	 	 	 	 	 	 d
dZddZy)	Pointwisec                p    | j                         rt        t        | j                        S | j                  S Nr  )r  r   r  r   r  r  s    r   r  zPointwise.make_loader.  s)      "=

;;}}r   c                    g S r   r   r  s    r   r  zPointwise.get_reduction_size5  s    	r   c                     y r   r   r  s    r   r  zPointwise.get_reduction_type8  r  r   c                p    | j                         }t        j                  |xs d ||       ||            S Nunnamed)r  ri   storerq  output_nameindexervarsloaders        r   store_outputzPointwise.store_output;  s2     !!#yy1	74=&,OOr   c                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                        S FMove this to a given device. Requires that all reads are to constants.override_devicer   r   r  r  )r  r    r   ConstantBufferr  r   r  rq  r   r  s      r   r$  zPointwise.constant_to_deviceD  sK    !!#Hn.?HP**;;	
 	
r   NrD  )r   zSequence[sympy.Expr]r=  )r  r>  r  !Callable[[Sequence[Expr]], Never]r  r:  r   r   r[  )r   r   r   r  r  r  r  r$  r   r   r   r  r  ,  sF    P"P 3P 	P
 
P	
r   r  c                  F    e Zd ZU ded<   dZded<   ddZ	 	 	 	 	 	 	 	 d	dZy)
ScatterrG  output_indexerNrP   scatter_modec                    | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                        S )r  r  )r   r   r  r  r  r  )	r  r    r   r  r  r   r  r  r  r  s      r   r$  zScatter.constant_to_deviceU  s]    !!#Hn.?HP**;;..**
 	
r   c                    | j                         }|d}t        j                  | || j                  |             ||      | j                        S )Nr  )mode)r  ri   r  r  r  r  s        r   r  zScatter.store_outputb  sT     !!##KyyD''-.4L""	
 	
r   r[  )r  r>  r  r  r  r:  r   r   )r   r   r   r   r  r$  r  r   r   r   r  r  P  sB    44"L)"

"
 3
 	

 

r   r  
logical_ormaximumminimummulr  bitwise_xor)anyr  minprodsumxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                      t         v r	t             S  dv r	 	 	 	 	 	 d fd}|S  dk(  r	 	 	 	 	 	 dd}|S t        d        )Nargmaxargminc                   | \  }}|\  }}dk(  rt        j                  ||      }nt        j                  ||      }t        j                  ||      }t	              rt        j
                  ||      }t        j
                  ||      }	t        j                  |t        j                  ||	            }t        j                  |t        j                  ||	            }rt        j                  ||      nt        j                  ||      }
t        j                  |t        j                  ||
            }t        j                  |||      t        j                  |||      fS )Nr	  )	ri   ltgteqr,   ner  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr   reduction_types              r   argmax_combine_fnz3get_reduction_combine_fn.<locals>.argmax_combine_fn  s     !GW GW)vvgw/vvgw/FF7G,Ee$&&'2&&'2~~dCFF7G,DEucoogw.OP ' w(VVGW- 
 >>$s(CDD		$1		$1 r   welford_combinec                l    | \  }}}|\  }}}||z
  }||z   }	||	z  }
|||
z  z   ||z   ||z  |z  |
z  z   |	fS r   r   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              r   welford_combine_fnz4get_reduction_combine_fn.<locals>.welford_combine_fn  sm     &'"FD(%&"FD(VOE!H,J :-I**teemh6BB r   zunknown reduction_type=)r  tuple[object, object]r  r+  r   tuple[OpsValue, OpsValue])r  #tuple[OpsValue, OpsValue, OpsValue]r  r-  r   r-  )r  r  )r  r   r  r  r*  s   ```  r   get_reduction_combine_fnr.  }  s     --#N33	/	/	$	)>	&	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMr   c                      e Zd ZU ded<   ded<   ded<   ded<   dd	ZeZd d! fd
Zd"dZd#dZ	 	 	 	 	 	 	 	 	 	 d$dZ	d%dZ
d&dZd d!dZd'dZe	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d)d       Ze	 	 	 	 	 	 	 	 	 	 d*d       Zeej&                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d+d       Ze	 	 	 	 	 	 d,d       Ze	 	 	 	 	 	 d,d       Ze	 	 	 	 	 	 	 	 d-d       Ze	 	 	 	 	 	 d.d       Ze	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d/d       Ze	 	 	 	 	 	 	 	 	 	 	 	 d0d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d1d       Ze	 d(	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d2d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d3d       Z xZS )4	ReductionrI  reduction_rangesrO   r  r5  	src_dtyperS   reduction_hintc                $    | j                  d      S )N)r  r1  r  r  r  s    r   r  zReduction.__str__  s    ||LMMr   c                z    t         |          t               j                  fd| j                  D         z  S )Nc              3  6   K   | ]  }t        |        y wr   r(   r  s     r   r   z1Reduction.get_free_symbol_uses.<locals>.<genexpr>  s     PQq-0Pr  )r  r  r:   r  r1  rq  rX  r  s    `r   r  zReduction.get_free_symbol_uses  s:    w+M:=OZ\=O=OP$:O:OP>
 
 	
r   c                    | j                   S r   )r1  r  s    r   r  zReduction.get_reduction_size  s    $$$r   c                    | j                   S r   )r  r  s    r   r  zReduction.get_reduction_type      """r   c           	         t        j                  | j                  | j                  | j                  | j                  ||            }t        j                  |xs d ||      |       y r  )ri   	reductionr   r2  r  r  store_reduction)rq  r  r  r  reduction_varsr  s         r   r=  zReduction.store_reduction  sT     JJNNMM$/	
 	K49gdmUKr   c                X    t        | j                        t        | j                        z   S r   )r   r  r1  r  s    r   index_lengthzReduction.index_length  s!    4;;#d&;&;"<<<r   c                    | j                  | j                        }| j                  | j                  t        j                        }||fS r   )r  r  r1  r>   R0_INDEX)rq  r   rindexs      r   r  zReduction.inner_fn_args  s8    DKK(T22DMMBvr   c                    | j                  | j                        }| j                  | j                  t        j                        }t        | j                  |||      S r  )r  r  r1  r>   rB  rH   r  )rq  rX  r   rC  s       r   r  zReduction.inner_fn_free_symbols  sH    DKK(T22DMMB#MM5&
 	
r   c           
     
   | j                         } t        j                  t        d|      |      }t	        || j
                  || j                  | j                  | j                  | j                  t        j                        S )r  r  r   r   r  r  r1  r  r2  r3  )r  r    r   r  r0  r   r  r1  r  r2  rS   DEFAULTr  s      r   r$  zReduction.constant_to_device  sm    !!#Hn.?HP**;;!22..nn(00	
 		
r   Nc	           
        t         j                  j                  j                  |      }	t         j                  j                  j                  t	        |            }
|dk(  xsG t         j                  j                  | t        j                         xr |dvxr t        j                  }t        |	      rt        |
      st        j                  dfS t        j                  |       }|j                  }d}|rat!        j"                  t         j$                  j&                  | d      }t!        j"                  t         j$                  j&                  | d      }n	 	 	 	 	 	 dd}|}|
dk(  r ||	|
      }|dk(  rt        j(                  |fS |t+        |t,              rt/        j0                  t2        d	d      5  t5        |      \  }}d d d        hft         j                  j                  j                  t	        ||z               }|	|k(  r,t6        j9                  d
|||||       t        j(                  dfS t        j(                  |fS |	|k  s|
|dz  dz  k\  rt        j                  dfS t;        | |||||dk7  r|nd|t        j                        }dd} ||      \  }}|r ||      \  }}t=        |      dk(  rt        j                  dfS t?        j@                  |jC                         |jE                               \  \  }}}d}d}|D ]  }t         j                  j                  jG                  ||      }t         j                  j                  jI                  ||tK        |jM                                     } tO        d | D              }!|!r|dz  }|dz  } ||kD  rt        j(                   ||	|
      fS t        jP                   ||	|
      fS # 1 sw Y   xY w)Nscanr  r?       T)inner_reductionFc                     yr  r   )reduction_numel_hint
numel_hints     r   inner_reduction_splitsz4Reduction.num_splits.<locals>.inner_reduction_splits)  s     r   r  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr'  r   r  rF  c           	     2  	 | j                         }|J t        d t        || j                         | j	                               |       }|j                         }|j                  J |j                  D  cg c].  } t        | t              rt        | t        j                        s| 0 }} g }d}t        |j                  d       D ]  	t        	fd|D              s|j                  	j                         	j                   t"        j$                  j&                  v sZt"        j$                  j&                  	j                      }t)        |j*                  dd       }|j-                          t)        |j*                  dd       |k7  sd} ||fS c c} w )	Nr   r   r   r   r>  r=  Fc                    | j                   S r   rz  r   s    r   <lambda>z@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>y  s
    aff r   keyc              3  N   K   | ]  }|j                   j                  v   y wr   )r   r2   )r   r  mds     r   r   zAReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>z  s      FaqBHH111Fs   "%r   T)r  rj  r  r  r   r  
range_varsr   r!   r   Numbersortedr  r   r  r   r   rk   r   name_to_bufferr   r>  decide_layout)
r  r   cbread_writesrY  indiceschangedbuforiginal_striderX  s
            @r   get_read_indicesz.Reduction.num_splits.<locals>.get_read_indicesa  sh   \\^F%%%%!++-
 B ,,.K ))555 %//a&z!U\\/J J 
 GG[..4DE +F:FFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G+ G##!s   83Fr   c              3  &   K   | ]	  }|d kD    ywr?   Nr   r   s     r   r   z'Reduction.num_splits.<locals>.<genexpr>  s     /!A/   )rM  r   rN  r   r   r   )r  r0  r   ztuple[Sequence[Expr], bool]))rk   r   r   r/  rf   has_featurerB   REDUCE_TO_SINGLE_ELEMENTr@   split_reductionsr   rS   rG  rR   r  multi_processor_count	functoolsr   choicesreduction_split_factorINNERr   r   r    r   r  rI   logr  r0  r   rA   index_vars_squeezer   r  simplify_with_rangesstride_hintsr   keysr   OUTER)"r   	dst_dtyper2  r  r  r1  r  reduction_numel
input_noderM  rN  should_splitpropsnum_smmin_elements_per_threadrO  outer_reduction_splitsr  
new_rangesnew_reduction_rangesextracted_numel_hintr  rd  r`  ra  r   r>  ranges1	num_outer	num_innerr   jrE  outers"                                     r   
num_splitszReduction.num_splits  s     !ww//==oNWW%%33M&4IJ
%/ 
##FN,S,STT (( '' 	 /0Z
5K ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K\\.2BDI H <JG",H
 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- $;;VaZ"_, ((!++--;v-E>5(00	
!	$F ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		 		A  55aAAgg&&33>4#7G /w//EQ	Q			 y  &&(>$j)   !&&(>$j)  QH Hs   6OOc                     t         j                  j                  j                        t	        ||      dfd|dv r1t        t        j                              	 	 	 	 	 	 d fdfdS  S )z1Convert inner_fn from a reduction to an pointwisec                     t        j                   fdt        j                  D cg c]  }t	        |       c} D              S c c}w )Nc              3  0   K   | ]  } |        y wr   r   )r   rC  r   value_fns     r   r   z=Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>  s        UF+   )rl  reduce	itertoolsproductr   )r   r   
combine_fnr1  r  s   ` r   r   z*Reduction._unroll_reduction_fn.<locals>.fn  sH    ##"+"3"3,<=q%(=# 
 >s   A
r	  r  c                    |D cg c]  }t        j                  |       }} | |      t        j                   |      t        j
                        fS c c}w r   )r   expandri   
index_exprr  int64)r   rC  r   flatten_indexr  s      r   r  z0Reduction._unroll_reduction_fn.<locals>.value_fn  sP     4::a%,,q/::UF+NN=#8%++F  ;s   Ac                     |       d   S r  r   )r   r   s    r   rT  z0Reduction._unroll_reduction_fn.<locals>.<lambda>  s    E1 r   )r   rI  r   r   )r   rI  rC  rI  r   r,  )rk   r   r   guard_int_seqr.  _fixed_indexerr  r  )r  r1  r  r2  r  r  r   r  s   ``  @@@@r   _unroll_reduction_fnzReduction._unroll_reduction_fn  s     77++99:JK-niH
		 11* 112BCM
)3E* .-HIr   c
                   t         j                  j                  j                  t	                    dk(  rkdfd}
 |
d       |
d       |
d       |
d      dj                         v s
J  d       dfd}t        j                  |||t        |            S dk(  r+dv rdfd	}ndfd
}t        j                  |||      S t        t              rt         j                  j                  j                        t        j                  k  rNt	        |      dk7  st        |j                        r+t        j                  || j!                  |      |      S | j#                  ||||		      \  }}dfd} ||      }|t$        j&                  k(  r|}|dk(  rX|	J t)        j*                  t,        dd      5  t/        |	      \  }}d d d        J J | j1                  ||||||
      S |dkD  r| j3                  ||||||	
      S t4        j                  t7        ||||            S # 1 sw Y   pxY w)Nr   c                   t         j                  k(  rt        |       S j                  r+t        | t              sJ t        |              t        |       S t        | t              sJ t        |              t        |       S r   )	r  r   r  r   r   r   r  r   r   )valrv  s    r   py_cnstz!Reduction.create.<locals>.py_cnst  sg    

*9$00%c=9D49D9 :%%c;7BcB7s8Or   r?   )r  r  r  r   z* not supported for zero-dimension tensors!c                6    t        j                           S r   ri   r  )r   rv  r  rtypes_to_initss    r   const_fnz"Reduction.create.<locals>.const_fn  s    ||ON$CYOOr   r  r  c                0    t        j                  d      S r  r  )r   rv  s    r   r   zReduction.create.<locals>.fn  s    <<955r   c                n    D cg c]  }t         j                  j                   }} | |      S c c}w r   r   r  r  )r   r   reduction_indexr  r1  s      r   r   zReduction.create.<locals>.fn  s1    =M&Nuww||&NO&N#E?;; 'O   !2c                `    t              r| S | dkD  rt        | t        j                        S | S r  )r   r  r@   min_num_split)r  rw  s    r   _maybe_increase_splitz/Reduction.create.<locals>._maybe_increase_split2  s/    /*qy5&"6"677r   r'  r  TrF  )r  r   r   zUnion[bool, float, int])r   r   r   rj   )r  r   r   r   )rk   r   r   simplifyrf   rt  r  r  r   r   r"   r$  r@   unroll_reductions_thresholdrb   r   r  r  rS   rG  r    r   r  rI   !create_multilayer_existing_rangescreate_multilayerr   r0  )r  r   rv  r2  r  r  r1  r  r3  rx  r  r  r   hintr  r  r~  r  rw  r  s     ` ` ``          @@r   r  zReduction.create  s    ''**33MBR4STa$ qz"1:
qz	O "_%9%9%;; !""LM;P ##!F|	 $   a!556
< ##YF $  
 0  33OD001v&!+vfkk/B ##11.	  $   nn

e	 &e,
 ]222!NB;)))n.>E 3V40
0 )))'33388 $  QY((   !!1-#-	
 	
C s   IIc           	        | dv rAt        |      rt        d      S t        |      ryt        j                  |      j
                  S | dv rAt        |      rt        d      S t        |      ryt        j                  |      j                  S t        |      rdnd}t        |      rdnd}|||||||f|||ft        d      |fd	|    S )
N)r  r  z-infF)r  r	  infTr   r?   )r  r  r  r   welford_reducer  online_softmax_reduce)r,   r  r+   r  iinfor  r  )r  r   zeroones       r   default_accumulatorzReduction.default_accumulatorr  s     ..e$V}$!%({{5)---..e$U|#!%({{5)---(/uQ&u-d1#T40 $dD1&+FmT%:
  	r   c                :    | dk(  ryt         j                  | |      S )Nr  r   )r0  r  r  r   s     r   default_valuezReduction.default_value  s#     --,,^UCCr   c                    | dk(  r|S | dk  r(|dk  r#|t         j                  k(  rt         j                  S | dk  r(|dk  r#|t         j                  k(  rt         j                  S |S )Nr'     i      )rS   ru  
OUTER_TINY)r  rN  r3  s      r   _multilayer_second_step_hintz&Reduction._multilayer_second_step_hint  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++r   c                z   |yt         j                  j                  j                  |j	                         |      sy|j                          	 t        |       |j                         }t        |dd       D ]3  \  }}t         j                  j                  j                  |d      s1|c S  y# t        $ r Y yw xY w)z
        If we are reducing over the full tensor, and it is non-dense in the last dimension,
        reindex so we reduce over the dense dimension. initially just handle complete
        reduction case
        Nr'  r?   )
rk   r   r   r.  r  r  r?  r  r%  r   )r  rw  rx  rE  r   r   s         r   $check_for_split_dense_dim_reindexingz.Reduction.check_for_split_dense_dim_reindexing  s     ww77  "O
 	!*- '')gcrl+ 	DAqww771=	  # 		s   B. .	B:9B:c                
  
 | j                  |      }t        j                  |g|      t        j                  j
                  j                  t        j                  |z  d             
	 	 	 	 	 	 d
fd}	|	S )Nr   c                   |\  }| ^ }|z  |z   d
fd}r`t              }t        j                  t        j                  |      t        j                  |            }t        j                  ||	      S  |       S )Nc                 $       g            S r   r   )r`  r  	new_indexr   s   r   bodyzCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s    i');<<r   )r   rj   )r\   ri   r  r  masked)r   r  reduction_blockr  index_dtyper  r`  r  
block_sizedefaultr  	need_maskrw  r   s         @@r   
wrapper_fnz5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s     "1_*/'Y ?2_DG= = -o>vvNN7K8NN?K@ zz$g66vr   )r   Sequence[Symbol]r  r  r   rj   )	r  Viewdynamic_reshape_indexerrk   r   r   r  r   r  )r  r  r1  rw  r  r  r  rx  dense_indexr  r  r   s    ` ` ``   @@r   _multilayer_wrap_loaderz!Reduction._multilayer_wrap_loader  s     >>Z
 ../
 ((>>HH_u,a0
 
		#	6F		 	( r   c                    t        d D              s
J d       t        j                  |t        |      t        |      z         	 	 	 	 	 	 dfd}|S )Nc              3  &   K   | ]	  }|d k(    ywrf  r   r   r  s     r   r   zDReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>  s     3a163rg  z8Only enabled for numel_hint == 1, found original_ranges=c           	         | d t               }| t              d  } | t        |      t        |      z               S r   )r   r   )merged_indexnew_reduction_indexoriginal_idxr  r  original_rangesr   s       r   r  zEReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF r   )r  r:  r  r:  r   rj   )r   r  r  r   )r  r  r  original_reduction_rangesr~  r  r  r   s    ``    @r   '_multilayer_wrap_loader_existing_rangesz1Reduction._multilayer_wrap_loader_existing_ranges  sy     3?33 	
G6HI	
3 ..%uZ'85AU;V'V
		(		!/		 		 r   c                   |t         j                  t         j                  fvr|nt         j                  }t        j                  |||||||	|      }|j                          |j                         	 	 	 	 	 	 dfd}t        j                  j                  j                  t        |            }| j                  |
||      }||dt        |       k(  sJ t        j                  t	        |||||t        |      d |	||            S )a
        Break a large reduction up into multiple smaller reductions
        recursively
        c                     g | |      S r   r   )r   r  intermediate_loaders     r   intermediate_fnz;Reduction.create_multilayer_helper.<locals>.intermediate_fn:  s     ''A'A'ABBr   NrF  )r   rI  r  rI  r   rj   )r  float16bfloat16r  r0  r  r  r  rk   r   r   r   rf   r  r   r   )r  r   rv  r2  r  r  r  r~  r  r  r  r3  intermediate_dtypeintermediater  rN  r  s                   @r   create_multilayer_helperz"Reduction.create_multilayer_helper  s(   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%//o0NO
99:~
 *-Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
r   c                    t        |      }t        ||dz
  z   |      }| j                  ||      }| j                  |||||||
      }| j	                  ||||||g |||g|||	      S )r  r?   )rf   r<   r  r  r  )r  r   rv  r2  r  r  r1  r  r  r3  rx  rw  r  r  r  s                  r   r  zReduction.create_multilayerR  s    & ((89o;UC
##NI>00

 ++feL
 	
r   c                j    | j                  |||||      }| j                  ||||||g ||||	d|
      S )r  r'  )r  r  )r  r   rv  r2  r  r  r  r~  r  r  r3  r  s               r   r  z+Reduction.create_multilayer_existing_ranges  sc    $ @@% 

 ++%+o+
+ 
 	
r   rJ  rO  r  r9  r=  )
r  r>  r  r  r  r:  r>  r  r   r   rX  r   zSequence[Sequence[Expr]]r[  r   )r   rC  rv  r5  r2  r5  r  zCallable[_P, OpsValue]r  rI  r1  rI  r  z%Union[ReductionType, Literal['scan']]rw  r!   rx  Optional[IRNode]r   tuple[ReductionHint, _IntLike])
r  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]r1  rI  r  r   r2  r5  r   z(Callable[[Sequence[_IntLike]], OpsValue])r   rC  rv  r5  r2  r5  r  r  r  r:  r1  r:  r  rO   r3  rS   rx  r  r   r  r  r   r   r5  r   #Union[_NumLike, Sequence[_NumLike]])r  rw   rN  r   r3  rS   r   rS   )rw  rw   rx  r  r   rL  )r  Callable[..., OpsValue]r1  rI  rw  rw   r  rw   r  rw   r  r  rx  r  r   Callable[..., object])r  4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]r  r:  r  r:  r~  Sequence[Integer]r  r  r   z@Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue])r   rC  rv  r5  r2  r5  r  r  r  r:  r  r:  r~  
list[Expr]r  list[Integer]r  rO   r  rw   r3  rS   r   r  )r   rC  rv  r5  r2  r5  r  r  r  r:  r1  r:  r  rO   r  rw   r3  rS   rx  r  r   r  )r   rC  rv  r5  r2  r5  r  r  r  r:  r  r:  r~  r  r  r  r  rO   r3  rS   r   r  )r   r   r   r   r  r  r  r  r  r=  r@  r  r  r$  r`  r  r  r  rS   rG  r  r  r  r  r  r  r  r  r  r  r  r  s   @r   r0  r0    s   ((!!!!N H

%#L"L 3L 	L
 )L 
L=


  (,``` ` )	`
 #` -` >` ` %` 
(` `D )N),) ) 	)
 
2) )V  )6(=(='+_
_
 _
 	_

 %_
 _
 )_
 &_
 &_
 %_
 
1_
 _
B $/	, < DD$/D	,D D %(:G	   &4D	 >  (,('( -( "	(
 ( ( 5( %( 
( (T D ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 &=
 =
 &=
 
1=
 =
~  (,+
+
 +
 	+

 %+
 +
 )+
 &+
 +
 &+
 %+
 
1+
 +
Z $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 &$
 &$
 
1$
 $
r   r0  c                     d fd}|S )1A closure containing math to read a given elementc                    t        |       t              k(  sJ t        |       t              k(  sJ }t        |       D ]  \  }}}|dk7  s|||z  z   } |S r  )r   r   )r   resultr   stszrA  r   r   s        r   r  z_fixed_indexer.<locals>.indexer  ss    !c%jCK&???5zSY&&&ufd3 	+KCRQw#(*	+ r   )r   rQ  r   r   r   )r   r   rA  r  s   ``` r   r  r    s     Nr   INNER_FN_TYc                  l     e Zd ZU ded<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 	 	 	 	 	 	 	 	 	 ddZ xZS )MultiOutputReductionr   output_indexc
           
         t              rft              dk(  rd   }
n	 	 	 	 	 	 dfd}
t        |   |||
|||||       |	| _        y )Nr?   r   c                2     t         fdD              S )Nc              3  0   K   | ]  } |        y wr   r   )r   r   r   reduction_idxs     r   r   z@MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>  s     HR]3Hr  )r   )r   r   	inner_fnss   ``r   r  z-MultiOutputReduction.__init__.<locals>.loader  s     HiHHHr   rF  )r   r:  r   r:  r   ztuple[OpsValue, ...])callabler   r  __init__r  )rq  r   rv  r  r  r1  r  r2  r3  r  r  r  s      `       r   r  zMultiOutputReduction.__init__  s     I"I y>Qq\FI#I4BI%I
 	-)) 	 		
 )r   c           	     :   t        j                  | j                  | j                  | j                  | j                  ||            }t        |t        t        f      sJ t        |             || j                     }t        j                  |xs d ||      |      S r  )ri   r<  r   r2  r  r  r   r   r   r   r  r=  )rq  r  r  r  r>  r   r  s          r   r=  z$MultiOutputReduction.store_reduction  s     JJNNMM$/	
 &5$-0>$v,>0t(()"";#;)WT]ERRr   )r   rC  rv  r5  r  z)Union[INNER_FN_TY, Sequence[INNER_FN_TY]]r  r  r1  r  r  rO   r2  r5  r3  rS   r  r   )
r  r>  r  r  r  r:  r>  r  r   r   )r   r   r   r   r  r=  r  r  s   @r   r  r    s    #)#) #) =	#)
 "#) ,#) &#) #) &#) #)JS"S 3S 	S
 )S 
Sr   r  c                  ^    e Zd Zeej
                  df	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy)OnlineSoftmaxReductionNc
           	         t        fdt        |      D              }
|
D ]  }|j                           |
S )z>
        Create the reduction disregarding splitting.
        c              3  j   K   | ]*  }t         j                  t        d |	             , yw)r  N)r   r  r  )	r   
output_idxr   rv  r  r  r3  r1  r2  s	     r   r   z0OnlineSoftmaxReduction.create.<locals>.<genexpr>	  sI      
  $$+"

s   03)r   r   r  )r  r   rv  r2  r  r  r1  
num_outputr3  rx  resultsr  s    `````` `   r   r  zOnlineSoftmaxReduction.create  sG       
 
 $J/
 
   	AIIK	r   )r   rC  rv  r5  r2  r5  r  r  r  r:  r1  r:  r
  r   r3  rS   rx  r  r   1Sequence[Union[TensorBox, ShapeAsConstantBuffer]])r   r   r   r  rS   rG  r  r   r   r   r  r    s     )6(=(='+!! ! 	!
 %! ! )! ! &! %! 
;! !r   r  c                      e Zd Zeej
                  f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 dd       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z	y)WelfordReductionc                   |dv sJ t         j                  j                  j                  t	                    }d
fd}	|dk(  r |	d      }
 |	d      } |	d      }|
||fS |dk(  r@	 	 	 	 dfd|dk(  r |d          |	d       |	d      fS t        fd|D              S t        j                  |d   ||      \  }}|t        j                  k(  r|}|dkD  r| j                  ||||      S t        d	      D cg c](  }t        j                  t        ||||	            * }}|D ]  }|j                           |S c c}w )N)r  r  c                X     d fd}t         j                  |t                    S )Nc                0    t        j                        S r   r  )r   r   r  s    r   r  z8WelfordReduction.create.<locals>.const.<locals>.inner_fn/  s    || r   r  r   r:  r   rj   r  r  r   )r  r  r   r   r  s   ` r   constz&WelfordReduction.create.<locals>.const.  s2     ##!F|	 $  r   r   r?   c                X     d fd}t         j                  |t                    S )Nc                n    D cg c]  }t         j                  j                   }} | |      S c c}w r   r  )r   r   r  r  r1  s      r   r  z7WelfordReduction.create.<locals>.copy.<locals>.inner_fnG  s1    =M&Nuww||&NO&N!#77 'Or  r  r  r  )r  r  r   r   r  r1  s   ` r   copyz%WelfordReduction.create.<locals>.copyD  s2    8 !''!%<	 (  r   r  c              3  .   K   | ]  } |        y wr   r   )r   r   r  s     r   r   z*WelfordReduction.create.<locals>.<genexpr>U  s     :"T"X:   )r  rw  r   )r  r   r   r  )r  r  r   r  )rk   r   r   r  rf   r   r0  r  rS   rG  r  r   r   r  r  r  )r  r   r   r  r  r1  r  r3  rw  r  meanm2weightr  r  r	  r  r  r  s    `` ``            @r   r  zWelfordReduction.create  s    !FFFF''**33MBR4ST	 a8DqB1XFV##aL8  !11IaL)58U1X==:	:::&  **aL)+ + 	
e ]222!N19(( 	 	2 $Ah
   $""

 
   	AIIK	%
s   -Ec                     y)N)r   r   r   r   r  s     r   r  zWelfordReduction.default_value  s     r   c	                    t              t        j                  j                  j	                  t        j                  z  d             }	|	rH|dk7  rC	 	 	 	 	 	 	 	 d
fd}
 j                  ||d   t        |
d      t        |
d      f|d|      S t        dz
  z         t        j                  |t         fd|D              g |g||      }|D ]  }|j                           	 	 	 	 	 	 	 	 ddt        j                  j                  j                  t        |            } j                  ||      }t        j                  |t        fd	|D              |gd|      S )r  r   r  c                0    t        j                  |      S r   r  )r   r   r  r   s      r   r  z4WelfordReduction.create_multilayer.<locals>.constant  s     ||E511r   r  r?   )r   r   r  r  r1  r  r  r3  c           	   3  L   K   | ]  }j                  |d         yw)r   )r  N)r  )r   r  r  r  rw  r1  r  s     r   r   z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s=      
  ++$# , 
s   !$c                     |g | |      S r   r   )r   r  r  s      r   intermediate_loader_fnzBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn  s    
 4E4O455r   c              3  T   K   | ]  }t        |j                                 ! yw))r  N)r   r  )r   r   r"  s     r   r   z5WelfordReduction.create_multilayer.<locals>.<genexpr>  s*       .q}}GG   %()r   r:  r   r:  r  r   r   rj   )r   r:  r  r:  r  rE  r   rj   )rf   rk   r   r   r  r   r  r  r   r<   r  r  r   r  r   r  )r  r   r   r  r  r1  r  r  r3  r  r  intermediatesr   rN  r  r"  rw  s   ` `  ` `      @@@r   r  z"WelfordReduction.create_multilayer  s     ((89((>>HH_u,a0
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 (
 
 feL#
&  	AIIK		6!	6+	6 9	6 		6 WW%%//f0EF
99:~
  && &  G
 	
r   N)r   rC  r   r5  r  Sequence[Callable[..., Any]]r  r  r1  r  r  rO   r3  rS   r   r  r  )r   rC  r   r5  r  r&  r  r  r1  r  r  rO   r  rw   r3  rS   r   r  )
r   r   r   r  rS   rG  r  r`  r  r  r   r   r   r  r    s    )6(=(=vv v 0	v
 v (v &v &v 
;v vp $/	, 
 Z
Z
 Z
 0	Z

 Z
 (Z
 &Z
 Z
 &Z
 
;Z
 Z
r   r  c                  b    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   dd fdZd  fdZ	 	 	 	 	 	 	 	 	 	 d!dZd"dZd#dZd#dZ	d#dZ
d$dZd%dZdddZeej                   fdd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d&d       Ze	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd       Z xZS )(Scanr  scan_rangesr   =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]r  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rS   r3  r   r  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]r  c                    t         |          t               j                  fd| j                  D         z   t               j                  fd| j
                  D         z  S )Nc              3  6   K   | ]  }t        |        y wr   r(   r  s     r   r   z,Scan.get_free_symbol_uses.<locals>.<genexpr>	       O"1m4Or  c              3  6   K   | ]  }t        |        y wr   r(   r  s     r   r   z,Scan.get_free_symbol_uses.<locals>.<genexpr>	       H"1m4Hr  )r  r  r:   r  r)  r   r7  s    `r   r  zScan.get_free_symbol_uses
	  sa    
 G(7 jl  Od>N>NO !jl  HdiiH		
r   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y r   )r   r  r)  r   r  rw  r  s    r   rw  zScan.__post_init__	  =    4;;#d&6&6"773tyy>IIIr   c                   | j                  ||      t        fd| j                  D              }t        j                  | j
                  | j                  |      }t        j                  |xs d |      || j                           S )Nc              3  .   K   | ]  } |        y wr   r   r   r  r   s     r   r   z'Scan.store_reduction.<locals>.<genexpr>$	       Dx}Dr  r  )	r   r   r  ri   rI  r,  r  r  r  )rq  r  r  r  	scan_varsr   r  r   s          @r   r=  zScan.store_reduction	  so     ll4+DT^^DD$++t?yy$9gclF4;L;L4M
 	
r   c                     y)Ncustomr   r  s    r   r  zScan.get_reduction_type*	  s    r   c                    | j                   S r   )r)  r  s    r   r  zScan.get_reduction_size.	  r  r   c                    | j                   S r   r   r  s    r   r   zScan.get_size1	      yyr   c                    | j                   S r   r  r  s    r   r  zScan.get_pointwise_size4	  r  r   c                X    t        | j                        t        | j                        z   S r   )r   r  r)  r  s    r   r@  zScan.index_length7	  !    4;;#d&6&6"777r   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS r   )r  r  r)  r>   rB  r   rq  r   rC  r   s       r   r  zScan.inner_fn_args:	  E    DKK(T--t}}=ll5&)vr   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  ||      S r  )r  r  r)  r>   rB  r   rH   r  rq  rX  r   rC  r   s        r   r  zScan.inner_fn_free_symbols@	  Q    DKK(T--t}}=ll5&)#DMM3mTTr   T)can_fallback_to_atenc                  g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t        |      dkD  r=t         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t        j                  |||   ||   |      # c}S | j!                  ||d   |d   ||      \  }}t"        }|dkD  r[t$        j&                  j(                  d u xs t*        xr	 t,        dk\  xr t        |      dk(  }|s|rd gt        |      z  S d}nt.        }dfd}t        t        |            D cg c]0  }t0        j                   |d	|||   |||   ||||||d|	      2 }}|D ]  }|j3                           |S c c}w c c}w )
Nr?   r  r   )r   r   r  axispointwise_rangesr)  r  
scan_numelz3.3.0c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S r   r   )r   
scan_indexrK  rL  r)  s     r   r   zScan.create.<locals>.reindex	  S    z?c+&6666u:%5!6666>U5D\>J>tu>>r   )r   r   r,  r  r  r   r  r)  r  r   r3  r  )r   r:  rP  r:  r   r  r   )rk   r   rh  rB   SCANr   TUPLE_REDUCTIONr   r  rf   r  r   Ler   r  r  r  r(  r  versionhip
has_tritontriton_version	SplitScanr   r  )r  r   r,  r  r   rK  r  r3  rI  r   r   rM  r  r  	scan_typesupports_splitr   r  r  rL  r)  s        `             @@r   r  zScan.createF	  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''v;?177#6#6N22$
 6CK''77##&&}['AB
6{c)n,,, ))%((:q*AB %*#f+$6 !   ! .&|4	 !   &)^^)q\-#!! &4 	&
"
 	>!!T)Wj.V^w=V%v;!#  "' 6CK//!"J%		?. !&c&k 2%
$ #  ! .!&|4'+ +)##1!- 
 
*  	FNN	 N
s   )&I85Ic	           
     N    dfd}	t         j                  ||||	||d|      S )Nc                ,     g | d  || d        S r   r   )r   r   rK  r  s     r   r  z#Scan.num_splits.<locals>.wrapper_fn	  s*    Fc%4jF=F3tu:FGGr   rI  )r   rv  r2  r  r  r1  r  rw  )r   r:  r   r:  r   rj   )r0  r  )
r  r   r   r  rK  rL  r)  r  rM  r  s
      ``     r   r  zScan.num_splits	  s;    	H ###(!& $ 	
 		
r   rO  r  r-  )
r  r>  r  z%Callable[[Sequence[_IntLike]], Never]r  r:  r9  r  r   r   r=  r9  rX  r  )r   rC  r,  r+  r  z+tuple[Callable[[Sequence[Expr]], Any], ...]r   r  rK  r   r  r*  r3  rS   rI  r   r   r   r   ;Sequence[Optional[Union[TensorBox, ShapeAsConstantBuffer]]])r   rC  r   r5  r  rE  rK  r   rL  r  r)  r  r  r*  rM  r!   r   r  )r   r   r   r   r  rw  r=  r  r  r   r  r@  r  r  r  rS   rG  r  r  r  r  s   @r   r(  r(    s   
MMSS!!##--
 
"
 7
 	

 $
 

 8U  )6(=(=_ &*__ (_ ?	_
 _ _ R_ &_ #_ _ 
E_ _B 

 
 7	

 
 (
 #
 R
 
 
(
 
r   r(  c                      e Zd Zy)rY  N)r   r   r   r   r   r   rY  rY  	  s    r   rY  c                  0    e Zd ZU ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   dd fdZd fdZ	 	 	 	 	 	 	 	 	 	 ddZd dZd!dZd!dZ	d!dZ
d"dZd#dZdddZeej                   f	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d$d       Z xZS )%Sortr  sort_rangesr   z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rS   r3  r   r  r+  r,  r-  r  r   stable
descendingc                    t         |          t               j                  fd| j                  D         z   t               j                  fd| j
                  D         z  S )Nc              3  6   K   | ]  }t        |        y wr   r(   r  s     r   r   z,Sort.get_free_symbol_uses.<locals>.<genexpr>	  r0  r  c              3  6   K   | ]  }t        |        y wr   r(   r  s     r   r   z,Sort.get_free_symbol_uses.<locals>.<genexpr>	  r2  r  )r  r  r:   r  rb  r   r7  s    `r   r  zSort.get_free_symbol_uses	  s_    G(7 jl  Od>N>NO !jl  HdiiH		
r   c                    t        | j                        t        | j                        z   t        | j                        k(  sJ t        |           y r   )r   r  rb  r   r  rw  r  s    r   rw  zSort.__post_init__	  r4  r   c                ,   | j                  ||      t        fd| j                  D              }t        j                  | j
                  || j                  | j                        }t        j                  |xs d |      || j                           S )Nc              3  .   K   | ]  } |        y wr   r   r7  s     r   r   z'Sort.store_reduction.<locals>.<genexpr>	  r8  r  r  )
r   r   r  ri   sortr,  rc  rd  r  r  )rq  r  r  r  r>  r   r  r   s          @r   r=  zSort.store_reduction	  su     ll40DT^^DD$++vt{{DOOLyy$9gclF4;L;L4M
 	
r   c                     y)Nrk  r   r  s    r   r  zSort.get_reduction_type	  s    r   c                    | j                   S r   )rb  r  s    r   r  zSort.get_reduction_size	  r  r   c                    | j                   S r   r>  r  s    r   r   zSort.get_size	  r?  r   c                    | j                   S r   r  r  s    r   r  zSort.get_pointwise_size
  r  r   c                X    t        | j                        t        | j                        z   S r   )r   r  rb  r  s    r   r@  zSort.index_length
  rB  r   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }|fS r   )r  r  rb  r>   rB  r   rD  s       r   r  zSort.inner_fn_args
  rE  r   c                    | j                  | j                        }| j                  | j                  t        j                        }| j                  ||      }t        | j                  ||      S r  )r  r  rb  r>   rB  r   rH   r  rG  s        r   r  zSort.inner_fn_free_symbols
  rH  r   c	                   g |d  |dz   d  |   gt         j                  j                  |t        j                        sd gt        |      z  S t         j                  j                  }
|
j                  t                    }d}t        j                  j                  xr% |
j                  t        j                  ||            }|sd gt        |      z  S t        |      t        |      k(  sJ |
j                  t        j                  |d            r?t        t        |            D cg c]!  }t         j#                  |||   ||   |      # c}S dfd}t        t        |            D cg c]4  }t$        j#                  t'        d|||   |||   |||||||d|	      6 }}|D ]  }|j)                           |S c c}w c c}w )Nr?   r  r  c                    t        |      t              k(  sJ t        |       t              k(  sJ g | d  || d  S r   rO  )r   
sort_indexrK  rL  rb  s     r   r   zSort.create.<locals>.reindexB
  rQ  r   )r   r   r,  r  r  r   r  rb  r   r3  r  rc  rd  )r   r:  ru  r:  r   r  r   )rk   r   rh  rB   SORTr   r   r  rf   r@   r  persistent_reductionsr  r   rT  r   r  r  r   ra  r  )r  r   r,  r  r   rK  rc  rd  r3  r   r   
sort_numel
max_rblockis_persistent_kernelr  r   r  r  rL  rb  s        `            @@r   r  zSort.create
  s    =T%4[<4q
+;<Dzlww""6>+>+>?6CK''77##&&}['AB
 
MM// Q..uxx
J/OP 	 $6CK''6{c)n,,, ))%((:q*AB %*#f+$6 !   ! .&|4	 !  	?0 !&c&k 2'
& %  ! .!&|4'+ +##1!-!) 
 
,  	FNN	 Q
s   2&G99GrO  r  r-  )
r  r>  r  rG  r  r:  r>  r:  r   r   r=  r9  rX  r  )r   rC  r,  r+  r  z'tuple[Callable[[list[Expr]], Any], ...]r   r  rK  r   rc  r   rd  r   r3  rS   r   r   r   r^  )r   r   r   r   r  rw  r=  r  r  r   r  r@  r  r  r  rS   rG  r  r  r  s   @r   ra  ra  	  s+    
GG!!##--L	
 
"
 2
 	

 '
 

 8U  )6(=(=LL (L ;	L
 L L L L &L L 
EL Lr   ra  c                >    	 t        | d       y# t        $ r Y yw xY w)NFfreezeT)r?  r  r   s    r   r  r  c
  s&    a. s    	c                    	 t        | d      \  }}|j                         r|j                          |j                         S # t        $ r Y yw xY wNFr|  )r?  should_pad_stridespad_stridesis_contiguousr  )r   _bufferr>  s      r    is_contiguous_storage_and_layoutr  k
  sR    /%@ $$& ##%% s   ?A 	AAc                   t        | t              rt        | j                  |||||      S t        | t              r:t        | j                  |||||      \  }}| | j                  j                         fS t        | t              r|rn|r0| j                          | j                         j                         s>J || j                  ||       n&|| j                  ||       n| j                          t	        |       | j                         fS t        | t              r(t        | j                  |      \  }}|| j                  fS t        )z
    Try to simplify x into a StorageBox and a Layout.

    allow_padding only affect how we apply stride_order. When allow_padding
    is True, we have the freedom to add padding when applying the stride_order.
    r}  want_contiguousstride_orderr  r  r  r|  )r   r   r?  r=  
StorageBoxr  Bufferr  r  r  r  r]  rC  r>  r  )	r   r}  r  r  r  r  r   r>  buffers	            r   r?  r?  w
  sG    !Y$FF+%''
 	
 !Z )FF+%''
	6 !&&##%%%!V!||~33555)11  2  *22! 3  !!}alln,,!_% *FF
	 qxx
r   c                d    	 t        | d      \  }}|j                  |      S # t        $ r Y yw xY wr  )r?  is_stride_orderedr  )r   r  r  r>  s       r   "is_stride_order_storage_and_layoutr  
  s:    /%@''55 s    # 	//c                   t        | t        t        f      rt        | j                        S t        | t
              rt| j                  }t        j                  j                  j                  |j                  t        |j                        z  t               }t        | j                        xs |S t        | t              r*| j!                         t        j                  j"                  v S yr  )r   r   r  is_unalignedr=  rC  r>  rk   r   r   statically_known_multiple_ofrA  r]   r   r_   r  r  unaligned_buffers)r   r>  has_unaligned_layouts      r   r  r  
  s    $J/0DII&&$(#$77#3#3#P#PMMN6<<88/$
  
 DII&>*>>$}}!''";";;; r   c                      e Zd ZU ded<   dddZddZddZddZedd       Z	ddZ
d d	Zd!d
Zd"dZd#dZd$dZd%dZd&dZd'dZd(dZd%dZd%dZd)dZd*dZd+dZd,dZy)-BaseViewr   r=  c                8    | j                   j                  |      S r   r=  r  r  s     r   r  zBaseView.get_free_symbol_uses
  s    yy--m<<r   c                    t        d|        )Nzmake_reindexer NYI on rj  r  s    r   make_reindexerzBaseView.make_reindexer
  s    !$:4&"ABBr   c                l    | j                   j                         | j                         dfd}|S )Nc                       |             S r   r   r   innerr   s    r   r  z&BaseView.make_indexer.<locals>.indexer
      &&r   )r   r:  r   r!   )r=  r  r  )rq  r  r  r   s     @@r   r  zBaseView.make_indexer
  s/    		&&(%%'	' r   c                l    | j                   j                         | j                         dfd}|S )Nc                       |             S r   r   r  s    r   r  z$BaseView.make_loader.<locals>.loader
  r  r   r  )r=  r  r  )rq  r  r  r   s     @@r   r  zBaseView.make_loader
  s/    		%%'%%'	' r   c                6    | j                   j                         S r   )r=  r  r  s    r   r   zBaseView.dtype
  s    yy""$$r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zBaseView.get_layout
      yy##%%r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zBaseView.get_device
  r  r   c                     y r   r   r  s    r   r  zBaseView.get_origin_node
  r  r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zBaseView.get_name
      yy!!##r   c                "    | j                         S r   r  r  s    r   r  zBaseView.get_pointwise_size
      }}r   c                8    | j                   j                  |      S r   r=  r  r  s     r   r  zBaseView.mark_reuse
      yy##E**r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zBaseView.has_exceeded_max_reads      yy//11r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zBaseView.realize      yy  ""r   c                8    | j                   j                          y r   r=  r  r  s    r   r  zBaseView.realize_hint  s    		 r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zBaseView.get_storage_numel
      yy**,,r   c                6    | j                   j                         S r   r=  r   r  s    r   r   zBaseView.is_extern      yy""$$r   c                    t        | j                  t              sJ t        | j                               | j                  j	                         S r   )r   r=  r  r   is_module_bufferr  s    r   r  zBaseView.is_module_buffer  s6    $))X.?TYY?.yy))++r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zBaseView.get_read_names      yy''))r   c                    t        j                  t        dd      5  t        | j	                         | j                               j                  cd d d        S # 1 sw Y   y xY wr  )r    r   r  rJ   r  r   r  r  s    r   r  zBaseView.get_reads  sL    \\.*:DA 	&  " e		 	 	s   2AA!c                d    | }t        |t              r|j                  }t        |t              r|S r   )r   r  r=  )rq  r   s     r   r  zBaseView.unwrap_view  s+    H%A H%r   c                    | j                         } t        j                  t        d|      |      }t	        || j                         || j                               S r  )r  r    r   r  r  r  r   r  s      r   r$  zBaseView.constant_to_device$  sP    !!#Hn.?HP.."==?	
 	
r   NrO  r  r   *Callable[[Sequence[Expr]], Sequence[Expr]]rF  rD  r4  r6  r@  r0  rJ  r9  rM  r8  r=  r-  rY  r.  rW  rN  r[  )r   r   r   r   r  r  r  r  rc  r   r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r$  r   r   r   r  r  
  s    
L=C % %&&$+2#!-%,*	
r   r  c                  P    e Zd ZU ded<   edd       Zed	d       Zd
dZ	 	 ddZ	y)r   r:  r   c                :   t         j                  j                  }|D cg c]  }t        j                  |       }}| j                         }dgt        |      t        |      z
  z  t        |      z   }t        |      t        |      k(  sJ t        t        |            D ]r  }||   dk(  r||   J ||   ||<   ||   ,t         j                  j                  j                  ||         rM|j                  ||   ||   z
  d      dk(  rmJ d        |S c c}w )zReplace `-1` with correct sizesNr'  r   fallbackzKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})rk   r   r   r   r  r   r   r   r   is_size_one_or_falser   )r   new_sizer   r   old_sizer   s         r   _normalize_sizezExpandView._normalize_size4  s#    77##-56ELLO66::<6S]S]:;d8nL8}H---s8}% 	A{b {...&qk!$(8(8(M(M)   ))(1+*Ca)PTUU aU	" + 7s   Dc                   | j                  ||      }t        |      rt        |      \  }}t        |      t        |j                        z
  }|dk\  sJ t
        j                  j                  g|z  }t        |j                  |j                        D ]Y  \  }}|j                  t        j                  j                  j                  |      s|nt
        j                  j                         [ t        |j                   |j"                  t%        |      ||j&                  |j(                        }	t+        ||	      S t-        ||      S )Nr   r<  )r=  r   )r  r  r?  r   r   r   r  r  r   r   r  rk   r   r   r  r@  r   r   r   rA  rB  rC  r   )
r  r   r  rF  rG  skiprH  r   r   rI  s
             r   r  zExpandView.createO  s   &&q(3 #"7":GZx=3z#77D199'',,$.J #J$5$5z G !!77++@@F  %!!  X!!$$J #
CCqx00r   c                    | j                   S r   r>  r  s    r   r   zExpandView.get_sizej  r?  r   c                    | j                         }| j                  j                         t        |      t              z
  	 	 	 	 dfd}|S )Nc                    t        | d        } t        |       t              k(  sJ t        t                    D ](  }|   dk(  st        j                  j
                  | |<   * | S r  )r   r   r   r   r  r  )r   r   actualr  s     r   r   z*ExpandView.make_reindexer.<locals>.reindext  sf     tu&Eu:V,,,3v;' ,!9>$ww||E!H, Lr   r   r:  r   r:  )r   r=  r   )rq  targetr   r  r  s      @@r   r  zExpandView.make_reindexerm  sP     ##%6{S[(		!				 r   N)r   r   r  rI  r   rI  )r   r   r  rI  r   r  r9  r  )
r   r   r   r   r`  r  r  r  r   r  r   r   r   r   r   0  s@    
 4 1 14	3r   r   c                  P    e Zd ZU ded<   edd       Zed	d       Zd
dZ	 	 ddZy)PermuteViewr  dimsc           
        | j                  |      }t        |      t        t        t        |                  k(  sJ t	        |      rt        |      \  }}t        |j                  |j                  |D cg c]  }|j                  |    c}|D cg c]  }|j                  |    c}|j                  |j                        }t        ||      S t        ||      S c c}w c c}w )Nr<  )r=  r  )_map_neg_dimsr:   r   r   r  r?  r@  r   r   r   r   rA  rB  rC  r  )r  r   r  rF  rG  r   rI  s          r   r  zPermuteView.create  s      &$:eCI.>#???? #"7":GZ$!!  -12#2/34!""1%4!!$$J #
CC-- 34s   5CC#c                R    |D cg c]  }|dk\  r|nt        |      |z    c}S c c}w r  rO  )r  r  r3  s      r   r  zPermuteView._map_neg_dims  s)    @DEsaxSY_4EEEs   $c                   t        | j                  | j                              t        t        t	        | j                                    k(  sJ | j
                  j                         }| j                  D cg c]  }||   	 c}S c c}w r   )r:   r  r  r   r   r=  r   )rq  r   r   s      r   r   zPermuteView.get_size  so    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   7Bc                L   t        | j                        D ci c]  \  }}||
 c}}t        t        | j                              D cg c]  }|   	 c}t	              t	        t        t        | j                                    k(  sJ 	 	 	 	 dfd}|S c c}}w c c}w )Nc                4    D cg c]  }| |   	 c}S c c}w r   r   )r   r   invs     r   r   z+PermuteView.make_reindexer.<locals>.reindex  s     '**E!H***s   r  )r   r  r   r   r:   )rq  r   r  r   r  s       @r   r  zPermuteView.make_reindexer  s     !*$)) 451q!t5$S^45!s1v5#*U3tyy>-B"CCCC	+!	+	+
  65s   BB!N)r   r   r  rQ  r   r  )r  rQ  r   	list[int]r9  r  )	r   r   r   r   r  r  r  r   r  r   r   r   r  r    sB    
. .$ F F,	3r   r  c                  F    e Zd Zedddd       Ze	 	 	 	 dd       ZddZy)	SqueezeViewN)r3  c          	        t        |      r+t        |      \  }}g }g }|?t        |t              sJ t	        |             d|k  r|t        |j                        k  sJ t        t        |j                  |j                              D ]g  \  }\  }}	|)|dk7  s|j                  |       |j                  |	       4||k7  r#|j                  |       |j                  |	       \|dk(  rbJ d        t        |j                  |j                  |||j                  |j                        }
t!        ||
      S |8t"        j%                  ||j'                         D cg c]
  }|dk7  s	| c}      S |j'                         |   dk(  sJ t"        j%                  |t        |j'                               D cg c]  \  }}||k7  s| c}}      S c c}w c c}}w )Nr   r?   zexpected squeezed size to be 1r<  )r  r?  r   r   r   r   r   r   r   r   r  r@  r   r   rA  rB  rC  r  r  r   )r  r   r3  rF  rG  r  rH  r   r   r   rI  r   s               r   r  zSqueezeView.create  s    #"7":GZHJ!#s+6T#Y6+CxC#joo*>$>>>%.s:??JDUDU/V%W 
K!>D&;qy -"))&1Cx -"))&1#qyJ*JJy
K %!!  !!$$J #
CC;;;qajjl"Ea1f1"EFF::<$)));;q1::<1H"UAAQTH1"UVV #F #Vs   
G
'G
6GGc                    | D cg c]
  }|dk7  s	| }}t        |       D cg c]  \  }}|dk7  s| c}}t        |       dfd}||fS c c}w c c}}w )Nr?   c                    t        |       t              k(  sJ |  d        t        j                  j                  gz  }t	        |       D ]
  \  }}|||<    t        |      S )N )r   r   r  r  r   r   )r   r  r   r   lengthnot_ones       r   r   z%SqueezeView.squeezer.<locals>.reindex  sk    u:W-C%'/CC-/Igu- #Q!"	####r   )r   r:  r   ztuple[Expr])r   r   )r   r   r  r   r   r  r  s        @@r   squeezerzSqueezeView.squeezer  s_      $.!qAvA..!*4;AAF1;T	$    /;s   
AAAAc                    t        d      )Nzuse SqueezeView.create())AssertionError)rq  r=  s     r   r  zSqueezeView.__init__  s    788r   )r   r   r3  rL  r   r   )r   r:  r   z9tuple[list[int], Callable[[Sequence[Expr]], tuple[Expr]]])r=  r   r   r   )r   r   r   r  r  r`  r  r  r   r   r   r  r    sC    7; $W $WL !!	B! ! 9r   r  c                  l    e Zd ZU ded<   ded<   	 	 ddZddZddZeZe	 	 	 	 	 	 	 	 dd       Z	dd	Z
y
)GenericViewr:  r   r  r   c                    | j                   S r   )r   r  s    r   r  zGenericView.make_reindexer  s     ||r   c                   t        t        | j                              D cg c]  }t        t        j
                  |       }}t        | j                  |            }ddj                  t        t        |             d| S c c}w )Nzlambda , r  )r   r   r   re   r>   r  r   r   r  r  r   )rq  r  	index_old	index_news       r   reindex_strzGenericView.reindex_str  ss    CHTYYCX
>?*4::q9
	 
 i01	3sI#6789+FF	
s   !Bc                z    | j                  | j                  d| j                   d| j                          g      S )Nsize=zreindex=)r  r=  r   r  r  s    r   r  zGenericView.__str__  s=    YY%		{+x8H8H8J7K-LM
 	
r   c                *     | |t        |      |      S )Nr=  r   r   )r   )r  r   r  r   s       r   r  zGenericView.create	  s     X@@r   c                    | j                   S r   r>  r  s    r   r   zGenericView.get_size  r?  r   Nr  rJ  )r   r   r  r:  r   r  r   r  r9  )r   r   r   r   r  r  r  r  r  r  r   r   r   r   r  r    sp    
77	3
G

 HAA !A <	A
 
A Ar   r  c                      e Zd Zedd       Zedd       Ze	 	 	 	 	 	 d	d       Ze	 d
	 	 	 	 	 	 	 dd       Ze	 d
	 	 	 	 	 	 	 dd       Z	y)r  c                    t        j                  |       } t        j                  |      }t        j                  j                  j
                  j                  } |t        j                  | d            r| |z   } | S r  )r   r  rk   r   r   r   evaluate_exprLt)r   r   r  s      r   handle_negative_indexzView.handle_negative_index  sZ    ll3||D!((22@@#q)**C
r   c                  	 t        |t              sJ t        |             | j                  |j	                         |      \  	}t
        j                  j                  j                  	|      r|S d}t        t        	            dkD  st        t        |            dkD  rd}d|v rd	fd} | |t        |      |      S t        |      s|r|r t        |      st        j                  |      }t        |d      \  }}t!        |j"                  |j$                  |t&        j)                  |      |j*                  |j,                        }t/        ||      S | j1                  	|      } | |t        |      |      S )	NFr   Tc                4    t        dgt              z        S r  )r   r   )r   r  s    r   fake_reindexz!View.create.<locals>.fake_reindex3  s    aS3x=011r   r  )r  r<  )r   r   r   ztuple[int, ...])r   r   r   resolve_negative_sizer   rk   r   r   statically_known_list_equalsr   r3   r   r  r  require_contiguousr?  r@  r   r   r  r  rA  rB  rC  r  )
r  r   r  unbacked_symbols_in_sizesr  rF  rG  rI  r   r  s
            @r   r  zView.create!  sZ   (H-=tH~=- 66qzz|XN( 77888LH$)!%h/014(23a7(,%=2 ADNLII-a04M(2RST2U !33A6"74"PGZ$!!  11(;!!$$J #
CC--hAX@@r   c                F   |D cg c]+  }t         j                  j                  j                  |      - }}| D cg c]+  }t         j                  j                  j                  |      - } }t	        |      }t        t        |            D ]J  }||   dk(  st        j                  j                  ||<   t        t        |       t        |            ||<    n t         j                  j                  j                  t        |       t        |             | |fS c c}w c c}w )Nr'  )rk   r   r   r  r   r   r   r   r  Oner;   rf   check_equals)r  r  r   r   s       r   r  zView.resolve_negative_sizeO  s     ;CCQAGG$$--a0CC:BCQAGG$$--a0CC>s8}% 	A{b #ggkk&}X'>h@WX		 	
%%mH&=}X?VW!! DCs
   0D0DNc                    	 | j                  |||      }|S # t        t        f$ r@ t        |      g}| j                  ||      }| j                  ||      }t	        ||      }Y |S w xY wr   )_dynamic_reshape_indexerr  
IndexErrorrf   r   )r  r  r  	dense_dimr   flatr   r   s           r   r  zView.dynamic_reshape_indexer`  sz    	:228XyQG  
+ 	:!(+,D33HdCH33D(CH%h9G	:s    AA&%A&c                t   t         j                  j                  j                  }t	        t        |            D cg c]  }t        t        j                  |       c}t        t        |            }t        |       }|duxr! |t        |      dz
  k7  xr t        |      dk(  }|r&|J |j                  |      }|j                  |       g |r=|r:|j                         }	|j                         \  }
}|	dk(  r>j                  t        j                  j                         |j                  |
|f       n|dk(  r|j                  |	       n ||       ||	      k(  r=j                  |
       t         j                  j                  j!                  ||	       nh ||       ||	      k  r ||       ||	      k  r2|j                         \  }}||z  |
z   }
||z  } ||       ||	      k  r2j                  |
       t         j                  j                  j!                  ||	       n ||       ||	      kD  rt        j                  j"                  }|	}j                  t%        |
||             ||z  } ||       ||	      kD  rH|j                         }j                  t%        |
||             ||z  }|	|z  }	 ||       ||	      kD  rHt         j                  j                  j!                  ||	       nt&        |r|r:|rf|j                         }	t         j                  j                  j!                  |	d       j                  t        j                  j                         |rf|r@|j                         \  }
}t         j                  j                  j!                  |d       |r@|At        |      dk(  r3j)                          j                         }j+                  ||       nj)                          t              t        |       k(  sJ 	 	 	 	 dfd}|S c c}w )zG
        Perform a reshape entirely by modifying indexing math
        Nr?   c                    t        |       t              k(  sJ t        |       t              f       t        t        |             t        fdD              S )Nc              3  6   K   | ]  }t        |        y wr   )rg   )r   r   replacementss     r   r   zAView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>  s     HA|4Hr  )r   r   r   r   )r   r  r  	view_exprs    @r   r   z.View._dynamic_reshape_indexer.<locals>.reindex  sO     u:T*CSZT,CC*D% 01LHiHHHr   r  )rk   r   r   r   r   r   re   r>   VIEWr   r   r  r  r   r  r  r
  r	  r=   r  reverseinsert)r  r  r  r   r   	stack_new	stack_oldreordering_dense_dimold_dimsize_oldvarsize_newvar2	size_new2divisormodulus
dense_exprr   r  r  s                     @@r   r  zView._dynamic_reshape_indexerq  s    GG$$..	 CHHBV
=>*499a8
 T8,-	N	 T! #S^a//#H" 	
  (((mmI.GW%	I }}H%MMOMC1}  .  #x1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD)/C/C')3H  )Ih,??   %  --hA8$y'::''++"  gw!GH!G+)Ih,??'mmoG$$_S'7%KL%/G''1H	  )Ih,??
   --hA$$= I@  }}HGG))(A6UWW\\* 
 %MMOMCGG))(A6   S]a%7"JY
39~X...	I!	I	I [
s   !P5)r   r!   r   r!   r   r!   )r   r   r  r:  r   r   )r  r:  r  r:  r   ztuple[list[Expr], list[Expr]]r   )r  rI  r  rI  r  rL  r   &Callable[[Sequence[_T]], Sequence[_V]])r  r:  r  r:  r  rL  r   r  )
r   r   r   r`  r   r  r  r  r  r  r   r   r   r  r    s      +A +AZ " ",:"	&" "  
 $(	$ % !	
 
0    $(X X X !X 
4	X Xr   r  c                       e Zd ZU dZded<   d fdZddZeZddZddZ	ddZ
edd	       Zdd
ZddZddZddZddZddZ	 d	 	 	 ddZdddZd dZ xZS )!rC  z*Pretend our storage has a different layoutr  r>  c                    t         |           t        | j                  t              r0t
        j                  | d| j                  j                                y y )Nr=  )r  rw  r   r=  r  r   rp  r  r  s    r   rw  zReinterpretView.__post_init__  s@    dii*tVTYY-B-B-DE +r   c                P    | j                  | j                  | j                  g      S r   )r  r=  r>  r  s    r   r  zReinterpretView.__str__  s&    		
 	
r   c                6    | j                   j                         S r   r  r  s    r   r  zReinterpretView.get_name  r  r   c                .    | j                   j                  S r   )r>  r   r  s    r   r  zReinterpretView.get_device  s    {{!!!r   c                     y r   r   r  s    r   r  zReinterpretView.get_origin_node  r  r   c                .    | j                   j                  S r   )r>  r   r  s    r   r   zReinterpretView.dtype  s    {{   r   c                @    t        | j                  j                        S r   )r   r>  r   r  s    r   r   zReinterpretView.get_size  s    DKK$$%%r   c                @    t        | j                  j                        S r   )r   r>  r   r  s    r   r%  zReinterpretView.get_stride  s    DKK&&''r   c                     d fd}|S )Nc                T   j                   j                         }t        j                  j	                          ||             }j                   j
                  j                  j
                  k7  r5t        j                  |j
                  j                  j
                        S |S r   )r>  r  ri   loadr  r   r=  to_dtype_bitcast)r   r  
tmp_loaderrq  s      r   r  z+ReinterpretView.make_loader.<locals>.loader  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!r   r   r:  r   rj   r   rq  r  s   ` r   r  zReinterpretView.make_loader  s    	" r   c                6    | j                   j                         S r   )r>  r  r  s    r   r  zReinterpretView.make_indexer       {{''))r   c                    | j                   S r   r>  r  s    r   r  zReinterpretView.get_layout  r  r   c                     y r   r   r  s    r   r  zReinterpretView.freeze_layout  r  r   c                    t        | j                  j                  |      t        | j                  j                  |      z  t        | j                  j                  |      z  S r   )r)   r>  r   r   rA  r  s     r   r  z$ReinterpretView.get_free_symbol_uses	  sQ     T[[--}=t{{11=ABt{{11=AB	
r   c                t   t         j                  j                  j                  | j                  | j
                  j                  | j
                  j                  | j
                  j                  ||j                  n#t         j                  j                  j                  | j
                  j                        S r  )rk   r   wrapper_codecodegen_reinterpret_viewr=  r>  r   r   rA  	writeliner   r  s     r   r  z!ReinterpretView.codegen_reference  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
r   c                     yr  r   r  s    r   r  zReinterpretView.num_reads      r   r-  rJ  r@  r0  r4  r9  rD  rF  r6  rO  rZ  r   r?  rX  )r   r   r   r]  r   rw  r  r  r  r  r  rc  r   r   r%  r  r  r  r  r  r  r  r  r  s   @r   rC  rC    s    4NF

 H$" ! !&(	* %*
!
	!

r   rC  c                  \    e Zd ZU dZded<   ed
d       ZddZeZe	dd       Z
ddZddZy	)	DtypeViewz(Pretend our storage has a different typer5  target_dtypec                    t        |      r]t        |      \  }}t        |j                  ||j                  |j
                  |j                  |j                        }t        ||      S t        ||      S )Nr<  )r=  rC  )
r  r?  r@  r   r   r   rA  rB  rC  rB  )r  r   	new_dtyperF  rG  rI  s         r   r  zDtypeView.create)  sm     #"7":GZ$!!!!!!$$J #
CCai88r   c                P    | j                  | j                  | j                  g      S r   )r  r=  rC  r  s    r   r  zDtypeView.__str__8  s     		4+<+<=>>r   c                    | j                   S r   )rC  r  s    r   r   zDtypeView.dtype=  s       r   c                6    | j                   j                         S r   r=  r   r  s    r   r   zDtypeView.get_sizeA  r  r   c                L      j                   j                         d fd}|S )Nc                z    t        j                   |       j                  j                  j                        S r   )ri   r1  rC  r=  r   )r   r  rq  s    r   r  z%DtypeView.make_loader.<locals>.loaderG  s*    ''c
D4E4EtyyWWr   r  r=  r  )rq  r  r  s   ` @r   r  zDtypeView.make_loaderD  s"    		%%'	X r   N)r   r   rE  r5  r   r  rJ  r4  r9  rD  )r   r   r   r]  r   r  r  r  r  rc  r   r   r  r   r   r   rB  rB  #  sE    29 9? H! !$r   rB  c                  d    e Zd Ze	 	 	 	 	 	 	 	 	 	 dd       Ze	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Zy)	SliceViewc                l   	
 t         j                  j                  
|j                         |   t	        d ||fD              r!t
        j                  	t
        j                  n
j                  	
j                  d	
fd	 	 	 	 	 	 	 	 	 	 d fd} ||dd      } |||      }||fS )zz
        Normalize start and end such that both are in the range
        [0, x.get_size()[dim]] and start <= end.
        c              3  2   K   | ]  }t        |        y wr   )r3   r   r   s     r   r   z0SliceView.normalize_start_end.<locals>.<genexpr>Y  s     HA$Q'H   c                    j                  | |      r| n | |      }j                  ||      r|}|S  ||      }|S r   )statically_known_geqr-  )r   lowerupperclamped_lowerclamped_fullmax_funcmin_funcr   s        r   clampz,SliceView.normalize_start_end.<locals>.clamp`  s`    221e<(1eBT 
 00F  
   mU3 
  r   c                D    | |S j                  |       }  | ||      S r   )r   )r  rU  rV  r  r[  r  dim_sizes       r   
clamp_wrapz1SliceView.normalize_start_end.<locals>.clamp_wrapk  s0     {++C:CeU++r   r   )r   r!   rU  r   rV  r   r   r!   )
r  zUnion[int, None]rU  r   rV  r   r  Union[Expr, int]r   r_  )
rk   r   r   r   r   r   MinMaxevaluate_minevaluate_max)r  r   r3  startendr^  r[  r]  rY  rZ  r   s   `     @@@@@r   normalize_start_endzSliceView.normalize_start_endN  s     77##::<$H%h1GHHyyHyyH,,H,,H		 	,!	,*-	,69	,DT	,	, 5!Xq1eXx8czr   c           	        t        j                        t        t              sdkD  sJ        	 dk(  r|dk\  rdk(  r|S t        |j                               |r| j                  ||      \  }t        |z
  dz
  z         <   t        |      rt        |      \  }}t        |j                        }	|	   z  |	<   t        |j                  |j                  |	|j                  |j                     z  z   |j                         }
t#        ||
      S 	 	 	 	 dfd}t%        ||      S # t        $ r Y w xY w)Nr   l    r?   r<  c                    t        |       t              k(  sJ d|  d        t        |       } |    z  z   | <   | S )Nzwrong ndim r  )r   r   )r   r3  r  rd  steps    r   r   z!SliceView.create.<locals>.reindex  sR     u:X.P+eWAhZ0PP.KEsd*U2E#JLr   r  r  )r   r  r   r!   	TypeErrorr   r   rf  r<   r  r?  r   r@  r   r   rA  rB  rC  rN  )r  r   r3  rd  re  ri  r[  rF  rG  rH  rI  r   r  s     `` `      @r   r  zSliceView.createx  sk    ||D!$%7471	zcY.419 

%
 00CDJE3 uq!94@ #"7":GZj//0J(o4JsO$!!  !!J$5$5c$:U$BB$$J #
CC	!		 	 ah@@K  		s   D> >	E
EN)
r   r   r3  r   rd  r   re  r   r   ztuple[int, int])r?   T)r   r   r3  r   rd  r   re  r   ri  r   r[  r   r   r   )r   r   r   r  rf  r  r   r   r   rN  rN  M  s    '' '),'36'	' 'R  3A3A 3A 	3A
 3A 3A 3A 
3A 3Ar   rN  c                  B    e Zd ZU ded<   ded<   d
dZddZddZddZy	)BaseConstantr5  r   rC  r   c                     yNr   r   r  s    r   r   zBaseConstant.get_size  s    r   c                    | j                   S r   r  r  s    r   r  zBaseConstant.get_device  r  r   c                     y r   r   r  s    r   r  zBaseConstant.get_origin_node  r  r   c                    t               S r   r9   r  s    r   r  zBaseConstant.get_reads  r}  r   Nr9  r@  r0  rW  )r   r   r   r   r   r  r  r  r   r   r   rl  rl    s"    r   rl  c                  D    e Zd ZU ded<   ded<   ded<   ddZddZdd	Zy
)Constantr   r  r5  r   rC  r   c                     d fd}|S )Nc                X    t        j                  j                  j                        S r   )ri   r  r  r   r   rq  s    r   r  z$Constant.make_loader.<locals>.loader  s    <<

DJJ77r   r3  r   r4  s   ` r   r  zConstant.make_loader  s    	8 r   c                     y r   r   r  s    r   r  zConstant.realize  r  r   c                F    t        | j                  | j                  |      S )N)r  r   r   )rs  r  r   r  s     r   r$  zConstant.constant_to_device  s    djj

6JJr   NrD  r=  r[  )r   r   r   r   r  r  r$  r   r   r   rs  rs    s#    JKr   rs  c                  <    e Zd ZU ded<   ded<   ded<   d
dZddZy	)IndexingConstantr   r   r5  r   rC  r   c                     d fd}|S )Nc                X    t        j                  j                  j                        S r   )ri   r  r   r   rv  s    r   r  z,IndexingConstant.make_loader.<locals>.loader  s    >>$**djj99r   r3  r   r4  s   ` r   r  zIndexingConstant.make_loader  s    	: r   c                F    t        | j                  | j                  |      S )N)r   r   r   )rz  r   r   r  s     r   r$  z#IndexingConstant.constant_to_device  s    djj

6RRr   NrD  r[  )r   r   r   r   r  r$  r   r   r   rz  rz    s    JSr   rz  c                L   d}d}t        t        t        ||                   D ]  \  }}|dk(  rt        j                  j
                  j                  ||      s,t        j                  j
                  j                  ||      s y|t        j                  d|      z  }||z  } yNr?   FT)	reversedr   r   rk   r   r   r.  r   ra  )r   r2  expected_strideexpected_stride_maxr   ys         r   is_contiguous_strides_for_shaper    s     Os5&123 
16ww77
''""::1>QRuyyA.1
 r   c                <    t         j                  | j                  z  S r   )r@   padding_alignment_bytesitemsizer  s    r   get_align_for_dtyper    s    ))U^^;;r   c                  2    e Zd ZdZddZddZ	 d	 	 	 d	dZy)
r  zxAbstract base for Layout, MultiOutputLayout, NoneLayout.
    Represents the memory layout of the output of an Operation.c                >    t        t        |       j                        r   r  r  s    r   r  zOutputSpec.get_device   r  r   c                >    t        t        |       j                        r   r  r  s    r   storage_sizezOutputSpec.storage_size  r  r   c                >    t        t        |       j                        r   r  r  s     r   r  zOutputSpec.get_free_symbol_uses  r  r   Nr@  rX  rO  rZ  )r   r   r   r]  r  r  r  r   r   r   r  r    s,    C77 %*7!7	!7r   r  c                     e Zd ZdZd ed      df	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZeZddZddZ	dd	Z
e	 	 	 	 	 	 dd
       ZddZddZddZe	 	 	 	 	 	 	 	 dd       ZddZddZddZd dZd!dZd"dZ	 d#	 	 	 d$dZy)%r  zo
    Layout base class

    Carries tensor meta-information including offset and
    whether it is pinned.
    Nr   Fc                F   |t         j                  |      }|| _        || _        t	        |      t	        |      k(  sJ d| d|        t        d |D              sJ || _        || _        || _        || _	        | j                  r| j                  j                  dk(  sJ y y )Nr  	, stride=c              3  H   K   | ]  }t        |t        t        f        y wr   )r   r!   r   r   s     r   r   z"Layout.__init__.<locals>.<genexpr>#  s     <!:a$-<    "r  )r  r  r   r   r   r   r   r   rA  rB  r   )rq  r   r   r   r   rA  rB  s          r   r  zLayout.__init__  s     >#66t<F
4yCK'H5ix)HH'<t<<<<	"NN(8(8E(ABB(ANr   c                   d}| j                   dk7  rd| j                    }| j                  j                  dnd| j                  j                   }d}| j                  rd| j                   }t	        |       j
                   d| j                  j                   | d| j                   d| j                   d	| j                   | | d
S )Nr  r   z	, offset=:z, is_pinned=z('z', z, size=r  r  )	rA  r   r   rB  r   r   r   r   r   )rq  rA  device_index_stris_pinned_strs       r   r  zLayout.__str__+  s    ;;! .F!%!2!2!:2!DKKDUDUCV@W>>*4>>*:;MDz""#2dkk&6&6%78H7ITZZL YII;i}VH]O1N	
r   c                    | j                   S r   r  r  s    r   r  zLayout.get_device;  r  r   c                
   t         j                  5  t        j                  t	        | j
                        t	        | j                        | j                  | j                  | j                        cd d d        S # 1 sw Y   y xY w)N)r   r   
pin_memory)
rk   	fake_moder  r	  rY   r   r   r   r   rB  r  s    r   get_examplezLayout.get_example>  sY    [[ 	&&'		2'4jj{{>>	 	 	s   AA99Bc                B    t        | j                  | j                        S r   )r  r   r   r  s    r   r  zLayout.is_contiguousH  s    .t{{DIIFFr   c                    t        |       }|dvs| d   dk(  ryt        |t        |       |       D ]  \  }}}|dk7  s||k7  s y y)N)r      r?   FT)r   r   r-   )r2  rE  ndimleftrightr   s         r   is_channels_last_contiguousz"Layout.is_channels_last_contiguousK  sa     5zvqQ!$3E:E"
 	D% qyTU]		
 r   c                    t        | j                  t        t        j	                  t        t        | j                                          | j                        D ]  \  }}}|dk7  s||k7  s y yr  )r   r   r  r  r  r   r   )rq  r  r  r   s       r   is_transposedzLayout.is_transposedY  sc    !$KK^66tHTYY<O7PQRII"
 	D%
 qyTU]	 r   c                   t        | j                        t        |      k(  sJ t        | j                        D cg c]5  \  }}t        j
                  j                  j                  |d      dk7  r|7 }}}|D cg c]  }| j                  |    }}|D cg c]  }||   	 }}d	d} ||      }dgt        |      z  }t        t        |            D ]  }||   |||   <    t        t        |      dz
        D ][  }||   ||dz      kD  }t        |t              s7t        j
                  j                  j                  ||   ||dz      kD  d      }|s[ y yc c}}w c c}w c c}w )
Nr   r  r?   c                `    t        |       }| D cg c]  }|j                  |       c}S c c}w r   )r[  r   )arr
sorted_arrelements      r   sorted_indicesz0Layout.is_stride_ordered.<locals>.sorted_indicesp  s*    J=@A'J$$W-AAAs   +r'  T)size_obliviousF)r  rQ  r   rQ  )r   r   r   r   rk   r   r   r   r   r   r   
_shape_envr  )	rq  r   r   r3  non_1_indicesr   r  stride_orderedexprs	            r   r  zLayout.is_stride_orderedc  s~   4;;3u:---
 $DII.
3ww))#):a? 
 
 +88Q$++a.882?@Qa@@	B
 u% E
*s5z" 	1A'-ayN58$	1 s5zA~& 	A!!$~a!e'<<DdD)ww))77"1%q1u(==d 8  	 ;
 9@s   :E=E"E'c                    dgt        t        t        dt        | j                        dz
                    z   }t        |      g|z   }| j                  |      S Nr   r?   )r   r  r   r   r   r  r  s     r   is_channels_last_stride_orderedz&Layout.is_channels_last_stride_ordered  sN    d8E!S-=-A$BCDDUu$%%e,,r   c                   t        |      }t        |       dk(  r| S t        j                  st        j                  ||       r| S t        j                         }t        |d      r|j                  j                  dd      r| S t        t        j                  d      rt        j                  j                  nddfdrt        fd| D              r| S t        |       }t        |      }t!        t        |             D cg c]  }d }}d	||d   <   d}	t#        |d	d d	
      D ]  \  }
}||
d	z
     }||   ||   z  }t%        |t&        t(        j*                  f      xr |t        j,                  kD  xr ||z  dk7  xs, t%        |t(        j.                        xr t        j0                  }|||<   |st3        ||      |z  ||<   d}	 |	s| S t4        xj6                  d	z  c_        |S c c}w )z
        The padding does not change stride order but makes sure all strides larger
        than the threshold are multiple of align.
        r   rQ  dislike_paddingFr  Nc                |    yt        | t        j                        syt        fd| j                  D              S )NFc              3  @   K   | ]  }j                  |        y wr   )is_unbacked_symint)r   r   r   s     r   r   zILayout._pad_strides.<locals>.contains_unbacked_symints.<locals>.<genexpr>  s     R1y33A6Rs   )r   r   r!   r   r2   )r  r   s    r   contains_unbacked_symintsz6Layout._pad_strides.<locals>.contains_unbacked_symints  s4     dEJJ/R@Q@QRRRr   c              3  .   K   | ]  } |        y wr   r   )r   r   r  s     r   r   z&Layout._pad_strides.<locals>.<genexpr>  s     Na6q9Nr  r?   )rd  T)r  zsympy.Expr | intr   r   )r  r   r@   pad_channels_lastr  r  rk   get_current_noder  rQ  r  r   r  r   r   r   r   r   r   r   r   r"   padding_stride_thresholdr!   pad_dynamic_shapesrW   r'   num_comprehensive_padding)
in_stridesr   r   aligncurrent_fx_noder  r   r   new_stridespaddedrankr   prev_idxr   require_paddingr  r   s                  @@r   _pad_strideszLayout._pad_strides  s    $E*z?a''F,N,N*-
 ,,.?F+0D0D0H0Hu1
 *1!''<*HAGG&&d		S N:NN'
I>,\:
"'J"89Qq99 &'JqM"":ab>; 	ID#!$(+H *T(^;F 6C#78 (V<<<(UNa'P VUZZ0NV5N5N	 
  &K#*65#9E#AC 	  ))Q.)9 :s   	G-c                    t        | t              sJ t        |              | j                  J | j	                  | j                  | j
                  | j                        | _        y r   )r   r  r   r   r  r   r   r  s    r   r  zLayout.pad_strides  sM    $/;d;/{{&&&''TYY

Kr   c                F    t         j                  xr t        | t              S r   )r@   comprehensive_paddingr   r  r  s    r   r  zLayout.should_pad_strides  s    ++P
40PPr   c                    t        | t              r| S | j                         r| j                          t        | j                  | j
                  | j                  | j                  | j                  | j                        S r   )
r   r@  r  r  r   r   r   r   rA  rB  r  s    r   as_fixedzLayout.as_fixed  s`    dK(K""$KKJJIIKKKKNN
 	
r   c                    t         j                  sJ dt        |       j                   d       | j	                         j                         S )Nzconvert z to FixedLayout first)r  r  r   r   r  r  r  s    r   r  zLayout.make_indexer  sG    ,, 	
tDz**++@A	
, }}++--r   c                f   t        |t              xr | j                  |j                  k(  xr | j                  |j                  k(  xrj | j                  |j                  k(  xrO | j
                  |j
                  k(  xr4 | j                  |j                  k(  xr | j                  |j                  k(  S r   )r   r  r   r   r   r   rA  rB  )rq  others     r   __eq__zLayout.__eq__  s    uf% 2u||+2

ekk)2 		UZZ'2 u||+	2
 u||+2 %//1	
r   c                X    t        | j                  | j                  | j                        S r   )r*   r   r   rA  r  s    r   r  zLayout.storage_size  s    .tyy$++t{{SSr   c                    t        | j                  |      t        | j                  |      z  t        | j                  |      z  S r   )r)   r   r   rA  r  s     r   r  zLayout.get_free_symbol_uses  s=     TYY6t{{M:;t{{M:;	
r   )r   rC  r   r5  r   r:  r   zOptional[Sequence[Expr]]rA  r!   rB  r   r   r   rJ  rB  )r   torch.Tensorr8  )r2  rI  rE  rI  r   r   )r   rQ  r   r   )r  rQ  r   r:  r   r5  r   rQ  r-  )r   r@  rF  )r  r   r   r   r<  rO  rZ  )r   r   r   r]  r"   r  r  r  r  r  r  r`  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r  r    s5    ,0qzCC C 	C
 )C C C 
C,
 HG !,>	 !F- B!B)7B@KB	B BHL
Q
.	
T %*
!
	!
r   r  c                      e Zd ZdZddZy)r@  z A Tensor layout we cannot changec                X    t        | j                  | j                  | j                        S )r  )r  r   r   rA  r  s    r   r  zFixedLayout.make_indexer	  s    diidkkBBr   NrF  )r   r   r   r]  r  r   r   r   r@  r@    s    *Cr   r@  c                       e Zd ZdZdZedd       Zedd       Zedd       Ze	 	 	 	 	 	 dd       Z	e	 	 	 	 	 	 dd       Z
	 d	 	 	 	 	 ddZ	 d	 	 	 	 	 dd	Zdd
ZddZ	 	 d	 	 	 	 	 	 	 	 	 	 	 d fdZ xZS )r  z-A Tensor layout that we are allowed to changeFc                    t        |       dk(  rg S t        j                  j                  g}t	        | dd        D ]  }|j                  ||d   z          t        t	        |            S )Nr   r?   r'  )r   r   r  r	  r  r  r   )sizesreversed_stridesr   s      r   r  z!FlexibleLayout.contiguous_strides  sh    u:?I!GGKK=U12Y' 	AD##D+;B+?$?@	AH-.//r   c                    t        t        t        |                   t        |      k(  s	J | |f       t        j                  j
                  }dgt        |      z  }|D ]  }|||<   || |   z  } |S )z
        Create a stride based on the order the dimensions should be filled in.

        In this format, channels last would be:
            [1, 3, 2, 0]
        N)r:   r   r   r   r  r	  )r  r   next_striderE  r   s        r   fill_orderedzFlexibleLayout.fill_ordered  sx     %E
+,
50AAQE5>QAggkk&3u:% 	1A$GAJ%a0K	1 r   c                    t        t        t        |                   t        |      k(  sJ t        |      }t        j                  | |      S )z
        Create a stride based on the sorted order of a permuted range.

        In this format, channels last would be:
            [3, 0, 2, 1]
        )r:   r   r   r   r  r  )r  r   r   s      r   r  zFlexibleLayout.stride_ordered.  sB     %E
+,
50AAAA,U3
**5*==r   c                >   |t         j                  k(  rt        j                  | t              S |t         j
                  k(  rt        j                  | t              S |t         j                  k(  rt        j                  |       S t        j                  d|       t        )aq  
        Create a stride based on a memory format.

        Memory format is translasted into a stride order,
        so channels_last is the same as:
            FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

        This interface does not support memory_format `torch.preserve_format`
        which should be used to deduce a format from another source
        z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r  channels_lastr  r  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr  rp  r  r  )r  memory_formats     r    stride_ordered_for_memory_formatz/FlexibleLayout.stride_ordered_for_memory_format:  s     E///!008IJJe444!008JKKe555!44U;;IIP &%r   c                (   t        |       t        |      k(  sJ |D cg c]+  }t        j                  j                  j	                  |      - }}t        t        t        |            |j                        }t        j                  | |      S c c}w )z
        Create a stride that has the same stride order as given stride

        For example, if given stride is [1000, 1, 100, 10],
        the fill order should be [1, 3, 2, 0]
        rU  )
r   rk   r   r   r$  r[  r   __getitem__r  r  )r  r   r   r   s       r   same_orderedzFlexibleLayout.same_orderedU  sv     5zS[(((BHIQ!''""55a8IIE#f+.F4F4FG
**5*== Js   0Bc                0   | j                  | j                  |      }| j                         r)|r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                  | j                        S r   )	r  r   r  r  r   r@  r   rA  rB  )rq  r   r  rH  s       r   as_stride_orderzFlexibleLayout.as_stride_orderd  sw     ((E:
""$**:tyy$**MJKKJJIIKKNN
 	
r   c                    |}| j                         r)|r'| j                  || j                  | j                        }t	        | j
                  | j                  | j                  || j                  | j                        S r   )r  r  r   r   r@  r   rA  rB  )rq  r  r  rH  s       r   as_exact_strideszFlexibleLayout.as_exact_stridest  sf     #
""$**:tyy$**MJKKJJIIKKNN
 	
r   c                ,   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                  | j                        S r   )	r  r   r  r  r   r@  r   rA  rB  )rq  r   rH  s      r   as_fill_orderzFlexibleLayout.as_fill_order  ss    $($5$5dii$G
""$**:tyy$**MJKKJJIIKKNN
 	
r   c                ,   | j                  | j                  |      }| j                         r'| j                  || j                  | j                        }t        | j                  | j                  | j                  || j                  | j                        S r   )	r  r   r  r  r   r@  r   rA  rB  )rq  r   rH  s      r   as_same_orderzFlexibleLayout.as_same_order  ss    &&tyy&9
""$**:tyy$**MJKKJJIIKKNN
 	
r   c                    |rt         j                  ||      }nt         j                  |      }t        |   |||||       y )NrB  )r  r  r  r  r  )rq  r   r   r   r  rB  rE  r  s          r   r  zFlexibleLayout.__init__  sB     $11$EG$77=GgKr   )r  rQ  r   r  )r  rQ  r   rQ  r   r  )r  rQ  r   rQ  r   r:  )r  rQ  r  ztorch.memory_formatr   r:  )r  rQ  r   rI  r   r:  rO  )r   rQ  r  r   r   r@  )r  rI  r  r   r   r@  )r   rQ  r   r@  )r   rI  r   r@  r  )r   rC  r   r5  r   r:  r  'Optional[Sequence[Union[int, Integer]]]rB  r   r   r   )r   r   r   r]  r  r`  r  r  r  r  r  r  r  r  r  r  r  r  s   @r   r  r    sF   7N 0 0    	> 	> &&-@&	& &4 >>&8>	> > ;@
"
37
	
" HM
/
@D
	
 

$ AELL L 	L
 >L L 
L Lr   r  c                  F     e Zd ZdZd fdZddZddZ	 d		 	 	 d
dZ xZS )NonOwningLayoutz,Is a view into the storage of another tensorc                    |j                         }t        | 	  |j                  |j                  |j
                  |j                         || _        y r   )r  r  r  r   r   r   r   view)rq  r  r>  r  s      r   r  zNonOwningLayout.__init__  sA    "MMLLKKMM		
 	r   c                >    | j                         j                         S r   )r  r  r  s    r   r  zNonOwningLayout.make_indexer  s    }}++--r   c                    | j                   j                         j                  }|dk(  ryddlm} t
        j                  j                  j                  ||      S )Nr   Tr?   )	ALIGNMENT)	r  r  rA  utilsr  rk   r   r   r  )rq  rA  r  s      r   maybe_guard_alignedz#NonOwningLayout.maybe_guard_aligned  sD    %%'..Q;$ww<<VYOOr   c                4   t        | j                  t              sJ | j                  j                  }t        |t              sJ t        |             |j                  }t        |t              sJ t        |             |j                  j                  |      S r   )	r   r  rC  r=  r  r   r  r>  r  )rq  rX  boxinput_buffers       r   r  z$NonOwningLayout.get_free_symbol_uses  sw     $))_555iinn#z*5DI5*xx,/:c:/""77FFr   )r  zUnion[BaseView, TensorBox]r   r   rF  r8  rO  rZ  )	r   r   r   r]  r  r  r  r  r  r  s   @r   r  r    s3    6.P %*G!G	!Gr   r  c                      e Zd ZdZy)CommBufferTypesymm_memN)r   r   r   SYMM_MEMr   r   r   r  r    s    Hr   r  c                  F     e Zd ZU dZded<   ded<   	 	 	 	 	 	 d fdZ xZS )CommBufferLayoutax  
    A layout that signifies the buffer is a comm buffer.
    In terms of striding, the layout is identical to `FixedLayout`.

    Buffers with this layout do not participate in in-place reuse - it can be
    neither the source nor the target for in-place reuse.

    For detailed motivation and usage of this layout, see
    NOTE [lowering-time collective optimization].
    r  comm_buffer_typer   
group_namec                "   t        |t              st        d| d      |j                         }t        |   |j                  |j                  |j                  |j                  |j                  |j                         || _        || _        y )NzJA `CommBufferLayout` can only be initialized with a `FlexibleLayout` (got z).r   r   r   r   rA  rB  )r   r  r  r  r  r  r   r   r   r   rA  rB  r  r  )rq  r>  r  r  fixedr  s        r   r  zCommBufferLayout.__init__  s     &.1 ++1("6 
 !<<++<<<<oo 	 	
 !1$r   )r>  r  r  r  r  r   )r   r   r   r]  r   r  r  r  s   @r   r  r    s;    	 %$O%% )% 	% %r   r  c                      e Zd ZU ded<    ej
                  d       Zded<    ej
                  d       Zded<   dd	Zdd
Z	ddZ
y)
NoneLayoutrA  r   c                     dgS r  r   r   r   r   rT  zNoneLayout.<lambda>  s     r   default_factoryr  r   c                     dgS r  r   r   r   r   rT  zNoneLayout.<lambda>  s    1# r   r   c                     yr  r   r  s    r   r  zNoneLayout.storage_size
  r@  r   c                    | S r   r   r  s    r   r  zNoneLayout.as_fixed      r   c                    | j                   S r   r  r  s    r   r  zNoneLayout.get_device  r  r   NrX  r7  r@  )r   r   r   r   r^  r_  r   r   r  r  r  r   r   r   r  r    sG     #"'k''DD)D)))+FFIFr   r  c                       e Zd Zd
 fdZedd       Zej                  dd       ZddZddZddZ	e
	 d	 	 	 	 	 	 	 dd       ZddZdd	Z xZS )MutationLayoutSHOULDREMOVEc                   t         |   |j                         |j                         |j	                         d        || _        | j                         j                         }t        j                  j                  |       y r   )r  r  r  r  r   r  
get_bufferr  rk   r   mark_buffer_mutated)rq  r  r   r  s      r   r  z#MutationLayoutSHOULDREMOVE.__init__  se    &&(OO		
  ))+	##D)r   c                6    | j                         j                  S r   )real_layoutr   r  s    r   r   z!MutationLayoutSHOULDREMOVE.stride   s    !(((r   c                     y r   r   )rq  r  s     r   r   z!MutationLayoutSHOULDREMOVE.stride$  s    r   c                >    | j                         j                         S r   )r  r  r  s    r   r  z'MutationLayoutSHOULDREMOVE.storage_size(  s    !..00r   c                x    dfd | j                         }t        |t              sJ t        |             |S )Nc                    t        | t              r | j                        S t        | t              r | j	                               S t        | t
              r | j                        S | S r   )r   r  r  r  r  
MutableBoxr=  )r  unwrap_viewss    r   r!  z;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views,  sY    &"<=#FMM22&(+#F$6$6$899&*-#FKK00Mr   )r  r   r   r   )r  r   r  r   )rq  r  r!  s     @r   r  z%MutationLayoutSHOULDREMOVE.get_buffer+  s6    	 dkk*&&)74<7)r   c                ^    | j                         j                  }t        |t              sJ |S r   )r  r>  r   r  )rq  r>  s     r   r  z&MutationLayoutSHOULDREMOVE.real_layout9  s)    "))&&)))r   c                   |j                          t        j                  j                  |j	                                t        |t              r|j                  }|j                          |st        j                  |j                         |j                         |j                         t        |j                         |j                               D cg c]/  \  }}t        j                  j                   j#                  ||      1 c}}      }t        |t$        t&        f      sJ |j                  }|j                          t)        |d      sJ |       t        |j                  j*                  t,              s$J t/        |j                  j*                               t1        |      |j                  _        |j                  S c c}}w )Nr  r=  )r  rk   r   r  r  r   r   r=  r  r  r  r  r  r  r   r   r   check_equals_and_simplifyr  r   r  r>  r  r   r  )r  srcdstunsafe_aliasr  r  r   s          r   realize_intoz'MutationLayoutSHOULDREMOVE.realize_into>  sI    	 	
##CLLN3c9%((C 	##~~'mmo* !$CLLNCLLN C1 GG$$>>q!D	 $ D dXz$:;;;))CsF#(S(##((//>:QD<QQ:4S9xxs   4Gc                    | S r   r   r  s    r   r  z#MutationLayoutSHOULDREMOVE.as_fixedf  r  r   c                6    | j                   j                         S r   )r  r  r  s    r   r  z'MutationLayoutSHOULDREMOVE.make_indexeri  r6  r   )r  r   r   r   r9  )r  r   r   r   r<  )r   r  r6  rO  )r%  r   r&  r   r'  r   r   r   )r   r   rF  )r   r   r   r  rc  r   setterr  r  r  r  r(  r  r  r  r  s   @r   r  r    s    	* ) ) ]] 1
 <A%%%%59%	% %N*r   r  c                  P    e Zd ZU ded<   ded<   d# fdZd$dZd%dZd&dZd'd	Zd(d
Z	e
d)d       Zd*dZd+dZd,dZd-dZd.dZd/dZd0dZd#dZ	 d1	 	 	 	 	 d2dZd3dZd4dZ	 d1	 	 	 	 	 d5dZd0dZd6dZd7d8dZd#dZd9dZd9dZd:dZ	 d1	 	 	 d;dZd<d Z d=d!Z!d0d"Z" xZ#S )>r  r>  r   r  r>  c                F    t         |           | j                  dd        y rm  )r  rw  rs  r  s    r   rw  zBuffer.__post_init__w  s    t4r   c                >    | j                         j                         S r   )r  r  r  s    r   r  zBuffer.make_indexer{  s     --//r   c                @    | j                   sJ |        | j                   S r   rz  r  s    r   r  zBuffer.get_name~  s    yy$yyyr   c                    t        | j                  t              r| j                  j                         S t	        t        | j                        j                        r   )r   r>  r  r  r  r   r   r  s    r   r  zBuffer.get_example  s=    dkk6*;;**,,!$t{{"3"<"<==r   c                >    | j                         j                         S r   )r  r  r  s    r   r  zBuffer.get_device  s    ##%0022r   c                     y r   r   r  s    r   r  zBuffer.get_defining_op  r  r   c                6    | j                         j                  S r   )r  r   r  s    r   r   zBuffer.dtype  s     &&&r   c                :    g | j                         j                  S r   )r  r   r  s    r   r   zBuffer.get_size  s    ("''((r   c                :    g | j                         j                  S r   )r  r   r  s    r   r%  zBuffer.get_stride  s    *"))**r   c                6    | j                         j                  S r   )r  rA  r  s    r   
get_offsetzBuffer.get_offset  s     '''r   c                    t        | j                  t              r| j                  S t        t	        | j                        j
                        r   )r   r>  r  r  r   r   r  s    r   r  zBuffer.get_layout  s4    dkk6*;;!$t{{"3"<"<==r   c                    | j                   S r   r8  r  s    r   r  zBuffer.get_output_spec  r  r   c                "    | j                         S r   )r  r  s    r   r  zBuffer.get_storage_numel  s    ~~r   c                6    | j                         j                  S r   )r  rB  r  s    r   get_is_pinnedzBuffer.get_is_pinned  s     ***r   c                    t        | j                  t              r;t        | j                  t              s | j                  j	                         | _        y y y r   )r   r>  r  r  r  r  s    r   r  zBuffer.freeze_layout  s>    dkk6*:KK4
 ++..0DK4
*r   c                    t        | j                  t              sJ t        | j                               | j                  j	                  ||      | _        y Nr  )r   r>  r  r   r  r   s      r   r  z&Buffer.freeze_layout_with_stride_order  sA     $++~6IT[[8II6kk11%}1Ur   c                    t        | j                  t              sJ t        | j                               | j                  j	                  |      | _        y r   )r   r>  r  r   r  r  s     r   r  z$Buffer.freeze_layout_with_fill_order  s:    $++~6IT[[8II6kk//6r   c                    t        | j                  t              sJ t        | j                               | j                  j	                  |      | _        y r   )r   r>  r  r   r  r  s     r   r	  z$Buffer.freeze_layout_with_same_order  s:    $++~6IT[[8II6kk//7r   c                    t        | j                  t              sJ t        | j                               | j                  j	                  ||      | _        y r?  )r   r>  r  r   r  r  s      r   r  z'Buffer.freeze_layout_with_exact_strides  sF     $++~6IT[[8II6kk22 3 
r   c                    t         j                  j                  j                  t	        j
                  | j                         d            S r  r  r  s    r   r  zBuffer.is_zero_elements  r  r   c                r      j                         rt        t         j                               S d fd}|S )Nr  c                x    j                         }t        j                  j                  xs d ||             S r  )r  ri   r0  r   r   r  rq  s     r   r  z"Buffer.make_loader.<locals>.loader  s/    '')G88DII2GENCCr   r3  )r  r   r  r  r4  s   ` r   r  zBuffer.make_loader  s0      "=0@AA	D r   c                "    | j                         S r   r  r  s     r   r  zBuffer.codegen_reference  r  r   c                     y r   r   r  s    r   r]  zBuffer.decide_layout  r  r   c                    t        | j                  t              r%| j                  j                  j	                         gS yrn  )r   r>  r  r  r  r  s    r   r*  z#Buffer.get_inputs_that_alias_output  s/    dkk?3KK$$--/00r   c                    t        | j                  t              r%| j                  j                  j	                         gS yrn  )r   r>  r  r  r  r  s    r   r&  zBuffer.get_mutation_names  s0    dkk#=>KK&&//122r   c                6    t        | j                         g      S r   )r:   r  r  s    r   r  zBuffer.get_read_names  s    4==?+,,r   c                    t               S r   r9   r  s     r   r  zBuffer.get_free_symbol_uses       |r   c                    t               S r   r9   r  s    r   r|  zBuffer.get_unbacked_symbol_defs  r}  r   c                     y r   r   r  s    r   r  zBuffer.realize  r  r   c                     yr  r   r  s    r   should_allocatezBuffer.should_allocate  s    r   r-  rF  rJ  )r   z!Union[torch.Tensor, sympy.Symbol]r@  r1  r4  r9  )r   r  r<  r6  r7  rX  r8  rO  rP  rR  )r   rQ  r   r   )r  rQ  r  r   r   r   rD  r   r?  r\  r.  rZ  r  r=  )$r   r   r   r   rw  r  r  r  r  r  rc  r   r   r%  r7  r  r  r  r<  r  r  r  r	  r  r  r  r  r]  r*  r&  r  r  r|  r  rR  r  r  s   @r   r  r  m  s    
50>
3 ' ')+(>
 +1 ;@V"V37V	V78
 CH
*
;?
	
U	

- %*!	!
r   r  c                  <    e Zd ZddZddZej                  ZddZy)OperationBufferc                    | gS r   r   r  s    r   rz  zOperationBuffer.get_outputs  s	    vr   c                    | S r   r   r  s    r   r  zOperationBuffer.get_defining_op  r  r   c                X    t         j                  |        t        j                  |        y r   )r  rw  re  r  s    r   rw  zOperationBuffer.__post_init__  s    T"%r   Nr  r   re  r-  )r   r   r   rz  r  re  r(  rw  r   r   r   rT  rT    s     #55&r   rT  c                      e Zd ZddZy)rl  c                     yr  r   r  s    r   r  zInputBuffer.num_reads  r@  r   NrX  )r   r   r   r  r   r   r   rl  rl    s    r   rl  c                      e Zd ZdZy)DonatedBufferaY  
    Represents a donated buffer which is a saved tensor that is not alias to any
    fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
    reuse the input tensor memory during backward since it might be used in another
    function. However, donated buffer can be inplace reused during backward
    to save memory.
    N)r   r   r   r]  r   r   r   r\  r\    s    r   r\  c                  ,    e Zd ZU dZded<   ddZddZy)r  NrA  r  c                     d fd}|S )Nc                    j                         j                         }t        j                  t        j
                  j                  j                         j                         ||             S r   )	r  r  ri   r0  rk   r   constant_namer  r  rF  s     r   r  z*ConstantBuffer.make_loader.<locals>.loader  sP    oo'446G88%%dmmot7K7KL r   r3  r   r4  s   ` r   r  zConstantBuffer.make_loader  s    	 r   c                    t        t        j                  j                  | j	                         |      | j
                        S N)r   r>  )r  rk   r   r`  r  r>  r  s     r   r$  z!ConstantBuffer.constant_to_device"  s/    &&t}}?
 	
r   rD  r[  )r   r   r   r  r   r  r$  r   r   r   r  r    s    .2O+2
r   r  c                  @    e Zd ZddZ	 d	 	 	 d	dZd
ddZddZddZy)NoneAsConstantBufferc                    t               S r   r9   r  s    r   r  zNoneAsConstantBuffer.get_reads*  r}  r   c                    t               S r   r9   r  s     r   r  z)NoneAsConstantBuffer.get_free_symbol_uses-  rN  r   Nc                J    t         j                  j                  j                  S r   )rk   r   r<  none_strr  s     r   r  z&NoneAsConstantBuffer.codegen_reference2  s    ww##,,,r   c                    t        d       S Nr  )r  r  s    r   r  z$NoneAsConstantBuffer.get_output_spec5  s    &&r   c                     yr  r   r  s    r   r  z&NoneAsConstantBuffer.has_tensor_output8  r  r   rW  rO  rZ  r   r?  r7  r8  )r   r   r   r  r  r  r  r  r   r   r   rd  rd  (  s0     %*!	!
-'r   rd  c                  <    e Zd ZU ded<   	 d	 	 	 ddZd	d
dZddZy)r   r!   r  c                .    t        | j                  |      S r   )r)   r  r  s     r   r  z*ShapeAsConstantBuffer.get_free_symbol_uses@  s      		=99r   Nc                h    t         j                  j                  j                  | j                        S r   )rk   r   r<  codegen_sizevarr  r  s     r   r  z'ShapeAsConstantBuffer.codegen_referenceE  s!    ww##33DII>>r   c                     yr  r   r  s    r   r  z'ShapeAsConstantBuffer.has_tensor_outputH  r  r   rO  rZ  r   r?  r8  )r   r   r   r   r  r  r  r   r   r   r   r   <  s+    
J %*:!:	!:
?r   r   c                  J    e Zd ZU dZded<   dZded<   eej                  dd              Z	ddZ
dd	Zdd
ZddZd dZ	 d!	 	 	 d"dZd# fdZd$dZd%dZd&dZd'dZe	 	 d(d       Z	 	 d)	 	 	 	 	 d*dZe	 d+	 	 	 	 	 	 	 	 	 	 	 d,d       Zd-dZddZd$dZd$dZd.dZ xZS )/rj  zb
    Represents a buffer that is computed during kernel execution rather than being an input.
    r  r=  FzClassVar[bool]_force_realizec               #     K   t         j                  } 	 dt         _        d  | t         _        y # | t         _        w xY wwNT)rj  rr  )	old_values    r   force_realizezComputedBuffer.force_realizeU  s3      #11		6,0N),5N)IN)s   ?/ ?<?c                    | j                   | j                   S t        | j                  d      r| j                  j                   S y)z
        Returns self.name if it exists, otherwise returns the name of the data node if that exists.
        If neither exist, returns None.
        Nr   )r   r  r=  r  s    r   get_computed_buffer_namez'ComputedBuffer.get_computed_buffer_name_  s7    
 99 99499f%99>>!r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zComputedBuffer.num_readsj  r  r   c                6    | j                   j                         S r   r=  r  r  s    r   r  zComputedBuffer.get_readsm  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zComputedBuffer.get_read_namesp  r  r   c                X   t        | j                  t        t        t        t
        f      s0t        j                  t               t               t                     S t        j                  t        dd      5  | j                  j                         rTt        | j                         | j                  j                         | j                  j!                               cd d d        S t        | j                         | j                  j#                               cd d d        S # 1 sw Y   y xY w)Nr  writesindex_exprsr  T)r   r=  r0  r(  ra  r  rA   
ReadWritesr:   r    r   r  r  rJ   get_store_functionr  r  r   r  s    r   r  zComputedBuffer.get_read_writess  s    $))itY%GH** l!|&L  \\.*:DA 	yy++-*++-II002II002	 	 +++-II&&(	 	 	s   6A%D %1D  D)c                   | j                   j                  |      | j                  j                  |      z  }| j                         r@t	        | j                         t              r"|| j                         j                  |      z  }|S r   )r>  r  r=  has_store_functionr   r  rL   r  )rq  rX  r  s      r   r  z#ComputedBuffer.get_free_symbol_uses  sz    " 11
II**=9: ""$##%x*
 d**,AA-PPFr   c                    | j                         s_| j                  t        j                  j                  vr9| j                         dk(  r&| j                  s| j                  j                         S t        | !         S r  )
r  r   rk   r   mutated_buffersr  rr  r=  r  r  r  s    r   r  zComputedBuffer.make_loader  s`    '')		!8!88 A%'' 99((**w"$$r   c                V    t        | j                  t        t        t        t
        f      S r   )r   r=  r0  r(  ra  r  r  s    r   r  z!ComputedBuffer.has_store_function  s    $))itY%GHHr   c                   | j                         j                         j                         }t        | j                  t
        t        t        f      r+t        | j                  j                  | j                  |      S t        | j                  t              sJ t        | j                               t        | j                  j                  | j                  |      S r   )r  r  r  r   r=  r0  r(  ra  r   r=  r   r  r   r  )rq  r  s     r   r  z!ComputedBuffer.get_store_function  s    //#,,.;;=dii)T4!8949944diiIIdii3DT$))_D349911499gFFr   c                P   t        | j                  t              r{t        j                  | j
                  j                         | j
                  j                               \  \  }}}| j                         j                  }t        d |D              sJ |D cg c]_  }t        |t        j                        rCt        |j                  |D ci c]#  }|dk7  s	|t        j                  j                   % c}      a }}}|rt        | j
                  t"        t$        f      r| j
                  j'                  ||      }n|}|D cg c],  }t(        j*                  j,                  j/                  ||      . }	}ddlm}
  |
|	| j5                               S yc c}w c c}}w c c}w )al  
        If our layout is still flexible, try to determine the stride order based on stride orders of reads.

        TODO(jansel): A better algorithm here would look at downstream consumers of this
                      value and try to do global graph-level layout optimization.
                      This is also something just begging to be autotuned.
        c              3  p   K   | ].  }t        |t        j                  t        j                  f       0 y wr   )r   rA   StarDep	MemoryDepr  s     r   r   z0ComputedBuffer.get_fill_order.<locals>.<genexpr>  s0       1|33\5K5KLMs   46r   r?   pick_loop_orderN)r   r>  r  rA   rq  r=  r  r  r  r  r   r  rg   r   r   r  r  r(  ra  r   rk   r   r   rs  	schedulerr  r   )rq  
index_varsr>  r   r  r  vr`  r  stride_lengthsr  s              r   r   zComputedBuffer.get_fill_order  sj    dkk>2.:.M.M		,,.		0L0L0N/+(Z! ((*00E      a!7!78 177n$WPQUVPVQ_$WXE  dii$6"ii//
NKG(GMR"EIAGG$$11$@" " 7&~t}}GG# %X"s$   3F
FF6	F1F#Fc                    t        | j                  t              r5| j                         }|r| j	                  |       y | j                          y y r   )r   r>  r  r   r  r  r  s     r   r]  zComputedBuffer.decide_layout  s@    dkk>2'')E2259""$ 3r   c                z   t        j                  | j                  j                         | j                  j	                         d      \  }}t        j                  t        d| j                               5  t        | j                         | j                         r|n|d d |g| }d d d        g }g }g }g }|j                         D ]^  \  }}	||d   v r'|rJ |j                  |       |j                  |	       4||d   v sJ |j                  |       |j                  |	       ` ||f||ffS # 1 sw Y   xY w)Nqrz   r  r?   r   )rA   rq  r=  r  r  r    r   r  r  rL   r  r  itemsr  )
rq  r   
var_rangesr  r  reduce_vars
index_sizereduce_sizer  r   s
             r   get_default_sizes_bodyz%ComputedBuffer.get_default_sizes_body  sI    (::II((*DII,H,H,JSV
j \\.*;T__=NO 	'')002Ra 	D	 
!#
$$& 	&DAqDG|&&!!!$!!!$DG|#|""1%""1%	& K($[0III)	 	s   52D11D:c                     j                         \  \  }}}\  }}|r |||f|||f      \  \  }}}\  }}g |j                  j                         |t        |t              rt        |      dk(  sJ |\  }}	t        |t              sJ t        |             t        |	t              sJ t        |	             t        d |	D              sJ |j                  }
|
|k(  s	J |
|f       |	D cg c]	  }|vs| }	}|	z  g |j                         t        j                  j                   t        j                         sj#                  |j%                                	 	 	 	 	 	 	 	 	 	 d fd}||z   }t'        t)                      xs t*        j,                   } |||||      \  }}} |||||      \  }}}t/        j0                  ||d      \  \  }}}t3        | ||       ||      g|||      }||f|fS c c}w )an  
        This is a main place where we do loop transformations in a
        backend-agnostic way.

        Here we:
            1) Remove any 1 dimensions
            2) Fuse contiguous dimensions together
            3) Reorder dimensions based on stride orders

        Optional argument extra_indexing_constraints can be used to append additional
        indexing expressions to existing ones derived from buffer's body. This can be useful
        to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
        on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
        the scheduler node compatible with other nodes.
        Optional argument recompute_sizes_body_func can be used to recompute sizes and body
        on the default body. This can be useful to append additional loop transformations.
        r   c              3  <   K   | ]  }t        |t                y wr   )r   r!   )r   fs     r   r   z6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>7  s     Hqz!T*H   c           	         j                  | ||
      \  }}} ||       } |rGt        j                  j                  j	                  | |t        	| |            \  }}}t        ||      }n|}|||fS r   )_apply_loop_reorderingrk   r   r   _simplify_loopsrE   r   )x_varssupport_varsr  simplify_loopsreindex0r   r   _pruner   index_formulasmemory_addrsrq  s            r   simplify_and_reorderzAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorderH  s     )-(C(Ce\)%E8X f%F*+''*:*:*J*J,^VUK+'x
 *(H="'8++r   prz   )
r  Sequence[sympy.Symbol]r  r  r  rQ  r  r   r   dtuple[list[int], Callable[[Sequence[int]], Sequence[int]], Callable[[Sequence[int]], Sequence[int]]])r  indexing_exprsr   r   r   r   r   r   r   r   r  get_write_exprsrk   r   rh  rB   PREFER_STORE_LOOP_ORDERextendget_read_exprsrb   r  r@   loop_ordering_after_fusionrA   index_vars_no_squeezerL   )rq  extra_indexing_constraintsrecompute_sizes_body_funcr  r  r  r  r  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr  r  r  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsr  r  r  s   `                     @@r   r  z#ComputedBuffer.simplify_and_reorder	  s   4 '')		
%Z%Z %
 *[)4*k1J	)[)[
 94..5578%15u=23q89 :T6!#63T:WDAV<WW:148S$?R:SS8H4GHHHH"&//&*?? #%B ? /#!>2I# # 11N0--/0ww""4)O)OP 3 3 56	,*	,0	, !	, !		,

	,6 "K/t,--VV5V5V1V 	 (<	(
$\1 ,@{4F,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11I#s   -	G?7G?c           
     X   ddl m} |g }	 |D cg c]-  }t        j                  j                  j                  || |      / }}t        |      t        |      k(  rt        |d         t        |       k(  sJ t        t         ||||                  }|D 	cg c]  }	||	   	 }}	|t#        |      t%        |      fS c c}w # t        $ rZ t        j                  r*t        j                  dt        t        | |            |       t        t!        t        |                  }Y w xY wc c}	w )zU
        Shuffle the order of loops around to hopefully improve performance.
        r?   r  r   z%Did not simplify complex index:
%s
%s)r  r  rk   r   r   rs  r   r   r  	Exceptionr@   r  rp  warningr   r   r   r   r   )
r  r  r  r  priority_idxr  r  rE  r   r   s
             r   r  z%ComputedBuffer._apply_loop_reordering  s'    	/L	, )   --dJMG  w<3|#44WQZCM :   /'5,"OPQE $))aq))l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 2B<AC D'<C A D$#D$c                6    | j                   j                         S r   r=  r  r  s    r   r  z!ComputedBuffer.get_reduction_size      yy++--r   c                6    | j                   j                         S r   r=  r  r  s    r   r  z!ComputedBuffer.get_reduction_type  r  r   c                6    | j                   j                         S r   )r=  r  r  s    r   r"  zComputedBuffer.is_no_op  s    yy))++r   c                     yrt  r   r  s    r   rR  zComputedBuffer.should_allocate  r  r   c                8    | j                   j                  |      S )r  r=  r$  r  s     r   r$  z!ComputedBuffer.constant_to_device  s    yy++F33r   )r   Iterator[None]r=  rX  rW  r.  rU  rO  rZ  rD  r8  )r   zCallable[..., None])r   Optional[list[int]]r-  )r   zMtuple[tuple[list[Expr], list[Expr]], LoopBody, tuple[list[Expr], list[Expr]]]NN)r  *Optional[tuple[dict[Any, Any], list[Any]]]r  Optional[Callable[..., Any]]r   z8tuple[tuple[list[Expr], list[Expr]], Optional[LoopBody]]r   )r  r  r  r  r  rQ  r  zlist[sympy.Expr]r  r  r   r  r9  r[  )r   r   r   r]  r   rr  r`  ra  rb  rv  rx  r  r  r  r  r  r  r  r  r   r]  rV   r  r  r  r  r  r"  rR  r$  r  r  s   @r   rj  rj  L  sO    K%*NN*6  6	%%*, %*!	!6	%IG%N% J
J JD RVBFz2$Nz2 $@z2 
B	z2x  -1%B*%B,%B %B '	%B
 *%B
%B %BN..,4r   rj  c                  v     e Zd ZdZ	 	 	 	 	 	 	 	 d	 fdZd
dZdddZddZddZddZ		 	 d	 	 	 	 	 ddZ
 xZS )rm  zt
    Represents a Triton (in the future other type) of template operator
    that we can fuse an epilogue onto.
    c                    t         |   d |       t        j                  |      | _        || _        t        j                  j                  |       | _	        t        j                  j                  |        y rb  )r  r  rk  unwrap_storagerY  make_kernel_renderrk   r   register_bufferr   register_operation)rq  r>  rY  r  r  s       r   r  zTemplateBuffer.__init__  sY     	d62"11&9"4GG++D1		""4(r   c                &    | j                  d      S )NT	normalize)rJ   r  s    r   r  zTemplateBuffer.get_read_writes  s    ''$'77r   c           	     d   | j                         | j                         j                         dfd}t        j                  || j                         d|      }| j                  D ]  t        t        t        f      sJ t                     t        j                  t              sJ t        j                               j                  j                         dfd}|xj                  t        j                  |j                         d|      j                  z  c_         |S )Nc                ^    t        |      dk(  sJ t        j                   |       d      S )Nr   fake)r   ri   r  )r   rC  r  r   s     r   dummyz1TemplateBuffer.extract_read_writes.<locals>.dummy  s,    v;!###99T75>6::r   r   r  c                x    t        |      dk(  sJ t        j                  j                          |             S r  )r   ri   r0  r  )r   rC  r  r[  s     r   r  z1TemplateBuffer.extract_read_writes.<locals>.dummy  s0    6{a'''xx??r   )r   Sequence[Any]rC  r  r   r   )r  r  r  rA   rJ   r   rY  r   rC  r  r   r>  r  r  )rq  r  r  depsr  r[  r   s       @@@r   rJ   z"TemplateBuffer.extract_read_writes  s    }}//#002	; //4==?B)
 ;; 	CcOV#<=HtCyH=cjj&1C4

3CC1jj--/G@ JJ,::s||~rYeJ	 r   c                6    t         j                  j                  S r   )r   r  r	  r  s    r   r  z!TemplateBuffer.get_reduction_size  s    ww{{r   c                     y r   r   r  s    r   r  z!TemplateBuffer.get_reduction_type  r  r   c                     yrt  r   r  s    r   rR  zTemplateBuffer.should_allocate  r  r   c                *    | j                         g fd fS r   r  )rq  r  r  s      r   r  z#TemplateBuffer.simplify_and_reorder  s$      
 	
r   )r>  r  rY  Sequence[IRNode]r  r  r   r   rU  rO  )r  r   r   rV  r9  r=  r8  r  )r  r  r  r  r   z<tuple[tuple[Sequence[Expr], list[Expr]], Optional[LoopBody]])r   r   r   r]  r  r  rJ   r  r  rR  r  r  r  s   @r   rm  rm    sy    

)
) !
) 9	
)
 

)88
 RVBF
$N
 $@
 
F	
r   rm  c                  j     e Zd Z	 	 d	 	 	 	 	 	 	 	 	 	 	 d fdZ	 d	 	 	 d	 fdZd
dZddZddZ xZS )TritonTemplateBufferc           
        t         
|   |||       || _        | g| _        |t        j
                  j                  j                  t        j
                  j                  j                  f}t        j                  j                  j                  }||v sJ d| d|        t        | j                  d   t              sJ t!        | j                  d                | j                  d   j#                         }| xj                  |D 	cg c]  }	t%        t'        |      |	|        c}	z  c_        |r|n	t)               | _        d| _        d| _        yc c}	w )a  
        NOTE:[TritonTemplates with multiple outputs]
        We want the ability for TritonTemplates to output multiple tensors. Triton
        kernels have no notion of outputs and this is done by creating tensors that
        are then mutated by the kernel. Currently our STORE_OUTPUT codegen doesn't
        support creating multinode outputs for triton templates.
        We work around this by creating an extra input buffer during the lowering
        and we mark them as mutated inputs.
        Nz$Mutated inputs are only allowed for z	 but got r   r  )r  r  mutated_inputsoutputsr  ri   higher_orderflex_attentionflex_attention_backwardrk   r   current_noder  r   rY  r   r   r  MutationOutputr  r:   allowed_prologue_inpssubgraph_inpssubgraph_outs)rq  r>  rY  r  r  r  allowed_setr  r   rb  r  s             r   r  zTritonTemplateBuffer.__init__  s:   " 	);<,&*V% 		&&55		&&>>K 77//66L;. 6{m9\N[. dkk!nf5KtDKKN7KK5[[^..0FLL) z8#tD L &;!
 	" SW?Cs   	Ec                   t         |   |      }| j                  r| j                  ng }| j                  r| j                  ng }|D ]m  }t	        |t
        j                        r|j                  t        ||             9t	        |t              r!|j                  |j                  |             j|mJ  |D ]7  }t	        |t              r!|j                  |j                  |             4|7J  |S r   )
r  r  r  r  r   r   r!   updater)   r   )rq  rX  resr  r  r[  r   r  s          r   r  z)TritonTemplateBuffer.get_free_symbol_uses3  s     g*=9.2.@.@**b.2.@.@**b  	#C#uzz*

+C?@C(

333MBC{"{	# ! 	#C#v&

333MBC{"{		# 
r   c                    | j                   S r   r  r  s    r   rz  z TritonTemplateBuffer.get_outputsJ      ||r   c                    | j                   S r   )r  r  s    r   get_allowed_prologue_inpsz.TritonTemplateBuffer.get_allowed_prologue_inpsM  s    )))r   c                &    d| j                    d}|S )NzTritonTemplateBuffer(layout=r  r8  )rq  r   s     r   r  zTritonTemplateBuffer.__str__P  s    ,T[[M;
r   r  )r>  r  rY  r  r  zOptional[Callable[_P, _T]]r  Optional[Iterable[IRNode]]r  zOptional[OrderedSet[str]]r   r   rO  rZ  r  r.  rJ  )	r   r   r   r  r  rz  r  r  r  r  s   @r   r  r    s{     6:;?*D*D !*D 7	*D
 3*D  9*D 
*DZ %*!	!.*r   r  c                  x     e Zd ZdZ	 	 	 	 	 	 	 	 	 	 d fdZddZddZddZddZddZ	ddZ
dd	Zdd
Z xZS )ChoiceCallera.  
    Represents a possible choice used in autotune_process.py.
    During autotuning, self.benchmark() is first called to get benchmark result,
    and if this choice is selected, self.output_node() is called to get the output_node.

    Children classes: TritonTemplateCaller, CUDATemplateCaller.
    c                Z    t         |           || _        || _        || _        || _        y r   )r  r  r   r>  r   description)rq  r   r   r>  r  r  s        r   r  zChoiceCaller.__init__a  s0     		& 'r   c                   | j                         t        t        d}t        j                  rt        fdfi |S t        j                  d|ifi |S )N)warmuprepc                        S r   r   )algor   s   r   rT  z(ChoiceCaller.benchmark.<locals>.<lambda>w  s    D$K r   r   )to_callableautotune_warmupautotune_repr@   /profile_bandwidth_with_do_bench_using_profilingr[   rQ   	benchmark)rq  r   r   benchmark_configsr  s     ` @r   r  zChoiceCaller.benchmarkp  s[    !%
 AA+,?UCTUU$$T4%SARSSr   c                    t         r   rj  r  s    r   	call_namezChoiceCaller.call_namez  rk  r   c                    t         r   rj  r  s    r   r	  zChoiceCaller.to_callable}  rk  r   c                "    | j                         S )z
        Hash key for the underlying kernel. By default, we assume there are no
        runtime params, so kernel hash key defaults to choice caller's hash key.
        )hash_keyr  s    r   kernel_hash_keyzChoiceCaller.kernel_hash_key  s    
 }}r   c                    t         r   rj  r  s    r   r  zChoiceCaller.hash_key  rk  r   c                    t         r   rj  r  s    r   rT  zChoiceCaller.output_node  rk  r   c                    i S )zRInformation returned here is logged to the autotune log file when that is enabled.r   r  s    r   	info_dictzChoiceCaller.info_dict  s    	r   c                     y)Nunsupported_choicer   r  s    r   autoheuristic_idzChoiceCaller.autoheuristic_id  s    #r   )
r   r   r   r  r>  r  r  r   r   r   )r   r   r   r  r   r  rJ  )r   r  )r   r  )r   z<dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType]]])r   r   r   r]  r  r  r  r	  r  r  rT  r  r  r  r  s   @r   r  r  X  sf    '' "' 	'
 ' 
'T""""$r   r  c                      e Zd ZddZy)TritonTemplateCallerBasec                    t         r   rj  r  s    r   get_make_kernel_renderz/TritonTemplateCallerBase.get_make_kernel_render  rk  r   N)r   r   )r   r   r   r  r   r   r   r  r    s    "r   r  c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZed
d       Z	 d	 	 	 ddZej                  dd       Z
ddZ	 d	 	 	 ddZ	 	 	 	 ddZ xZS )MultiTemplateBufferaG  
    Represents a Buffer with multiple backing implementation choices.

    Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
    epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
    Otherwise, the fastest base choice will be chosen.
    c                    t         |   ||d |       || _        i | _        || _        t        d |D              | _        i | _        y )N)r>  rY  r  r  c              3     K   | ]R  }t        |t              xs< t        |t        j                  j                  j
                        xr |j                   T y wr   )r   r  r  r  select_algorithmExternKernelCallerhas_out_variant)r   choices     r   r   z/MultiTemplateBuffer.__init__.<locals>.<genexpr>  sT      %
  v78 65??#C#C#V#VW +**%
s   AA)r  r  _choice_timings_fn_choice_timingsoriginal_inputsr   _output_plannable_make_kernel_renders)rq  r>  rY  choice_timings_fnunfiltered_choicesr  r  s         r   r  zMultiTemplateBuffer.__init__  sd     	#"7	 	 	
 #4OQ%!$ %
 -%
 "
 ?A!r   c                    | j                   S )z^
        Are all possible choices TritonTemplates or Extern Kernels with out variants
        )r+  r  s    r   output_plannablez$MultiTemplateBuffer.output_plannable  s    
 %%%r   c                x    || j                   vr| j                  |      | j                   |<   | j                   |   S r   )r)  r(  )rq  hint_overrides     r   choice_timingsz"MultiTemplateBuffer.choice_timings  s>      4 44262I2I-2XD  /##M22r   c              #  0  K   t        |t        j                  j                  j                        sJ t        |             | j                  |j                  k(  sJ | j                  }|j                         | _        	 d  || _        y # || _        w xY wwr   )	r   r  r  r$  TritonTemplateCallerr   r>  r  r  )rq  callerrenders      r   swap_as_triton_callerz)MultiTemplateBuffer.swap_as_triton_caller  s     EOO44II
 	<	 
 {{fmm+++(("("?"?"A	-&,D#fD#s   A;B>B
 B
	BBc                N   t        |t        j                  j                  j                        sJ t        |             | j                         |j                  j                  k(  sJ | j                         |j                  j                  k(  sJ |j                         | _        y r   )r   r  r  r$  r5  r   r   r>  r   r%  r   r  r  )rq  r6  s     r   finalize_as_triton_callerz-MultiTemplateBuffer.finalize_as_triton_caller  s    EOO44II
 	<	 
 }}&--"4"4444 FMM$8$8888"("?"?"Ar   c                b    | j                  |      }t        ||j                        }|||   fS )N)r2  rU  )r3  r  r  )rq  r2  timings
min_choices       r   get_min_choicez"MultiTemplateBuffer.get_min_choice  s7     %%M%Bgkk2
GJ/00r   c                    |j                         D ]"  \  }}|j                         | j                  |<   $ | j                  d   | _        y)z;Finalize with multiple callers for different hint overridesN)r  r  r,  r  )rq  callersr2  r6  s       r   finalize_as_triton_callersz.MultiTemplateBuffer.finalize_as_triton_callers  sN     &-]]_ 	W!M67=7T7T7VD%%m4	W #'";";D"Ar   )r>  r  rY  r  r-  z4Callable[[Optional[int]], dict[ChoiceCaller, float]]r.  zlist[ChoiceCaller]r  r/  r   r   r8  r   )r2  rL  r   zdict[ChoiceCaller, float])r6  r  r   r  )r6  r  r   r   )r2  rL  r   ztuple[ChoiceCaller, float])r@  z-dict[Optional[int], TritonTemplateCallerBase]r   r   )r   r   r   r]  r  rc  r0  r3  ra  rb  r8  r:  r>  rA  r  r  s   @r   r!  r!    s    AA !A P	A
 /A  /A 
A6 & & .23*3	"3 - -B .21*1	#1BDB	Br   r!  c                  L     e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZddZ xZS )CUDATemplateBufferc                R    t         |   |||       || _        || _        || _        y r   )r  r  workspace_sizetemplatesupports_epilogue_fusion)rq  r>  rY  r  rE  rF  rG  r  s          r   r  zCUDATemplateBuffer.__init__  s.     	);<, (@%r   c                6    | j                   | j                   S dS r  )rE  r  s    r   r  z%CUDATemplateBuffer.get_workspace_size  s    &*&9&9&Et""L1Lr   c                x    | j                         D ]'  }t        j                  |j                         d d        ) y r   )rz  ri   r  r  )rq  rL  s     r   emulate_store_fnz#CUDATemplateBuffer.emulate_store_fn  s1    &&( 	5FIIfoo't4	5r   )r>  r  rY  r  r  Callable[_P, _T]rE  r   rF  ro   rG  r   r   r   rX  r-  )r   r   r   r  r  rJ  r  r  s   @r   rC  rC    s_    AA !A -	A
 A A #'A 
AM5r   rC  c                  D     e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 d fdZd fdZ xZS )CppTemplateBufferc                R    t         |   |||       || _        || _        d | _        y r   )r  r  rF  r'  r  )rq  r>  rY  r  rF  r'  r  s         r   r  zCppTemplateBuffer.__init__  s,     	);< /3r   c                v   t        | j                  t              rt        | j                  t              sJ t        | j                               | j                  d   }t        |t              sJ t        |             |j                  }t        |t              sJ t        |             |S t        | %         S r  )
r   r>  MultiOutputLayoutr  r   r   r  r  r  r  )rq  first_outputr>  r  s      r   r  zCppTemplateBuffer.get_layout  s    dkk#45dllH5ItDLL7II5<<?LlF3GT,5GG3!((Fff-;tF|;-M7%''r   )r>  r  rY  r  r  rK  rF  ro   r'  r   r   r   r6  )r   r   r   r  r  r  r  s   @r   rM  rM    sL    44 !4 -	4
 4 4 
4	( 	(r   rM  c                  F     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )CuteDSLTemplateBufferz
    Buffer for CuteDSL (CUTLASS Python DSL) template kernels.
    Similar to other template buffers but specialized for CuteDSL operations.
    c           
        t         |   |||       || _        || _        | g| _        |t        | j                  d   t              sJ t        | j                  d                | j                  d   j                         }| xj                  |D cg c]  }t        t        |      ||        c}z  c_        y y c c}w )Nr   r  )r  r  rF  r  r  r   rY  r   r   r  r  r  )	rq  r>  rY  r  rF  r  r   rb  r  s	           r   r  zCuteDSLTemplateBuffer.__init__,  s     	);< ,&*V%dkk!nf5KtDKKN7KK5[[^..0FLL) z8#tD L &s   B<c                    | j                   S r   r  r  s    r   rz  z!CuteDSLTemplateBuffer.get_outputsA  r  r   r   )r>  r  rY  r  r  rK  rF  r   r  r  r   r   r  )r   r   r   r]  r  rz  r  r  s   @r   rS  rS  &  sQ     6: ! -	
  3 
*r   rS  c                &    t        d | D              S )Nc              3  <   K   | ]  }t        |t                y wr   r   r   r   r  s     r   r   z#is_node_sequence.<locals>.<genexpr>H  s     4z!V$4r  )r   )r   s    r   is_node_sequencerZ  E  s     4e444r   c                  ~    e Zd ZU ded<   ddZddZddZedd       Ze		 	 	 	 dd       Z
ddZdd	Z	 d	 	 	 dd
Zy)rk  )Sequence[Union[IRNode, Sequence[IRNode]]]rY  c                d    | j                   |   }t        |t              sJ |j                         S r   rY  r   r   r  )rq  r   inputs      r   
input_namezInputsKernel.input_nameO  s,    A%(((~~r   c                   t        t        j                            }t        j                  | j                  D ]c  }t        |t              r|j                  fd|D               .t        |t              r?|j                   |j                                      e t        t        j                     fd| j                         D              }t        j                  ||t                     S )Nc              3  J   K   | ]  } |j                                 y wr   rH  )r   r   r  s     r   r   z/InputsKernel.get_read_writes.<locals>.<genexpr>Y  s     BqWQZZ\2B    #c              3  J   K   | ]  } |j                                 y wr   rH  )r   rb  r  s     r   r   z/InputsKernel.get_read_writes.<locals>.<genexpr>`  s!      .
(+GCLLN#.
rc  r  )r:   rA   rG   r  rY  r   r   r  r   r  r  rz  r  )rq  r  r_  r  r  s       @r   r  zInputsKernel.get_read_writesT  s    <++,.&&[[ 	5E%*BEBBE#89		'%.."234	5 L,,- .
/3/?/?/A.
 
 &&"
 	
r   c                6    | j                         j                  S r   r  r  s    r   r  zInputsKernel.get_readsj  r  r   c                   t        |t              r|j                  }t        |t              r|j                  }t        |t              r%t        |t
              st        j                  |      }t        |t              r| j                  |      S t        |t              r|S t        |t        t
        f      sJ t        |             |S r   )r   r   r=  r  r  rC  r  realize_inputunwrap_storage_for_inputTorchBindObjectr  r   r  r   s     r   rh  z%InputsKernel.unwrap_storage_for_inputm  s    a#Aa$Aa":a+I**1-Aa#
 //22a)H!fo67@a@7r   c                    g }| D ][  }t        |t              r#|D cg c]  }t        j                  |       }}nt        j                  |      }|j	                  |       ] |S c c}w r   )r   r   rk  rh  r  )rY  
inputs_newr   r   s       r   r  zInputsKernel.unwrap_storage  sl     =?
 	!A!X&GHI!\::1=II 99!<a 	! 	 Js   A%c                     yrt  r   r  s    r   r   zInputsKernel.is_extern  r  r   c                     yr  r   r  s    r   r  zInputsKernel.num_reads  r@  r   c                    t        t        j                            }| j                  D ]B  }t	        |t
              r||j                  |      z  }(|D ]  }||j                  |      z  } D |S r   )r:   r   r#   rY  r   r   r  )rq  rX  r  r[  	inner_inps        r   r  z!InputsKernel.get_free_symbol_uses  sv     u||$&;; 	GC#v&S--m<<!$ GI77FFAG		G r   N)r   r   r   r   rU  rW  r   r   r   r   )rY  r\  r   z%list[Union[IRNode, Sequence[IRNode]]]r8  rX  rO  rZ  )r   r   r   r   r`  r  r  r  rh  r`  r  r   r  r  r   r   r   rk  rk  K  ss    55 

,,  $ 
9
	.
 
 %*
!
	!
r   rk  c                      e Zd ZddZddZy)	NopKernelc                     yrt  r   r  s    r   r"  zNopKernel.is_no_op  r  r   c                    t               S r   r9   r  s    r   r  zNopKernel.get_reads  r}  r   Nr8  rW  )r   r   r   r"  r  r   r   r   rs  rs    s    r   rs  c                  n    e Zd ZdZedd       Ze	 d		 	 	 	 	 d
d       Z	 d	 	 	 ddZedd       ZddZ	y)ConcatKernelzn
    There isn't actually a real kernel for concat, we just change the
    storage for the upstream data.
    c                z
   |d   j                         }|d   j                         }t        |d   j                               }dg}||   g}d|cxk  rt	        |      k  sJ  J t        dt	        |            D ]  }||   j                         }	|j                  ||          t	        |	      t	        |      k(  sJ ||   j                         |k(  sJ ||   j                         |k(  sJ t        t	        |            D ]I  }
|
|k(  r||
   |	|
   z   ||
<   t        j                  j                  j                  ||
   |	|
         ||
<   K |j                  ||           t        j                  |      }t        j                  r$t        j!                  |||d   j"                        }t        t	        |            D ]k  }||   }t%        |      s|j'                         }t)        |t*              s5t        j-                  |j.                  |j0                        s`t3        |      } n t5        d |D              }t        j                  j6                  j8                  d   }t)        |t              sJ t;        |             |du rt5        d |D              rt3        |      }t=        d |D              }|J t?        dt+        |||||      g 	      }tA        |      }g }tC        |      D ]  \  }}t)        |tD        tF        f      sJ t;        |             | jI                  |tJ        jM                  ||||   ||   d
            }t)        |tN              sJ t;        |             t)        |jP                  t              sJ t;        |jP                               |jP                  j                  |       t)        |jR                  tD              r|jR                  jU                         }n|jR                  }t)        |t@              s|jW                         s1|j                         x}EtY        |j:                        s\t[        |      ri|j                  |j]                                 t	        |      dkD  rMt        j                  j_                  |t`        jb                        rt        j                  je                  |       t        j                  jg                  |      |_4        | jk                  |jP                        |_(        t        j                  jm                  |       |S )z6
        Create the concat kernel from inputs
        r   r?   c              3  2   K   | ]  }t        |        y wr   )r  rQ  s     r   r   z&ConcatKernel.create.<locals>.<genexpr>  s     -W1.CA.F-WrR  Fc              3     K   | ]p  }d |j                   v xr\ |j                   d    j                  t        j                        xs- |j                   d    j                  t        j                         r yw)r  r  N)rQ  r  r  r  r  r   args     r   r   z&ConcatKernel.create.<locals>.<genexpr>  sq      <
  SXX --E<O<O-P W88E?00u?U?U0V<
s   A6A8c              3  j   K   | ]+  }t        |      xr |j                         j                   - y wr   )r  r  rB  rQ  s     r   r   z&ConcatKernel.create.<locals>.<genexpr>  s/      
FG!!$A)A)AA
s   13N)r   r   r   r   rB  r   r>  rY  )r[  )7r  r  r   r   r   r   r  rk   r   r   r$  r  r  r@   r  r  r  r   r  r  r   r@  r  r   r   r-   r   r  r   r   r   rw  r  r   r  r   r(  rN  r  r  rY  r=  r  r  rb   ra   r(  rh  rB   FOREACHregister_operation_listr  r   r  r  )r  rY  r3  r   r   r  offsets_startoffsets_endr   
input_sizer  output_strider   r>  any_input_is_storage_and_layoutfx_node_argsrB  concat_kernelkernelop_namesr[  r  input_unwrappeddevs                           r   r  zConcatKernel.create  s   
 %%'q	##%q	**,-}oC'#h-'''''q#f+& 	.A++-J  #/z?c(m333!9&&(E111!9'')V3333x=) 8"*1+
1"=HQK"#''"2"2"L"L Z]#HQK	 x}-	. (6'H'H'R''"//xM
 s6{# 		Aq	A$Q'K88fmmT$B8$LM		 +.-WPV-W*W'ww++003,-AtL/AA-*e3 <
 $<
 9
 ;8DM 
KQ
 
	 !!!$$# 

 M*' 	CFAscHj#9:EDIE:++  Cq!1;q> ! L lF3GT,5GG3m22D9U4@T@T;UU9  ''5#((H-"%(("6"6"8"%(( ?J7#335NN,,S9388$"<0 ? ? AB1	C4 x=1!4!4V^=S=S!TGG++H5WW44]C"11-2F2FG	""=1r   Nc                   t        |t              r| j                  |j                  |      S t        |t        t
        f      sJ t        |             t        |j                  t              rt        |j                  j                  t              r|j                  j                  sy|yt        |j                               t        |j                               k(  syt        d t        |j                         |j                               D              S t        |j                  d      xrA t        |j                  j                  t               xr t        |j                  t"               S )NFTc              3  v   K   | ]1  \  }}t         j                  j                  j                  ||       3 y wr   r9  r:  s      r   r   z=ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>6  s3      B   88R@r;  r>  )r   r   can_realize_into_without_copyr=  r  r  r   r!  r>  r@  r0  r   r%  r   r   r  r  ExternKernelAlloc)r  r%  r&  s      r   r  z*ConcatKernel.can_realize_into_without_copy  s    c9%44SXXsCC#*56AS	A6chh 34sxx<xx00 { s~~'(C0@,AA !#.."2CNN4DE   CHHh' <388??N;<sxx):;;	
r   c                .    t         j                  | |      S r   )rs  r  r  s     r   r  z!ConcatKernel.get_free_symbol_usesA  s     --dMBBr   c                ^   t        |t              s&t        |      rt        |      \  }}t        ||      }t        |t              sJ t	        |             t        |t
              r| j                  |j                  |      S t        |t              r`|j                          t        |j                  d      sJ | j                  ||      r&t        |      |j                  _        |j                  S t        j                  |j!                         |j#                         |j%                         t'        |j)                         |j)                               D cg c]/  \  }}t*        j,                  j.                  j1                  ||      1 c}}      }| j                  ||      S c c}}w )Nr<  r>  r  )r   rC  r  r?  r   r   r(  r=  r  r  r  r  r  r>  r  r  r  r  r  r   r   rk   r   r   r$  )r  r%  r&  rF  r>  r  r  pws           r   r(  zConcatKernel.realize_intoF  sL   
 #/$S)"7"<%76B#/:c:/c9%##CHHc22c:&KKM388X...00c:"1#"6xx>>#--/__&  ?Aq   ::1a@	  
 C((s   4F)c                     yrt  r   r  s    r   rR  zConcatKernel.should_allocateg  r  r   )rY  r  r3  r   r   r  r   )r%  r   r&  r  r   r   rO  rZ  )r%  r   r&  r   r   r   r8  )
r   r   r   r]  r  r  r  r  r(  rR  r   r   r   rw  rw    s    
 l l\ 26!
!
/!
	!
 !
H %*C!C	!C
 ) )@r   rw  c                      e Zd ZU dZdZded<    ej                  e      Z	ded<   dZ
d	ed
<   dZded<   dZded<    ej                  e      Zded<   dZded<   dZded<    ej                  e      Zded<   dZded<    ej                  e      Zded<    ej                  e      Zded<   	 	 	 	 	 	 	 d@	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dA fdZdBdZdCdZdDdZdDd ZdEd!ZdEd"ZdFdGd#ZdHd$ZdId%ZedJd&       Z e!	 	 	 	 	 	 	 	 dKd'       Z"e!dLd(       Z#e!dMd)       Z$e!dMd*       Z%e!	 	 	 dN	 	 	 	 	 	 	 	 	 dOd+       Z&e!	 dP	 	 	 	 	 	 	 dQd,       Z'e!	 dP	 	 	 	 	 	 	 dRd-       Z(e!dMd.       Z)e!dMd/       Z*e!dMd0       Z+e!dMd1       Z,dDd2Z-	 	 	 	 	 	 dSd3Z.dFdTd4Z/dUd5Z0dVd6Z1dPdWd7Z2dId8Z3dEd9Z4dEd:Z5dEd;Z6dXd<Z7dYd=Z8	 dP	 	 	 dZd>Z9dId?Z:e:Z; xZ<S )[r  z
    A class that represents Kernels which are not directly lowered to Inductor
    Loop Level IR, such as custom operators, or aten operators which we fallback to.
    r   r  constant_argsr  dict[str, Any]r   NOptional[ReinterpretView]output_viewr>  python_kernel_namecpp_kernel_nameIterable[str]ordered_kwargs_for_cpp_kernelOptional[_OpOverloads]op_overloadzOptional[list[dict[str, Any]]]arg_propertieszdict[str, dict[str, Any]]allarg_propertiesz#Optional[dict[str, dict[str, Any]]]kwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]mutation_outputsc                6   t         |   |||       || _        |r|ni | _        || _        |
| _        | j                  |       | j                  |       |	| _        | j                          i | _
        g | _        t        j                  j                  | _        y Nr  )r  r  r  r   r  r  set_cpp_kernel_nameset_python_kernel_namer  collect_arg_kwarg_propertiesr  r  rk   r   r  fx_node)rq  r   r>  rY  r  r   r  r  r  r  r  r  s              r   r  zExternKernel.__init__  s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww++r   c                     | g| j                   S r   )r  r  s    r   rz  zExternKernel.get_outputs  s    -t,,--r   c                    t               S r   r9   r  s    r   r|  z%ExternKernel.get_unbacked_symbol_defs  r}  r   c                N   t        | j                  t        j                  j                        r\| j                  j
                  j                  D cg c]2  }|j                  s$|j                  |j                  |j                  d4 c}n+t        t        | j                              D cg c]  }i  c}| _        t        | j                  t        j                  j                        rP| j                  j
                  j                  D ci c]&  }|j                  |j                  |j                  d( c}ni | _        t        | j                  t        j                  j                        r| j                   sJ| j                  j
                  j                  D cg c]  }|j                  s|j                   c}| _        | j                  j
                  j                  D cg c]  }|j                  s| c}| _        y g | _        y c c}w c c}w c c}w c c}w c c}w )N)r   r   r  )r   r  )r   r  r  _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typer  r   r   rY  r  r  r  schema_kwargs)rq  r   r   s      r   r  z)ExternKernel.collect_arg_kwarg_properties  s    $**EJJ,A,AB ))11;; || FFKK%&__ $C$456"6 	$ $**EJJ,A,AB ))11;; qOO
  	 d&&

(=(=>55$($4$4$<$<$F$F6 !,,AFF62  ++33==""D "$D? 76"s*   7H+	H+HH/H'H"9H"c                z    t        | j                  t              r!| j                          | j	                          y y r   )r   r>  r  apply_constraintr  r  s    r   r]  zExternKernel.decide_layout  s-    dkk>2!!#  3r   c                J    t        | |      \  }}|r|j                  |       y y r   )r^   make_comment)rq  wrapper
origin_str_detailed_origin_strs       r   codegen_commentzExternKernel.codegen_comment  s*    +>tW+M(
(  , r   c                    t         r   rj  rq  r  s     r   codegenzExternKernel.codegen  rk  r   c                   || _         t        j                  j                  r.t	        | j
                  t        j                  j                        sy | j
                  }| j                   |j                  dk(  rU|j                  dk(  r|j                  j                  d      d   n|j                  j                  dd      }d| d| _         y |j                  j                  | _         y y )Natenr  .r   r   z
at::_ops::z::call)r  rk   r   cpp_wrapperr   r  r  r  r  	namespace_overloadnamer   r  replacer  r   )rq  r  r  opnames       r   r  z ExternKernel.set_cpp_kernel_name  s    .ww""*ejj33+
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ (r   c                   || _         |y | j                  }|y t        |t        j                  j
                        rd|j                   | _         y |j                  j                  dd       d|j                   | _         y )Nztorch.ops.higher_order.._ops..ops.r  )	r  r  r   r  r  HigherOrderOperatorr   r   r  )rq  r  r  s      r   r  z#ExternKernel.set_python_kernel_name  s    "4)!!>

 > >?(??P&QD# $$,,Xw?@&//ARS #r   c                j   ddl m} | j                         x}r|j                  nt        j
                  j                  }t        j
                  j                  r| j                  J | j                  S t        j
                  j                  rt        t        j
                  j                  |      s(J t        t        j
                  j                               | j                  J t        j
                  j                  j                  | j                  |      S | j                  J | j                  S )Nr?   )CppWrapperCpu)codegen.cpp_wrapper_cpur  r  r   rk   r   device_type
fx_wrapperr  r  r   r<  r  get_c_shim_func_name)rq  r  dr   s       r   get_kernel_namezExternKernel.get_kernel_name  s    :!%!22A29L9L77**666***WW  agg22MB D$$E B ''33377''<<$$f  **666***r   c           	        t         j                  | j                         | j                         | j	                         | j                         | j                         | j                               }|j                          |S )N)r   r   r  r  re  rc  )	r  r  r  r  r  r   r  r  r  )r   r  s     r   
copy_inputzExternKernel.copy_input  sa    <<>++-]]_::<))+oo'  
 	

	r   c                B	   ||d}t        j                  |      \  }g g }g }|D ]  }j                  t        |t              xr t        |t
                      d   r|j                  |       Lt        |t              r5t        j                  j                  j                  j                  |d       }|j                  |        	 	 	 	 	 	 dfd}	|D 
cg c]  }
| j                  |
       }}
|D ]  }
t        |
      st        |
d        g }|D ]  }
t        |
t              se|
j!                         t        j                  j"                  v r;|j                  t        j                  j"                  |
j!                                   yt        |
t              se|
j!                         t        j                  j$                  v r;|j                  t        j                  j$                  |
j!                                   t        |
t&              r!|j                  |
j)                                t        |
t*        j,                  j.                  j
                        ro|
j0                  j2                  }|
j0                  j4                  dk(  r|J |j                  t*        j6                  j8                  |   j;                                |j                  t=        |
d               |	||      \  }} ||i |}d }t        j>                  j                  x}rt        j@                  jB                  jE                  d	      }tG               }t        j@                  jH                  t*        jJ                  jL                  jN                  k(  r|d
   }tQ        t        j@                        }|5  tS        |t        j@                  |       d d d        tU        |||      }t        |tV        tX        f      s|gn|}|D ]~  }t        |t*        jZ                        s|j\                  s+d}t        j                  j@                  jB                  jE                  dd       x}r| d| }|t        j                  _/         ||||	|fS c c}
w # 1 sw Y   xY w)N)r   r   r'  )r  c                $   g }t        |       }t        |      }D ]9  }|r|j                  t        |              |j                  t        |             ; t        j                  |      }|j                  dg       |j                  di       fS )Nr   r   )iterr  nextpytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsr  
it_tensorsit_non_tensors	is_tensorr  	args_specis_arg_tensors	          r   unflatten_argsz3ExternKernel.process_kernel.<locals>.unflatten_argsA  s     Fo.J!"56N* 8	MM$z"23MM$~"67	8
 %%fi8A55$aeeHb&999r   Tr|  r  )r   r  r?   zEsparsity not handled. Please file issue for sparse inference weights.r  z Found from : 
 )r  r   r  r   r   ztuple[list[_T], dict[str, _T]])0r  tree_flattenr  r   r   GeneratorStater!   rk   r   r   r   create_symintnoderg  r  r?  r  r  	constantstorchbind_constantsri  	get_valuer  r  irr   r   r   r  default_generatorsclone_stater   r  r  rQ  r  r
   r  _higher_order_opseffectswith_effectsr0   r4   r1   r   r   Tensor	is_sparsedisable_cudagraphs_reason)r  r  r   r   binded_args	args_flattensor_argsnon_tensor_argsr}  r  r   example_argsdevice_indexnew_args
new_kwargsexample_outputr  r   node_meta_valctxexample_out_lir  msgr  r  r  s                           @@r   process_kernelzExternKernel.process_kernel$  s     $v6%22;?	9%' 		,C  3'O
30O,O R ""3'c4(''**44FFsQUFVC&&s+		,	:)	:@L	:+	: 6AAs((+AA  	6A$Q'%a5	6 	 	  	LA a*qzz|qww?P?P/P##AGG$5$5ajjl$CDq(+JJLAGG$?$??##AGG$?$?

$MNA/##AKKM2Au11@@A xx~~xx}}.<3KKK##JJ11,?KKM ##$5aT$JK'	L*  .lOL*8Z8JN---9-NN//33E:M0;C~~$$(?(?(G(G(T(TT -a 0<Q^^L K	1>>>JK 9>=! ntUm<  	
   	8A!U\\*q{{]"#''"6"6";";"?"?t"TT;T E!2;-@C471	8 
 	
O BjK Ks   R:RRc                >   t        |t              sJ t        |             t        |t              r|S |j	                         }t
        j                  j                  |j                               }|J |j                         }|d|j                  v rt        |t        t        t        f      rt        |j                  t              r|j                  d   j                  t         j"                        s-|j                  d   j                  t         j$                        r)|j'                  t)        |j+                                      n|j-                          t/        j0                  |j+                         d      \  }}|d   } |j3                         |      }t
        j                  j4                  j7                  ||      }t
        j                  j4                  j9                  ||      }	t
        j                  j4                  j;                  ||      }
t=        ||	      |
z   }||k7  rt>        jA                  d|	|
|       tB        t        |jD                  tG        |jI                         |jK                         |j+                         |	|
d      	      S )
z
        In order to pass this to an extern kernel we need a
        ReinterpretView not a View.  This allows us to avoid some
        unneeded copies.
        r  r{  r  rz   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sFr	  r<  )&r   r  r   rC  r  rk   r   r  r  r  rQ  r  r   r>  r  r  r  r  r  r	  r-   r   r  rA   rq  r  r   rr  stride_vars
offset_varrc   rp  r  r  r=  r@  r  r  )r  r   x_unwrap_viewrb  x_unwrap_view_fx_node
index_argsr  rY  r   rE  rA  expecteds               r   convert_to_reinterpret_viewz(ExternKernel.convert_to_reinterpret_view  s6    !X&/Q/&a)H gg  !7!7!9: # 3 3 5 "-.333=?FJ*OP=//@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

J  ]
  ,  55eZH''""..ujA!!,,UJ?Z1F:HIIR	 &%,,.kkmZZ\

 
	
r   c                   |
t               S t        |t        t        j                  j
                  j                  t        f      rt        |      S t        |t              r[t        j                  j                  t        j                  |j                  |j!                         |j#                                     S t        |t$              r|S t        |t&              r| j)                  |j*                        S t        |t,              r4t-        | j)                  |j*                        |j/                               S t        |t0              r;|j3                          t5        |j7                               r	 | j9                  |      S t        |t<              r|j3                          |S t        |t>        t        f      r|S | jA                  |      S # t:        $ r Y Vw xY w)N)r  )r   r   r<  )!rd  r   r!   r   r   r   r   r   r   rs  rk   r   add_tensor_constantr  rD  r  r  r  r  r   rg  r=  rC  r  r  r  r  r  r  r  r  NonTensorObjr  rj  s     r   rg  zExternKernel.realize_input  si   9'))a$ 3 3 ; ;SAB(a00a"77..QWWAKKM!,,.Q  a(Ha#$$QVV,,a)"&&qvv.q||~  a"IIK$Q]]_5::1== a$IIKHa,(=>?H~~a   + s   8G 	G G c                    t        |      r<t        |j                               dk(  r|S |j                         D ]  }|dk(  s	|c S  | j                  |      S r  )r  r   r%  r  )r  r   r   s      r   require_stride1zExternKernel.require_stride1  sT     #1<<>"a',,. Q;H ~~a  r   c                2	   ||J |j                         dv r|s|S t        |      rt        |j                         t              r|rt        ||      xr$ t        |j                         j                         }t        |dd|rJt        t        j                  j                  j                  |j                         j                              n||       |S t        |ddd ||       |S t        |j                         t        t        f      rf|r|j                         j!                  |      s5|rCt#        ||j                         j                  |j%                               r|t'        ||      S |S t        |j                         x}t(              rwt        |j+                         x}t              rt-        d      t        |t              r<|r|j!                  |      s'|r't#        ||j                  |j%                               r|S t        |t.              rX|r|j                         j!                  |      s5|r5t#        ||j                         j                  |j%                               r|S t        |t0              rt        |j2                  t4              rt        |j2                  t6              st        |j9                         x}      rtt;        |d      rht        |j2                  t<              sN	 | j?                  |j2                        |_        |r| jA                  |||      S |r| jC                  |||      S 	 d }	|j%                         }
|t        j                  j                  }tG        tI        |j%                                     D cg c]<  }|jK                  ||   d	      r%|jM                  |j%                         |   d
      r|> }	}|	D ].  }tN        jP                  jR                  jU                  ||d	d      }0 | jW                  |      }t        |dd|||       |rt        ||      sJ |S |	r<|
|J tN        jP                  jR                  jY                  ||
      }t'        ||      S |S # tD        $ r Y Hw xY wc c}w )N)r   r?   TF)r}  r  r  r  r  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutr=  r  r   r   r?   )-r  r  r   r  r  r  r3   r   r?  r   rk   r   r   size_hints_or_throwr@  r  r  r6  r   rJ  r  r  r  rl  r   r=  r  rC  r  r  r  r  require_stride_orderrequire_exact_stridesr  r   r   r.  rT  r  r  loweringslice_r  r  )r  r   r   r  r  use_current_stride_ordermutation_layoutr  r  expanded_dims	orig_sizer   r   r3  s                 r   require_strideszExternKernel.require_strides  sS     M$===;;=F"=H !#!,,..9 0R50 0K3ALLN4I4IJJ - *#(-  8 - ! 0 0 D D$%LLN$9$9!" "'&3 H *#(-%)&3&3 HALLN[/,JK1<<>;;EB!1%q||~'<'<ajjl %0 4A}E 
 $%LLN25O $3$?$?$AA[N )b   [9{<<UC%5);+=+=qzz|
 H a%q||~77>-!1<<>#8#8!**,
 Hq)$1668,qvv7%Q]]_&DkEV,{//1BC88@335 4   #44= 5   # .2JJL	$ww''H s1::<0133M!4DaH11!**,q/1E M  % BOO,,33AsAqAB
 NN1!''	
 5a???  (]-FFF((//9=A21mDDW ' s   ;5R 1R AR	RRc                *    | j                  |||      S )N)r  r  r  )r  r   r  r  s       r   r  z"ExternKernel.require_exact_strides  s#     ""]- # 
 	
r   c                *    | j                  |||      S )N)r   r  r  )r  r   r   r  s       r   r  z!ExternKernel.require_stride_order  s     ""1E"OOr   c                .    | j                  |t              S r   )r  r  rj  s     r   require_channels_lastz"ExternKernel.require_channels_last  s    ''+<==r   c                .    | j                  |t              S r   )r  r  rj  s     r   require_channels_last_3dz%ExternKernel.require_channels_last_3d  s    ''+=>>r   c                    dd} ||      r|S | j                  |t        j                  |j                                     S )Nc                    	 | j                         }|t        j                  j
                  v xr' t        j                  j
                  |   j                  S # t        t        f$ r Y yw xY wr  )r  AttributeErrorr  rk   r   r  	is_mkldnn)r   r   s     r   is_mkldnn_tensorz9ExternKernel.require_contiguous.<locals>.is_mkldnn_tensor  s]    zz| 177,,,R1B1B41H1R1RR #$78 s   A A)(A)r   r   r   r   r  r  r  r   )r  r   r$  s      r   r  zExternKernel.require_contiguous  s@    	S AH,,>44QZZ\B r   c                h    | j                  |t        j                  |j                                     S r   r&  rj  s     r   require_contiguous_stridesz'ExternKernel.require_contiguous_strides  s-     ((~00>
 	
r   c                     y r   r   r  s    r   r  zExternKernel.apply_constraint  r  r   c                   t        |t              sJ t        |             t        |t              st        |      }| j                  sJ d       t        |      }t        | j                        }||k  rqt        j                  d| j                  ||z
         t        ||      D ]>  }| j                  |   d   }|j                  ||v r||   n| j                  |   d          @ |S )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   r  )r   r   r   r   r  r   rp  r  r  r   r  )rq  r   r   n_args
n_pos_argsr   arg_names          r   fill_non_provided_argsz#ExternKernel.fill_non_provided_args  s     $)54:5)$%:D""U$UU"T,,-
 JII^  V#	 6:. ..q1&96) 8$,,Q/@ r   c                \   t         j                  j                  rGg }d }|r]| j                  rQt	        | j
                        t	        |      k(  sJ d       | j                  D ci c]  }|j                  d      | }}t        | j
                        D ]  \  }}|.|J |j                  ||         }|r|j                  d      nd }n\t	        | j                        |z   }	| j                  r6|	t	        | j                        k  r| j                  |	   j                  d      nd }|j                  t         j                  j                  j                  ||              |S | j
                  D 
cg c]+  }
t         j                  j                  j                  |
      - c}
S c c}w c c}
w )NzDnames passed to codegen_const_args does not match self.constant_argsr   r   )rk   r   r  r  r   r  r  r   rY  r  r<  val_to_arg_str)rq  r  r  name_to_arg_propertiesr}  r   r   proptype_r   r  s              r   codegen_const_argszExternKernel.codegen_const_args
  s   77F
 &*",,4--.#e*< Z< 594G4G*-0CGGFOS(*& * "$"4"45 M1)5 ,,,155eAh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!UKLM MDHDVDVWqAGG((77:WW'*& Xs   $F$10F)c                   t         j                  j                  rC| j                  7| j	                  g | j
                  | j                  | j                        }d}n| j
                  }d}g }t        |      D ]  \  }}t         j                  j                  r| j                  r|t        | j                        k  sJ d       | j                  |   j                  d      }|j                  t         j                  j                  j                  ||             |j                  t         j                  j                  j                  |              |r|j                  | j!                                |S )NFTz-Invalid access to ExternKernel.arg_propertiesr   )rk   r   r  r  r.  rY  r  r   r   r  r   r  r  r<  r0  r  r4  )rq  rY  need_codegen_constant_argsr   r   r   r3  s          r   codegen_argszExternKernel.codegen_args+  s5   774#3#3#?003$++3 2 23T[[F */&[[F)-&f% 	DDAqww""**q3t7J7J3K/K CK ++A.226:AGG00??5IJAGG00??BC	D &KK//12r   c                    ||v r|j                  |      S || j                  v r| j                  j                  |      S | j                  j                  |      x}|j                  d      S t        | d      )zGiven an argument name, queries for values in (in order):
        1. any provided kwargs for this function.
        2. the class self.kwargs member.
        3. any available default arguments in self.allarg_properties.r  z not in self.allarg_properties)r  r   r  r  )rq  r-  r   r}  s       r   get_kwargs_valuezExternKernel.get_kwargs_valueE  sx    
 v::h''t{{";;??8,,))--h77CD77?++z)GHIIr   c           	        t         j                  j                  r| j                  t	        | j
                        dk(  rg S g }| j                  D ]  }|r|dk(  r| j                  |      }t        |t              r|j                  |       >| j                  J | j                  j                  |i       j                  d      }|j                  t         j                  j                  j                  ||              |S | j                  j!                         D cg c]3  \  }}| dt         j                  j                  j                  |       5 }}}|S c c}}w )Nr   r   r   r  )rk   r   r  r  r   r  r  r9  r   r!   r  r  r  r<  r0  r   r  )rq  skip_outr   r-  r  r3  ks          r   codegen_kwargszExternKernel.codegen_kwargsR  s=   77+D4F4F0G10L	F >> QE 1))(3a&MM!$11=== 2266xDHHPEMM!''"6"6"E"Ea"OPQ"  !KK--/Aq #Qqww++::1=>?F  	s    8Ec                    | j                   S| j                   j                  }t        |dd      }|j                  dd      }|j	                  dd      d   }| d| }|S d}|S )	Nr   unknown_namespacer  r  r  r?   r   
unknown_op)r  r  r   r  rsplit)rq  r  op_namespaceop_names       r   get_op_namezExternKernel.get_op_namel  sv    <<#\\((F"6<9LML'//'BL'..sA6q9L%ax0G  #Gr   c                   t         j                  rt        j                  j                  st        | j                               dk(  ry t        j                  j                  j                  | j                               }t        j                  j                  j                  | j                               }| j                         }|j                  d| j                          d| d| d|d	       y y y )Nr   zassert_size_stride(r  r  )r@   size_assertsrk   r   r  rf   r   r<  codegen_shape_tupler%  rD  r>  r  )rq  r  r   r   rC  s        r   codegen_size_assertsz!ExternKernel.codegen_size_assertsw  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF&&(G%dmmo%6bb7+UVW (;r   c           	     H   t         j                  rt        j                  j                  sw| j                         }|t        j                  j                  v}| j                         }|r |j                  d| dt         d|d       y |j                  d| d| d       y y y )Nzassert_alignment(r  r  z	# buffer z (op: z) is assumed to be not aligned)
r@   alignment_assertsrk   r   r  r  r  rD  r>  r_   )rq  r  r   alignedrC  s        r   codegen_alignment_assertsz&ExternKernel.codegen_alignment_asserts  s    ##AGG,?,?==?D!''";";;G&&(G!!'vR/@7+QO !!vVG94RS -@#r   c                    t         j                  j                  rt        j                  j
                  ry|j                          | j                         }|j                  d| d| d       y)zc
        Track outputs of fallback operators if config.test_configs.track_memory_lifecycle
        Nztrack_tensor(z, 'z'))	r@   test_configstrack_memory_lifecyclerk   r   r  "write_memory_track_allocation_oncer  r>  )rq  r  r   s      r   codegen_memory_trackingz$ExternKernel.codegen_memory_tracking  sV     ""99QWW=P=P224}}M$s4&;<r   c                N    | j                         }| j                         }|g g|fS )zD
        get output sizes and strides, for template_codegen
        )r   r%  )rq  _size_strides      r   get_group_stridezExternKernel.get_group_stride  s*     //#r{G##r   c                   t         j                  j                  }| j                         }| j	                         }|D cg c]  }|j                  |       }}t        t        |            D cg c]  }t        d|        }}t        t        t        |            |j                  d      }t        |      D 	ci c]  \  }}	|	|
 }
}}	t        t        |
            D cg c]  }|
|   	 }}|D cg c]  }||   	 }}| j                         } ||      }t         j                  j                  j                  |||g      \  }}}t        d      \  }}t        t!        | ||D cg c]
  } ||       c}                  }t#        t%        j&                  |      |      }|t)        |      fS c c}w c c}w c c}	}w c c}w c c}w c c}w )zC
        Manually get canonicalization of the output index
        r  T)rV  r  c)rk   r   r   r   r%  r   r   r   rd   r[  r  r   r  r  rK   r   r   rg   r   r  r   )rq  r   r  rE  r   r   r  index_orderr   r   r   r   r  r   	new_sizesr   r  r   add_varreplacements                       r   canonicalizezExternKernel.canonicalize  s   
 77##//#29:Q8%%a(::;@U;LMa(1QC1M
MU3w<0g6I6ISWX+4[+ABxsC#s(BB$)#f+$67q77-23jm3
3##%
#%&WW%5%5%E%Ew&
"	7F !%
73z7	3R1GAJ3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F5/F:>F?$G6G
+Gc                    |rt         nt        }t        j                  | |      }| j                  D ]  }| ||      z  } | j
                  j                         D ]  }| ||      z  } |S r   )maybe_free_unbacked_symbolsmaybe_free_symbolsrk  r  r  r   r   )rq  rX  maybe_get_symbolsr  r}  s        r   r  z!ExternKernel.get_free_symbol_uses  s|     ,9'>P 	 --dMB%% 	(C"3''A	(;;%%' 	(C"3''A	(r   c           
     "   t        | dd       }d|g}|t        j                  |       D cg c]'  }|j                   dt        | |j                         ) c}z  }|j	                  d| j
                         | j                  |      S c c}w )Nr  zpython_kernel_name=r  r  )r   r^  fieldsr   r  re  r  )rq  kernel_namer  r_  s       r   r  zExternKernel.__str__  s    d$8$?!+1
 	$++D1
 zzl!GD%**567
 	
 	|D$4$4#789u%%
s   ,Br   NNNNr   N)r   r>  r>  r  rY  r\  r  r  r   Optional[dict[str, Any]]r  r  r  r>  r  r>  r  r  r  r  r   r   r  r  r-  r  rp   r   r   r   r  r>  r   r   )r  r>  r   r   rJ  )r   r   r   r  )r  ry   r   r   r   r   r   zituple[Any, list[Any], list[Any], Callable[[Any, Any], Any], Optional[dict[sympy.Symbol, pytree.KeyPath]]])r   r   r   rC  rq  )NNF)
r   r   r   Optional[Sequence[int]]r  r;  r  r   r   r   rO  )r   r   r  rI  r  r   r   r   )r   r   r   rQ  r  r   r   r   )r   r  r   r  r   r  )r  rb  r   r   r   r   )r-  r   r   r   r   r   )r;  r   r   r   )r   z'tuple[list[Sequence[Expr]], list[Expr]])r   ztuple[Expr, Sequence[Expr]]rZ  )=r   r   r   r]  r  r   r^  r_  r   r   r  r  r  r   r  r  r  r  r  r  r  r  rz  r|  r  r]  r  r  r  r  r  r`  r  r  r   r  rg  r  r  r  r  r  r  r  r(  r  r.  r4  r7  r9  r=  rD  rH  rL  rQ  rU  r\  r  r  r  r  r  s   @r   r  r  k  s   
 $&M=%.[..tDFND-1K*1(,,%)O]) 4E;3D3D4!=  +/K'.59N293D;3D3D40  =A9@<MK<M<M=9  .?[->->t-T*T (*+/15,0)-79.2,, , :	,
 %, ), /, *, ', (5, ,, 
,<.#$J!
-
";0+& 
 
 w
!w
*-w
9<w

w
 w
r C
 C
J ! !@ ! !  *.6:#aa 'a 4	a
 a 
a aF QV

'9
JN
	
 
 DIPP,P=AP	P P
 > > ? ?  " 
 
"!"+9"	"HXB4J4	
	=$'@ %*!	!
& Hr   r  c                  f     e Zd ZddZ	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )ExternKernelOutc                &    |j                  |        y r   )generate_extern_kernel_outr  s     r   r  zExternKernelOut.codegen  s    **40r   c
                &   | j                  |      }
t        |
t              sJ t        |
             t        |   d ||
||xs i d ||||	
       t        j                  j                  |       | _	        t        j                  j                  |        y r   )r  r   r   r   r  r  rk   r   r  r   r  )rq  r>  rY  r  r   r  r  r  r  r  unwrapped_inputsr  s              r   r  zExternKernelOut.__init__  s      ..v6*H5Mt<L7MM5Lb)	
 GG++D1		""4(r   c                     yrt  r   r  s    r   rR  zExternKernelOut.should_allocate  r  r   rf  rd  )r>  r  rY  r  r  r  r   re  r  r  r  r>  r  r>  r  r  r  r  r   r   r8  )r   r   r   r  r  rR  r  r  s   @r   rk  rk    s    1 (*+/15,0)-79.2)) !) %	)
 )) /) *) ') (5) ,) 
):r   rk  c                        e Zd Zd fdZ xZS )RandomSeedsc                   t        j                  t         j                        }t        |   t        |t         j                  |g      g |j                  |j                  |ggddt        j                  j                         y )NrQ  zaten.randint.low_outzat::_ops::randint_low_out::call)r>  rY  r  r  r  r  )r  r  r  r  r  r@  r  r  r  randintlow_out)rq  countr   limitsr  s       r   r  zRandomSeeds.__init__	  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
r   )rv  r   r   rC  r   r   r   r   r   r  r  r  s   @r   rr  rr    s    
 
r   rr  c                  h     e Zd ZddZ	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZd	dZ xZS )
r  c                &    |j                  |        y r   )generate_extern_kernel_allocr  s     r   r  zExternKernelAlloc.codegen      ,,T2r   c	                F   | j                  |      }	t        d |	D              sJ t        
|   d |t	        t
        t           |	      ||xs i d ||||
       g | _        t        j                  j                  |       | _        t        j                  j                  |        y )Nc              3  <   K   | ]  }t        |t                y wr   rX  )r   r   s     r   r   z-ExternKernelAlloc.__init__.<locals>.<genexpr>,  s     CQ:a(Cr  )r  r   r  r  r   r   r   r  rk   r   r  r   r  )rq  r>  rY  r  r   r  r  r  r  ro  r  s             r   r  zExternKernelAlloc.__init__   s      ..v6C2BCCCC&!#34Lb)	
 ')GG++D1		""4(r   c                     yr  r   r  s    r   rR  z!ExternKernelAlloc.should_allocate@  r  r   c                    t         r   rj  r  s    r   r  z"ExternKernelAlloc.apply_constraintC  rk  r   rf  )r   NNNr   N)r>  r  rY  r  r  r  r   re  r  r>  r  r>  r  r  r  r  r   r   r8  r-  )r   r   r   r  r  rR  r  r  r  s   @r   r  r    s    3 (*+/,0)-79.2)) !) %	)
 )) *) ') (5) ,) 
)@"r   r  c                  T     e Zd ZdZ	 	 	 	 	 	 	 	 d fdZddZd	dZd
dZddZ xZ	S )r  zP
    An output buffer that represents the mutation of a pre-existing buffer
    c                    t         |   d |       |j                         }t        j                  j                  |       |g| _        || _        t        j                  j                  |       | _	        y rb  )
r  r  r  rk   r   r  mutation_namesmutating_noder  r   )rq  r>  mutated_noder  mutated_node_namer  s        r   r  zMutationOutput.__init__L  sb     	d62(113	##$5601(5GG++D1	r   c                    | j                   S r   )r  r  s    r   r  zMutationOutput.get_defining_opV  s    !!!r   c                    | j                   S r   )r  r  s    r   r&  z!MutationOutput.get_mutation_namesY  r:  r   c                     yr  r   r  s    r   rR  zMutationOutput.should_allocate\  r  r   c                ^    | j                         }d |D        D cg c]  }|| c}S c c}w )Nc              3  Z   K   | ]#  }t         j                  j                  |       % y wr   )rk   r   try_get_buffer)r   r   s     r   r   z6MutationOutput.get_mutation_buffers.<locals>.<genexpr>c  s     P..t4P   )+)r&  )rq  r  rb  s      r   get_mutation_buffersz#MutationOutput.get_mutation_buffers_  s=    002 QP
 
 	
 
s   *)r>  r  r  r   r  re  r   r   rX  r\  r8  r   r  )
r   r   r   r]  r  r  r&  rR  r  r  r  s   @r   r  r  G  sA    2 2062GP2	2"#
r   r  c                       e Zd ZU dZi Zded<   e	 	 	 	 	 	 d	d       Ze	 	 	 	 	 	 d	d       Z	 	 	 	 	 	 	 	 d
 fdZ	ddZ
ddZ xZS )TMADescriptorad  
    An IR node representing a generic host-side TMA descriptor in the Triton API
    Mostly useful for user-defined Triton kernels relying on host-side TMA;
    but can, in principle, be used for Inductor's Triton templates, too.

    See TMADescriptorExperimental and TMADescriptorStable for the two implementations
    (the old API and the new API)
    zdict[Any, TMADescriptor]_CACHEc                    t        |      dk(  sJ |d   dk(  rt        |g|d    S |d   dk(  sJ t        |g|d    S )Nr   r   experimentalr?   rc  )r   TMADescriptorExperimentalTMADescriptorStable)r  rD  tma_metas      r   _create_implzTMADescriptor._create_implv  s\     8}!!!A;.(,VBhqkBBA;(***&v<<<r   c                    t        |      |f}|| j                  vr| j                  ||      | j                  |<   | j                  |   S r   )idr  r  )r  rD  r  rV  s       r   r  zTMADescriptor.create  sF     &z8$cjj !..vx@CJJsOzz#r   c           
     @   t         |   d t        t        ||j	                                     t        t        t           |      t        |      d        || _	        t        j                  j                  |       | _        t        j                  j                  |        y )Nr<  )r  r  r  rC  r  r   r   r  r   rD  rk   r   r  r   r  )rq  rD  rY  r  r  s       r   r  zTMADescriptor.__init__  s     	 !,,. &!6*- 	
  GG++D1		""4(r   c                &    |j                  |        y r   )generate_tma_descriptorr  s     r   r  zTMADescriptor.codegen      ''-r   c                    | j                   S r   )rD  r  s    r   
get_tensorzTMADescriptor.get_tensor  r  r   )rD  r   r  ztuple[str, tuple[Any, ...]]r   r  )rD  r   rY  r  r  r  r   r   rf  rN  )r   r   r   r]  r  r   r  r  r  r  r  r  r  r  s   @r   r  r  h  s     (*F$)=='B=	= = 'B	 ))&3)DQ)	)..r   r  c                  :     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 d fdZ xZS )r  z
    the new host-side TMA Descriptor API:
    (the ones obtained via create_{1d,2d}_tma_descriptor calls).

    See also TMADescriptorStable for the new API.
    c                b   t        |      dv sJ t        |      t        |      k(  sJ ||j                         j                  }|| _        || _        || _        t        | j                        | _        |g}g | j                  | j                  | j
                  }t        | !  |||       y )N)r?   r   rD  rY  r  )	r   r  r  r  
block_dimselement_sizer  r  r  )rq  rD  r  r  r  rY  r  r  s          r   r  z"TMADescriptorExperimental.__init__  s     4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	' 	 	
r   r   )
rD  r   r  list[Union[int, torch.SymInt]]r  r  r  rL  r   r   r   r   r   r]  r  r  r  s   @r   r  r    sG     '+

 -
 3	

 $
 

 
r   r  c                  $     e Zd ZdZd fdZ xZS )r  z
    the new host-side TMA descriptor API
    (the ones obtained via TensorDescriptor.from_tensor).

    See also TMADescriptorExperimental for the old API.
    c                :    || _         t        | 	  ||g|       y )Nr  )block_shaper  r  )rq  rD  r  r  s      r   r  zTMADescriptorStable.__init__  s(    &8% 	 	
r   )rD  r   r  r  r  r  s   @r   r  r    s    
 
r   r  c                  <     e Zd Z	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )SubgraphBufferc                   t         
|   d ||       || _        || _        t        j
                  j                  |       | _        t        j
                  j                  |        t        j
                  j                  | j                  ||      | _
        t        | j                        sJ t        | j                        }|D ]T  }|| j                  j                  |j                  <   | j                  j                  j!                  |j                         V |D cg c]  }|j                   c}| _        dd lmc m}	 t	        j*                  | j                        5  |	j-                  ddd      5   | j                  j.                  | j                    d d d        d d d        y c c}w # 1 sw Y   xY w# 1 sw Y   y xY w)Nr   FATEN)max_autotunemax_autotune_gemmmax_autotune_gemm_backends)r  r  rS  example_inputsrk   r   r  r   r  make_subgraphsubgraphrZ  rY  r\  r  graph_input_namesr  
sym_inputstorch._inductor.configr  r@   set_graph_handlerr    run)rq  r>  r   rS  r  subgraph_namer  sym_inpsym_varinductor_configr  s             r   r  zSubgraphBuffer.__init__  sn    	v{3,GG++D1		""4(--dgg~}U,,,(5
! 	AG7>DMM&&w||4MM++227<<@	A 8BBG7<<B88  / 	8 &&""'+1 '  8
 "!!4#6#678	8 	8	 C8 8	8 	8s*   F* F;5$F/F;/F8	4F;;Gc                    G d d      }t        | j                        sJ | j                  D cg c]  }|j                          }}|j                   || j                        g | j
                  || j                  g       y c c}w )Nc                      e Zd ZddZy),SubgraphBuffer.codegen.<locals>.CodegenGraphc                4    || _         |j                  | _        y r   )r   r   )rq  r   s     r   r  z5SubgraphBuffer.codegen.<locals>.CodegenGraph.__init__	  s    "
!JJ	r   N)r   rq   )r   r   r   r  r   r   r   CodegenGraphr    s    'r   r  )rZ  rY  r  'codegen_subgraph_with_flattened_outputsr  r  r   )rq  r  r  r  outer_inputss        r   r  zSubgraphBuffer.codegen  sz    	' 	'
  ,,,7;{{C!++-CC77'-doo--YYK	
 Ds   B)
r>  r  r   r  rS  torch.fx.GraphModuler  	list[Any]r  r   rf  r   r   r   r  r  r  r  s   @r   r  r    s>    "8"8 ""8 !	"8
 ""8 "8H
r   r  c                  |     e Zd ZddZed	d       Z	 d
	 	 	 d fdZddZ	 	 	 	 	 	 	 	 	 	 d fdZddZ	ddZ
 xZS )UserDefinedTritonKernelc                D   ddl m} ddlm} |j	                  | j
                        g }g }g }t        |      rt        d      r%|j                  fdj                  D               n)t        d      sJ |j                  j                         t        d      r:j                  D ]*  }|j                  j                  j                  |          , n)t        d      sJ |j                  j                         j                   }j                  |||fS )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  P   K   | ]  }j                   j                  |     y wr   )r   	arg_namesr   r   r  s     r   r   zBUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>$  s%      */0FII''**s   #&restore_value	reset_idxreset_to_zero)triton.runtime.autotunerr  *torch._higher_order_ops.triton_kernel_wrapr  
get_kernel
kernel_idxr   r  r  r  r  r  r  r   r  r  configs)rq  r  r  r  restore_value_argsreset_to_zero_argsr   r  s          @r   get_kernel_and_metadataz/UserDefinedTritonKernel.get_kernel_and_metadata  s   6P"--doo>(*(*fi( v}-")) *4:4F4F*  v777"))&*>*>?v{+)) FA&--fii.A.A!.DEF v777"))&*>*>?nnGYYFw 24FFFr   c                   ddl m} | j                         \  }}}|j                  || j                  ||| j
                        \  }}}| j                  D 	ci c]  }	|	| j                  |	       }
}	t        d      rt        d      sJ t                     t        fdj                  D              }g }g }g }g }t        j                  |
j                         t        t        j                   d      |            D ]q  \  }}||v r |       r|j#                  |       |j#                  |       t%        |t&              r?|j#                  |j)                                |j#                  |j+                                t%        |t,        t.        t0        t2        j4                  f      r,|j#                  |       |j#                  t        |             ||v r(|j#                  d       |j#                  t,               |R	  |       r(|j#                  d       |j#                  t,               8|j7                          |j7                          Zt9        d	t        |       d
|        | j;                  |       |j=                  ||||||d| j?                         | j@                  jB                  	       yc c}	w )YOverrides the parent member.
        See https://github.com/pytorch/pytorch/issues/151692r   )triton_version_uses_attrs_dictr  
constexprsc              3  <   K   | ]  }j                   |     y wr   )r  r  s     r   r   z2UserDefinedTritonKernel.codegen.<locals>.<genexpr>W  s     $TQV%5%5a%8$Ts   r  r'  NzUnsupported arg type: r  T)	arg_typesraw_argsraw_keystriton_metar  r   original_fxnode_name)"torch._inductor.utilsr  r  !define_user_defined_triton_kernelr   gridr  r9  r  r   r:   r  r  chainr  r   repeatr  r   r   r  r  r   r  r   r   r!   r  r  r  generate_kernel_callr  r  r   )rq  r  r  r  r  r  new_namer  extra_launch_argsr<  
named_argsconstexpr_namesr   r  raw_keys_filteredraw_args_filteredr   r}  r  s                     @r   r  zUserDefinedTritonKernel.codegen6  s   
 	I ((*	
 55KKII
		
 261S1S
,-At$$Q''

 
 v{+0M 	
tP
 	
M %$T&BSBS$TT!	')')"I$4$4R$8:K L
 $	WID# &+I+K$$T*$$S)#v&C1134  1C#udEJJ!?@C   c+( B  % 23KKO$$S)%))+%))+),B49+RPSu*UVVI$	WL 	W%$$&&#??$!%!2!2 	% 
	
g
s   Kc                P    t         |   |      t        | j                  |      z  S r   )r  r  r)   r  r7  s     r   r  z,UserDefinedTritonKernel.get_free_symbol_uses  s-    
 w+M:=MII}>
 
 	
r   c                    t               S r   r9   r  s    r   r|  z0UserDefinedTritonKernel.get_unbacked_symbol_defs  r}  r   c               B   g }i }g }|j                         D ]  \  }}	t        |	t              rXt        j	                  | j                  |	            }
||v rt        j                  |
||         }
|j                  |
       |
||<   n|j                  |	       |	||<    t        |      dk7  sJ |d   j                         | _        t        |t              sJ t        |             t        | =  d t!        | j                        |t#        |      |       || _        || _        | j)                         \  }}}}t+        |d      sJ |j,                  D cg c]	  }||v s| c}| _        ddlm} t        |      dkD  r|d   j4                  ni } ||i |||      D cg c]  }||   	 c}| _        | j6                  D cg c]#  }t9        t!        | j                        ||       % c}| _        t<        j>                  jA                  |        y c c}w c c}w c c}w )Nr   r  r  )identify_mutated_tensors)!r  r   r   rk  rh  rg  r  r  r  r   r  r   r   r   r  r  r  r   r  r  r  r  r  r  r  r  r   mutable_argsr  r  rk   r   r  )rq  r  r  tma_descriptor_metadatakernel_argsrY  r   r  r<  r  r  r  r  r   r}  r  autotuned_kwargsrV  rb  r  s                      r   r  z UserDefinedTritonKernel.__init__  s!     "$&&(%%' 		DAq!Y' 99$:L:LQ:OP//%,,Q0G0JKAa q	$$Q'q			 6{aQi**,&(+9T&\9+dkk*- 	
 %	 $ < < >A v{+++!++.
sk/AC.
* 	X03Gq0@71:,,b 0;;;*:;=T
 
 ((!
 :T[[93E!
 	
""4(%.

!
s   	HH H(Hc                ,    t        | j                        S r   )r   r  r  s    r   rz  z#UserDefinedTritonKernel.get_outputs  s    D))**r   c                    | j                   S r   r  r  s    r   r  z"UserDefinedTritonKernel.get_device  r  r   )r   z(tuple[Kernel, Any, list[str], list[str]]rf  rO  rZ  r  )
r  r   r  r   r  r  r  r  r   r   r  r@  )r   r   r   r  r   r  r  r|  r  rz  r  r  r  s   @r   r  r    s    G> W
 W
t %*
!
	!
;) ;) 	;)
 "0;) $;) 
;)z+r   r  c                  T     e Zd ZdZddZddZd	dZd
dZ	 	 	 	 	 	 	 	 d fdZ xZ	S )InplaceBernoulliFallbackE
    This needs to be a custom class to handle mutation properly
    c                   t        d | j                  D              sJ d | j                  D        \  }t        j                  j                  r\|j                  | j                          d| ddj                  t        t        | j                               d|j                          y |j                  | j                          d| ddj                  t        t        | j                               d|j                          y )Nc              3  <   K   | ]  }t        |t                y wr   rX  r   r  s     r   r   z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     >Q:a(>r  c              3  X   K   | ]"  }t        t        |      j                          $ y wr   )r   r   r  r	  s     r   r   z3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     IVQ113Is   (*r  r  z, NULL)r  )r   rY  rk   r   r  r>  r  r  r  reprr  ending)rq  r  r   s      r   r  z InplaceBernoulliFallback.codegen  s    >$++>>>>IT[[I77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klr   c                     yr  r   r  s    r   rR  z(InplaceBernoulliFallback.should_allocate  r  r   c                &    | j                  d      gS r  r`  r  s    r   r&  z+InplaceBernoulliFallback.get_mutation_names      "##r   c                    t               S r   r9   r  s    r   r|  z1InplaceBernoulliFallback.get_unbacked_symbol_defs  r}  r   c                ^   t         |   d t        |j                               | j	                  |g      ||       t
        j                  j                  |j                                t
        j                  j                  |       | _
        t
        j                  j                  |        y )Nr  r  )r  r  r  r  r  rk   r   r  r  r  r   r  )rq  r  r   r  r  s       r   r  z!InplaceBernoulliFallback.__init__  s     	alln-$# 	 	
 	
##AJJL1GG++D1		""4(r   rf  r8  r\  r  )r  ry   r   r   r  r   r   r   
r   r   r   r]  r  rR  r&  r|  r  r  r  s   @r   r  r    sF    $)'),2)DG)	) )r   r  c                  x     e Zd ZdZddZd	dZd
dZddZ	 	 	 	 	 	 	 	 d fdZe		 d	 	 	 	 	 	 	 dd       Z
 xZS )InplaceCopyFallbackr  c                R    | j                         \  }}}|j                  |||       y r   )r7  codegen_device_copy)rq  r  r&  r%  non_blockings        r   r  zInplaceCopyFallback.codegen  s)    #'#4#4#6 c<##Cl;r   c                     yr  r   r  s    r   rR  z#InplaceCopyFallback.should_allocate  r  r   c                &    | j                  d      gS r  r	  r  s    r   r&  z&InplaceCopyFallback.get_mutation_names  r	  r   c                    t               S r   r9   r  s    r   r|  z,InplaceCopyFallback.get_unbacked_symbol_defs  r}  r   c                   t         |   d |||dd       t        j                  j	                  |d   j                                t        j                  j                  |       | _        t        j                  j                  |        y )Nz
aten.copy_aoti_torch_copy_)r  r  r   )	r  r  rk   r   r  r  r  r   r  )rq  r>  rY  r  r  s       r   r  zInplaceCopyFallback.__init__  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(r   c                    ||fD cg c]  }| j                  |       }}|f}t        t        |j                               ||      }|S c c}w rj  )rg  r	  r  r  )r  r&  r%  r	  r  rY  r  r  s           r   r  zInplaceCopyFallback.create1  sX     25c
;1###A&;;%$cnn./

  <s   Arf  r8  r\  r  )r>  r  rY  r  r  r  r   r   rO  )r&  r   r%  r   r	  r   r   r	  )r   r   r   r]  r  rR  r&  r|  r  r  r  r  r  s   @r   r	  r	    s~    <$)) !) %	)
 
)$ <A

%
59
	
 
r   r	  c                  8    e Zd ZdZddZd	dZd
dZddZd	dZy)MutatingFirstArgExternKernelr  c                   t        | j                        sJ g d | j                  D        t        t        | j                        }|j                  | j                          ddj                  |       d|j                          y )Nc              3  <   K   | ]  }|j                           y wr   r  r	  s     r   r   z7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>G  s     9a!!#9r  r  r  r  )	rZ  rY  r  r	  r  r>  r  r  r	  )rq  r  argrefss      r   r  z$MutatingFirstArgExternKernel.codegenD  s~    ,,,
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
r   c                     yr  r   r  s    r   rR  z,MutatingFirstArgExternKernel.should_allocateN  r  r   c                &    | j                  d      gS r  r	  r  s    r   r&  z/MutatingFirstArgExternKernel.get_mutation_namesQ  r	  r   c                    t               S r   r9   r  s    r   r|  z5MutatingFirstArgExternKernel.get_unbacked_symbol_defsT  r}  r   c                     yrt  r   r  s    r   has_side_effectsz-MutatingFirstArgExternKernel.has_side_effectsW  r  r   Nrf  r8  r\  r  )	r   r   r   r]  r  rR  r&  r|  r"	  r   r   r   r	  r	  ?  s     
$r   r	  c                        e Zd Zd fdZ xZS )ResizeStorageBytesc                   t        |t              sJ d       t        |   d t	        |j                               | j                  |g      |f       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j                  |        d| _        d| _        t        |t         t"        t$        f      sJ t'        |             t        j                  j(                  j+                  |j,                  j                                y )NzTODO: dynamic shapesr  )r  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)r   r   r  r  r  r  r  rk   r   r  r  r  r   r  r  r  r  r  r   r   never_reuse_buffersr  r=  )rq  variabler  r  s      r   r  zResizeStorageBytes.__init__\  s    (C(@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG(Xz9$EFVXVF	##''(>(>(@Ar   )r'	  r   r  r   r   r   rx  r  s   @r   r$	  r$	  [  s    B Br   r$	  c                  (     e Zd Zd fdZddZ xZS )SetSourceTensorKernelc                   |j                          t        | 	  |j                         ||gdt        j
                  j                  j                  j                         t        |t        t        t        f      sJ t        |             t        j                  j                   j#                  |j$                  j'                                t        j                  j                   j#                  |j'                                t        j                  j                   j#                  | j'                                |j)                         }t+        t-        |      ||       t+        t-        |      ||       g| _        y )Nz!torch.ops.aten.set_.source_Tensor)r  r  r  )r  r  r  r  r  ri   r  set_source_Tensorr   r  r  r   r   rk   r   r&	  r  r=  r  r  r  r  r  )rq  self_tensorstorage_tensorr   r  s       r   r  zSetSourceTensorKernel.__init__n  s   $$&%%'.)B		++99	 	 	
 +*i'HI 	
4L
 	
I 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4k4H:V4ndK!
r   c                F    | j                  d      | j                  d      gS r  r	  r  s    r   r*  z2SetSourceTensorKernel.get_inputs_that_alias_output  s    "DOOA$677r   )r-	  r   r.	  r   r   r   r\  )r   r   r   r  r*  r  r  s   @r   r)	  r)	  m  s    
(8r   r)	  c                  j     e Zd ZdZd
dZddZddZddZddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fd	Z xZ	S )ScatterFallbackz
    This needs to be a custom class to handle mutation properly.
    This class handles both aten.scatter_ and aten.scatter_reduce_.
    It also handle the case `src` being a scalar properly.
    c           
        | j                   d   }t        j                  j                  rddd}||v r||   }t	        | j
                        sJ | j                  rd | j
                  D        \  }}}n%d | j
                  D        \  }}| j                  d   }|j                  ||| j                  d   ||g| j                  | j                  | j                  || j                                y )	Nr  r  r  )r  multiplyc              3  <   K   | ]  }|j                           y wr   r	  r	  s     r   r   z*ScatterFallback.codegen.<locals>.<genexpr>  s     Jq224Jr  c              3  <   K   | ]  }|j                           y wr   r	  r	  s     r   r   z*ScatterFallback.codegen.<locals>.<genexpr>  s     EA!--/Er  r?   r   )r   rk   r   r  rZ  rY  src_is_tensorr  generate_scatter_fallbackr  r  r=  )rq  r  r  get_operator_enumr   r   r%  s          r   r  zScatterFallback.codegen  s    X&77(-6 B***62,,,JdkkJOQsEEJQ$$Q'C))""1%uc2  ##!	
r   c                     yr  r   r  s    r   rR  zScatterFallback.should_allocate  r  r   c                f    | j                   d   }t        |t              sJ |j                         gS r  r^  rq  r[  s     r   r&  z"ScatterFallback.get_mutation_names  s.    kk!n#v&&&r   c                    t               S r   r9   r  s    r   r|  z(ScatterFallback.get_unbacked_symbol_defs  r}  r   NTr  include_selfc          
     f   t        |t              | _        | j                  r%|||fD cg c]  }| j                  |       }	}|f}
n$||fD cg c]  }| j                  |       }	}||f}
t        |   d t        |j                               | j                  |	      |
||dt        |      ddg|       t        j                  j                  |j                                t        j                  j                  |       | _        t        j                  j!                  |        y c c}w c c}w )Nr  r=	  r  r>	  )r  r  r  )r   r   r6	  rg  r  r  r  r  r  r   rk   r   r  r  r  r   r  )rq  r  r   r3  r   r%  r  r>	  r  tensorsr  r  s              r   r  zScatterFallback.__init__  s    (Y7 78%oFt))!,FGF FM78%jAt))!,AGA #JMalln-(|<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D)D.rf  r8  ri  r  )r  ry   r   r   r3  r   r   r   r%  r   r  r>  r>	  r   r   r   r	  r  s   @r   r1	  r1	    s|    
0 
 !%!!)!!) !) 	!)
 !) !) !) !) 
!) !)r   r1	  c                  \     e Zd ZdZddZddZd	dZd
dZ	 	 	 	 	 	 	 	 	 	 	 	 d fdZ xZ	S )IndexPutFallbackzQ
    This needs to be a custom class to handle mutation and indices properly
    c                   t        | j                        sJ d | j                  D        ^}}}g }t        |      }t        | j                        D ]b  \  }}| j                  |   |j                  t        |             0|j                  t        j                  j                  j                         d  |j                  | j                         |||g| j                           y )Nc              3  <   K   | ]  }|j                           y wr   r	  r	  s     r   r   z+IndexPutFallback.codegen.<locals>.<genexpr>  s     &Rq':':'<&Rr  )rZ  rY  r  r   r`  r  r  rk   r   r<  rh  generate_index_put_fallbackr  r4  )	rq  r  r   r   valid_indicesr`  iter_valid_indicesr   r   s	            r   r  zIndexPutFallback.codegen  s    ,,,&Rdkk&R#F]!-0dll+ 	>DAq||A*t$678qww33<<=		> 	,++  "Aw	
9=9P9P9R	
r   c                     yr  r   r  s    r   rR  z IndexPutFallback.should_allocate  r  r   c                &    | j                  d      gS r  r	  r  s    r   r&  z#IndexPutFallback.get_mutation_names  r	  r   c                    t               S r   r9   r  s    r   r|  z)IndexPutFallback.get_unbacked_symbol_defs  r}  r   c           	        || _         |D cg c]  }||	 }}||g|D cg c]  }| j                  |       }}d}	t        
|   d t	        j                               | j                  |      |fd|	|       t        j                  j                  | j                  d             t        j                  j                  |       | _        t        j                  j                  |        y c c}w c c}w )Naoti_torch_index_put_outr  zaten.index_put_)r  r  r  r   )r`  rg  r  r  r  r  r  rk   r   r  r`  r  r   r  )rq  r  r   r`  r   
accumulater   rF	  r@	  r  r  s             r   r  zIndexPutFallback.__init__  s     $+=qq}==34f2M}2MNQ4%%a(NN4alln-(M0+# 	 	
 	
##DOOA$67GG++D1		""4( >Ns   C/C/C4rf  r8  r\  r  )r  ztorch._ops.OpOverloadr   r   r`  r  r   r  rM	  r   r   r   r	  r  s   @r   rB	  rB	    s`    
$)*) ) 	)
 ) ) 
) )r   rB	  c                  &    e Zd Zedd       ZddZy)
DeviceCopyc           	     `   |j                         sKt        d |j                         D              r+t        j                  j
                  s|j                  |      S t        j                  j                  |       |j                         }|J t        j                  j                  |       t        d       |f}t        j                  |      }d }|j                         r|j                         }t!        |j"                        xr |j"                  dk(  xr |}|j"                  dk(  xr t!        |j"                        xr |}|r t%        |      rd|j'                         _        t+        t-        ||j/                         |j                         ||      | j1                  |      g|      S )Nc              3  T   K   | ]   }|t         j                  j                  v  " y wr   )rk   r   r  r  s     r   r   z$DeviceCopy.create.<locals>.<genexpr>  s     GqA***Gs   &(zDeviceCopy in input programr  Tr  )r   r   r  r@   aot_inductoruse_runtime_constant_foldingr$  rk   r   add_device_infor  rZ   r  r  r   r%  rb   r   r  r  rB  rO	  r@  r  rg  )	r  r   r   r	  x_devicer  r   is_destination_pinnedis_source_pinneds	            r   r  zDeviceCopy.create  sc    GA4D4D4FGG''DD''//	'<<>###	)78%++A.::<\\^F8==!KfkkU&:K| 	 MMU"Kvfkk':K| 	  5a 8'+ALLN$

/ q!"

 
	
r   c                   | j                         }t        |      dk(  sJ | j                  r2|j                  |d   | j                  j	                         |d          y |j                  |d   | j	                         |d          y )Nr   r   r?   )r7  r   r  r	  r  )rq  r  r   s      r   r  zDeviceCopy.codegen6  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Sr   N)r   r   r   rC  r	  r   r   r   rf  )r   r   r   r  r  r  r   r   r   rO	  rO	    s    '
 '
RTr   rO	  c                  n     e Zd ZdZddZd	dZ	 	 	 	 	 	 	 	 	 	 	 	 d
 fdZddZ	 d	 	 	 ddZddZ	 xZ
S )DynamicSelectStorageOffseta  
    The result of computing a dynamic selection index is determined as follows: when the index in the
    select operation is unbacked, the actual index calculation is ambiguous for negative indices
    (index + size) versus non-negative indices (just index). To resolve this, we allocate an unbacked
    SymInt to represent the storage offset and decompose the select operation into a call to as_strided,
    computing the storage offset at runtime with this node.
    c                    t               S r   r9   r  s    r   r  z$DynamicSelectStorageOffset.get_readsJ  r}  r   c                     yr  r   r  s    r   rR  z*DynamicSelectStorageOffset.should_allocateM  r  r   c                    t         |   d t        t        j                  d            g        || _        || _        || _        || _        || _	        y Nr  r  )
r  r  r  r  r   unbacked_offset_symbolr   base_offsetbase_dim_strider   )rq  r_	  r   r`	  ra	  r   r  s         r   r  z#DynamicSelectStorageOffset.__init__P  sL     	ze1DErJ '=#
&.	r   c                .    t        | j                  g      S r   )r:   r_	  r  s    r   r|  z3DynamicSelectStorageOffset.get_unbacked_symbol_defsa  s    466788r   c                .    t        | j                  |      S r   )r)   r   r  s     r   r  z/DynamicSelectStorageOffset.get_free_symbol_usesd  s      

M::r   c                &    |j                  |        y r   )codegen_dynamic_select_indexr  s     r   r  z"DynamicSelectStorageOffset.codegeni  r|  r   rW  r8  )r_	  sympy.Symbolr   rf	  r`	  Union[sympy.Symbol, int]ra	  rg	  r   rg	  r   r   r  rO  rZ  rf  )r   r   r   r]  r  rR  r  r|  r  r  r  r  s   @r   rZ	  rZ	  A  su     ,  .	
 2 ' 
"9 %*;!;	!;
3r   rZ	  c                  T     e Zd ZdZddZddZ	 	 	 	 	 	 	 	 d	 fdZd
dZddZ xZ	S )r   z;
    The result of a call to aten._local_scalar_dense.
    c                    t               S r   r9   r  s    r   r  zDynamicScalar.get_readsr  r}  r   c                     yr  r   r  s    r   rR  zDynamicScalar.should_allocateu  r  r   c                    |j                          t        | 	  d t        t	        j
                  d            | j                  |g             || _        || _        y r^	  )	r  r  r  r  r  r   r  symkeypath)rq  rl	  rm	  r=  r  s       r   r  zDynamicScalar.__init__x  sM     	*ELL$78$:M:Mtf:U	
 r   c                .    t        | j                  g      S r   )r:   rl	  r  s    r   r|  z&DynamicScalar.get_unbacked_symbol_defs  s    488*%%r   c                &    |j                  |        y r   )codegen_dynamic_scalarr  s     r   r  zDynamicScalar.codegen  s    &&t,r   rW  r8  )rl	  rf	  rm	  zpytree.KeyPathr=  r   r   r   r  rf  )
r   r   r   r]  r  rR  r  r|  r  r  r  s   @r   r   r   m  sA    *8@F	&-r   r   c                  V     e Zd ZdZddZd	dZd
 fdZd	dZ	 d	 	 	 ddZddZ	 xZ
S )r   z5
    The result of a call to aten._assert_scalar
    c                    t               S r   r9   r  s    r   r  zAssertScalar.get_reads  r}  r   c                     yr  r   r  s    r   rR  zAssertScalar.should_allocate  r  r   c                ~    t         |   d t        t        j                  d            g        || _        || _        y r^	  )r  r  r  r  r   scalarr  )rq  ru	  r  r  s      r   r  zAssertScalar.__init__  s7    ell512	
 r   c                     yrt  r   r  s    r   r"	  zAssertScalar.has_side_effects  r  r   c                .    t        | j                  |      S r   )r)   ru	  r  s     r   r  z!AssertScalar.get_free_symbol_uses  s      ];;r   c           	        t         j                  sy t        t        | j	                  d                  }t
        j                  j                  ry t
        j                  j                  rad| d}t
        j                  j                  j                  | j                  d      }|j                  d| d| j                   d| d	       y t
        j                  j                  j                  | j                  d      }|j                  d
| d       |j                  dt        | j                         d       |j                  | j!                          d       y )NFrW  zstd::to_string(r  )r  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not (z):z    raise RuntimeError(z = None)r@   scalar_assertsr  r  r  rk   r   r  r  r<  codegen_cpp_sizevarru	  r>  r  codegen_python_sizevarr	  r  )rq  r  symbol
symbol_strsizevars        r   r  zAssertScalar.codegen  s4   $$ d44454IJK77WW  *6(!4Jgg**>>e ? G 	!J488*Tfgqfrrwx gg**AAe B G 	45 7TXX7GqIJ  19:r   rW  r8  )ru	  rm   r  r   r   r   rO  rZ  rf  )r   r   r   r]  r  rR  r  r"	  r  r  r  r  s   @r   r   r     s;    	 %*<!<	!<
;r   r   c                  "    e Zd ZU ded<   ded<   y)ExternKernelNoder   r   zexport_schema.Noder   Nr   r   r   r   r	  r	    s    
I
r   r	  c                       e Zd ZdZ	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZd fdZddZddZddZe		 	 	 	 	 	 dd	       Z
dd
ZddZddZd Zedd       Ze	dd       Zedd       Zd fdZ xZS )FallbackKernelz
    A class that represents a fallback kernel for handling operators that are not
    directly support by inductor. It currently supports functional ops, view ops,
    inplace aten ops, and mutating ops that are auto-functionalizable.
    Nr  c                   t            |t        |      t        |      |       d _        |xs i  _        t        |t        j                  j                  t        j                  j                  f      sJ d| dt        |       d       | _        | _        |i n| _         j                  J t        j                   j#                   j                         g  _        g  _        t         j                  t        j                  j                        ry d j                  j)                         v ry  j                  j*                  }t        j,                  j.                  j1                   j                        r- j&                  j3                  |d   j5                                y |j6                  rt9        |      st;        d|        j                   j<                   j>                        \  }	}d
 fd	}
t        j,                  j.                  jA                  ||	|      D ]  \  }} |
||        y )Nr	  F#Fails to create FallbackKernel for r   not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for c                    t         j                  t        j                        r&t        |t        t
        f      sJ t        |             t        j                   j                        rt        |t
        t        f      rJ |y  j                  y d fd}t        j                   j                        r||D ]
  } ||        y y t        j                   j                        sJ  ||       y )Nc                   j                   j                  | j                                j                  J j                  j                  r?j
                  j                  t        t        | j                               |              y y rj  )	alias_namesr  r  
alias_infois_writer  r  r  r  )r  inforq  s    r   	add_aliaszPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias+  sj      ''

5222??++))00&z'H!TR ,r   )r  r   r   r   )
r   r   r  ListTyper   r   library_utilsis_tensor_like_typer	  is_tensorlist_like_type)r	  r}  r	  optional_tensor_argrq  s   `   r   handle_aliasing_and_mutationz=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation  s    $))U^^4!#e}5@tCy@500; &cE4=999{& 44TYY??/2 7+!"567 # %88CCC#r   )r	  ztorch._C.Argumentr}  r   r   r   )!r  r  r   use_runtime_dispatchr  r   r  r  r  r  r   r  r  r   r  rk   r   warn_fallbackr	  r  r   r  _libraryr  mutates_and_returns_first_argr  r  
is_mutabler&   r  rY  r  
zip_schema)rq  r>  r  r  nontensor_argsr  r   r  schemar   r	  r	  r}  r  s   `            r   r  zFallbackKernel.__init__  s    	+.!	 	 	
 %*!!2!8bUZZ**EJJ,J,JK
 	X04<.W	X 
 ","Nb&&222	d556 '))+d&&

(F(FG !1!1!6!6!88
 !!)) >>==d>N>NO&&{1~'>'>'@A%;F%C%9&B  **4;;8J8JKf	< --88vN 	4ID#(s3	4r   c                ,   t         |          }| j                  t        j                  j
                  j                  u rT| j                  D ]E  }t        |t              s|j                  t        j                  |j                                     }G |S r   )r  r  r  r  _prims	rng_primsgraphsafe_run_with_rng_stater  r   r  	with_readrA   r  r  )rq  r_  r}  r  s      r   r  zFallbackKernel.get_read_writes>  sy    g-/u||55RRR)) c>2"-"7"7$,,S\\^<#K r   c           	     n    |j                  | j                         | j                  t        | dd             S Nr  )(codegen_unbacked_symbol_defs_for_outputsr  r  r   r  s     r   codegen_unbacked_symbol_defsz+FallbackKernel.codegen_unbacked_symbol_defsJ  s0    ??MMOT\\749Ld+S
 	
r   c                    t        | dd       x}rBt        t        j                  j                  j
                  |      }|J |j                         S t               S r	  )r   r5   rk   r   r   r   rt  r:   rq  r  resolveds      r   r|  z'FallbackKernel.get_unbacked_symbol_defsO  sZ     '.A4 HHH0  **,=H '''==?"<r   c                Z   t         j                   G d d             }t        | j                        sJ | j                  D cg c]  } ||j	                                }}| j                  || j                        \  }}t        j                  j                  rt        | j                  t        j                  j                        r| j                  ||      }t!        | j                  j"                  j$                  |      D cg c]9  \  }}t        j                  j&                  j)                  ||j*                        ; }}}n6|D cg c]+  }t        j                  j&                  j)                  |      - }}| j,                  j/                  |       |S c c}w c c}}w c c}w )Nc                       e Zd ZU ded<   ddZy))FallbackKernel.codegen_args.<locals>.Shimr   refc                    | j                   S r   )r	  r  s    r   r  z2FallbackKernel.codegen_args.<locals>.Shim.__repr__^  s    xxr   NrJ  )r   r   r   r   r  r   r   r   Shimr	  Z  s    H r   r	  )r^  	dataclassrZ  rY  r  r  r  rk   r   r  r   r  r  r  r  r.  r   r  r  r<  r0  r  r   r  )rq  r	  r   r  r   r   params          r   r7  zFallbackKernel.codegen_argsY  sY   				  	  
	   ,,,<@KKHqtA//12HH**;8J8JKf77:d.>.>

@U@U#V..tV<D !$D$4$4$<$<$F$F ME1 $$33AuGD 
 EIIqAGG((77:IDI 	6" I
 Js   F>F"0F(c                F   | r!| D cg c]  }t        |t              r| c}nd }|r7| sJ | D cg c]#  }|j                         s|j                         % }}|d   S t        |t        j                        r|j
                  S t        |t        t        f      rxt        d |D              }|D cg c]  }|s|	 }}t        |      dk(  r|d   S |D ]7  }t        |t        j
                        sJ t        |j                        s5|c S  |d   S y c c}w c c}w c c}w )Nr   c              3  H   K   | ]  }t         j                  d |        y wr   )r	  find_devicerQ  s     r   r   z-FallbackKernel.find_device.<locals>.<genexpr>  s"      $89**43$r  r?   )r   ri  r  r  r  r   r   r   r:   r   rb   r   )r  r  r  non_torch_bind_tensor_argsr}  devices
device_setr   s           r   r	  zFallbackKernel.find_deviceq  s#     $J1:a+IQJ 	#
 &;3>SC#..BRs~~'SGS1:nell3!(((ntUm4# $=K$ J -7A&&vAGA7|q qz!! "!&%,,777&++&!M" 1:/ K T Bs!   DDDD1D9Dc                    t        | j                  t        j                  j                        ryt        | j                        j                         S r  )r   r  r  r  r  r/   r	  r  s    r   r"	  zFallbackKernel.has_side_effects  s9    d&&

(F(FGt//0;;==r   c                   t        | j                  t        j                  j                  t        j                  j
                  f      s+J d| j                   dt        | j                         d       t        | j                  t        j                  j
                        sSd| j                  j                         vr7| j                  j                  j                  rt        | j                        rg S | j                  S )Nr	  r  r	  r	  )r   r  r  r  r  r  r   r   r  r	  r&   r	  r  s    r   r*  z+FallbackKernel.get_inputs_that_alias_output  s    uzz44ejj6T6TU
 	
 2$2B2B1C2D$$%&n6	
 
 4++UZZ-K-KL"$*:*:*?*?*AA  ((33&t'7'78I###r   c                N    t        | j                        dk  sJ | j                  S r  )r   r  r  s    r   r&  z!FallbackKernel.get_mutation_names  s'    4&&'1,,,"""r   c           	        t         j                  d| j                         | j                         t	        | t
              sJ t        |              | j                  | j                  | j                        \  }}| j                  ||      }| j                  D cg c]  } | j                  |fi | }}| j                  }t        j                  j                  sg ||S t!        dg       }|j#                  |||      }	 	 	 	 	 	 dd}t	        |t$        j&                  j(                  j*                        r#|j-                  |d   |d         j.                  }	n|j0                  j.                  }	t3        |	      dk(  r>| j4                  r| j4                  n| j6                  }
|	d   j8                  } |||
      g}n9t;        |	| j4                        D cg c]  \  }} ||j8                  |       }}}| j                  J t=        | j                         t?        j@                  | j                  jC                         ||i             }t        jD                  jG                  |       g ||S c c}w c c}}w )	a  
        ProxyExecutor Design Note
        We export the ExternFallbackNodes (for custom ops) into a serialized file
        and run it with a host side proxy executor to address the ABI problem
        This is currently only implemented for fbcode. Eventually, we will also make this work for OSS.
        Detailed design doc can be found at
        https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing
        z4Extern kernel node added for node %s with target %s.Nc           	        t        | t        j                  t        j                  f      r|}t        |t        t
        f      rt        |      dk(  sJ |d   }t        | t        j                        rTt        |t              sJ t        j                  j                  t        j                  |j                                     S |J t        j                  j                  d      S t        | t        j                        rt        | j                         t        j                        rpt        |t              sJ t!        |             t        j                  j                  |D cg c]&  }t        j                  |j                               ( c}      S t        | t        j"                        rt        | j                         t        j                        r|>t        j                  j                  t        j$                  j                  d            S t        |t              sJ t        j                  j                  t        j$                  j                  t        j                  |j                                           S t        | t        j&                        r t        j                  j                  |	      S t)        d
t!        |              c c}w )Nr?   r   rz  )	as_tensorT)as_none)
as_tensors)as_optional_tensor)as_intzUnsupported return type )r   r  
TensorTypeNoneTyper   r   r   r   export_schemarn   r  TensorArgumentr  r	  getElementTyper   r   OptionalTypeOptionalTensorArgumentIntTypeRuntimeError)return_typerL  r   s      r   handle_single_outputzFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output  sh    +(8(8%..'IJftUm4v;!+++ )Ck5+;+;<%c6222(1188"/">">CLLN"S 9   ;&;(11888FFK8Z**,e.>.>> "&(3AT&\A3$--44 $*  &44#,,.I  5   K););<**,e.>.>B >(1188+8+O+O+V+V$( ,W , 9   &ff555(1188+8+O+O+V+V&3&B&B%+__%6' ,W , 9   K7$--44F4CC"%=d;>O=P#QRR7 s   )+K!r   r?   )r  rY  r  metadata)r   r   )r	  z6Union[torch.TensorType, torch.ListType, torch.JitType]rL  Union[IRNode, Sequence[IRNode]]r   zexport_schema.Argument)$rp  r  r  r  r   r	  r   r  rY  r  r.  r  r9  rk   r   aot_moder%   serialize_inputsr  r  	torchbindCallTorchBindr	  returnsr  r   r  r  r  r   r	  r	  r8   r   extern_kernel_nodesr  )rq  r   r   rV  ordered_kwargsr  
serializernamed_argumentsr	  r	  r  r	  output_argumentsreturn_schemarL  r   s                   r   export_extern_kernel_nodez(FallbackKernel.export_extern_kernel_node  sj    			BMMO	
 $/;d;/**4;;8J8JKf**48 99
 "D!!#00
 
 !!ww+T+N++*44
$55fdFK3	SO3	S33	S $3	Sj fe55??MMNmmDGT!W5==Gnn,,Gw<1 '+lldll8M8MG!!*..K 4[' JK .1$,,-G 
 *M6	 %!++    +++##'',,.&(	
 	
$$T*''''K
` s   I3I8c                "     j                   }|J |j                  dk(  rt        |t        j                  j
                        sJ t        |             t        j                  j                  rddl
m} t        |      |vrt        j                  d|       d _        n~|j                  dk(  r4t        |t        j                  j
                        sKJ t        |             t        j                  j                  r!|t         j"                  j$                  v _        t        j                  j                  rt        |t        j                  j
                        r j                  sdfd j'                   j(                   j*                        \  }t-        j.                  | fd	 j0                  D              }t3        fd
t5        ||j6                  j8                        D               _         j;                  |        j                  r j=                         } j>                  J  j                   J |jA                   jC                          j>                   fd j                   | jD                  r jD                  n jF                         n^|jI                          t         jJ                  tL              r3 jO                  |        jQ                  |        jS                  |        jU                  |       y)r  Nr  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedc                    t        | t        j                        r | j                               S t        | t        j                        S r   )r   r  r	  r	  
NumberType)r  	is_numbers    r   r	  z)FallbackKernel.codegen.<locals>.is_numberM  s:    a!3!34$Q%5%5%788!!U%5%566r   c              3  D   K   | ]  } j                   |fi   y wr   )r9  )r   r<  r   rq  s     r   r   z)FallbackKernel.codegen.<locals>.<genexpr>Z  s+       *D))!6v6s    c              3  l   K   | ]+  \  }}t        |t              xr  |j                         - y wr   )r   complexr  )r   r  r  r	  s      r   r   z)FallbackKernel.codegen.<locals>.<genexpr>_  s5      ,Aq 1g&A9Q[[+AA,s   14c                 H    g  j                          j                         S r   )r7  r=  r  s   r   rT  z(FallbackKernel.codegen.<locals>.<lambda>m  s$    F$++-F0C0C0EF r   )r  ztorch.JitTyper   r   )+r  r  r   r  r  r  r   rk   r   r  torchgen.aoti.fallback_opsr	  r   rp  r  r	  r@   rR	  custom_ops_to_c_shimsr  rY  r  r  r  r  r   r   r  r  r  r	  r  ,generate_fallback_kernel_with_runtime_lookupr  r  r  generate_fallback_kernelr>  r  rH  rL  rQ  r	  )	rq  r  r  r	  r   	args_iterexported_argsr	  r   s	   `      @@r   r  zFallbackKernel.codegen&  so    !!!!!v%fejj&;&;<Jd6lJ<ww""Lv;&;; KKa 15D--fejj&;&;<Jd6lJ<WW   f11GGG % GG65::#8#89--7  ..t{{D<N<NOLD& "!??I ), ,	6>>+C+CD, )D%
 	W%$$ ::<M**666##///@@''F   $$2G2G ,,T2$++v.))'2..w7,,W5))'2r   c           	         d}	 | j                         }t        | j                  | j                  t        | j                               t        | j                               |      S # t        $ r Y ]w xY w)NFr  )rB  r	  r@  r   r   rX   r   r   )rL  rB  s     r   tensor_to_layoutzFallbackKernel.tensor_to_layout|  sj    		((*I MMLL%fkkm4%fmmo6
 	
  		s   A& &	A21A2c                    t         j                  f}||vr,t        t        d   t        j
                  j                        }n
t               }|5    j                  |g|i |\  }}}}	}
ddd       t        d D               j                  |      }|sCt        |t        j                  j                  j                        rt        j                   d      }|  t#        |      ||	
      n!|sJ d         t%        |      ||	
      d fd |g       }t        |t&        t(        f      r	|_        |S t        |t,              rt)        |      _        |S |g_        |S # 1 sw Y   xY w)	z9Create an instance of FallbackKernel from an _OpOverloadsNc              3  2   K   | ]  }t        |        y wr   )r  r|  s     r   r   z(FallbackKernel.create.<locals>.<genexpr>  s     !K,s"3!KrR  r  r  r	  z"Not sure where to find device infoc                    t         t        t        f      r. t                fdt	        t                     D              S t         t              r: j                         D ci c]  \  }}| |t               |fgz           c}}S t         t        j                        rnt        j                               }t        j                  sst               s3t        j                   j"                  j%                  |j&                         |S t         t(              r S t         t        j*                        r j,                  j.                  S  J dt                d       y c c}}w )Nc              3  T   K   | ]  } |   t              |fgz          ! y wr   )r   )r   r   generate_outputr`  rL  s     r   r   zAFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>  s5      $ $F1Iw4<:K9L/LM$r$  zFallbackKernel output type z is not supported)r   r   r   r   r   r   r   r  r  r  MultiOutputr	  r@    assume_unaligned_fallback_outputrh   rk   r   r  r  r   r   SymIntr   r  )	rL  r`  rV  r  rb  r  r	  has_unaligned_inputpackeds	   ``   r   r	  z.FallbackKernel.create.<locals>.generate_output  sH   &4-0#tF| $"3v;/$   FD) %+LLN S g$v,9L8M.MNN  FELL1!((0 ;;*,V4GG--11#((;
FC(FELL1{{'''~ 1$v,?PQ~ 3s   +#E3)rL  r   r`  zlist[tuple[Any, int]]r   r   )r  *_fused_moving_avg_obs_fq_helper_functionalr   r	   rk   r   r  r
   r   r   r	  r   r  r  r	  r	  r   r  rP  r   r   r  r   )r  r  r   r   fake_incorrect_kernelscontextr  r  r  r  r  r   r  r	  r	  r	  s   `            @@@r   r  zFallbackKernel.create  s    #'"Q"Q!S//1$79J9JKG!mG 	< #""6;D;F;!	< "!K{!KKn=*E++55CC
 \\%(F!&)"3F ???6!0"3F 	  	D "."5ge}-$FN
 	 &"7^FN  &YFNg	< 	<s   E99Fc                     t         |          S r   )r  r  r  s    r   r  zFallbackKernel.apply_constraint  s    w'))r   r   r>  r  r  ry   r  r  r	  r  r  r  r   re  r  ,Optional[dict[sympy.Symbol, pytree.KeyPath]]r   r   rU  rf  )r   zContainer[sympy.Symbol]ri  )r  z Optional[Sequence[torch.Tensor]]r  r  r   r   r8  r\  )rL  r  r   r@  )r  ry   r   r   r   r   r   r	  r-  )r   r   r   r]  r  r  r	  r|  r7  r`  r	  r"	  r*  r&  r	  r   r  r	  r  r  r  r  r  s   @r   r	  r	    s    ,0h4 KOh4h4 h4 &	h4
 &h4 +h4 )h4 Hh4 
h4T


 0 5GT	 :>
$*#w(r S3 S3j 
 
 [ [z* *r   r	  c                  T     e Zd ZdZddZddZdd	 	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZ xZS )
ComplexViewz9View a complex number as two dtyped numbers or vice versac                     yr  r   r  s    r   rR  zComplexView.should_allocate  r  r   c                &    | j                  d      gS r  r	  r  s    r   r*  z(ComplexView.get_inputs_that_alias_output  s    "##r   Nr	  c               0    t         |   ||||||       y )Nr	  )r  r  )rq  r>  r  r  r	  r  r  r  s          r   r  zComplexView.__init__  s)     	/ 	 	
r   r8  r\  )r>  r  r  ry   r  r  r	  r  r  r  r  r	  r   r   )r   r   r   r]  rR  r*  r  r  r  s   @r   r	  r	    sf    C$ KO

 
 &	

 &
 +
 H
 

 
r   r	  c                      e Zd ZdZddZy)MemoryCheckKernelz
    Custom kernel for memory checking that generates direct function calls

    TODO - the custom op was erroring with str inputs. should be able to custom op directly.
    c                    |j                          | j                  \  }}}t        |      }t        |      }|r|j                  d       d| d| d| d}n	d| d| d}|j                  |       y)z.Override codegen to write direct function callzV# note: dont currently distinguish between buffers returned and dealloc'd in last stepzcheck_memory_step(allocated=z, freed=z, is_final_step=r  N)rP  r  r	  r>  )rq  r  
alive_list	dead_listis_final_step
alive_repr	dead_reprcalls           r   r  zMemoryCheckKernel.codegen  s     	224/3/A/A,
I}*%
O	h 2*Xi[P`an`oopqD1*Xi[PQRD$r   Nrf  )r   r   r   r]  r  r   r   r   r
  r
    s     r   r
  c                       e Zd ZU ded<   ddZy)rP  rC  r   c                    | j                   S r   r  r  s    r   r  zMultiOutputLayout.get_device*  r  r   Nr@  )r   r   r   r   r  r   r   r   rP  rP  &  s    r   rP  c                  `     e Zd ZddZ	 d	 	 	 	 	 	 	 	 	 d fdZ	 d	 	 	 d	dZd
dZddZ xZS )r	  c                    |j                  |        | j                  s#| j                  |       | j                  |       y y r   )codegen_multi_output!skip_size_stride_alignment_checksrH  rL  r  s     r   r  zMultiOutput.codegen/  s:    $$T*55%%g.**73 6r   c                    t         |   d ||gd       t        j                  j	                  |       | _        t        j                  j                  |        || _        || _        y rn  )	r  r  rk   r   r  r   r  r`  r
  )rq  r>  r_  r`  r
  r  s        r   r  zMultiOutput.__init__5  sQ     	vw3GG++D1		""4(1R.r   c                p    | j                   d   }t        |t              sJ |       |j                  |      S r  )rY  r   r   r  )rq  rX  rx  s      r   r  z MultiOutput.get_free_symbol_usesB  s7     [[^
*f-9z9-..}==r   c                p    t        | j                        dk(  xr t        | j                  d   t              S )Nr?   r   )r   rY  r   rM  r  s    r   rR  zMultiOutput.should_allocateI  s0    4;;1$ 
t{{1~'89	
r   c                    | j                   D cg c]>  }t        |t              r,t        |j	                               dkD  r|j                         @ c}S c c}w r  )rY  r   r	  r   r*  r  r;	  s     r   r*  z(MultiOutput.get_inputs_that_alias_outputN  sN     {{
#~.C4467!; LLN
 	
 
s   AArf  rO  )
r>  r  r_  r   r`  zlist[tuple[Any, ...]]r
  r   r   r   rZ  r8  r\  )	r   r   r   r  r  r  rR  r*  r  r  s   @r   r	  r	  .  sl    4 38SS S '	S
 ,0S 
S %*>!>	!>


r   r	  c                     e Zd ZU dZded<   d,dZd-dZd.dZd/dZd0dZ	d1d	Z
d2d3dZd4dZd5dZd6dZd,dZd5dZ	 d7	 	 	 	 	 d8dZd9dZd:dZ	 d7	 	 	 	 	 d;dZd<dZd=dZd>dZd?dZd@dZdAdZd,dZd,dZdBdZdCdZd1dZdCd Z d@d!Z!	 d7	 	 	 dDd"Z"dEd#Z#dFd$Z$d2dGd%Z%e&dHd&       Z'dId'Z(dHd(Z)dAd)Z*e&dJd*       Z+d1d+Z,e,Z-y
)Kr   zC
    TensorBox / StorageBox allow in-place mutation of Tensors
    r   r=  c                6    | j                   j                         S r   r  r  s    r   r  z!MutableBox.has_exceeded_max_readsa  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_deviced  r  r   c                6    | j                   j                         S r   rL  r  s    r   r  zMutableBox.make_loaderg      yy$$&&r   c                6    | j                   j                         S r   )r=  r  r  s    r   r  zMutableBox.make_indexerj      yy%%''r   c                6    | j                   j                         S r   )r=  r%  r  s    r   r%  zMutableBox.get_stridem  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_namep  r  r   Nc                8    | j                   j                  |      S r   )r=  r  r  s     r   r  zMutableBox.has_large_inner_fns  s    yy++I66r   c                8    | j                   j                  |      S r   r  r  s     r   r  zMutableBox.mark_reusev  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.realize_hinty  r
  r   c                6    | j                   j                         S r   )r=  r  r  s    r   r  zMutableBox.unwrap_view|  r
  r   c                6    | j                   j                         S r   )r=  r  r  s    r   r  zMutableBox.is_input_buffer      yy((**r   c                6    | j                   j                         S r   )r=  r  r  s    r   r  zMutableBox.freeze_layout  s    yy&&((r   c                :    | j                   j                  ||      S r   )r=  r  r   s      r   r  z*MutableBox.freeze_layout_with_stride_order  s     yy88NNr   c                8    | j                   j                  |      S r   )r=  r  r  s     r   r  z(MutableBox.freeze_layout_with_fill_order  s    yy66u==r   c                8    | j                   j                  |      S r   )r=  r	  r  s     r   r	  z(MutableBox.freeze_layout_with_same_order  s    yy66v>>r   c                :    | j                   j                  ||      S r   )r=  r  r  s      r   r  z+MutableBox.freeze_layout_with_exact_strides  s     yy99-WWr   c                6    | j                   j                         S r   )r=  r  r  s    r   r  zMutableBox.get_read_writes  r"
  r   c                6    | j                   j                         S r   r|  r  s    r   r  zMutableBox.get_reads  r  r   c                6    | j                   j                         S r   rz  r  s    r   r  zMutableBox.num_reads  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_storage_numel  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_reduction_type  r  r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_reduction_size  r  r   c                6    | j                   j                         S r   r  r  s    r   r   zMutableBox.is_extern  r  r   c                6    | j                   j                         S r   )r=  r"  r  s    r   r"  zMutableBox.is_no_op  r  r   c                8    | j                   j                  |      S r   r  r  s     r   r$  zMutableBox.constant_to_device  s    yy++F33r   c                6    | j                   j                         S r   )r=  r&  r  s    r   r&  zMutableBox.get_mutation_names  r  r   c                6    | j                   j                         S r   )r=  r(  r  s    r   r(  zMutableBox.get_operation_name  r  r   c                6    | j                   j                         S r   )r=  r*  r  s    r   r*  z'MutableBox.get_inputs_that_alias_output  s    yy5577r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.realize  r  r   c                8    | j                   j                  |      S r   r  r  s     r   r  zMutableBox.get_free_symbol_uses  s     yy--m<<r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_read_names  r  r   c                6    | j                   j                         S r   )r=  r  r  s    r   r  zMutableBox.get_defining_op  r"
  r   c                8    | j                   j                  |      S r   )r=  r  r  s     r   r  zMutableBox.codegen_reference  s    yy**622r   c                6    | j                   j                         S r   r=  r  r  s    r   r>  zMutableBox.layout  s     yy((**r   c                6    | j                   j                         S r   r  r  s    r   r  zMutableBox.get_layout  r  r   c                6    | j                   j                         S r   r:
  r  s    r   r  zMutableBox.get_output_spec  r"
  r   c                6    | j                   j                         S r   rI  r  s    r   r   zMutableBox.get_size  r  r   c                .    | j                   j                  S r   )r=  r   r  s    r   r   zMutableBox.dtype  s    yyr   c                t   t        | j                  t              rQt        |       j                   dt        | j                        j                   d}d}| j                  j                  }n&t        |       j                   d}| j                  }d}|t        t        |            |g}dj                  |      S )Nr  z))r  r  )r   r=  r   r   r   r  r   r  )rq  line0endlr  r  s        r   r  zMutableBox.__str__  s    dii,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyr   r8  r@  rD  rF  rH  rJ  r   rK  rM  r-  rN  rO  rP  rR  rS  rT  rU  rW  rX  rY  r=  r9  r[  r\  rZ  r.  r1  r?  r7  r6  r4  ).r   r   r   r]  r   r  r  r  r  r%  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r   r"  r$  r&  r(  r*  r  r  r  r  r  rc  r>  r  r  r   r   r  r  r   r   r   r   r   Y  sP    L2&'(&$7+('+) ;@O"O37O	O
>? HMX/X@DX	X
+%%-..%$4..8# %*=!=	!=
*+3 + +&+$   " Hr   r   c                      e Zd Zedd       Zy)r   c                N    t        | t              r| S t        t        |             S r   )r   r   r   r  )r=  s    r   r  zTensorBox.create  s"    d12KD)**r   N)r=  r   r   r  )r   r   r   r`  r  r   r   r   r   r     s    + +r   c                  X    e Zd ZdZddZddZddZddZddZddZ	ddZ
dd	Zdd
Zy)r  z7
    StorageBox allow in-place mutation of Tensors
    c                    t        | j                  t        t        f      r4| j                  j	                         t
        j                  j                  v S yr  )r   r=  rl  rC  r  rk   r   r  r  s    r   r  zStorageBox.is_input_buffer  s:    dii+!?@99%%'177+?+???r   c                    t        | j                  t              xr4 | j                  j                         t        j
                  j                  v S r   )r   r=  r  r  rk   r   r  r  s    r   r  zStorageBox.is_module_buffer   s9    tyy>3 :		""$(9(99	
r   c           	        t         j                  | j                        r| j                  j                         S t	        | j                  t
        t        t        t        f      sJ t        | j                               | j                  j                         }| j                  j                         }| j                  j                         }|J t        d t        || j                  j                         | j                  j!                         d      | j                        | _        t"        j$                  j'                  | j                        | j                  _        t"        j$                  j+                  | j                         | j,                  | j                  _        || j                  _        || j                  _        | j                  j(                  S )NF)r   r   r   rB  rR  )r   rn  r=  r  r   r  r0  r(  ra  r   r  r  r  rj  r  r  r   rk   r   r  r   r  ra  re  rc  )rq  re  rc  r   s       r   r  zStorageBox.realize   sP   ""499-99%%''$))iD$%GH 	
$IIK
 	
H ii//1II++-	%%'!!!"!ii))+YY'')	 	
	 00;			""499- LL		 +		'		yy~~r   c                    t        | j                  t        t        f      r9| j                  j	                         j
                  dkD  r| j                          yyy)zL
        Called on buffers we expect to be forced to realize later.
        r?   N)r   r=  r  r0  r  nontrivial_read_countr  r  s    r   r  zStorageBox.realize_hint%   sF    
 tyy9i"89		**,BBQFLLN G :r   c                H    t        d | j                         D              |kD  S )Nc              3  Z   K   | ]#  }t         j                  j                  |       % y wr   )rk   r   get_dep_size_hintr{  s     r   r   zBStorageBox.has_accumulated_enough_reads_by_size.<locals>.<genexpr>1   s     K3))#.Kr  )r  r  r  s     r   $has_accumulated_enough_reads_by_sizez/StorageBox.has_accumulated_enough_reads_by_size/   s!    K$..:JKKiW	
r   c                
   t        | j                  t              xrh | j                         t        j
                  kD  xsE | j                         xs3 t        j                  d uxr | j                  t        j                        S r   )	r   r=  r  r  r@   realize_acc_reads_thresholdr   realize_acc_reads_size_thresholdrM
  r  s    r   r  z!StorageBox.has_exceeded_max_reads4   sq    $))Y/ 	
NNvAAA &&( 77tC ==;;		
r   c                F   |dkD  rt        | j                  t        t        f      r{t	        | j                        r3| j                  j                         ddg}t        fd|D              ry| j                         t        j                  kD  xs | j                         S y)zj
        A heuristic to decide if we should realize a tensor
        that is used multiple times.
        r?   expsigmoidc              3  :   K   | ]  }|j                   v   y wr   )used_ops)r   r   opcounts     r   r   z5StorageBox.should_realize_on_reuse.<locals>.<genexpr>J   s     @qG,,,@s   TF)r   r=  r  r0  r!  r  r   r  r@   realize_reads_thresholdr  )rq  r  	heavy_opsrV
  s      @r   should_realize_on_reusez"StorageBox.should_realize_on_reuse@   s    
 19DII	9/EFdii ))446"I.	@i@@ 6#A#AA -**, r   c                H    | j                  |      r| j                          y y r   )rY
  r  r  s     r   r  zStorageBox.mark_reuseR   s    ''.LLN /r   c                6    | j                   j                         S r   rz  r  s    r   r  zStorageBox.num_readsV   r  r   Nr8  r=  r-  )r  r   r   r   )r  r   r   r   rM  rX  )r   r   r   r]  r  r  r  r  rM
  r  rY
  r  r  r   r   r   r  r    s4    

:



$%r   r  c                  0    e Zd ZU ded<   ded<   dZded<   y)Subgraphr   r   r  graph_moduleNzOptional[GraphLowering]r   )r   r   r   r   r   r   r   r   r]
  r]
  Z   s    
I&&%)E")r   r]
  c                    | D cg c]$  }t        |t              r|j                         n|& } }t        t	        d | D                    t        |       k  S c c}w )Nc              3  2   K   | ]  }t        |        y wr   )r  )r   r  s     r   r   z'_has_aliased_buffers.<locals>.<genexpr>g   s     ;"V*;rR  )r   rC  r  r   r:   )buffersr  s     r   _has_aliased_buffersrb
  a   s^      !+6? COG 
 z;7;;<s7|KKs   )Ac                       e Zd ZU dZdZded<   dZded<   dZded<   	 	 	 	 	 	 	 	 d fdZe		 	 	 	 	 	 dd	       Z
dd
Z xZS )InvokeSubgraphz.
    Ir node for the invoke_subgraph HOP.
    NOptional[Subgraph]r  Optional[Sequence[IRNode]]operandsr  c                    t         |   d ||       || _        t        j                  j                  |       | _        t        j                  j                  |        y r  )r  r  r  rk   r   r  r   r  )rq  r  rg
  r>  r  s       r   r  zInvokeSubgraph.__init__t   sQ     	 	 	

 !GG++D1		""4(r   c                N   ddl m} t        j                  j                  }d}|j
                  j                  d      x}r	|d   dd }n+|j                  dd }|D cg c]  }|j
                  d    }}|D cg c]  }| j                  |       }}g }	t        |      D ]H  \  }
}t        |t        t        f      r|	j                  |       .|	j                   ||||
                J |	}|j                  |t        j                  j                  |j                  ||j                         |_        t        j"                  |j                        5   |j                  j$                  |  ddd       |j                  j&                  }d}|D ]$  }t        |t              r|j)                         } n |J t+        ||t-        |	      
      	 	 	 	 	 	 dfd}t        |      D cg c]  \  }} |||       }}}|_        |S c c}w c c}w # 1 sw Y   xY wc c}}w )zFor each operand, get a realized input, force it to have the same
        strides as the subgraph inputs, then use an InvokeSubgraphr?   )constrain_to_fake_tensorNeager_input_valsr   r   r  rS  r  r  r  )r  rg
  r>  c                Z   t        | t        t        f      r| S | j                         }|J t	        t        || j                         | j                         | j                         | j                         j                  | j                         j                        t        |fgd      S )Nr	  T)r
  )r   r   rd  r  r	  r@  r  r   r%  r  rA  rB  r   )rL  indr   invoke_subgraphs      r   create_outputz,InvokeSubgraph.create.<locals>.create_output   s     &#8:N"OP**,)))"%$..0#__.%002%00299"("3"3"5"?"? $C[M6: r   )rL  r   rn
  r   r   z?Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput])r  rj
  rk   r   r  rQ  r  r   rg  r   r   r   r  r  r  r^
  r   r  r  graph_outputsr  rd
  rP  r  )r  r  rg
  rj
  r  fake_operandsrk
  fx_operandsr   new_operandsr   operandr  r   rp
  r   rL  outsro
  s                     @r   r  zInvokeSubgraph.create   s>    	7 ww+++00445GHHH,Q/3M '++AB/K4?@qQVVE]@M@
 AI!I1#"3"3A"6!I!I%'%h/ 	LC'$9>#JK##G,##,WmC6HI		  >>!WW22((,&mm 3 HN
 $$X^^4 3"""M23 ....  	Gg'<= ++-	 !!!($F3
		!$	L	. ;DG:LMYQfa(MM"&K A
 "J(3 3T Ns   H9HH,H!Hc                &    |j                  |        y r   )codegen_invoke_subgraphr  s     r   r  zInvokeSubgraph.codegen   r  r   )r  r]
  rg
  r  r>  rP  r   r   )r  r]
  rg
  r   r   zElist[Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput]]rf  )r   r   r   r]  r  r   rg
  r  r  r  r  r  r  r  s   @r   rd
  rd
  j   s     $(H '+/H(/*.G'.
) 
),<
)FW
)	
) WW,2W	NW Wr.r   rd
  c                       e Zd ZU dZded<   dZded<   dZded<   dZded<   dZd	ed
<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	e
dd       Ze	 	 	 	 	 	 	 	 	 	 dd       ZddZddZ xZS )ConditionalNr  	predicaterf
  rg
  re
  true_subgraphfalse_subgraphOptional[Sequence[MultiOutput]]r  c                   || _         || _        || _        || _        t	        |g|      \  }}t
        	|   d |||       ||| _        t        j                  j                  |       | _        t        j                  j                  |        y N)r   r>  rY  r  )r{
  rg
  r|
  r}
  _split_by_sym_typer  r  r  rk   r   r  r   r  )
rq  r{
  rg
  r|
  r}
  r>  r  sym_argsr  r  s
            r   r  zConditional.__init__   s     # *, 2I3I3I J+"	 	 	
 (%6D"GG++D1		""4(r   c                R    t        | t              r| S | j                  j                  S r   )r   r   r   r  )r   s    r   _maybe_exprzConditional._maybe_expr!  s    aHvv{{r   c                   | j                  |      }|D cg c]  }| j                  |       }}t        j                  j                  j                  d   }t        |t              sJ t        |             t        d |D              sJ |D cg c]  }t        t        |      j                  d   ! }}||fD ]  }|j                  t        j                  j                  |j                  ||j                        |_        t        j                  |j                        5   |j                  j                   |  ddd        |j                  J |j                  J |j                  j"                  }	|j                  j"                  }
d|	fd|
ffD ]!  \  }}t%        |	      st'        d| d	|        t)        |	      t)        |
      k(  s	J |	|
f       t+        t-        |	|
            D ]  \  }\  }}|j/                         |j/                         k(  s
J |||f       |j1                         |j1                         k(  s
J |||f       |j3                         j4                  |j3                         j4                  k(  rJ |||f        t7        d
 |g|z   D              }t9        t        j                  j:                  j<                  t        j                  j                  j                  j?                  dd            }|J d       tA        ||||tC        |      |      }t+        t-        |	t        j                  j                  j                  d               D cg c]  \  }\  }}tE        tG        ||j1                         |jI                         D cg c]  }t@        jK                  |       c}|jM                         D cg c]  }t@        jK                  |       c}|j3                         j4                  |j3                         jN                        |tP        |fg       }}}}}||_)        |S c c}w c c}w # 1 sw Y   xY wc c}w c c}w c c}}}}w )zNCreate a Sequence of IRNodes from a conditional statement (see .lowering.cond)r'  c              3  <   K   | ]  }t        |t                y wr   )r   r8   rY  s     r   r   z%Conditional.create.<locals>.<genexpr>!  s     <1:a&<r  r  Nrl
  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  \   K   | ]$  }t        |t              s|j                          & y wr   )r   r   r  )r   os     r   r   z%Conditional.create.<locals>.<genexpr>7!  s)      
a!67 LLN
r   r  zcannot determine devicer  )r{
  rg
  r|
  r}
  r>  r  r	  )*rg  rk   r   r  r   r   r   r   r   r   r8   rQ  r  r^
  r   r  r  rq
  rb
  r  r   r   r   r  r  r  rA  r  r5   r   r   r  rz
  rP  r	  r@  r   r
  r   rB  r   r  )r  r{
  r
  r
  rg
  r   rs
  rr
  r  true_outputsfalse_outputsr   r  r   t_of_or   r  conditionalrL  merged_outputr  s                         r   r  zConditional.create!  s2    %%i0	2:;QC%%a(;; ! 4 4 9 9" =+x0C${2CC0<<<<<<GHqdA++E2HH (+ 		7H~~%!"!6!6,,#0"*-- "7 "
 ((8 7&HNN&&67 7		7 }}(((~~)))}}22 44(,7*m9TU 	MD'#L1$**./TU\T]_ 	 < C$66U}8UU6&s<'GH 	UMAzS>>#s~~'77F!S#F7==?cmmo5D3}D5>>#**cnn.>.E.EET3PS}TE	U
  
[8+
 

 6GG&&GG  %%))*=tD
 !<#<<!!!#$F3/
4 /8L!''"6"6";";E"BC/#
 
" +*FM! ! **,@M@R@R@TU"+11"5U>K>R>R>T8://3 ",,.55$//1;;	 
 
, &_ <
 I7 7\ V
s<   Q$Q#Q)7Q$
 Q<Q$
Q,A	Q$
Q	
Q$
c           	         |j                  |        |j                  | j                         | j                  t	        | di              y r	  )codegen_conditionalr	  r  r  r   r  s     r   r  zConditional.codegenc!  s9    ##D)88MMOT\\749Lb+Q	
r   c                    t        | dd       x}rKt        t        j                  j                  j
                  |      }|J t        |j                               S t               S r	  r   r5   rk   r   r   r   r:   rt  r	  s      r   r|  z$Conditional.get_unbacked_symbol_defsi!  _     '.A4 HHH0  **,=H '''hmmo..<r   )r{
  r   rg
  r  r|
  r]
  r}
  r]
  r>  rP  r  r	  r   r   )r   zUnion[int, torch.SymInt]r   zUnion[int, sympy.Expr])
r{
  r   r
  r]
  r
  r]
  rg
  z-list[Union[TensorBox, ShapeAsConstantBuffer]]r   r  rf  r  )r   r   r   r{
  r   rg
  r|
  r}
  r  r  r`  r
  r  r  r  r|  r  r  s   @r   rz
  rz
     s    "&I&+/H(/(,M%,)-N&-/3G,3)) #)  	)
 !) ") H) 
)8  
 XX X 	X
 @X 
X Xt
 r   rz
  c                    g }g }| D ]?  }t        |t              r|j                  |j                         /|j                  |       A ||fS r   )r   r   r  r  )r   non_sym_argsr
  r}  s       r   r
  r
  t!  sS     LH %c01OOCHH%$	% \!!r   c                       e Zd ZU dZdZded<   dZded<   dZded<   dZded<   dZ	d	ed
<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ
edd       Ze	 	 	 	 	 	 	 	 	 	 	 	 dd       ZddZddZ xZS )	WhileLoopzSThe IR node for while_loop and while_loop_stack_output. It supports input mutation.Nrf
  carried_inputsadditional_inputsre
  cond_subgraphbody_subgraphr~
  r  c                .   || _         || _        || _        || _        t	        g ||      \  }}	t
        
|   d ||	|       ||| _        || _        t        j                  j                  |       | _        t        j                  j                  |        y r
  )r
  r
  r
  r
  r
  r  r  r  stack_outputrk   r   r  r   r  )rq  r
  r
  r
  r
  r>  r  r
  r
  r  r  s             r   r  zWhileLoop.__init__!  s     -!2** 21n101!
+ 	"	 	 	
 (%6D"(GG++D1		""4(r   c                   t        |       s| S ddlm} | D cg c]$  }t        |t              r|j                         n|& }}t               }g }t        t        | |            D ]X  \  }\  }}t        |      |v r|j                   ||             .|j                  t        |             |j                  |       Z |S c c}w )Nr?   )clone)rb
  r  r
  r   rC  r  r:   r   r   r  r  r  )	r
  r
  r  unwrapped_buffersseen_buffersr  r   original_inputunwrapped_buffers	            r   _clone_aliased_inputszWhileLoop._clone_aliased_inputs!  s    #N3!! 	$
 )
 %/v$GF VS
 
 )35> 126
 	.1A1 0 "#|3eN34  $4!56n-	. %
s   )Cc                "   ddl m} 	 	 	 	 	 	 dd}t        j                  j                  j
                  d   }t        j                  j                  j
                  d   }	||	z   }
|
D cg c]  }|j                  d    }}|D cg c]  }|j                  d    }}|	D cg c]  }|j                  d    }}|D cg c]  }| j                  |       }}t        j                  |      } |||      }|D cg c]  }| j                  |       }} |||      }||z   }||fD ]	  }|j                  t        |
t              sJ t        |
             t        j                  j                  |j                  |
|j                        |_        t        j                   |j                        5   |j                  j"                  |  ||u rYt%        |j                  j&                        t%        |      k(  sJ  ||j                  j&                  |      |j                  _        ddd        |j                  r|j                  sJ |j                  j&                  }|j                  j&                  }t)        |      rt+        d	|       t%        |      d
k(  sJ |       |d   }t        |t,              sK|j/                         t0        j2                  k(  sJ |       t%        |j5                               dk(  sJ |       t%        |      dkD  sJ d       |d   j7                         }|J t%        |      t%        |      k(  s	J ||f       t9        t;        ||            D ]  \  }\  }}	 	 	 	 	 	 dd} ||j5                         |j5                                 ||j=                         |j=                                |j7                         |j7                         k(  sJ ||||f       |j/                         |j/                         k(  rJ |||f        |J t?        t        j                  j@                  jB                  t        j                  j                  j                  jE                  dd            }t        ||||tG        |      ||      }|j                  8t        |j                  jH                  t0        jJ                  jL                        sJ  ||j                  jH                  |      d   }tO        |      }|D cg c]  }||   	 } }tQ        |       }!g }"g |_)        g |_*        |rt%        |      dk(  sJ d       t9        t        j                  j                  j                  d         D ]  \  }}#tW        tY        |#jZ                  |#j\                  |#j_                         D $cg c]  }$t`        jc                  |$       c}$|#je                         D %cg c]  }%t`        jc                  |%       c}%      |tf        |fg      }&|jR                  ji                  |&       |"ji                  |&        nt9        |      D ]
  \  }}#||v rb|t%        |      k  sJ d       tk        |!      }'|jT                  ji                  tm        |'jn                  |'|             |"ji                  |'       mtW        tY        |#j7                         |#j/                         |#j5                         |#j=                         |#jq                         jr                        |tf        |fg      }&|jR                  ji                  |&       |"ji                  |&        t;        ||"      D ]g  \  }(})|(ju                         t        j                  jv                  v s1t        j                  jx                  j{                  |)ju                                i |"S c c}w c c}w c c}w c c}w c c}w # 1 sw Y   xY wc c}w c c}$w c c}%w )zcreate the while_loop IR node. stack_output controls whether it stack
        each iterations' output, which is necessary for training.
        r   )check_input_alias_and_mutationc                &   t        |       t        |      k(  sJ g }t        | |      D ]f  \  }}t        |t        j                        r6|j                  t        j                  ||j                         d             V|j                  |       h |S )NFr  )	r   r   r   r  r  r  r  r  r   )tensor_boxesfake_tensorsretr  fks        r   _require_exact_stridesz0WhileLoop.create.<locals>._require_exact_strides!  s     |$L(9999ClL9 #Bb%,,/JJ$::		5 ;  JJrN# Jr   r'  r  Nrl
  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r?   z9torch.while_loop is assumed to have at least one operand.c                    t        |       t        |      k(  sJ t        | |      D ]/  \  }}t        j                  j                  j                  ||       1 y r   )r   r   rk   r   r   r
  )	lhs_exprs	rhs_exprslhsrhss       r   _guard_list_equalsz,WhileLoop.create.<locals>._guard_list_equals3"  sN     9~Y777 #Iy 9 <HCGG$$11#s;<r   r  r  )r
  r
  r
  r
  r>  r  r
  r   z-NYI: while_loop_stack_output input mutations.)r   r   r   r   zonly carries can be mutated.)r   r   r   r   rA  )r
  r  r
  z,list[Union[int, torch.SymInt, torch.Tensor]]r   r   )r
   Sequence[Union[int, sympy.Expr]]r
  r
  r   r   )>torch._higher_order_ops.utilsr
  rk   r   r  r   rQ  rg  r
  r
  r   r   r   r  r^
  r   r  r  r   rq
  rb
  r  r   r  r  r   r   r  r   r   r%  r5   r   r   r  rP  modulefxGraphModuler:   r  r  r  r	  r@  r   r   r   rz
  r
  r   r   r  r  r  r>  r  rA  r  r  r&	  r  )*r  cond_fnbody_fnr
  r
  r
  r
  r
  fx_carried_inputsfx_additional_inputsfx_all_inputsr   fake_all_inputsfake_carried_inputsfake_additional_inputscarried_inputs_additional_inputs_
all_inputsr  cond_outputsbody_outputsr  r   r   rM  bor
  r  
while_loopmutated_idxsmutated_idx_setr   r  mutated_inputs_iterall_outputsrL  r  r  	multi_outmutated_inputr[  r   s*                                             r   r  zWhileLoop.create!  s    	Q	*	F	 	" GG0055b9 ww3388<),@@2?@Q166%=@@6GHqvve}HH9M!NA!&&-!N!N9GHA3,,Q/HH#99/J0BUV<MNqc//2NN3 6
 %'99
 '* 	H~~%!-:OD<OO:!"!6!6,,#0"*-- "7 "
 ((8 &HNN&&8  7*"8>>#?#?@C/E      8N$NN88/84 	4 }}..}}22}}22- XXdWeg  < A%3|3%O!23;;=EJJ.11.qzz|$),1,):" 	
G	
" A))+!!!?#s<'88 	
;
 	
8 %S,%GH 	AKAxB<;<;< < r{{}bkkm<r}}@ ==?bmmo5J2r67JJ5<<>R\\^3@aR[@3	A" !!!5GG&&GG  %%))*=tD

 *0!!$F3/%	

 }}(ZMM  %(("6"6.
 	
 

 6MM  /

 %\25DEc*S/EE #>2$&
&(
#'1, ?,  ))=)=)B)B5)IJ .V'%}}$llDJKKMRbk55b9RFLmmoV 7 7 ;V	 C[M		 "")))4""9-.  )6 2V/)^!44T6TT4$()<$=M//66&}';';]JW  &&}5 +##)#4#4#6"("2"2"4!'!2#)#4#4#6#)#4#4#6#=#= #
!I &&--i8&&y1-20 NK8 	@HC||~!5!55 ++//?	@ U AH!NH O t F" SVs=   (__! _&<_+8_0/A7_5`:`*`5_?	c           	         |j                  | | j                         |j                  | j                         | j                  t        | di              y r	  )codegen_while_loopr
  r	  r  r  r   r  s     r   r  zWhileLoop.codegen"  sA    ""4):):;88MMOT\\749Lb+Q	
r   c                    t        | dd       x}rKt        t        j                  j                  j
                  |      }|J t        |j                               S t               S r	  r
  r	  s      r   r|  z"WhileLoop.get_unbacked_symbol_defs"  r
  r   )r
  r  r
  r  r
  r]
  r
  r]
  r>  rP  r  r	  r
  r   r   r   )r
  r  r   r  )r
  r]
  r
  r]
  r
  r  r
  r  r
  r   r   r	  rf  r  )r   r   r   r]  r
  r   r
  r
  r
  r  r  r`  r
  r  r  r  r|  r  r  s   @r   r
  r
  !  s   ]15N.54818(,M%,(,M%,/3G,3)() ,)  	)
  ) ") H) ) 
)D  8 KK K )	K
 ,K K 
)K KZ
 r   r
  c                  \     e Zd Z	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZd fdZd	dZ xZS )
r   Nr	  c          	     j   t         |   |||||d |       ddlm} |D 	cg c]   }	t	        |	t
              r|	j                  n|	" }
}	 ||g ||
|      }|J || _        t        j                  j                  j                  |d       | _        | t        j                  j                  |<   y c c}	w )Nr   r  r   )get_effect_key)r  r  torch._higher_order_ops.effectsr
  r   ri  r  effect_typerk   r   effectful_opsr  prev_effect_buffer)rq  r>  r  r  r	  r  r   r  r
  r  uncovered_argsr
  r  s               r   r  zEffectfulKernel.__init__"  s     	/ 	 	
 	C GR
ABz!_5AGG1<
 
 %V-O~-O-OQWX&&&&"#''"7"7";";K"N-1k*
s   %B0c                    t         |          }| j                  F|j                  j	                  t        j                  | j                  j                                      |S r   )r  r  r
  r  r  rA   r  r  )rq  r_  r  s     r   r  zEffectfulKernel.get_read_writes"  sU    g-/"".!!$$T%<%<%E%E%GH r   c                     yrt  r   r  s    r   r"	  z EffectfulKernel.has_side_effects"  r  r   r   r	  rU  r8  )r   r   r   r  r  r"	  r  r  s   @r   r   r   "  ss     ,02 KO22 2 &	2
 &2 +2 )2 H2 
2@r   r   c                      e Zd Z	 d	 	 	 ddZy)r  c                    t               S r   r9   r  s     r   r  z!NonTensorObj.get_free_symbol_uses"  rN  r   NrO  rZ  )r   r   r   r  r   r   r   r  r  "  s    $)!	!r   r  c                  L    e Zd ZU ded<   ded<   ddZdddZddZdd	Zdd
Zy)ri  r   r   +Union[FakeScriptObject, torch.ScriptObject]r  c                    | j                   S r   rz  r  s    r   r  zTorchBindObject.get_name"  r?  r   Nc                    | j                   S r   rz  r  s     r   r  z!TorchBindObject.codegen_reference"  r?  r   c                    | j                   S r   r  r  s    r   r  zTorchBindObject.get_value"  r  r   c                    t        | j                  t        j                        r| j                  S | j                  j                  S r   )r   r  r  ScriptObjectreal_objr  s    r   get_real_objzTorchBindObject.get_real_obj"  s0    djj%"4"45::::&&&r   c                   | j                         }t        |d      sJ t        |j                               }t	        j
                  |      d   }|D cg c]=  }t        |t        j                        r!|j                         |j                         z  ? }}t        j                  t        j                  |d      S c c}w )N__obj_flatten__r   )r
  r  r   r
  r  r  r   r  r  r  numelrl  r  operatorr  )rq  real_script_obj	flat_dict
flat_elemsr   
flat_sizess         r   get_buf_byteszTorchBindObject.get_buf_bytes"  s    ++-(9:::88:;	((3A6
  
!U\\* NNqwwy(

 

 j!<<
s   AB=rJ  r   r?  )r   r
  )r   ztorch.ScriptObjectrX  )	r   r   r   r   r  r  r  r
  r
  r   r   r   ri  ri  "  s&    
I66'=r   ri  c                  4    e Zd ZU ded<   ded<   ddZd	d
dZy)r  r   r   rC  r   c                    | j                   S r   rz  r  s    r   r  zGeneratorState.get_name#  r?  r   Nc                    | j                   S r   rz  r  s     r   r  z GeneratorState.codegen_reference
#  r?  r   rJ  r   r?  )r   r   r   r   r  r  r   r   r   r  r  #  s    
Ir   r  c                  r    e Zd ZddZddZdd	dZe	 	 	 	 	 	 	 	 	 	 d
d       Ze	 	 	 	 	 	 	 	 	 	 dd       Zy)_CollectiveKernelc                     yr  r   r  s    r   rR  z!_CollectiveKernel.should_allocate#  r  r   c                     yrt  r   r  s    r   r"	  z"_CollectiveKernel.has_side_effects#  r  r   Nc                \   t        | j                        t        j                  j                  u sJ d       | j                  }||| _        n|j                  j                  | _        |j                  j                  D cg c]  }|j                  s|j                   c}| _
        y c c}w )Nz,Setting cpp kernel needs a valid op_overload)r   r  r  r  r  r  r  r   r  r  r  )rq  r  r  r   s       r   r  z%_CollectiveKernel.set_cpp_kernel_name#  s    D$$%)>)>> 	
:	
> !!&#2D #)>>#6#6D  #NN44.
AFF.
* .
s    B)B)c           
     *   t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]  }
|
j	                           |d   j                         } | t        |      ||      }t        j                  |      }|j                  j                  |D cg c]  }t        t        |      ||       c}       |j                  j                  |D cg c]  }|j                          c}       d|v r`|j                  j                  t        t        |      |d   |             |j                  j                  |d   j                                y y # 1 sw Y   GxY wc c}w c c}w )Nr  r   r  r   )rk   r   r  r   r  r  r  r  tree_leavesr  r  r  r	  r  r  )r  r  rY  r   r   _example_outputr  r  r  r  
tensor_argr   r	  inpsrb  r[  s                   r   create_inplacez _CollectiveKernel.create_inplace+#  s    WW 	D #""66CDCFC!	D %E2C1D&EE$% 	!J 	! Q**,f%
 !!&)&&OST^Jf5sFCT	

 	!!T"Bc3<<>"BCF?##**z8&-P %%fUm&<&<&>? 9	D 	D. U #Cs   E>=F;F>Fc           
        t         j                  j                  5   | j                  ||g|i |\  }}}}}	d d d        	rJ | d|	        D ]  }
|
j	                           t        t              r| j                  ||      }|J  | t        |      ||      }t        |      D cg c](  \  }}t        | j                  |      |t        |fg      * c}}|_        t        |j                  |      D ]T  \  }}t        j                  st!        |      r"t         j                  j"                  j%                  |j&                         V |j                  S  | | j                  |      ||      }t        j                  st!        |      s3t         j                  j"                  j%                  |j&                         |g|_        |S # 1 sw Y   xY wc c}}w )Nr  r  )rk   r   r  r   r  r   r   r	  rP  r   r	  r	  r  r   r@   r	  rh   r  r  r   )r  r  rY  r   r   r  r  r  r  r  r
  r   r	  r   rD  rb  s                   r   create_out_of_placez%_CollectiveKernel.create_out_of_placel#  s    WW 	D #""66CDCFC!	D %F3D2E&FF$% 	!J 	! nd+__[.AF%%%!0F "+>!: Av ((0AYKFN  #6>>>B <V::BSC GG--11#((;	<
 >>!$$^4F 66>O? ))--fkk:$XFNMc	D 	D,s   G0-GGr8  r   rg  )
r  ry   rY  zUnion[IRNode, list[IRNode]]r   r   r   r   r   r   )
r  ry   rY  z!Union[TensorBox, list[TensorBox]]r   r   r   r   r   z+Union[list[MultiOutput], _CollectiveKernel])	r   r   r   rR  r"	  r  r  r
  r  r   r   r   r
  r
  #  s    

( (@(@ ,(@ 	(@
 (@ 
(@ (@@ 88 28 	8
 8 
58 8r   r
  c                  P     e Zd Z	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )_AllReduce_KernelNr	  c          	     T    t         |   |||||d |       | j                  d       y )Nr
  +aoti_torch_cpu__c10d_functional_all_reduce_r  r  r  	rq  r>  r  r  r	  r  r   r  r  s	           r   r  z_AllReduce_Kernel.__init__#  =     	/ 	 	
 	  !NOr   c                    |j                  d       |j                  |        t        | j                  t              r| j                  |       y y Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hinclude_extra_headerr{  r   r>  r  rH  r  s     r   r  z_AllReduce_Kernel.codegen#  @    $$%RS,,T2dkk6*%%g. +r   r   r	  rf  r  r  s   @r   r  r  #  sw     ,0P KOPP P &	P
 &P +P )P HP 
P,/r   r  c                  P     e Zd Z	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZ xZS )_AllReduceKernelNr	  c          	     T    t         |   |||||d |       | j                  d       y )Nr
  *aoti_torch_cpu__c10d_functional_all_reducer  r  s	           r   r  z_AllReduceKernel.__init__#  s=     	/ 	 	
 	  !MNr   c                    |j                  d       |j                  |        t        | j                  t              r| j                  |       y y r
  r  r  s     r   r  z_AllReduceKernel.codegen#  r  r   r   r	  rf  r  r  s   @r   r  r  #  sw     ,0O KOOO O &	O
 &O +O )O HO 
O,/r   r  c                  v     e Zd Z	 ddd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	 fdZd
dZddZedd       Zd fdZ xZ	S )_WaitKernelNr	  c          	     T    t         |   |||||d |       | j                  d       y )Nr
  +aoti_torch_cpu__c10d_functional_wait_tensorr  r  s	           r   r  z_WaitKernel.__init__#  r  r   c                    |j                  d       |j                  |        t        | j                  t              r| j                  |       y y r
  r  r  s     r   r  z_WaitKernel.codegen#  r  r   c                r   | j                   d   }t        |t              sJ t        |t              r2|j                   d   }t        |t              sJ t	        |             |gS t        |t
              rC|j                   d   }t        |t              r"|j                  d   \  }}|j                   |   gS g S g S r  )rY  r   r   r
  r   r	  r`  )rq  r[  r   collr   r   s         r   get_volatile_readsz_WaitKernel.get_volatile_reads$  s    kk!n#v&&&c,-

1Aa(1$q'1(3J[) ::a=D$ 12Q3C())I Ir   c                r   t         j                  j                  5  | j                  ||      \  }}}}}d d d        rJ | d|         | t	        |j                               |      }|j                  j                  t        t	        |j                               ||             y # 1 sw Y   zxY w)Nr  r  )	rk   r   r  r   r  r  r  r  r  )	r  r  r[  r
  r  r  r  r  r	  s	            r   create_waitz_WaitKernel.create_wait$  s    WW 	0 ""63/!	0 %E2C1D&EE$cnn./
 	&&:S^^-=>VL	
!	0 	0s   B--B6c                    t         |          }| j                         }|D ]>  }|j                  j	                  t        j                  |j                                      @ |S r   )r  r  r  r  r  rA   r  r  )rq  r_  volatile_readsvrr  s       r   r  z_WaitKernel.get_read_writes2$  sZ    g-/002  	GB!!,"6"6r{{}"EF	Gr   r   r	  rf  r  )r  ry   r[  r   r   r   rU  )
r   r   r   r  r  r  r  r  r  r  r  s   @r   r  r  #  s     ,0P KOPP P &	P
 &P +P )P HP 
P,/0 
 
* r   r  c                2   t        | t        t        f      rt        |       S t        | t        t
        f      r2t        t        j                            }| D ]  }|t        |      z  } |S t        | t        j                        rt        |       S t               S r   )r   r7   r!   r3   r   r   r:   r   r#   r^  r  r  r   r  r  s      r   r^  r^  >$  s    !h%&$Q''	At}	%u||$& 	0A,Q//A	0	Au||	$$Q''|r   c                2   t        | t        t        f      rt        |       S t        | t        t
        f      r2t        t        j                            }| D ]  }|t        |      z  } |S t        | t        j                        rt        |       S t               S r   )r   r7   r!   r2   r   r   r:   r   r#   r_  r  r  r!  s      r   r_  r_  N$  s~    !h%&A	At}	%u||$& 	'A#A&&A	'	Au||	$A|r   )r   r   r   r   )r   r   r   r   )r   r   r   r  )r   rQ  r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   r$  r   )r   z(Sequence[Union[int, torch.SymInt, Expr]]r   zOptional[ShapeEnv]r   rQ  )r   Sequence[Union[int, Integer]]r   rQ  r2  )r   zLiteral[None]r   r   r   r   )r   r   r   r   r   r  )r   r  r   r   r   zOptional[torch.Tensor])r  zOptional[Sequence[_T]]r   z Optional[Sequence[Optional[_T]]])r   z2Union[IRNode, OutputSpec, torch.device, None, str]r   r>  )r   z&Union[IRNode, torch.device, None, str]r   r   )r   zUnion[Buffer, TensorBox]r&  r   r   r   )r0  rI  r1  rI  r2  rI  r   r   )rD  r   rE  z"Sequence[Union[int, torch.SymInt]]r   r   )rS  r  r   r   )rY  r  r   r  )r   zUnion[Expr, Sequence[Expr]]r   r5  r   rj   )r  r   r   r5  r  r   r   r  )r   rQ  r   rh  rA  r!   r   rG  r%  )TFNFN)r   r   r}  r   r  r   r  r  r  r   r  r  r   ztuple[StorageBox, Layout])r   r   r  r#  r   r   r,  )r   rI  r2  rI  r   r   )r   r5  r   r   )r   r\  r   zTypeIs[Sequence[IRNode]])ra
  r  r   r   )r   r  r   z-tuple[list[ShapeAsConstantBuffer], list[Any]])r   r   r   r  (J  
__future__r   ra  r^  rl  r  loggingr
  ostextwraprc  collections.abcr   r   r   r   r   r	   r
   enumr   r   typingr   r   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   r   unittest.mockr    r   r!   r"   r#   torch._export.serde.schema_exportserder	  r	  torch._library.utilsr	  r  r	  torch._loggingr  torch.fxtorch.utils._pytree_pytreer  torch._dynamo.utilsr$   torch._export.serde.serializer%   *torch._higher_order_ops.auto_functionalizer&   torch._inductorr'   r  r)   torch._prims_commonr*   r+   r,   r-   r.   torch._subclasses.fake_tensorr/   %torch.fx.experimental.symbolic_shapesr0   r1   r2   r3   r4   r5   r6   r7   torch.fx.noder8   torch.utils._ordered_setr:   torch.utils._sympy.functionsr;   r<   r=   torch.utils._sympy.symbolr>   r  r@   rA   codegen.commonrB   rC   rD   rE   rF   rG   rH   rI   rJ   rK   	loop_bodyrL   ops_handlerrM   rN   rO   rP   runtime.benchmarkingrQ   runtime.hintsrR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   virtualizedri   rj   rk   "torch._library.fake_class_registryrl   rm   rn   codegen.cuda.cuda_templatero   codegen.wrapperrp   r   rq   rr   r   r   r  __version__rX  rW  ImportErrorrs   rt   ru   rv   r   rw   r  rx   r  r  r  ry   	getLoggerr   rp  r  r  getenvr
  r  r   r   r   r   r	  r   r   r   r   r   r   r  r  r   r   r   r   r  r  r  r!  r+  r6  rJ  rU  r\  r   re  r  r  r  r  r  r.  r0  r  r  r  r  r  r(  rY  ra  r  r  r?  r  r  r  r   r  r  r  r  rC  rB  rN  rl  rs  rz  r  r  r  r  r@  r  r  r  r  r  r  r  rT  rl  r\  r  rd  r   rj  rm  r  r   r   PrimitiveInfoTyper  r  r!  rC  rM  rS  rZ  rk  rs  rw  r  rk  rr  r  r  r  r  r  r  r  r  r	  r	  r$	  r)	  r1	  rB	  rO	  rZ	  r   r   r	  r	  r	  r
  rP  r	  r   r   r  r]
  rb
  rd
  rz
  r
  r
  r   r  ri  r  r
  r  r  r  r^  r_  r   r   r   <module>rN     s   "       	   N N :           ' ' 2 2 , ,   $ $ ( ? M # 2  :	 	 	  / L L * "     N N - :     . * ) CB&85$% %L)$''NJ t_T]T]T]CI&) &C,-) -

 5 5uzz7U7U UVi Vg!			8??4	8yy~~ibii 7<=92991378'T  k	sDk!12K8STU	i 	) d#  $$$D44 , ! $  TX	1>P	 TX
	1
>P

 
 N 
 N 
 O 
 O .2&*8!%	>9	>	>;('00,/0	00    
	.$G$G/$G $GN'u, u,p	 UH H HV @
F @
 @
F&  
  
  
F 
i 
 
F |$y!y!u=)< 8  JN<N<N +<NBF<N<N~ h
 h
 h
Z '+1:
#  &	& "8D>8D>"BH"LMY M7S9 7St#1 #L[
+ [
| D
5 D
 D
P 	 	 	 U5 U Up	 !<@=A999 9 :	9
 9 ;9 9x:	$ ]
v ]
 ]
@ N N Nb -( - -` :9( :9 :9z !( ! !H s; s sl Rh R Rj & & &R_A _AD 6  " K| K K$ S| S S'9	(<7 7  v
Z v
 v
rC& C\LV \L~ Gf  GFT %%{ %%P   .V* V*r UDV] D DN U&fi & & & 
K 
[ 
& 6  & F   Un4_ n4 n4bE
_ E
PL> L^ #udCeCeT<Q6R1SST :$ :$z"| "
WB. WBt5 50( (4N >5455 UQ? Q Qh @9 @F Us< s sl U"l " "J
/ 
((" ("V
V 
B=L =@%
 %
P
- 
$1
\ 1
hGl GT))| ))Z/, /d< 8B5 B$8- 82K)l K)\4)| 4)n3T 3Tl)3 )3X-L -8;;< ;;| U  
^*& ^*B U
. 
 
<   2 
  &
, &
V S S Sl+
 +_% _%D U*v * *L Up.\ p. p.f UR , R  R j"
"2" Ud  d  d N	,n ,^6  =l = =B \  W Wt/) />/( />R# Rp C^  NJs   	j 	jj