
    iY                     0   U d Z ddlZddlZddlmZmZ ddlmZmZ ddl	m
Z
 ddlZddlZddlmZ ddlmZ ddlmZ d	d
lmZmZ d	dlmZmZmZ eeef   Z ej:                  e      Zda e
e!eee   f      e"d<   de!e#ejH                  jJ                  f   fdZ&dejH                  jN                  dededeee      de!eee   f   de!ee!eedf   ef   f   de(e   de#de!eee   f   de!eee   f   ddfdZ)dede!eee   f   fdZ*de(e   de(e   fdZ+dede,e   ddfdZ-dede(e   deejH                  jN                  e(ee      e!eee   f   e!ee!eedf   ef   f   f   fd Z.dejH                  jN                  de!eee   f   ddfd!Z/dejH                  jN                  de!eee   f   de!eee   f   fd"Z0dejH                  jN                  de!eee   f   ddfd#Z1de!eee   f   de!eee   f   ddfd$Z2ded%e(e   de(e   d&ee   de3f
d'Z4d(ede3fd)Z5d(edeeddf   fd*Z6d(ededee   fd+Z7d(ed,ed-ejH                  jN                  dee(e   e!eedf   ef   f   fd.Z8d(ed/ed0e!eedf   ef   d1edejH                  jN                  dee   fd2Z9y)3a"  
This module implements graph deduplication functionality for TorchDynamo's optimization pipeline.
Graph deduplication identifies identical subgraphs in the computational graph and merges them
to reduce redundancy and improve performance. The process involves analyzing regions of the graph,
identifying structurally equivalent regions, and replacing them with a single shared implementation.
This optimization is particularly effective for models with repeated patterns or similar computational
structures across different parts of the network.
    N)defaultdictdeque)	GeneratorIterable)Optional)config)StorageWeakRef)
OrderedSet   )NodeRegion)_detect_cycles_get_flat_args_get_flat_args_uniquelast_node_to_additional_depsreturnc                    | j                   j                  | j                        }| j                   j                  }t	        | j                  | j                   j                        }i }|D ]  }t        |      }|d   }t        ||      \  }}	}
}t        |	      s0t        j                  j                  | j                  |      }| j                  d|      }|||<   | j                  j                         5  | j                  j                  d|di       }ddd       |D ]   }t        | j                  ||	|
|||||
       "  |at#        | j                  |       |S # 1 sw Y   JxY w)a  
    This is the main entry point for applying the graph deduplication pass. Deduplication occurs in two phases:
    1. Subgraph creation:
        Subgraph creation works by taking one representative region from each region group and creating a subgraph from it, which will then be used to replace all regions in the group. This is implemented by first copying all nodes of the region to the new subgraph and then finding all inputs which are not within the region and creating placeholders for them. For the outputs, all regions in a region group need to be scanned to ensure the largest set of outputs is found, and then an output node is created which returns a tuple of all outputs.

    2. Graph replacement:
        To replace each region with the extracted subgraph, the node index in the region and argument index within the node's flattened args and kwargs are recorded once during subgraph creation. This allows us to determine which (external to the region) nodes and in which order these nodes are passed as inputs. For the outputs, getitem nodes are created for each output, and all nodes in the region with external outputs are replaced by the proper getitem node. Finally, all original nodes are erased (there should be no uses of these left in the graph).

The deduplication mutates the output_graph argument in place.

Returns a mapping of nodes to their subgraph output replacement node to remap outputs
when they are created in output_graph.
    r   subgraphget_attr N)region_trackerget_identical_regionsgraphnode_to_mutated_arg_positions_populate_additional_deps_get_all_output_indices_create_subgraphlisttorchfxGraphModule
nn_modulesinstall_subgraphinserting_beforecreate_node_replace_region_with_subgraphr   _stable_topological_sort)output_graphduplicated_region_groupsr   node_to_additional_depssub_gmsregion_groupinds_with_external_usersregionr   external_node_usagesnode_usage_to_tuple_elemsind_to_tuple_specsub_gmsubgraph_nameget_subgraph_nodes                  [/var/www/html/engine/venv/lib/python3.12/site-packages/torch/_dynamo/graph_deduplication.pyapply_graph_deduplicationr6   %   s   8  ,::PP  	##AA " 8L77UU 02G0 "#:<#H a V%=>	
 % ()%%l&=&=xH$55j&I!'002 	 , 2 2 > >M2r!	
 # 	F)""!$)!('-	-"L $;  N7	 	s   5EE&	r   r.   r4   r/   r0   r1   .r-   r3   r*   r   c
           
         g }
t               }|D ]  }t        t        |            }|\  }}||   }t        |i       }|D ]/  \  }}||   }||	v s||	|   v st        j                  d|         y  ||v r8||   D cg c]  }||   	 }}|j                  |       |
j                  |       |
j                  ||           t        ||
||      ry ||g|
}| j                  dt        j                  j                  j                  |i       }d}t               }|D ]  }||   }t        |      r2||   }|j                  t!        |||||              |t#        |      z  }E| j                  dt$        j&                  ||fi       }|j)                  |d       |dz  } t+        |      D ]f  }||v r||vr| j-                  |       |j/                  |d        |j1                         D ]%  }	 |j3                  |       |j5                  |       ' h t8        j:                  r2t=        t?        | |             tA        | |       | jC                          y y c c}w # t6        $ r Y }w xY w)Nz3NYI: Failed to substitute region %s due to mutationcall_functionr   Tpropagate_metar   )"r
   nextiterr   logdebugupdateextendappend_has_aliasingr%   r   opshigher_orderinvoke_subgraph_is_tuple_node_replace_tuple_outputslenoperatorgetitemreplace_all_uses_withreversed
erase_nodepopvaluesremoveaddKeyErrorr   graph_deduplication_lintprintr   r'   lint)r   r.   r4   r/   r0   r1   r-   r3   r*   r   sub_argsflattened_getitem_nodesusagesusagenode_ind	usage_indnodeflattened_args_kwargsuser_indnode_usage_induserituple_elemsinvoke_argsinvoke_subgraph_nodeindflattened_output_nodesexternal_user_ind
tuple_specsubgraph_outputdepss                                  r5   r&   r&   |   s    H0:& >T&\"#)h .tR 8(. 	$Hn(#D44!%B4%HHIIMv 	 --.G.NO6!9OKO#**;7OOK(OO1)<=%>. 24K 	$m?h?K ,,		..
	 C/9|5 '($*+<=J"))&#z+?
 3z?"C#//!1!14H#3NPRO &&t&L1HC$   **--T" 	 ##D$/+224 	DD!-.	( &&nU$;<= (?@

 ' Px  s   ;I-"I	I'&I'c                    t        t        t        t           f   t              }t	        |       }t        |       D ]P  \  }}t        |i       }t        |      D ]1  \  }}t        |t              s||vs||   j                  ||f       3 R |S N)	r   r   r
   
UsageIndexset	enumerater   
isinstancerQ   )r.   external_node_to_usagesregion_uniquerZ   r\   r]   arg_indin_nodes           r5   _get_external_inputsru      s     *$
:0F*FG
SKM#F+ J$ .tR 8 )*? @ 	JGW'4(WM-I
 (044h5HI	JJ #"    regionsc                 R    t               }| D ]  }t        ||        t        |      S rl   )rn   _get_inds_with_external_userssorted)rw   r-   r.   s      r5   r   r      s6     *- H%f.FGH *++rv   inds_uniquec                     t        |       D ]1  \  }}|j                  D ]  }|| vs||vs|j                  |        3 y rl   )ro   usersrQ   )r.   r{   re   r\   r`   s        r5   ry   ry      sI    v& )	TJJ 	)D6!k)OOC(	))rv   c                    t         j                  j                         t        |       }t	        t
        t                     }i t               }i }|j                         D ]  \  }}t        |      rht        ||       }|D ]?  }	j                  d|j                   d|	       }
|
| |	   <   |j                  | |	          A ||t        t        |            <   n#j                  d|j                         }
|
|<   |j                  |        dt         dt         ffddt         dt         ffd}g }i }t#        |       D ]X  \  }	}||vs ||      }|	|v st        |      r%t%        ||      \  }||	<   |j'                  |       H|j                  |       Z j)                  t+        |             |||fS )Nsupgraph_input__flattened_subgraph_input_r\   r   c                     | v r|    S | S rl   r   )r\   region_to_subgraph_nodes    r5   map_argz!_create_subgraph.<locals>.map_arg(  s    ***400Krv   c                 <    j                  | fd      }|| <   |S )Nc                      |       S rl   r   )oldr   s    r5   <lambda>z<_create_subgraph.<locals>.copy_to_subgraph.<locals>.<lambda>/  s    WS\ rv   )	node_copy)r\   subgraph_noder   r   r   s     r5   copy_to_subgraphz*_create_subgraph.<locals>.copy_to_subgraph.  s'     **41IJ(5%rv   )r   r    Graphru   r   r
   rm   itemsrF   _get_flattened_node_indicesplaceholdernamerQ   r;   r<   rA   r   ro   _create_getitem_nodesr@   outputtuple)r.   r-   external_input_to_usagesr/   rW   r0   r\   usage_indicesflattened_node_indicesre   r   r   output_listr1   r   getitem_nodesr   r   r   s                   @@@r5   r   r     s     %xx~~/H3F;
: 679 0:CE7==? 3m$
 &Av%N"- 9&22%dii[C5A 8C's4'++F3K89 ' &d4+>&?@ #../LMK,7#D)##M2+3.d t t  
 Kv& 6	T..,T2M..!$'<QmX=9M#4S#9  &&}5&&}56 OOE+&')+DFWWWrv   c                    t        t        | j                              }t        t                  }t        t               }t        t                  }d }|r|j                         }|j                  dk(  r%|j                  |       |j                  rJ d       Ft        ||      D cg c]  }||vr|
 }	}|	r||	d      j                  |       n^|j                  |       |r|j                  |ur|j                  |       |}|j                  t        |j                  |d                   |r|j                  |       |s!t        |      t        | j                        k(  sJ y c c}w )Nr   z!output nodes should have no usersr   )r   rL   nodesr
   r   r   rN   targetrQ   r}   r   rA   r;   r@   r?   rH   )
r   r*   pendingreadywaitingoutputscursorr\   xwaiting_fors
             r5   r'   r'   G  sO    8EKK()G tE $G  G F
{{};;("KKzzF#FF> +41HI
~ 
 

  KO$++D1IIdO&++T1d#F NN8GKKb$9:;1 4 
LL3u:U[[)99999'
s   &E'c                 T    t        t              }t        ||       t        | |       |S rl   )r   r
   _add_mutation_dependencies_add_global_state_dependencies)r   r   r*   s      r5   r   r   |  s-     =H
<S<>UV"5*AB""rv   c                    dd l }t        | j                        }|j                  j                  |j                  j
                  h}g }dt        t           dt        t        t        t           t        f   d d f   fd} ||      D ]|  \  }}t        |i       |D 	cg c]	  }	|	vs|	 }
}	|
r||   }|j                  |
       |j                  |v sM||   }|j                  fd|D               |j                  |       ~ y c c}	w )Nr   	all_nodesr   c              3      K   g }t        t        |             }|r+|j                         }||f |j                  |       |r*y y wrl   )r   rL   rN   rA   )r   
prev_nodes
next_nodescur_nodes       r5   prev_cur_nodesz6_add_global_state_dependencies.<locals>.prev_cur_nodes  sI      "$
(9-.
!~~'Hh&&h' s   AAAc              3   ,   K   | ]  }|vs|  y wrl   r   ).0nargs_uniques     r5   	<genexpr>z1_add_global_state_dependencies.<locals>.<genexpr>  s     "QA[<P1"Qs   	)	torch.ampr   r   amp_enter_autocast_exit_autocastr   r   r   r   r?   r   rA   )r   r*   r   r   global_state_targetsall_nodes_dep_onr   r   r   r   new_depsadditional_depsr   s               @r5   r   r     s    U[[!I "II55uyy7O7OP#%	(:	(	5dT)*D$6	7	( !/y 9 .
H+Hb9/H!1K3GAHH5h?O""8,??225h?O"""Qj"QQ##H-.Hs   	D&Dc                     | j                         D ]g  \  }}t        |i       }|D ]Q  }||   }|j                  D ];  }||u r||k  r||   j                  |       "||kD  s(||   j                  |       = S i y rl   )r   r   r}   rQ   )r   r*   r\   indicesflat_args_kwargsindexmutated_argr`   s           r5   r   r     s     7<<> <g)$3  	<E*51K#)) <4<D[+D155d;D[+D155d;<	<<rv   inputsrW   c           	      X   t               }|D ]p  }||v r|j                  d   }t        |t        j                        s2t        |j                               }||v rt        j                  d| ||   |        y|||<   r t               }|D ]  }	| |	   }
|
|v r|
s|
j                  d   }t        |t              rJ t        |t        j                        sLt        |j                               }||v rt        j                  d| ||   |
        y|
||<    |j                         |j                         z  }t        |      dkD  rX|D cg c]  }||   ||   f }}dj                  |D 	cg c]  \  }	}|	 d|  c}}	      }t        j                  d| |       yy	c c}w c c}}	w )
Nexample_valuezYNYI: Failed to substitute region %s due to input-output aliasing detected at nodes %s, %sTzZNYI: Failed to substitute region %s due to output-output aliasing detected at nodes %s, %sr   z, z and zUNYI: Failed to substitute region %s due to input-output aliasing detected at nodes %sF)dictmetarp   r   Tensorr	   _typed_storager=   r>   r   keysrH   join)r.   r   r-   rW   input_storagesr\   r   storageoutput_storagesra   out_nodeintersected_storagessaliasedos                  r5   rB   rB     s    26N +**		/2mU\\2$]%A%A%CDG.(		o"7+	 &*N7#+  37&O% 4!9..$MM/:M!-666-6()E)E)GHo-IIt'0 	  +3(%4& *..0?3G3G3II
 1$ >R
89^A 23
 
 ))@1s%s^@A		c	

 
 As   F!/F&
r\   c                 <    t        | j                  d   t              S )Nr   )rp   r   r   )r\   s    r5   rF   rF     s    dii0%88rv   c              #      K   | j                   D ]B  }|j                  t        j                  k(  s!t	        |j
                  d   t              s?| D y w)Nr   )r}   r   rI   rJ   rp   argsint)r\   r`   s     r5   _get_children_getitemsr     sA     

 ;;(***z$))A,/LJs   -AAAc                 \   t              D ci c]  \  }}||
 }}}t               }t        t        |             }|rf|j	                         }t        fd|j                  D              r|j                  ||          t        |      D ]  }|j                  |        |rf|S c c}}w )zaReturns an ordered set of indices, each representing a node in the region which will be flattenedc              3   &   K   | ]  }|v  
 y wrl   r   )r   r`   r.   s     r5   r   z._get_flattened_node_indices.<locals>.<genexpr>  s     9$tv~9s   )	ro   r
   r   r   popleftanyr}   rQ   rA   )	r\   r.   ra   r   flattened_node_to_indnode_indicesqueuer   childs	    `       r5   r   r     s    .7.?@daQT@@$.LL(./E
==?9(..9928<=+H5 	 ELL	 	   As   B(subgraph_tuple_noder   c           	      |   | j                   d   }t        |t              sJ d       g }t        t	        |      D cg c]  \  }}||f|f c}}      }i }|r|j                         \  }	}
}|j                  |      5  |j                  dt        j                  ||
d   fi       }d d d        |	j                   d<   t        |      ||
<   |j                  |       t        |	t              r4|j                  t	        |	      D cg c]  \  }}||
|fz   |f c}}       |r||fS c c}}w # 1 sw Y   xY wc c}}w )Nr   z#_get_getitem_children expects tupler8   r   )r   rp   r   r   ro   r   inserting_afterr%   rI   rJ   rH   rA   r@   )r\   r   r   tupr   ra   er   path_to_output_indexcur_elempathparentnew_getitem_nodes                r5   r   r     sF    ))O
$Cc5!H#HH! "MinMdaAt01MNE
!&$%%f- 	'33!1!1FDH3Er 	 2:o.%(%7T"-.h&LL@I(@ST1!TQD["23T " ...) N	 	 Us   D&
=(D,D8
,D5output_indexrh   rd   c           	      Z   t        |       sJ d       t        d t        |       D              }t               }|r|j	                         \  }}t        |      D ]&  }	|j                  |	||	j                  d   fz   f       ( |j                  |      5  |j                  dt        j                  ||||   z   fi       }
d d d        |j                  
d       |j                  |       |j                  |       |r|j                  |        |j                  |        |S # 1 sw Y   exY w)Nz+_replace_tuple_outputs expects a tuple nodec              3   @   K   | ]  }||j                   d    ff  yw)r   N)r   )r   cs     r5   r   z)_replace_tuple_outputs.<locals>.<genexpr>:  s     J1qvvayl#Js   r   r8   Tr9   )rF   r   r   r
   rN   rA   r   r   r%   rI   rJ   rK   rM   rQ   )r\   r   rh   rd   r   r   erased_nodesr   r   r   ri   s              r5   rG   rG   1  s,    $N!NNJ-CD-IJJE%/\L
$'1 	3ALL!TQVVAYL012	3 ""#78 	#//  %|j6F'FG	O	 	&&t&L"" " 
TT	 	s   +D!!D*):__doc__loggingrI   collectionsr   r   collections.abcr   r   typingr   r   torch.fxtorch._dynamor    torch.multiprocessing.reductionsr	   torch.utils._ordered_setr
   graph_region_trackerr   r   graph_utilsr   r   r   r   r   rm   	getLogger__name__r=   r   r   __annotations__strr    r!   r6   r   r   r&   ru   r   rn   ry   r   r'   r   r   r   boolrB   rF   r   r   r   rG   r   rv   r5   <module>r      so     * /      ; / . N N 38_
g!GK htD*T2B,B'CD KTtC9M9M4M/N Tn^88>>^^ ^ #:j#9:	^
  $J
3$?@^ CeCHos&:!;;<^ #3i^ ^ "$
4(8"89^ $(jo(=#>^ 
^B##	$
:&
&'#$,T&\ ,d3i ,)& )s3x )D )CXCX"3iCX 	HHNNJ	 Z_	$%d5c?C'(	()+CXL2:88>>2:!$
4(8"892: 
2:j#88>>#:>tZPS_?T:U#	$
4 
 !#!.88>>!.48z$?O9O4P!.	!.H<#'jo(=#><!$
4(8"89< 
<.88J8 #3i8 (-	8
 
8v9 9$ 9 )D$4D*E d F z# /
/%)/5:XX^^/
4:tE#s(OS0112/<
 U38_c)* 	
 88>> rv   