
    iqF              	       :   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZmZmZ d dlZd d	lmZ g d
Z ee      Z G d dee      Z ed       G d d             Zededee   ded   fd       ZdededdfdZde e   ddfdZ!de e   ddfdZ"dedede e   fdZ#dede e   fdZ$dede e   fdZ%dede e   fdZ&dede e   fdZ' ed      Z( ed      Z)d ee)   d!ee)ge(f   de*e(e e)   f   fd"Z+d#ede e   fd$Z,d#ede e   fd%Z-d&ede e   fd'Z.d&ede e   fd(Z/defd)Z0dedefd*Z1d&ede e   fd+Z2d&edefd,Z3d-edefd.Z4d&edefd/Z5d0ede e   fd1Z6d2ee   defd3Z7de e   fd4Z8d5ede e   fd6Z9de e   fd7Z:y)8    N)defaultdict)IterableIterator)contextmanager)asdict	dataclass)Enum)	getLogger)CallableOptionalTypeVar)signpost_event)AffinityMode6maybe_temporarily_apply_numa_binding_to_current_threadNumaOptionsc                        e Zd ZdZdZdZdZdZy)r   zW
    See behavior description for each affinity mode
    in torch.distributed.run.
    nodesocket	exclusivezcore-complexN)__name__
__module____qualname____doc__NODESOCKET	EXCLUSIVECORE_COMPLEX     L/var/www/html/engine/venv/lib/python3.12/site-packages/torch/numa/binding.pyr   r      s    
 DFI!Lr   r   T)frozenc                   (    e Zd ZU eed<   	 dZeed<   y)r   affinity_modeF!should_fall_back_if_binding_failsN)r   r   r   r   __annotations__r$   boolr   r   r    r   r   $   s     /4%t3r   r   	gpu_indexnuma_optionsreturnc              #   h   K   |d yt               }t        | |       d t        |       yw)z
    1. Applies NUMA binding to the current thread, suitable for the thread
    which will be interacting with GPU gpu_index.
    2. Resets to the original CPU affinity before exiting the context manager.
    Nr'   r(   logical_cpu_indices)+_get_allowed_cpu_indices_for_current_thread%_apply_numa_binding_to_current_thread$_bind_current_thread_to_logical_cpus)r'   r(   original_logical_cpu_indicess      r    r   r   2   s:      #N#P ), 
(8s   02c           	         | t        |      d}t        j                  d|       	 t        | |      }t        j                  dt	        |             t        |       t        j                  dt	        |             t        |       t        j                  dt	        |             t        ddi |d	t	        |      i
       y # t        $ rt t        ddi |dt        j                         i
       t        j                  d|       |j                  r*t        j                  dt        j                                Y y  w xY w)Nr+   z0Attempting to apply NUMA binding, given input %rz0Computed logical_cpu_indices=%s for NUMA bindingr,   z1Validated logical_cpu_indices=%s for NUMA bindingz=Successfully bound to logical_cpu_indices=%s for NUMA bindingnuma_bindingapply_successr-   )categoryname
parametersapply_exception	tracebackz)Failed to apply NUMA binding for input=%rzHContinuing executing without applying NUMA binding, despite exception %s)r   loggerinfo_get_logical_cpus_to_bind_to_get_ranges_str_from_ints%_raise_if_logical_cpu_indices_invalidr0   r   	Exceptionr9   
format_exc	exceptionr$   warning)r'   r(   kwargsr-   s       r    r/   r/   I   sH    |,F KKBFK-:l
 	>%&9:	

 	.BUV?%&9:	

 	-ATUK%&9:	

 	# %'@AT'U	
  #"Y113	
 	DfM99NNZ$$& !s   BC A9EEr-   c                     | st        d      y )Nz+Must bind to a non-empty set of CPU indices)RuntimeErrorr,   s    r    r>   r>      s    HII r   c                 0    t        j                  d|        y Nr   )ossched_setaffinityr,   s    r    r0   r0      s    /0r   c                    |j                   t        j                  k(  rt        |       }|S |j                   t        j                  k(  rt        |       }|S |j                   t        j                  k(  rt        |       }|S |j                   t        j                  k(  rt        |       }|S t        d|j                    d      )z
    Args:
        gpu_index: The index of the GPU that will be used by the subprocess.
            Example: 0
        numa_options: See NumaOptions for details.

    Returns:
        Set of logical CPU indices to bind to.
    r'   zAffinity mode z not supported.)r#   r   r   !_node_get_logical_cpus_to_bind_tor   #_socket_get_logical_cpus_to_bind_tor   &_exclusive_get_logical_cpus_to_bind_tor   )_core_complex_get_logical_cpus_to_bind_to
ValueError)r'   r(   logical_cpuss      r    r<   r<      s     !!\%6%6689M  
	#	#|':':	::YO  
	#	#|'='=	==	R  
	#	#|'@'@	@@9U  >,*D*D)E_UVVr   c                 2    t        |       }t        |      S )z-
    Core logic of 'node' numa strategy.
    rK   numa_node_index)"_get_numa_node_index_for_gpu_index._get_allowed_logical_cpu_indices_for_numa_node)r'   rT   s     r    rL   rL      s     99MO9' r   c                     t        |       }t        |      }t        |      }t               }|D ]  }|j	                  t        |              |S )z/
    Core logic of 'socket' numa strategy.
    rK   rS   )socket_index)rU   _get_socket_index_for_numa_node'_get_numa_node_indices_for_socket_indexsetupdaterV   )r'   numa_node_index_of_gpurX   numa_node_indicesrQ   rT   s         r    rM   rM      sg     @)T2.L @! 5L, 
: /	

 r   c                 R   t        |       }t        |      }t        |      }|j                  |       }t	        |      }t        |d       }t        t        |j                                     }t        |      t        |      z  }t        |      t        |      z  }|dk  r+t        dt        |       d|ddt        |       d	z         ||z  t        ||      z   }||z   ||k  rdnd
z   }	t        |j                               ||	 D 
ch c]  }
|
D ]  }|  }}
}|S c c}}
w )z2
    Core logic of 'exclusive' numa strategy.
    rK   rS   c                 ,    t        t        |             S Nlogical_cpu_index)min6_get_logical_cpu_indices_sharing_same_physical_core_asrb   s    r    <lambda>z8_exclusive_get_logical_cpus_to_bind_to.<locals>.<lambda>   s    #B"3#
 r      zThere are only z# physical cores on numa_node_index=,z but there are z% GPUs associated with this NUMA node.r   )rU   _get_gpu_indices_for_numa_nodesortedindexrV   	_group_bydictitemslenrE   rd   listvalues)r'   rT   gpu_indicesoriginal_gpu_relative_indexallowed_logical_cpu_indices,physical_core_to_allowed_logical_cpu_indicesnum_physical_cores_per_gpu(num_gpus_to_give_one_extra_physical_corestartendr-   rc   $logical_cpu_indices_for_original_gpus                r    rN   rN      s    99MO0QK%K"-"3"3I">"P'# 4=#	
40 48;AACD40 "%4"	[	"
 0340K0, "A%c"NOPPtdscuuvwK 011VWX
 	
 (*DDs#%MH E 	
$	% +-UU 	
  $(8??A$

$, "5,
 	 	,,( , 0/,s   D#c                 >   t        |       }t        |      }t        |      }|j                  |       }t	        |      }t        |d       }t        t        |j                         d             }|t        |      z  }t        |j                               |   }|S )z
    Core logic of 'core-complex' numa strategy.

    Each GPU is assigned a full core complex (group of cores sharing L3 cache)
    within its affined NUMA node.
    rK   rS   c                 ,    t        t        |             S ra   )rd   1_get_logical_cpus_sharing_same_max_level_cache_asrb   s    r    rf   z;_core_complex_get_logical_cpus_to_bind_to.<locals>.<lambda>%  s    #="3#
 r   c                 *    t        | d          | d   fS )Nrg   r   )ro   )items    r    rf   z;_core_complex_get_logical_cpus_to_bind_to.<locals>.<lambda>1  s    s47|mT!W5 r   )key)rU   ri   rj   rk   rV   rl   rm   rn   ro   rp   rq   )r'   rT   rr   rs   rt   .max_level_cache_to_allowed_logical_cpu_indicescache_index_for_original_gpurz   s           r    rO   rO     s     99MO0QK%K"-"3"3I">"P'# 6?#	
62 6::@@B 6		
62 $?6B $  ,06==?,",$( 0/r   KVrq   get_keyc                 j    t        t              }| D ]  } ||      }||   j                  |         |S )z2
    Groups elements with same key into sets.
    )r   r[   add)rq   r   key_to_valuesvaluer   s        r    rl   rl   C  sA     -8,<M &encu%& r   rc   c                     d|  d}t        |      5 }t        |j                               cd d d        S # 1 sw Y   y xY w)N/sys/devices/system/cpu/cpuz/topology/thread_siblings_list)open_get_set_of_int_from_ranges_strread)rc   "thread_siblings_list_absolute_pathfs      r    re   re   N  sH     &&7%88VW ' 
0	1 9Q.qvvx89 9 9s   5>c                 8   d|  d}d}t               }t        j                  |      D ]I  }|j                  d      r|dd  j	                         s)t        j
                  j                  ||      }t        j
                  j                  |d      }t        |      5 }|j                         j                         dvr
	 d d d        	 d d d        t        j
                  j                  |d      }t        |      5 }	t        |	j                               }
d d d        
|k  r|
}t        j
                  j                  |d	      }t        |      5 }t        |j                               }d d d        L |S # 1 sw Y   xY w# 1 sw Y   oxY w# 1 sw Y   qxY w)
Nr   z/cacherk      type>   DataUnifiedlevelshared_cpu_list)r[   rH   listdir
startswith	isdecimalpathjoinr   r   stripintr   )rc   cpu_cache_dir_absolute_path	max_level$logical_cpus_sharing_max_level_cacheentrycache_index_absolute_pathtype_absolute_path	type_filelevel_absolute_path
level_filer   shared_cpu_list_absolute_pathshare_cpu_list_files                r    r}   r}   X  s    &&7%8?   I+.5(78 (ab	0C0C0E$&GGLL1Le$T!  WW\\*CVL$% 	~~%%'/BB	 	B	 !ggll+DgN%& 	+*
)*E	+I	(*%'8)
% /0 	4G3R#((*40	 	+4 0/'	 	
	+ 	+	 	s$   "E7;FF7F 	F	F	rT   c                 8    t        |       }t               }||z  S NrS   )0_get_cpu_indices_for_numa_node_MAYBE_NOT_ALLOWEDr.   )rT   all_cpu_indicesallowed_cpu_indicess      r    rV   rV   ~  s'    F'O FG000r   c                     d|  d}	 t        |      5 }|j                         }ddd       t	              S # 1 sw Y   xY w# t        $ r}t        d| d      |d}~ww xY w)z
    Returns:
        Indices of all CPUs associated with numa_node_index. However, the list
        is not filtered based on whether the thread is allowed to use them.
    z/sys/devices/system/node/nodez/cpulistNz:Could not determine CPUs corresponding to numa_node_index=.)r   r   FileNotFoundErrorrE   r   )rT   cpulist_absolute_pathr   cpu_range_stres        r    r   r     s}     <O;LHU'( 	%AFFHM	% +=99	% 	% I8J!L
	s*   A 7A A A 	A!AA!c                  >    t         j                  j                         S )N)torchcudadevice_countr   r   r    _get_gpu_countr     s    ::""$$r   c                 T   t         j                  j                  |       }|j                  }|j                  }|j
                  }|dd|dd|dd}d| d}t        |      5 }t        t        |j                         j                               d      cd d d        S # 1 sw Y   y xY w)N04x:02xz.0z/sys/bus/pci/devices/z
/numa_noder   )r   r   get_device_propertiespci_domain_id
pci_bus_idpci_device_idr   maxr   r   r   )r'   device_propertiesdomainbusdevicepci_addrpci_numa_node_absolute_pathr   s           r    rU   rU     s    

88C,,F

&
&C,,F Qs3iqB7H$9(:"N	)	* -a 3qvvx~~'(!,	- - -s   #1BB'c                 l    t        t                     D ch c]  }t        |      | k(  r| c}S c c}w )NrK   )ranger   rU   )rT   r'   s     r    ri   ri     s9     ~/0-	BoU 	  s   1c                 2    t        |       }t        |      S NrS   )	cpu_index)._get_arbitrary_allowed_cpu_index_for_numa_node_get_socket_index_for_cpu)rT   arbitrary_cpu_indexs     r    rY   rY     s    H' %/BCCr   r   c                     d|  d}	 t        |      5 }t        |j                         j                               cd d d        S # 1 sw Y   y xY w# t        $ r}t        d|       |d }~ww xY w)Nr   z/topology/physical_package_idz)Could not determine socket for cpu_index=)r   r   r   r   r   rE   )r   package_id_absolute_pathr   r   s       r    r   r     sv    
%i[0MN R*+ 	)qqvvx~~'(	) 	) 	) RGYLIJPQQRs3   A 'A	A A	A A 	A-A((A-c                 ,    t        t        |             S r   )rd   rV   rS   s    r    r   r     s    6W r   
ranges_strc                 :   t               }| j                  d      D ]|  }|j                         }|sd|v rI|j                  d      \  }}t        |      t        |      }}|j	                  t        ||dz                c|j                  t        |             ~ |S )z
    Util for parsing a string of int ranges, as in a sysfs file.

    Args:
        ranges_str: E.g., "0-2,4,6-7"

    Returns:
        E.g., {0, 1, 2, 4, 6, 7}
    rh   -rg   )r[   splitr   r   r\   r   r   )r   ints	range_str	start_strend_strrx   ry   s          r    r   r     s     UD%%c* 	%	OO%	)!*!5IwYW3EKKeS1W-.HHS^$	% Kr   r   c                 <   | syt        |       }g }|d   x}}|dd D ]?  }||dz   k(  r|}||k(  r|j                  |        n|j                  | d|        |x}}A ||k(  r|j                  |        n|j                  | d|        dj                  |      S )z
    Convert a set of integers to a compact string with ranges.

    Args:
        ints: E.g., {0, 1, 2, 4, 6, 7}

    Returns:
        E.g., "0-2,4,6-7"
     r   rg   Nr   rh   )rj   appendr   )r   sorted_intsrangesrx   prevnums         r    r=   r=     s     ,KFq>!ED12 $(?D})q/0ED }!q'(88Fr   c                      t        d      5 } | j                         }d d d        t        |      S # 1 sw Y   t              S xY w)Nz!/sys/devices/system/node/possible)r   r   r   )r   possible_nodes_strs     r    !_get_systemwide_numa_node_indicesr     sE    	1	2 &aVVX& ++=>>& ++=>>s	   0ArX   c                     t               }t               }|D ]/  }t        |      }| t        |      k(  s|j	                  |       1 |S r   )r   r[   r   r   r   )rX   systemwide_numa_node_indicesmatching_numa_node_indicesrT   r   s        r    rZ   rZ     sW    #D#F !$7 <L+
 4?RSS&**?;< &%r   c                  ,    t        j                  d      S rG   )rH   sched_getaffinityr   r   r    r.   r.   !  s    ""r   );rH   r9   collectionsr   collections.abcr   r   
contextlibr   dataclassesr   r   enumr	   loggingr
   typingr   r   r   r   torch._utils_internalr   __all__r   r:   strr   r   r   r   r/   r[   r>   r0   r<   rL   rM   rN   rO   r   r   rm   rl   re   r}   rV   r   r   rU   ri   rY   r   r   r   r=   r   rZ   r.   r   r   r    <module>r      s   	  # . % )   . .  0 
8		"3 	" $
4 
4 
4 %-k%:d^ ,66%06	6rJ#c( Jt J
1S 1d 1
  	X	8C CH c c#h .C0 C0S C0L,0C ,0CH ,0^ CLCLhqk HaS!V,< aQi 99X9#0#0X#0L1s 1sSVx 1::X:&% %-S -S -$s s3x D D DRC RC Rs s  C 0!HSM !c !H?3s8 ?&S &SX &#SX #r   