
    ciX                     0   d dl mZ d dlmZmZ d dlmZmZmZ d dl	m
Z
 d Z G d d      Z ed	       G d
 de             Z ed	       G d de             Z ed	       G d de             Z ed	       G d de             Z ed	       G d de             Z edd       G d de             Z G d d      Ze
d        Z ed	       G d de             Z edd       G d de             Z edd       G d de             Zd#d Zd! Zy")$    )	dataclass)ListOptional)_unwrap_if_constexpr_unwrap_shapeconstexpr_type)constexpr_functionc                 4   | j                   xs dg|z  }| j                  xs dg|z  }| j                  xs t        t	        t        |                  }t        j                  | d|       t        j                  | d|       t        j                  | d|       y )N   ctas_per_cgacta_split_num	cta_order)r   r   r   listreversedrangeobject__setattr__)layoutrankr   r   r   s        e/var/www/html/engine/venv/lib/python3.12/site-packages/triton/experimental/gluon/language/_layouts.py_realize_cta_layoutr      s    &&41#*L((6QC$JM  ?D%+)>$?I
v~|<
v>
v{I6    c                        e Zd ZdZed        Zy)DistributedLayoutz@
    Base class for distributed memory layouts in Gluon IR.
    c                     t        |       S Nr   selfs    r   typezDistributedLayout.type       d##r   N__name__
__module____qualname____doc__propertyr     r   r   r   r           $ $r   r   T)frozenc                       e Zd Zd Zd Zy)
AutoLayoutc                 "    |j                         S r   )get_auto_layoutr   builders     r   _to_irzAutoLayout._to_ir   s    &&((r   c                      y)NALr(   r   s    r   manglezAutoLayout.mangle    s    r   N)r#   r$   r%   r1   r4   r(   r   r   r,   r,      s    )r   r,   c                        e Zd ZU dZee   ed<   ee   ed<   ee   ed<   ee   ed<   dZeee      ed<   dZ	eee      ed<   dZ
eee      ed	<    fd
Zd ZdefdZd Z xZS )BlockedLayouta`  
    Represents a blocked layout, partitioning a tensor across threads, warps, and CTAs.

    Args:
        size_per_thread (List[int]): Number of elements per thread per dimension.
        threads_per_warp (List[int]): Number of threads per warp per dimension.
        warps_per_cta (List[int]): Number of warps per CTA per dimension.
        order (List[int]): The ordering of dimensions for partitioning.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): Ordering for CTAs.
    size_per_threadthreads_per_warpwarps_per_ctaorderNr   r   r   c                 j   t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j
                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t        | j                        }t        | |       t        | j                        |k(  sJ t        | j
                        |k(  sJ t        | j                        |k(  sJ t        | j                        |k(  sJ t        | j                        |k(  sJ t        | j                        |k(  sJ y )Nr7   r8   r9   r:   r   r   r   )superr   r   r7   r8   r9   r:   r   r   r   lenr   r   r   	__class__s     r   __post_init__zBlockedLayout.__post_init__:   si   -/CDDXDX/YZ.0DTEZEZ0[\O-A$BTBT-UVG%9$**%EFN,@ARAR,STO-A$BTBT-UVK)=dnn)MN4''(D$'4(()T1114%%&$...4::$&&&4$$%---4%%&$...4>>"d***r   c           	          |j                  | j                  | j                  | j                  | j                  | j
                  | j                  | j                        S r   )get_blocked_layoutr7   r8   r9   r:   r   r   r   r/   s     r   r1   zBlockedLayout._to_irL   sP    ))  !!JJNN
 	
r   returnc                 4   d } || j                         } || j                        } || j                        } || j                        } || j                        } || j
                        } || j                        }d| d| d| d| d| d| d| dS )Nc                 F    | ydj                  t        t        |             S N _joinmapstrxs    r   	stringifyz'BlockedLayout.mangle.<locals>.stringifyY       y88CQK((r   B)r7   r8   r9   r:   r   r   r   )	r   rO   r7   r8   r9   r:   r   r   r   s	            r   r4   zBlockedLayout.mangleW   s    	)
 $D$8$89$T%:%:;!$"4"45$**% !2!23!$"4"45dnn-	?#1%5$6aawaP\~]^_l^mmnoxnyyz{{r   c                    t        t        | j                        t        | j                        t        | j                        t        | j
                        | j                  rt        | j                        nd | j                  rt        | j                        nd | j                  rt        | j                        f      S d f      S r   )	hashtupler7   r8   r9   r:   r   r   r   r   s    r   __hash__zBlockedLayout.__hash__g   s    $&&'$''($$$%$**(,(9(9E$##$t)-););E$$$%%)^^E$..!
  	 :>
  	r   r#   r$   r%   r&   r   int__annotations__r   r   r   r   r@   r1   rL   r4   rU   __classcell__r?   s   @r   r6   r6   $   s     #Y3i99(,L(49%,)-M8DI&-%)IxS	")+$	
| | 	r   r6   c                   P     e Zd ZU dZeed<   eed<    fdZd Zde	fdZ
d Z xZS )	SliceLayoutz
    Represents a layout corresponding to slicing a distributed tensor along one dimension.

    Args:
        dim (int): The dimension index to slice.
        parent (DistributedLayout): The parent layout before slicing.
    dimparentc                     t         |   dt        | j                               t         |   dt        | j                               y )Nr]   r^   )r<   r   r   r]   r^   r   r?   s    r   r@   zSliceLayout.__post_init__   s5    E#7#ABH&:4;;&GHr   c                 l    |j                  | j                  | j                  j                  |            S r   )get_slice_layoutr]   r^   r1   r/   s     r   r1   zSliceLayout._to_ir   s.    ''HHKKw'
 	
r   rC   c                 X    d| j                    d| j                  j                          dS )NSLrH   )r]   r^   r4   r   s    r   r4   zSliceLayout.mangle   s)    DHH:Qt{{1134B77r   c                 D    t        | j                  | j                  f      S r   )rS   r]   r^   r   s    r   rU   zSliceLayout.__hash__   s    TXXt{{+,,r   r#   r$   r%   r&   rW   rX   r   r@   r1   rL   r4   rU   rY   rZ   s   @r   r\   r\   s   s1     
HI
8 8-r   r\   c                        e Zd ZU dZeee      ed<   eee      ed<   eee      ed<   eee      ed<   ee   ed<    fdZd Zd	 Z	d
 Z
 xZS )DistributedLinearLayouta  
    Represents a linear distributed layout with explicit bases at register, lane, warp, and block levels.
    See: https://arxiv.org/abs/2505.23819 for reference.

    Args:
        reg_bases (List[List[int]]): Bases for register-level distribution.
        lane_bases (List[List[int]]): Bases for lane-level distribution.
        warp_bases (List[List[int]]): Bases for warp-level distribution.
        block_bases (List[List[int]]): Bases for block-level distribution.
        shape (List[int]): The tensor global shape.
    	reg_bases
lane_bases
warp_basesblock_basesshapec                    t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j
                               t         |   dt        | j                               t         |   dt        | j                               t        | j                        }| j                  D ]  }t        |      |k(  rJ  | j                  D ]  }t        |      |k(  rJ  | j
                  D ]  }t        |      |k(  rJ  | j                  D ]  }t        |      |k(  rJ  y )Nri   rj   rk   rl   rm   )	r<   r   r   ri   rj   rk   rl   rm   r=   )r   r   basisr?   s      r   r@   z%DistributedLinearLayout.__post_init__   s    Kt~~)FGL-*HIL-*HIM=9I9I+JKG]4::%>?4::^^ 	&Eu:%%%	&__ 	&Eu:%%%	&__ 	&Eu:%%%	&%% 	&Eu:%%%	&r   c                     |j                  | j                  | j                  | j                  | j                  | j
                        S r   )get_distributed_linear_layoutri   rj   rk   rl   rm   r/   s     r   r1   zDistributedLinearLayout._to_ir   s<    44T^^T__VZVeVegkgwgw59ZZA 	Ar   c                     d| j                    d| j                   d| j                   d| j                   d| j                   dS )NDLLrH   )ri   rj   rk   rl   rm   r   s    r   r4   zDistributedLinearLayout.mangle   sI    T^^$Adoo%6a7H$JZJZI[[\]a]g]g\hhkllr   c                 P   t        t        t        t        | j                              t        t        t        | j                              t        t        t        | j
                              t        t        t        | j                              t        | j                        f      S r   )rS   rT   rK   ri   rj   rk   rl   rm   r   s    r   rU   z DistributedLinearLayout.__hash__   sn    #eT^^,-#eT__-.#eT__-.#eT--./$**
  	r   )r#   r$   r%   r&   r   rW   rX   r@   r1   r4   rU   rY   rZ   s   @r   rh   rh      sb    
 DIT#YT#Yd3i 9&$Amr   rh   c                   Z     e Zd ZU dZeed<   eed<   eed<    fdZd Zde	fdZ
d	 Z xZS )
DotOperandLayouta
  
    Represents a layout for a dot operand.

    Args:
        operand_index (int): 0 for LHS and 1 for RHS of the dot operation.
        parent (DistributedLayout): The parent layout, representing the MMA.
        k_width (int): Number of elements per 32-bits.
    operand_indexr^   k_widthc                     t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j
                               y )Nrw   r^   rx   )r<   r   r   rw   r^   rx   r`   s    r   r@   zDotOperandLayout.__post_init__   sP    O-A$BTBT-UVH&:4;;&GHI';DLL'IJr   c                     |j                  | j                  | j                  j                  |      | j                        S r   )get_dot_operand_layoutrw   r^   r1   rx   r/   s     r   r1   zDotOperandLayout._to_ir   s4    --d.@.@$++BTBTU\B]_c_k_kllr   rC   c                 r    d| j                    d| j                  j                          d| j                   dS )NDOrH   )rw   r^   r4   rx   r   s    r   r4   zDotOperandLayout.mangle   s6    D&&'q););)=(>a~RPPr   c                 Z    t        | j                  | j                  | j                  f      S r   )rS   rw   r^   rx   r   s    r   rU   zDotOperandLayout.__hash__   s"    T''dllCDDr   rf   rZ   s   @r   rv   rv      s;     LK
mQ QEr   rv   )r*   eqc                        e Zd ZU dZee   ed<   ee   ed<   ee   ed<   dZeee      ed<   dZ	eee      ed<   dZ
eee      ed<    fd	Zd
 ZdefdZd Z xZS )NVMMADistributedLayouta  
    Represents a layout for NVIDIA MMA (tensor core) operations.

    Args:
        version (List[int]): Version identifier for the MMA instruction.
        warps_per_cta (List[int]): Number of warps per CTA.
        instr_shape (List[int]): Instruction shape for MMA.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    versionr9   instr_shapeNr   r   r   c                    t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j
                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t        | j                        }t        | |       t        | j                        |k(  sJ t        | j                        |k(  sJ t        | j                        |k(  sJ y )Nr   r9   r   r   r   r   )r<   r   r   r   r9   r   r   r   r   r=   r   r>   s     r   r@   z$NVMMADistributedLayout.__post_init__   s   I';DLL'IJO-A$BTBT-UVM+?@P@P+QRN,@ARAR,STO-A$BTBT-UVK)=dnn)MN4%%&D$'4$$%---4%%&$...4>>"d***r   c                     |j                  | j                  | j                  | j                  | j                  | j
                  | j                        S r   )get_mma_layoutr   r9   r   r   r   r   r/   s     r   r1   zNVMMADistributedLayout._to_ir  sE    %%dllD4F4FHYHY[_[m[m&*nnd6F6FH 	Hr   rC   c                     d| j                    d| j                   d| j                   d| j                   d| j                   d| j
                   dS )NMMA_rH   _MMA)r   r9   r   r   r   r   r   s    r   r4   zNVMMADistributedLayout.mangle	  sq    dll^1T%7%7$8$:J:J9K1TM^M^L__`aeasas`ttuvz  wE  wE  vF  FJ  K  	Kr   c           
      f   t        t        | j                        t        | j                        t        | j                        | j
                  rt        | j
                        nd | j                  rt        | j                        nd | j                  rt        | j                        f      S d f      S r   )rS   rT   r   r9   r   r   r   r   r   s    r   rU   zNVMMADistributedLayout.__hash__  s    U4<<(%0B0B*C4++,$J[J[eD4E4E.Fae262D2DU4--.$.2nnU4>>*H I 	I CGH I 	Ir   rV   rZ   s   @r   r   r      s    
 #Y9c(,L(49%,)-M8DI&-%)IxS	")+HK KIr   r   c                        e Zd ZdZed        Zy)SharedLayoutz;
    Base class for shared memory layouts in Gluon IR.
    c                     t        |       S r   r   r   s    r   r    zSharedLayout.type  r!   r   Nr"   r(   r   r   r   r     r)   r   r   c                     | }|Bt        |      t        |       k(  sJ t        t        |            D ]  }||xx   ||   z  cc<    |S r   )r=   r   )rm   r   shape_per_ctar]   s       r   _get_shape_per_ctar     sX    M =!SZ///]+, 	5C#-"44	5r   c                        e Zd ZU dZeed<   eed<   eed<   dZeed<   dZeed<   dZ	e
ee      ed	<   dZe
ee      ed
<   dZe
ee      ed<    fdZd Zee	 	 dd              ZdefdZd Z xZS )NVMMASharedLayouta4  
    Represents a layout for shared memory suitable for NVIDIA MMA operations.

    Args:
        swizzle_byte_width (int): Width in bytes for swizzling.
        element_bitwidth (int): Bitwidth of element type.
        rank (int): Rank of the tensor.
        transposed (bool): Whether the layout is transposed.
        fp4_padded (bool): Whether FP4 padding is used.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    swizzle_byte_widthelement_bitwidthr   F
transposed
fp4_paddedNr   r   r   c                 B   t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j
                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               | j                  d	v sJ | j                  d
v sJ | j
                  }t        | |       t        | j                        |k(  sJ t        | j                        |k(  sJ t        | j                        |k(  sJ y )Nr   r   r   r   r   r   r   r   )          @   )r   r   r      )r<   r   r   r   r   r   r   r   r   r   r   r   r=   r>   s     r   r@   zNVMMASharedLayout.__post_init__?  sZ   02FtG^G^2_`.0DTEZEZ0[\F$8$CDL*>t*OPL*>t*OPN,@ARAR,STO-A$BTBT-UVK)=dnn)MN$$777&&*::::yyD$'4$$%---4%%&$...4>>"d***r   c           	          |j                  | j                  | j                  | j                  | j                  | j
                  | j                  | j                        S r   )get_nvmma_shared_layoutr   r   r   r   r   r   r   r/   s     r   r1   zNVMMASharedLayout._to_irQ  sN    ..##!!OOOONN
 	
r   c           
      t   |rdnd}t        | |      }t        |       }	|r|dd |dd z   }|d   |z  }
|
|j                  z  dz  }|dk\  r|dz  dk(  rd}n"|dk\  r|dz  dk(  rd}n|d	k\  r|d	z  dk(  rd	}nd}d}|dd D ]  }||z  }	 t        |       dk  s|dk  rd}t        ||j                  |	|||||
      S )zReturns an NVMMASharedLayout with default swizzling for a given shape.

        This picks the largest swizzle pattern compatible with the shape, which
        allows emitting the fewest TMA or MMA messages.
           r   Nr   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r=   primitive_bitwidthr   )block_shapedtyper   r   r   r   r   packing_factorr   r   contig_dim_sizecontig_dim_bytesr   flatten_outer_dimsizes                  r   get_default_forz!NVMMASharedLayout.get_default_for\  s'    )a*;F;)!"-bq0AAM'+n<*U-E-EEJs"'7#'='B!$#(82(=(B!##(82(=(B!#!"!#2& 	&D%	&{a#4q#8!" 1"55!!%'	
 		
r   rC   c           	      p    d| j                    d| j                   d| j                   d| j                   d	S )NNVMMA_rH   _NVMMA)r   r   r   r   r   s    r   r4   zNVMMASharedLayout.mangle  s@    //0$2G2G1H$//IZZ[\`\k\k[llrssr   c                 \   t        | j                  | j                  | j                  | j                  | j
                  | j                  rt        | j                        nd | j                  rt        | j                        nd | j                  rt        | j                        f      S d f      S r   )
rS   r   r   r   r   r   r   rT   r   r   r   s    r   rU   zNVMMASharedLayout.__hash__  s    T,,d.C.CTYYPTP_P_aeapap151B1BU4,,-262D2DU4--.$.2nnU4>>*H I 	I CGH I 	Ir   )FFNNN)r#   r$   r%   r&   rW   rX   r   boolr   r   r   r   r   r   r@   r1   staticmethodr	   r   rL   r4   rU   rY   rZ   s   @r   r   r   '  s     
IJJ(,L(49%,)-M8DI&-%)IxS	")+$	
 qu"&&
  &
Pt tIr   r   c                        e Zd ZU dZeed<   eed<   eed<   ee   ed<   dZeee      ed<   dZ	eee      ed<   dZ
eee      ed	<    fd
Zd ZdefdZd Z xZS )SwizzledSharedLayouta  
    Represents a generic swizzled shared memory layout.

    Args:
        vec (int): Vector width for swizzling.
        per_phase (int): Elements per swizzle phase.
        max_phase (int): Maximum number of swizzle phases.
        order (List[int]): Dimension ordering for swizzling.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    vec	per_phase	max_phaser:   Nr   r   r   c                    t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j
                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t         |   dt        | j                               t        | j                        }t        | |       t        | j                        |k(  sJ t        | j                        |k(  sJ t        | j                        |k(  sJ y )Nr   r   r   r:   r   r   r   )r<   r   r   r   r   r   r:   r   r   r   r=   r   r>   s     r   r@   z"SwizzledSharedLayout.__post_init__  s   E#7#ABK)=dnn)MNK)=dnn)MNG%9$**%EFN,@ARAR,STO-A$BTBT-UVK)=dnn)MN4::D$'4$$%---4%%&$...4>>"d***r   c           	          |j                  | j                  | j                  | j                  | j                  | j
                  | j                  | j                        S r   )get_swizzled_shared_layoutr   r   r   r:   r   r   r   r/   s     r   r1   zSwizzledSharedLayout._to_ir  sJ    11HHNNNNJJNN
 	
r   rC   c                     d }d| j                    d| j                   d| j                   d || j                         d || j                         d || j
                         d || j                         dS )Nc                 F    | ydj                  t        t        |             S rF   rI   rM   s    r   rO   z.SwizzledSharedLayout.mangle.<locals>.stringify  rP   r   SSS_rH   _SSS)r   r   r   r:   r   r   r   r   rO   s     r   r4   zSwizzledSharedLayout.mangle  s    	)
 dhhZq 0$..1A9TZZCXBYYZ[deievev[wZxxy  {D  EI  EW  EW  {X  zY  YZ  [d  ei  es  es  [t  Zu  uy  z  	zr   c                 X   t        | j                  | j                  | j                  t	        | j
                        | j                  rt	        | j                        nd | j                  rt	        | j                        nd | j                  rt	        | j                        f      S d f      S r   )	rS   r   r   r   rT   r:   r   r   r   r   s    r   rU   zSwizzledSharedLayout.__hash__  s    TXXt~~t~~4::&DDUDUd.?.?(@[_262D2DU4--.$.2nnU4>>*H I 	I CGH I 	Ir   )r#   r$   r%   r&   rW   rX   r   r   r   r   r   r@   r1   rL   r4   rU   rY   rZ   s   @r   r   r     sz     
HNN9(,L(49%,)-M8DI&-%)IxS	")+	
z zIr   r   c                        e Zd ZU dZeee      ed<   ee   ed<   dZeee      ed<   dZ	eee      ed<   dZ
eee      ed<    fdZd	 Zd
efdZd Zd Z xZS )PaddedSharedLayouta  
    Represents a layout for the access to shared memory. Compared to SwizzledSharedLayout,
    it uses padding to avoid shared memory bank conflicts. After every interval tensor elements,
    the corresponding number of padding elements are inserted.
    If a position corresponds to multiple intervals, the padding amounts are summed.

    In the following example of a tensor,
    `eM` represents original elements in the and `pN` represents padded element.

    Before padding, the shared memory looks like:
    [e0, e1,
     e2, e3,
     e4, e5,
     e6, e7,
     ...]

    After padding with interval-padding list [[2, 1], [4, 2]],
    the shared memory will be
    [e0, e1, p0,
     e2, e3, p1, p2, p3,
     e4, e5, p4,
     e6, e7, p5, p6, p7,
     ...]

    Args:
        interval_padding_pairs (List[int]): List of [interval, padding] pair and both interval and padding must be powers of 2.
        order (List[int]): Order of logical tensor dimensions; fastest-varying first.
        ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
        cta_split_num (Optional[List[int]]): Split factors for CTAs.
        cta_order (Optional[List[int]]): CTA ordering.
    interval_padding_pairsr:   Nr   r   r   c                    t         |   dt        | j                               t         |   dt	        | j
                               t         |   dt	        | j                               t         |   dt	        | j                               t         |   dt	        | j                               | j                          y )Nr   r:   r   r   r   )
r<   r   r   r   r   r:   r   r   r   verifyr`   s    r   r@   z PaddedSharedLayout.__post_init__  s    4mDD_D_6`aG%9$**%EFN,@ARAR,STO-A$BTBT-UVK)=dnn)MNr   c                     t        | j                   \  }}|j                  ||| j                  | j                  | j
                  | j                        S r   )zipr   get_padded_shared_layoutr:   r   r   r   )r   r0   	intervalspaddingss       r   r1   zPaddedSharedLayout._to_ir   sO    !4#>#>?	8//	8TZZQUQbQbdhdvdv04@ 	@r   rC   c                     d }d || j                          d || j                         d || j                         d || j                         d || j                         dS )Nc                 F    | ydj                  t        t        |             S rF   rI   rM   s    r   rO   z,PaddedSharedLayout.mangle.<locals>.stringify  rP   r   PaddedShared_rH   _PaddedShared)r   r:   r   r   r   r   s     r   r4   zPaddedSharedLayout.mangle  s    	)
 y)D)DEFa	RVR\R\H]G^^_`ijnj{j{`|_}}~  @I  JN  J\  J\  @]  ^  ^_  `i  jn  jx  jx  `y  _z  zG  H  	Hr   c                 H   | j                   }t        |      dkD  sJ d       t        d |D              sJ t        | \  }}t	        t        |            }t        |      t        |      k(  sJ d t        fd|D              sJ d       t        fd|D              sJ d       t        | j                        }|dkD  sJ d	       t        | |       t        | j                        |k(  sJ t        | j                        |k(  sJ t        | j                        |k(  sJ y )
Nr   zVPaddedSharedLayout interval_padding_pairs must have at least one interval-padding pairc              3   8   K   | ]  }t        |      d k(    yw)r   N)r=   ).0pairs     r   	<genexpr>z,PaddedSharedLayout.verify.<locals>.<genexpr>  s     4d3t9>4s   c                 &    | dkD  xr | | dz
  z  dk(  S )Nr   r   r(   )ns    r   <lambda>z+PaddedSharedLayout.verify.<locals>.<lambda>  s    !a%"<AQK1,< r   c              3   .   K   | ]  } |        y wr   r(   r   r   is_power_of_2s     r   r   z,PaddedSharedLayout.verify.<locals>.<genexpr>  s     7=#7   z;PaddedSharedLayout interval values must all be power of twoc              3   .   K   | ]  } |        y wr   r(   r   s     r   r   z,PaddedSharedLayout.verify.<locals>.<genexpr>  s     6=#6r   z:PaddedSharedLayout padding values must all be power of twoz*PaddedSharedLayout order must not be empty)r   r=   allr   r   setr:   r   r   r   r   )r   pairsr   r   unique_intervalsr   r   s         @r   r   zPaddedSharedLayout.verify  s   ++5zA~www~4e4444!5k	8I/#$I666<7Y77v9vv76X66t8tt64::axEEExD$'4$$%---4%%&$...4>>"d***r   c           	      Z   t        t        t        t        | j                              t        | j                        | j
                  rt        | j
                        nd | j                  rt        | j                        nd | j                  rt        | j                        f      S d f      S r   )rS   rT   rK   r   r:   r   r   r   r   s    r   rU   zPaddedSharedLayout.__hash__#  s    U3ud&A&ABC4::&DDUDUd.?.?(@[_262D2DU4--.$.2nnU4>>*H I 	I CGH I 	Ir   )r#   r$   r%   r&   r   rW   rX   r   r   r   r   r@   r1   rL   r4   r   rU   rY   rZ   s   @r   r   r     s    > !cO+9(,L(49%,)-M8DI&-%)IxS	")@
H H+*Ir   r   c                     dg|z  }| s|S d }| D ]D  }t        d t        |      D        d       }||}||xx   dz  cc<   1|r4|J ||xx   dz  cc<   F |S )Nr   c              3   2   K   | ]  \  }}|d k7  s|  yw)r   Nr(   )r   ivs      r   r   z bases_per_dim.<locals>.<genexpr>5  s     =$!Qa1fA=s   r   )next	enumerate)basesr   skip_broadcastresultnon_zero_idxro   idxs          r   bases_per_dimr   +  s~    S4ZFL 	&=)E"2=tD?L3K1K+++< A% 	& Mr   c                     t        | t              rt        | j                  t	        |            S t        | t
        t        f      rt        | j                  |      S | j                  S r   )	
isinstancerh   r   rk   r=   r\   rv   r9   r^   )r   rm   s     r   r9   r9   A  sQ    &12V..E
;;	F[*:;	<V]]E22###r   N)T)dataclassesr   typingr   r   triton.language.corer   r   r   triton.runtime.jitr	   r   r   r,   r6   r\   rh   rv   r   r   r   r   r   r   r   r9   r(   r   r   <module>r      s   ! ! T T 17$ $ $"   $K% K K\ $-# - -8 $2/ 2 2j $E( E E8 $4 ,I. ,I !,I^$ $   $eI eI eIP $4 <I< <I !<I~ $4 VI VI !VIt,$r   