
    i@              #       >   U d dl Z d dlmZmZmZ d dlmZ d dlZd dlm	Z	  ed      Z
 ed      Zi Zeej                  j                  ef   ed<    eh d      Zd	ed
edeee
ef   gee
ef   f   fdZ edd      	 d4dd d ddej,                  dej,                  dej,                  deej,                     dedededej,                  fd       Zdee   dedefdZdej,                  dededej,                  fdZdej,                  d ej,                  d!ed"edee   d#edej,                  fd$Zd!ed"eddfd%Zdej,                  d ej,                  d!ed"edee   dej,                  fd&Z ed'd      	 	 	 d5dd d d dd(dd)dej,                  d ej,                  d*ej,                  d+eej,                     d,eej,                     d-eej,                     d.ed/ed0ed#edee   d1ed2ee   deej,                  ej,                  ej,                  ej,                  f   fd3       Z y)6    N)CallableOptionalTypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_versionreturnc                 d     dt         t        t        f   dt         t        t        f   f fd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                     d }t        j                  j                  d d| d      |       }| t        t	        t	        t         j
                  j                        |      <   |j                  |        |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr
   getattropsonnxregister_fake)r   overloadtorch_opr   r   s      N/var/www/html/engine/venv/lib/python3.12/site-packages/torch/onnx/ops/_impl.py	decoratorz_onnx_op.<locals>.decorator   s|    =/*==**WIQxj) + 

  	wwuyy~~w'GRS
 	t$    )r   r   r	   )r   r   r#   s   `` r"   _onnx_opr%      s0    
R( Xb"f-=  r$   RotaryEmbedding   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr(   r)   r*   c                V   | j                   t              }d   d   t        j                  j	                         dk(  fd       t        j                  j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  j	                         dk(  xr j	                         dk(  fd	       n@t        j                  j	                         d
k(  xr j	                         d
k(  fd       |dk(  rt        j
                  | d      } nG|d
k(  rBt        j                  |dk7  fd       d   }||z  }	||	g}
t        j                  | |
      } t        j                  t        | j                         dk(  d        | j                   d
   }	|dk(  r|	}| ddddddd|f   }| dddddd|df   }|dz        nt        j                  j                   d   k(  xr j                   d   k(  fd       t        j                  j                   d   k(  xr j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  j                   d   k(  fd       t        j                  d      t        j                  d      |r%|dddddddddf   }|dddddddddf   }nt        j                  |dd      \  }}|z  |z  z
  }|z  |z  z   }|rft        j                  |d      }t        j                  |d      }t        j                  ||fd      }t        j                  ||j                         }nt        j                  ||fd      }t        j                  ||fd      }|d
k(  rt        j                  |      S t        j
                  |d      S )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                  "    d j                    S )Nz6position_ids must be 2D when provided. Received shape shape)r.   s   r"   <lambda>z%rotary_embedding_23.<locals>.<lambda>F   s    L\M_M_L`a r$   c                  .    d  dj                   d    S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r3   )
batch_sizer.   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>J   s$    LZLXdeqewewxyezd{| r$   r   c                  .    d d j                   d    S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r7   r   r3   )r.   sequence_lengths   r"   r5   z%rotary_embedding_23.<locals>.<lambda>N   s=    QRaQbbno{  pB  pB  CD  pE  oF  G r$   c                  <    d j                    dj                    S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r3   r,   r-   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>R   &     ((1'88J9??J[] r$      c                  <    d j                    dj                    S )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r<   r3   r=   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>X   r>   r$      )r   r1   r   r?   c                      d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   )input_shapes   r"   r5   z%rotary_embedding_23.<locals>.<lambda>e   s    abmano r$   c                       y)Nzx should be a 4D tensor by nowr   r   r$   r"   r5   z%rotary_embedding_23.<locals>.<lambda>l       r$   c                  0    dj                    d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r3   )r8   cosr:   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>   $    .+@FSbRccij r$   c                  0    dj                    d  d dS )Nzsin has shape rG   rH   rI   r3   )r8   r:   sins   r"   r5   z%rotary_embedding_23.<locals>.<lambda>   rK   r$   c                  0    d j                   d    d dS )NzLast dimension of cos cache (rN   ') should match rotary_embedding_dim/2 ().r3   )rJ   rotary_embedding_dim_halfs   r"   r5   z%rotary_embedding_23.<locals>.<lambda>   .    /		">ef  fA  AC  D r$   c                  0    dj                   d    d  dS )NzLast dimension of sin cache (rN   rP   rQ   r3   )rR   rM   s   r"   r5   z%rotary_embedding_23.<locals>.<lambda>   rS   r$   dim)
r4   lenr   _checkrV   permutereshape	unsqueezechunkcat)r+   r,   r-   r.   r(   r)   r*   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr8   rJ   rC   rR   r:   rM   s    ```               @@@@@@r"   rotary_embedding_23rj   /   s    ''K[!JQJ!"oO !#a	
 	q!Z/|	
 	q!_4 G	
 	MMOq 9Y]]_%9]	
 	MMOq 9Y]]_%9]	
 Q MM!\*	qNo	
 "!n9,	)YG	MM!Y'	LLQWW"$LM
I q (Aq////0HQ12334L 4 9 
 
 	LL		!
"Fsyy|'Fj 
LL		!
"Fsyy|'Fj 
LL		"22 	D 
LL		"22 	D //QC //QC
 aAqt!tm$aAqt!tm$Xqb1B 8cBhD8cBhD  tR(tR())T4Lb9==(..A99dD\r2YY,/R8FQ}}V[11 ==..r$   scaler`   c                 :    | | S dt        j                  |      z  S )z/Get the scale factor for attention computation.g      ?)mathsqrt)rk   r`   s     r"   _get_scale_factorro      s     %5GC$))I2F,FGr$   tensorr8   c                     | j                   d   | j                   d   }}||z  }| j                  ||||      j                  dd      j                         S )z1Reshape 3D tensor to 4D for multi-head attention.r   r1   )r4   view	transpose
contiguous)rp   r8   r)   r:   r_   r`   s         r"   _reshape_3d_to_4dru      sP     $*<<?FLLO[Oy(IJIF	1a	r$   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec           	          |dk(  rt        | ||||      S t        j                  t        j                  | |j	                  dd                  S )z1Get QK output tensor based on the specified mode.r   r0   rN   )_compute_qk_output_for_mode_0r   
zeros_likematmulrs   )rv   rw   rx   ry   rk   rz   s         r"   _get_qk_output_for_aten_spdar      sO     !,q%';U
 	

 QB0C DEEr$   c                 H     t        j                   z  dk(   fd       y)z-Validate Group Query Attention configuration.r   c                      d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   )ry   rx   s   r"   r5   z-_validate_gqa_configuration.<locals>.<lambda>   s    - 344YZnYooxy r$   N)r   rX   )rx   ry   s   ``r"   _validate_gqa_configurationr      s"     
LL22a7yr$   c                     |}||k7  r||z  }|j                  |d      }t        || j                  d         }t        j                  |      }| |z  }	||z  }
t        j                  |	|
j                  dd            S )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r   rU   r?   r0   rN   )repeat_interleavero   r4   rm   rn   r   r~   rs   )rv   rw   rx   ry   rk   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaleds              r"   r|   r|      s     H22+/CC&&}!&<$UAGGAJ7L<(J:~H*$H<<("4"4R"<==r$   	Attention        )	is_causalkv_num_headsq_num_headsrz   rk   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer   r   r   r   r   c                	   d\  }}}t        | j                        }| j                  d   }t        | j                        dk(  rWt        j                  |dk7  xr |dk7  d        | j                  d   }t	        | ||      } t	        |||      }t	        |||      }t        j                  t        | j                        dk(  xr2 t        |j                        dk(  xr t        |j                        dk(  d        | j                  |   }t        |
|      }
|t        j                  ||g|	      n|j                         }|t        j                  ||g|	      n|j                         }||}}| j                  |   }|j                  |   }| j                  |   }|j                  |   }|d
k(  xr0 |	dk(  xr) |du xr# |du xs |j                  t        j                  k(  }t        ||       |rud}|"|j                  t        j                  k(  r| n|}t        j                  j                  j                  | |||d
||
t        ||k7              }t        | ||||
|	      }nY||k7  r+||z  }|j                  ||	      }|j                  ||	      }t        j                   ||| j                  | j"                        }|ryt        j                  |du d        t        j$                  t        j&                  ||t        j                  | j"                              }|j)                  | t+        d            }|?|j                  t        j                  k(  r|j)                  | t+        d            }n||z   }t        |
| j                  d         } t-        j.                  |       }!| |!z  }"||!z  }#t        j0                  |"|#j3                  dd            }$|$}|$|z   }%|	dk(  r|%}|d
kD  r|t        j4                  |%|z        z  }%|	dk(  r|%}|w|t6        v rW|%j                  }&|%j9                  t:        j<                  |         }%t        j>                  |%d	      }'|'j9                  |&      }'n/t        j>                  |%d	      }'nt        j>                  |%d	      }'|	dk(  r|'}t        j0                  |'|      }|dk(  r1|j3                  dd      jA                         jC                  ||d      }||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r   r1   r?   r   r?   c                       y)Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r$   r"   r5   zattention_23.<locals>.<lambda>  rE   r$   r   rA   c                       y)Nz'Q, K, and V should be 4D tensors by nowr   r   r$   r"   r5   zattention_23.<locals>.<lambda>!  rE   r$   NrU   r   )r   	dropout_pr   rk   
enable_gqa)dtypedevicec                       y)Nz'Cannot use both is_causal and attn_maskr   r   r$   r"   r5   zattention_23.<locals>.<lambda>v  rE   r$   z-infr0   rN   r1   )"rW   r4   r   rX   ru   ro   r]   cloner   boolr   nn
functionalscaled_dot_product_attentionr   r   zerosr   trilonesmasked_fillfloatrm   rn   r~   rs   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxrt   rr   )(rv   rw   r   r   r   r   r   r   r   rz   rk   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr8   q_sequence_lengthq_head_sizepresent_keypresent_valuerx   ry   kv_sequence_lengthcan_use_sdpasdpa_attn_maskri   	qk_outputr   	attn_biascausal_maskr   r   r   r   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxs(                                           r"   attention_23r      s	   & ,3(L, !''lOJ 177|q12!2Q	
 GGAJa[9a\:a\:	LLAGGEc!''la/ECLA4E9 ''(#Ke[1E
  			8Q-\2WWY  ! 			:q/|4WWY  qA '',/77<0-. 	3 	A!Q&	A%	A $?)//UZZ"?	    35IJ  +4??ejj+HiZiN$$AA$#';; B 
 1 !
	 "66/3GGM##M|#DA##M|#DA KK1
	
 LLT!#T  **

%&**88	K "--{lE&MJI  %**,%119*eFmL	 &	1	 )
; YY|,
z>z> !<<(2D2DR2LM %	 ()3 A%$I S="UZZw0F%GGL A%$I ( $QQ!-!3!3+#==>OP  #]]<R@
']]>:
"]]<R@
|<J A%"I j!, ! Q"--/44ZARTVW 	 ;y88r$   )N)NNN)!rm   typingr   r   r   typing_extensionsr   r   torch.onnx.opsr   r   r	   r
   dict_ops
OpOverload__annotations__	frozensetr   strintr%   Tensorr   rj   r   ro   ru   r   r   r|   tupler   r   r$   r"   <module>r      s|    . . '  * t_T] AC UZZ22H<= B091 -!$xB (2r6"223* 

R 
 ,0	D/  !D/||D/||D/ ||D/ 5<<(	D/ D/ D/ D/ \\D/ !D/NHXe_ H H H

LL
&)
69

\\
F||F||F F 	F
 E?F F \\F$47	>||>||> > 	>
 E?> \\>, 
+r
 )-'+)-F9 !"!'+F9||F9||F9 ||F9 %	F9
 u||$F9 &F9 F9 F9 F9 F9 E?F9 F9  }F9 5<<u||U\\ABF9 F9r$   