
    0hpm                         d Z ddlZddlZddlZddlZddlZddlZddl	m
c mZ ddlmZ ddlmZ  edg       	 dd       Z	 	 ddZd	 Zd
 Zd Zd Zd Zd Zd Z	 	 	 	 ddZd Zd Zd Zd Zd Zy)z&Keras image dataset loading utilities.    N)io_utils)keras_exportzkeras.utils.split_dataset)v1c                 (   t        |       }|t        j                  j                  t        t
        t        j                  fvrt        d|  dt        |              ||t        d      t        | |      }|rK|t        j                  dt        d            }t        j                  |       t        j                   |       t#        |      }t%        |||      \  }}t	        |d|       }t	        || d       }	t'        |||       }t'        |	||       }	t        j                  j                  j)                  |      }t        j                  j                  j)                  |	      }	|t        j                  j                  u r:t+        |       r/t-        |       }
|
"|j/                  |
      }|	j/                  |
      }	|j1                  t        j                  j2                        }|	j1                  t        j                  j2                        }	||	fS )a  Split a dataset into a left half and a right half (e.g. train / test).

    Args:
        dataset: A `tf.data.Dataset` object, or a list/tuple of arrays with the
          same length.
        left_size: If float (in the range `[0, 1]`), it signifies
          the fraction of the data to pack in the left dataset. If integer, it
          signifies the number of samples to pack in the left dataset. If
          `None`, it uses the complement to `right_size`. Defaults to `None`.
        right_size: If float (in the range `[0, 1]`), it signifies
          the fraction of the data to pack in the right dataset. If integer, it
          signifies the number of samples to pack in the right dataset. If
          `None`, it uses the complement to `left_size`. Defaults to `None`.
        shuffle: Boolean, whether to shuffle the data before splitting it.
        seed: A random seed for shuffling.

    Returns:
        A tuple of two `tf.data.Dataset` objects: the left and right splits.

    Example:

    >>> data = np.random.random(size=(1000, 4))
    >>> left_ds, right_ds = tf.keras.utils.split_dataset(data, left_size=0.8)
    >>> int(left_ds.cardinality())
    800
    >>> int(right_ds.cardinality())
    200

    znThe `dataset` argument must be either a `tf.data.Dataset` object or a list/tuple of arrays. Received: dataset=z	 of type NzoAt least one of the `left_size` or `right_size` must be specified. Received: left_size=None and right_size=Noner       .A)_get_type_spectfdataDatasetlisttuplenpndarray	TypeErrortype
ValueError_convert_dataset_to_listrandomrandintintseedshufflelen_rescale_dataset_split_sizes_restore_dataset_from_listfrom_tensor_slices
is_batchedget_batch_sizebatchprefetchAUTOTUNE)dataset	left_size
right_sizer   r   dataset_type_specdataset_as_listtotal_length
left_splitright_split
batch_sizes              Z/var/www/html/engine/venv/lib/python3.12/site-packages/tf_keras/src/utils/dataset_utils.pysplit_datasetr,       s   B 'w/$rzz JJ!!(	4=/C
 	
 i/
 	
 /w8IJO<>>!SX.DD''L8:|Iz ojy12J
{|45K+%wJ -&K 33J?J''//44[AK BGGOO+
70C#G,
!#))*5J%++J7K$$RWW%5%56J&&rww'7'78K{""    c                 R   t        | |      }g }t        j                         }t        ||||      D ]I  }|t        t        fv r(	 t        j                  |      }|j                  |       9|j                  |       K |S # t        $ r t        j                  |t              }Y Mw xY w)a  Convert `tf.data.Dataset` object or list/tuple of NumPy arrays to a list.

    Args:
        dataset : A `tf.data.Dataset` object or a list/tuple of arrays.
        dataset_type_spec : the type of the dataset
        data_size_warning_flag (bool, optional): If set to True, a warning will
          be issued if the dataset takes longer than 10 seconds to iterate.
          Defaults to `True`.
        ensure_shape_similarity (bool, optional): If set to True, the shape of
          the first sample will be used to validate the shape of rest of the
          samples. Defaults to `True`.

    Returns:
        List: A list of tuples/NumPy arrays.
    dtype)
_get_data_iterator_from_datasettime_get_next_sampler   r   r   arrayr   objectappend)	r"   r%   data_size_warning_flagensure_shape_similaritydataset_iteratorr&   
start_timesamplearrs	            r+   r   r   x   s    * 7" OJ"	 + -5hhv& ""3'""6*+"   5hhvV45s   A??$B&%B&c                 f   |t         k(  rt        |       dk(  rt        d      t        | d         t        j
                  u r}| d   j                  }t        |       D ]_  \  }}t	        j                  |      j                  d   |d   k7  s/t        d| d| dt	        j                  |      j                   d       nt        dt        | d                t        t        |        S |t        k(  rt        |       dk(  rt        d      t        | d         t        j
                  u r}| d   j                  }t        |       D ]_  \  }}t	        j                  |      j                  d   |d   k7  s/t        d	| d| dt	        j                  |      j                   d
       nt        dt        | d                t        t        |        S |t        j                  j                  k(  r&t!        |       r| j#                         } t        |       S |t        j
                  k(  rt        |       S y)a2  Get the iterator from a dataset.

    Args:
        dataset :  A `tf.data.Dataset` object or a list/tuple of arrays.
        dataset_type_spec : the type of the dataset

    Raises:
        ValueError:
                  - If the dataset is empty.
                  - If the dataset is not a `tf.data.Dataset` object
                    or a list/tuple of arrays.
                  - If the dataset is a list/tuple of arrays and the
                    length of the list/tuple is not equal to the number

    Returns:
        iterator: An `iterator` object.
    r   zJReceived an empty list dataset. Please provide a non-empty list of arrays.zPReceived a list of NumPy arrays with different lengths. Mismatch found at index z, Expected shape=z Received shape=z<.Please provide a list of NumPy arrays with the same length.z5Expected a list of `numpy.ndarray` objects,Received: zJReceived an empty list dataset.Please provide a non-empty tuple of arrays.zQReceived a tuple of NumPy arrays with different lengths. Mismatch found at index z=.Please provide a tuple of NumPy arrays with the same length.z7Expected a tuple of `numpy.ndarray` objects, Received: N)r   r   r   r   r   r   shape	enumerater4   r   iterzipr   r	   r
   r   r   unbatch)r"   r%   expected_shapeielements        r+   r1   r1      sW   $ D w<1= 
 '!*%3$QZ--N'0 	
788G$**1-1BB$<<=3 ?**8)9 :**,((7*;*A*A)B C++ 	 !'!*-.0 
 CM""	e	#w<1> 
 '!*%3$QZ--N'0 	
788G$**1-1BB$<<=3 ?**8)9 :**,((7*;*A*A)B C++ 	 !'!*-.0 
 CM""	bggoo	-goo'GG}	bjj	(G} 
)r-   c           
   #     K   	 t        |       } t        |       }t        |t        j                  t
        j                  f      r t        j                  |      j                  }nd}d}| t        |       D ]  \  }}|rq|t        j                  |      j                  k7  rOt        dt        j                  |      j                   dt        j                  |      j                   d| d      |rM|dz  d	k(  rEt        j                         }t        ||z
        dkD  r |rt        j                  d
t         d       d}|  y# t        $ r t        d      w xY ww)a   "Yield data samples from the `dataset_iterator`.

    Args:
        dataset_iterator : An `iterator` object.
        ensure_shape_similarity (bool, optional): If set to True, the shape of
          the first sample will be used to validate the shape of rest of the
          samples. Defaults to `True`.
        data_size_warning_flag (bool, optional): If set to True, a warning will
          be issued if the dataset takes longer than 10 seconds to iterate.
          Defaults to `True`.
        start_time (float): the start time of the dataset iteration. this is
          used only if `data_size_warning_flag` is set to true.

    Raises:
        ValueError: - If the dataset is empty.
                    - If `ensure_shape_similarity` is set to True and the
                      shape of the first sample is not equal to the shape of
                      atleast one of the rest of the samples.

    Yields:
        data_sample: A tuple/list of numpy arrays.
    NFz|Received an empty Dataset. `dataset` must be a non-empty list/tuple of `numpy.ndarray` objects or `tf.data.Dataset` objects.z<All `dataset` samples must have same shape, Expected shape: z Received shape: z
 at index .
   r   zThe dataset is taking longer than 10 seconds to iterate over. This may be due to the size of the dataset. Keep in mind that the `split_dataset` utility is only for small in-memory dataset (e.g. < 10,000 samples).r,   )categorysource)r@   next
isinstancer	   Tensorr   r   r4   r>   StopIterationr   r?   r2   r   warningswarnResourceWarning)	r9   r8   r7   r:   first_samplefirst_sample_shaperD   r;   cur_times	            r+   r3   r3      s]    8
 01,-lRYY

$;<!#,!7!=!=!%&+# /0 	6"!RXXf%5%;%;; '')xx'='C'C&D E'')xx'7'='=&>jc  "2v{99;x*,-27MMM3
 "1. .3*1  
,
 	

s   EA(E ,CEEEc                 t   |t         t        fv rt        d t        |  D              S |t        j                  j
                  k(  rtt        |j                  t              rBi }| D ]9  }|j                         D ]$  \  }}||vr|g||<   ||   j                  |       & ; |S t        d t        |  D              S | S )z,Restore the dataset from the list of arrays.c              3   F   K   | ]  }t        j                  |        y wNr   r4   .0r;   s     r+   	<genexpr>z-_restore_dataset_from_list.<locals>.<genexpr>A  s     J&RXXf%J   !c              3   F   K   | ]  }t        j                  |        y wrW   rX   rY   s     r+   r[   z-_restore_dataset_from_list.<locals>.<genexpr>M  s     Nf&)Nr\   )r   r   rA   r	   r
   r   rL   element_specdictitemsr6   )r&   r%   original_datasetrestored_datasetdkvs          r+   r   r   <  s     UDM)JC4IJJJ	bggoo	-&33T:!$ 6GGI 6DAq 00/0c(+(+2215	66 $#N_8MNNNr-   c                    t        |       }t        |      }| /|t        t        fvr!||t        t        fvrt        d| d|       | |t        t        fvrt        d| d      ||t        t        fvrt        d| d      | dk(  r|dk(  rt	        d      |t        k(  r
| dk  s| |k\  s|t        k(  r| dk  s| d	k\  rt	        d
| d|        |t        k(  r
|dk  s||k\  s|t        k(  r|dk  s|d	k\  rt	        d| d|       ||cxk(  r	t        k(  rn n|| z   d	kD  rt	        d      |t        k(  rt        | |z        } n|t        k(  rt        |       } |t        k(  rt        ||z        }n|t        k(  rt        |      }| ||z
  } n||| z
  }| |z   |kD  rt	        d| |z    d|       | df|dffD ]"  \  }}|dk(  st	        d| d|  d| d| d	       t        |       t        |      }} | |fS )ar  Rescale the dataset split sizes.

    We want to ensure that the sum of
    the split sizes is equal to the total length of the dataset.

    Args:
        left_size : The size of the left dataset split.
        right_size : The size of the right dataset split.
        total_length : The total length of the dataset.

    Raises:
        TypeError: - If `left_size` or `right_size` is not an integer or float.
        ValueError: - If `left_size` or `right_size` is negative or greater
                      than 1 or greater than `total_length`.

    Returns:
        tuple: A tuple of rescaled left_size and right_size
    zjInvalid `left_size` and `right_size` Types. Expected: integer or float or None, Received: type(left_size)=z and type(right_size)=zTInvalid `left_size` Type. Expected: int or float or None, Received: type(left_size)=z.  zUInvalid `right_size` Type. Expected: int or float or None,Received: type(right_size)=rG   r   z]Both `left_size` and `right_size` are zero. At least one of the split sizes must be non-zero.   z=`left_size` should be either a positive integer smaller than z<, or a float within the range `[0, 1]`. Received: left_size=zB`right_size` should be either a positive integer and smaller than z< or a float within the range `[0, 1]`. Received: right_size=z^The sum of `left_size` and `right_size` is greater than 1. It must be less than or equal to 1.zvThe sum of `left_size` and `right_size` should be smaller than the {total_length}. Received: left_size + right_size = zand total_length = leftrightzWith `dataset` of length=z, `left_size`=z and `right_size`=z.Resulting zN side dataset split will be empty. Adjust any of the aforementioned parameters)r   r   floatr   r   round)r#   r$   r'   left_size_typeright_size_typesplitsides          r+   r   r   Q  s   & )_N:&O 	.e"D?3,#FC4_4EG
 	
 U|!C))7(8=
 	
 /#u"E**9):!=
 	
 A~*/@
 	
 	#!^yL8U"!^yA~(> *>k
 	
 	31_
l :e#1_
a  ,~ .?l
 	
 	>2U2"Q&:
 	

 )l23		3	)$	%:45
	C	:&
 :-			!I-
:,22;J2F1G!,1
 	
 #F+j'-BC tA:+L>+/
| <!F #>>   	NC
OzIj  r-   c                 X   t        | t              rt        S t        | t              rt        S t        | t        j                        rt        j                  S t        | t
              rt
        S t        | t        j                  j                        rt        j                  j                  S y)z!Get the type spec of the dataset.N)	rL   r   r   r   r   r_   r	   r
   r   )r"   s    r+   r   r     sj    '5!	GT	"	GRZZ	(zz	GT	"	GRWW__	-wwr-   c                     t        | d      S )z, "Check if the `tf.data.Dataset` is batched._batch_size)hasattr
tf_datasets    r+   r   r     s    :}--r-   c                 2    t        |       r| j                  S y)z"Get the batch size of the dataset.N)r   rr   rt   s    r+   r   r     s    *%%%r-   c           
          |dk7  rdg}|}ng }t        t        j                  j                  j	                               D ]  }t        j                  j                  j                  t        j                  j                  j                   |            sU|j                  d      rg|j                  d      r|dd }|j                  |        |s|}n(t        |      t        |      k7  rt        d| d|       t        t        |t        t        |                        }	t         j"                  j%                         }
g }g } fd	|D        D ]+  }|j                  |
j'                  t(        ||	||f             - g }|D ]+  }|j+                         \  }}|j                  |       ||z  }- |d
vrSt        |      t        |      k7  r't        dt        |       dt        |       d  d      t        t        |            }nId}t-        j.                  t        |      fd      }|D ]!  }||||t        |      z    |t        |      z  }# |#t1        j2                  dt        |       d       n.t1        j2                  dt        |       dt        |       d       |
j5                          |
j                          |D cg c],  }t        j                  j                  j                   |      . }}|r|t,        j6                  j9                  d      }t,        j6                  j;                  |      }|j=                  |       t,        j6                  j;                  |      }|j=                  |       |||fS c c}w )a  Make list of all files in `directory`, with their labels.

    Args:
      directory: Directory where the data is located.
          If `labels` is "inferred", it should contain
          subdirectories, each containing files for a class.
          Otherwise, the directory structure is ignored.
      labels: Either "inferred"
          (labels are generated from the directory structure),
          None (no labels),
          or a list/tuple of integer labels of the same size as the number of
          valid files found in the directory. Labels should be sorted according
          to the alphanumeric order of the image file paths
          (obtained via `os.walk(directory)` in Python).
      formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt").
      class_names: Only valid if "labels" is "inferred". This is the explicit
          list of class names (must match names of subdirectories). Used
          to control the order of the classes
          (otherwise alphanumerical order is used).
      shuffle: Whether to shuffle the data. Default: True.
          If set to False, sorts the data in alphanumeric order.
      seed: Optional random seed for shuffling.
      follow_links: Whether to visits subdirectories pointed to by symlinks.

    Returns:
      tuple (file_paths, labels, class_names).
        file_paths: list of file paths (strings).
        labels: list of matching integer labels (same length as file_paths)
        class_names: names of the classes corresponding to these labels, in
          order.
    inferred rG   /NzjThe `class_names` passed did not match the names of the subdirectories of the target directory. Expected: z, but received: c              3   r   K   | ].  }t         j                  j                  j                  |       0 y wrW   )r	   iogfilejoin)rZ   subdir	directorys     r+   r[   z"index_directory.<locals>.<genexpr>5  s&     NFBEEKK$$Y7Ns   47)rx   NzfExpected the lengths of `labels` to match the number of files in the target directory. len(labels) is z while we found z files in directory r   int32r/   zFound z files.z files belonging to z	 classes.r   )sortedr	   r}   r~   listdirisdirr   
startswithendswithr6   setr   r_   rA   ranger   multiprocessingpool
ThreadPoolapply_asyncindex_subdirectorygetr   zerosr   	print_msgcloser   r   RandomStater   )r   labelsformatsclass_namesr   r   follow_linkssubdirsr   class_indicesr   results	filenamesdirpathlabels_listrespartial_filenamespartial_labelsrD   fname
file_pathsrngs   `                     r+   index_directoryr     sO   P $RUU[[00;< 	+Fuu{{  !1!1)V!DE((-s+!'NN6*	+ !K;3w</ !!(	)9+H 
 [%K0@*ABCM **,DGINgN 
"-w?	

 K ',/GGI)>>*&&	' ''v;#i.(Dv;-/I/? @  ){!-  S[)3y>+7;) 	%N2@F1q3~../^$$A	% ~VC	N#37;<S^$ %k"#9.	
 	JJLIIKBKL"%%++""9e4LJL<99$$S)Dii##D)Jii##D)Fv{** Ms   	1Nc              #   ,  K   |s*t         j                  j                  j                  |       }nt	        j                  | |      }t        |d       D ]<  \  }}}t        |      D ](  }|j                         j                  |      s#||f * > y w)N)followlinksc                     | d   S )Nr    xs    r+   <lambda>z"iter_valid_files.<locals>.<lambda>l  s
    QqT r-   )key)r	   r}   r~   walkosr   lowerr   )r   r   r   r   root_filesr   s           r+   iter_valid_filesr   g  s~     uu{{	*wwyl; >: "aE] 	"E{{}%%g.Ek!	""s   BB	Bc                    t         j                  j                  |       }t        | ||      }g }g }|D ]  \  }}	|j	                  ||          t
        j                  j                  j                  ||	      }
t
        j                  j                  j                  |t         j                  j                  |
|             }|j	                  |        ||fS )aN  Recursively walks directory and list image paths and their class index.

    Args:
      directory: string, target directory.
      class_indices: dict mapping class names to their index.
      follow_links: boolean, whether to recursively follow subdirectories
        (if False, we only list top-level images in `directory`).
      formats: Allowlist of file extensions to index (e.g. ".jpg", ".txt").

    Returns:
      tuple `(filenames, labels)`. `filenames` is a list of relative file
        paths, and `labels` is a list of integer labels corresponding to these
        files.
    )
r   pathbasenamer   r6   r	   r}   r~   r   relpath)r   r   r   r   dirnamevalid_filesr   r   r   r   absolute_pathrelative_paths               r+   r   r   r  s     ggy)G"9lGDKFI" (emG,-((u5((RWW__]I>
 	'( fr-   c                 (   |s| |fS t        |t        |       z        }|dk(  r5t        j                  dt        |       |z
   d       | d|  } |d|  }| |fS |dk(  r)t        j                  d| d       | | d } || d }| |fS t	        d|       )a  Potentially restict samples & labels to a training or validation split.

    Args:
      samples: List of elements.
      labels: List of corresponding labels.
      validation_split: Float, fraction of data to reserve for validation.
      subset: Subset of the data to return.
        Either "training", "validation", or None. If None, we return all of the
        data.

    Returns:
      tuple (samples, labels), potentially restricted to the specified subset.
    trainingzUsing z files for training.N
validationz files for validation.z>`subset` must be either "training" or "validation", received: )r   r   r   r   r   )samplesr   validation_splitsubsetnum_val_sampless        r+    get_training_or_validation_splitr     s     *S\9:OS\O344LM	
 +O+,)/)* F? 
<	VO#44JKL?*+,()* F?	 **03
 	
r-   c                 &   t         j                  j                  j                  |       }|dk(  r.|j	                  d t         j                  j
                        }|S |dk(  r.|j	                  fdt         j                  j
                        }|S )a]  Create a tf.data.Dataset from the list/tuple of labels.

    Args:
      labels: list/tuple of labels to be converted into a tf.data.Dataset.
      label_mode: String describing the encoding of `labels`. Options are:
      - 'binary' indicates that the labels (there can be only 2) are encoded as
        `float32` scalars with values 0 or 1 (e.g. for `binary_crossentropy`).
      - 'categorical' means that the labels are mapped into a categorical
        vector.  (e.g. for `categorical_crossentropy` loss).
      num_classes: number of classes of labels.

    Returns:
      A `Dataset` instance.
    binaryc                 X    t        j                  t        j                  | d      d      S )Nfloat32r{   )axis)r	   expand_dimscastr   s    r+   r   z#labels_to_dataset.<locals>.<lambda>  s    bnnRWWQ	%:D r-   )num_parallel_callscategoricalc                 0    t        j                  |       S rW   )r	   one_hot)r   num_classess    r+   r   z#labels_to_dataset.<locals>.<lambda>  s    bjjK0 r-   )r	   r
   r   r   mapr!   )r   
label_moder   label_dss     ` r+   labels_to_datasetr     s     ww11&9HX<<D!ww//   
 O 
}	$<<0!ww//   
 Or-   c                     | rd| cxk  rdk  sn t        d|        | s|r| r|st        d      |dvrt        d|       | r|r|t        d      yyy)	a  Raise errors in case of invalid argument values.

    Args:
      validation_split: float between 0 and 1, fraction of data to reserve for
        validation.
      subset: One of "training", "validation" or "both". Only used if
        `validation_split` is set.
      shuffle: Whether to shuffle the data. Either True or False.
      seed: random seed for shuffling and transformations.
    r   rg   z6`validation_split` must be between 0 and 1, received: zBIf `subset` is set, `validation_split` must be set, and inversely.)r   r   bothNzF`subset` must be either "training", "validation" or "both", received: NzIf using `validation_split` and shuffling the data, you must provide a `seed` argument, to make sure that there is no overlap between the training and validation subset.)r   )r   r   r   r   s       r+   check_validation_split_argr     s     $4 8q 8)*,
 	
 	F-=&P
 	
 ==117:
 	
 GB
 	
 )5Gr-   )NNFN)TT)NTNF) __doc__multiprocessing.poolr   r   r   r2   rO   numpyr   tensorflow.compat.v2compatv2r	   tf_keras.src.utilsr    tensorflow.python.util.tf_exportr   r,   r   r1   r3   r   r   r   r   r   r   r   r   r   r   r   r   r-   r+   <module>r      s    -  	     ! ! ' : )b1BFT# 2T#t   	,^K\DN*!D .
 	t+n":!H:
r-   