a
    Of%                     @  s>  d Z ddlmZ ddlZddlmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZ erdd	lmZmZmZ dd
lmZmZ ddlmZmZmZmZ dZ ddddddZ!dde dfdddddddddZ"de fddddddd Z#de dfd!ddddd"d#d$Z$de dfd%ddddd"d&d'Z%dS )(z"
data hash pandas / numpy objects
    )annotationsN)TYPE_CHECKING)hash_object_array)is_list_like)CategoricalDtype)ABCDataFrameABCExtensionArrayABCIndexABCMultiIndex	ABCSeries)HashableIterableIterator)	ArrayLikenpt)	DataFrameIndex
MultiIndexSeriesZ0123456789123456zIterator[np.ndarray]intznpt.NDArray[np.uint64])arrays	num_itemsreturnc           	      C  s   zt | }W n" ty.   tjg tjd Y S 0 t|g| } td}t|td }d}t| D ]:\}}|| }||N }||9 }|td| | 7 }|}qh|d |ksJ d|td7 }|S )	z
    Parameters
    ----------
    arrays : Iterator[np.ndarray]
    num_items : int

    Returns
    -------
    np.ndarray[uint64]

    Should be the same as CPython's tupleobject.c
    )dtypeiCB ixV4 r   iXB    zFed in wrong num_itemsi| )	nextStopIterationnparrayuint64	itertoolschainZ
zeros_like	enumerate)	r   r   firstZmultoutZlast_iiaZ	inverse_i r'   Q/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/core/util/hashing.pycombine_hash_arrays/   s"    
r)   Tutf8zIndex | DataFrame | Seriesboolstrz
str | Noner   )objindexencodinghash_key
categorizer   c                   s  ddl m} du rtttr8|tdddS ttrptj j	ddd}||ddd}ntt
rtj j	ddd}|rȇ fd	d
dD }t|g|}	t|	d}||jddd}nttrj fdd
 D }
tj}|rL fdd
dD }|d7 }t|
|}dd
 |D }
t|
|}||jddd}ntdt |S )a>  
    Return a data hash of the Index/Series/DataFrame.

    Parameters
    ----------
    obj : Index, Series, or DataFrame
    index : bool, default True
        Include the index in the hash (if Series/DataFrame).
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    Series of uint64, same length as the object

    Examples
    --------
    >>> pd.util.hash_pandas_object(pd.Series([1, 2, 3]))
    0    14639053686158035780
    1     3869563279212530728
    2      393322362522515241
    dtype: uint64
    r   )r   Nr   F)r   copyr2   )r.   r   r2   c                 3  s$   | ]}t jd  djV  qdS F)r.   r/   r0   r1   Nhash_pandas_objectr.   _values.0_r1   r/   r0   r-   r'   r(   	<genexpr>   s   z%hash_pandas_object.<locals>.<genexpr>N   c                 3  s"   | ]\}}t |j V  qd S r=   )
hash_arrayr7   )r9   r:   Zseries)r1   r/   r0   r'   r(   r<      s   c                 3  s$   | ]}t jd  djV  qdS r4   r5   r8   r;   r'   r(   r<      s   r   c                 s  s   | ]
}|V  qd S r=   r'   )r9   xr'   r'   r(   r<          zUnexpected type for hashing )pandasr   _default_hash_key
isinstancer
   hash_tuplesr	   r?   r7   astyper   r    r!   r)   r.   r   itemslencolumns	TypeErrortype)r-   r.   r/   r0   r1   r   hZserZ
index_iterr   hashesr   Zindex_hash_generator_hashesr'   r;   r(   r6   S   sJ    #







r6   z+MultiIndex | Iterable[tuple[Hashable, ...]])valsr/   r0   r   c                   sz   t | stdddlm m} t| ts6|| n|  fddtj	D }fdd|D }t
|t|}|S )a  
    Hash an MultiIndex / listlike-of-tuples efficiently.

    Parameters
    ----------
    vals : MultiIndex or listlike-of-tuples
    encoding : str, default 'utf8'
    hash_key : str, default _default_hash_key

    Returns
    -------
    ndarray[np.uint64] of hashed values
    z'must be convertible to a list-of-tuplesr   )Categoricalr   c              	     s,   g | ]$}  j| tj| d dqS )F
categoriesordered)_simple_newcodesr   Zlevels)r9   level)rP   mir'   r(   
<listcomp>   s
   zhash_tuples.<locals>.<listcomp>c                 3  s   | ]}|j  d dV  qdS )Fr/   r0   r1   N)_hash_pandas_object)r9   cat)r/   r0   r'   r(   r<      s   zhash_tuples.<locals>.<genexpr>)r   rJ   rB   rP   r   rD   r
   from_tuplesrangeZnlevelsr)   rH   )rO   r/   r0   r   Zcat_valsrM   rL   r'   )rP   r/   r0   rW   r(   rE      s    
	rE   r   )rO   r/   r0   r1   r   c                 C  s\   t | dstdt| tr,| j|||dS t| tjsNtdt| j dt	| |||S )a  
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.

    Examples
    --------
    >>> pd.util.hash_array(np.array([1, 2, 3]))
    array([ 6238072747940578789, 15839785061582574730,  2185194620014831856],
      dtype=uint64)
    r   zmust pass a ndarray-likerY   z6hash_array requires np.ndarray or ExtensionArray, not z!. Use hash_pandas_object instead.)
hasattrrJ   rD   r   rZ   r   ZndarrayrK   __name___hash_ndarray)rO   r/   r0   r1   r'   r'   r(   r?      s    

r?   z
np.ndarrayc                 C  s  | j }t|tjr@t| j|||}t| j|||}|d|  S |tkrT| d} nt	|j
tjtjfr~| djddd} nt	|j
tjr|jdkr| d| j j d} n|rdd	lm}m}m}	 |	| dd
\}
}t||dd}||
|}|j||ddS zt| ||} W n, tyD   t| tt||} Y n0 | | d? N } | td9 } | | d? N } | td9 } | | d? N } | S )z!
    See hash_array.__doc__.
       u8i8Fr3      ur   )rP   r   	factorize)sortrQ   rY      l   e9z    l   b&&&	    )r   r   Z
issubdtypeZ
complex128r`   realimagr+   rF   
issubclassrK   Z
datetime64Ztimedelta64viewnumberitemsizerB   rP   r   rf   r   rT   rZ   r   rJ   r,   objectr   )rO   r/   r0   r1   r   Z	hash_realZ	hash_imagrP   r   rf   rU   rR   r[   r'   r'   r(   r`     s>    	
r`   )&__doc__
__future__r   r    typingr   numpyr   Zpandas._libs.hashingr   Zpandas.core.dtypes.commonr   Zpandas.core.dtypes.dtypesr   Zpandas.core.dtypes.genericr   r   r	   r
   r   collections.abcr   r   r   Zpandas._typingr   r   rB   r   r   r   r   rC   r)   r6   rE   r?   r`   r'   r'   r'   r(   <module>   s<   	&f43