a
    Of.                     @  s  d Z ddlmZ ddlmZmZ ddlmZmZm	Z	 ddl
ZddlmZ ddlmZmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlm Z  erddl!m"Z"m#Z# ddl$m%Z%m&Z& ddddddddZ'G dd deZ(G dd de(Z)G dd de(Z*ddddd Z+d!d"d!d#d$d%Z,d!d"d!d&d'd(Z-d!d"d!d&d)d*Z.d!d"d!d+d,d-Z/d!d.d/d0d1Z0dd2d3d4d5Z1dS )6z
Module responsible for execution of NDFrame.describe() method.

Method NDFrame.describe() delegates actual execution to function describe_ndframe().
    )annotations)ABCabstractmethod)TYPE_CHECKINGCallablecastN)	Timestamp)DtypeObjNDFrameTnpt)validate_percentile)is_bool_dtypeis_numeric_dtype)
ArrowDtypeDatetimeTZDtypeExtensionDtype)Float64Dtype)concat)format_percentiles)HashableSequence)	DataFrameSeriesr
   str | Sequence[str] | Nonez#Sequence[float] | np.ndarray | None)objincludeexcludepercentilesreturnc                 C  sN   t |}| jdkr$ttd| d}nttd| ||d}|j|d}tt|S )a   Describe series or dataframe.

    Called from pandas.core.generic.NDFrame.describe()

    Parameters
    ----------
    obj: DataFrame or Series
        Either dataframe or series to be described.
    include : 'all', list-like of dtypes or None (default), optional
        A white list of data types to include in the result. Ignored for ``Series``.
    exclude : list-like of dtypes or None (default), optional,
        A black list of data types to omit from the result. Ignored for ``Series``.
    percentiles : list-like of numbers, optional
        The percentiles to include in the output. All should fall between 0 and 1.
        The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and
        75th percentiles.

    Returns
    -------
    Dataframe or series description.
       r   r   r   )r   r   r   )r   )_refine_percentilesndimSeriesDescriberr   DataFrameDescriberdescriber
   )r   r   r   r   Z	describerresult r'   U/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/core/methods/describe.pydescribe_ndframe7   s    
r)   c                   @  s4   e Zd ZdZdddddZedddd	d
ZdS )NDFrameDescriberAbstractzAbstract class for describing dataframe or series.

    Parameters
    ----------
    obj : Series or DataFrame
        Object to be described.
    zDataFrame | SeriesNone)r   r   c                 C  s
   || _ d S Nr    )selfr   r'   r'   r(   __init__n   s    z!NDFrameDescriberAbstract.__init__Sequence[float] | np.ndarrayr   r   c                 C  s   dS )zDo describe either series or dataframe.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.
        Nr'   )r-   r   r'   r'   r(   r%   q   s    z!NDFrameDescriberAbstract.describeN)__name__
__module____qualname____doc__r.   r   r%   r'   r'   r'   r(   r*   e   s   r*   c                   @  s*   e Zd ZU dZded< dddddZdS )	r#   z2Class responsible for creating series description.r   r   r/   r0   c                 C  s   t | j}|| j|S r,   )select_describe_funcr   )r-   r   describe_funcr'   r'   r(   r%      s    zSeriesDescriber.describeN)r1   r2   r3   r4   __annotations__r%   r'   r'   r'   r(   r#   |   s   
r#   c                      sT   e Zd ZU dZded< ddddd fddZd	dd
ddZddddZ  ZS )r$   ab  Class responsible for creating dataobj description.

    Parameters
    ----------
    obj : DataFrame
        DataFrame to be described.
    include : 'all', list-like of dtypes or None
        A white list of data types to include in the result.
    exclude : list-like of dtypes or None
        A black list of data types to omit from the result.
    r   r   r   r+   )r   r   r   r   c                  s:   || _ || _|jdkr*|jjdkr*tdt | d S )N   r   z+Cannot describe a DataFrame without columns)r   r   r"   columnssize
ValueErrorsuperr.   )r-   r   r   r   	__class__r'   r(   r.      s
    zDataFrameDescriber.__init__r/   r0   c                   sj   |   }g }| D ] \}}t|}|||| qt| t fdd|D ddd}|j |_|S )Nc                   s   g | ]}|j  d dqS )F)copy)Zreindex.0xZ	col_namesr'   r(   
<listcomp>       z/DataFrameDescriber.describe.<locals>.<listcomp>r   F)Zaxissort)_select_dataitemsr5   appendreorder_columnsr   r9   r?   )r-   r   dataldesc_seriesr6   dr'   rC   r(   r%      s    zDataFrameDescriber.describe)r   c                 C  s   | j du rB| jdu rBtjdg}| jj|d}t|jdkr~| j}n<| j dkrj| jdurbd}t|| j}n| jj| j | jd}|S )zSelect columns to be described.Ndatetime)r   r   allz*exclude must be None when include is 'all')r   r   )	r   r   npnumberr   Zselect_dtypeslenr9   r;   )r-   Zdefault_includerK   msgr'   r'   r(   rG      s    


zDataFrameDescriber._select_data)	r1   r2   r3   r4   r7   r.   r%   rG   __classcell__r'   r'   r=   r(   r$      s
   
r$   zSequence[Series]zlist[Hashable])rL   r   c                 C  sT   g }t  }tdd | D td}|D ]*}|D ] }||vr,|| || q,q$|S )z,Set a convenient order for rows for display.c                 s  s   | ]}|j V  qd S r,   )indexr@   r'   r'   r(   	<genexpr>   rE   z"reorder_columns.<locals>.<genexpr>)key)setsortedrT   addrI   )rL   namesZ
seen_namesZldesc_indexesZidxnamesnamer'   r'   r(   rJ      s    
rJ   r   zSequence[float])rN   r   r   c                 C  s   ddl m} t|}g d| dg }|  |  |  |  g| |  | 	 g }t
| jtrt
| jtr| jjdkrd}qddl}t| }qt }n| jjdv rtd}nd}|||| j|d	S )
zDescribe series containing numerical data.

    Parameters
    ----------
    series : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r   )countmeanstdminmaxmNZiufbfloatrW   r^   dtype)pandasr   r   r`   ra   rb   rc   quantiletolistrd   
isinstancerh   r   r   kindZpyarrowZfloat64r   rR   r^   )rN   r   r   formatted_percentiles
stat_indexrO   rh   par'   r'   r(   describe_numeric_1d   s(    
rq   )rK   percentiles_ignoredr   c           
      C  s   g d}|   }t||dk }|dkrD|jd |jd  }}d}ntjtj }}d}|  |||g}ddlm}	 |	||| j	|dS )zDescribe series containing categorical data.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    )r`   uniquetopfreqr   Nobjectr_   rg   )
value_countsrT   rW   ilocrR   nanr`   ri   r   r^   )
rK   rr   r]   	objcountscount_uniquert   ru   rh   r&   r   r'   r'   r(   describe_categorical_1d  s    r|   c                 C  s
  ddg}|   }t||dk }|  |g}d}|dkr|jd |jd  }}| jj}	|  j	d}
t
|}|jdur|	dur||	}n
||	}|g d7 }|||t
|
 |	dt
|
 |	dg7 }n |dd	g7 }|tjtjg7 }d
}ddlm} |||| j|dS )zDescribe series containing timestamp data treated as categorical.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles_ignored : list-like of numbers
        Ignored, but in place to unify interface.
    r`   rs   r   Ni8)rt   ru   firstlast)tzrt   ru   rv   r_   rg   )rw   rT   r`   rW   rx   dtr   Zdropnavaluesviewr   tzinfoZ
tz_convertZtz_localizerc   rd   rR   ry   ri   r   r^   )rK   rr   r]   rz   r{   r&   rh   rt   ru   r   Zasintr   r'   r'   r(   $describe_timestamp_as_categorical_1d"  s2    

r   )rK   r   r   c                 C  sd   ddl m} t|}g d| dg }|  |  |  g| |  |  g }|||| j	dS )zDescribe series containing datetime64 dtype.

    Parameters
    ----------
    data : Series
        Series to be described.
    percentiles : list-like of numbers
        The percentiles to include in the output.
    r   r_   )r`   ra   rc   rd   )rW   r^   )
ri   r   r   r`   ra   rc   rj   rk   rd   r^   )rK   r   r   rn   ro   rO   r'   r'   r(   describe_timestamp_1dR  s    r   r   )rK   r   c                 C  sN   t | jrtS t| rtS | jjdks2t| jtr6tS | jjdkrFtS tS dS )zSelect proper function for describing series based on data type.

    Parameters
    ----------
    data : Series
        Series to be described.
    Mre   N)	r   rh   r|   r   rq   rm   rl   r   r   )rK   r'   r'   r(   r5   j  s    

r5   znpt.NDArray[np.float64]r0   c                 C  st   | du rt g dS t| } t|  d| vr8| d t | } t | }| dusXJ t|t| k rptd|S )z
    Ensure that percentiles are unique and sorted.

    Parameters
    ----------
    percentiles : list-like of numbers, optional
        The percentiles to include in the output.
    N)g      ?      ?g      ?r   z%percentiles cannot contain duplicates)	rR   arraylistr   rI   Zasarrayrs   rT   r;   )r   Zunique_pctsr'   r'   r(   r!     s    


r!   )2r4   
__future__r   abcr   r   typingr   r   r   numpyrR   Zpandas._libs.tslibsr   Zpandas._typingr	   r
   r   Zpandas.util._validatorsr   Zpandas.core.dtypes.commonr   r   Zpandas.core.dtypes.dtypesr   r   r   Zpandas.core.arrays.floatingr   Zpandas.core.reshape.concatr   Zpandas.io.formats.formatr   collections.abcr   r   ri   r   r   r)   r*   r#   r$   rJ   rq   r|   r   r   r5   r!   r'   r'   r'   r(   <module>   s4   .D) 0