a
    Ofe                     @  s<  d dl mZ d dlZd dlmZmZ d dlZd dlZd dl	m
  mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZmZmZ d d	lmZ d d
lmZ d dlm   m!Z" d dlm#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5m6Z6m7Z7m8Z8 erjd dl9m:Z:m;Z;m<Z< d dl=m>Z> d dl?m@Z@ G dd dZAd?dddddZBd@ddddd ZCdAd!dd!d"d#d$ZDd%dd!d&d'd(ZEdBd!ddd*d+d,ZFdCd!ddd*d-d.ZGd/d/d0d1d2ZHdDd!d3ddd!d4d5d6ZId7d3d3d7d8d9d:ZJd!d;dd<d=d>ZKdS )E    )annotationsN)TYPE_CHECKINGcast)PerformanceWarning)cache_readonly)find_stack_level)find_common_typemaybe_promote)ensure_platform_intis_1d_only_ea_dtype
is_integerneeds_i8_conversion)ExtensionDtype)notna)	factorizeunique)factorize_from_iterable)ensure_wrapped_if_datetimelike)	DataFrame)Index
MultiIndex
RangeIndex)concat)Series)compress_group_indexdecons_obs_group_idsget_compressed_idsget_group_indexget_group_index_sorter)	ArrayLikeLevelnpt)ExtensionArray)
FrozenListc                   @  s   e Zd ZdZd*ddddddd	Zed
dddZeddddZdddddZdd Z	eddddZ
eddddZddddZd+d d!Zd"d#d$d%Zeddd&d'Zeddd(d)ZdS ),
_Unstackera	  
    Helper class to unstack data / pivot with multi-level index

    Parameters
    ----------
    index : MultiIndex
    level : int or str, default last level
        Level to "unstack". Accepts a name for the level.
    fill_value : scalar, optional
        Default value to fill in missing values if subgroups do not have the
        same set of labels. By default, missing values will be replaced with
        the default fill value for that data type, NaN for float, NaT for
        datetimelike, etc. For integer types, by default data will converted to
        float and missing values will be set to NaN.
    constructor : object
        Pandas ``DataFrame`` or subclass used to create unstacked
        response.  If None, DataFrame will be used.

    Examples
    --------
    >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'),
    ...                                    ('two', 'a'), ('two', 'b')])
    >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index)
    >>> s
    one  a    1
         b    2
    two  a    3
         b    4
    dtype: int64

    >>> s.unstack(level=-1)
         a  b
    one  1  2
    two  3  4

    >>> s.unstack(level=0)
       one  two
    a    1    3
    b    2    4

    Returns
    -------
    unstacked : DataFrame
    Tr   r    boolNone)indexlevelsortreturnc           	      C  s  || _ || _| | _| j|| _d| jj| j v r:dnd| _t| jj	| _
t| jj| _| j| j| _| j
| j| _|j	| j | _| jst| jj| j }| j|| _| j|| _tdd | j
D }| jj}|| }|ttjjkrtjd| dtt d |   d S )	N   r   c                 S  s   g | ]
}|j qS  )size).0Zindex_levelr-   r-   T/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/core/reshape/reshape.py
<listcomp>       z'_Unstacker.__init__.<locals>.<listcomp>z%The following operation may generate z& cells in the resulting pandas object.)
stacklevel)constructorr)   Zremove_unused_levelsr'   _get_level_numberr(   codesliftlistlevelsnew_index_levelsnamesnew_index_namespopremoved_nameremoved_levelremoved_level_fullr   takenpmaxr.   ZiinfoZint32warningswarnr   r   _make_selectors)	selfr'   r(   r4   r)   Zunique_codesZnum_rowsZnum_columnsZ	num_cellsr-   r-   r0   __init__n   s0    

z_Unstacker.__init__z-tuple[npt.NDArray[np.intp], list[np.ndarray]])r*   c           
      C  s   | j }t| jj}t| jj}|d | ||d d   || g }tdd |d | ||d d   || g D }t||\}}t|}t||}	|	|fS )Nr,   c                 s  s   | ]}t |V  qd S Nlenr/   xr-   r-   r0   	<genexpr>   r2   z2_Unstacker._indexer_and_to_sort.<locals>.<genexpr>)	r(   r8   r'   r6   r9   tupler   rK   r   )
rG   vr6   levsto_sortsizes
comp_indexobs_idsngroupsindexerr-   r-   r0   _indexer_and_to_sort   s    &4
z_Unstacker._indexer_and_to_sortzlist[np.ndarray]c                   s&   | j \ }| jr" fdd|D S |S )Nc                   s   g | ]}|  qS r-   rA   )r/   linerW   r-   r0   r1      r2   z,_Unstacker.sorted_labels.<locals>.<listcomp>)rX   r)   )rG   rR   r-   r[   r0   sorted_labels   s    
z_Unstacker.sorted_labelsz
np.ndarray)valuesr*   c                 C  s(   | j r$| j\}}tj||dd}|S |S )Nr   )axis)r)   rX   algosZtake_nd)rG   r]   rW   _sorted_valuesr-   r-   r0   _make_sorted_values   s
    
z_Unstacker._make_sorted_valuesc           
      C  s   | j }| jd d }tdd |D }t||\}}t|}t|}| jj| j | j	 }||f| _
| jd ||  | j	 }tjt| j
td}	|	|d |	 t| jk rtd|| _|	| _| jr|t|| _nttj|ddd | _d S )	Nr+   c                 s  s   | ]}t |V  qd S rI   rJ   rL   r-   r-   r0   rN      r2   z-_Unstacker._make_selectors.<locals>.<genexpr>dtypeTz0Index contains duplicate entries, cannot reshape)Zreturn_indexr,   )r:   r\   rO   r   rK   r
   r'   Zlevshaper(   r7   
full_shaperB   zerosprodr%   putsum
ValueErrorgroup_indexmaskr)   Zsearchsortedarange
compressorr   )
rG   
new_levelsZremaining_labelsZlevel_sizesrT   rU   rV   strideselectorrl   r-   r-   r0   rF      s$    
z_Unstacker._make_selectorsc                 C  s   t | j S rI   )r%   rl   allrG   r-   r-   r0   mask_all   s    z_Unstacker.mask_allz2tuple[npt.NDArray[np.intp], npt.NDArray[np.bool_]]c                 C  s6   t jt| jt jd}| j|dd\}}||dfS )Nrc   r+   
fill_valuer   )rB   rm   rK   r'   Zintpget_new_valuesany)rG   Z	dummy_arr
new_valuesrl   r-   r-   r0   arange_result   s    z_Unstacker.arange_resultr   c                 C  sn   |j dkr|d d tjf }|d u r:|jd dkr:td| ||\}}| |}| j}| j||||j	dS )Nr,   z-must pass column labels for multi-column data)r'   columnsrd   )
ndimrB   newaxisshaperj   rw   get_new_columns	new_indexr4   rd   )rG   r]   value_columnsrv   r`   r{   r'   r-   r-   r0   
get_result   s    


z_Unstacker.get_resultNc                 C  s  |j dkr|d d tjf }| |}| j\}}|jd }|| }||f}| j}	| j}
|
rt|r|	|||
dd	|}tj|td}||fS |j}|
r|j}tj||d}nTt|tr| }|j||d}||d d < n&t||\}}tj||d}|| |j}tj|td}t|jr>|d}|d}n|j|dd}t||	d|||||d t|jr|d}t|}||j}||fS )	Nr,      rc   i8Fcopyu1zM8[ns])r|   rB   r}   rb   re   r~   rl   rt   rK   reshapeZswapaxesZonesr%   rd   empty
isinstancer   construct_array_type_emptyr	   fillnamerf   r   viewastype
libreshapeunstackr   )rG   r]   rv   ra   lengthwidthrp   Zresult_widthZresult_shaperl   rt   ry   Znew_maskrd   clsr   r-   r-   r0   rw      sb    







z_Unstacker.get_new_valueszIndex | None)r   c           	        s   |d u rB| j dkr"| jj| jdS | jjd| jjd}|| jS t| j| j  }t|}t	t
|| t|tr|j| jf }|j| jf } fdd|jD }n|| jg}|j| jg} g}| j}|t|| t|||ddS )Nr   r   )itemc                   s   g | ]}|  qS r-   rY   r/   ZlabZ
propagatorr-   r0   r1   Z  r2   z._Unstacker.get_new_columns.<locals>.<listcomp>Fr9   r6   r;   Zverify_integrity)r7   r?   _renamer>   insert	_na_valuerenamerK   rB   repeatrm   r   r   r9   r@   r;   r6   r   	_repeaterappendtile)	rG   r   levrp   r   ro   	new_names	new_codesrepeaterr-   r   r0   r   E  s0    

z_Unstacker.get_new_columnsc                 C  s\   t | jt | jkr8| j| j}| jrXt|dd}n t | j| j }t|| j }|S )Nr   r+   )rK   r@   r?   Zget_indexerr7   rB   r   rm   )rG   r   rp   r-   r-   r0   r   k  s    z_Unstacker._repeaterc                   s    fdd j d d D }t jdkrr jd |d  }}|dk r\|t||j}|| jd S t	 j| jddS )Nc                   s   g | ]}|  jqS r-   )rA   rn   r   rs   r-   r0   r1   }  r2   z(_Unstacker.new_index.<locals>.<listcomp>r+   r,   r   Fr   )
r\   rK   r:   rx   r   r   rA   r   r<   r   )rG   Zresult_codesr(   level_codesr-   rs   r0   r   z  s    z_Unstacker.new_index)T)N)__name__
__module____qualname____doc__rH   r   rX   r\   rb   rF   rt   rz   r   rw   r   r   r   r-   r-   r-   r0   r$   @   s(   . .
O&r$   TzSeries | DataFramer%   )datar)   c                   s  t  dkr| S | jtt jv r0 g fdd D   fddtjD }fdd D }fdd D }fdd D }fdd|D }fd	d|D }	fd
d|D }
tdd |D }t||ddd}t	|dd\}}t
||||dd}|st|dd}n"t||g |	|g |
dg dd}t| trx|  }||_|jd||d}|}|}|}nt| jtr| } r d|j||d}fdd D  q|S | jdd}||_|jd||d}t|tr|jn|jttsJ jd g| }| jjg| }jd g}|fdd|D  t|||dd}t|trv||_n||_|S )Nr   c                   s   g | ]}  |qS r-   )r5   r/   ir'   r-   r0   r1     r2   z%_unstack_multiple.<locals>.<listcomp>c                   s   g | ]}| vr|qS r-   r-   r   )clocsr-   r0   r1     r2   c                   s   g | ]} j | qS r-   r9   r   r   r-   r0   r1     r2   c                   s   g | ]} j | qS r-   r6   r   r   r-   r0   r1     r2   c                   s   g | ]} j | qS r-   r;   r   r   r-   r0   r1     r2   c                   s   g | ]} j | qS r-   r   r   r   r-   r0   r1     r2   c                   s   g | ]} j | qS r-   r   r   r   r-   r0   r1     r2   c                   s   g | ]} j | qS r-   r   r   r   r-   r0   r1     r2   c                 s  s   | ]}t |V  qd S rI   rJ   rL   r-   r-   r0   rN     r2   z$_unstack_multiple.<locals>.<genexpr>F)r)   xnullr)   )r   __placeholder__r   r   rv   r)   c                   s    g | ]}| k r|n|d  qS r,   r-   r/   rP   )valr-   r0   r1     r2   deepc                 3  s   | ]}|  jd  V  qdS )r+   N)rA   r6   )r/   Zrec)unstcolsr-   r0   rN     r2   )rK   r'   r   r   r;   rangenlevelsrO   r   r   r   r   r   r   r   r   r{   r=   r9   r   r6   extend)r   r   rv   r)   ZrlocsZclevelsZccodesZcnamesZrlevelsZrcodesZrnamesr~   rk   Zcomp_idsrU   Zrecons_codesZdummy_indexdummyZ	unstackedro   r   r   resultZdummy_dfnew_columnsr-   )r   r'   r   r   r0   _unstack_multiple  sx    


r   )objr)   c                 C  s   t |ttfr2t|dkr*t| |||dS |d }t|sN|dksN| j| t | trt | jt	rtt
| |||dS | jjddS ndt | jt	stdt| j dn@t| jrt| |||d	S t| j|| j|d
}|j| jd |dS d S )Nr,   r   r   r   T)Zfuture_stackz'index must be a MultiIndex to unstack, z was passedr   r(   r4   r)   r   rv   )r   rO   r8   rK   r   r   r'   r5   r   r   _unstack_frameTstackrj   typer   rd   _unstack_extension_seriesr$   Z_constructor_expanddimr   _values)r   r(   rv   r)   	unstackerr-   r-   r0   r     s,    

r   r   )r   r)   r*   c                 C  sb   t | jtsJ t| j|| j|d}| jsJ| jj||d}| j||j	dS |j
| j| j|dS d S )Nr   ru   )axesr   )r   r'   r   r$   _constructorZ_can_fast_transposeZ_mgrr   Z_constructor_from_mgrr   r   r   r{   )r   r(   rv   r)   r   Zmgrr-   r-   r0   r     s    
r   r   )seriesr)   r*   c                 C  s,   |   }|j|||d}|jdg|_|S )an  
    Unstack an ExtensionArray-backed Series.

    The ExtensionDtype is preserved.

    Parameters
    ----------
    series : Series
        A Series with an ExtensionArray for values
    level : Any
        The level name or number.
    fill_value : Any
        The user-level (not physical storage) fill value to use for
        missing values introduced by the reshape. Passed to
        ``series.values.take``.
    sort : bool
        Whether to sort the resulting MuliIndex levels

    Returns
    -------
    DataFrame
        Each column of the DataFrame will have the same dtype as
        the input Series.
    )r(   rv   r)   r   )Zto_framer   r{   _drop_level_numbers)r   r(   rv   r)   Zdfr   r-   r-   r0   r     s    r   r+   )framedropnar)   c                   s  dd }| j \} | j|}t| jtr:t| |||dS t| jtrt| jj} fdd| jj	D }|| j\}	}
|
|	 |
t|
|  t| jj}|
| jj t|||dd}nTtt|| j| jf \}\}}
| t|
| f}t||| jj| jjgdd}| js| jrt| jj}|d }t|trt| }|d	d |  D }t|| }n
| j }n
| j }|rt|}|| }|| }| j||d
S )z
    Convert DataFrame to Series with multi-level Index. Columns become the
    second level of the resulting hierarchical index

    Returns
    -------
    stacked : Series or DataFrame
    c                 S  s,   | j r| tt| fS t| \}}||fS rI   )Z	is_uniquerB   rm   rK   r   )r'   r6   
categoriesr-   r-   r0   stack_factorizeM  s    zstack.<locals>.stack_factorize)	level_numr   r)   c                   s   g | ]}|  qS r-   r   r   Kr-   r0   r1   ^  r2   zstack.<locals>.<listcomp>Fr   r   c                 S  s   g | ]\}}|j qS r-   )r   )r/   r`   colr-   r-   r0   r1   }  r2   r   )r~   r{   r5   r   r   _stack_multi_columnsr'   r8   r9   r6   r   rB   r   ravelr;   r   zipmapr   r   Z_is_homogeneous_typedtypesr   r   r   _concat_same_typeitems"_reorder_for_extension_array_stackr   Z_constructor_sliced)r   r(   r   r)   r   Nr   ro   r   ZclevZclabr   r   r9   Zilabr6   r   rd   arrry   rl   r-   r   r0   r   C  sT    



r   c                   s   t  fdd|D r6 }|D ]t|||d}qndt dd |D r } fdd|D }|r|dt|||d}fdd|D }q^ntd	|S )
Nc                 3  s   | ]}| j jv V  qd S rI   )r{   r;   r/   r   r   r-   r0   rN     r2   z!stack_multiple.<locals>.<genexpr>)r   r)   c                 s  s   | ]}t |tV  qd S rI   )r   intr   r-   r-   r0   rN     r2   c                   s   g | ]} j |qS r-   )r{   r5   r   r   r-   r0   r1     r2   z"stack_multiple.<locals>.<listcomp>r   c                   s    g | ]}| kr|n|d  qS r   r-   r   r   r-   r0   r1     r2   zTlevel should contain all level names or all level numbers, not a mixture of the two.)rr   r   r=   rj   )r   r(   r   r)   r   r-   )r   r   r0   stack_multiple  s    
r   r   )r{   r*   c                 C  s   t | jdkr&| jd j| jd dS dd t| jdd | jdd D }t| }dd	 t|D }t| }tj	d
d t|| jD | jdd dS )zBCreates a MultiIndex from the first N-1 levels of this MultiIndex.r   r   r   c                   s"   g | ]\ } fd d|D qS )c                   s    g | ]}|d kr | ndqS )r   Nr-   )r/   cr   r-   r0   r1     r2   z8_stack_multi_column_index.<locals>.<listcomp>.<listcomp>r-   r/   r6   r-   r   r0   r1     s   z-_stack_multi_column_index.<locals>.<listcomp>Nr+   c                 s  s   | ]\}}|V  qd S rI   r-   )r/   keyr`   r-   r-   r0   rN     r2   z,_stack_multi_column_index.<locals>.<genexpr>c                 S  s*   g | ]"\}}d |vr"t ||jdn|qS )Nrc   )r   rd   )r/   Znew_levr   r-   r-   r0   r1     s   r   )
rK   r9   r   r;   r   r6   	itertoolsgroupbyr   Zfrom_arrays)r{   rQ   ZtuplesZunique_tuplesZnew_levsr-   r-   r0   _stack_multi_column_index  s    
r   r   )r   r   r   r)   r*   c           $   	     s  ddddd}| j dd}|j}t|ts0J ||jd kr|}t||jd D ](}|||}	||d |}
||	|
}qR| |_}| s|r|d	|}|j|dd
}|j}t	t|}t
|}i }|jd }t|jd }|rt|}|t|d }t||}t|g }|D ]D}z|j|}W n$ tyR   || Y qY n0 t|tsjt|}n|j|j }|kr|jd d |j| f }||jjd |_|j|dj}n|jd d |f }t|j  t t r: ! " fdd|# D }|j$\}}t%|| &||j'( }||}n|j}|j)dkrT|( }|||< qt|d	krx|*|}t|}t|j+trt,|j+j}t,|j+j-}fdd|j+jD }n*t.|j+\}} | g}|/g}|j+j0g}|| |t1|| || jj-|  t|||dd}!| j2||!|d}"| jjdkrl| j3|g }#|"j4|#sl|"|# }"|r|"j5d	dd}"|"S )Nr   r   r   r{   c                 S  s   | |j v r|j |  S | S )z
        Logic for converting the level number to something we can safely pass
        to swaplevel.

        If `level_num` matches a column name return the name from
        position `level_num`, otherwise return `level_num`.
        r   r   r-   r-   r0   _convert_level_number  s    

z3_stack_multi_columns.<locals>._convert_level_numberFr   r,   r   )r(   r^   r+   )r{   c                   s    g | ]\}}|j j d dqS )Fr   )r   r   )r/   r`   rM   rc   r-   r0   r1     r2   z(_stack_multi_columns.<locals>.<listcomp>c                   s   g | ]}|  qS r-   r   r   )levsizer-   r0   r1   2  r2   r   )r'   r{   rr   )r^   how)6r   r{   r   r   r   r   Z	swaplevelZ_is_lexsortedZ
sort_indexr   r   r9   r   r6   rB   r)   r   rK   rA   Zget_locKeyErrorr   slicestopstartlocZreindexr]   ilocr   r   tolistr   r   r   r   r~   rm   r   r   r   r|   
differencer'   r8   r;   r   r   r   r   r   r   equalsr   )$r   r   r   r)   r   thisZmi_colsZroll_columnsr   Zlev1Zlev2Zlevel_to_sortr   Znew_dataZ
level_valsr   Zlevel_vals_nanZlevel_vals_usedZ	drop_colsr   r   Z	slice_lenchunkZvalue_sliceZsubsetr   r   idxro   r   r   Z	old_codesZ
old_levelsr   r   desired_columnsr-   )rd   r   r0   r     s    













r   r"   )r   n_rows	n_columnsr*   c                 C  s&   t || ||j }| |S )a  
    Re-orders the values when stacking multiple extension-arrays.

    The indirect stacking method used for EAs requires a followup
    take to get the order correct.

    Parameters
    ----------
    arr : ExtensionArray
    n_rows, n_columns : int
        The number of rows and columns in the original DataFrame.

    Returns
    -------
    taken : ExtensionArray
        The original `arr` with elements re-ordered appropriately

    Examples
    --------
    >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f'])
    >>> _reorder_for_extension_array_stack(arr, 2, 3)
    array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1')

    >>> _reorder_for_extension_array_stack(arr, 3, 2)
    array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1')
    )rB   rm   r   r   r   rA   )r   r  r  r   r-   r-   r0   r   P  s    !r   z	list[int])r   r(   r*   c                   sx   j  t j krtdtdd} j fddt j jD d d d }tdkrvt	}|
|}n|}| }| }g }|D ]}	t j dkr  }
nLtdkr|	f}	t|	tfdd	t j jD } jd d |f }
t j jk r|
j ||
_ n0|jdkrL|
jdkr<d
|
_ntt|
j |
_ ||
 qt|d
kr jst|}t|t  }n>t j jk r j | }nd
g}t| jjd}d
}t j jk r j | }|j |s|| }t jtr. jj}tt jj d|f}n,t! jdd\}}|g}tt|d|f}t|trx|j}|" j }n| g}t!|ddd
 g} fdd|D }t|| ||  jj#t|j# dd|_t }t|}t$|}t|| |t%t$|| }|&|}|jdkrb j jtkrbt|j d
krPt'|jd}n|j(d d d
f }|jdkrtd |_|S )Nz8Columns with duplicate values are not supported in stackT)reversec                   s   g | ]}| vr|qS r-   r-   r/   k)r(   r-   r0   r1   |  r2   zstack_v3.<locals>.<listcomp>r+   r,   c                 3  s&   | ]}|v rt  ntd V  qd S rI   )nextr   r  )genr(   r-   r0   rN     s   zstack_v3.<locals>.<genexpr>r   )r{   rd   F)Zuse_na_sentinelc                   s   g | ]}t |t qS r-   )rB   r   rK   r   r   r-   r0   r1     r2   r   r   r   ))r{   ZnuniquerK   rj   sortedr   r   r   rB   ZargsortZ_reorder_ilevelsr   r   iterrO   r   r|   r   r   r   r   r   r   r   rd   r   r   r'   r   r9   r8   r   r6   r   Zdrop_duplicatesr;   rm   r   rA   r   r   )r   r(   Zdrop_levnumsZ
stack_colsZsorterZordered_stack_colsZstack_cols_uniqueZordered_stack_cols_uniquebufr   r   Zcolumn_indexerr   ratior   r  Zindex_levelsZindex_codesr6   ZuniquesZcolumn_levelsZcolumn_codesZlen_dfZ	n_uniquesrW   Zidxsr-   )r   r  r(   r0   stack_v3u  s    "




"
r  )NT)NT)NT)r+   TT)TT)r+   TT)L
__future__r   r   typingr   r   rD   numpyrB   Zpandas._libs.reshapeZ_libsr   r   Zpandas.errorsr   Zpandas.util._decoratorsr   Zpandas.util._exceptionsr   Zpandas.core.dtypes.castr   r	   Zpandas.core.dtypes.commonr
   r   r   r   Zpandas.core.dtypes.dtypesr   Zpandas.core.dtypes.missingr   Zpandas.core.algorithmscoreZ
algorithmsr_   r   r   Zpandas.core.arrays.categoricalr   Zpandas.core.constructionr   Zpandas.core.framer   Zpandas.core.indexes.apir   r   r   Zpandas.core.reshape.concatr   Zpandas.core.seriesr   Zpandas.core.sortingr   r   r   r   r   Zpandas._typingr   r    r!   Zpandas.core.arraysr"   Zpandas.core.indexes.frozenr#   r$   r   r   r   r   r   r   r   r   r   r  r-   r-   r-   r0   <module>   sT     Q [% %M!  %