a
    Pf                     @   s  d Z ddlZddlZddlm  mZ ddlm	Z	m
Z
mZmZmZmZmZmZmZ ddlmZ ddlmZ dd Zdd Zejjejd	d
dgejdddgejdddgejjdddddggedejjddddggedejdd
dgejdddgejdd
dgejdd
dgejdd
dgdd Zejd d
dgd!d" Zejd#d$d%gg d&gd'd( Zejd#d$d%gg d&gd)d* Z d+d, Z!d-d. Z"ej#d/d0 Z$d1d2 Z%d3d4 Z&d5d6 Z'd7d8 Z(ejd9g d:ejdddgejd;g d<ejd=d
dgejd>d
dgd?d@ Z)ejdAe*ej+dBe,dCdDej+dEe,dCdDgejdFd
dgejdGddg dHg dIg dJfd
dg dKg dLg dMfd
d
g dNg dOg dPfgdQdR Z-ej#dSdT Z.ejdUddddVg dWg dXfd
d
ddVg dYg dZfd
dddVg d[g d\fd
dd
d]g d^g d\fgd_d` Z/ej#dadb Z0ejdcddg ddg defdd
g dfg dgfd
dg dhg difd
d
g djg dkfgdldm Z1ej#dndo Z2ejdpd
dqdqgej3g drg dsdtfdg dueedqgeg dvedwdxej4ggg dyg dzg d{gg dsd|fgejdddgd}d~ Z5ejd=dd
gejddd
gejdddVej6g dej7dfd
d]e6g dfgdd Z8dd Z9ejd=d
dgejdddVej6g dej7dfd
d]e6g dfgdd Z:ejd=d
dgejdddVej6g dej7dfd
d]e6g dfgdd Z;ejd=d
dgejddg dfd
g dfgejdddVej6g dej7dfd
d]e6g dfgdd Z<ejd=dd
gejddd
gejdddVej6g dej7dfd
d]e6g dfgdd Z=ejdddVg dfd
d]g dkfgdd Z>ejdde?dg dfde?ddg g dfgejd=dd
gdd Z@ejdddgdd ZAdd ZBdd ZCdd ZDdd ZEdd ZFejd d
dgdd ZGdd ZHejdd
dgejdFd
dgdd ZIejdd
dgejdFd
dgdd ZJdS )z
these are systematically testing all of the args to value_counts
with different size combinations. This is to ensure stability of the sorting
and proper parameter handling
    N)	CategoricalCategoricalIndex	DataFrameGrouperIndex
MultiIndexSeries
date_rangeto_datetime)Versionc                  C   s   t dgdgd} | d d| d< | dd  }t ddggddgd}|d d|d< t|}tdg|d	d
}t|| d S )NfemaleUS)gendercountryr   categoryr   columns   countindexname)	r   astypegroupbyvalue_countsr   
from_framer   tmassert_series_equal)dfresultZdf_mi_expectedZmi_expectedexpected r!   g/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/tests/groupby/methods/test_value_counts.py.tests_value_counts_index_names_category_column   s    
r#   c                 C   s   t ddd}ttjdtd|tjd||tjdd|d |d}| r|d d	|d< tj	|j
dd d
df< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< tj	|j
dd ddf< |S )Nz
2015-08-24
   )Zperiods   abcdr   )1st2nd3rdr)   float   r'         r(            	   )r	   r   nprandomZdefault_rngchoicelistZintegersr   nanloc)	seed_nansnmdaysframer!   r!   r"   seed_df0   s    r=   r8   TFnum_rowsr$   2   max_int      keysr'   r(   )Zidsbinsisortznormalize, name)T
proportion)Fr   sort	ascendingdropnac                 C   s   t | ||}dd }|||	|
|d}|j||d}|d jf i |}|j||d}|d jtjfi |}|jjd d dg |j_||}t|||f\}}t	
| |  d S )Nc                 S   s2   t t| jjt| jj}tj|| jjd| _| S )Nnames)	r5   mapr   Zget_level_valuesrangeZnlevelsr   from_arraysrK   )r   Zarrr!   r!   r"   rebuild_indexa   s    z7test_series_groupby_value_counts.<locals>.rebuild_index)	normalizerG   rH   rI   rD   rG   r)   )r=   r   r   applyr   r   rK   renamerL   r   r   
sort_index)r8   r>   r@   rC   rD   rE   rP   r   rG   rH   rI   r   rO   kwargsgrleftrightr!   r!   r"    test_series_groupby_value_countsG   s     
rZ   utcc                 C   s   t g dg dddg}t|d | dd|d< |td	dd
}|d   }|d tj }|j	j
|j	_
|d}t|| d S )NiGI]i)J]iJ]iK]i)<M]iU=M]iN]appler^   bananar_   oranger`   pear	TimestampFoodr,   rc   sr[   unitDatetime1Dfreqkeyrd   r   )r   dropr
   r   r   r   rU   rS   r   r   rK   rT   r   r   )r[   r   dfgr   r    r!   r!   r"   -test_series_groupby_value_counts_with_grouper|   s    	
ro   r   AB)rp   rq   Cc                 C   sf   t | d}|| d d }|| d   }tg |jdd}tjg gt|  | d|_t	
|| d S )Nr   rR   r   )dtyper   rJ   )r   r   r   r   rs   r   rN   lenr   r   r   r   r   rn   r   r    r!   r!   r"   &test_series_groupby_value_counts_empty   s    
rv   c                 C   sP   t tt| g| d}|| d d }|| d   }| }t|| d S )N)datar   rR   )r   rM   rt   r   r   r   r   ru   r!   r!   r"   (test_series_groupby_value_counts_one_row   s
    rx   c                  C   sp   t tdgddgd} | dg }t ddgttddgtddgddgdddgd	d
}t	
|| d S )Nab)
categoriesr   r   Fr   )r{   orderedrs   r   rw   r   r   )r   r   r   r   r   rN   r2   arrayr   r   r   )re   r   r    r!   r!   r"   /test_series_groupby_value_counts_on_categorical   s    r   c                  C   s   t g dg dg dd} | jddgddd	 }|jdd}td
dgddgg dgg dg dg dgg dd}tg d|dd}t|| d S )Nmaler   r   r   r   r   lowmediumhighr   r   r   r   FRr   r   r   r   r   	educationr   r   r   FrQ   r   r   r   r   r   )r   r   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r%   r   r%   r   r   r   levelscodesrK   r   r   r   r%   r   r   r   )r   r   r   r   r   r   r   )r   gbr   r   r    r!   r!   r"   (test_series_groupby_value_counts_no_sort   s    r   c                   C   s   t g dg dg ddS )Nr   r   r   r   r   r!   r!   r!   r"   education_df   s    r   c                 C   sz   d}t jt|d | jddd}W d    n1 s60    Y  tjtdd |  W d    n1 sl0    Y  d S )Nz+DataFrame.groupby with axis=1 is deprecatedmatchr   r   axisr   )r   assert_produces_warningFutureWarningr   pytestraisesNotImplementedErrorr   )r   msggpr!   r!   r"   	test_axis   s
    ,r   c                 C   sJ   |  d}tjtdd |jdgd W d    n1 s<0    Y  d S )Nr   subsetr   r   )r   r   r   
ValueErrorr   )r   r   r!   r!   r"   test_bad_subset   s    
r   c                 C   sv   t tjt dkr(|tjjddd | dddg jdd	}t	g d
t
jg dg dddd}t|| d S )N1.25Ypandas default unstable sorting of duplicatesissue with numpy>=1.25 with AVX instructionsFreasonstrictr   r   r   TrP   )      ?      ?r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   rF   r}   )r   r2   __version__applymarkerr   markxfailr   r   r   r   from_tuplesr   r   )r   requestr   r    r!   r!   r"   
test_basic   s&    	
r   c                 C   s   | | j |||dS )NrP   rG   rH   )r   )r   rC   rP   rG   rH   r!   r!   r"   _frame_value_counts  s    r   r   columnr~   functionzsort, ascending))FN)TT)TFas_indexr<   c	                    s  t tjt dkr4|r4|r4|r4|tjjddd d d j fddd| }	 j|	|d	}
|
d
dg j	|||d}|r|dkrt
nd }d}tj||d& |
td
dg|||}W d    n1 s0    Y  |rt|| n|rdnd}| jd|idd}|dkr>|jddidd}t|d dd|d< n2|dkrZ|d dk|d< nt|d dd|d< t|| n d
 d  d   d< |
d j	|||d}||_|r0|jjdd}|d jdjd|d
< |d jdjd|d< |d= |jdd idd}t||_t|| nV|dd
|d jdjd |dd|d jdjd |d= t|| d S )Nr   r   Fr   r   c                    s    d |  dkS )Nr   r   r!   )xr   r!   r"   <lambda>D      z6test_against_frame_and_seriesgroupby.<locals>.<lambda>r   )byr   r   r   r   r   z7DataFrameGroupBy.apply operated on the grouping columnsr   rF   r   r   r   r   level_0r   r   r   -Zbothr   r%   )r   r2   r   r   r   r   r   valuesr   r   DeprecationWarningr   r   rS   r   r   reset_indexrT   whereassert_frame_equalr   r   Zto_framestrsplitgetr   r   insert)r   r   rP   r   rG   rH   r   r<   r   r   r   r   warnr   r    Zindex_framer!   r   r"   $test_against_frame_and_seriesgroupby   sh    

"

""r   rs   zstring[pyarrow_numpy]Zpyarrow)Zmarksstring[pyarrow]rP   zCsort, ascending, expected_rows, expected_count, expected_group_size)r   r   r%   r,      r   )r   r,   r   r,   r   )r,   r   r   r%   r   )r%   r   r   r   r   )r,   r   r,   r   r   )r   r   r%   r   r,   )r   r   r   r   r%   )r   r,   r   r   r,   c                    s    |j |_jddgddd}|d j|||d}	t }
dD ]4  fdd	|D |
 < |
 |}
|
j ||
_qJ|r||
d
< |
d
  |  < |dkr|
d
  |
d
< n ||
d< |dkr|
d  |
d< t|	|
 d S )Nr   r   Fr   rG   r   r   r   c                    s   g | ]}  | qS r!   r!   .0rowr   r   r!   r"   
<listcomp>  r   z!test_compound.<locals>.<listcomp>rF   r   r   )r   r   r   r   r   Zconvert_dtypesr   r   )r   rP   rG   rH   expected_rowsZexpected_countZexpected_group_sizers   r   r   r    r!   r   r"   test_compoundw  s(    

r   c                   C   s$   t g dg dg ddg ddS )Nr   r   r   r   )r%   r   r      )r%   r   r   r   rl   Znum_legsZ	num_wings)Zfalcondogcatantr   r   r!   r!   r!   r"   
animals_df  s    r   z?sort, ascending, normalize, name, expected_data, expected_indexr   r   r%   r   )r   r   r   )r%   r   r   r%   r   r   r   r   r%   )r   )r%   r   r   r   r%   r   r   )r   )r   r%   r   )r   r%   r   rF   )r   r   r   c           
      C   s^   | j |||d}t|tj|g dd|d}t|| | dj |||d}	t|	| d S )N)rG   rH   rP   r   rJ   r}   rl   )r   r   r   rN   r   r   r   )
r   rG   rH   rP   r   expected_dataexpected_indexresult_framer    result_frame_groupbyr!   r!   r"   test_data_frame_value_counts  s    
r   c                  C   s`   t j} tdd| d| ddddg	ddd| | ddddg	dddddd| d| g	ddddddd| | g	d	S )
Nr   r   r   r,   r%   rA   r0   r.   )rp   rq   rr   D)r2   r6   r   )r9   r!   r!   r"   nulls_df  s    r   z:group_dropna, count_dropna, expected_rows, expected_values)	r   r   r,   rA   r.   r   r0   r%   r   )	r   r         ?r   r   r   r   r   r   )r   r   r,   rA   r%   r   )r   r   r   r   r   r   )r   r   rA   r.   r   r0   )r   r   r   r   r   r   )r   r   rA   )r   r   r   c                    s   t tjt dkr,|s,|tjjddd jddg|d}|jdd|d	}t	 }j
D ]  fd
d|D | < qZt|}	t||	dd}
t||
 d S )Nr   r   Fr   rp   rq   )rI   T)rP   rG   rI   c                    s   g | ]}  | qS r!   r!   r   r   r   r!   r"   r     r   z,test_dropna_combinations.<locals>.<listcomp>rF   r}   )r   r2   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   Zgroup_dropnaZcount_dropnar   expected_valuesr   r   r   r   r   r    r!   r   r"   test_dropna_combinations  s    	

r   c                 C   s    t g dg dd| | dgdS )Nr   )JohnAnner   BethSmithLouiserl   Z
first_nameZmiddle_namer   )Znulls_fixturer!   r!   r"   names_with_nulls_df  s    
r   z%dropna, expected_data, expected_indexr   )r   r   )r   r   )r   r   r   rJ   r   )r   r   r   r   r   )r   r   r   r   )r   r   r%   r%   )r%   r   r   r%   r   c           	      C   s`   | j ||d}t|||d}|r0|tt| }t|| | dj ||d}t|| d S )N)rI   rP   r}   rl   )r   r   r*   rt   r   r   r   )	r   rI   rP   r   r   r   r   r    r   r!   r!   r"   #test_data_frame_value_counts_dropna  s    !
r   observedznormalize, name, expected_data)r%   r   r   r   r   r   r   r   r   r   r   r   rs   )r   r   r           r   r   r   r   r   r   r   r   c                 C   s   t tjt dkr(|tjjddd | djd||d}|j	|d}t
jg d	g d
d}	t||	|d}
tdD ]"}|
jjt|
jj| |d|
_qv|rt||
 n |
j|rdndd}t|| d S )Nr   r   Fr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rJ   r}   r,   levelrF   r   r   )r   r2   r   r   r   r   r   r   r   r   r   r   r   rM   r   
set_levelsr   r   r   r   r   r   r   r   r   rP   r   r   r   r   r   r   expected_seriesir    r!   r!   r"   =test_categorical_single_grouper_with_only_observed_categoriesJ  s<    



r  c                 C   s   |   d} | d jdg| d< | jd||d}|j|d}t|tj|g dd|d}	t	d	D ]@}
t
|	jj|
 }|
d
kr|| d jj}|	jj||
d|	_qd|rt||	 n|	j|d}t|| d S )Nr   r   ASIAr   r   r   rJ   r}   r,   r   r  r  )copyr   r   Zadd_categoriesr   r   r   r   r   rM   r   r   r   Zset_categoriesr{   r	  r   r   r   r   )r   r   r   r   rP   r   r   r   r   r  r  Zindex_levelr    r!   r!   r"   !assert_categorical_single_grouper  s.    
r  c              	   C   sJ   t tjt dkr(|tjjddd g d}t| |d||||d d S )Nr   r   Fr   r   Tr   r   r   r   rP   r   r   r   r2   r   r   r   r   r   r  r   r   rP   r   r   r   r   r!   r!   r"   -test_categorical_single_grouper_observed_true  s"    
r  )r%   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   c              	   C   sJ   t tjt dkr(|tjjddd g d}t| |d||||d d S )Nr   r   Fr   )r   r   r   r   r   r  r   r   r  r  r  r  )r  r   r   )r  r   r   )r  r   r   )r  r   r   )r  r   r   )r  r   r   r  r  r  r!   r!   r"   .test_categorical_single_grouper_observed_false  s"    ,
r  zobserved, expected_index)r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   )r  r  r  r  r  )r   r   r%   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   c                 C   s   |   } | d d| d< | d d| d< | jddg||d}|j|d}t|r^||dk n|tj|g dd|d	}	td
D ]"}
|	jj	t
|	jj|
 |
d|	_q|rt||	 n |	j|rdndd}t|| d S )Nr   r   r   r   r   r   )r   r   r   rJ   r}   r%   r  rF   r   r  )r  r   r   r   r   r   r   rM   r   r	  r   r   r   r   r   r   )r   r   r   r   rP   r   r   r   r   r  r  r    r!   r!   r"   "test_categorical_multiple_groupersI  s2    7


r  c                 C   s   t tjt dkr(|tjjddd |  } | d d| d< | d d| d< | j	d||d	}|j
|d
}g d}	t|tj|	g dd|d}
tddD ]"}|
jjt|
jj| |d|
_q|rt||
 n |
j|rdndd}t|| d S )Nr   r   Fr   r   r   r   r   r   r   r   r   rJ   r}   r   r,   r  rF   r   r  )r   r2   r   r   r   r   r   r  r   r   r   r   r   r   rM   r   r	  r   r   r   r   r   r   r
  r!   r!   r"   test_categorical_non_groupers  s>    


r  z*normalize, expected_label, expected_valuesr   c                 C   s   t g dg dd}|jg dddd gdd	}|jd
| d}t dtjg dtddg ddg ddg d||i}t|| d S )Nr   r   r%   r,   )rp   rq   )r   rA   r   rp   c                 S   s   | dkrdS dS )Nr   r.   r0   r!   )r  r!   r!   r"   r     r   z&test_mixed_groupings.<locals>.<lambda>Fr   TrG   rP   r   )r   r   rA   r   r   Zlevel_2)r0   r0   r.   rq   )r   r,   r%   )r   r   r   r2   r~   intr   r   )rP   expected_labelr   r   r   r   r    r!   r!   r"   test_mixed_groupings  s    		r"  ztest, columns, expected_namesrepeatZabbde)ry   Ndrz   rz   er  r&   level_1)ry   Nr$  rz   cr&  c           
      C   s   t g dg dg|d}ddg}dtjddgtjd	d
g}|j||d }|rvtdtj||ddd}t	
|| n@dd |D }t|}	d|	d< |	d t ||	d}t	|| d S )N)r   r,   rA   r.   r1   )r%   r   r   r0   r$   r   )r   r   r.   r,   rA   r1   )r%   r   r0   r   r   r$   ry   r   r   r   r$  r  r   rJ   r   r}   c                 S   s   g | ]}t |d g qS )r   )r5   r   r!   r!   r"   r     r   z0test_column_label_duplicates.<locals>.<listcomp>r&  )r   r2   r~   int64r   r   r   r   r   r   r   r5   appendr   )
testr   Zexpected_namesr   r   r   rC   r   r    Zexpected_columnsr!   r!   r"   test_column_label_duplicates  s(    
r+  znormalize, expected_labelc                 C   sl   t g dgdd|gdjddd}d| d}tjt|d	 |j| d
 W d    n1 s^0    Y  d S )Nr  ry   rz   r   Fr  zColumn label 'z' is duplicate of result columnr   r   )r   r   r   r   r   r   )rP   r!  r   r   r!   r!   r"   test_result_label_duplicates'  s    	r,  c                  C   sf   t dddgi} | tjddgtjd}| }tdgtjddggd dgddd}t	
|| d S )Nry   r   r   r%   rJ   r   r   )r   r   r2   r~   r(  r   r   r   r   r   r   )r   r   r   r    r!   r!   r"   test_ambiguous_grouping8  s    r-  c                  C   sh   t g dg ddg dd} d}tjt|d$ | djdgd	 W d    n1 sZ0    Y  d S )
Nry   rz   r'  r   yr0  c1c2r   r   r   r   z;Keys {'c1'} in subset cannot be in the groupby column keys.r   r2  r   r   r   r   r   r   r   r   r   r!   r!   r"   "test_subset_overlaps_gb_key_raisesC  s    r7  c                  C   sh   t g dg ddg dd} d}tjt|d$ | djd	gd
 W d    n1 sZ0    Y  d S )Nr.  r/  r1  r4  r   z4Keys {'c3'} in subset do not exist in the DataFrame.r   r2  c3r   r5  r6  r!   r!   r"   !test_subset_doesnt_exist_in_frameK  s    r9  c                  C   sp   t g dg ddg dd} | jddjdgd	}td
dgtjdd
gddggd dgddd}t|| d S )Nr.  r/  r1  r4  r   r   r  r3  r   r   r%   r   r0  rJ   r   r   r   r   r   r   r   rN   r   r   r   r   r    r!   r!   r"   test_subsetS  s    r<  c                  C   s   t g dg dg dgg dg dd} | jddjdgd	}td
dgtjdd
gddgddggg dddd}t|| d S )N)ry   r   r   )rz   r0  r0  r4  )r2  r3  r3  )r   r   r   r  r3  r   r   r%   r   r0  )Nr3  r3  rJ   r   r   r:  r;  r!   r!   r"   test_subset_duplicate_columns_  s    r=  c           	      C   s   t g dg dddg}t|d | ddj||d< |td	dd
}| }tg d| d|}|d  }t	||g dgg dt
dg dgg dd}td|dd}t|| d S )Nr\   r]   rb   r,   rc   re   rf   rh   ri   rj   )z
2019-08-06z
2019-08-07z
2019-08-09z
2019-08-10)r[   )r^   r_   r`   ra   )r   r   r   r%   r%   r,   r   )r   r   r   r%   r%   r,   )rh   rc   rd   r   r   r   r   )r   rm   r
   dtZas_unitr   r   r   uniquer   rM   r   r   r   )	r[   rg   r   r   r   datesZ
timestampsr   r    r!   r!   r"   test_value_counts_time_grouperq  s.    	rA  c                  C   sj   t g dg dg dd} | jddgddd}|d	  }t g dg dg ddd
}t|| d S )N)ry   ry   ry   )ry   ry   r$  r.  r  r   r%   Fr   r,   )r   r%   r,   r   )r   r   r   r   r   )r   r   r   r    r!   r!   r"   !test_value_counts_integer_columns  s    rB  vc_sortc           
      C   s   t g dg dd}|jd| d}|j||d}|r@g d}ng d}td	d
gddggg dg dgddgd}t|||rdndd}| r|rg d}	n,| r|sg d}	n| s|rg d}	ng d}	||	}t|| d S )Nr%   r   r   r   r,   r   r,   r,   ry   r   ry   rQ   r  )UUUUUU?UUUUUU?r   r   r   r%   r,   r   )r   r   r   )r   r   r   r   r   rF   r   r   )r   r   r%   )r   r%   r   )r%   r   r   )r   r   r   r   r   taker   r   )
rG   rC  rP   r   r   r   r   r   r    takerr!   r!   r"   test_value_counts_sort  s&    
"



rK  c           
      C   s   t g dg dddd}|jd| dd}|j||d	}|rFg d
}ng d}|rVdnd}t dtg ddtg d||iddg| }| r|rg d}	n,| r|sg d}	n| s|rg d}	ng d}	||	}t|| d S )NrD  rE  rF  r   r   ry   T)rG   r   r  )rG  rH  r   r   )r%   r   r   r   rF   r   )r   r   r%   r%   r   )r,   r   r,   r   )r   r   r%   r,   )r   r%   r   r,   )r%   r,   r   r   )r   r   r   r   Z	set_indexrI  r   r   )
rG   rC  rP   r   r   r   r   r   r    rJ  r!   r!   r"   "test_value_counts_sort_categorical  s4    




rL  )K__doc__numpyr2   r   Zpandas.util._test_decoratorsutilZ_test_decoratorstdZpandasr   r   r   r   r   r   r   r	   r
   Zpandas._testingZ_testingr   Zpandas.util.versionr   r#   r=   r   ZslowZparametrizereprrZ   ro   rv   rx   r   r   Zfixturer   r   r   r   r   r   objectparamZ
skip_if_nor   r   r   r   r   r   rN   r6   r   r~   r(  r  r  r  r  r  r  r"  r5   r+  r,  r-  r7  r9  r<  r=  rA  rB  rK  rL  r!   r!   r!   r"   <module>   s  ,*




 K"





	
:#*
&0 %<



!