a
    Pf]                  	   @   s  d Z ddlZddlZddlZddlmZ ddlmZ ddl	Z
ddlmZ ddlmZmZ dd Zejdd	 Zejd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejj d d!d"d# Z!ejj d d!d$d% Z"d&d' Z#d(d) Z$d*d+ Z%d,d- Z&d.d/ Z'ej(d0ej)e*d1e+d1de
j,gd2d3 Z-ej(d4d5d6gd7d8 Z.d9d: Z/d;d< Z0d=d> Z1ej(d?d5d6gejj d@d!dAdB Z2ej(d?d5d6gejj d@d!dCdD Z3ej(dEdFdGgej(d?d5d6gdHdI Z4ej(dEdFdGgej(dJe
j5e
j6gdKdL Z7dMdN Z8dOdP Z9ej:dQdRdS Z;ej:dQdTdU Z<dVdW Z=dXdY Z>ej(dZg d[e6g d\fg d]e6g d^fgd_d` Z?dadb Z@ej(dcejAejBej*gddde ZCdfdg ZDdhdi ZEdjdk ZFdldm ZGdndo ZHdpdq ZIdS )rz
This module tests the functionality of StringArray and ArrowStringArray.
Tests for the str accessors are in pandas/tests/strings/test_string_array.py
    N)pa_version_under12p0)is_dtype_equal)ArrowStringArrayArrowStringArrayNumpySemanticsc                 C   s   | j dkrtjS tjS d S )Npyarrow_numpy)storagenpnanpdNAdtype r   `/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/tests/arrays/string_/test_string.pyna_val   s    
r   c                 C   s   t j| dS )z=Fixture giving StringDtype from parametrized 'string_storage')r   )r
   StringDtype)string_storager   r   r   r      s    r   c                 C   s   |   S )z3Fixture giving array type from parametrized 'dtype')Zconstruct_array_typer   r   r   r   cls#   s    r   c                 C   s   t dt jdt jdg| di}| jdkr0d}nd}t||ksDJ | jdkrTd}nd	}t|j|ksjJ | jd
krd}d| d}n,| jdkrd}d| d}nd}d| d}t|jj|ksJ d S )NAabr   r   z     A
0    a
1  NaN
2    bz      A
0     a
1  <NA>
2     bz10      a
1    NaN
2      b
Name: A, dtype: stringz40       a
1    <NA>
2       b
Name: A, dtype: stringpyarrowr   <z+>
['a', <NA>, 'b']
Length: 3, dtype: stringr   z*>
['a', nan, 'b']
Length: 3, dtype: stringStringArray)r
   	DataFramearrayr   r   reprr   )r   dfexpectedZarr_namer   r   r   	test_repr)   s$     



r   c                 C   s<   | j g d|d}|d d us"J |d t|ju s8J d S )N)r   Nr   r      )_from_sequencer   r   )r   r   r   r   r   r   test_none_to_nanC   s    r"   c                 C   s   | j ddg|d}| tjju r$d}nd}tjt|d d|d< W d    n1 sT0    Y  | tjju rpd	}nd}tjt|d& td
dg|d d < W d    n1 s0    Y  d S )Nr   r   r   z4Cannot set non-string value '10' into a StringArray.Scalar must be NA or strmatch
   r   zMust provide strings.r       )	r!   r
   arraysr   pytestraises	TypeErrorr   r   )r   r   arrmsgr   r   r   test_setitem_validatesI   s    &r.   c                 C   s<   t jddg| d}d|d< t jddg| d}t|| d S )Nr   cr   dr   r
   r   tmassert_extension_array_equal)r   r,   r   r   r   r   test_setitem_with_scalar_string[   s    r4   c                 C   sf   t jg d| d}tdd g}| }||ddg< t jdt jdg| d}t|| t|| d S )Nr   r   r/   r   r   r   r    r/   )r
   r   r   copyr   r2   r3   assert_numpy_array_equal)r   r,   valueZ
value_origr   r   r   r   $test_setitem_with_array_with_missingd   s    r9   c                 C   s   t t jddd}d |d< || }t|j| s6J |d}t|| ||jd  }|| }t|j| stJ ||j}t|| d S )N2000   )Zperiodsr   zdatetime64[ns])	r
   SeriesZ
date_rangeastyper   r   r2   assert_series_equalZiloc)r   serZcastedresultZser2Zcasted2Zresult2r   r   r   test_astype_roundtripq   s    


rB   c                 C   s   t jg d| d}t jg d| d}|| }t jg d| d}t|| ||}t|| ||}t jg d| d}t|| |j|dd}t jg d| d}t|| d S )	N)r   r   r/   NNr   )xyNzN)axbyNNN)xaybNNN-)
fill_value)rF   rG   zc-z-zN)r
   r=   r2   r?   addZradd)r   r   r   rA   r   r   r   r   test_add   s    

rM   c                 C   s   | j |v r(d}tjjd |d}|| tjg d| d}tjg dgtd}tj	t
dd ||  W d    n1 sz0    Y  t|}tj	t
dd ||  W d    n1 s0    Y  d S )Nz*Failed: DID NOT RAISE <class 'ValueError'>r*   reasonr5   r   z3 != 1r$   )r   r)   markxfailapplymarkerr
   r   r   objectr*   
ValueErrorr=   )r   requestarrow_string_storagerO   rP   r   r   sr   r   r   test_add_2d   s    

&
rX   c                 C   sj   t jg d| d}g d}|| }t jg d| d}t|| || }t jg d| d}t|| d S )N)r   r   NNr   )rC   NrD   N)rF   NNN)rH   NNNr1   )r   r   otherrA   r   r   r   r   test_add_sequence   s    rZ   c                 C   sP   t jg d| d}|d }t jg d| d}t|| d| }t|| d S )Nr   r   Nr   r'   )ZaaZbbNr1   )r   r   rA   r   r   r   r   test_mul   s    r\   zGH-28527rO   c                 C   s   t jg d| d}t jg dgtd}||tu s8J || }t g dg| }t|| || }t g dg| }t|| d S )N)r   r   r/   r0   r   )trD   vw)atrG   ZcvZdw)tarI   vcwd)	r
   r   r   rS   __add__NotImplementedr>   r2   assert_frame_equalr   r,   r   rA   r   r   r   r   test_add_strings   s    ri   c                 C   s   t jddtjtjg| d}t dtjdtjgg}||tu sDJ || }t dtjtjtjgg| }t	|| || }t dtjtjtjgg| }t	|| d S )Nr   r   r   rC   rD   rF   rH   )
r
   r   r   r	   r   re   rf   r>   r2   rg   rh   r   r   r   test_add_frame   s      rj   c                    s   d| j  d tjg d|d}dt| }|jdkrt fdd|D }| tjkrjd|d	< nd
|d	< t	||
tj nJ|jdkrdnd}tj fdd|D td}tj||d}t|| d S )N__r   Nr/   r   r   r   c                    s   g | ]}t | qS r   getattr.0itemop_namerY   r   r   
<listcomp>       z2test_comparison_methods_scalar.<locals>.<listcomp>Tr    Fr   boolean[pyarrow]booleanc                    s   g | ]}t | qS r   rm   ro   rr   r   r   rt      ru   )__name__r
   r   rn   r   r   operatorner2   r7   r>   Zbool_rS   r3   )comparison_opr   r   rA   r   expected_dtyper   rr   r   test_comparison_methods_scalar   s    


r}   c                 C   s   d| j  d}tjg d|d}t||tj}|jdkrptj| krTtg d}ntg d}t	
|| n<|jdkr~dnd	}tjg d
|d}t	|| t	|| d S )Nrk   rl   r   r   TTTFFFr   rv   rw   NNN)rx   r
   r   rn   r   r   ry   rz   r   r2   r7   r3   )r{   r   rs   r   rA   r   r|   r   r   r   $test_comparison_methods_scalar_pd_na   s    

r   c           	      C   s   d| j  d}tjg d|d}d}|dvrltjtdd t||| W d    n1 s^0    Y  d S t|||}|jdkrg d	g d
d| }t|}t	
|| nBg dg dd| }|jdkrdnd}tj||d}t	|| d S )Nrk   rl   r   *   )__eq____ne__z(Invalid comparison|not supported betweenr$   r   r   r~   )FNF)TNTr   rv   rw   )rx   r
   r   r)   r*   r+   rn   r   r   r2   r7   r3   )	r{   r   rs   r   rY   rA   Zexpected_datar   r|   r   r   r   )test_comparison_methods_scalar_not_string  s,    ,

r   c                 C   s`  d| j  d}tjg d|d}g d}t|||}|jdkrtj| krZtg d}n(tg d}t|d ||d |d< t	|| t||tj
}tj| krtg d	}ntg d}t	|| n|jd
krdnd}tjt|d dd}t|d ||d |d< tj||d}t|| t||tj
}tjg d|d}t|| d S )Nrk   rl   r   )NNr/   r   )TTFr   r<   r~   r   rv   rw   rS   )rK   r   r   )rx   r
   r   rn   r   ry   rz   r   r2   r7   r   fulllenr3   )r{   r   rs   r   rY   rA   r   r|   r   r   r   test_comparison_methods_array!  s.    


r   c                 C   sH  | t jju rd}nd}tjt|d& | tjddgdd W d    n1 sP0    Y  tjt|d | tg  W d    n1 s0    Y  | t jju r| tjdtjgt	d | tjdd gt	d ntjt|d( | tjdtjgt	d W d    n1 s0    Y  tjt|d& | tjdd gt	d W d    n1 sV0    Y  tjt|d( | tjdt j
gt	d W d    n1 s0    Y  tjt|d. | tjdtdd	gt	d W d    n1 s0    Y  tjt|d. | tjdtdd	gt	d W d    n1 s:0    Y  d S )
Nz7StringArray requires a sequence of strings or pandas.NAzBUnsupported type '<class 'numpy.ndarray'>' for ArrowExtensionArrayr$   r   r   ZS1r   NaTns)r
   r(   r   r)   r*   rT   r   r   r	   rS   r   Z
datetime64Ztimedelta64)r   r-   r   r   r   test_constructor_raisesB  s(    4,868>r   nar	   c                 C   s>   t jtdt jg}tt jtjd| gdd| d S )Nr   rS   r   )r
   r(   r   r   r   r   r2   r3   )r   r   r   r   r   test_constructor_nan_likec  s    r   r6   TFc           	      C   s   t jdt jgtd}| }t jdtjgtd}|j||| d}|tt	fv rndd l
}||j|| dd}n||}t|| t|| d S )Nr   r   )r   r6   r   TtypeZfrom_pandas)r   r   r	   rS   r6   r
   r   r!   r   r   r   stringr2   r3   r7   )	r6   r   r   Znan_arrZexpected_inputZna_arrrA   par   r   r   r   test_from_sequence_no_mutatek  s    r   c                 C   s   t jg d| d}|d}tjg ddd}t|| t jdt jdg| d}| jdkrdt}d}nt	}d	}t
j||d
 |d W d    n1 s0    Y  d S )N)123r   int64)r    r'      r   r   r   z#cannot convert float NaN to integerzJint\(\) argument must be a string, a bytes-like object or a( real)? numberr$   )r
   r   r>   r   r2   r7   r   r   rT   r+   r)   r*   )r   r,   rA   r   errr-   r   r   r   test_astype_int~  s    

r   c                 C   sF   t jdt jdg| d}|d}t jdt jdgdd}t|| d S )Nr   r   r   Int64r    r   )r
   r   r   r>   r2   r3   r   r,   rA   r   r   r   r   test_astype_nullable_int  s    
r   c                 C   sF   t jdt jdg| d}||}t jdtjdg|d}t|| d S )Nz1.1z3.3r   g?gffffff
@)r
   r=   r   r>   r   r	   r2   r?   )r   Zany_float_dtyper@   rA   r   r   r   r   test_astype_float  s    
r   skipnazNot implemented StringArray.sumc                 C   s.   t jg d|d}|j| d}|dks*J d S )Nr5   r   r   abc)r
   r=   sumr   r   r,   rA   r   r   r   test_reduce  s    r   c                 C   sB   t jg d|d}|j| d}| r0|dks>J nt |s>J d S )N)Nr   Nr   r/   Nr   r   r   )r
   r=   r   isnar   r   r   r   test_reduce_missing  s
    r   methodminmaxc                 C   sZ   t jg d|d}t|| |d}|rD| dkr2dnd}||ksVJ n|t|ju sVJ d S )Nr   r   r/   Nr   r   r   r   r/   )r
   r=   rn   r   r   )r   r   r   r,   rA   r   r   r   r   test_min_max  s    r   boxc           
      C   s   |j |v rB|tju rB|tju r$d}nd}tjjt|d}|| |g d|d}tt	| |}| dkrldnd}	||	ks|J d S )	Nz<'<=' not supported between instances of 'str' and 'NoneType'z0'ArrowStringArray' object has no attribute 'max'rN   r   r   r   r   r/   )
r   r
   r   r)   rP   rQ   r+   rR   rn   r   )
r   r   r   rU   rV   rO   rP   r,   rA   r   r   r   r   test_min_max_numpy  s    

r   c                 C   s   t jdt jg| d}|jdd}t jddg| d}t|| |jtdd}t jddg| d}t|| | j|v r~d}nd}t	j
t|d |jdd W d    n1 s0    Y  d S )	Nr   r   r   )r8   z"Invalid value '1' for dtype stringz3Cannot set non-string value '1' into a StringArray.r$   r    )r
   r   r   Zfillnar2   r3   r   str_r   r)   r*   r+   )r   rV   r,   resr   r-   r   r   r   test_fillna_args  s    
r   c                 C   s   t d}dd lm} tjg d| d}||}|jt|| dd}| jdv rbt	rb|
|}| jdkr|||| }||sJ d S )	Nr   r   r5   r   Tr   )r   r   python)r)   importorskipZpyarrow.computeZcomputer
   r   listlarge_stringr   r   chunked_arraycastr   equals)r   r   Zpcdatar,   r   r   r   r   test_arrow_array  s    



r   z0ignore:Passing a BlockManager:DeprecationWarningc           
      C   s  t d}|r*|dkr*|t jjdd tjg d| d}td|i}||}| j	dkrt|
djd	ksJ n|
djd
ksJ td| | }W d    n1 s0    Y  t|d jtjsJ |d| d}	t||	 |jd t|d ju sJ d S )Nr   r   1infer_string takes precedence over string storager]   r[   r   r   r   r   r   r   string[])r'   r   )r)   r   rR   rP   rQ   r
   r   r   tabler   fieldr   option_context	to_pandas
isinstancer   r   r>   r2   rg   locr   
r   Zstring_storage2rU   Zusing_infer_stringr   r   r   r   rA   r   r   r   r   test_arrow_roundtrip  s&    


&r   c           
      C   s  t d}|r*|dkr*|t jjdd tjg | d}td|i}||}| j	dkrp|
djdksJ n|
djd	ksJ |j|jg | d
g|jd}td| | }W d    n1 s0    Y  t|d jtjsJ |d| d}	t||	 d S )Nr   r   r   r]   r   r   r   r   r   )r   )schemar   r   r   )r)   r   rR   rP   rQ   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r>   r2   rg   r   r   r   r    test_arrow_load_from_zero_chunks  s&    


 &r   c                 C   s   t | dddkrd}nt | dddkr,d}nd}tjdd	dtjg| d
}|jdd}tjg d|g d |dd}t|| |jdd}tjddg|d d |dd}t|| d S )Nr    r   zint64[pyarrow]r   r   r   r   r   r   F)Zdropna)r'   r    r    )r   r    r   countindexr   nameTr'   r    )rn   r
   r   r   value_countsr=   r2   r?   )r   	exp_dtyper,   rA   r   r   r   r   test_value_counts_na0  s    r   c                 C   s   t | dddkrd}nt | dddkr.tj}nd}tjdddtjg| d	}|jd
d}tjddg|d d |ddd }t|| d S )Nr   r   r   zdouble[pyarrow]r   ZFloat64r   r   r   T)	normalizer'   r    Z
proportionr   r   )	rn   r   float64r
   r=   r   r   r2   r?   )r   r   r@   rA   r   r   r   r    test_value_counts_with_normalizeA  s    "r   zvalues, expectedr5   r   r[   )FFTc              	   C   s   t j| |d} d}tjt|d t ddl |  }t|| t |  }t |}t	|| t 
|  }t 
|}t|| W d    n1 s0    Y  W d    n1 s0    Y  d S )Nr   z"use_inf_as_na option is deprecatedr$   zmode.use_inf_as_naT)r
   r   r2   Zassert_produces_warningFutureWarningr   r   r7   r=   r?   r   rg   )valuesr   r   r-   rA   r   r   r   test_use_inf_as_naN  s    	

r   c                 C   sd   | j |v rtd| j   tjg d| d}d|j  k rZ|   krZ|jddk s`n J d S )Nznot applicable for r5   r   r   T)deep)r   r)   skipr
   r=   nbytesZmemory_usage)r   rV   Zseriesr   r   r   test_memory_usageg  s    
r   float_dtypec                 C   s:   t jdg| d}||}t jdg|d}t|| d S )Ng?r   z0.1)r
   r=   r>   r2   r?   )r   r   r@   rA   r   r   r   r   test_astype_from_float_dtyper  s    
r   c                 C   sH   t jdt jdg| d}t|}tjdt| dgtd}t|| d S )Nr   r   r   )r
   r   r   r   r   rS   r2   r7   r   r   r   r   "test_to_numpy_returns_pdna_default{  s    
r   c                 C   sJ   |}t jdt jdg| d}|j|d}tjd|dgtd}t|| d S )Nr   r   r   )na_value)r
   r   r   Zto_numpyr   rS   r2   r7   )r   Znulls_fixturer   r,   rA   r   r   r   r   test_to_numpy_na_value  s
    r   c                 C   s   t jg d| d}|ddg}t g d}t|| |dt jg}t g d}t|| |g }t g d}t|| |d|g}t g d}t|| d S )Nr[   r   r   r/   )TFF)TFTr   )r
   r=   isinr2   r?   r   )r   Zfixed_now_tsrW   rA   r   r   r   r   	test_isin  s    
r   c                 C   s   t jg d| d}tg d}d ||< |jd t|ju s@J t jg d| d}t|jt jju rjd}nd}t	j
t|d d||< W d    n1 s0    Y  d S )Nr5   r   )FTFr    zCannot set non-string valuer#   r$   )r
   r=   r   r   r   r   r   r(   r   r)   r*   r+   )r   r@   maskr-   r   r   r   (test_setitem_scalar_with_mask_validation  s    r   c                 C   sD   g d}t j|t jd}tj|| d}tj|| d}t|| d S Nr5   r   )r   r   r   r
   r2   r3   r   valsr,   rA   r   r   r   r   test_from_numpy_str  s
    r   c                 C   s2   g d}t j|| d}| }|}t|| d S r   )r
   r   tolistr2   Zassert_equalr   r   r   r   test_tolist  s
    r   )J__doc__ry   numpyr   r)   Zpandas.compat.pyarrowr   Zpandas.core.dtypes.commonr   Zpandasr
   Zpandas._testingZ_testingr2   Zpandas.core.arrays.string_arrowr   r   r   Zfixturer   r   r   r"   r.   r4   r9   rB   rM   rX   rZ   r\   rP   rQ   ri   rj   r}   r   r   r   r   Zparametrizer	   r   floatr   r   r   r   r   r   r   r   r   r=   r   r   r   r   filterwarningsr   r   r   r   r   r   Zfloat16Zfloat32r   r   r   r   r   r   r   r   r   r   r   <module>   s   

	


!!$

	






