a
    PfzV                     @   s  d Z ddlmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ejdZejdZejdZd	d
 Zdd Zejdddgddgddgdgdgdgdggejdddgdd Zdd Zejdddggdd Zdd Zdd Zejd dge	ejgd!gd"gd#e
dgd$d%d&fdd'ge	ejgd"gd(ejd)gd$d*gd+d&fd$d*ge	ejgd"gd(ejd)gd$d*gd+d&fgd,d- Zejd.i e	d$d/ejd0d1ejd2gg d3d4d5d6ejd7ejd8gd9fg g d:d;d<e	g d=g d3g d>d9fd$gd;d<e	ejd/d?d0d1d@d2gg d3g d>d9fdg g d:ie	d$d/ejd0d1ejd2gg d3d4d5d6ejd7ejd8gd9fgdAdB Z dCdD Z!dEdF Z"dGdH Z#ejdIdJdKgdLdM Z$eejdNdOd!dPgejejgdQdRggfd;dSdPgd@dTgdUdRggfgdVdW Z%edXdY Z&ejdZd!ejd[gd[ejggfd'd!d\d]d[gejejggfgd^d_ Z'd`da Z(dbdc Z)ejdde*dedf e*dg ddegie	e*dee*dggfe*dedh di i e	e*ded!gd?d'ggfe*dedj ddegie	ejd!gfgdkdl Z+dmdn Z,ejdod;d?dpgfdOejdqgfgdrds Z-dtdu Z.eejdvdOd;gdwdx Z/eejdydzd{d|d}d~dgfd~d$difgdd Z0edd Z1dd Z2edd Z3dd Z4dd Z5dS )zg
Tests that NA values are properly handled during
parsing for all of the parsers defined in parsers.py
    )StringION)STR_NA_VALUES)	DataFrameIndex
MultiIndexz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningZpyarrow_xfailZpyarrow_skipc                 C   sn   | }d}| t|}tg ddtjdgtjddggg dd}|jd	kr^d |jd
< d |jd< t|| d S )NzA,B,C
a,b,c
d,,f
,g,h
abcdfghABCcolumnspyarrow)   r   )   r   )	read_csvr   r   npnanengineloctmassert_frame_equalall_parsersparserdataresultexpected r%   ^/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/tests/io/parser/test_na_values.pytest_string_nas   s    


r'   c                 C   st   | }d}t ddgtjdgtjtjggddgd}|jdkrVd |jd	d
gdf< d |jd< |t|}t|| d S )NzA,B
foo,bar
NA,baz
NaN,nan
foobarbazr   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )r    r!   r"   r$   r#   r%   r%   r&   test_detect_string_na-   s     

r,   	na_valuesz-999.0z-999ig     8r"   zA,B
-999,1.2
2,-999
3,4.5
z"A,B
-999,1.200
2,-999.000
3,4.500
c           	      C   s   | }t tjdgdtjgddggddgd}|jdkrtd	d
 |D sd}tjt|d" |jt	||d W d    n1 s0    Y  d S |jdkrd|v rtj
jdd}|| |jt	||d}t|| d S )Ng333333?       @g      @g      @r   r   r   r   c                 s   s   | ]}t |tV  qd S )N)
isinstancestr).0xr%   r%   r&   	<genexpr>_       z,test_non_string_na_values.<locals>.<genexpr>9The 'pyarrow' engine requires all na_values to be stringsmatchr-   z-999.000z4pyarrow engined does not recognize equivalent floatsreason)r   r   r   r   allpytestraises	TypeErrorr   r   markxfailapplymarkerr   r   )	r    r"   r-   requestr!   r$   msgr?   r#   r%   r%   r&   test_non_string_na_values>   s    &0
rD   c                    s   h d}|t ksJ | }t|fdd td fddt|D }ttjttd}|j	|d d}t
|| d S )	N>    zN/Az-NaNNoner   NaNz1.#INDz-1.#INDz#N/A N/AZNULLZNAz#NAz<NA>nullz#N/Az-1.#QNANz1.#QNANz-nanzn/ac                    sf   | dkrd}n| dkr&d dg|  }| | }|  d k rbd dg |  d  }| | }|S )Nr   rE   ,r   )join)ivbufZjoined)nvr%   r&   r      s    z!test_default_na_values.<locals>.f
c                    s   g | ]\}} ||qS r%   r%   )r1   rK   rL   )r   r%   r&   
<listcomp>   r4   z*test_default_na_values.<locals>.<listcomp>)r   index)header)r   lenr   rJ   	enumerater   r   r   ranger   r   r   )r    Z
_NA_VALUESr!   r"   r$   r#   r%   )r   rN   r&   test_default_na_valuesq   s     rV   r*   c                 C   s   | }d}t dtjdgtjdtjgddtjggg dd}|jd	krd
}tjt|d& |jt||dgd W d    n1 s0    Y  d S |jt||dgd}t	
|| d S )Nz3A,B,C
ignore,this,row
1,NA,3
-1.#IND,5,baz
7,8,NaN
      ?            r   r   r   z@skiprows argument must be an integer when using engine='pyarrow'r6   r   )r-   Zskiprowsr   r   r   r   r<   r=   
ValueErrorr   r   r   r   )r    r-   r!   r"   r$   rC   r#   r%   r%   r&   test_custom_na_values   s    (
4r^   c                 C   s|   d}| }| t|}ttjdtjdgtdtjddtjgtdg dd}|jdkrld |jd< d |jd	< t	
|| d S )
Nz1A,B,C
True,False,True
NA,True,False
False,NA,TrueTFdtype)TFTr   r   )r   r   r+   )r   r   r   r   arrayr   objectr   r   r   r   r    r"   r!   r#   r$   r%   r%   r&   test_bool_na_values   s    


rd   c                 C   s   d}| }|j dkrdd}tjt|d, |jt|dgdgdd W d    n1 sV0    Y  d S |jt|dgdgdd}ttjdtjdgtjdtjdgtjdtjdgd	}t	
|| d S )
Nz3A,B,C
foo,bar,NA
bar,foo,foo
foo,bar,NA
bar,foo,foor   z;pyarrow engine doesn't support passing a dict for na_valuesr6   r(   r)   )r   r   r8   r   r   r<   r=   r]   r   r   r   r   r   r   r   r    r"   r!   rC   Zdfr$   r%   r%   r&   test_na_value_dict   s    
:rg   zindex_col,expectedr   rY   )r	   r
   r   r   namerQ   r   )r	   r   )r   r   r
   )namesc                 C   s.   d}| }|j t|t |d}t|| d S )Nza,b,c,d
0,NA,1,5
)r-   	index_col)r   r   setr   r   )r    rl   r$   r"   r!   r#   r%   r%   r&   test_na_value_dict_multi_index   s    rn   zkwargs,expectedr	   r   er   r   r   rX      rY      rZ   onetwothreefivesevenr   )r   r   Fr-   keep_default_nar   r	   rE   r   ro   r   r   )rs   rt   ru   r   rv   rE   rw   rE   r   c           	      C   s   d}| }|j dkrd|v rtt|d trtd}tjt|d& |jt|fi | W d    n1 sf0    Y  d S tj	 }|
| |jt|fi |}t|| d S )NzAA,B,C
a,1,one
b,2,two
,3,three
d,4,nan
e,5,five
nan,6,
g,7,seven
r   r-   ?The pyarrow engine doesn't support passing a dict for na_valuesr6   )r   r/   dictr<   r=   r]   r   r   r?   r@   rA   r   r   )	r    kwargsr$   rB   r"   r!   rC   r?   r#   r%   r%   r&   test_na_values_keep_default  s    .

4

r~   c                 C   sF   d}| }|j t|dd}tg dg dg dd}t|| d S )NzAA,B,C
a,1,None
b,2,two
,3,None
d,4,nan
e,5,five
nan,6,
g,7,seven
F)ry   rz   rp   )rF   rt   rF   r   rv   rE   rw   r   r   r   r   r   r   rc   r%   r%   r&   !test_no_na_values_no_keep_defaultK  s    
r   c                 C   s   d}| }|j dkrbd}tjt|d* |jt|ddgidd W d    n1 sT0    Y  d S |jt|ddgidd}td	gtjgd
}t	
|| d S )Nza,b
,2r   r{   r6   r	   2Frx   rE   r   r	   re   )r    r"   r!   rC   r#   r$   r%   r%   r&   &test_no_keep_default_na_dict_na_valuese  s    
$r   c                 C   s   d}| }|j dkr`d}tjt|d( |jt|ddidd W d    n1 sR0    Y  d S |jt|ddidd}td	gtjgd
}t	
|| d S )Nza,b
1,2r   r{   r6   r	   r   Frx   r   r   re   rf   r%   r%   r&   -test_no_keep_default_na_dict_na_scalar_valuesy  s    
6r   col_zero_na_valuesi Z113125c              
   C   s   d}| }t tjdgtjdgdtjgddgddgd	d
gtjdgd}|jdkrd}tjt|d0 |jt|d dd
dd|dd W d    n1 s0    Y  d S |jt|d dd
dd|dd}t	
|| d S )Nz_113125,"blah","/blaha",kjsdkj,412.166,225.874,214.008
729639,"qwer","",asdfkj,466.681,,252.373
g    ND&AZqwerz/blahaZkjsdkjZasdfkjg-y@g7A`*}@z225.874rE   g-o@)r   r   r   rX   rq   rY   rr   r   r{   r6   Fz214.008Zblah)r   rr   r   r   )rR   ry   r-   r\   )r    r   r"   r!   r$   rC   r#   r%   r%   r&   1test_no_keep_default_na_dict_na_values_diff_reprs  s:    
$r   zna_filter,row_dataTr   rX   r   1r   3c                 C   s>   d}| }|j t|dg|d}t|ddgd}t|| d S )NzA,B
1,A
nan,B
3,C
r   )r-   	na_filterr   r   r   )r    r   row_datar"   r!   r#   r$   r%   r%   r&   !test_na_values_na_filter_override  s
    	r   c              
   C   sf   | }d}| t|}tdddddtjtjtjgdddd	d
tjtjtjggg dd}t|| d S )NzlDate,Currency,Symbol,Type,Units,UnitPrice,Cost,Tax
2012-03-14,USD,AAPL,BUY,1000
2012-05-12,USD,SBUX,SELL,500z
2012-03-14USDZAAPLZBUYi  z
2012-05-12ZSBUXZSELLi  )DateZCurrencySymbolTypeZUnitsZ	UnitPriceZCostZTaxr   )r   r   r   r   r   r   r   r   r%   r%   r&   test_na_trailing_columns  s    r   zna_values,row_datar.   r   rW   c           
      C   s
  | }ddg}d}|j dkrt|trt|tr8t}d}nt}d}tj||d$ |jt|||d W d    n1 sx0    Y  d S |j dkrd}tjt|d$ |jt|||d W d    n1 s0    Y  d S |jt|||d}t	||d	}	t
||	 d S )
Nr   r	   1,2
2,1r   r{   r5   r6   rk   r-   r   )r   r/   r|   r]   r>   r<   r=   r   r   r   r   r   )
r    r-   r   r!   rk   r"   errrC   r#   r$   r%   r%   r&   test_na_values_scalar  s(    	
2
2r   c           	      C   s   | }ddd}|  }ddg}d}tddgtjtjgg|d	}|jd
krd}tjt|d$ |jt	|||d W d    n1 s0    Y  d S |jt	|||d}t
|| t
|| d S )Nr   r   r   r   r	   r   rW   r.   r   r   r{   r6   r   )copyr   r   r   r   r<   r=   r]   r   r   r   r   Zassert_dict_equal)	r    r!   r-   Zna_values_copyrk   r"   r$   rC   r#   r%   r%   r&   test_na_values_dict_aliasing  s    

2r   c                 C   s   d}| }ddi}|j dkrbd}tjt|d" |jt||d W d    n1 sT0    Y  d S |jt||d}tdtjd	gi}t	
|| d S )
Nza
foo
1r   r(   r   r{   r6   r8   r   r   re   )r    r"   r!   r-   rC   r#   r$   r%   r%   r&   test_na_values_dict_col_index  s    
0r   zdata,kwargs,expectedl            rO   l           z,1z
,2z
1c           	      C   s   | }|j dkrfd|v rfd}tjt|d* |jt|fdd i| W d    n1 sX0    Y  d S |j dkrtjjdd}|| |jt|fdd i|}t	
|| d S )Nr   r-   r5   r6   rR   z!Returns float64 instead of objectr9   )r   r<   r=   r>   r   r   r?   r@   rA   r   r   )	r    r"   r}   r$   rB   r!   rC   r?   r#   r%   r%   r&   test_na_values_uint64.  s    8

r   c                 C   sH   d}| }t ddgitdgddd}|jt|dd	d
}t|| d S )Nza,1
b,2r   r   r	   r   rh   rj   r   F)rl   ry   )r   r   r   r   r   r   )r    r"   r!   r$   r#   r%   r%   r&   *test_empty_na_values_no_default_with_indexK  s
    r   zna_filter,index_data5g      @c           	      C   sz   | }d}|j dkr2|du r2tjjdd}|| tddgdd	gd
t|ddd}|jt|dg|d}t	
|| d S )Na,b,c
1,,3
4,5,6r   Fzmismatched index resultr9   r   rq   rX   rr   )r   r
   r	   rh   rj   )rl   r   )r   r<   r?   r@   rA   r   r   r   r   r   r   )	r    r   Z
index_datarB   r!   r"   r?   r$   r#   r%   r%   r&   test_no_na_filter_on_indexU  s    
"r   c                 C   s\   | }d}|j t|dgddgd}tdtjgdtjgdtd	d
gddd}t|| d S )Nzidx,col1,col2
1,3,4
2,inf,-infr   infz-inf)rl   r-   rX   rq   )col1col2r   r   idxrh   rj   )r   r   r   r   r   r   r   r   )r    r!   r"   outr$   r%   r%   r&   !test_inf_na_values_with_int_indexh  s    "r   r   c                 C   sV   | }d}|rt jnd}tddg|dgddgd}|jt||td	}t|| d S )
Nr   rE   r   4r   r   6r   )r   r`   )r   r   r   r   r   r0   r   r   )r    r   r!   r"   emptyr$   r#   r%   r%   r&   +test_na_values_with_dtype_str_and_na_filteru  s    r   zdata, na_values)zfalse,1
,1
trueN)zfalse,1
null,1
trueN)zfalse,1
nan,1
trueN)false,1
foo,1
truer(   r   r(   c                 C   sd   | }d ddg}tjt|d0 |jt|d ddgddi|d W d    n1 sV0    Y  d S )	N|z(Bool column has NA values in column [0a]zRcannot safely convert passed user dtype of bool for object dtyped data in column 0r6   r   r	   bool)rR   rk   r`   r-   )rJ   r<   r=   r]   r   r   )r    r"   r-   r!   rC   r%   r%   r&   !test_cast_NA_to_bool_raises_error  s    r   c                 C   sb   | }d}|j t|d g dtttdd }tddgddgdd	gdd
dgd}t|| d S )NzDFile: small.csv,,
10010010233,0123,654
foo,,bar
01001000155,4530,898)r   r   Zcol3)rR   rk   r`   Z10010010233Z01001000155Z0123Z4530Z654Z898r   rX   rj   )r   r   r0   Zdropnar   r   r   r   r%   r%   r&   test_str_nan_dropped  s     

	r   c                 C   s   | }d}|j dkrhd}tjt|d0 |jt|ttdddid W d    n1 sZ0    Y  d S |jt|ttdddid}td	gdgt	j
gd
}t|| d S )NzA,B,B
X,Y,Z
1,2,infr   r{   r6   r   r   Zr   )rR   r-   r   ))r   X)r   Yr   )r   r<   r=   r]   r   r   listrU   r   r   r   r   r   )r    r!   r"   rC   r#   r$   r%   r%   r&   test_nan_multi_index  s&    
$r   c                 C   sL   | }d}t jtdd" |jt|dd W d    n1 s>0    Y  d S )N0
NaN
True
False
z	NA valuesr6   r   r_   r<   r=   r]   r   r   r    r!   r"   r%   r%   r&   test_bool_and_nan_to_bool  s    r   c                 C   sL   | }d}t jtdd" |jt|dd W d    n1 s>0    Y  d S )Nr   zconvert|NoneTyper6   intr_   r   r   r%   r%   r&   test_bool_and_nan_to_int  s    r   c                 C   s@   | }d}|j t|dd}tdtjddgi}t|| d S )Nr   floatr_   0rW   g        )r   r   r   	from_dictr   r   r   r   r   r%   r%   r&   test_bool_and_nan_to_float  s
    r   )6__doc__ior   numpyr   r<   Zpandas._libs.parsersr   Zpandasr   r   r   Zpandas._testingZ_testingr   r?   filterwarningsZ
pytestmarkZusefixturesZxfail_pyarrowZskip_pyarrowr'   r,   ZparametrizerD   rV   r^   rd   rg   r   from_tuplesrn   r~   r   r   r   r   r   r   r   r   r   r0   r   r   r   r   r   r   r   r   r   r   r   r%   r%   r%   r&   <module>   s8  0
$


-

'

( 





