a
    Pf                     @   s  d Z ddlZddlmZ ddlmZ ddlZddlZddlZ	ddl
Z
ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlZddlmZ dd	lmZ dd
lmZmZmZm Z m!Z! zddl"Z"dZ#W n e$y   dZ#Y n0 zddl%Z%dZ&W n e$y   dZ&Y n0 e
j'(de
j'(dgZ)e
j*e
j+de
j'j,e& pHeddddkddde
j+de
j'j,e# dddgddd Z-e
j*dd Z.e
j*dd Z/e
j*dd  Z0e
j*d!d" Z1e
j*d#d$ Z2e
j*ej3ej4j5ej3ej4j6ej3ej4j7ej8d%d&ej8d'd&ej8d(d&ej8d)d&gdd*d+ Z9dKd-d.Z:d/d0 Z;d1d2 Z<d3d4 Z=d5d6 Z>d7d8 Z?d9d: Z@d;d< ZAd=d> ZBd?d@ ZCdAdB ZDG dCdD dDZEG dEdF dFeEZFG dGdH dHeEZGG dIdJ dJeEZHdS )Lz test parquet compat     N)Decimal)BytesIO)using_copy_on_write)_get_option)is_platform_windows)pa_version_under11p0pa_version_under13p0pa_version_under15p0)Version)FastParquetImplPyArrowImpl
get_engineread_parquet
to_parquetTFz2ignore:DataFrame._data is deprecated:FutureWarningz=ignore:Passing a BlockManager to DataFrame:DeprecationWarningfastparquetmode.data_managerZsilentarrayz4fastparquet is not installed or ArrayManager is usedreason)Zmarkspyarrowpyarrow is not installed)paramsc                 C   s   | j S Nparamrequest r   U/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/tests/io/test_parquet.pyengine;   s    r    c                   C   s   t std dS )Nr   r   )_HAVE_PYARROWpytestskipr   r   r   r   paQ   s    
r$   c                   C   s.   t std ntddddkr*td dS )Nzfastparquet is not installedr   Tr   r   z.ArrayManager is not supported with fastparquetr   )_HAVE_FASTPARQUETr"   r#   r   r   r   r   r   fpX   s
    
r&   c                   C   s   t g dddS )N         fooAB)pd	DataFramer   r   r   r   	df_compata   s    r1   c               
   C   sB   t tdttddtjddddg dt jd	d
dd} | S )Nabcr(            @      @float64dtypeTFT20130101r*   periods)abdef)r/   r0   listrangenparange
date_range)dfr   r   r   df_cross_compatf   s    rH   c                   C   s   t tddtjdgg dg dg dttddtd	d
dtjdddddtjdgg dt jdd	dt 	dt j
t 	dgdS )Nr2   r=   cr=   NrI   )   foo   bars   bazr+   barbazr(   r3   r*      u1r4   r5   r6   r7          @      @r9   r:   r;   Z20130103)stringstring_with_nanZstring_with_nonebytesunicodeintZuintfloatZfloat_with_nanbooldatetimedatetime_with_nat)r/   r0   rB   rD   nanrC   rE   astyperF   	TimestampZNaTr   r   r   r   df_fullx   s$    

r`   z2019-01-04T16:41:24+0200z%Y-%m-%dT%H:%M:%S%zz2019-01-04T16:41:24+0215z2019-01-04T16:41:24-0200z2019-01-04T16:41:24-0215c                 C   s   | j S r   r   r   r   r   r   timezone_aware_date_list   s    ra   r)   c
                    s   p
ddipi du r |r4|d< |d<  fdd}
du rt  |
|	 W d   q1 s|0    Y  n|
|	 dS )a  Verify parquet serializer and deserializer produce the same results.

    Performs a pandas to disk and disk to pandas round trip,
    then compares the 2 resulting DataFrames to verify equality.

    Parameters
    ----------
    df: Dataframe
    engine: str, optional
        'pyarrow' or 'fastparquet'
    path: str, optional
    write_kwargs: dict of str:str, optional
    read_kwargs: dict of str:str, optional
    expected: DataFrame, optional
        Expected deserialization result, otherwise will be equal to `df`
    check_names: list of str, optional
        Closed set of column names to be compared
    check_like: bool, optional
        If True, ignore the order of index & columns.
    repeat: int, optional
        How many times to repeat the test
    compressionNr    c                    sZ   t | D ]L}jfi  tfi }dv r@d jd< tj| d qd S )NrU   r(   rU   )check_names
check_likecheck_dtype)rC   r   r   loctmassert_frame_equal)repeat_actualrf   re   rd   rG   expectedpathread_kwargswrite_kwargsr   r   compare   s    
z!check_round_trip.<locals>.compare)rh   ensure_clean)rG   r    ro   rq   rp   rn   rd   re   rf   rj   rr   r   rm   r   check_round_trip   s    "
(rt   c                 C   s0   ddl m} |j| dd}|jjj|ks,J dS )zCheck partitions of a parquet file are as expected.

    Parameters
    ----------
    path: str
        Path of the dataset.
    expected: iterable of str
        Expected partition names.
    r   NZhive)partitioning)Zpyarrow.datasetdatasetru   schemanames)ro   rn   Zdsrv   r   r   r   check_partition_names   s    
ry   c                 C   sB   d}t jt|d t| dd W d    n1 s40    Y  d S )Nz.engine must be one of 'pyarrow', 'fastparquet'matchr+   rN   )r"   raises
ValueErrorrt   )r1   msgr   r   r   test_invalid_engine   s    r   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nio.parquet.enginer   r/   option_contextrt   )r1   r$   r   r   r   test_options_py   s    r   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nr   r   r   )r1   r&   r   r   r   test_options_fp   s    r   c                 C   s8   t dd t|  W d    n1 s*0    Y  d S )Nr   autor   )r1   r&   r$   r   r   r   test_options_auto  s    r   c                 C   sV  t tdtsJ t tdts$J tddF t tdtsDJ t tdtsVJ t tdtshJ W d    n1 s|0    Y  tddF t tdtsJ t tdtsJ t tdtsJ W d    n1 s0    Y  tddL t tdts
J t tdtsJ t tdts2J W d    n1 sH0    Y  d S )Nr   r   r   r   )
isinstancer   r   r   r/   r   )r&   r$   r   r   r   test_options_get_engine  s    00r   c                  C   s  ddl m}  | d}| d}ts(dnttjt|k }tsBdnttjt|k }to\| }tof| }|s~|s~|rd| d}t	j
t|d td	 W d    q1 s0    Y  n:d
}t	j
t|d td	 W d    n1 s0    Y  |rBd| d}t	j
t|d td	 W d    n1 s60    Y  n<d}t	j
t|d td	 W d    n1 st0    Y  d S )Nr   )VERSIONSr   r   FzPandas requires version .z. or newer of .pyarrow.rz   r   z%Missing optional dependency .pyarrow.z. or newer of .fastparquet.z)Missing optional dependency .fastparquet.)Zpandas.compat._optionalr   getr!   r
   r   __version__r%   r   r"   r|   ImportErrorr   )r   Z
pa_min_verZ
fp_min_verZhave_pa_bad_versionZhave_fp_bad_versionZhave_usable_paZhave_usable_fpr{   r   r   r   "test_get_engine_auto_error_message   s8    



(&*r   c                 C   s~   | }t  ^}|j||d d t||d}t || t||ddgd}t ||ddg  W d    n1 sp0    Y  d S N)r    rb   r    r=   r?   )r    columnsrh   rs   r   r   ri   rH   r$   r&   rG   ro   resultr   r   r   test_cross_engine_pa_fpK  s    
r   c                 C   s~   | }t  ^}|j||d d t||d}t || t||ddgd}t ||ddg  W d    n1 sp0    Y  d S r   r   r   r   r   r   test_cross_engine_fp_paY  s    
r   c              	   C   s   t dg di}d}t N}tjt|ddd |||  W d    n1 sT0    Y  W d    n1 sr0    Y  d S )Nr=   r'   zqStarting with pandas version 3.0 all arguments of to_parquet except for the argument 'path' will be keyword-only.F)r{   Zcheck_stacklevelZraise_on_extra_warnings)r/   r0   rh   rs   assert_produces_warningFutureWarningr   )r    rG   r~   ro   r   r   r   !test_parquet_pos_args_deprecationf  s    
r   c                   @   s4   e Zd Zdd Zdd Zejjejjdd Z	dS )Basec              	   C   sj   t  N}tj||d  t|||d d W d    n1 s>0    Y  W d    n1 s\0    Y  d S )Nrz   rb   )rh   rs   r"   r|   r   )selfrG   r    excerr_msgro   r   r   r   check_error_on_writex  s    
zBase.check_error_on_writec              	   C   sf   t  J}t |  t|||d d W d    n1 s:0    Y  W d    n1 sX0    Y  d S )Nr   )rh   rs   external_error_raisedr   )r   rG   r    r   ro   r   r   r   check_external_error_on_write~  s    
z"Base.check_external_error_on_writec                 C   sr   |dkrt | t|dddddd*}|j| d t|j}W d    n1 sX0    Y  t|| d S )	Nr   iodataparquetzsimple.parquetrb)mode)content)	r"   importorskipopenZserve_contentreadr   urlrh   ri   )r   Z
httpserverdatapathr1   r    rA   rG   r   r   r   test_parquet_read_from_url  s    
(zBase.test_parquet_read_from_urlN)
__name__
__module____qualname__r   r   r"   marknetwork
single_cpur   r   r   r   r   r   w  s
   r   c                   @   s   e Zd Zdd Zdd Zejdg ddd Zd	d
 Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zejd!g d"d#d$ Zd%S )&	TestBasicc                 C   sF   t g dddt dtg dfD ]}d}| ||t| q(d S )Nr'   r(   r+   r:   z+to_parquet only supports IO with DataFrames)r/   Seriesr_   rD   r   r   r}   )r   r    objr~   r   r   r   
test_error  s    zTestBasic.test_errorc                 C   s6   t tdttddd}ddg|_t|| d S )Nr2   r(   r3   rT   rX   r+   rN   )r/   r0   rB   rC   r   rt   )r   r    rG   r   r   r   test_columns_dtypes  s    
zTestBasic.test_columns_dtypesrb   )NgzipZsnappybrotlic                 C   s(   t dg di}t||d|id d S )Nr-   r'   rb   rq   r/   r0   rt   )r   r    rb   rG   r   r   r   test_compression  s    zTestBasic.test_compressionc                 C   sJ   t tdttddd}t dtdi}t|||ddgid d S )Nr2   r(   r3   r   rT   r   rn   rp   r/   r0   rB   rC   rt   )r   r    rG   rn   r   r   r   test_read_columns  s
    zTestBasic.test_read_columnsc              	   C   sX   t ttdtdd}t dddgi}t||||ddgid	gdgd
dd d S )Nr3   Zaabb)rX   partrX   r   r(   partition_colsr   )r   ==r=   )filtersr   )ro   rn   rq   rp   rj   r   )r   r    tmp_pathrG   rn   r   r   r   test_read_filters  s    
zTestBasic.test_read_filtersc                 C   s   |dk}|r(|dkr(| tjjdd tdg di}t|| g dtjddd	td
g dg}|D ]2}||_	t
|tjr|j	d |_	t|||d qjg d|_	d|j	_t|| d S )Nr   zfastparquet write into indexr   r-   r'   )r)   r*   r3   r:   r*   r;   r2   )r(   r*   r3   )rd   )r   r(   r)   r+   )applymarkerr"   r   xfailr/   r0   rt   rF   rB   indexr   ZDatetimeIndex
_with_freqname)r   r    r   r   rd   rG   Zindexesr   r   r   r   test_write_index  s(    

zTestBasic.test_write_indexc                 C   s:   |}t dg di}t jg d}||_t|| d S )Nr-   r'   )r=   r(   )r=   r)   )r>   r(   )r/   r0   
MultiIndexfrom_tuplesr   rt   )r   r$   r    rG   r   r   r   r   test_write_multiindex  s
    zTestBasic.test_write_multiindexc                 C   s   |}t jdddd}t jtjddt| dftdd}t j	j
d	d
g|gddgd}|jd d}||fD ]4}||_t|| t||dddgi|ddg d qpd S )Nz01-Jan-2018z01-Dec-2018ZMS)freqr)   r*   ABCr   Level1Level2leveldate)rx   r   r-   r.   rp   rn   )r/   rF   r0   rD   randomdefault_rngstandard_normallenrB   r   Zfrom_productcopyr   rt   )r   r$   r    datesrG   Zindex1index2r   r   r   r   test_multiindex_with_columns  s     
z&TestBasic.test_multiindex_with_columnsc                 C   s   t g dg dd}d dd}|jdd}t||||d t jg dg ddg d	d
}t||||d g dg dg}t jttddd tdD d|d
}|jdd}t||||d d S )Nr'   )qrs)r=   r>   F)rb   r   T)droprq   rn   )ZzyxZwvuZtsrr   rN   rN   rO   rO   r+   r+   quxr   onetwor   r   r   r   r   r      c                 S   s   g | ]
}| qS r   r   ).0ir   r   r   
<listcomp>      z7TestBasic.test_write_ignoring_index.<locals>.<listcomp>)r   r   )r/   r0   Zreset_indexrt   rB   rC   )r   r    rG   rq   rn   arraysr   r   r   test_write_ignoring_index  s     
 z#TestBasic.test_write_ignoring_indexc                 C   s\   t jg d}t jtjdd|d}|dkrF| ||t	d n|dkrXt
|| d S )Nr   r)   )r3   r*   r   r   Column name must be a stringr   )r/   r   r   r0   rD   r   r   r   r   	TypeErrorrt   )r   r    Z
mi_columnsrG   r   r   r   test_write_column_multiindex  s    z&TestBasic.test_write_column_multiindexc                 C   sh   g dg dg}t jtjdd|d}ddg|j_|dkrR| ||t	d	 n|d
krdt
|| d S )Nr   )r(   r)   r(   r)   r(   r)   r(   r)   r)   r   r   r   r   r   r   zColumn namer   )r/   r0   rD   r   r   r   r   rx   r   r}   rt   r   r    r   rG   r   r   r   &test_write_column_multiindex_nonstring)  s    z0TestBasic.test_write_column_multiindex_nonstringc                 C   sJ   |}g dg dg}t jtjdd|d}ddg|j_t|| d S )Nr   r   r)   r   r   Z	ColLevel1Z	ColLevel2)	r/   r0   rD   r   r   r   r   rx   rt   r   r$   r    r   rG   r   r   r   #test_write_column_multiindex_string:  s    z-TestBasic.test_write_column_multiindex_stringc                 C   s>   |}g d}t jtjdd|d}d|j_t|| d S )N)rN   rO   r+   r   r)   r   r3   r   Z	StringCol)	r/   r0   rD   r   r   r   r   r   rt   r   r   r   r   test_write_column_index_stringK  s    z(TestBasic.test_write_column_index_stringc                 C   sT   g d}t jtjdd|d}d|j_|dkrF| ||t	d n
t
|| d S )Nr(   r)   r*   r3   r)   r   r   ZNonStringColr   r   )r/   r0   rD   r   r   r   r   r   r   r   rt   r   r   r   r   !test_write_column_index_nonstringY  s    z+TestBasic.test_write_column_index_nonstringc           
      C   s  t d}|dkr*t jjdd}|| ttg ddtg ddtg dtg d	tg d
dtg ddtg ddd}t	 6}|
|| t||d}t||dd}W d    n1 s0    Y  |d jtdksJ ttjg dddtjg dddtjg dddtjg d	ddtjg d
ddtjg dddtjg dddd}	|dkr|jddd}|	jddd}	t||	 d S )Nzpyarrow.parquetr   z.Fastparquet nullable dtype support is disabledr   r(   r)   r*   NZint64Zuint8)r=   r>   rI   N)TFTNr   )      ?rR   rS   NZfloat32r6   )r=   r>   rI   r?   r@   rA   gr   numpy_nullabler    dtype_backendr=   Int64r7   UInt8rT   booleanZFloat32Float64rI   r(   )Zaxis)r"   r   r   r   r   r   tabler   rh   rs   write_tabler   r8   rD   r/   r0   r   ri   )
r   r    r   pqr   r	  ro   Zresult1Zresult2rn   r   r   r   test_dtype_backendi  sF    


,
zTestBasic.test_dtype_backendr8   )	r  r  r  objectzdatetime64[ns, UTC]rY   z	period[D]r  rT   c                 C   sT   t dt jg |di}d }|dkr<t dt jg ddi}t||ddi|d d S )Nvaluer7   rY   r  r  r  r   )r/   r0   r   rt   )r   r$   r8   rG   rn   r   r   r   test_read_empty_array  s    zTestBasic.test_read_empty_arrayN)r   r   r   r   r   r"   r   parametrizer   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r   r     s*   
		 1r   c                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jje dde
jje dde
jdeejgdd Zdd Ze
jjdd Ze
jjdd Ze
jje
jddgg gdd Zdd  Zd!d" Zd#d$ Zd%d& Ze
jjded'd( gd)d*gd+d,d- Zd.d/ Zd0d1 Z d2d3 Z!d4d5 Z"d6d7 Z#d8d9 Z$d:d; Z%d<d= Z&d>d? Z'd@dA Z(dBdC Z)dDdE Z*dFdG Z+dHdI Z,dJdK Z-e
jje.dLddMdN Z/dOdP Z0dQS )RTestParquetPyArrowc                 C   s@   |}t jdddd}|d }||d< g d|d< t|| d S )Nr:   r*   Europe/Brusselsr<   tzdatetime_tzTNTbool_with_none)r/   rF   r   rt   )r   r$   r`   rG   dtir   r   r   
test_basic  s    
zTestParquetPyArrow.test_basicc                 C   s<   |}t jdddd|d< t|||ddg dddgid	 d S )
Nr:   r*   r  r  r  rT   rX   r   r   )r/   rF   rt   )r   r$   r`   rG   r   r   r   test_basic_subset_columns  s    

z,TestParquetPyArrow.test_basic_subset_columnsc                 C   sL   |j |d}t|tsJ t|}t|}| }d |jd< t|| d S )Nr   rc   )	r   r   rV   r   r   r   rg   rh   ri   )r   r$   r`   Z	buf_bytesZ
buf_streamresrn   r   r   r   *test_to_bytes_without_path_or_buf_provided  s    
z=TestParquetPyArrow.test_to_bytes_without_path_or_buf_providedc                 C   s8   t jtdddtdd }| ||td d S )N   r3   r*   aaar   zDuplicate column names found	r/   r0   rD   rE   ZreshaperB   r   r   r}   r   r$   rG   r   r   r   test_duplicate_columns  s    $z)TestParquetPyArrow.test_duplicate_columnsc                 C   s&   t dt jdddi}t|| d S )Nr=   1 dayr*   r;   )r/   r0   timedelta_rangert   r   r   r   r   test_timedelta  s    z!TestParquetPyArrow.test_timedeltac                 C   s&   t dg di}| ||tj d S )Nr=   r=   r(   rR   )r/   r0   r   r   ArrowExceptionr   r   r   r   test_unsupported  s    z#TestParquetPyArrow.test_unsupportedc                 C   sF   t jddt jd}tj|dgd}tr8| ||tj n
t	|| d S )Nr)   
   r7   fp16r   r   )
rD   rE   float16r/   r0   r	   r   r   r&  rt   )r   r$   r   rG   r   r   r   test_unsupported_float16  s
    z+TestParquetPyArrow.test_unsupported_float16zqPyArrow does not cleanup of partial files dumps when unsupported dtypes are passed to_parquet function in windowsr   zfloat16 works on 15	path_typec              	   C   s   t jddt jd}tj|dgd}t b}||}ttj	 |j
||d W d    n1 sd0    Y  tj|r~J W d    n1 s0    Y  d S )Nr)   r(  r7   r)  r*  )ro   r    )rD   rE   r+  r/   r0   rh   rs   r   r   r&  r   osro   isfile)r   r$   r-  r   rG   Zpath_strro   r   r   r    test_unsupported_float16_cleanup  s    
,z3TestParquetPyArrow.test_unsupported_float16_cleanupc                 C   sd   t  }t td|d< t jg dt g dd|d< t jg dg dd	d
|d< t|| d S )NZabcdefr=   )rN   r+   r+   rN   NrN   rM   r7   r>   )r=   r>   rI   r=   rI   r>   )r>   rI   r?   T)
categoriesorderedrI   )r/   r0   CategoricalrB   ZCategoricalDtypert   r   r   r   r   test_categorical  s    

z#TestParquetPyArrow.test_categoricalc                 C   s@   t d}|jf i |}d|i}t|||j d||d d S )Ns3fs
filesystem/pyarrow.parquetro   rp   rq   )r"   r   ZS3FileSystemrt   r   )r   r1   s3_public_bucketr$   s3sor5  Zs3kwr   r   r   test_s3_roundtrip_explicit_fs$  s    

z0TestParquetPyArrow.test_s3_roundtrip_explicit_fsc                 C   s(   d|i}t ||d|j d||d d S )Nstorage_optionss3://r7  r8  rt   r   )r   r1   r9  r$   r:  r   r   r   test_s3_roundtrip1  s    z$TestParquetPyArrow.test_s3_roundtrippartition_colr-   c              
   C   sr   t d | }|r@|t|tj}d}|| |||< t|||d|j	 dd|i|d |dddd	 d S )
Nr5  categoryr>  z/parquet_dirr=  )r   rb   r=  Tr(   )rn   ro   rp   rq   re   rj   )
r"   r   r   r^   dictfromkeysrD   Zint32rt   r   )r   r1   r9  r$   rA  r:  Zexpected_dfZpartition_col_typer   r   r   test_s3_roundtrip_for_dir=  s*    
z,TestParquetPyArrow.test_s3_roundtrip_for_dirc                 C   s2   t d t }|| t|}t|| d S )Nr   )r"   r   r   r   r   rh   ri   )r   r1   bufferZdf_from_bufr   r   r   test_read_file_like_obj_supportd  s
    

z2TestParquetPyArrow.test_read_file_like_obj_supportc                 C   s   t d |dd |dd t jtdd td W d    n1 sN0    Y  t jtdd |d W d    n1 s0    Y  d S )Nr   HOMEZTestingUserUSERPROFILEz.*TestingUser.*rz   z~/file.parquet)r"   r   Zsetenvr|   OSErrorr   r   )r   r1   Zmonkeypatchr   r   r   test_expand_userk  s    
&z#TestParquetPyArrow.test_expand_userc                 C   s>   ddg}|}|j ||d d t|| t|j|jks:J d S )NrZ   rX   r   rb   r   ry   r   shape)r   r   r$   r`   r   rG   r   r   r   test_partition_cols_supportedt  s
    
z0TestParquetPyArrow.test_partition_cols_supportedc                 C   s@   d}|g}|}|j ||d d t|| t|j|jks<J d S )NrZ   rL  rM  )r   r   r$   r`   r   partition_cols_listrG   r   r   r   test_partition_cols_string|  s    
z-TestParquetPyArrow.test_partition_cols_stringc                 C   s   | S r   r   )xr   r   r   <lambda>  r   zTestParquetPyArrow.<lambda>rT   zpathlib.Path)Zidsc           	      C   s<   d}|g}|}||}|j ||d t|j|jks8J d S )Nr.   )r   )r   r   rN  )	r   r   r$   r1   r-  r   rP  rG   ro   r   r   r   test_partition_cols_pathlib  s    z.TestParquetPyArrow.test_partition_cols_pathlibc                 C   s   t jg g d}t|| d S )N)r   r   r   r   r   r   r   test_empty_dataframe  s    z'TestParquetPyArrow.test_empty_dataframec                 C   sV   dd l }tdddgi}||jd| dg}|t}t||d|i|d d S )Nr   rR  r(   )typerw   r   )	r   r/   r0   rw   fieldZbool_r^   rZ   rt   )r   r$   r   rG   rw   Zout_dfr   r   r   test_write_with_schema  s
    
z)TestParquetPyArrow.test_write_with_schemac                 C   sz   t d ttjg dddtjg dddtjg dddd}t|| td	tjg d
ddi}t|| d S )Nr   r'   r  r7   ZUInt32rJ   rT   r=   r>   rI   r=   r   )r"   r   r/   r0   r   rt   r   r   r   r    test_additional_extension_arrays  s    

z3TestParquetPyArrow.test_additional_extension_arraysc              	   C   sr   t d tdtjg dddi}td|, t|||d| dd	 W d    n1 sd0    Y  d S )
Nr   r=   rJ   string[pyarrow]r7   string_storagezstring[]rn   )r"   r   r/   r0   r   r   rt   r^   )r   r$   r\  rG   r   r   r    test_pyarrow_backed_string_array  s    
z3TestParquetPyArrow.test_pyarrow_backed_string_arrayc                 C   sV   t d ttjg dtjddddtjtjddddd}t	|| d S )	Nr   ))r   r(   )r(   r)   )r*   r3   z
2012-01-01r*   D)r<   r   r3   )rI   r?   r@   )
r"   r   r/   r0   ZIntervalIndexr   period_rangeZfrom_breaksrF   rt   r   r   r   r   test_additional_extension_types  s    

z2TestParquetPyArrow.test_additional_extension_typesc                 C   s4   d}t dt jddddi}t||d|id d S )	Nz2.6r=   z
2017-01-01Z1nsr(  r   r<   versionr   )r/   r0   rF   rt   )r   r$   verrG   r   r   r   test_timestamp_nanoseconds  s    z-TestParquetPyArrow.test_timestamp_nanosecondsc                 C   sP   |j tjjkr"|tjjdd d|g }tj	|d|id}t
||dd d S )Nzitemporary skip this test until it is properly resolved: https://github.com/pandas-dev/pandas/issues/37286r      index_as_colr   r   F)rf   )tzinfor[   timezoneutcr   r"   r   r   r/   r0   rt   )r   r   r$   ra   idxrG   r   r   r   test_timezone_aware_index  s    
z,TestParquetPyArrow.test_timezone_aware_indexc                 C   sz   t d tdttdi}t .}|j||d t	||dgd}W d    n1 s\0    Y  t
|dksvJ d S )Nr   r=   r*   r   r=   r   r   r   r(   )r"   r   r/   r0   rB   rC   rh   rs   r   r   r   )r   r$   rG   ro   r   r   r   r   test_filter_row_groups  s    

.z)TestParquetPyArrow.test_filter_row_groupsc                 C   s   t jtjddg dd}t (}|j||d t	||}W d    n1 sV0    Y  |r|t
|jt jjjsJ nt
|jt jjjsJ d S )Nr)   )r(  r*   )r-   r.   Cr   r   )r/   r0   rD   r   r   r   rh   rs   r   r   r   Z_mgrcoreZ	internalsZArrayManagerZBlockManager)r   r$   Zusing_array_managerrG   ro   r   r   r   r   test_read_parquet_manager  s    
(z,TestParquetPyArrow.test_read_parquet_managerc                 C   s   dd l }|}tjdddd}|d }||d< g d|d< |j|}|jtjd	}tr|d
 	d|d
< |d 	d|d< |d 	t|j
ddd|d< t||ddi|d d S )Nr   r:   r*   r  r  r  r  r  )Ztypes_mapperr[   ztimestamp[us][pyarrow]r\   us)unitr  r  r   r    rp   rn   )r   r/   rF   r   TableZfrom_pandasZ	to_pandasZ
ArrowDtyper   r^   	timestamprt   )r   r$   r`   r   rG   r  Zpa_tablern   r   r   r   &test_read_dtype_backend_pyarrow_config  s,    
z9TestParquetPyArrow.test_read_dtype_backend_pyarrow_configc                 C   sn   t jdddgit jddgdddd	}| }d
d l}t|jtdkrV|jd|_t	||ddi|d d S )Nr=   r(   r)   r*   r3   testr   zint64[pyarrow])r   r8   r   z11.0.0r  r   rw  )
r/   r0   Indexr   r   r
   r   r   r^   rt   )r   r$   rG   rn   r   r   r   r   ,test_read_dtype_backend_pyarrow_config_index  s    z?TestParquetPyArrow.test_read_dtype_backend_pyarrow_config_indexc                 C   s   t tdttddd}ddg|_t|| ddg|_tjtdd	 t|| W d    n1 sj0    Y  t		d
ddddt		d
ddddg|_t|| d S )Nr2   r(   r3   r   r   rK   rL   z|S3rz     )
r/   r0   rB   rC   r   rt   r"   r|   NotImplementedErrorr[   r   r   r   r   test_columns_dtypes_not_invalid*  s    


(z2TestParquetPyArrow.test_columns_dtypes_not_invalidc                 C   s(   t jt jg dddd}t|| d S )NrY  custom namer|  r   r/   r0   r}  rt   r   r   r   r   test_empty_columns>  s    z%TestParquetPyArrow.test_empty_columnsc                 C   sR   |d }t jddgid}ddi|_|j||d t||d}|j|jksNJ d S )Nztest_df_metadata.pr(   r   Ztest_attributer   )r/   r0   attrsr   r   )r   r   r$   ro   rG   Znew_dfr   r   r   test_df_attrs_persistenceC  s    
z,TestParquetPyArrow.test_df_attrs_persistencec                 C   s   |d }t jdddgiddgd}|j|dd t d	d
 t|dd}W d    n1 s^0    Y  t jdddgidt jddgddd}t|| d S )Nztest_string_inference.pr=   rR  yr>   )r   r   r   r   future.infer_stringTstring[pyarrow_numpy]r7   )r   r8   r   )r/   r0   r   r   r   r}  rh   ri   r   r   r$   ro   rG   r   rn   r   r   r   test_string_inferenceK  s    *
z(TestParquetPyArrow.test_string_inferenceznot supported before 11.0c                 C   st   dd l }|d }tjdtdgidd}|j||d|dfgd t|}tjdd	gid
d}t	|| d S )Nr   z	decimal.pr=   z123.00r[  r7   rg  )rw   Z123zstring[python])
r   r/   r0   r   r   rw   Z
decimal128r   rh   ri   r  r   r   r   test_roundtrip_decimalY  s     z)TestParquetPyArrow.test_roundtrip_decimalc                 C   s   dd l }dd lm} |d }|d|g d| i}||| tdd t	|}W d    n1 sp0    Y  tj
dg didtjdgddd	}t|| d S )
Nr   zlarge_string.pr=   )Nr>   rI   r  Tr  r7   )r   r8   r   )r   Zpyarrow.parquetr   r	  r   Zlarge_stringr
  r/   r   r   r0   r}  rh   ri   )r   r   r$   r  ro   r	  r   rn   r   r   r   #test_infer_string_large_string_typee  s    &
z6TestParquetPyArrow.test_infer_string_large_string_typeN)1r   r   r   r  r  r  r!  r$  r'  r,  r"   r   r   r   skipifr	   r  strpathlibPathr0  r4  r   r<  r@  rE  rG  rK  rO  rQ  rT  rU  rX  rZ  r_  rb  rf  rn  rq  rt  rz  r~  r  r  r  r  r   r  r  r   r   r   r   r    sj   


		
	
r  c                   @   s   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	e
jjdd Zdd Zdd Zdd Zdd Ze
jje dddd Ze
jje dddd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Ze
jje ddd-d. Zd/S )0TestParquetFastParquetc                 C   sF   |}t jdddd}|d }||d< t jddd|d< t|| d S )	Nr:   r*   z
US/Easternr  r  r"  r;   	timedelta)r/   rF   r   r#  rt   )r   r&   r`   rG   r  r   r   r   r    s    
z!TestParquetFastParquet.test_basicc                 C   s   t tdttddd}t}d}ddg|_| |||| ddg|_| |||| td	ddddtd	ddddg|_| |||| d S )
Nr2   r(   r3   r   r   r   rK   rL   r  )r/   r0   rB   rC   r   r   r   r[   )r   r&   rG   errr~   r   r   r   test_columns_dtypes_invalid  s    

z2TestParquetFastParquet.test_columns_dtypes_invalidc                 C   s<   t jtdddtdd }d}| ||t| d S )Nr  r3   r*   r  r   z9Cannot create parquet dataset with duplicate column namesr  r   r&   rG   r~   r   r   r   r!    s    $z-TestParquetFastParquet.test_duplicate_columnsc                 C   s@   t dg di}t jddtjdgidd}t|||dd d S )	Nr=   )TNFr   g        r+  r7   F)rn   rf   )r/   r0   rD   r]   rt   r   r&   rG   rn   r   r   r   test_bool_with_none  s    z*TestParquetFastParquet.test_bool_with_nonec                 C   sT   t dt jddddi}| ||td  t dg di}d}| ||t| d S )Nr=   Z2013Mr*   rc  r%  z"Can't infer object conversion type)r/   r0   ra  r   r}   r  r   r   r   r'    s
    z'TestParquetFastParquet.test_unsupportedc                 C   s&   t dt tdi}t|| d S )Nr=   r2   )r/   r0   r3  rB   rt   )r   r&   rG   r   r   r   r4    s    z'TestParquetFastParquet.test_categoricalc                 C   sx   dt tdi}t|}t 2}|j||d dd t||dgd}W d    n1 sZ0    Y  t|dkstJ d S )Nr=   r*   r(   )r    rb   Zrow_group_offsetsro  rp  )	rB   rC   r/   r0   rh   rs   r   r   r   )r   r&   r?   rG   ro   r   r   r   r   rq    s    

.z-TestParquetFastParquet.test_filter_row_groupsc                 C   s*   t ||d|j dd|id |dd d S )Nr>  z/fastparquet.parquetr=  )rb   r=  r8  r?  )r   r1   r9  r&   r:  r   r   r   r@    s    z(TestParquetFastParquet.test_s3_roundtripc                 C   s\   ddg}|}|j |d|d d tj|s.J dd l}|t|dj}t|dksXJ d S )NrZ   rX   r   r    r   rb   r   Fr)   	r   r.  ro   existsr   ZParquetFiler  Zcatsr   r   r   r&   r`   r   rG   r   Zactual_partition_colsr   r   r   rO    s    z4TestParquetFastParquet.test_partition_cols_supportedc                 C   sX   d}|}|j |d|d d tj|s*J dd l}|t|dj}t|dksTJ d S )NrZ   r   r  r   Fr(   r  r  r   r   r   rQ    s    z1TestParquetFastParquet.test_partition_cols_stringc                 C   s\   ddg}|}|j |dd |d tj|s.J dd l}|t|dj}t|dksXJ d S )NrZ   rX   r   )r    rb   partition_onr   Fr)   r  r  r   r   r   test_partition_on_supported  s    z2TestParquetFastParquet.test_partition_on_supportedc                 C   sV   ddg}|}d}t jt|d$ |j|dd ||d W d    n1 sH0    Y  d S )NrZ   rX   zYCannot use both partition_on and partition_cols. Use partition_cols for partitioning datarz   r   )r    rb   r  r   )r"   r|   r}   r   )r   r   r&   r`   r   rG   r~   r   r   r   3test_error_on_using_partition_cols_and_partition_on  s    zJTestParquetFastParquet.test_error_on_using_partition_cols_and_partition_onzfastparquet writes into Indexr   c                 C   s"   t  }| }t|||d d S )Nr^  )r/   r0   r   rt   r  r   r   r   rU    s    z+TestParquetFastParquet.test_empty_dataframec                 C   s>   d|g }t j|d|id}| }d|j_t|||d d S )Nrg  rh  ri  r   r^  )r/   r0   r   r   r   rt   )r   r&   ra   rm  rG   rn   r   r   r   rn    s
    
z0TestParquetFastParquet.test_timezone_aware_indexc              
   C   s   t dddgi}t }|| tjtddH tt	 t
|ddd W d    n1 sd0    Y  W d    n1 s0    Y  tjtdd t
|dd	d
 W d    n1 s0    Y  W d    n1 s0    Y  d S )Nr=   r(   r)   z!not supported for the fastparquetrz   r   T)r    Zuse_nullable_dtypesr   r  )r/   r0   rh   rs   r   r"   r|   r}   r   r   r   )r   r&   rG   ro   r   r   r   &test_use_nullable_dtypes_not_supported'  s    

Jz=TestParquetFastParquet.test_use_nullable_dtypes_not_supportedc              	   C   s   t dl}t|d tjtdd t|dd W d    n1 sL0    Y  t|j	dd W d    n1 s|0    Y  d S )	Ntest.parquets   breakit rz   r   r   F)
missing_ok)
rh   rs   r  r  write_bytesr"   r|   	Exceptionr   unlink)r   ro   r   r   r   $test_close_file_handle_on_read_error2  s
    *z;TestParquetFastParquet.test_close_file_handle_on_read_errorc              	   C   s   t jddgddgdd}tdT}t| d}|| W d    n1 sT0    Y  t||d}W d    n1 s~0    Y  t|| d S )Nr   r(   r,   r  r  wbr   )	r/   r0   rh   rs   r   encoder   r   ri   )r   r    rG   ro   rA   r   r   r   r   test_bytes_file_name:  s    (*z+TestParquetFastParquet.test_bytes_file_namec              	   C   s  t d tjddgddgdd}t N}t jtdd  |j|ddd	 W d    n1 sb0    Y  W d    n1 s0    Y  t \}t	
|d
 t jtdd t|ddd	 W d    n1 s0    Y  W d    n1 s0    Y  d S )Nr   r   r(   r,   r  zfilesystem is not implementedrz   r+   r    r6  rK   )r"   r   r/   r0   rh   rs   r|   r  r   r  r  r  r   r   rG   ro   r   r   r   test_filesystem_notimplementedD  s    

L
z5TestParquetFastParquet.test_filesystem_notimplementedc              	   C   s  t d tjddgddgdd}t N}t jtdd  |j|ddd	 W d    n1 sb0    Y  W d    n1 s0    Y  t \}t	
|d
 t jtdd t|ddd	 W d    n1 s0    Y  W d    n1 s0    Y  d S )Nr   r   r(   r,   r  z1filesystem must be a pyarrow or fsspec FileSystemrz   r+   r  rK   )r"   r   r/   r0   rh   rs   r|   r}   r   r  r  r  r   r  r   r   r   test_invalid_filesystemT  s    

L
z.TestParquetFastParquet.test_invalid_filesystemc              	   C   s  t d}tjddgddgdd}t X}t jtdd* |j|d|	 d	d
id W d    n1 sl0    Y  W d    n1 s0    Y  t f}t
|d t jtdd( t|d|	 d	d
id W d    n1 s0    Y  W d    n1 s
0    Y  d S )Nz
pyarrow.fsr   r(   r,   r  z8storage_options not supported with a pyarrow FileSystem.rz   r   r+   rN   )r    r6  r=  rK   )r"   r   r/   r0   rh   rs   r|   r  r   ZLocalFileSystemr  r  r  r   )r   Zpa_fsrG   ro   r   r   r   .test_unsupported_pa_filesystem_storage_optionsd  s2    

B
zETestParquetFastParquet.test_unsupported_pa_filesystem_storage_optionsc              	   C   s   d}t dttddi}tdT}|| tjt	|d t
|dd W d    n1 sb0    Y  W d    n1 s0    Y  d S )	NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.rX   r(   r3   ztmp.parquetrz   numpy)r  )r/   r0   rB   rC   rh   rs   r   r"   r|   r}   r   )r   r    r~   rG   ro   r   r   r   test_invalid_dtype_backend  s    
z1TestParquetFastParquet.test_invalid_dtype_backendc                 C   sF   t jt jg dddd}t jt jg dddd}t|||d d S )NrY  r  r|  r   r^  r  r  r   r   r   r    s    z)TestParquetFastParquet.test_empty_columnsN)r   r   r   r  r  r!  r  r'  r4  rq  r"   r   r   r@  rO  rQ  r  r  r  r   rU  rn  r  r  r  r  r  r  r  r  r   r   r   r   r    s4   	



	
r  )	NNNNNTFTr)   )I__doc__r[   decimalr   r   r   r.  r  r  rD   r"   Zpandas._configr   Zpandas._config.configr   Zpandas.compatr   Zpandas.compat.pyarrowr   r   r	   Zpandasr/   Zpandas._testingZ_testingrh   Zpandas.util.versionr
   Zpandas.io.parquetr   r   r   r   r   r   r!   r   r   r%   r   filterwarningsZ
pytestmarkZfixturer   r  r    r$   r&   r1   rH   r`   nowrk  rl  minmaxstrptimera   rt   ry   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r   <module>   s   


	






         
B+  /   L