a
    Pf                     @   sn  d dl mZ d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
Z
d dlZd dlmZ d dlZd dlZd dlmZ d dlm  mZ d dlZd dlmZmZmZmZmZmZmZm Z m!Z! d dl"m#Z$ d d	l%m&Z&m'Z' d d
l(m)Z) ej*g dddd Z+dd Z,dd Z-dd Z.dd Z/ej*ej0de1de1dgdej0de1ddgddd Z2G dd dZ3dS )    )Iterator)partial)BytesIOStringION)Path)URLError)is_platform_windows)	NA	DataFrame
MultiIndexSeries	Timestamp
date_rangeread_csv	read_htmlto_datetime)ArrowStringArrayStringArray)file_path_to_url)zchinese_utf-16.htmlzchinese_utf-32.htmlzchinese_utf-8.htmlzletz_latin1.html)paramsc                 C   s   |ddd| j S )z6Parametrized fixture for HTML encoding test filenames.iodataZhtml_encoding)param)requestdatapath r   R/var/www/ai-form-bot/venv/lib/python3.9/site-packages/pandas/tests/io/test_html.pyhtml_encoding_file(   s    
r   c                 O   s   t | t |ks,J dt |  dt | d}ttdd | |}|sPJ |t| |D ]0\}}tj||g|R i | |jrZJ dqZd S )Nz*lists are not of equal size len(list1) == z, len(list2) == z$not all list elements are DataFramesc                 S   s   t | tot |tS N
isinstancer
   )xyr   r   r   <lambda>>       z(assert_framelist_equal.<locals>.<lambda>zframes are both empty)lenallmapziptmassert_frame_equalempty)Zlist1Zlist2argskwargsmsgZboth_framesZframe_iZframe_jr   r   r   assert_framelist_equal5   s&    r/   c                 C   sj   t d}t d | |dd t jtdd& t|ddd	d
dd W d    n1 s\0    Y  d S )Nbs4html5lib__version__z4.2zPandas requires versionmatchr   r   html	spam.htmlflavor)pytestimportorskipsetattrraisesImportErrorr   )Zmonkeypatchr   r0   r   r   r   test_bs4_version_failsI   s
    

r>   c                  C   sX   d} d}d| d }t jt|d" tt| d|d W d    n1 sJ0    Y  d S )Nz
google.comzinvalid flavorz\{z \} is not a valid set of flavorsr3   Zgoogler4   r8   )r9   r<   
ValueErrorr   r   )urlr8   r.   r   r   r   test_invalid_flavorR   s
    rB   c                 C   sZ   t d t d t d | dddd}t|ddgd	}t|ddgd	}t|| d S )
Nr0   lxmlr1   r   r   r5   valid_markup.htmlr   )	index_colr8   )r9   r:   r   r/   )r   filenameZdfs_lxmlZdfs_bs4r   r   r   test_same_ordering[   s    


rG   r0   r1   )ZmarksrC   c                 C   s   t t| jdS )Nr7   )r   r   r   )r   r   r   r   flavor_read_htmlf   s    rH   c                   @   s0  e Zd Zdd Zejdd Zejdd Zdd Zd	d
 Z	ej
jej
jdd Zej
jej
jdd Zej
jdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Z d1d2 Z!d3d4 Z"d5d6 Z#d7d8 Z$ej
jej
jd9d: Z%ej
jej
jej
jd;d< Z&ej
jd=d> Z'ej
jd?d@ Z(ej
jdAdB Z)ej
jdCdD Z*ej
jdEdF Z+ej
jdGdH Z,ej
jdIdJ Z-ej
jdKdL Z.ej
jdMdN Z/dOdP Z0ejdQdR Z1ej
jej
jdSdT Z2ej
jej
jdUdV Z3dWdX Z4dYdZ Z5d[d\ Z6d]d^ Z7d_d` Z8dadb Z9ej
jdcdd Z:ej
jdedf Z;dgdh Z<didj Z=dkdl Z>dmdn Z?dodp Z@dqdr ZAdsdt ZBdudv ZCdwdx ZDdydz ZEd{d| ZFd}d~ ZGdd ZHej
Idddgdd ZJdd ZKdd ZLdd ZMdd ZNdd ZOdd ZPdd ZQej
jdd ZRdd ZSdd ZTej
IddeUdgdfdeUdgeUdgfgdd ZVej
Idddgdd ZWej
Xddd ZYdd ZZdd Z[ej
jej
jdd Z\dd Z]dd Z^ej
Idg ddd Z_dd Z`dd Zadd Zbdd ZcdS )TestReadHtmlc                 C   s>   d}t jt|d |d W d    n1 s00    Y  d S )NzPassing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.r3   a  <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
                <tbody>
                    <tr>
                        <td>3</td>
                        <td>4</td>
                    </tr>
                </tbody>
            </table>)r)   Zassert_produces_warningFutureWarning)selfrH   r.   r   r   r   test_literal_html_deprecationq   s    z*TestReadHtml.test_literal_html_deprecationc                 C   s   |ddddS )Nr   r   r5   r6   r   rK   r   r   r   r   	spam_data   s    zTestReadHtml.spam_datac                 C   s   |ddddS )Nr   r   r5   banklist.htmlr   rM   r   r   r   banklist_data   s    zTestReadHtml.banklist_datac                 C   sh   t tjddtjtdtdddj	
t}| }|t|ddid	d
d	 }t|| d S )N   )      abcdtypecolumnsz{:.3f}classZ	dataframer   )attrsrE   )r
   nprandomdefault_rngpdIndexlistobjectr'   formatZastypefloatto_htmlr   r)   r*   )rK   rH   dfoutresr   r   r   test_to_html_compat   s     z TestReadHtml.test_to_html_compatc           	   
      s  t tdtjdgddtg dddtdtjdgddtg d	ddg d
g dg dg dd}|dkrttjg dtjd}ttjddtgtjd}nr|dkrt	dddl
m   g d} g d}n.t	dtg d}tg d}|jdd}td|$ |t||dd }W d    n1 sT0    Y  t tdtjdgddtg dddtdtjdgddtg d	ddtddtgddtg ddd||d|dkrdd lddl
m  t  fddjD t| d S )N   rS   ZInt64rU   )ri   rQ   rS         ?      @ZFloat64)rj   g       @rk   )TFN)TFTabc)rm   rn   N)rm   rn   ro   defghpythonrm   rn   pyarrowr   )ArrowExtensionArrayFindexzmode.string_storagedtype_backendTbooleanc                    s$   i | ]}| j | d dqS )T)Zfrom_pandas)array).0colrw   expectedpar   r   
<dictcomp>   s   z3TestReadHtml.test_dtype_backend.<locals>.<dictcomp>)r
   r   r[   nanr   r}   Zobject_r	   r9   r:   Zpandas.arraysrw   r   rd   r^   Zoption_contextr   rv   rX   r)   r*   )	rK   Zstring_storager{   rH   re   Zstring_arrayZstring_array_narf   resultr   r   r   test_dtype_backend   sZ    

4
zTestReadHtml.test_dtype_backendc                 C   sf   t |dd<}|j| d ||jdd}||jdd}W d    n1 sN0    Y  t|| d S )Nutf-8encodingcontentFirst Federal Bank of Floridar3   Metcalf Bankopenserve_contentreadrA   r/   )rK   
httpserverrP   rH   rr   df1df2r   r   r   test_banklist_url   s    $zTestReadHtml.test_banklist_urlc                 C   sf   t |dd<}|j| d ||jdd}||jdd}W d    n1 sN0    Y  t|| d S )Nr   r   r   	.*Water.*r3   Unitr   )rK   r   rN   rH   rr   r   r   r   r   r   test_spam_url   s
    ,zTestReadHtml.test_spam_urlc                 C   s2   ||dddid}||dddid}t || d S )Nz.*Florida.*idtabler4   rZ   r   r/   )rK   rP   rH   r   r   r   r   r   test_banklist   s    

zTestReadHtml.test_banklistc                 C   sR   ||dd}||dd}t || |d jd dks8J |d jd dksNJ d S )Nr   r3   r   r   r   r   
ProximatesZNutrient)r/   ilocrX   rK   rN   rH   r   r   r   r   r   	test_spam  s
    
zTestReadHtml.test_spamc                 C   s$   ||}|D ]}t |tsJ qd S r   r   )rK   rN   rH   dfsre   r   r   r   test_spam_no_match  s    zTestReadHtml.test_spam_no_matchc                 C   s,   ||ddid}|D ]}t |tsJ qd S )Nr   r   )rZ   r   )rK   rP   rH   r   re   r   r   r   test_banklist_no_match  s    z#TestReadHtml.test_banklist_no_matchc                 C   s2   ||dddd }|j d dks$J |jr.J d S )Nr   rQ   r4   headerr   r   )rX   r+   )rK   rN   rH   re   r   r   r   test_spam_header  s    zTestReadHtml.test_spam_headerc                 C   s*   ||ddd}||ddd}t || d S Nr   ri   r4   skiprowsr   r   r   r   r   r   test_skiprows_int"  s    zTestReadHtml.test_skiprows_intc                 C   s2   ||dt dd}||dt dd}t|| d S Nr   rQ   r   r   )ranger/   r   r   r   r   test_skiprows_range(  s    z TestReadHtml.test_skiprows_rangec                 C   s2   ||dddgd}||dddgd}t || d S Nr   ri   rQ   r   r   r   r   r   r   r   test_skiprows_list.  s    zTestReadHtml.test_skiprows_listc                 C   s2   ||dddhd}||dddhd}t || d S r   r   r   r   r   r   test_skiprows_set4  s    zTestReadHtml.test_skiprows_setc                 C   s*   ||ddd}||ddd}t || d S r   r   r   r   r   r   test_skiprows_slice:  s    z TestReadHtml.test_skiprows_slicec                 C   s2   ||dt dd}||dt dd}t|| d S r   slicer/   r   r   r   r   test_skiprows_slice_short@  s    z&TestReadHtml.test_skiprows_slice_shortc                 C   s8   ||dt ddd}||dt dddd}t|| d S )	Nr   rQ      r   r   rR   ri   r   r   r   r   r   test_skiprows_slice_longF  s    z%TestReadHtml.test_skiprows_slice_longc                 C   s6   ||dt dd}||dt dd}t|| d S r   )r[   Zaranger/   r   r   r   r   test_skiprows_ndarrayL  s    z"TestReadHtml.test_skiprows_ndarrayc                 C   s@   t jtdd ||ddd W d    n1 s20    Y  d S )Nz%is not a valid type for skipping rowsr3   r   Zasdfr   )r9   r<   	TypeError)rK   rN   rH   r   r   r   test_skiprows_invalidR  s    z"TestReadHtml.test_skiprows_invalidc                 C   s*   ||ddd}||ddd}t || d S Nr   r   r4   rE   r   r   r   r   r   r   
test_indexV  s    zTestReadHtml.test_indexc                 C   s.   ||dddd}||dddd}t || d S Nr   ri   r   )r4   r   rE   r   r   r   r   r   r   test_header_and_index_no_types[  s    z+TestReadHtml.test_header_and_index_no_typesc                 C   s.   ||dddd}||dddd}t || d S r   r   r   r   r   r    test_header_and_index_with_types`  s    z-TestReadHtml.test_header_and_index_with_typesc                 C   s*   ||ddd}||ddd}t || d S r   r   r   r   r   r   test_infer_typese  s    zTestReadHtml.test_infer_typesc                 C   s   t |dd}t| }W d    n1 s.0    Y  t |dd}t| }W d    n1 sf0    Y  ||dd}||dd}t|| d S NzUTF-8r   r   r3   r   )r   r   r   r/   )rK   rN   rH   rr   data1data2r   r   r   r   r   test_string_iok  s    **zTestReadHtml.test_string_ioc                 C   sb   t |dd}| }W d    n1 s*0    Y  |t|dd}|t|dd}t|| d S r   )r   r   r   r/   )rK   rN   rH   rr   r   r   r   r   r   r   test_stringv  s
    &zTestReadHtml.test_stringc                 C   s~   t |dd}||dd}W d    n1 s.0    Y  t |dd}||dd}W d    n1 sf0    Y  t|| d S r   )r   r/   )rK   rN   rH   rr   r   r   r   r   r   test_file_like  s
    **zTestReadHtml.test_file_likec                 C   sL   |j ddd tjtdd |ddd W d    n1 s>0    Y  d S )Nz#urlopen error unknown url type: git  coder3   zgit://github.comr   )r   r9   r<   r   rK   r   rH   r   r   r   test_bad_url_protocol  s    z"TestReadHtml.test_bad_url_protocolc                 C   sR   |j ddd tjttfdd ||jdd W d    n1 sD0    Y  d S )NzName or service not knownr   r   zHTTP Error 404: NOT FOUNDr3   r   )r   r9   r<   r   r@   rA   r   r   r   r   test_invalid_url  s    zTestReadHtml.test_invalid_urlc                 C   sL   |}|t tj|dddid}t|ts0J |D ]}t|ts4J q4d S )NZFirstr   r   r   )r   ospathabspathr    r`   r
   rK   rP   rH   rA   r   re   r   r   r   test_file_url  s    zTestReadHtml.test_file_urlc                 C   sH   |}t jtdd" ||dddid W d    n1 s:0    Y  d S )NzNo tables foundr3   r   r   Z	tasdfabler   r9   r<   r@   )rK   rP   rH   rA   r   r   r   test_invalid_table_attrs  s
    
z%TestReadHtml.test_invalid_table_attrsc                 C   s0   ||dddiddgdd }t |jts,J d S )NMetcalfr   r   r   ri   )r4   rZ   r   r    rX   r   rK   rP   rH   re   r   r   r   test_multiindex_header  s    z#TestReadHtml.test_multiindex_headerc                 C   s0   ||dddiddgdd }t |jts,J d S )Nr   r   r   r   ri   )r4   rZ   rE   )r    ry   r   r   r   r   r   test_multiindex_index  s    z"TestReadHtml.test_multiindex_indexc                 C   sF   ||dddiddgddgdd }t |jts2J t |jtsBJ d S )Nr   r   r   r   ri   )r4   rZ   r   rE   )r    rX   r   ry   r   r   r   r   test_multiindex_header_index  s    z)TestReadHtml.test_multiindex_header_indexc                 C   s2   ||dddiddgddd }t |jts.J d S Nr   r   r   r   ri   )r4   rZ   r   r   r   r   r   r   r   &test_multiindex_header_skiprows_tuples  s    z3TestReadHtml.test_multiindex_header_skiprows_tuplesc                 C   s2   ||dddiddgddd }t |jts.J d S r   r   r   r   r   r   test_multiindex_header_skiprows  s    z,TestReadHtml.test_multiindex_header_skiprowsc                 C   sH   ||dddiddgddgddd }t |jts4J t |jtsDJ d S )Nr   r   r   r   ri   )r4   rZ   r   rE   r   )r    ry   r   rX   r   r   r   r   %test_multiindex_header_index_skiprows  s    z2TestReadHtml.test_multiindex_header_index_skiprowsc                 C   sX   |}|t tj|ttdddid}t|ts<J |D ]}t|ts@J q@d S )NZFloridar   r   r   )	r   r   r   r   recompiler    r`   r
   r   r   r   r   test_regex_idempotency  s    z#TestReadHtml.test_regex_idempotencyc                 C   sD   d}t jt|d ||ddd W d    n1 s60    Y  d S )Nz\(you passed a negative value\)r3   ZWaterr   r   r   )rK   rN   rH   r.   r   r   r   test_negative_skiprows  s    z#TestReadHtml.test_negative_skiprowsc                 C   s   dS )Naf  
          <table class="contentstable" align="center"><tr>
            <td width="50%">
            <p class="biglink"><a class="biglink" href="whatsnew/2.7.html">What's new in Python 2.7?</a><br/>
                <span class="linkdescr">or <a href="whatsnew/index.html">all "What's new" documents</a> since 2.0</span></p>
            <p class="biglink"><a class="biglink" href="tutorial/index.html">Tutorial</a><br/>
                <span class="linkdescr">start here</span></p>
            <p class="biglink"><a class="biglink" href="library/index.html">Library Reference</a><br/>
                <span class="linkdescr">keep this under your pillow</span></p>
            <p class="biglink"><a class="biglink" href="reference/index.html">Language Reference</a><br/>
                <span class="linkdescr">describes syntax and language elements</span></p>
            <p class="biglink"><a class="biglink" href="using/index.html">Python Setup and Usage</a><br/>
                <span class="linkdescr">how to use Python on different platforms</span></p>
            <p class="biglink"><a class="biglink" href="howto/index.html">Python HOWTOs</a><br/>
                <span class="linkdescr">in-depth documents on specific topics</span></p>
            </td><td width="50%">
            <p class="biglink"><a class="biglink" href="installing/index.html">Installing Python Modules</a><br/>
                <span class="linkdescr">installing from the Python Package Index &amp; other sources</span></p>
            <p class="biglink"><a class="biglink" href="distributing/index.html">Distributing Python Modules</a><br/>
                <span class="linkdescr">publishing modules for installation by others</span></p>
            <p class="biglink"><a class="biglink" href="extending/index.html">Extending and Embedding</a><br/>
                <span class="linkdescr">tutorial for C/C++ programmers</span></p>
            <p class="biglink"><a class="biglink" href="c-api/index.html">Python/C API</a><br/>
                <span class="linkdescr">reference for C/C++ programmers</span></p>
            <p class="biglink"><a class="biglink" href="faq/index.html">FAQs</a><br/>
                <span class="linkdescr">frequently asked questions (with answers!)</span></p>
            </td></tr>
        </table>

        <p><strong>Indices and tables:</strong></p>
        <table class="contentstable" align="center"><tr>
            <td width="50%">
            <p class="biglink"><a class="biglink" href="py-modindex.html">Python Global Module Index</a><br/>
                <span class="linkdescr">quick access to all modules</span></p>
            <p class="biglink"><a class="biglink" href="genindex.html">General Index</a><br/>
                <span class="linkdescr">all functions, classes, terms</span></p>
            <p class="biglink"><a class="biglink" href="glossary.html">Glossary</a><br/>
                <span class="linkdescr">the most important terms explained</span></p>
            </td><td width="50%">
            <p class="biglink"><a class="biglink" href="search.html">Search page</a><br/>
                <span class="linkdescr">search this documentation</span></p>
            <p class="biglink"><a class="biglink" href="contents.html">Complete Table of Contents</a><br/>
                <span class="linkdescr">lists all sections and subsections</span></p>
            </td></tr>
        </table>
        r   rK   r   r   r   python_docs  s    zTestReadHtml.python_docsc                 C   s.   |j |d ||jdd}t|dks*J d S )Nr   Pythonr3   ri   )r   rA   r%   )rK   r   r   rH   r   r   r   r   test_multiple_matches(  s    z"TestReadHtml.test_multiple_matchesc                 C   s@   |j |d ||jdd}dd |D }t|ddgks<J d S )Nr   r   r3   c                 S   s   g | ]}|j d  dd qS )r   r   rR   )r   )r~   re   r   r   r   
<listcomp>4  r$   z7TestReadHtml.test_python_docs_table.<locals>.<listcomp>ZPythZWhat)r   rA   sorted)rK   r   r   rH   r   zzr   r   r   test_python_docs_table/  s    z#TestReadHtml.test_python_docs_tablec                 C   s$   d}|t |}t|dks J dS )z@
        Make sure that read_html ignores empty tables.
        a  
            <table>
                <thead>
                    <tr>
                        <th>A</th>
                        <th>B</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1</td>
                        <td>2</td>
                    </tr>
                </tbody>
            </table>
            <table>
                <tbody>
                </tbody>
            </table>
        ri   N)r   r%   )rK   rH   r5   r   r   r   r   test_empty_tables7  s    zTestReadHtml.test_empty_tablesc                 C   s<   |t dd }tddgddggddgd	}t|| d S )
Na  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </tbody>
            <tbody>
                <tr>
                    <td>3</td>
                    <td>4</td>
                </tr>
            </tbody>
        </table>r   ri   rQ   rS   rR   ABr   rX   r   r
   r)   r*   rK   rH   r   r   r   r   r   test_multiple_tbodyR  s    z TestReadHtml.test_multiple_tbodyc                 C   s2   |t dd }tddidgd}t|| dS )zt
        Don't fail with bs4 when there is a header and only one column
        as described in issue #9178
        a3  <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>first</td>
                    </tr>
                </tbody>
            </table>r   Headerfirstr   ry   Nr   r   r   r   r   test_header_and_one_columnr  s    z'TestReadHtml.test_header_and_one_columnc                 C   s6   |t dd }tg dgg dd}t|| dS )zK
        Ensure parser adds <tr> within <thead> on malformed HTML.
        a  <table>
            <thead>
                <tr>
                    <th>Country</th>
                    <th>Municipality</th>
                    <th>Year</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>Ukraine</td>
                    <th>Odessa</th>
                    <td>1944</td>
                </tr>
            </tbody>
        </table>r   )ZUkraineZOdessa  )ZCountryZMunicipalityZYearr   Nr   r   r   r   r   test_thead_without_tr  s    z"TestReadHtml.test_thead_without_trc           	      C   s   d}t ddggddgd}t ddgddggddgd}|jd	d
}|jdd
}|t|d }|t|d }t|| t|| dS )zh
        Make sure that read_html reads tfoot, containing td or th.
        Ignores empty tfoot
        a  <table>
            <thead>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
            </thead>
            <tbody>
                <tr>
                    <td>bodyA</td>
                    <td>bodyB</td>
                </tr>
            </tbody>
            <tfoot>
                {footer}
            </tfoot>
        </table>ZbodyAZbodyBr   r   r   ZfootAZfootB )footerz%<tr><td>footA</td><th>footB</th></tr>r   N)r
   rb   r   r)   r*   )	rK   rH   Zdata_templateZ	expected1Z	expected2r   r   Zresult1Zresult2r   r   r   test_tfoot_read  s    zTestReadHtml.test_tfoot_readc                 C   s6   |t dddd }tddggdd}t|| d S )Na
  
            <table>
                <tr>
                    <td>S</td>
                    <td>I</td>
                </tr>
                <tr>
                    <td>text</td>
                    <td>1944</td>
                </tr>
            </table>
        r   r   textr   )SIrW   r   r   r   r   r   &test_parse_header_of_non_string_column  s    z3TestReadHtml.test_parse_header_of_non_string_columnc                    s   ddl m   fdd}||dddidd }t|d	d
ddttdd}|j|jksZJ g d}g d}||||}	||}
|	}ddg}|| t||< t	
||
 d S )Nr   _remove_whitespacec                    s&   z
 | W S  t y    |  Y S 0 d S r   )AttributeErrorr!   r  r   r   try_remove_ws  s    
z8TestReadHtml.test_banklist_header.<locals>.try_remove_wsr   r   r   r   r   r   csvzbanklist.csv)Updated DateClosing Date
converters)
z,First Vietnamese American Bank In Vietnamesez"Westernbank Puerto Rico En Espanolz*R-G Premier Bank of Puerto Rico En EspanolzEurobank En EspanolzSanderson State Bank En EspanolzLWashington Mutual Bank (Including its subsidiary Washington Mutual Bank FSB)zSilver State Bank En Espanolz%AmTrade International Bank En EspanolzHamilton Bank, NA En Espanolz6The Citizens Savings Bank Pioneer Community Bank, Inc.)
zFirst Vietnamese American BankzWesternbank Puerto RicozR-G Premier Bank of Puerto RicoZEurobankzSanderson State BankzWashington Mutual BankzSilver State BankzAmTrade International BankzHamilton Bank, NAzThe Citizens Savings Bankr	  r  )Zpandas.io.htmlr  r   r   shaper'   replaceapplyr   r)   r*   )rK   rP   r   rH   r  re   Zground_trutholdnewZdfnewZgtnewZ	convertedZ	date_colsr   r  r   test_banklist_header  s     
z!TestReadHtml.test_banklist_headerc                 C   sn   d}t |dd}| }W d    n1 s.0    Y  ||v sDJ ||dddidd }|| v sjJ d S )NzGold Canyonr   r   r   r   r   r   )r   r   Z	to_string)rK   rP   rH   gcrr   Zraw_textre   r   r   r   test_gold_canyon  s    &
zTestReadHtml.test_gold_canyonc                 C   s8   |t dddd }|t dddd }t|| d S )Na  <table>
                        <thead>
                            <tr style="text-align: right;">
                            <th></th>
                            <th>C_l0_g0</th>
                            <th>C_l0_g1</th>
                            <th>C_l0_g2</th>
                            <th>C_l0_g3</th>
                            <th>C_l0_g4</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <th>R_l0_g0</th>
                            <td> 0.763</td>
                            <td> 0.233</td>
                            <td> nan</td>
                            <td> nan</td>
                            <td> nan</td>
                            </tr>
                            <tr>
                            <th>R_l0_g1</th>
                            <td> 0.244</td>
                            <td> 0.285</td>
                            <td> 0.392</td>
                            <td> 0.137</td>
                            <td> 0.222</td>
                            </tr>
                        </tbody>
                    </table>r   rE   a  <table>
                    <thead>
                        <tr style="text-align: right;">
                        <th></th>
                        <th>C_l0_g0</th>
                        <th>C_l0_g1</th>
                        <th>C_l0_g2</th>
                        <th>C_l0_g3</th>
                        <th>C_l0_g4</th>
                        </tr>
                    </thead>
                    <tbody>
                        <tr>
                        <th>R_l0_g0</th>
                        <td> 0.763</td>
                        <td> 0.233</td>
                        </tr>
                        <tr>
                        <th>R_l0_g1</th>
                        <td> 0.244</td>
                        <td> 0.285</td>
                        <td> 0.392</td>
                        <td> 0.137</td>
                        <td> 0.222</td>
                        </tr>
                    </tbody>
                 </table>)r   r)   r*   )rK   rH   r   r   r   r   r   test_different_number_of_cols'  s"     "$!z*TestReadHtml.test_different_number_of_colsc                 C   s6   |t dd }tg dgg dd}t|| d S )NaZ  
            <table>
                <tr>
                    <th>A</th>
                    <th colspan="1">B</th>
                    <th rowspan="1">C</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                    <td>c</td>
                </tr>
            </table>
        r   rl   )r   r   CrW   r   r   r   r   r   test_colspan_rowspan_1o  s    z#TestReadHtml.test_colspan_rowspan_1c                 C   s:   |t dddd }tg dgg dd}t|| d S )Na  
            <table>
                <tr>
                    <td colspan="2">X</td>
                    <td>Y</td>
                    <td rowspan="2">Z</td>
                    <td>W</td>
                </tr>
                <tr>
                    <td>A</td>
                    <td colspan="2">B</td>
                    <td>C</td>
                </tr>
            </table>
        r   r   )r   r   r   Zr  )XzX.1Yr  Wr   r   r   r   r   r    test_colspan_rowspan_copy_values  s    z-TestReadHtml.test_colspan_rowspan_copy_valuesc                 C   s:   |t dddd }tg dgg dd}t|| d S )Na(  
            <table>
                <tr>
                    <td rowspan="2">A</td>
                    <td rowspan="2" colspan="3">B</td>
                    <td>C</td>
                </tr>
                <tr>
                    <td>D</td>
                </tr>
            </table>
        r   r   )r   r   r   r   D)r   r   zB.1zB.2r  r   r   r   r   r   r   test_colspan_rowspan_both_not_1  s    z,TestReadHtml.test_colspan_rowspan_both_not_1c                 C   s:   |t dddd }tddggddgd}t|| d S )Nz
            <table>
                <tr>
                    <td>A</td>
                    <td rowspan="2">B</td>
                </tr>
                <tr>
                    <td>C</td>
                </tr>
            </table>
        r   r   r  r   r   r   r   r   r   r   r   test_rowspan_at_end_of_row  s    z'TestReadHtml.test_rowspan_at_end_of_rowc                 C   s@   |t dddd }tddgddggddgd}t|| d S )Nz
            <table>
                <tr>
                    <td rowspan="3">A</td>
                    <td rowspan="3">B</td>
                </tr>
            </table>
        r   r   r   r   r   r   r   r   r   r   test_rowspan_only_rows  s    
z#TestReadHtml.test_rowspan_only_rowsc                 C   sV   |t dd }tddgddggddgddggd}tdd	gg|d
}t|| d S )Nam  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <th>a</th>
                    <th>b</th>
                </tr>
                <tr>
                    <td>1</td>
                    <td>2</td>
                </tr>
            </table>
        r   r   r   rm   rn   ri   ZlevelscodesrQ   r   r   r   r
   r)   r*   rK   rH   r   rX   r   r   r   r   +test_header_inferred_from_rows_with_only_th  s    $z8TestReadHtml.test_header_inferred_from_rows_with_only_thc                 C   sh   t dtdddi}| }|t|dgdd}t||d  |t|dgdd}t||d  d S )Ndate1/1/2001
   Zperiodsri   r   Zparse_datesrE   )r
   r   rd   r   r)   r*   )rK   rH   re   r   rg   r   r   r   test_parse_dates_list  s    z"TestReadHtml.test_parse_dates_listc                 C   sp   t tddd}t|dd |dd d}|t| dd	d
gid	d}td|i}t||d  d S )Nr'  r(  r)  c                 S   s   t |  S r   )strr&  r  r   r   r   r#   %  r$   z7TestReadHtml.test_parse_dates_combine.<locals>.<lambda>c                 S   s   t |  S r   )r,  timer  r   r   r   r#   &  r$   )r&  r-  datetimeri   rQ   r*  r   )r   r   r
   r'   r   rd   r)   r*   )rK   rH   Z	raw_datesre   rg   Znewdfr   r   r   test_parse_dates_combine!  s    z%TestReadHtml.test_parse_dates_combinec                 C   s   |dddd}t j|s,J t| dt j|sJJ t| d||ddd	d
 }|jdksjJ d|jd v s|J |d jtdksJ t	|j
d dsJ d S )Nr   r   r5   wikipedia_states.htmlz is not a filez is an empty fileArizonari   r   r   )<      Unnamedr   sq mifloat64)r   r5  HzPN$A)r   r   isfilereprgetsizer  rX   rV   r[   allcloselocrK   r   rH   r   r   r   r   r   test_wikipedia_states_table/  s    z(TestReadHtml.test_wikipedia_states_tablec                 C   sn   |dddd}||dddd }|j dks.J d	|jd
 d v sDJ |jjdksTJ t|jd dsjJ d S )Nr   r   r5   r0  r1  r   r   )r2     r4  r   ri   rQ   )ZAlaska)zTotal area[2]r5  r7  )r  rX   Znlevelsr[   r;  r<  r=  r   r   r    test_wikipedia_states_multiindex9  s    z-TestReadHtml.test_wikipedia_states_multiindexc                 C   sD   |t dddgd}tddggtddgd	}t|d | d S )
NaK  
                <table>
                    <thead>
                        <tr><th></th><th></tr>
                        <tr><th>A</th><th>B</th></tr>
                    </thead>
                    <tbody>
                        <tr><td>a</td><td>b</td></tr>
                    </tbody>
                </table>
            r   ri   r   rm   rn   )Unnamed: 0_level_0r   )zUnnamed: 1_level_0r   rW   )r   r
   r   from_tuplesr)   r*   r   r   r   r   %test_parser_error_on_empty_header_rowA  s    z2TestReadHtml.test_parser_error_on_empty_header_rowc                 C   sN   |t dddd }tddidgd}|d jtdks>J t|| d S )	Na  <html>
            <body>
             <table>
                <thead>
                    <tr>
                        <th>Header</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>1100#101</td>
                    </tr>
                </tbody>
            </table>
            </body>
        </html>#)decimalr   r   gClg0@r   r6  )r   r
   rV   r[   r)   r*   r   r   r   r   test_decimal_rowsZ  s    zTestReadHtml.test_decimal_rowsargTFc                 C   sH   t d}tjt|d |||d W d    n1 s:0    Y  d S )NzPassing a bool to header is invalid. Use header=None for no header or header=int or list-like of ints to specify the row(s) making up the column namesr3   r   )r   escaper9   r<   r   )rK   rN   rG  rH   r.   r   r   r   test_bool_header_argw  s
    z!TestReadHtml.test_bool_header_argc                 C   s8   |t ddtidd }tdddgi}t|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                    </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>rm   r
  r   z0.763z0.244)r   r,  r
   r)   r*   r   r   r   r   test_converters  s    zTestReadHtml.test_convertersc                 C   s8   |t ddgdd }tddtjgi}t|| d S )Na  <table>
                 <thead>
                   <tr>
                     <th>a</th>
                   </tr>
                 </thead>
                 <tbody>
                   <tr>
                     <td> 0.763</td>
                   </tr>
                   <tr>
                     <td> 0.244</td>
                   </tr>
                 </tbody>
               </table>gZd;?)Z	na_valuesr   rm   g"~j?r   r
   r[   r   r)   r*   r   r   r   r   test_na_values  s    zTestReadHtml.test_na_valuesc                 C   sl   d}t dddgi}|t|ddd }t|| t dtjtjgi}|t|ddd }t|| d S )	Na  <table>
                        <thead>
                            <tr>
                            <th>a</th>
                            </tr>
                        </thead>
                        <tbody>
                            <tr>
                            <td> N/A</td>
                            </tr>
                            <tr>
                            <td> NA</td>
                            </tr>
                        </tbody>
                    </table>rm   zN/Ar	   F)Zkeep_default_nar   T)r
   r   r)   r*   r[   r   )rK   rH   Z	html_dataexpected_dfhtml_dfr   r   r   test_keep_default_na  s    z!TestReadHtml.test_keep_default_nac                 C   s@   |t dd }tddgtjtjggddgd}t|| d S )Nak  
            <table>
                <tr>
                    <th>A</th>
                    <th>B</th>
                </tr>
                <tr>
                    <td>a</td>
                    <td>b</td>
                </tr>
                <tr>
                    <td></td>
                    <td></td>
                </tr>
            </table>
        r   rm   rn   r   r   r   rK  r   r   r   r   test_preserve_empty_rows  s     z%TestReadHtml.test_preserve_empty_rowsc                 C   sV   |t dd }tddgddggddgddggd}tdd	gg|d
}t|| d S )NaU  
            <table>
                <thead>
                    <tr><th></th><th></tr>
                    <tr><th>A</th><th>B</th></tr>
                    <tr><th>a</th><th>b</th></tr>
                </thead>
                <tbody>
                    <tr><td>1</td><td>2</td></tr>
                </tbody>
            </table>
        r   r   r   rm   rn   ri   r!  rQ   r   r#  r$  r   r   r   ,test_ignore_empty_rows_when_inferring_header  s    $z9TestReadHtml.test_ignore_empty_rows_when_inferring_headerc                 C   sL   t g dd}g dg dg|_|jdd}|t|d }t|| d S )N))ZHillaryD   r  )ZBernieJ   r  )ZDonaldE   R)r   )rA  ZAgeZParty)NamezUnnamed: 1_level_1zUnnamed: 2_level_1Frx   r   )r
   rX   rd   r   r)   r*   )rK   rH   rM  r5   rN  r   r   r   test_multiple_header_rows  s    z&TestReadHtml.test_multiple_header_rowsc                 C   s>   |dddd}||dd}t |ts(J t |d ts:J d S )Nr   r   r5   rD   r   r  )r    r`   r
   )rK   r   rH   rF   r   r   r   r   test_works_on_valid_markup  s    z'TestReadHtml.test_works_on_valid_markupc                 C   s$   |dddd}||dddgd d S )	Nr   r   r5   rO   r   rC   r1   r?   r   )rK   r   rH   rP   r   r   r   test_fallback_success  s    z"TestReadHtml.test_fallback_successc                 C   s>   t ddd}ttjdd|d}| }d|v s:J d S )Nz
2000-01-01r(  r)  rQ   )r(  rR   rx   )r   r
   r[   r\   r]   Zstandard_normalrd   )rK   rngre   r   r   r   r   test_to_html_timestamp  s    z#TestReadHtml.test_to_html_timestampc                 C   s   t dddg}| }|jdd}|jdd}|jdd}|jdd}|jdd}d|v s`J ||kslJ ||ksxJ ||ksJ d	|v sJ d
|vsJ d|vsJ ||ksJ d S )Nri   rQ   r   r   T)borderr   Fz border="1"z border="2"z border="0"z border)r
   rd   )rK   re   Zout_border_defaultZout_border_trueZout_border_explicit_defaultZout_border_nondefaultZout_border_zeroZout_border_falser   r   r   test_to_html_borderless"  s    z$TestReadHtml.test_to_html_borderlesszdisplayed_only,exp0,exp1ZfooNzfoo  bar  baz  quxc                 C   sR   d}|t ||d}t|d | |d ur>t|d | nt|dksNJ d S )Na  <html>
          <body>
            <table>
              <tr>
                <td>
                  foo
                  <span style="display:none;text-align:center">bar</span>
                  <span style="display:none">baz</span>
                  <span style="display: none">qux</span>
                </td>
              </tr>
            </table>
            <table style="display: none">
              <tr>
                <td>foo</td>
              </tr>
            </table>
          </body>
        </html>displayed_onlyr   ri   )r   r)   r*   r%   )rK   r`  Zexp0Zexp1rH   r   r   r   r   r   test_displayed_only5  s    	z TestReadHtml.test_displayed_onlyr`  c                 C   s>   d}|t ||dd }tddgddgd}t|| d S )	NaW  
        <table>
            <tr>
                <th>A</th>
                <th>B</th>
            </tr>
            <tr>
                <td>1</td>
                <td>2</td>
            </tr>
            <tr>
                <td><span style="display:none"></span>4</td>
                <td>5</td>
            </tr>
        </table>
        r_  r   ri   rR   rQ   r   r\  r   )rK   r`  rH   Z
html_tabler   r   r   r   r   &test_displayed_only_with_many_elementsZ  s    z3TestReadHtml.test_displayed_only_with_many_elementsz\ignore:You provided Unicode markup but also provided a value for from_encoding.*:UserWarningc                 C   s  t j|}t j|d }|d\}}zt|d&}|| |dd }W d    n1 sb0    Y  t|d*}|t| |dd }	W d    n1 s0    Y  |||dd }
t	
||	 t	
||
 W n: ty   t rd|v sd|v rt   Y n0 d S )Nr   _rb)r   rE   Z16Z32)r   r   basenamesplitextsplitr   r   popr   r)   r*   	Exceptionr   r9   skip)rK   r   rH   	base_pathrootrc  r   ZfobjZfrom_stringZfrom_file_likefrom_filenamer   r   r   test_encoder  s,    
((
zTestReadHtml.test_encodec                 C   sx   |j ddkrtd G dd dt}|d}||s>J tjtdd || W d    n1 sj0    Y  d S )	Nr8   rC   zNot applicable for lxmlc                   @   s   e Zd Zdd ZdS )zFTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIOc                 S   s   dS NFr   r   r   r   r   seekable  s    zOTestReadHtml.test_parse_failure_unseekable.<locals>.UnseekableStringIO.seekableN)__name__
__module____qualname__rp  r   r   r   r   UnseekableStringIO  s   rt  z?
            <table><tr><td>spam<foobr />eggs</td></tr></table>z#passed a non-rewindable file objectr3   )keywordsgetr9   rj  r   r<   r@   )rK   rH   rt  badr   r   r   test_parse_failure_unseekable  s    
z*TestReadHtml.test_parse_failure_unseekablec                 C   s:   G dd d}|d}|d}||s*J ||s6J d S )Nc                   @   sJ   e Zd ZddddZdddZdd Zd	d
 Zdd ZedddZ	dS )z9TestReadHtml.test_parse_failure_rewinds.<locals>.MockFileN)returnc                 S   s   || _ d| _d S ro  )r   at_end)rK   r   r   r   r   __init__  s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__init__c                 S   s   | j r
dn| j}d| _ |S )Nr   T)rz  r   )rK   sizer   r   r   r   r     s    z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.readc                 S   s
   d| _ d S ro  )rz  )rK   offsetr   r   r   seek  s    z>TestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekc                 S   s   dS )NTr   r   r   r   r   rp    s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.seekablec                 S   s   d S r   r   r   r   r   r   __next__  s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__next__c                 S   s   | S r   r   r   r   r   r   __iter__  s    zBTestReadHtml.test_parse_failure_rewinds.<locals>.MockFile.__iter__)N)
rq  rr  rs  r{  r   r~  rp  r  r   r  r   r   r   r   MockFile  s   
r  z/<table><tr><td>spam<br />eggs</td></tr></table>z2<table><tr><td>spam<foobr />eggs</td></tr></table>r   )rK   rH   r  Zgoodrw  r   r   r   test_parse_failure_rewinds  s
    z'TestReadHtml.test_parse_failure_rewindsc                 C   s   G dd dt j}|dddd}|||fd}|||fd}|  |  | sL| r^qLd |j  u rv|ju s|n J d S )Nc                       s   e Zd Z fddZ  ZS )z@TestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThreadc              
      sD   zt    W n* ty8 } z|| _W Y d }~nd }~0 0 d | _d S r   )superrunri  err)rK   r  	__class__r   r   r    s
    zDTestReadHtml.test_importcheck_thread_safety.<locals>.ErrorThread.run)rq  rr  rs  r  __classcell__r   r   r  r   ErrorThread  s   r  r   r   r5   rD   )targetr,   )	threadingThreadstartis_aliver  )rK   r   rH   r  rF   Zhelper_thread1Zhelper_thread2r   r   r   test_importcheck_thread_safety  s    	z+TestReadHtml.test_importcheck_thread_safetyc                 C   s>   |dddd}t |}||d }||d }t|| d S )Nr   r   r5   r6   r   )r   r)   r*   )rK   r   rH   Zfile_path_string	file_pathr   r   r   r   r   test_parse_path_object  s
    z#TestReadHtml.test_parse_path_objectc                 C   s2   |t dd }tdggdgd}t|| d S )Nz
            <table>
                <tr>
                    <th>A</th>
                </tr>
                <tr>
                    <td>word1<br>word2</td>
                </tr>
            </table>
        r   zword1 word2r   r   r   r   r   r   r   test_parse_br_as_space  s    z#TestReadHtml.test_parse_br_as_space)r&   bodyr   r   c           
      C   s   d}g dg dg dg dg dg dd}|d	 }|d
 }|d }|dkrh|d }|d }|d }n4|dkrz|d }n"|dkr|d }n|dkr|d }|t ||dd }t||g|d}	|	tj}	t||	 d S )Na  
          <table>
            <tr>
              <th>HTTP</th>
              <th>FTP</th>
              <th><a href="https://en.wiktionary.org/wiki/linkless">Linkless</a></th>
            </tr>
            <tr>
              <td><a href="https://en.wikipedia.org/">Wikipedia</a></td>
              <td>SURROUNDING <a href="ftp://ftp.us.debian.org/">Debian</a> TEXT</td>
              <td>Linkless</td>
            </tr>
            <tfoot>
              <tr>
                <td><a href="https://en.wikipedia.org/wiki/Page_footer">Footer</a></td>
                <td>
                  Multiple <a href="1">links:</a> <a href="2">Only first captured.</a>
                </td>
              </tr>
            </tfoot>
          </table>
          )HTTPFTPLinkless))r  N)r  N)r  z'https://en.wiktionary.org/wiki/linkless)	WikipediaSURROUNDING Debian TEXTr  ))r  zhttps://en.wikipedia.org/)r  zftp://ftp.us.debian.org/)r  N)Footer$Multiple links: Only first captured.N))r  z)https://en.wikipedia.org/wiki/Page_footer)r  1N)head_ignorehead_extractbody_ignorebody_extractfooter_ignorefooter_extractr  r  r  r&   r  r  r  r  r   r   Zextract_linksr   rW   )r   r
   Zfillnar[   r   r)   r*   )
rK   rG  rH   Zgh_13141_dataZgh_13141_expectedZdata_expZfoot_expZhead_expr   r   r   r   r   test_extract_links  s2    


zTestReadHtml.test_extract_linksc                 C   sB   d}t jt|d t|dd W d    n1 s40    Y  d S )NzY`extract_links` must be one of {None, "header", "footer", "body", "all"}, got "incorrect"r3   Z	incorrectr  r9   r<   r@   r   )rK   rN   r.   r   r   r   test_extract_links_badC  s    z#TestReadHtml.test_extract_links_badc                 C   s4   d}|t |ddd }tdgg}t|| d S )Nz
        <table>
          <tr>
            <td>
              <a href='https://google.com'>Google.com</a>
            </td>
          </tr>
        </table>
        r&   r  r   )z
Google.comzhttps://google.comr   rK   rH   r   r   r   r   r   r    test_extract_links_all_no_headerK  s    	z-TestReadHtml.test_extract_links_all_no_headerc                 C   sB   d}t jt|d tddd W d    n1 s40    Y  d S )NzPdtype_backend numpy is invalid, only 'numpy_nullable' and 'pyarrow' are allowed.r3   testnumpyrz   r  )rK   r.   r   r   r   test_invalid_dtype_backendZ  s    z'TestReadHtml.test_invalid_dtype_backendc                 C   s@   d}|t |d }tddgddggddgd	}t|| d S )
Na  
        <table>
            <tr>
                <th>
                    <style>.style</style>
                    A
                    </th>
                <th>B</th>
            </tr>
            <tr>
                <td>A1</td>
                <td>B1</td>
            </tr>
            <tr>
                <td>A2</td>
                <td>B2</td>
            </tr>
        </table>
        r   A1ZB1A2ZB2r   r   r   r   r  r   r   r   test_style_tagb  s    zTestReadHtml.test_style_tag)drq  rr  rs  rL   r9   fixturerN   rP   rh   r   marknetworkZ
single_cpur   r   Zslowr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r  r  r  r   r%  r+  r/  r>  r@  rC  rF  ZparametrizerI  rJ  rL  rO  rP  rQ  rW  rX  rY  r[  r^  r
   ra  rb  filterwarningsrn  rx  r  r  r  r  r  r  r  r  r  r   r   r   r   rI   p   s   

<

		
	










0  &
/
H# 






"
DrI   )4collections.abcr   	functoolsr   r   r   r   r   pathlibr   r   r  urllib.errorr   r  r[   r9   Zpandas.compatr   Zpandas.util._test_decoratorsutilZ_test_decoratorstdZpandasr^   r	   r
   r   r   r   r   r   r   r   Zpandas._testingZ_testingr)   Zpandas.core.arraysr   r   Zpandas.io.commonr   r  r   r/   r>   rB   rG   r   Z
skip_if_norH   rI   r   r   r   r   <module>   s>   ,
		
