o
    _"<f@=                     @   sl   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ G dd deZ	G dd deZ
G dd	 d	eZdS )
    N)BeautifulSoup)EntitySubstitutionEncodingDetectorUnicodeDammitc                   @   sh   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd ZdS )TestUnicodeDammitz"Standalone tests of UnicodeDammit.c                 C   s   d}t |}|j|ksJ d S )Nu   I'm already Unicode! ☃r   unicode_markupselfmarkupdammit r   L/var/www/html/kck/venv/lib/python3.10/site-packages/bs4/tests/test_dammit.pytest_unicode_input      z$TestUnicodeDammit.test_unicode_inputc                 C   s   d}t |}|jdksJ d S )N   <foo></foo>u   <foo>‘’“”</foo>r   r	   r   r   r   test_smart_quotes_to_unicode   r   z.TestUnicodeDammit.test_smart_quotes_to_unicodec                 C   "   d}t |dd}|jdksJ d S )Nr   xmlsmart_quotes_toz+<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>r   r	   r   r   r   !test_smart_quotes_to_xml_entities      z3TestUnicodeDammit.test_smart_quotes_to_xml_entitiesc                 C   r   )Nr   htmlr   z'<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>r   r	   r   r   r   "test_smart_quotes_to_html_entities   r   z4TestUnicodeDammit.test_smart_quotes_to_html_entitiesc                 C   r   )Nr   asciir   z<foo>''""</foo>r   r	   r   r   r   test_smart_quotes_to_ascii#   r   z,TestUnicodeDammit.test_smart_quotes_to_asciic                 C   s0   d}t |}|j dksJ |jdksJ d S )Ns   Sacré bleu! ☃utf-8u   Sacré bleu! ☃r   original_encodinglowerr   )r
   utf8r   r   r   r   test_detect_utf8(   s   z"TestUnicodeDammit.test_detect_utf8c                 C   s4   d}t |dg}|j dksJ |jdksJ d S )N   
iso-8859-8u   םולשr   r
   hebrewr   r   r   r   test_convert_hebrew.   s   z%TestUnicodeDammit.test_convert_hebrewc                 C   s6   d}t |}|j dksJ |jd|ksJ d S )Ns   ケータイ Watchr   )r   r   r    r   encode)r
   utf_8r   r   r   r   /test_dont_see_smart_quotes_where_there_are_none4   s   zATestUnicodeDammit.test_dont_see_smart_quotes_where_there_are_nonec                 C   s,   d d}t|dg}|j dksJ d S )N   Räksmörgåsr   r$   r(   r   r   r    r
   	utf8_datar   r   r   r    test_ignore_inappropriate_codecs:   s   
z2TestUnicodeDammit.test_ignore_inappropriate_codecsc                 C   s6   d d}dD ]}t||g}|j dksJ qd S )Nr+   r   )z.utf8z...z
utF---16.!r,   )r
   r.   bad_encodingr   r   r   r   test_ignore_invalid_codecs?   s
   
z,TestUnicodeDammit.test_ignore_invalid_codecsc                 C   sL   d d}t|dgd}|j dksJ t|ddgd}|jd ks$J d S )Nr+   r   )exclude_encodingszwindows-1252r,   r-   r   r   r   test_exclude_encodingsE   s   
z(TestUnicodeDammit.test_exclude_encodingsN)__name__
__module____qualname____doc__r   r   r   r   r   r"   r'   r*   r/   r1   r3   r   r   r   r   r      s    r   c                   @   sT   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd ZdS )TestEncodingDetectorc                 C   s"   t d}t|j}d|v sJ d S )Ns'   <?xml version="1.0" encoding="UTF-" ?>u   utf-�)r   list	encodings)r
   detectedr:   r   r   r   Ptest_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterU   s
   
zeTestEncodingDetector.test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterc                 C   s(   dD ]}t |dd}d|jksJ qd S )N)s&   <html><meta charset="euc-jp" /></html>s&   <html><meta charset='euc-jp' /></html>s$   <html><meta charset=euc-jp /></html>s#   <html><meta charset=euc-jp/></html>Tis_htmlzeuc-jp)r   r   r
   datar   r   r   r    test_detect_html5_style_meta_tag[   s   z5TestEncodingDetector.test_detect_html5_style_meta_tagc              	   C   s   d}t jj}ttj z1dd }|t j_t|}d|jks J d|jv s'J t	|d}|js1J W ttj
 |t j_d S ttj
 |t j_w )NsT   ﻿<?xml version="1.0" encoding="UTF-8"?>
<html><b>بتر</b>
<i>ѐ</i></html>c                 S   s   d S Nr   )strr   r   r   noop{   s   zETestEncodingDetector.test_last_ditch_entity_replacement.<locals>.noopTu   �zhtml.parser)bs4r   chardet_dammitloggingdisableWARNINGr   contains_replacement_charactersr   r   NOTSET)r
   docchardetrD   r   soupr   r   r   "test_last_ditch_entity_replacemente   s   

z7TestEncodingDetector.test_last_ditch_entity_replacementc                 C   s,   d}t |}d|jksJ d|jksJ d S )N   < a >   < / a > u   <a>áé</a>utf-16le)r   r   r   r?   r   r   r   test_byte_order_mark_removed   s   z1TestEncodingDetector.test_byte_order_mark_removedc                 C   s   d}t |}t |dgd}d|jksJ t |dgd}d|jks"J dgdd |jD ks/J d	}t |dgd
gd}d
|jksAJ dd
gdd |jD ksOJ d S )NrP   zutf-16)known_definite_encodingsr   )user_encodingsrQ   c                 S      g | ]}|d  qS r   r   .0xr   r   r   
<listcomp>       zRTestEncodingDetector.test_known_definite_versus_user_encodings.<locals>.<listcomp>r#   r$   )rS   rT   c                 S   rU   rV   r   rW   r   r   r   rZ      r[   r   r   tried_encodings)r
   r@   r   beforeafterr&   r   r   r   )test_known_definite_versus_user_encodings   s    z>TestEncodingDetector.test_known_definite_versus_user_encodingsc                 C   sH   d}t |dgdgdgd}d|jksJ g ddd |jD ks"J d S )	Nr#   	shift-jisr   r$   )rS   override_encodingsrT   )ra   r   r$   c                 S   rU   rV   r   rW   r   r   r   rZ      r[   zKTestEncodingDetector.test_deprecated_override_encodings.<locals>.<listcomp>r\   r%   r   r   r   "test_deprecated_override_encodings   s   z7TestEncodingDetector.test_deprecated_override_encodingsc                 C   st   d d}d d}|| | }tt |d W d    n1 s%w   Y  t|}d|dks8J d S )Nu	   ☃☃☃r!   u   “Hi, I like Windows!”windows_1252u+   ☃☃☃“Hi, I like Windows!”☃☃☃)r(   pytestraisesUnicodeDecodeErrordecoder   	detwingle)r
   r!   rd   rL   fixedr   r   r   test_detwingle   s   

z#TestEncodingDetector.test_detwinglec                 C   s<   dD ]}| d}|dsJ t|}||ksJ qd S )N)u   œu   ₓu   ðr!      )r(   endswithr   ri   )r
   tricky_unicode_charinputoutputr   r   r   +test_detwingle_ignores_multibyte_characters   s   

z@TestEncodingDetector.test_detwingle_ignores_multibyte_charactersc                 C   s  d}| d}d}| d}tj}||ddd u sJ d||ddks%J d||ddks/J d||ks7J d||ks?J d	d
 }||| d u sMJ ||| d u sWJ ||| ddddksdJ ||dddksnJ |d	| dddkszJ |d| ddd u sJ d S )Nz0<html><head><meta charset="utf-8"></head></html>r   z,<?xml version="1.0" encoding="ISO-8859-1" ?>Fr=   r   Tz
iso-8859-1    i  )r>   search_entire_document)rs      a)r(   r   find_declared_encoding)r
   html_unicode
html_bytesxml_unicode	xml_bytesmspacerr   r   r   test_find_declared_encoding   s&   

z0TestEncodingDetector.test_find_declared_encodingN)r4   r5   r6   r<   rA   rO   rR   r`   rc   rk   rq   r|   r   r   r   r   r8   S   s    
#r8   c                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd S )!TestEntitySubstitutionz1Standalone tests of the EntitySubstitution class.c                 C   s
   t | _d S rB   )r   subr
   r   r   r   setup_method  s   
z#TestEntitySubstitution.setup_methodc                 C   s   d}| j |dksJ d S )Nu   foo∀☃õbaru   foo&forall;☃&otilde;barr~   substitute_htmlr
   sr   r   r   test_simple_html_substitution  s   z4TestEntitySubstitution.test_simple_html_substitutionc                 C   s&   d}t |}| j|jdksJ d S )Ns   fooz&lsquo;&rsquo;foo&ldquo;&rdquo;)r   r~   r   r   )r
   quotesr   r   r   r   test_smart_quote_substitution  s   z4TestEntitySubstitution.test_smart_quote_substitutionc                 C   s:   dD ]\}}d}|| }|| }| j ||ksJ qd S )N)
)z&models;u   ⊧)z&Nfr;u   𝔑)z&ngeqq;u   ≧̸)z&not;   ¬)z&Not;u   ⫬z||)fjr   )z&gt;>)z&lt;<)z&amp;&z3 %s 4r   )r
   entityutemplaterawwith_entitiesr   r   r   test_html5_entity#  s   z(TestEntitySubstitution.test_html5_entityc                 C   s<   d}d}| j ||ksJ d}d}| j ||ksJ d S )Nu   fjords ⊔ penguinszfjords &sqcup; penguinsu   fjords ⊔︀ penguinszfjords &sqcups; penguinsr   )r
   r@   r   r   r   r   )test_html5_entity_with_variation_selectorD  s   z@TestEntitySubstitution.test_html5_entity_with_variation_selectorc                 C   s   d}| j |d|ksJ d S )NWelcome to "my bar"Fr~   substitute_xmlr   r   r   r   Itest_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falseP     z`TestEntitySubstitution.test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falsec                 C   s0   | j dddksJ | j dddksJ d S )NWelcomeTz	"Welcome"z	Bob's Barz"Bob's Bar"r   r   r   r   r   6test_xml_attribute_quoting_normally_uses_double_quotesT  s   zMTestEntitySubstitution.test_xml_attribute_quoting_normally_uses_double_quotesc                 C      d}| j |ddksJ d S )Nr   Tz'Welcome to "my bar"'r   r   r   r   r   Otest_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesX  r   zfTestEntitySubstitution.test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesc                 C   r   )NWelcome to "Bob's Bar"Tz""Welcome to &quot;Bob's Bar&quot;"r   r   r   r   r   btest_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes\  r   zyTestEntitySubstitution.test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotesc                 C   s   d}| j ||ksJ d S )Nr   r   )r
   quotedr   r   r   <test_xml_quotes_arent_escaped_when_value_is_not_being_quoted`  s   zSTestEntitySubstitution.test_xml_quotes_arent_escaped_when_value_is_not_being_quotedc                 C      | j ddks
J d S )Nzfoo<bar>zfoo&lt;bar&gt;r   r   r   r   r   'test_xml_quoting_handles_angle_bracketsd     z>TestEntitySubstitution.test_xml_quoting_handles_angle_bracketsc                 C   r   )NzAT&TzAT&amp;Tr   r   r   r   r   #test_xml_quoting_handles_ampersandsg  r   z:TestEntitySubstitution.test_xml_quoting_handles_ampersandsc                 C   r   )N&Aacute;T&Tz&amp;Aacute;T&amp;Tr   r   r   r   r   Etest_xml_quoting_including_ampersands_when_they_are_part_of_an_entityj  r   z\TestEntitySubstitution.test_xml_quoting_including_ampersands_when_they_are_part_of_an_entityc                 C   r   )Nr   z&Aacute;T&amp;T)r~   "substitute_xml_containing_entitiesr   r   r   r   Dtest_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entitym  r   z[TestEntitySubstitution.test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityc                 C   s   d}| j ||ksJ dS )z:There's no need to do this except inside attribute values.zBob's "bar"Nr   )r
   textr   r   r    test_quotes_not_html_substitutedp  s   z7TestEntitySubstitution.test_quotes_not_html_substitutedN)r4   r5   r6   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r}     s"    !r}   )re   rG   rE   r   
bs4.dammitr   r   r   objectr   r8   r}   r   r   r   r   <module>   s   G ?