o
    _"<fç  ã                   @   s  d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlZddlZddlZddlZddlZddlZddlZddlZdd	„ Zd#dd„ZG dd„ deƒZdd„ ZdZdZd$dd„Zd%dd„Zd&dd„Zd'dd„Zd(d d!„Zed"kr…eej  ¡ ƒ dS dS ))z=Diagnostic functions, mainly for use when doing tech support.ÚMITé    N)ÚBytesIO)Ú
HTMLParser)ÚBeautifulSoupÚ__version__)Úbuilder_registryc                 C   sR  t dt ƒ t dtj ƒ g d¢}|D ]}tjD ]	}||jv r! nq| |¡ t d| ƒ qd|v rc| d¡ zddl	m
} t d	d
 tt|jƒ¡ ƒ W n tyb } z
t dƒ W Y d}~nd}~ww d|v r‹zddl}t d|j ƒ W n tyŠ } z
t dƒ W Y d}~nd}~ww t| dƒr•|  ¡ } nJ|  d¡sŸ|  d¡r«t d|  ƒ t dƒ dS z&tj | ¡rÐt d|  ƒ t| ƒ}| ¡ } W d  ƒ n1 sËw   Y  W n	 tyÚ   Y nw t dƒ |D ]E}t d| ƒ d}	z
t| |d}
d}	W n ty } zt d| ƒ t ¡  W Y d}~nd}~ww |	r"t d| ƒ t |
 ¡ ƒ t dƒ qádS )z¼Diagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %s)úhtml.parserÚhtml5libÚlxmlz;I noticed that %s is not installed. Installing it may help.r
   zlxml-xmlr   ©ÚetreezFound lxml version %sÚ.z.lxml is not installed or couldn't be imported.Nr	   zFound html5lib version %sz2html5lib is not installed or couldn't be imported.Úreadzhttp:zhttps:z<"%s" looks like a URL. Beautiful Soup is not an HTTP client.zpYou need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.z7"%s" looks like a filename. Reading data from the file.Ú z#Trying to parse your markup with %sF)ÚfeaturesTú%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)Úprintr   ÚsysÚversionr   Úbuildersr   ÚremoveÚappendr
   r   ÚjoinÚmapÚstrÚLXML_VERSIONÚImportErrorr	   Úhasattrr   Ú
startswithÚosÚpathÚexistsÚopenÚ
ValueErrorr   Ú	ExceptionÚ	tracebackÚ	print_excÚprettify)ÚdataÚbasic_parsersÚnameÚbuilderr   Úer	   ÚfpÚparserÚsuccessÚsoup© r1   úC/var/www/html/kck/venv/lib/python3.10/site-packages/bs4/diagnose.pyÚdiagnose   sŠ   

ÿ
ÿÿ€
ÿ€ÿÿ€ÿ



ÿ€ý€þ
ór3   Tc                 K   sp   ddl m} | dd¡}t| tƒr|  d¡} t| ƒ}|j|f||dœ|¤ŽD ]\}}td||j	|j
f ƒ q&dS )	a´  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   r   ÚrecoverTÚutf8)Úhtmlr4   z%s, %4s, %sN)r
   r   ÚpopÚ
isinstancer   Úencoder   Ú	iterparser   ÚtagÚtext)r(   r6   Úkwargsr   r4   ÚreaderÚeventÚelementr1   r1   r2   Ú
lxml_trace]   s   

ÿÿÿýrA   c                   @   s`   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ Z	dd„ Z
dd„ Zdd„ Zdd„ ZdS )ÚAnnouncingParserzèSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    c                 C   s   t |ƒ d S )N)r   )ÚselfÚsr1   r1   r2   Ú_p{   s   zAnnouncingParser._pc                 C   ó   |   d| ¡ d S )Nz%s START©rE   )rC   r*   Úattrsr1   r1   r2   Úhandle_starttag~   ó   z AnnouncingParser.handle_starttagc                 C   rF   )Nz%s ENDrG   ©rC   r*   r1   r1   r2   Úhandle_endtag   rJ   zAnnouncingParser.handle_endtagc                 C   rF   )Nz%s DATArG   ©rC   r(   r1   r1   r2   Úhandle_data„   rJ   zAnnouncingParser.handle_datac                 C   rF   )Nz
%s CHARREFrG   rK   r1   r1   r2   Úhandle_charref‡   rJ   zAnnouncingParser.handle_charrefc                 C   rF   )Nz%s ENTITYREFrG   rK   r1   r1   r2   Úhandle_entityrefŠ   rJ   z!AnnouncingParser.handle_entityrefc                 C   rF   )Nz
%s COMMENTrG   rM   r1   r1   r2   Úhandle_comment   rJ   zAnnouncingParser.handle_commentc                 C   rF   )Nz%s DECLrG   rM   r1   r1   r2   Úhandle_decl   rJ   zAnnouncingParser.handle_declc                 C   rF   )Nz%s UNKNOWN-DECLrG   rM   r1   r1   r2   Úunknown_decl“   rJ   zAnnouncingParser.unknown_declc                 C   rF   )Nz%s PIrG   rM   r1   r1   r2   Ú	handle_pi–   rJ   zAnnouncingParser.handle_piN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__rE   rI   rL   rN   rO   rP   rQ   rR   rS   rT   r1   r1   r1   r2   rB   s   s    rB   c                 C   s   t ƒ }| | ¡ dS )zÂPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)rB   Úfeed)r(   r.   r1   r1   r2   Úhtmlparser_trace™   s   rZ   ÚaeiouÚbcdfghjklmnpqrstvwxyzé   c                 C   s:   d}t | ƒD ]}|d dkrt}nt}|t |¡7 }q|S )z#Generate a random word-like string.r   é   r   )ÚrangeÚ_consonantsÚ_vowelsÚrandomÚchoice)ÚlengthrD   ÚiÚtr1   r1   r2   Úrword§   s   rg   é   c                 C   s   d  dd„ t| ƒD ƒ¡S )z'Generate a random sentence-like string.ú c                 s   s     | ]}t t d d¡ƒV  qdS )rh   é	   N)rg   rb   Úrandint)Ú.0re   r1   r1   r2   Ú	<genexpr>´   s   € zrsentence.<locals>.<genexpr>)r   r_   )rd   r1   r1   r2   Ú	rsentence²   s   rn   éè  c                 C   sš   g d¢}g }t | ƒD ]9}t dd¡}|dkr#t |¡}| d| ¡ q
|dkr3| tt dd¡ƒ¡ q
|dkrCt |¡}| d| ¡ q
d	d
 |¡ d S )z+Randomly generate an invalid HTML document.)ÚpÚdivÚspanre   ÚbÚscriptÚtabler   é   z<%s>é   rh   r^   z</%s>z<html>Ú
z</html>)r_   rb   rk   rc   r   rn   r   )Únum_elementsÚ	tag_namesÚelementsre   rc   Útag_namer1   r1   r2   Úrdoc¶   s   

€r}   é † c           
      C   s&  t dt ƒ t| ƒ}t dt|ƒ ƒ dddgddfD ]>}d}zt ¡ }t||ƒ}t ¡ }d}W n tyK } zt d	| ƒ t ¡  W Y d
}~nd
}~ww |rXt d||| f ƒ qddl	m
} t ¡ }| |¡ t ¡ }t d||  ƒ dd
l}	|	 ¡ }t ¡ }| |¡ t ¡ }t d||  ƒ d
S )z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r
   r6   r	   r   FTr   Nz"BS4+%s parsed the markup in %.2fs.r   r   z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)r   r   r}   ÚlenÚtimer   r$   r%   r&   r
   r   ÚHTMLr	   r   Úparse)
ry   r(   r.   r/   Úar0   rs   r,   r   r	   r1   r1   r2   Úbenchmark_parsersÈ   s:   
€þ€

r„   r
   c                 C   sX   t  ¡ }|j}t| ƒ}tt||d}t d|||¡ t 	|¡}| 
d¡ | dd¡ dS )z7Use Python's profiler on a randomly generated document.)Úbs4r(   r.   zbs4.BeautifulSoup(data, parser)Ú
cumulativez_html5lib|bs4é2   N)ÚtempfileÚNamedTemporaryFiler*   r}   Údictr…   ÚcProfileÚrunctxÚpstatsÚStatsÚ
sort_statsÚprint_stats)ry   r.   Ú
filehandleÚfilenamer(   ÚvarsÚstatsr1   r1   r2   Úprofileè   s   

r•   Ú__main__)T)r]   )rh   )ro   )r~   )r~   r
   )!rX   Ú__license__r‹   Úior   Úhtml.parserr   r…   r   r   Úbs4.builderr   r   r   rb   rˆ   r€   r%   r   r3   rA   rB   rZ   ra   r`   rg   rn   r}   r„   r•   rU   Ústdinr   r1   r1   r1   r2   Ú<module>   s<    
G&




 ÿ