o
    a"<fN                     @   st  d Z ddlZddlZddlZejddedd ddlmZmZ ddlm	Z	 dd	l
mZmZ dd
l
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZ e	jZeeZ e	j!d e	j!d e	j!d hZ"e	j!d Z#e	j!d Z$g dZ%G dd dZ&G dd deZ'G dd deZ(dd Z)dd  Z*d!d" Z+e,d#Z-d$d% Z.G d&d' d'eZ/dS )(z
Shim module between Bleach and html5lib. This makes it easier to upgrade the
html5lib library without having to change a lot of code.
    Nignorez"html5lib's sanitizer is deprecatedzbleach._vendor.html5lib)messagecategorymodule)
HTMLParsergetTreeWalker)	constants)
namespacesprefixes)_ReparseException)Filter)allowed_protocols)HTMLInputStream)escapeHTMLSerializer)attributeMapHTMLTokenizer)TrieStartTagEndTagEmptyTag
Characters
ParseError)paabbraddressareaarticleasideaudiobbasebdibdo
blockquotebodybrbuttoncanvascaptioncitecodecolcolgroupdatadatalistdddeldetailsdfndialogdivdldtemembedfieldset
figcaptionfigurefooterformh1h2h3h4h5h6headheaderhgrouphrhtmliiframeimginputinskbdkeygenlabellegendlilinkmapmarkmenumetameternavnoscriptobjectoloptgroupoptionoutputpparampicturepreprogressqrprtrubyssampscriptsectionselectslotsmallsourcespanstrongstylesubsummarysuptabletbodytdtemplatetextareatfootththeadtimetitletrtrackuulvarvideowbrc                   @   sf   e Zd ZdZdd Zedd Zedd Zedd	 Zd
d Z	dddZ
dd Zdd Zdd ZdS )InputStreamWithMemoryzWraps an HTMLInputStream to remember characters since last <

    This wraps existing HTMLInputStream classes to keep track of the stream
    since the last < which marked an open tag state.

    c                 C   s$   || _ | j j| _| j j| _g | _d S N)_inner_streamresetposition_buffer)selfinner_stream r   K/var/www/html/kck/venv/lib/python3.10/site-packages/bleach/html5lib_shim.py__init__   s   


zInputStreamWithMemory.__init__c                 C      | j jS r   )r   errorsr   r   r   r   r         zInputStreamWithMemory.errorsc                 C   r   r   )r   charEncodingr   r   r   r   r      r   z"InputStreamWithMemory.charEncodingc                 C   r   r   )r   changeEncodingr   r   r   r   r      r   z$InputStreamWithMemory.changeEncodingc                 C   s   | j  }|r| j| |S r   )r   charr   append)r   cr   r   r   r      s   
zInputStreamWithMemory.charFc                 C   s$   | j j||d}| jt| |S )N)opposite)r   
charsUntilr   extendlist)r   
charactersr   charsr   r   r   r      s   z InputStreamWithMemory.charsUntilc                 C   s   | j r	| j d | j|S )N)r   popr   unget)r   r   r   r   r   r      s   zInputStreamWithMemory.ungetc                 C   s   d | jS )zReturns the stream history since last '<'

        Since the buffer starts at the last '<' as as seen by tagOpenState(),
        we know that everything from that point to when this method is called
        is the "tag" that is being tokenized.

         )joinr   r   r   r   r   get_tag   s   zInputStreamWithMemory.get_tagc                 C   s   dg| _ dS )zResets stream history to just '<'

        This gets called by tagOpenState() which marks a '<' that denotes an
        open tag. Any time we see that, we reset the buffer.

        <N)r   r   r   r   r   	start_tag   s   zInputStreamWithMemory.start_tagNF)__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r   r   r   r   r   r   r   r      s    




r   c                       sT   e Zd ZdZd fdd	Z fddZd fdd		Z fd
dZ fddZ  Z	S )BleachHTMLTokenizerz1Tokenizer that doesn't consume character entitiesFc                    s,   t t| jdi | || _t| j| _d S )Nr   )superr   r   consume_entitiesr   stream)r   r   kwargs	__class__r   r   r     s   zBleachHTMLTokenizer.__init__c                 #   s   d }t t|  D ]v}|d urt|d dkr6|d tv r6|dr6tdd |d  D |d< d }|V  n=|d dkr_| jjd ur_|d 	 
 | jjvr_| j |d< t|d< d }|V  n|d tkrk|V  |}n|V  |V  d }q
|d tkr}|}q
|V  q
|r|V  d S d S )Nr.   z#invalid-character-in-attribute-nametypec                 s   s4    | ]\}}d |vrd|vrd|vr||fV  qdS )"'r   Nr   ).0	attr_name
attr_valuer   r   r   	<genexpr>  s    z/BleachHTMLTokenizer.__iter__.<locals>.<genexpr>z!expected-closing-tag-but-got-char)r   r   __iter__TAG_TOKEN_TYPESgetr   itemsparsertagslowerstripr   r   CHARACTERS_TYPEPARSEERROR_TYPE)r   last_error_tokentokenr   r   r   r     sD   	
	
zBleachHTMLTokenizer.__iter__Nc                    sP   | j rtt| ||S |r| jd d d  d7  < d S | jtdd d S )Nr.   r      &r   r.   )r   r   r   consumeEntitycurrentToken
tokenQueuer   r   )r   allowedCharfromAttributer   r   r   r   U  s   

z!BleachHTMLTokenizer.consumeEntityc                    s   | j   tt|  S r   )r   r   r   r   tagOpenStater   r   r   r   r   i  s   
z BleachHTMLTokenizer.tagOpenStatec                    s   | j }| jjd ur9|d tv r9|d  | jjvr9| jjr d}n| j }t|d}|| _ | j	
| | j| _d S tt|   d S )Nr   namer   r   )r   r   r   r   r   r   r   r   r   r   r   	dataStatestater   r   emitCurrentToken)r   r   new_data	new_tokenr   r   r   r   q  s   

z$BleachHTMLTokenizer.emitCurrentTokenr   )NF)
r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r   r      s    Jr   c                       s,   e Zd ZdZ fddZ	d	ddZ  ZS )
BleachHTMLParserz$Parser that uses BleachHTMLTokenizerc                    sB   |durdd |D nd| _ || _|| _tt| jdi | dS )a  
        :arg tags: list of allowed tags--everything else is either stripped or
            escaped; if None, then this doesn't look at tags at all
        :arg strip: whether to strip disallowed tags (True) or escape them (False);
            if tags=None, then this doesn't have any effect
        :arg consume_entities: whether to consume entities (default behavior) or
            leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)

        Nc                 S   s   g | ]}|  qS r   )r   )r   tagr   r   r   
<listcomp>  s    z-BleachHTMLParser.__init__.<locals>.<listcomp>r   )r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r     s   
zBleachHTMLParser.__init__Fr5   Tc                 K   sh   || _ || _|| _td|| j| d|| _|   z|   W d S  ty3   |   |   Y d S w )N)r   r   r   r   )	innerHTMLMode	container	scriptingr   r   	tokenizerr   mainLoopReparseException)r   r   	innerHTMLr   r   r   r   r   r   _parse  s   	zBleachHTMLParser._parse)Fr5   T)r   r   r   r   r   r   r   r   r   r   r   r     s
    r   c                 C   s   | d dkrDt | dk rdS | d dv r| dd d}}n	| dd d}}|d	kr-dS t||}d|  k r?d
k rBt|S  dS dS t| dS )a9  Convert an entity (minus the & and ; part) into what it represents

    This handles numeric, hex, and text entities.

    :arg value: the string (minus the ``&`` and ``;`` part) to convert

    :returns: unicode character or None if it's an ambiguous ampersand that
        doesn't match a character entity

    r   #   Nr   xX   
   r   i   )lenintchrENTITIESr   )valueint_as_stringr!   
code_pointr   r   r   convert_entity  s   
r   c                 C   s   d| vr| S g }t | D ]6}|sq|dr=t|}|dur=t|}|dur=|| |t|d d }|r<|| q|| qd|S )zConverts all found entities in the text

    :arg text: the text to convert entities in

    :returns: unicode text with converted entities

    r   Nr   r   )next_possible_entity
startswithmatch_entityr   r   r   r   )textnew_textpartentity	converted	remainderr   r   r   convert_entities  s$   



r   c                 C   s.  | d dkr
t d| dd } t| } d}dtj }| rj| d dkrjd}| d | r<| d d	v r<d
}|| d7 }nd}| r\| d |vr\| d}||vrPn||7 }| r\| d |vsF|rh| rh| d dkrh|S dS | r| d |vr| d}t|s}n||7 }| r| d |vsr|r| r| d dkr|S dS )aH  Returns first entity in stream or None if no entity exists

    Note: For Bleach purposes, entities must start with a "&" and end with
    a ";". This ignoresambiguous character entities that have no ";" at the
    end.

    :arg stream: the character stream

    :returns: ``None`` or the entity string without "&" or ";"

    r   r   zStream should begin with "&"r   Nr   z<&=;r   r   0123456789abcdefABCDEF
0123456789;)
ValueErrorr   string
whitespacer   ENTITIES_TRIEhas_keys_with_prefix)r   possible_entityend_charactersallowedr   r   r   r   r     s>   




r   z(&)c                 c   sD    t t| D ]\}}|dkr|V  q|d dkrd| V  qdS )zTakes a text and generates a list of possible entities

    :arg text: the text to look at

    :returns: generator where each part (except the first) starts with an
        "&"

    r   r   r   N)	enumerateAMP_SPLIT_REsplit)r   rJ   r   r   r   r   r   ?  s   	
r   c                       s.   e Zd ZdZdZdd Zd fdd	Z  ZS )	BleachHTMLSerializerz[HTMLSerializer that undoes & -> &amp; in attributes and sets
    escape_rcdata to True
    Tc                 c   s    | dd}t|D ]6}|sq|dr:t|}|dur:t|dur:d| d V  |t|d d }|r9|V  q| ddV  qdS )z,Escapes just bare & in HTML attribute valuesz&amp;r   Nr  r   )replacer   r   r   r   r   )r   stokenr   r   r   r   r   escape_base_amp\  s   
z$BleachHTMLSerializer.escape_base_ampNc                 #   s    d}d}t t| ||D ]4}|r8|dkrd}n|r.|dkr-| |D ]}|V  q$d}qn|dkr4d}|V  q|dr?d}|V  qdS )zWrap HTMLSerializer.serialize and conver & to &amp; in attribute values

        Note that this converts & to &amp; in attribute values where the & isn't
        already part of an unambiguous character entity.

        F>r   =Tr   N)r   r  	serializer  r   )r   
treewalkerencodingin_tagafter_equalsr  r   r   r   r   r  {  s*   
zBleachHTMLSerializer.serializer   )r   r   r   r   escape_rcdatar  r  r   r   r   r   r   r  O  s
    
r  )0r   rer  warningsfilterwarningsDeprecationWarningbleach._vendor.html5libr   r   r   !bleach._vendor.html5lib.constantsr	   r
   r   r   $bleach._vendor.html5lib.filters.baser   )bleach._vendor.html5lib.filters.sanitizerr   SanitizerFilter$bleach._vendor.html5lib._inputstreamr   "bleach._vendor.html5lib.serializerr   r   "bleach._vendor.html5lib._tokenizerr   r   bleach._vendor.html5lib._trier   entitiesr   r  
tokenTypesr   r   r   	HTML_TAGSr   r   r   r   r   r   compiler  r   r  r   r   r   r   <module>   sP   

t? *"#
: