o
    a"<fsU                  	   @   s   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ g dZdd	gd	gd	gd
Zg Zg dZddd eed deddeddD Zede d ejZdZG dd dZdd ZG dd de	jZdS )    )chainN)urlparse)unescape)html5lib_shim)alphabetize_attributes)aabbracronymb
blockquotecodeemiliolstrongulhreftitle)r   r   r	   )httphttpsmailto c                 C   s   g | ]}t |qS  )chr).0cr   r   G/var/www/html/kck/venv/lib/python3.10/site-packages/bleach/sanitizer.py
<listcomp>,       r   	                []?c                   @   s0   e Zd ZdZeeeedddfddZdd Z	dS )	Cleanera  Cleaner for cleaning HTML fragments of malicious content

    This cleaner is a security-focused function whose sole purpose is to remove
    malicious content from a string such that it can be displayed as content in
    a web page.

    To use::

        from bleach.sanitizer import Cleaner

        cleaner = Cleaner()

        for text in all_the_yucky_things:
            sanitized = cleaner.clean(text)

    .. Note::

       This cleaner is not designed to use to transform content to be used in
       non-web-page contexts.

    .. Warning::

       This cleaner is not thread-safe--the html parser has internal state.
       Create a separate cleaner per thread!


    FTNc                 C   sn   || _ || _|| _|| _|| _|| _|pg | _tj| j | jddd| _	t
d| _tjddddddd| _dS )a  Initializes a Cleaner

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: allowed list of css styles; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not to strip disallowed elements

        :arg bool strip_comments: whether or not to strip HTML comments

        :arg list filters: list of html5lib Filter classes to pass streamed content through

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Using filters changes the output of ``bleach.Cleaner.clean``.
               Make sure the way the filters change the output are secure.

        F)tagsstripconsume_entitiesnamespaceHTMLElementsetreealwaysT)quote_attr_valuesomit_optional_tagsescape_lt_in_attrsresolve_entitiessanitizealphabetical_attributesN)r)   
attributesstyles	protocolsr*   strip_commentsfiltersr   BleachHTMLParserparsergetTreeWalkerwalkerBleachHTMLSerializer
serializer)selfr)   r5   r6   r7   r*   r8   r9   r   r   r   __init__T   s,   &
zCleaner.__init__c              
   C   s   t |tsdj|jjd}t||sdS | j|}t| 	|| j
| j| j| j| j| jg d}| jD ]}||d}q3| j|S )zCleans text and returns sanitized result as unicode

        :arg str text: text to be cleaned

        :returns: sanitized text as unicode

        :raises TypeError: if ``text`` is not a text type

        z9argument cannot be of '{name}' type, must be of text type)namer   )sourcer5   strip_disallowed_elementsstrip_html_commentsallowed_elementsallowed_css_propertiesallowed_protocolsallowed_svg_properties)rC   )
isinstancestrformat	__class____name__	TypeErrorr;   parseFragmentBleachSanitizerFilterr=   r5   r*   r8   r)   r6   r7   r9   r?   render)r@   textmessagedomfilteredfilter_classr   r   r   clean   s,   


zCleaner.clean)
rN   
__module____qualname____doc__ALLOWED_TAGSALLOWED_ATTRIBUTESALLOWED_STYLESALLOWED_PROTOCOLSrA   rX   r   r   r   r   r(   7   s    
Br(   c                    sH   t  r S t tr fdd}|S t tr  fdd}|S td)a0  Generates attribute filter function for the given attributes value

    The attributes value can take one of several shapes. This returns a filter
    function appropriate to the attributes value. One nice thing about this is
    that there's less if/then shenanigans in the ``allow_token`` method.

    c                    s`   |  v r |  }t |r|| ||S ||v rdS d v r. d }t |r*|| ||S ||v S dS )NT*F)callable)tagattrvalueattr_valr5   r   r   _attr_filter   s   z.attribute_filter_factory.<locals>._attr_filterc                    s   | v S Nr   )rb   rc   rd   rf   r   r   rg      s   z3attributes needs to be a callable, a list or a dict)ra   rJ   dictlist
ValueError)r5   rg   r   rf   r   attribute_filter_factory   s   

rl   c                       sp   e Zd ZdZeddf fdd	Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Z  ZS )rQ   zmhtml5lib Filter that sanitizes text

    This filter can be used anywhere html5lib filters can be used.

    FTc                    s@   t || _|| _|| _tjddtdd tt| j	|fi |S )a   Creates a BleachSanitizerFilter instance

        :arg Treewalker source: stream

        :arg list tags: allowed list of tags; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: allowed attributes; can be a callable, list or dict;
            defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: allowed list of css styles; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: allowed list of protocols for links; defaults
            to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip_disallowed_elements: whether or not to strip disallowed
            elements

        :arg bool strip_html_comments: whether or not to strip HTML comments

        ignorez"html5lib's sanitizer is deprecatedzbleach._vendor.html5lib)rT   categorymodule)
rl   attr_filterrD   rE   warningsfilterwarningsDeprecationWarningsuperrQ   rA   )r@   rC   r5   rD   rE   kwargsrM   r   r   rA      s   
zBleachSanitizerFilter.__init__c                 c   sB    |D ]}|  |}|sqt|tr|D ]}|V  qq|V  qd S rh   )sanitize_tokenrJ   rj   )r@   token_iteratortokenretsubtokenr   r   r   sanitize_stream  s   

z%BleachSanitizerFilter.sanitize_streamc                 c   s    g }|D ]2}|r(|d dkr| | qddd |D dd}g }|V  n|d dkr4| | q|V  qddd |D dd}|V  dS )	z/Merge consecutive Characters tokens in a streamtype
Charactersr   c                 S      g | ]}|d  qS datar   r   
char_tokenr   r   r   r   8  r   z:BleachSanitizerFilter.merge_characters.<locals>.<listcomp>)r   r}   c                 S   r   r   r   r   r   r   r   r   F  r   N)appendjoin)r@   rx   characters_bufferry   	new_tokenr   r   r   merge_characters*  s,   


z&BleachSanitizerFilter.merge_charactersc                 C   s   |  | tj| S rh   )r   r|   r   Filter__iter__)r@   r   r   r   r   K  s   zBleachSanitizerFilter.__iter__c                 C   s   |d }|dv r*|d | j v r| |S | jrdS d|v r%t|d |d< | |S |dkrC| jsAtj|d ddd	d
|d< |S dS |dkrL| |S |S )a  Sanitize a token either by HTML-encoding or dropping.

        Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
        ['attribute', 'pairs'], 'tag': callable}.

        Here callable is a function with two arguments of attribute name and
        value. It should return true of false.

        Also gives the option to strip tags instead of encoding.

        :arg dict token: token to sanitize

        :returns: token or list of tokens

        r}   )StartTagEndTagEmptyTagrB   Nr   Commentz&quot;z&#x27;)"')entitiesr~   )	rF   allow_tokenrD   r   disallowed_tokenrE   r   escapesanitize_characters)r@   ry   
token_typer   r   r   rw   P  s&   



z$BleachSanitizerFilter.sanitize_tokenc                 C   s   | dd}|s
|S tt|}||d< d|vr|S g }t|D ]E}|s&q!|dr^t|}|dur^|dkrA|ddd n|d|d	 |t	|d
 d }|r]|d|d q!|d|d q!|S )a  Handles Characters tokens

        Our overridden tokenizer doesn't do anything with entities. However,
        that means that the serializer will convert all ``&`` in Characters
        tokens to ``&amp;``.

        Since we don't want that, we extract entities here and convert them to
        Entity tokens so the serializer will let them be.

        :arg token: the Characters token to work on

        :returns: a list of tokens

        r   r   &Nampr~   )r}   r   Entity)r}   rB      )
getINVISIBLE_CHARACTERS_REsubINVISIBLE_REPLACEMENT_CHARr   next_possible_entity
startswithmatch_entityr   len)r@   ry   r   
new_tokenspartentity	remainderr   r   r   r     s.   

z)BleachSanitizerFilter.sanitize_charactersc                 C   s   t |}tdd|}|dd}| }zt|}W n
 ty&   Y dS w |jr3|j|v r1|S dS |	dr:|S d|v rI|
dd |v rI|S d|v rO|S dS )	zChecks a uri value to see if it's allowed

        :arg value: the uri value to sanitize
        :arg allowed_protocols: list of allowed protocols

        :returns: allowed value or None

        z[`\000-\040\177-\240\s]+r   u   �N#:r   r   )r   convert_entitiesrer   replacelowerr   rk   schemer   split)r@   rd   rH   	new_valueparsedr   r   r   sanitize_uri_value  s(   


z(BleachSanitizerFilter.sanitize_uri_valuec           	      C   s   d|v rwi }|d   D ]d\}}|\}}| |d ||sq|| jv r1| || j}|du r/q|}|| jv rHtddt|}|	 }|sFq|}d|d f| j
v rc|dtjd dffv rctd	|rcq|d
krl| |}|||< qt||d< |S )z-Handles the case where we're allowing the tagr   rB   Nzurl\s*\(\s*[^#\s][^)]+?\) )Nr   xlinkr   z
^\s*[^#\s])Nstyle)itemsrp   attr_val_is_urir   rH   svg_attr_val_allows_refr   r   r   r*   svg_allow_local_hrefr   
namespacessearchsanitize_cssr   )	r@   ry   attrsnamespaced_nameval	namespacerB   r   new_valr   r   r   r     s:   



z!BleachSanitizerFilter.allow_tokenc                 C   s  |d }|dkrd|d  |d< nV|d r_|dv sJ g }|d   D ]-\\}}}|r2|s2||}}|d u s;|tjvr>|}n	dtj| |f }|d||f  q#d	|d d
|f |d< nd|d  |d< |drx|d d d d |d< d|d< |d= |S )Nr}   r   z</%s>rB   r   )r   r   z%s:%sz %s="%s"z<%s%s>r   z<%s>selfClosingz/>r~   )r   r   prefixesr   r   r   )r@   ry   r   r   nsrB   vr   r   r   r   r   /  s4   


z&BleachSanitizerFilter.disallowed_tokenc                 C   s   t |}tdd|}|d}tjdtjtjB d}|D ]
}||s* dS q td|s3dS g }t	d|D ],\}}|sBq;|
 | jv rU||d	 | d  q;|
 | jv rg||d	 | d  q;d|S )
zSanitizes css in style tagszurl\s*\(\s*[^\s)]+?\s*\)\s*r   ;ak  ^(  # consider a style attribute value as composed of:
[/:,#%!.\s\w]    # a non-newline character
|\w-\w           # 3 characters in the form \w-\w
|'[\s\w]+'\s*    # a single quoted string of [\s\w]+ with trailing space
|"[\s\w]+"       # a double quoted string of [\s\w]+
|\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, ...
)*$)flagsr   z ^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$z([-\w]+)\s*:\s*([^:;]*)z: )r   r   r   compiler   r   UVERBOSEmatchfindallr   rG   r   rI   r   )r@   r   partsgauntletr   rX   proprd   r   r   r   r   [  s.   




z"BleachSanitizerFilter.sanitize_css)rN   rY   rZ   r[   r]   rA   r|   r   r   rw   r   r   r   r   r   __classcell__r   r   rv   r   rQ      s    	+!/=7<,rQ   )	itertoolsr   r   rq   bleach._vendor.parser   xml.sax.saxutilsr   bleachr   bleach.utilsr   r\   r]   r^   r_   r   rangeINVISIBLE_CHARACTERSr   UNICODEr   r   r(   rl   SanitizerFilterrQ   r   r   r   r   <module>   s.    & 
+