o
    µ!<f×0  ã                   @   s€   d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ G d
d„ deƒZdS )a  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
é    Né   )ÚCharSetGroupProber)Ú
InputStateÚLanguageFilterÚProbingState)ÚEscCharSetProber)ÚLatin1Prober)ÚMBCSGroupProber)ÚSBCSGroupProberc                	   @   sn   e Zd ZdZdZe d¡Ze d¡Ze d¡Z	dddd	d
ddddœZ
ejfdd„Zdd„ Zdd„ Zdd„ ZdS )ÚUniversalDetectoraq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    gš™™™™™É?s   [€-ÿ]s   (|~{)s   [€-Ÿ]zWindows-1252zWindows-1250zWindows-1251zWindows-1256zWindows-1253zWindows-1255zWindows-1254zWindows-1257)z
iso-8859-1z
iso-8859-2z
iso-8859-5z
iso-8859-6z
iso-8859-7z
iso-8859-8z
iso-8859-9ziso-8859-13c                 C   sN   d | _ g | _d | _d | _d | _d | _d | _|| _t 	t
¡| _d | _|  ¡  d S )N)Ú_esc_charset_proberÚ_charset_probersÚresultÚdoneÚ	_got_dataÚ_input_stateÚ
_last_charÚlang_filterÚloggingÚ	getLoggerÚ__name__ÚloggerÚ_has_win_bytesÚreset)Úselfr   © r   ú\/var/www/html/kck/venv/lib/python3.10/site-packages/pip/_vendor/chardet/universaldetector.pyÚ__init__Q   s   zUniversalDetector.__init__c                 C   sV   ddddœ| _ d| _d| _d| _tj| _d| _| jr| j 	¡  | j
D ]}| 	¡  q"dS )zæ
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        Nç        ©ÚencodingÚ
confidenceÚlanguageFó    )r   r   r   r   r   Ú
PURE_ASCIIr   r   r   r   r   )r   Úproberr   r   r   r   ^   s   


ÿzUniversalDetector.resetc                 C   s6  | j rdS t|ƒsdS t|tƒst|ƒ}| jso| tj¡r%ddddœ| _n;| tj	tj
f¡r6ddddœ| _n*| d¡rCddddœ| _n| d	¡rPd
dddœ| _n| tjtjf¡r`ddddœ| _d| _| jd durod| _ dS | jtjkr“| j |¡r€tj| _n| jtjkr“| j | j| ¡r“tj| _|dd… | _| jtjkrÇ| js©t| jƒ| _| j |¡tjkrÅ| jj| j ¡ | jjdœ| _d| _ dS dS | jtjkr| jsìt | jƒg| _| jt!j"@ rå| j #t$ƒ ¡ | j #t%ƒ ¡ | jD ]}| |¡tjkr
|j| ¡ |jdœ| _d| _  nqï| j& |¡rd| _'dS dS dS )aý  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Nz	UTF-8-SIGç      ð?Ú r   zUTF-32s   þÿ  zX-ISO-10646-UCS-4-3412s     ÿþzX-ISO-10646-UCS-4-2143zUTF-16Tr    éÿÿÿÿ)(r   ÚlenÚ
isinstanceÚ	bytearrayr   Ú
startswithÚcodecsÚBOM_UTF8r   ÚBOM_UTF32_LEÚBOM_UTF32_BEÚBOM_LEÚBOM_BEr   r   r$   ÚHIGH_BYTE_DETECTORÚsearchÚ	HIGH_BYTEÚESC_DETECTORr   Ú	ESC_ASCIIr   r   r   Úfeedr   ÚFOUND_ITÚcharset_nameÚget_confidencer"   r   r	   r   ÚNON_CJKÚappendr
   r   ÚWIN_BYTE_DETECTORr   )r   Úbyte_strr%   r   r   r   r8   o   s–   

þÿ
þ

þ

þþ
ÿû
ù
þû
ñzUniversalDetector.feedc           	   	   C   sf  | j r| jS d| _ | js| j d¡ nY| jtjkr!ddddœ| _nK| jtjkrld}d}d}| j	D ]}|s5q0| 
¡ }||krA|}|}q0|rl|| jkrl|j}|j ¡ }| 
¡ }| d	¡rd| jrd| j ||¡}|||jdœ| _| j ¡ tjkr°| jd
 du r°| j d¡ | j	D ]+}|s‰q„t|tƒr¢|jD ]}| j d|j|j| 
¡ ¡ q‘q„| j d|j|j| 
¡ ¡ q„| jS )zæ
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        Tzno data received!Úasciir&   r'   r   Nr   ziso-8859r    z no probers hit minimum thresholdz%s %s confidence = %s)r   r   r   r   Údebugr   r   r$   r5   r   r;   ÚMINIMUM_THRESHOLDr:   Úlowerr,   r   ÚISO_WIN_MAPÚgetr"   ÚgetEffectiveLevelr   ÚDEBUGr*   r   Úprobers)	r   Úprober_confidenceÚmax_prober_confidenceÚ
max_proberr%   r:   Úlower_charset_namer!   Úgroup_proberr   r   r   ÚcloseÜ   sn   	
þ
€

ÿþ


ýÿýzUniversalDetector.closeN)r   Ú
__module__Ú__qualname__Ú__doc__rB   ÚreÚcompiler3   r6   r>   rD   r   ÚALLr   r   r8   rN   r   r   r   r   r   3   s&    


ù	mr   )rQ   r-   r   rR   Úcharsetgroupproberr   Úenumsr   r   r   Ú	escproberr   Úlatin1proberr   Úmbcsgroupproberr	   Úsbcsgroupproberr
   Úobjectr   r   r   r   r   Ú<module>   s   