
    @[h                     \    d dl Z d dlZddlmZ  ej        d          Z G d d          ZdS )    N   )ProbingStates%   [a-zA-Z]*[-]+[a-zA-Z]*[^a-zA-Z-]?c                       e Zd ZdZddZd Zed             Zd Zed             Z	d Z
ed	             Zed
             Zed             ZdS )CharSetProbergffffff?Nc                 ^    d | _         || _        t          j        t                    | _        d S N)_statelang_filterlogging	getLogger__name__logger)selfr
   s     p/var/www/api.easyaligner.net/htdocs/venv_linux/lib/python3.11/site-packages/pip/_vendor/chardet/charsetprober.py__init__zCharSetProber.__init__+   s'    &'11    c                 (    t           j        | _        d S r   )r   	DETECTINGr	   r   s    r   resetzCharSetProber.reset0   s    ",r   c                     d S r    r   s    r   charset_namezCharSetProber.charset_name3   s    tr   c                     t           r   )NotImplementedError)r   byte_strs     r   feedzCharSetProber.feed7   s    !!r   c                     | j         S r   )r	   r   s    r   statezCharSetProber.state:   s
    {r   c                     dS )Ng        r   r   s    r   get_confidencezCharSetProber.get_confidence>   s    sr   c                 2    t          j        dd|           } | S )Ns   ([ -])+    )resub)bufs    r   filter_high_byte_onlyz#CharSetProber.filter_high_byte_onlyA   s    f&c22
r   c                    t                      }t                              |           }|D ]Z}|                    |dd                    |dd         }|                                s|dk     rd}|                    |           [|S )u7  
        We define three types of bytes:
        alphabet: english alphabets [a-zA-Z]
        international: international characters [-ÿ]
        marker: everything else [^a-zA-Z-ÿ]
        The input buffer can be thought to contain a series of words delimited
        by markers. This function works to filter all words that contain at
        least one international character. All contiguous sequences of markers
        are replaced by a single space ascii character.
        This filter applies to all scripts which do not use English characters.
        N   r#   )	bytearrayINTERNATIONAL_WORDS_PATTERNfindallextendisalpha)r&   filteredwordsword	last_chars        r   filter_international_wordsz(CharSetProber.filter_international_wordsF   s     ;;
 ,33C88 
	' 
	'DOOD"I&&& RSS	I$$&& !9w+>+> 	OOI&&&&r   c                 v   t                      }d}d}t          |                               d          } t          |           D ]U\  }}|dk    r|dz   }d}|dk    r<||k    r4|s2|                    | ||                    |                    d           d}V|s|                    | |d	                    |S )
a[  
        Returns a copy of ``buf`` that retains only the sequences of English
        alphabet and high byte characters that are not between <> characters.
        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used by
        ``Latin1Prober``.
        Fr   c   >r      <r#   TN)r+   
memoryviewcast	enumerater.   )r&   r0   in_tagprevcurrbuf_chars         r   remove_xml_tagszCharSetProber.remove_xml_tagsh   s     ;;oo""3'''nn 	 	ND(4axT!!$;;v; OOCT	N333OOD)))  	( OOCJ'''r   r   )r   
__module____qualname__SHORTCUT_THRESHOLDr   r   propertyr   r   r   r!   staticmethodr'   r4   r@   r   r   r   r   r   '   s        2 2 2 2
- - -   X" " "   X     \   \B ! ! \! ! !r   r   )r   r$   enumsr   compiler,   r   r   r   r   <module>rH      s~   :  				      (bj8  
c c c c c c c c c cr   