
    @[ha                      :    d dl mZ d dlmZ  G d de          ZdS )   )CharSetProber)ProbingStatec                        e Zd ZdZdZdZ fdZ fdZed             Z	ed             Z
d Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zed             Zd Z xZS )UTF1632Proberad  
    This class simply looks for occurrences of zero bytes, and infers
    whether the file is UTF16 or UTF32 (low-endian or big-endian)
    For instance, files looking like (       [nonzero] )+
    have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
    may be guessed to be UTF16BE, and inversely for little-endian varieties.
       gGz?c                 2   t                                                       d| _        dgdz  | _        dgdz  | _        t
          j        | _        g d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        |                                  d S )N       r	   r	   r	   r	   F)super__init__positionzeros_at_modnonzeros_at_modr   	DETECTING_statequadinvalid_utf16beinvalid_utf16leinvalid_utf32beinvalid_utf32le'first_half_surrogate_pair_detected_16be'first_half_surrogate_pair_detected_16leresetself	__class__s    p/var/www/api.easyaligner.net/htdocs/venv_linux/lib/python3.11/site-packages/pip/_vendor/chardet/utf1632prober.pyr   zUTF1632Prober.__init__'   s    C!G !sQw", LL	$$$$7<47<4

    c                 
   t                                                       d| _        dgdz  | _        dgdz  | _        t
          j        | _        d| _        d| _	        d| _
        d| _        d| _        d| _        g d| _        d S )Nr	   r
   Fr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s    r   r   zUTF1632Prober.reset6   s    C!G !sQw",$$$$7<47<4 LL			r   c                     |                                  rdS |                                 rdS |                                 rdS |                                 rdS dS )Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)is_likely_utf32beis_likely_utf32leis_likely_utf16beis_likely_utf16ler   s    r   charset_namezUTF1632Prober.charset_nameD   sk    !!## 	:!!## 	:!!## 	:!!## 	:xr   c                     dS )N  r&   s    r   languagezUTF1632Prober.languageQ   s    rr   c                 2    t          d| j        dz            S )N      ?g      @maxr   r&   s    r   approx_32bit_charsz UTF1632Prober.approx_32bit_charsU       3+,,,r   c                 2    t          d| j        dz            S )Nr-   g       @r.   r&   s    r   approx_16bit_charsz UTF1632Prober.approx_16bit_charsX   r1   r   c                    |                                  }|| j        k    ok| j        d         |z  | j        k    oR| j        d         |z  | j        k    o9| j        d         |z  | j        k    o | j        d         |z  | j        k    o| j         S Nr	   r         )r0   MIN_CHARS_FOR_DETECTIONr   EXPECTED_RATIOr   r   r   approx_charss     r   r"   zUTF1632Prober.is_likely_utf32be[   s    ..00t;; 
a </$2EE )!!$|3d6II)!!$|3d6II) $Q',69LL) ((	
r   c                    |                                  }|| j        k    ok| j        d         |z  | j        k    oR| j        d         |z  | j        k    o9| j        d         |z  | j        k    o | j        d         |z  | j        k    o| j         S r5   )r0   r8   r   r9   r   r   r:   s     r   r#   zUTF1632Prober.is_likely_utf32lee   s    ..00t;; 
 #l2T5HH )!!$|3d6II)!!$|3d6II) !!$|3d6II) ((	
r   c                     |                                  }|| j        k    oU| j        d         | j        d         z   |z  | j        k    o.| j        d         | j        d         z   |z  | j        k    o| j         S )Nr   r7   r	   r6   )r3   r8   r   r9   r   r   r:   s     r   r$   zUTF1632Prober.is_likely_utf16beo       ..00t;; 
!!$t';A'>>,N!" )"1%(9!(<<L!") ((	
r   c                     |                                  }|| j        k    oU| j        d         | j        d         z   |z  | j        k    o.| j        d         | j        d         z   |z  | j        k    o| j         S )Nr	   r6   r   r7   )r3   r8   r   r9   r   r   r:   s     r   r%   zUTF1632Prober.is_likely_utf16ley   r>   r   c                 H   |d         dk    s:|d         dk    s.|d         dk    r)|d         dk    rd|d         cxk    rdk    r
n nd| _         |d         dk    s;|d         dk    s/|d         dk    r,|d         dk    r"d|d         cxk    rdk    rn d	S d| _        d	S d	S d	S d	S )
z
        Validate if the quad of bytes is valid UTF-32.

        UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
        excluding 0x0000D800 - 0x0000DFFF

        https://en.wikipedia.org/wiki/UTF-32
        r	   r         r6      Tr7   N)r   r   )r   r   s     r   validate_utf32_charactersz'UTF1632Prober.validate_utf32_characters   s     GqLLAw~~Q1aA$$q'2I2I2I2IT2I2I2I2I2I#'D GqLLAw~~Q1aA$$q'2I2I2I2IT2I2I2I2I2I2I#'D    2I2Ir   c                    | j         s<d|d         cxk    rdk    rn nd| _         nCd|d         cxk    rdk    r
n n-d| _        n%d|d         cxk    rdk    rn nd| _         nd| _        | j        sAd|d         cxk    rdk    rn n	d| _        d	S d|d         cxk    rdk    rn d	S d| _        d	S d	S d|d         cxk    rdk    rn n	d| _        d	S d| _        d	S )
a9  
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        rB   r	      T   rC   Fr   N)r   r   r   r   )r   pairs     r   validate_utf16_charactersz'UTF1632Prober.validate_utf16_characters   sj    ; 		,tAw&&&&$&&&&&?C<<a((((D((((('+$tAw&&&&$&&&&&?D<<'+$; 		,tAw&&&&$&&&&&?C<<<a((((D(((((('+$$$ )( tAw&&&&$&&&&&?D<<<'+$$$r   c                    |D ]}| j         dz  }|| j        |<   |dk    r^|                     | j                   |                     | j        dd                    |                     | j        dd                    |dk    r| j        |xx         dz  cc<   n| j        |xx         dz  cc<   | xj         dz  c_         | j        S )Nr
   r7   r	   r6   r   )r   r   rD   rI   r   r   state)r   byte_strcmod4s       r   feedzUTF1632Prober.feed   s     	 	A=1$DDIdOqyy..ty999..ty1~>>>..ty1~>>>Avv!$'''1,''''$T***a/***MMQMMMzr   c                     | j         t          j        t          j        hv r| j         S |                                 dk    rt          j        | _         n| j        dk    rt          j        | _         | j         S )Ng?i   )r   r   NOT_MEFOUND_ITget_confidencer   r&   s    r   rK   zUTF1632Prober.state   sf    ;<.0EFFF;  4''&/DKK]X%% '-DK{r   c                     |                                  s<|                                 s(|                                 s|                                 rdndS )Ng333333?g        )r%   r$   r#   r"   r&   s    r   rS   zUTF1632Prober.get_confidence   sh     &&(( ))++ ))++	
 ))++DD 		
r   )__name__
__module____qualname____doc__r8   r9   r   r   propertyr'   r+   r0   r3   r"   r#   r$   r%   rD   rI   rO   rK   rS   __classcell__)r   s   @r   r   r      sP         !N    ! ! ! ! ! 
 
 X
   X- - -- - -
 
 

 
 

 
 

 
 
( ( (,, , ,@   
 
 X


 

 

 

 

 

 

r   r   N)charsetproberr   enumsr   r   r*   r   r   <module>r]      si   * ) ( ( ( ( (      F
 F
 F
 F
 F
M F
 F
 F
 F
 F
r   