
    !}gI                       d Z ddlmZ ddlZddlZddlmZmZ ddl	m
Z
 ddlmZ ddlZddlZddlmZmZ dd	lmZmZmZ dd
lmZ ej                            d          ZdZ e
d           G d d                      Z G d d          Z e            Z G d d          Z eej                   	 	 d#d$d            Z! eej"                  d             Z" G d d           Z#d%d"Z$dS )&aZ  `tldextract` accurately separates a URL's subdomain, domain, and public suffix.

It does this via the Public Suffix List (PSL).

    >>> import tldextract

    >>> tldextract.extract('http://forums.news.cnn.com/')
    ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)

    >>> tldextract.extract('http://forums.bbc.co.uk/') # United Kingdom
    ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False)

    >>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan
    ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)

Note subdomain and suffix are _optional_. Not all URL-like inputs have a
subdomain or a valid suffix.

    >>> tldextract.extract('google.com')
    ExtractResult(subdomain='', domain='google', suffix='com', is_private=False)

    >>> tldextract.extract('google.notavalidsuffix')
    ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_private=False)

    >>> tldextract.extract('http://127.0.0.1:8080/deployed/')
    ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)

To rejoin the original hostname, if it was indeed a valid, registered hostname:

    >>> ext = tldextract.extract('http://forums.bbc.co.uk')
    >>> ext.registered_domain
    'bbc.co.uk'
    >>> ext.fqdn
    'forums.bbc.co.uk'
    )annotationsN)
CollectionSequence)	dataclass)wraps   )	DiskCacheget_cache_dir)lenient_netloclooks_like_iplooks_like_ipv6)get_suffix_listsTLDEXTRACT_CACHE_TIMEOUT)z4https://publicsuffix.org/list/public_suffix_list.datzQhttps://raw.githubusercontent.com/publicsuffix/list/master/public_suffix_list.datT)orderc                      e Zd ZU dZded<   ded<   ded<   ded<   edd	            Zedd
            Zedd            Zedd            Z	dS )ExtractResultzA URL's extracted subdomain, domain, and suffix.

    Also contains metadata, like a flag that indicates if the URL has a private suffix.
    str	subdomaindomainsuffixbool
is_privatereturnc                D    | j         r| j        r| j         d| j          S dS )zJoins the domain and suffix fields with a dot, if they're both set.

        >>> extract('http://forums.bbc.co.uk').registered_domain
        'bbc.co.uk'
        >>> extract('http://localhost:8080').registered_domain
        ''
        . )r   r   selfs    U/var/www/py-google-trends/myenv/lib/python3.11/site-packages/tldextract/tldextract.pyregistered_domainzExtractResult.registered_domainH   s4     ; 	24; 	2k11DK111r    c                    | j         r?| j        s| j        r1d                    d | j        | j        | j         fD                       S dS )zReturns a Fully Qualified Domain Name, if there is a proper domain/suffix.

        >>> extract('http://forums.bbc.co.uk/path/to/file').fqdn
        'forums.bbc.co.uk'
        >>> extract('http://localhost:8080').fqdn
        ''
        r   c              3     K   | ]}||V  	d S N ).0is     r   	<genexpr>z%ExtractResult.fqdn.<locals>.<genexpr>_   s(      WW!UVWAWWWWWWr!   r   )r   r   r   joinr   r   s    r   fqdnzExtractResult.fqdnU   sV     ; 	XDK 	X4? 	X88WWT['QWWWWWWrr!   c                f    | j         r)| j        s"| j        st          | j                   r| j         S dS )a  Returns the ipv4 if that is what the presented domain/url is.

        >>> extract('http://127.0.0.1/path/to/file').ipv4
        '127.0.0.1'
        >>> extract('http://127.0.0.1.1/path/to/file').ipv4
        ''
        >>> extract('http://256.1.1.1').ipv4
        ''
        r   )r   r   r   r   r   s    r   ipv4zExtractResult.ipv4b   sE     K	[	$(N	 dk**	
 ;rr!   c                    d}t          | j                  |k    rP| j        d         dk    r?| j        d         dk    r.| j        s'| j        s | j        dd         }t	          |          r|S dS )a  Returns the ipv6 if that is what the presented domain/url is.

        >>> extract('http://[aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1]/path/to/file').ipv6
        'aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1'
        >>> extract('http://[aBcD:ef01:2345:6789:aBcD:ef01:127.0.0.1.1]/path/to/file').ipv6
        ''
        >>> extract('http://[aBcD:ef01:2345:6789:aBcD:ef01:256.0.0.1]').ipv6
        ''
           r   []r   r   )lenr   r   r   r   )r   min_num_ipv6_charsdebracketeds      r   ipv6zExtractResult.ipv6u   s      222A#%%B3&&[ '$(N ' +ad+K{++ #""rr!   N)r   r   )
__name__
__module____qualname____doc____annotations__propertyr    r*   r,   r5   r%   r!   r   r   r   <   s          
 NNNKKKKKK
 
 
 X
 
 
 
 X
    X$    X  r!   r   c                      e Zd ZdZ e            edddefd%dZ	 	 d&d'dZ	 	 d&d'dZ		 	 d&d(dZ
	 d)d*dZ	 d+d,d Zed)d-d"            Z	 d)d.d$ZdS )/
TLDExtractzOA callable for extracting, subdomain, domain, and suffix components from a URL.TFr%   	cache_dir
str | Nonesuffix_list_urlsSequence[str]fallback_to_snapshotr   include_psl_private_domainsextra_suffixescache_fetch_timeoutstr | float | Noner   Nonec                >   |pd}t          d |D                       | _        || _        | j        s|s| j        st          d          || _        || _        d| _        t          |t                    rt          |          n|| _
        t          |          | _        dS )a  Construct a callable for extracting subdomain, domain, and suffix components from a URL.

        Upon calling it, it first checks for a JSON in `cache_dir`. By default,
        the `cache_dir` will live in the tldextract directory. You can disable
        the caching functionality of this module by setting `cache_dir` to `None`.

        If the cached version does not exist, such as on the first run, HTTP
        request the URLs in `suffix_list_urls` in order, and use the first
        successful response for public suffix definitions. Subsequent, untried
        URLs are ignored. The default URLs are the latest version of the
        Mozilla Public Suffix List and its mirror, but any similar document URL
        could be specified. Local files can be specified by using the `file://`
        protocol (see `urllib2` documentation). To disable HTTP requests, set
        this to an empty sequence.

        If there is no cached version loaded and no data is found from the `suffix_list_urls`,
        the module will fall back to the included TLD set snapshot. If you do not want
        this behavior, you may set `fallback_to_snapshot` to False, and an exception will be
        raised instead.

        The Public Suffix List includes a list of "private domains" as TLDs,
        such as blogspot.com. These do not fit `tldextract`'s definition of a
        suffix, so these domains are excluded by default. If you'd like them
        included instead, set `include_psl_private_domains` to True.

        You can specify additional suffixes in the `extra_suffixes` argument.
        These will be merged into whatever public suffix definitions are
        already in use by `tldextract`, above.

        cache_fetch_timeout is passed unmodified to the underlying request object
        per the requests documentation here:
        http://docs.python-requests.org/en/master/user/advanced/#timeouts

        cache_fetch_timeout can also be set to a single value with the
        environment variable TLDEXTRACT_CACHE_TIMEOUT, like so:

        TLDEXTRACT_CACHE_TIMEOUT="1.2"

        When set this way, the same timeout value will be used for both connect
        and read timeouts
        r%   c              3  f   K   | ],}|                                 |                                 V  -d S r$   )strip)r&   urls     r   r(   z&TLDExtract.__init__.<locals>.<genexpr>   sK       &
 &
syy{{&
IIKK&
 &
 &
 &
 &
 &
r!   zThe arguments you have provided disable all ways for tldextract to obtain data. Please provide a suffix list data, a cache_dir, or set `fallback_to_snapshot` to `True`.N)tupler@   rB   
ValueErrorrC   rD   
_extractor
isinstancer   floatrE   r	   _cache)r   r>   r@   rB   rC   rD   rE   s          r   __init__zTLDExtract.__init__   s    d ,1r % &
 &
#3&
 &
 &
 !
 !
 %9!% 	 	d6O 	;   ,G(,@D -s33%E%&&&$ 	 
  	**r!   NrK   r   bool | Nonesessionrequests.Session | Noner   c                2    |                      |||          S )zAlias for `extract_str`.rT   )extract_strr   rK   rC   rT   s       r   __call__zTLDExtract.__call__   s      %@'RRRr!   c                L    |                      t          |          ||          S )a  Take a string URL and splits it into its subdomain, domain, and suffix components.

        I.e. its effective TLD, gTLD, ccTLD, etc. components.

        >>> extractor = TLDExtract()
        >>> extractor.extract_str('http://forums.news.cnn.com/')
        ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
        >>> extractor.extract_str('http://forums.bbc.co.uk/')
        ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False)

        Allows configuring the HTTP request via the optional `session`
        parameter. For example, if you need to use a HTTP proxy. See also
        `requests.Session`.

        >>> import requests
        >>> session = requests.Session()
        >>> # customize your session here
        >>> with session:
        ...     extractor.extract_str("http://forums.news.cnn.com/", session=session)
        ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
        rW   )_extract_netlocr   rY   s       r   rX   zTLDExtract.extract_str   s1    6 ##3!<g $ 
 
 	
r!   3urllib.parse.ParseResult | urllib.parse.SplitResultc                <    |                      |j        ||          S )a  Take the output of urllib.parse URL parsing methods and further splits the parsed URL.

        Splits the parsed URL into its subdomain, domain, and suffix
        components, i.e. its effective TLD, gTLD, ccTLD, etc. components.

        This method is like `extract_str` but faster, as the string's domain
        name has already been parsed.

        >>> extractor = TLDExtract()
        >>> extractor.extract_urllib(urllib.parse.urlsplit('http://forums.news.cnn.com/'))
        ExtractResult(subdomain='forums.news', domain='cnn', suffix='com', is_private=False)
        >>> extractor.extract_urllib(urllib.parse.urlsplit('http://forums.bbc.co.uk/'))
        ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False)
        rW   )r\   netlocrY   s       r   extract_urllibzTLDExtract.extract_urllib  s+    ( ##J3W $ 
 
 	
r!   r_   c                (   |                     dd                               dd                               dd          }d}t          |          |k    rB|d         dk    r6|d         d	k    r*t          |d
d                   rt          d|dd          S |                    d          }|                     |                              ||          \  }}d}	|t          |          cxk    r|	k    r$n n!t          |          rt          d|d|          S |t          |          k    rd                    ||d                    nd}
|dk    r d                    |d |d
z
                     nd}|r||d
z
           nd}t          |||
|          S )Nu   。r   u   ．u   ｡r.   r   r/   r0   r1   r   r   F)r   rW   )rC      )	replacer2   r   r   split_get_tld_extractorsuffix_indexr   r)   )r   r_   rC   rT   netloc_with_ascii_dotsr3   labelsrf   r   num_ipv4_labelsr   r   r   s                r   r\   zTLDExtract._extract_netloc  s    NN8X..WXx((WXx(( 	 &''+===&q)S00&r*c115ad;<< W$R)?PUVVVV'--c22#'#:#: $; $
 $

,v;V,
W
W 	!j 3v;;9999/99999m"?
 ?
9 !%;RLLL4@CKK4O4O&/000UW<HA<M<MCHHV$6lQ&6$67888SU	-9Aq())rY
CCCr!   	fetch_nowc                z    d| _         | j                                         |r|                     |           dS dS )z/Force fetch the latest suffix list definitions.NrW   )rN   rQ   clearre   )r   rj   rT   s      r   updatezTLDExtract.updateA  sN      	5##G#44444	5 	5r!   	list[str]c                l    t          |                     |                                                    S )zReturns the list of tld's used by default.

        This will vary based on `include_psl_private_domains` and `extra_suffixes`
        rW   )listre   tlds)r   rT   s     r   rq   zTLDExtract.tldsJ  s/     D++G+<<AACCDDDr!   _PublicSuffixListTLDExtractorc                ,   | j         r| j         S t          | j        | j        | j        | j        |          \  }}t          ||| j        g          st          d          t          ||t          | j                  | j                  | _         | j         S )a1  Get or compute this object's TLDExtractor.

        Looks up the TLDExtractor in roughly the following order, based on the
        settings passed to __init__:

        1. Memoized on `self`
        2. Local system _cache file
        3. Remote PSL, over HTTP
        4. Bundled PSL snapshot file
        )cacheurlsrE   rB   rT   z)No tlds set. Cannot proceed without tlds.)public_tldsprivate_tlds
extra_tldsrC   )rN   r   rQ   r@   rE   rB   anyrD   rM   rr   rp   rC   )r   rT   rv   rw   s       r   re   zTLDExtract._get_tld_extractorR  s     ? 	#?"$4+& $ 8!%!:%
 %
 %
!\ Kt/BCDD 	JHIII7#%D/00(,(H	
 
 
 r!   )r>   r?   r@   rA   rB   r   rC   r   rD   rA   rE   rF   r   rG   )NNrK   r   rC   rS   rT   rU   r   r   )rK   r]   rC   rS   rT   rU   r   r   r$   )r_   r   rC   rS   rT   rU   r   r   FN)rj   r   rT   rU   r   rG   )rT   rU   r   rn   )rT   rU   r   rr   )r6   r7   r8   r9   r
   PUBLIC_SUFFIX_LIST_URLSCACHE_TIMEOUTrR   rZ   rX   r`   r\   rm   r;   rq   re   r%   r!   r   r=   r=      sH       YY
 !.*A%),1(*2?H+ H+ H+ H+ H+Z 48+/	S S S S S 48+/	
 
 
 
 
D 48+/	
 
 
 
 
8 ,0	$D $D $D $D $DN KO5 5 5 5 5 E E E E XE 26! ! ! ! ! ! !r!   r=   c                  H    e Zd ZdZ	 	 	 dddZe	 ddd            ZdddZdS )Triez:Trie for storing eTLDs with their labels in reverse-order.NFmatchesdict[str, Trie] | Noneendr   r   r   rG   c                8    |r|ni | _         || _        || _        dS )zTODO.N)r   r   r   )r   r   r   r   s       r   rR   zTrie.__init__|  s&     #*1wwr$r!   public_suffixesCollection[str]private_suffixesCollection[str] | Nonec                    t                      }| D ]}|                    |           |g }|D ]}|                    |d           |S )z?Create a Trie from a list of suffixes and return its root node.NT)r   
add_suffix)r   r   	root_noder   s       r   createzTrie.create  sm     FF	% 	) 	)F  ((((#!& 	/ 	/F  ....r!   r   r   c                    | }|                     d          }|                                 |D ].}||j        vrt                      |j        |<   |j        |         }/d|_        ||_        dS )z+Append a suffix's labels to this Trie node.r   TN)rd   reverser   r   r   r   )r   r   r   noderh   labels         r   r   zTrie.add_suffix  st    c"" 	' 	'EDL((&*ffU#<&DD$r!   )NFF)r   r   r   r   r   r   r   rG   r$   )r   r   r   r   r   r   F)r   r   r   r   r   rG   )r6   r7   r8   r9   rR   staticmethodr   r   r%   r!   r   r   r   y  s        DD +/ 		% 	% 	% 	% 	%  48    \$% % % % % % %r!   r   FrK   r   rC   rS   rT   rU   r   c                &    t          | ||          S )N)rC   rT   )TLD_EXTRACTOR)rK   rC   rT   s      r   extractr     s#     )Dg   r!   c                 $    t          j        | i |S r$   )r   rm   )argskwargss     r   rm   rm     s    0000r!   c                  4    e Zd ZdZ	 ddd	ZdddZ	 dddZd
S )rr   z8Wrapper around this project's main algo for PSL lookups.Frv   rn   rw   rx   rC   r   c                >   || _         || _        || _        t          ||z   |z             | _        t          ||z             | _        t                              | j        t          |                    | _        t                              | j                  | _	        d S r$   )
rC   rv   rw   	frozensettlds_incl_privatetlds_excl_privater   r   tlds_incl_private_trietlds_excl_private_trie)r   rv   rw   rx   rC   s        r   rR   z&_PublicSuffixListTLDExtractor.__init__  s     ,G(&(!*;+E
+R!S!S!*;+C!D!D&*kk"Il$;$;'
 '
# '+kk$2H&I&I###r!   NrS   r   frozenset[str]c                4    || j         }|r| j        n| j        S )z,Get the currently filtered list of suffixes.)rC   r   r   )r   rC   s     r   rq   z"_PublicSuffixListTLDExtractor.tlds  s.    &.*.*J' +(D""'	
r!   spltuple[int, bool]c                   || j         }|r| j        n| j        }t          |          }|}t	          |          D ]~}t          |          }||j        v r|dz  }|j        |         }|j        r|}6d|j        v }|r=d|z   |j        v }	|	r||j        d         j        fc S |dz
  |j        d         j        fc S  ||j        fS )zReturn the index of the first suffix label, and whether it is private.

        Returns len(spl) if no suffix is found.
        Nr   *!)	rC   r   r   r2   reversed_decode_punycoder   r   r   )
r   r   rC   r   r'   jr   decoded_labelis_wildcardis_wildcard_exceptions
             r   rf   z*_PublicSuffixListTLDExtractor.suffix_index  s    '.*.*J' +-D'', 	
 HHc]] 	 	E,U33M,,Q|M28 A-K ;(+m(;t|(K%( ;dl3/:::::1udl3/:::::$/!!r!   r   )rv   rn   rw   rn   rx   rn   rC   r   r$   )rC   rS   r   r   )r   rn   rC   rS   r   r   )r6   r7   r8   r9   rR   rq   rf   r%   r!   r   rr   rr     sv        BB -2J J J J J$	
 	
 	
 	
 	
 JN#" #" #" #" #" #" #"r!   rr   r   c                    |                                  }|                    d          }|r,	 t          j        |          S # t          t
          f$ r Y nw xY w|S )Nzxn--)lower
startswithidnadecodeUnicodeError
IndexError)r   loweredlooks_like_punys      r   r   r      si    kkmmG((00O 	;w'''j) 	 	 	D	Ns   A AAr{   rz   )r   r   r   r   )%r9   
__future__r   osurllib.parseurllibcollections.abcr   r   dataclassesr   	functoolsr   r   requestsrt   r	   r
   remoter   r   r   suffix_listr   environgetr}   r|   r   r=   r   r   rZ   r   rm   rr   r   r%   r!   r   <module>r      sP  " "H # " " " " " 				     0 0 0 0 0 0 0 0 ! ! ! ! ! !         + + + + + + + + B B B B B B B B B B ) ) ) ) ) )
9::  M M M M M M M M`f f f f f f f fR 
.% .% .% .% .% .% .% .%b } 05'+     }1 1 1C" C" C" C" C" C" C" C"L     r!   