
    %$}g2N                        d Z ddlZddlZddlZddlZddlZ	 ddlZdZn# e$ r dZY nw xY w	 ddl	Z	dZ
n# e$ r dZ
Y nw xY wddlmZmZ ddlmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZmZmZmZmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-  ej.        e/          Z0 G d d          Z1 e1            Z2 G d de          Z3 G d d          Z4 G d d          Z5 G d d          Z6de7de6fdZ8dS )zd
Defines a URL store which holds URLs along with relevant information and entails crawling helpers.
    NTF)defaultdictdeque)datetime	timedelta)Enum)
itemgetter)Lock)AnyDefaultDictDequeDictListOptionalTupleUnion)RobotFileParser   )normalize_url)filter_links)lang_filtervalidate_url)clear_caches)get_base_urlget_host_and_pathis_known_linkc                   h    e Zd ZdZdZddeddfdZededefd	            Z	dedefd
Z
dedefdZdS )
CompressorzYUse system information on available compression modules and define corresponding methods.)
compressordecompressorTcompressionreturnNc                     |rt           rt          j        n|rt          rt          j        n| j        | _        |rt           rt          j        n|rt          rt          j        n| j        | _        d S N)	HAS_BZ2bz2compressHAS_ZLIBzlib
_identicalr   
decompressr   )selfr    s     P/var/www/py-google-trends/myenv/lib/python3.11/site-packages/courlan/urlstore.py__init__zCompressor.__init__:   su     P&PCLL"-O(O 	 R&RCNN$/QHQ$/ 	    datac                     | S )zReturn unchanged data. )r/   s    r,   r)   zCompressor._identicalF   s	     r.   c                 T    |                      t          j        |d                    S )z9Pickle the data and compress it if a method is available.   )protocol)r   pickledumpsr+   r/   s     r,   r&   zCompressor.compressK   s#    v|D1===>>>r.   c                 P    t          j        |                     |                    S )zADecompress the data if a method is available and load the object.)r5   loadsr   r7   s     r,   r*   zCompressor.decompressO   s     |D--d33444r.   T)__name__
__module____qualname____doc__	__slots__boolr-   staticmethodr
   r)   r&   bytesr*   r1   r.   r,   r   r   6   s        __.I

 

D 

D 

 

 

 

      \?S ?S ? ? ? ?5u 5 5 5 5 5 5 5r.   r   c                       e Zd ZdZdZdZdZdS )Statez0Record state information about a domain or host.r         N)r;   r<   r=   r>   OPENALL_VISITEDBUSTEDr1   r.   r,   rD   rD   W   s#        66DKFFFr.   rD   c                   4    e Zd ZdZdZej        fdeddfdZdS )DomainEntryz7Class to record host-related information and URL paths.)countrulesstate	timestamptotaltuplesrN   r!   Nc                 r    d| _         d | _        || _        d | _        d| _        t                      | _        d S )Nr   )rL   rM   rN   rO   rP   r   rQ   )r+   rN   s     r,   r-   zDomainEntry.__init__b   s4    
04
!
(,
+077r.   )r;   r<   r=   r>   r?   rD   rG   r-   r1   r.   r,   rK   rK   ^   sK        ==KI&+j 3 3e 3T 3 3 3 3 3 3r.   rK   c                   6    e Zd ZdZdZdededdfdZdefdZdS )	UrlPathTuplezBClass storing information for URL paths relative to a domain/host.)urlpathvisitedrU   rV   r!   Nc                 H    |                     d          | _        || _        d S )Nutf-8)encoderU   rV   )r+   rU   rV   s      r,   r-   zUrlPathTuple.__init__o   s     %nnW55$r.   c                 6    | j                             d          S )zGet the URL path as string.rX   )rU   decoder+   s    r,   pathzUrlPathTuple.paths   s    |""7+++r.   )	r;   r<   r=   r>   r?   strr@   r-   r]   r1   r.   r,   rT   rT   k   sc        HH&I% %d %t % % % %,c , , , , , ,r.   rT   c                   4   e Zd ZdZdZ	 	 	 	 	 dIdedee         ded	ed
eddfdZ	 dJde	e         dede
eee         f         fdZdedee         fdZdKdZ	 	 	 dLdedeee                  dee         deee                  ddf
dZ	 dMde	e         dee         de	eeef                  fdZ	 	 	 dNdee	e                  dee	e                  deddfdZ	 	 	 dOdedededee         d eddfd!Zd"e	e         ddfd#ZdKd$Zde	e         fd%Zde	e         fd&Zdedefd'Zdefd(Zdede	e         fd)Zdede	e         fd*Zde	e         de	e         fd+Z de	e         de	eeef                  fd,Z!dedefd-Z"dedefd.Z#dPded/edee         fd0Z$	 	 dQd3e%d4ede	e         fd5Z&	 dRd4ed3ede	e         fd8Z'd9ed:ee(         ddfd;Z)d9edee(         fd<Z*dSd9ed>e%de%fd?Z+de	e         fd@Z,defdAZ-dBe%defdCZ.de	e         fdDZ/dKdEZ0dKdFZ1dGeddfdHZ2dS )TUrlStorezNDefines a class to store domain-classified URLs and perform checks against it.)
compresseddonelanguagestricttrailing_slashurldict_lockFNTra   rc   rd   trailingverboser!   c                     | _         d _        | _        | _        | _        t          t                     _        t                       _	        dt          dt          dd f fd}|r_t          j                            d          sBt          j        t          j        |           t          j        t          j        |           d S d S d S )NFnumframer!   c                     t                               dt          j                                                              t          j        d           d S )Nz<Processing interrupted, dumping unvisited URLs from %s hostsr   )LOGGERdebuglenrf   print_unvisited_urlssysexit)rk   rl   r+   s     r,   dump_unvisited_urlsz.UrlStore.__init__.<locals>.dump_unvisited_urls   sN    LLNDL!!   %%'''HQKKKKKr.   win)ra   rb   rc   rd   re   r   rK   rf   r	   rg   r
   rr   platform
startswithsignalSIGINTSIGTERM)r+   ra   rc   rd   rh   ri   rt   s   `      r,   r-   zUrlStore.__init__   s     !+	'/"$,6A+6N6N66
	S 	 	 	 	 	 	 	 	  	?3<22599 	?M&-)<===M&.*=>>>>>	? 	? 	? 	?r.   r/   rV   c                    t          t                    }t                              |          D ]}	 t	          |          \  }}|du r"t
                              d|           t          | j        Et          || j        | j
        | j                  du r"t
                              d|           t          t          || j
        | j        | j                  }t          |          \  }}||                             t          ||                     # t           t          f$ r t
                              d|           Y w xY w|S )NFzInvalid URL: %szWrong language: %s)rd   rc   re   zDiscarding URL: %s)r   r   dictfromkeysr   rn   ro   
ValueErrorrc   r   rd   re   r   r   appendrT   	TypeErrorwarning)	r+   r/   rV   	inputdicturlvalidation_result
parsed_urlhostinforU   s	            r,   _buffer_urlszUrlStore._buffer_urls   s`    <Gu;M;M	==&& 	: 	:C:0<S0A0A-!:$--LL!2C888$$ M-#T]DK9L   
 LL!5s;;;$$*;!]#'#6	  
 %6j$A$A!'(#**<+I+IJJJJz* : : :3S99999:s   C!D,EEdomainc                     || j         v rC| j        r*t                              | j         |         j                  S | j         |         j        S t                      S r#   )rf   ra   
COMPRESSORr*   rQ   r   r+   r   s     r,   
_load_urlszUrlStore._load_urls   sQ    T\!! J!,,T\&-A-HIII<'..wwr.   c                     | j         sYt          d | j                                        D                       r+| j        5  d| _         d d d            d S # 1 swxY w Y   d S d S d S )Nc              3   @   K   | ]}|j         t          j        k    V  d S r#   rN   rD   rG   .0vs     r,   	<genexpr>z%UrlStore._set_done.<locals>.<genexpr>   s+       V V1EJ!6 V V V V V Vr.   T)rb   allrf   valuesrg   r\   s    r,   	_set_donezUrlStore._set_done   s    y 	!S V V@S@S@U@U V V VVV 	! ! ! 	! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !	! 	! 	! 	!s   AAAto_rightrO   to_leftc                 <   |                     d          rd|dd          z   }|| j        v r|}nH|                     d          r3d|dd          z   }|| j        v r| j        |         | j        |<   | j        |= || j        v rB| j        |         j        t          j        u rd S |                     |          }d |D             nt                      }t                      |!|                    fd|D                        |!|	                    fd	|D                        | j
        5  | j        r+t                              |          | j        |         _        n|| j        |         _        t          |          | j        |         _        ||| j        |         _        t%          d
 |D                       rt          j        | j        |         _        n*t          j        | j        |         _        | j        rd| _        d d d            d S # 1 swxY w Y   d S )Nzhttp://https   zhttps://httpr3   c                 6    h | ]}|                                 S r1   r]   r   us     r,   	<setcomp>z'UrlStore._store_urls.<locals>.<setcomp>   s     ,,,!QVVXX,,,r.   c              3   `   K   | ](}t          |                                          $|V  )d S r#   r   r]   r   tknowns     r,   r   z'UrlStore._store_urls.<locals>.<genexpr>   s:      RRa=53Q3QRRRRRRRr.   c              3   `   K   | ](}t          |                                          $|V  )d S r#   r   r   s     r,   r   z'UrlStore._store_urls.<locals>.<genexpr>   s:      UU!mAFFHHe6T6TUAUUUUUUr.   c              3   $   K   | ]}|j         V  d S r#   )rV   r   s     r,   r   z'UrlStore._store_urls.<locals>.<genexpr>   s$      ++19++++++r.   F)rw   rf   rN   rD   rI   r   r   setextend
extendleftrg   ra   r   r&   rQ   rp   rP   rO   r   rH   rG   rb   )r+   r   r   rO   r   	candidateurlsr   s          @r,   _store_urlszUrlStore._store_urls   sv    Y'' 
	,&*,IDL(("z** 	,+IDL(('+|I'>V$L+ T\!!|F#)U\99??6**D,,t,,,EE77DEEE KKRRRR8RRRRRROOUUUUwUUUUUUZ 	& 	& 3.8.A.A$.G.GV$++.2V$+),TDL &$1:V$.++d+++++ &-2->V$**-2ZV$*9 & %DI	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   ,CHHHr   switchc                 (   d }i }t                               |          }t          |          D ]V}t          |          \  }}||k    r!|}d |                     |          D             }||v r|dk    s|dk    r||         r||= Wt          |          S )Nc                 B    i | ]}|                                 |j        S r1   )r]   rV   r   s     r,   
<dictcomp>z)UrlStore._search_urls.<locals>.<dictcomp>  s$    VVVqqvvxxVVVr.   r   rE   )r|   r}   sortedr   r   list)	r+   r   r   last_domainknown_pathsremaining_urlsr   r   rU   s	            r,   _search_urlszUrlStore._search_urls  s     &*13t,,.)) 
	( 
	(C 1# 6 6Hg;&&&VVDOOH<U<UVVV+%%!!G0D"3'N###r.   
appendleftc                 &   |rE|                      ||                                          D ]\  }}|                     ||           |rE|                      ||                                          D ]\  }}|                     ||           dS dS )zAdd a list of URLs to the (possibly) existing one.
        Optional: append certain URLs to the left,
        specify if the URLs have already been visited.)r   )r   N)r   itemsr   )r+   r   r   rV   host	urltupless         r,   add_urlszUrlStore.add_urls  s      	;#'#4#4T7#C#C#I#I#K#K ; ;i  	 :::: 	:#'#4#4Z#I#I#O#O#Q#Q : :i  y 9999	: 	:: :r.   
htmlstringr   externallangwith_navc           	          t          |          }|                     |          }t          ||||p| j        || j        |          \  }}	|                     ||	           dS )zJFind links in a HTML document, filter them and add them to the data store.)r   r   r   r   rM   rd   r   )r   r   N)r   	get_rulesr   rc   rd   r   )
r+   r   r   r   r   r   base_urlrM   linkslinks_prioritys
             r,   add_from_htmlzUrlStore.add_from_html-  sw      $$x(( ,!&;!
 !
 !
~ 	5^<<<<<r.   domainsc                    | j         5  |D ]$}t          t          j                  | j        |<   %	 ddd           n# 1 swxY w Y   |                                  t          j                    }t          	                    d|           dS )z)Declare domains void and prune the store.)rN   Nz'%s objects in GC after UrlStore.discard)
rg   rK   rD   rI   rf   r   gccollectrn   ro   )r+   r   drk   s       r,   discardzUrlStore.discardD  s    Z 	B 	B B B"-EL"A"A"AQB	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	B 	jll>DDDDDs   (=AAc                     | j         5  t          t                    | _        ddd           n# 1 swxY w Y   t	                       t          j                    }t                              d|           dS )zRe-initialize the URL store.Nz UrlStore reset, %s objects in GC)	rg   r   rK   rf   r   r   r   rn   ro   )r+   rk   s     r,   resetzUrlStore.resetM  s    Z 	4 	4&{33DL	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4jll7=====s   .22c                 N    t          | j                                                  S )z#Return all known domains as a list.)r   rf   keysr\   s    r,   get_known_domainszUrlStore.get_known_domainsW  s    DL%%''(((r.   c                 H    d | j                                         D             S )ziFind all domains for which there are unvisited URLs
        and potentially adjust done meta-information.c                 B    g | ]\  }}|j         t          j        k    |S r1   r   )r   r   r   s      r,   
<listcomp>z2UrlStore.get_unvisited_domains.<locals>.<listcomp>^  s*    LLLdaag6K6K6K6K6Kr.   )rf   r   r\   s    r,   get_unvisited_domainszUrlStore.get_unvisited_domains[  s'     MLdl0022LLLLr.   c                 X    || j         v r | j         |         j        t          j        k    S dS )z9Tell if all known URLs for the website have been visited.F)rf   rN   rD   rG   r   s     r,   is_exhausted_domainzUrlStore.is_exhausted_domain`  s+    T\!!<'-;;ur.   c                 D    t          |                                           S )zFReturn the number of websites for which there are still URLs to visit.)rp   r   r\   s    r,   unvisited_websites_numberz"UrlStore.unvisited_websites_numberg  s    4--//000r.   c                 F    fd|                                D             S )zLGet all already known URLs for the given domain (ex. "https://example.org").c                 >    g | ]}|                                 z   S r1   r   r   r   r   s     r,   r   z,UrlStore.find_known_urls.<locals>.<listcomp>o  s&    CCCa!CCCr.   )r   r   s    `r,   find_known_urlszUrlStore.find_known_urlsm  s)    CCCC4??6+B+BCCCCr.   c                 t    |                                s!fd|                               D             S g S )z,Get all unvisited URLs for the given domain.c                 L    g | ] }|j         	|                                z   !S r1   )rV   r]   r   s     r,   r   z0UrlStore.find_unvisited_urls.<locals>.<listcomp>t  s-    XXX!aiXFQVVXX%XXXr.   )r   r   r   s    `r,   find_unvisited_urlszUrlStore.find_unvisited_urlsq  sE    ''// 	YXXXXtv/F/FXXXX	r.   c                 0    |                      |d          S )z:Take a list of URLs and return the currently unknown ones.r   r   r   r+   r   s     r,   filter_unknown_urlszUrlStore.filter_unknown_urlsw        a 000r.   c                 0    |                      |d          S )z<Take a list of URLs and return the currently unvisited ones.rE   r   r   r   s     r,   filter_unvisited_urlszUrlStore.filter_unvisited_urls{  r   r.   c                 J    t          |                     |g                     S )z0Check if the given URL has already been visited.)r@   r   )r+   r   s     r,   has_been_visitedzUrlStore.has_been_visited  s#    22C599::::r.   c                 h    t          |          \  }}|d |                     |          D             v S )z/Check if the given URL has already been stored.c                 6    h | ]}|                                 S r1   r   r   s     r,   r   z$UrlStore.is_known.<locals>.<setcomp>  s     GGG16688GGGr.   )r   r   )r+   r   r   rU   s       r,   is_knownzUrlStore.is_known  s:    -c22'GGT__X-F-FGGGGGr.   
as_visitedc                    |                      |          s|                     |          }|D ]}|j        s|rjd|_        | j        5  | j        |         xj        dz  c_        ddd           n# 1 swxY w Y   |                     ||t          j                               ||	                                z   c S | j        5  t          j        | j        |         _        ddd           n# 1 swxY w Y   |                                  dS )zSRetrieve a single URL and consider it to be visited (with corresponding timestamp).Tr   NrO   )r   r   rV   rg   rf   rL   r   r   nowr]   rD   rH   rN   r   )r+   r   r   
url_tuplesr   s        r,   get_urlzUrlStore.get_url  s    ''// 	/00J! / /{ /! W&*!Z < < L066!;66< < < < < < < < < < < < < < <((x|~~(VVV!CHHJJ..../ Z 	; 	;).):DL &	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	; 	;ts$   A..A2	5A2	C,,C03C0      $@'  
time_limitmax_urlsc                    g }| j                                         D ]\  }}|j        t          j        k    r|j        r1t          j                    |j        z
                                  |k    rA| 	                    |          }|*|
                    |           t          |          |k    r n|                                  |S )zaGet a list of immediately downloadable URLs according to the given
        time limit per domain.)rf   r   rN   rD   rG   rO   r   r   total_secondsr   r   rp   r   )r+   r   r   r   websiteentryr   s          r,   get_download_urlszUrlStore.get_download_urls  s     "l0022 	 	NGU{ej((OLNNU_4CCEE
RRll7++?KK$$$4yyH,,r.   d   
   c                    |                                  }|sg S |t          |          z  pd}g }|D ]}|                     |          }g }|D ]}	t          |          |k    s#t          |          t          |          z   |k    r np|	j        sh|                    |	                                           d|	_        | j        5  | j        |         xj        dz  c_        ddd           n# 1 swxY w Y   t          j
                    }
| j        |         j        }|r|
|z
                                  |k    rd}n)|t          |
|z
                                  d          z
  }|D ]!}|                    |||z   f           ||z  }"|
t          d||z
            z   }|                     |||           |                                  t#          |t%          d                    S )	zcGet up to the specified number of URLs along with a suitable
        backoff schedule (in seconds).r   TNg        z.2fr   r   )key)r   rp   r   rV   r   r]   rg   rf   rL   r   r   rO   r   floatr   r   r   r   r   )r+   r   r   	potential
per_domaintargetsr   r   urlpathsr   r   original_timestampschedule_secsrU   
total_diffs                  r,   establish_download_schedulez$UrlStore.establish_download_schedule  s[    ..00	 	IY/41
+- "	G "	GF00J"$H! 
8 
8MMZ//Gs8}}4AAE{ 8OOCHHJJ///"&CK 8 8V,22a7228 8 8 8 8 8 8 8 8 8 8 8 8 8 8 ,..C!%f!5!?&,,;;==
JJ # *U00??AAGG. . ! $ , ,v/?@AAA+yMJ,FGGGJVZ:FFFFg:a==1111s   C++C/2C/r   rM   c                 l    | j         rt                              |          }|| j        |         _        dS )z)Store crawling rules for a given website.N)ra   r   r&   rf   rM   )r+   r   rM   s      r,   store_ruleszUrlStore.store_rules  s4    ? 	/''..E&+W###r.   c                     || j         v rC| j        r*t                              | j         |         j                  S | j         |         j        S dS )z7Return the stored crawling rules for the given website.N)rf   ra   r   r*   rM   )r+   r   s     r,   r   zUrlStore.get_rules  sM    dl"" J!,,T\'-B-HIII<(..tr.   r3   defaultc                     d}|                      |          }	 |                    d          }n# t          $ r Y nw xY w|p|S )zBReturn the delay as extracted from robots.txt, or a given default.N*)r   crawl_delayAttributeError)r+   r   r  delayrM   s        r,   get_crawl_delayzUrlStore.get_crawl_delay  s_    w''	%%c**EE 	 	 	D	 s   / 
<<c                 H    d | j                                         D             S )z2Return all download counts for the hosts in store.c                     g | ]	}|j         
S r1   rL   r   s     r,   r   z+UrlStore.get_all_counts.<locals>.<listcomp>
  s    777A777r.   )rf   r   r\   s    r,   get_all_countszUrlStore.get_all_counts  s$    77!4!4!6!67777r.   c                 b    t          d | j                                        D                       S )z!Find number of all URLs in store.c              3   $   K   | ]}|j         V  d S r#   )rP   r   s     r,   r   z,UrlStore.total_url_number.<locals>.<genexpr>  s$      ::q17::::::r.   )sumrf   r   r\   s    r,   total_url_numberzUrlStore.total_url_number  s-    ::DL$7$7$9$9::::::r.   	thresholdc                 h    t          fd| j                                        D                       S )z^Find out if the download limit (in seconds) has been reached for one of the websites in store.c              3   .   K   | ]}|j         k    V  d S r#   r  )r   r   r  s     r,   r   z6UrlStore.download_threshold_reached.<locals>.<genexpr>  s*      GGA17i'GGGGGGr.   )anyrf   r   )r+   r  s    `r,   download_threshold_reachedz#UrlStore.download_threshold_reached  s4    GGGG1D1D1F1FGGGGGGr.   c                 n    g }| j         D ]*}|                    |                     |                     +|S )z Return a list of all known URLs.)rf   r   r   )r+   r   r   s      r,   	dump_urlszUrlStore.dump_urls  s@    l 	6 	6FKK,,V445555r.   c                     | j         D ]9}t          d                    |                     |                    d           :dS )z"Print all unvisited URLs in store.
TflushN)rf   printjoinr   r   s     r,   rq   zUrlStore.print_unvisited_urls  sQ    l 	K 	KF$))D44V<<==TJJJJJ	K 	Kr.   c           	          | j         D ]Et          d                    fd|                               D                       d           FdS )z5Print all URLs in store (URL + TAB + visited or not).r&  c                 j    g | ]/} |                                  d t          |j                   0S )	)r]   r^   rV   r   s     r,   r   z'UrlStore.print_urls.<locals>.<listcomp>%  sL        "?16688??s19~~??  r.   Tr'  N)rf   r)  r*  r   r   s    @r,   
print_urlszUrlStore.print_urls   s    l 		 		F		   !%!8!8        		 		r.   filenamec                     | ` t          |d          5 }t          j        | |           ddd           dS # 1 swxY w Y   dS )zWrite the URL store to disk.wbN)rg   openr5   dump)r+   r/  outputs      r,   writezUrlStore.write/  s    J(D!! 	&VKf%%%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   6::)FNFTF)F)r!   N)NNNr#   )NNF)FNTr:   )r   r   )r   r   )r3   )3r;   r<   r=   r>   r?   r@   r   r^   r-   r   r   r   rT   r   r   r   r   r   intr   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r  r   r  r   r  r  r  r"  r$  rq   r.  r5  r1   r.   r,   r`   r`   x   s>       TTI !"&? ?? 3-? 	?
 ? ? 
? ? ? ?< 05   I (, 	S%--	.       D |)<    ! ! ! ! 37(,154& 4&4& 5./4& H%	4&
 %-.4& 
4& 4& 4& 4&n 8<$ $I$'/}$	eCHo	$ $ $ $4 %)*.	: :tCy!: T#Y': 	:
 
: : : :( "= == = 	=
 sm= = 
= = = =.EtCy ET E E E E> > > >)49 ) ) ) )MtCy M M M M
# $    13 1 1 1 1Dc Dd3i D D D D# $s)    1S	 1d3i 1 1 1 11$s) 1U38_8M 1 1 1 1;C ;D ; ; ; ;HC HD H H H H c t x}    . !   
c	   0 6822 2222/222	c22 22 22 22l,3 ,x/H ,T , , , , /)B    	  	 s 	 U 	 5 	  	  	  	 8S	 8 8 8 8;# ; ; ; ;HE Hd H H H H49    K K K K
   &c &d & & & & & &r.   r`   r/  r!   c                     t          | d          5 }t          j        |          }ddd           n# 1 swxY w Y   t                      |_        |S )zLoad a URL store from disk.rbN)r2  r5   loadr	   rg   )r/  r4  	url_stores      r,   
load_storer;  6  s    	h		 (K''	( ( ( ( ( ( ( ( ( ( ( ( ( ( (ffIOs   266)9r>   r   loggingr5   rx   rr   r%   r$   ImportErrorr(   r'   collectionsr   r   r   r   enumr   operatorr   	threadingr	   typingr
   r   r   r   r   r   r   r   urllib.robotparserr   cleanr   corer   filtersr   r   metar   urlutilsr   r   r   	getLoggerr;   rn   r   r   rD   rK   rT   r`   r^   r;  r1   r.   r,   <module>rJ     s    
			    



JJJGG   GGGKKKHH   HHH + * * * * * * * ( ( ( ( ( ( ( (                  	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 / . . . . .                   . . . . . . . .       D D D D D D D D D D 
	8	$	$5 5 5 5 5 5 5 5< Z\\
    D   
3 
3 
3 
3 
3 
3 
3 
3
, 
, 
, 
, 
, 
, 
, 
,{& {& {& {& {& {& {& {&|       s    ))4 >>