
    %$}g)                        d Z ddlZddlZddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ dd	lmZ  ej        e          Zd
e
de
fdZ	 	 	 	 ddee         dededee         dedeeeef                  fdZdedeee                  fdZd
e
ddfdZd
e
ddfdZd
e
ddfdZddZ edk    r e              dS dS )z,
Implements a basic command-line interface.
    N)ProcessPoolExecutoras_completed)islice)AnyIteratorListOptionalTuple   )	check_url)_make_sample)UrlStoreargsreturnc                 J   t          j        d          }|                    dd          }|                    dddt          d	           |                    d
ddt          d	           |                    dddt                     |                    dddd           |                    dddt
                     |                    dd          }|                    ddd           |                    dddt                     |                    d d!d"d           |                    d#d$          }|                    d%d&t
                     |                    d'd(t
                     |                    d)d*t
                     |                                S )+z(Define parser for command-line argumentsz"Command-line interface for Courlan)descriptionzI/OzManage input and outputz-iz--inputfilezname of input file (required)T)helptyperequiredz-oz--outputfilezname of output file (required)z-dz--discardedfilez/name of file to store discarded URLs (optional))r   r   z-vz	--verbosezincrease output verbosity
store_true)r   actionz-pz
--parallelz4number of parallel processes (not used for sampling)	FilteringzConfigure URL filtersz--strictzperform more restrictive testsz-lz
--languagez$use language filter (ISO 639-1 code)z-rz--redirectszcheck redirectsSamplingz+Use sampling by host, configure sample sizez--samplezsize of sample per domainz--exclude-maxz%exclude domains with more than n URLsz--exclude-minz%exclude domains with less than n URLs)argparseArgumentParseradd_argument_groupadd_argumentstrint
parse_args)r   
argsparsergroup1group2group3s        K/var/www/py-google-trends/myenv/lib/python3.11/site-packages/courlan/cli.pyr    r       s7   (8  J **52KLLF
,     -     >	     k ;L     C	     **;8OPPF
9,     l!Gc     m"3L     **A F 
)D3OOO
EC     EC       """    Furlsstrictwith_redirectslanguagewith_navc                     g }| D ]M}t          |||||          }||                    d|d         f           6|                    d|f           N|S )z6Internal function to be used with CLI multiprocessing.)r(   r)   r*   r+   NTr   F)r   append)r'   r(   r)   r*   r+   resultsurlresults           r%   _cli_check_urlsr1   O   s     G ) ))
 
 
 NND&),----NNE3<((((Nr&   	inputfilec              #      K   t          | ddd          5 }	 d t          |d          D             }|s	 ddd           dS |V  /# 1 swxY w Y   dS )	zRead input line in batchesrutf-8ignoreencodingerrorsTc                 6    g | ]}|                                 S  )strip).0lines     r%   
<listcomp>z _batch_lines.<locals>.<listcomp>k   s     EEEdTZZ\\EEEr&   i N)openr   )r2   inputfhbatchs      r%   _batch_linesrC   g   s      	iwx	@	@	@ G	EEfWe.D.DEEEE 	       
 KKK		         s   AAAAc                    | j         r%t                              t          j                   n$t                              t          j                   t          dd| j        | j                   }t          | j	                  D ]}|
                    |           t          | j        dd          5 }t          || j        | j        | j                  D ]}|                    |dz              	 ddd           dS # 1 swxY w Y   dS )	zSample URLs on the CLI.TN)
compressedr*   r(   verbosewr5   r8   )exclude_minexclude_max
)rF   LOGGERsetLevelloggingDEBUGERRORr   r(   rC   r2   add_urlsr@   
outputfiler   samplerI   rJ   write)r   urlstorerB   outputfhr/   s        r%   _cli_samplerW   q   sT   | '&&&&&&&$t{DL  H dn-- ! !%    	dosW	5	5	5 'K((	
 
 
 	' 	'C NN3:&&&&	'' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 's   1>C==DDc                 .   
 t           j                  5 
t           j        dd          5 }t           j        ddd          5 }	 g }t          |          d	k     rHt          t          |d	                    }|sn(|                    |           t          |          d	k     H|sn 
fd
|D             }t          |          D ]}|
                                D ]k\  }}|r|                    |dz                j        Dt           j        dd          5 }	|	                    |           ddd           n# 1 swxY w Y   l	 ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   ddd           dS # 1 swxY w Y   dS )z7Read input file bit by bit and process URLs in batches.)max_workersrG   r5   rH   r4   r6   r7   Ti  c              3   t   K   | ]2}                     t          |j        j        j                   V  3dS ))r(   r)   r*   N)submitr1   r(   	redirectsr*   )r=   rB   r   executors     r%   	<genexpr>z_cli_process.<locals>.<genexpr>   sa       	 	  #;#'>!]    	 	 	 	 	 	r&   rK   Na)r   parallelr@   rR   r2   lenlistr   r-   r   r0   rT   discardedfile)r   rV   rA   batches
line_batchfuturesfuturevalidr/   	discardfhr]   s   `         @r%   _cli_processrj      s   		7	7	7 $18TwF F F $1	4gh  $1 
	1Gg,,%%!&$"7"788
! z***	 g,,%%  	 	 	 	 	 %	 	 	G 'w// 	1 	1"(--// 1 1JE3 1 sTz2222+7! .g   1&%OOC0001 1 1 1 1 1 1 1 1 1 1 1 1 1 11/	1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1 $1s}   F
E2	CED?3E?EEEEE2EE2"E#E2&F
2E6	6F
9E6	:F

FFc                 T    | j         rt          |            dS t          |            dS )z+Start processing according to the argumentsN)rS   rW   rj   r   s    r%   process_argsrm      s4    { DTr&   c                  f    t          t          j        dd                   } t          |            dS )zRun as a command-line utility.r   N)r    sysargvrm   rl   s    r%   mainrq      s-    chqrrl##Dr&   __main__)FFNF)r   N)!__doc__r   rN   ro   concurrent.futuresr   r   	itertoolsr   typingr   r   r   r	   r
   corer   samplingr   rU   r   	getLogger__name__rL   r    r   boolr1   rC   rW   rj   rm   rq   r;   r&   r%   <module>r|      s,      



 @ @ @ @ @ @ @ @       7 7 7 7 7 7 7 7 7 7 7 7 7 7       " " " " " "       
	8	$	$7#S 7#S 7# 7# 7# 7#x  " 
s)  sm	
  
%c	
   0C HT#Y$7    'c 'd ' ' ' '.&1s &1t &1 &1 &1 &1Rs t        zDFFFFF r&   