
    %$}ge(                        d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZmZmZ ddlmZmZ ej        j        d	k    r, eej        d
          rej                            d           ej        j        d	k    r, eej        d
          rej                            d           de	de	fdZde	de	fdZde	de	fdZddZde	ddfdZ e!dk    r e             dS dS )z.
Implementing a basic command-line interface.
    N)version)python_version)Any   )	cli_crawlercli_discoveryexaminefile_processing_pipelineload_blacklistload_input_dictprobe_homepageurl_processing_pipelinewrite_result)PARALLEL_CORESSUPPORTED_FMT_CLIzUTF-8reconfigurezutf-8)encodingparserreturnc           
      	   |                      dd          }|                                }|                      dd          }|                      dd          }|                                }|                      dd          }|                      d	d
          }|                                }|                    dddt                     |                    ddt                     |                    dddt                     |                    ddt          t
                     |                    dddt                     |                    ddd           |                    ddd t                     |                    d!d"t                     |                    d#d$d           |                    d%d&d'd(d)*           |                    d+d,d'd(d)*           |                    d-d.d'd(d)*           |                    d/d0d'd(d)*           |                    d1d2d'd(d)*           |                    d3d4d           |                    d5d6d7t          8           |                    d9d:d;d           |                    d<d=d           |                    d>d?d           |                    d@dAd           |                    dBdCdD           |                    dEdFdD           |                    dGdHd           |                    dIdJd           |                    dKdLt                     |                    dMdNd           |                    dOdPt                     |                    dQdRd           |                    dSdTd           |                    dUdVt          dWX           |                    dYdZd           |                    d[d\d           |                    d]d^d           |                    d_d`d           |                    dadbd           |                    dcddd           |                    dedfd           |                     dgdhdidjdkl           |                     dmdndodpt          dq           drt                       s           | S )tz,Add argument groups and arguments to parser.Inputz%URLs, files or directories to processOutputz+Determines if and how files will be written
NavigationzLink discovery and web crawling
Extractionz-Customization of text and metadata processingFormatzSelection of the output formatz-iz--input-filez'name of input file for batch processing)helptypez--input-dirz5read files from a specified directory (relative path)z-uz--URLzcustom URL downloadz
--parallelzAspecify a number of cores/threads for downloads and/or processing)r   r   defaultz-bz--blacklistz:file containing unwanted URLs to discard during processingz--listz/display a list of URLs without downloading them
store_true)r   actionz-oz--output-dirz6write results in a specified directory (relative path)z--backup-dirz9preserve a copy of downloaded files in a backup directoryz--keep-dirsz-keep input directory structure and file namesz--feedz.look for feeds and/or pass a feed URL as input?TF)r   nargsconstr   z	--sitemapzBlook for sitemaps for the given website and/or enter a sitemap URLz--crawlzJcrawl a fixed number of pages within a website starting from the given URLz	--explorez=explore the given websites (combination of sitemap and crawl)z--probez?probe for extractable content (works best with target language)z
--archivedz=try to fetch URLs from the Internet Archive if downloads failz--url-filterzLonly process/output URLs containing these patterns (space-separated strings)+)r   r"   r   z-fz--fastz!fast (without fallback detection)z--formattingz,include text formatting (bold, italic, etc.)z--linksz5include links along with their targets (experimental)z--imagesz.include image sources in output (experimental)z--no-commentszdon't output any commentsstore_falsez--no-tableszdon't output any table elementsz--only-with-metadataz4only output those documents with title, URL and datez--with-metadataz&extract and add metadata to the outputz--target-languagez*select a target language (ISO 639-1 codes)z--deduplicatez+filter out duplicate documents and sectionsz--config-filezAoverride standard extraction parameters with a custom config filez--precisionz;favor extraction precision (less noise, possibly less text)z--recallz8favor extraction recall (more text, possibly more noise)z--output-formatzdetermine output formattxt)r   choicesr   z--csvzshorthand for CSV outputz--htmlzshorthand for HTML outputz--jsonzshorthand for JSON outputz
--markdownzshorthand for MD outputz--xmlzshorthand for XML outputz--xmlteizshorthand for XML TEI outputz--validate-teizvalidate XML TEI outputz-vz	--verbosecountr   z&increase logging verbosity (-v or -vv))r    r   r   z	--versionz!show version information and exitr   zTrafilatura trafilaturaz
 - Python )r   r    r   )	add_argument_groupadd_mutually_exclusive_groupadd_argumentstrintr   r   r   r   )	r   group1	group1_exgroup2group3	group3_exgroup4group5	group5_exs	            O/var/www/py-google-trends/myenv/lib/python3.11/site-packages/trafilatura/cli.pyadd_argsr8      s    &&w0WXXF3355I&&x1^__F&&|5VWWF3355I&&|5deeF&&x1QRRF3355I4F   " " " =T   " " " 42   " " " ` .  : : : mY   " " " N+  - - - nU   " " " X   " " " L+  - - - 8M!u  > > > ;a!u  > > > 9i!u  > > > ;\!u  > > > 9^!u  > > > \+  - - - k!  - - - h@+  - - - K+  - - - 	T+  - - - 
M+  - - - 8,  . . . >,  . . . .S+  - - - )E+  - - - +I   " " " J+  - - - `   " " " Z+  - - - 
W+  - - -
 ,6 1 %  ' ' ' 77+  - - - 88+  - - - 88+  - - - <6+  - - - 77+  - - - :;+  - - - (6+  - - - k'1E     0Sw}55SSAQAQSS	     M    argsc                     t          j        d          }t          |          }t          |                                          S )z(Define parser for command-line argumentsz&Command-line interface for Trafilatura)description)argparseArgumentParserr8   map_args
parse_args)r:   r   s     r7   r@   r@      s=    $1YZZZFfFF%%''(((r9   c                 B    dD ]}t          | |          r	|| _         n| S )z2Map existing options to format and output choices.)csvhtmljsonmarkdownxmlxmltei)getattroutput_format)r:   otypes     r7   r?   r?      s?     F  4 	!&DE	 Kr9   c                  f    t          t          j        dd                   } t          |            dS )z  Run as a command-line utility. r   N)r@   sysargvprocess_args)r:   s    r7   mainrO      s-    chqrrl##Dr9   c                    d}| j         dk    r+t          j        t          j        t          j                   n5| j         dk    r*t          j        t          j        t          j                   | j        rt          | j                  | _        | j	        s| j
        s| j        rt          |           }n| j        rt          |            n| j        rt!          |            n| j        rt%          |            nv| j        s| j        r t+          |           }t-          | |          }nHt/          t          j        j                                        | | j                  }t7          ||            |dk    rt          j        |           dS dS )z8Perform the actual processing according to the argumentsr   r   )streamlevel   )urlN)verboseloggingbasicConfigrL   stdoutWARNINGDEBUG	blacklistr   explorefeedsitemapr   crawlr   prober   	input_dirr
   
input_fileURLr   r   r	   stdinbufferreadr   exit)r:   	exit_code	url_storeresults       r7   rN   rN      s   I|q3:W_EEEEE			3:W]CCCC~ 8'77
 | #ty #DL #!$''		 
 #D 
 #t 
 # &&&& 
 #DH ##D))	+D)<<		 )..00$DHEEEVT""" A~~ ~r9   __main__)r   N)"__doc__r=   rV   rL   importlib.metadatar   platformr   typingr   	cli_utilsr   r   r	   r
   r   r   r   r   r   settingsr   r   rX   r   hasattrr   stderrr8   r@   r?   rO   rN   __name__ r9   r7   <module>rv      s,      



 & & & & & & # # # # # #      ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? 8 7 7 7 7 7 7 7 :'!!ggcj-&H&H!JG,,,:'!!ggcj-&H&H!JG,,,PS PS P P P Pf)S )S ) ) ) )3 3       *s *t * * * *Z zDFFFFF r9   