
    %$}g|"                     *   U d Z ddlZddlZddlmZ ddlmZmZmZmZm	Z	 ddl
Z
	 ddlmZ n# e$ r dZY nw xY wddlmZ ddlmZmZmZ dd	lmZ  ej        e          Zd
dhZee         ed<    e
j        e
j        j                   e
j         !                    ddg d          Z" e
j#        e"          Z$ eddd
d          Z% ej&        dej'                  Z( ej&        dej'                  Z) G d d          Z*dede+fdZ,de-de+fdZ.de-dee         fdZ/de	e-ef         defdZ0d edefd!Z1d"edee         fd#Z2d$ede+fd%Z3d&ed$edefd'Z4d(edee         fd)Z5d(e	e-eef         dee         fd*Z6d+ed,ee         defd-Z7d.edefd/Z8dS )0z7
Module bundling functions related to HTML processing.
    N)datetime)AnyListOptionalSetUnion)detect)
from_bytes)HtmlElement
HTMLParser
fromstring   )MAX_FILE_SIZEutf-8utf_8UNICODE_ALIASES   )i  i  i  i  i  )totalconnectstatus_forcelist)retriesFT)collect_idsdefault_doctypeencoding
remove_pisz^< ?! ?DOCTYPE.+?/ ?>z(<html.*?)\s*/>c                   :    e Zd ZdZg dZdedededededd	fd
Zd	S )	Extractorz0Defines a class to store all extraction options.	extensiveformatmaxminoriginalextensive_searchmax_datemin_dateoriginal_dateoutputformatreturnNc                 L    || _         || _        || _        || _        || _        d S )Nr   )selfr$   r%   r&   r'   r(   s         N/var/www/py-google-trends/myenv/lib/python3.11/site-packages/htmldate/utils.py__init__zExtractor.__init__4   s+      0'%%+    )	__name__
__module____qualname____doc__	__slots__boolr   strr-    r.   r,   r   r   /   ss        66AAAI,, , 	,
 , , 
, , , , , ,r.   r   datar)   c                 >    | rt          |           t          k    rdS dS )z6Check if the input object is suitable to be processed.TF)lenr   r7   s    r,   is_wrong_documentr;   C   s$     3t99},,t5r.   c                 V    	 |                      d           n# t          $ r Y dS w xY wdS )zLSimple heuristic to determine if a bytestring uses standard unicode encodingzUTF-8FT)decodeUnicodeDecodeErrorr:   s    r,   isutf8r?   J   sC    G   uu4s    
&&bytesobjectc                 V   t          |           rdgS g }t          >t          |           d         }|'|                    |                                           t	          | dd                   pt	          |           }|                    d |D                        d |D             S )z<Read all input or first chunk and return a list of encodingsr   Nr   i:  c                     g | ]	}|j         
S r6   )r   ).0rs     r,   
<listcomp>z#detect_encoding.<locals>.<listcomp>c   s    :::1AJ:::r.   c                 $    g | ]}|t           v|S r6   )r   )rC   gs     r,   rE   z#detect_encoding.<locals>.<listcomp>f   s"    ;;;!!?":":A":":":r.   )r?   cchardet_detectappendlowerr
   extend)r@   guessescchardet_guessdetection_resultss       r,   detect_encodingrO   S   s     k yG"(55jA%NN>//11222 #;vv#677R:k;R;RNN::(9:::;;; <;w;;;;r.   filecontentc                    t          | t                    r| S d}t          |           D ]M}	 |                     |          } n4# t          t
          f$ r  t                              d|           d}Y Jw xY w|pt          | dd          S )znGuess bytestring encoding and try to decode to Unicode string.
    Resort to destructive conversion otherwise.Nzwrong encoding detected: %sr   replace)r   errors)
isinstancer5   rO   r=   LookupErrorr>   LOGGERwarning)rP   htmltextguessed_encodings      r,   decode_filerZ   i   s     +s## H+K88  	"))*:;;H
 E	 /0 	 	 	NN8:JKKKHHH	 Ks;KKKKs   A.A43A4responsec                     t          | t          j        j                  st	          | d          r| j        }n| }t          |          S )zRead the urllib3 object corresponding to the server response, then
    try to guess its encoding and decode it to return a unicode stringr7   )rT   urllib3r[   HTTPResponsehasattrr7   rZ   )r[   resp_contents     r,   decode_responsera   }   sJ     (G,9::  ghPV>W>W  }|$$$r.   urlc                    	 t                               d| d          }|j        dk    r"t                              d|j        |            nwt          |j                  rt                              d|            nGt          |j                  S # t          $ r&}t                              d| |           Y d}~nd}~ww xY wdS )	a,  Fetches page using urllib3 and decodes the response.

    Args:
        url: URL of the page to fetch.

    Returns:
        HTML code as string, or Urllib3 response object (headers + body), or empty string in case
        the result is invalid, or None if there was a problem with the network.

    GET   )timeout   z!not a 200 response: %s for URL %szincorrect input data for URL %szdownload error: %s %sN)		HTTP_POOLrequeststatusrV   errorr;   r7   ra   	Exception)rb   r[   errs      r,   	fetch_urlrn      s    2 $$UC$<<
 ?c!!LL<hosSSSSx}-- 	2LL:C@@@@"8=111  8 8 8,c3777777778 4s   B 
C B;;C 	beginningc                 
    d| vS )zOAssess if the object is proper HTML (awith a corresponding tag or declaration).htmlr6   )ro   s    r,   is_dubious_htmlrr      s    ""r.   
htmlstringc                 n   d|v r<|                      d          \  }}}t                              d|d          dz   |z   } t          t	          |                                                     D ]E\  }}d|v r4|                    d          rt                              d| d          }  n	|d	k    r nF| S )
z>Repair faulty HTML strings to make then palatable for libxml2.doctype
 r   )countz<htmlz/>z\1>   )	partitionDOCTYPE_TAGsub	enumerateiter
splitlinesendswithFAULTY_HTML)rs   ro   	firstline_restilines          r,   repair_faulty_htmlr      s     I'11$77	1d __R!_<<tCdJ
T*"7"7"9"9::;;  4d??t}}T22?$1EEJEq55E r.   
htmlobjectc                     d}	 t          |                     d          t                    }n2# t          $ r%}t                              d|           Y d}~nd}~ww xY w|S )z!Try to pass bytes to LXML parser.Nutf8parserzlxml parser bytestring %s)r   encodeHTML_PARSERrl   rV   rk   )r   treerm   s      r,   fromstring_bytesr      sv    D7*++F33KHHH 7 7 70#666666667Ks   ). 
AAAc                    t          | t                    r| S t          | t          t          f          st	          dt          |                     t          | t                    rU|                     d          r@d| vr<t                              d|            t          |           } | t          d|           d}t          |           } | dd                                         }t          | |          } d}	 t          | t          	          }nN# t          $ r d
}t!          |           }Y n1t"          $ r%}t                              d|           Y d}~nd}~ww xY w|t'          |          dk     r|st!          |           }|Lt)          |          r=t'          |          dk     r*t                              dt'          |                     d}|S )zkLoad object given as input and validate its type
    (accepted: lxml.html tree, bytestring and string)
    zincompatible input type: %shttp zURL detected, downloading: %sNzURL couldn't be processed: %s2   Fr   Tzlxml parsing failed: %sr   ry   z9parsed tree length: %s, wrong data type or not valid HTML)rT   r   bytesr5   	TypeErrortype
startswithrV   debugrn   
ValueErrorrZ   rJ   r   r   r   r   rl   rk   r9   rr   )r   r   ro   fallback_parserm   s        r,   	load_htmlr      s   
 *k** j5#,// I5tJ7G7GHHH 	:s##	J!!&))	J z!!4jAAAz**
<jIIIDZ((J3B3%%''I#J	::JN5*[999 , , ,
++ 5 5 5.444444445 	D		A~
++ OI663t99q==GT	
 	
 	
 Ks   ;D E/	E8EEr   elemlistc                     |                      |          D ]-}|                                }||                    |           .| S )zDelete selected elements.)r~   	getparentremove)r   r   elementparents       r,   
clean_htmlr      sL    99X&& # #""$$MM'"""Kr.   stringc                 t    d                     |                                                                           S )z7Remove superfluous space and normalize remaining space.r   )joinsplitstrip)r   s    r,   	trim_textr     s(    88FLLNN##))+++r.   )9r2   loggingrer   typingr   r   r   r   r   r]   cchardetr	   rH   ImportErrorcharset_normalizerr
   	lxml.htmlr   r   r   settingsr   	getLoggerr/   rV   r   r5   __annotations__disable_warnings
exceptionsInsecureRequestWarningutilRetryRETRY_STRATEGYPoolManagerrh   r   compileIr{   r   r   r4   r;   r   r?   rO   rZ   ra   rn   rr   r   r   r   r   r   r6   r.   r,   <module>r      s      				       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2222222   OOO ) ) ) ) ) ) 9 9 9 9 9 9 9 9 9 9 # # # # # # 
	8	$	$$g.S . . .  +B C C C##
... $  
  G777	juw4   bj0"$77bj+RT22, , , , , , , ,(C D     4    < <49 < < < <,LU5#:. L3 L L L L(%c %c % % % %3 8C=    :#s #t # # # #
3 3 3      +)>    1%sK 78 1Xk=R 1 1 1 1h[ DI +    ,c ,c , , , , , ,s   , 66