
    !}g                         d Z dZdZdZdZddlZddlZddlmZ dd	l	m
Z
 dd
lmZ  ej        e          ZdZd ZddZddZd Z G d de          ZddZdS )zb
All code involving requests and responses over the http network
must be abstracted in this file.
	newspaperzLucas Ou-YangMITzCopyright 2014, Lucas Ou-Yang    N   )Configuration)
ThreadPoolcjz
ISO-8859-1c                 6    |r|nd|it                      | d|dS )zThis Wrapper method exists b/c some values in req_kwargs dict
    are methods which need to be called every time we make a request
    z
User-AgentT)headerscookiestimeoutallow_redirectsproxiesr   )r   	useragentr   r   s       Q/var/www/py-google-trends/myenv/lib/python3.11/site-packages/newspaper/network.pyget_request_kwargsr      s4    
 &D77L)+D44      c                     	 t          | ||          S # t          j        j        $ r+}t                              d|d|            Y d}~dS d}~ww xY w)z HTTP response code agnostic
    zget_html() error. z	 on URL: N )get_html_2XX_onlyrequests
exceptionsRequestExceptionlogdebug)urlconfigresponsees       r   get_htmlr    %   sj     fh777/   			qqq##>???rrrrrs    A AAc           
         |pt                      }|j        }|j        }|j        }|j        }|t          |          S t          j        dd| it          ||||          }t          |          }|j	        r|
                                 |S )zConsolidated logic for http requests from newspaper. We handle error cases:
    - Attempt to find encoding of the html by using HTTP header. Fallback to
      'ISO-8859-1' if not provided.
    - Error out if a non 2XX HTTP response code is returned.
    Nr    )r   browser_user_agentrequest_timeoutr   r   _get_html_from_responser   getr   http_success_onlyraise_for_status)r   r   r   r   r   r   r   htmls           r   r   r   /   s     &}F)I$GnGnG&x000| M MM%gy'7KKM MH #8,,D $!!###Kr   c                    | j         t          k    r| j        }nn| j        }d| j                            d          vrKt          j                            | j                  }t          |          dk    r|d         | _         | j        }|pdS )Ncharsetzcontent-typer   r   )
encodingFAIL_ENCODINGtextcontentr   r&   r   utilsget_encodings_from_contentlen)r   r)   	encodingss      r   r%   r%   J   s    M))}H,00@@@@ AA(-PPI9~~!!$-aL!}:2r   c                        e Zd ZdZddZd ZdS )MRequesta  Wrapper for request object for multithreading. If the domain we are
    crawling is under heavy load, the self.resp will be left as None.
    If this is the case, we still want to report the url which has failed
    so (perhaps) we can try again later.
    Nc                     || _         || _        |pt                      }|j        | _        |j        | _        |j        | _        |j        | _        d | _	        d S N)
r   r   r   r#   r   r$   r   r   r   resp)selfr   r   s      r   __init__zMRequest.__init___   sO    *=??2-~~			r   c           
      n   	 t          j        | j        fi t          | j        | j        | j        | j                  | _        | j	        j
        r| j                                         d S d S # t           j        j        $ r5}t                              dt!          |          z              Y d }~d S d }~ww xY w)Nz[REQUEST FAILED] )r   r&   r   r   r   r   r   r   r8   r   r'   r(   r   r   r   criticalstr)r9   r   s     r   sendzMRequest.sendi   s    	7 TX K K1CdndlDL2J 2J K KDI{, -	**,,,,,- -"3 	7 	7 	7LL,s1vv5666666666	7s   A%A+ +B4?*B//B4r7   )__name__
__module____qualname____doc__r:   r>   r"   r   r   r5   r5   Y   sA         
   7 7 7 7 7r   r5   c                    |pt                      }|j        }|j        }t          ||          }g }| D ]%}|                    t          ||                     &|D ]}|                    |j                   |                                 |S )zRequest multiple urls via mthreading, order of urls & requests is stable
    returns same requests but with response variables filled.
    )	r   number_threadsthread_timeout_secondsr   appendr5   add_taskr>   wait_completion)urlsr   num_threadsr   pool
m_requestsr   reqs           r   multithread_requestrN   s   s     &}F'K+Gk7++DJ 1 1(3//0000    chr   )NNr7   )rB   	__title__
__author____license____copyright__loggingr   configurationr   
mthreadingr   settingsr	   	getLoggerr?   r   r-   r   r    r   r%   objectr5   rN   r"   r   r   <module>rY      s    	
/   ( ( ( ( ( ( " " " " " "      g!! 
 
 
      6  7 7 7 7 7v 7 7 74     r   