
    !}g                         d Z dZdZdZdZddlZddlZddlZddlZddl	Z
ddlZddlmZmZ dd	lmZ  ej        e          Zd
ZdZdZd Zd Zd Zd Zd Zd ZddZddZ G d d          ZdS )zc
The following image extraction implementation was taken from an old
copy of Reddit's source code.
	newspaperzLucas Ou-YangMITzCopyright 2014, Lucas Ou-Yang    N)Image	ImageFile   )urlsi   )Z   r	   i  c                     t          j                    }|                     || j                   |                    d           |                                S Nr   )ioStringIOsaveformatseekread)imagess     P/var/www/py-google-trends/myenv/lib/python3.11/site-packages/newspaper/images.pyimage_to_strr      s?    
A	JJq%,FF1III6688O    c                     t          j        |           } |                     d           t          j        |           }|S r   )r   r   r   r   open)r   r   s     r   str_to_imager   $   s0    
AAFF1IIIJqMMELr   c                 n    t          |           } |                     t          t          j                   | S N)square_image	thumbnailthumbnail_sizer   	ANTIALIAS)r   s    r   prepare_imager    +   s*    E	OONEO444Lr   c                     |                                  }t          |          fd|D             }t          d |D                        S )z' Calculate the entropy of an image
    c                 4    g | ]}t          |          z  S  )float).0h	hist_sizes     r   
<listcomp>z!image_entropy.<locals>.<listcomp>6   s$    ///QE!HHy ///r   c                 J    g | ] }|d k    |t          j        |d          z  !S )r      )mathlog)r%   ps     r   r(   z!image_entropy.<locals>.<listcomp>7   s,    <<<Q!VVTXa^^#VVVr   )	histogramsum)imghistr'   s     @r   image_entropyr2   1   sV     ==??DD		I////$///D<<T<<<====r   c                    | j         \  }}||k    rt          ||z
  d          }|                     d||z
  ||f          }|                     dd||f          }t          |          t          |          k     r|                     dd|||z
  f          } n|                     d|||f          } | j         \  }}||k    | S )z}If the image is taller than it is wide, square it off. determine
    which pieces to cut off based on the entropy pieces
    
   r   )sizemincropr2   )r0   xyslice_heightbottomtops         r   r   r   :   s     8DAq
a%%1q5"~~1a,.1566hh1a.//  =#5#555((Aq!Q%5677CC((A|Q233Cx1 a%% Jr   c                     |                      d          } d                    d |                     d          D                       } | S )z(Url quotes unicode data out of urls
    utf8 c                 v    g | ]6}t          |          d k    rt          j                            |          n|7S )   )ordurllibparsequote)r%   cs     r   r(   zclean_url.<locals>.<listcomp>Q   sR     H H H./SVVs]] <%%a(((()H H Hr   zutf-8)encodejoindecode)urls    r   	clean_urlrK   M   s\     **V

C
'' H H36::g3F3FH H H I ICJr   Fc                 	   d}|rd nd}t          |           } |                     d          s|S d }	 	 t          j        | dd||d          }|r |j                            t                    }n|j                                        }|j                            d          }	|	sI||D|j                                         |j        j	        r |j        j	                                         S S S d	|	v rt          j                    }
|}|
j        s|r	 |
                    |           nv# t          $ r t          j                     d }
Y nt"          $ r t          j                     d }
Y nbt$          $ r)}t'          j        |           d
k    }|rn|d }
Y d }~n5d }~ww xY w|j                            t                    }||z  }|
j        s||
I||D|j                                         |j        j	        r |j        j	                                         S S S |rZ|
j        rS|
j        j        |D|j                                         |j        j	        r |j        j	                                         S S S |rI||D|j                                         |j        j	        r |j        j	                                         S S S nK|rI||D|j                                         |j        j	        r |j        j	                                         S S S |	|f|D|j                                         |j        j	        r |j        j	                                         S S S # t          j        j        $ r}|dz  }||k    rnt0                              d| d|           |cY d }~|D|j                                         |j        j	        r |j        j	                                         S S S Y d }~nd }~ww xY w	 |C|j                                         |j        j	        r|j        j	                                         nL# |D|j                                         |j        j	        r |j        j	                                         w w w xY wQ)Nr   NN)zhttp://zhttps://T   )z
User-AgentReferer)streamtimeoutheaderszContent-Typer   icor   zerror while fetching: z refer: )rK   
startswithrequestsgetrawr   
chunk_sizerR   close_connectionr   Parserr   feedIOError	traceback	print_exc
ValueError	Exceptionr   url_to_filetyper5   
exceptionsRequestExceptionr,   debug)rJ   	useragentrefererretries	dimensioncur_trynothingresponsecontentcontent_typer-   new_datae
is_favicons                 r   	fetch_urlrr   V   s|   G1dd\G
C..C>>122 HE5D	5|Ca'"J J   H  .",++J77",++--#+//??L b #""$$$<+ 5L,224444 $5c ,&&$&&"' (h (x(((("   !+--- %   !+--- $ 	 	 	 '+&:3&?&?5&H
% $ "#G 	  (|00<<Hx'G- ' (h (0 9"& #""$$$<+ 5L,224444 $5'  # #7<  #""$$$<+ 5L,224444 $5#  #" #""$$$<+ 5L,224444 $5##  #""$$$<+ 5L,224444 $5  ( #""$$$<+ 5L,224444 $5 "3 	 	 	qLG'!!			33) * * *#""$$$<+ 5L,224444 $5 "!!!!	!
 #""$$$<+ 5L,22444 #""$$$<+ 5L,224444 $5IE5s   A3M .#M D( 'M (FM 	F'M )	F2FM F3M M 2M =M 	M O+(,O&O+P5 !P5 &O++P5 5A	Q>c                 *    t          | |||d          S )NT)ri   )rr   )rJ   rf   rg   rh   s       r   fetch_image_dimensionrt      s    S)WgFFFFr   c                   ,    e Zd Zd Zd Zd Zd Zd ZdS )Scraperc                     |j         | _         |j        | _        |j        | _        |j        | _        | j        j        | _        d S r   )rJ   imgstop_imgconfigbrowser_user_agentrf   )selfarticles     r   __init__zScraper.__init__   s6    ;L	n7r   c                 0   | j         s	| j        sd S | j        r| j        S d}d }| j         D ]>}t          || j        | j                  }|                     ||          }||k    r|}|}?t                              d                    |                     |S )Nr   rg   zusing max img {})	rx   ry   rt   rf   rJ   calculate_arear,   re   r   )r|   max_areamax_urlimg_urlri   areas         r   largest_image_urlzScraper.largest_image_url   s    y 	 	4< 	 <y 	" 	"G-; ; ;I&&w	::Dh!		$++G44555r   c                    |sdS |d         |d         z  }|t           k     rt                              d|z             dS |d         t          d         k     rdS t	          |          t          |          z  }|| j        j        k    rt                              d|z             dS |                                }d|v sd|v r"t                              d|z             |dz  }|S )	Nr   r   zignore little %szignore dims %sspritelogozpenalizing sprite %sr4   )	minimal_arear,   re   r   maxr6   rz   image_dimension_rationlower)r|   r   ri   r   current_ratiolower_case_urls         r   r   zScraper.calculate_area   s     	1|il*,II(723331 Q<.+++1IY74;===II&01111 ~%%>)A)AII,w6777BJDr   c                 |    t          || j        | j                  }|                     ||          }|t          k    S )Nr   )rt   rf   rJ   r   r   )r|   r   ri   r   s       r   satisfies_requirementszScraper.satisfies_requirements   sA    )T^TX7 7 7	""7I66l""r   c                     |                                  }|ret          || j                  \  }}|rJt          |          }	 t	          |          }n&# t
          $ r}d|j        v rY d}~dS Y d}~nd}~ww xY w||fS dS )zGIdentifies top image, trims out a thumbnail and also has a url
        r   
interlacedNrM   )r   rr   rJ   r   r    r]   message)r|   	image_urlrn   	image_strr   rp   s         r   r   zScraper.thumbnail   s     **,,	 		(&/	48&L&L&L#L) ($Y//$)%00EE $ $ $#qy00#ttttt 10000$ i''zs   A 
A5	A00A5N)__name__
__module____qualname__r~   r   r   r   r   r#   r   r   rv   rv      s_        8 8 8  &  0# # #    r   rv   )Nr   F)Nr   ) __doc__	__title__
__author____license____copyright__loggingr+   r   r^   urllib.parserC   rU   PILr   r   r?   r   	getLoggerr   r,   rX   r   r   r   r   r    r2   r   rK   rr   rt   rv   r#   r   r   <module>r      ss    	
/   				                               g!!
      > > >  &  M5 M5 M5 M5`G G G GH H H H H H H H H Hr   