
    !}g0]                         d dl Z ddlmZ ddlmZ  G d de          Zd Z e j        de j                  Z	 e j        d	e j                  Z
 e j        d
          Zd ZdS )    N   )_BaseHTMLProcessor)make_safe_absolute_uric                        e Zd Zh dZh dZh dZh dZh dZ ej	        d          Z
h dZh dZh d	Zh d
ZdZdZh dZd fd	Z fdZ fdZ fdZd Zd Z fdZd Zd fd	Z xZS )_HTMLSanitizer>d   event-sourceabimpqsubrdddldtemh1h2h3h4h5h6hrlioltdthtrttulbigcoldeldfndirdivimginskbdmapnavpresubsupvarabbrareacitecodefontformmenusampspantimeasideaudioinputlabelmetersmallsoundtabletbodytfoottheadvideobuttoncanvascenterdialogfigurefooterheaderkeygenlegendnextidoptionoutputselectsourcespacerstrikestrongacronymaddressarticlecaptioncommanddetailssectioncolgroupdatagriddatalistfieldsetmulticolnoscriptoptgroupprogresstextarea
blockquote
figcaption>   
point-size
repeat-max
repeat-minaccept-charsetchidaltr(   endforlowmaxminpqgrelrevsrcurnr3   axischarr5   colsdatafacer8   highhreficonlanglistloopnameopenpingrowssizer;   steptypevrmlwrapalignchoffclassclearcolordelayframeismapr@   mediarulesscopeshapestartstyletitlevaluewidthacceptactionbordercoordsdynsrcgutterheighthiddenhspacelowsrcmethodnohrefnowrapposterprompttargetusemapvalignvolumevspacebalancebgcolorcharoffcharsetcheckedcolspancompactdataflddatasrcdefaultenctypeheaderskeytypeloopendnoshadeoptimumpatternpreloadreplacerowspansummarycontrolsdatetimedisabledhreflanglongdescmultiplereadonlyrequiredselectedsuppresstabindextemplatevariable	accesskey	autofocus	challenge	draggable	hidefocus	inputmode	loopcount	loopstart	maxlength
background
galleryimg
radiogroup
toppaddingbordercolorcellpaddingcellspacingleftspacingautocompletebgpropertiesdatapagesizerightspacingunselectablebottompaddingbordercolordarkcontenteditablebordercolorlightxml:lang>   r   appletscript>.   
text-alignline-heightpause-afterpitch-rangespeech-ratetext-indentwhite-spaceborder-colorpause-beforespeak-headerunicode-bidivoice-familyspeak-numeralletter-spacingvertical-alignborder-collapsetext-decorationbackground-colorborder-top-colorborder-left-colorspeak-punctuationborder-right-colorborder-bottom-colorr7   r   r   floatpausepitchspeakr   cursorr   stressr   azimuthdisplayoverflowrichness	direction	elevation	font-size
font-stylefont-familyfont-weightfont-variant>'   
!importantredtopaquaautoblueboldbothgrayleftlimenavynonetealblackblockbrowngreenoliverightsolidwhitebottomrK   dasheddotteditalicmaroonmediumnormalr   purplesilveryellowfuchsiapointercollapse	underlinetransparentz\^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$>,   annotation-xmlmimnmomsmtdmtrmathmrowmsubmsupr)  mfracmovermrootmsqrtmsrowmtextmerrormglyphmslinemspacemstackmstylemtablemundermactionmfencedmpaddedmscarrymsgroupmsubsupmenclosemlongdivmphantom	mscarries	semantics
annotation
malignmark
mlabeledtr
munderovermaligngroupmprescriptsmmultiscripts>c   altimg-widthaltimg-heightaltimg-valignrq   r(   r{   edger8   r   r   sider   closedepthfencer   othershiftr   xmlnsaccentaltimgr   lengthlquotelspacerquoterspacealttextr  largeopmaxsizeminsizer   voffsetbevelledcrossoutencodinglocationmathsizenotationnumalignr  positionrowalignrowlinesstretchy	charalign	equalrows	fontstyle	linebreak	mathcolor	selection	separator	symmetric
actiontype
columnspan
denomalign
fontweight
groupalign
rowspacing
separators
stackalignaccentundercolumnaligncolumnlinescolumnwidthindentalignindentshiftlineleadingmathvariantscriptleveldecimalpointdisplaystyleequalcolumnsframespacingindenttargetlongdivstylecolumnspacinglinethicknessmovablelimitsscriptminsizealignmentscopelinebreakstylemathbackgroundsubscriptshiftindentalignlastindentshiftlastminlabelspacingindentalignfirstindentshiftfirstsuperscriptshiftlinebreakmultcharinfixlinebreakstylescriptsizemultiplier
xlink:href
xlink:show
xlink:typexmlns:xlink>#   	font-facefont-face-srcmissing-glyphfont-face-namer	   gsetsvgusedefsdesclinepathrectstoptextglyphhkernmpathr   tspancirclemarkerswitchanimateellipsepolygonmetadatapolylineanimateColoranimateMotionforeignObjectlinearGradientradialGradientanimateTransform>   panose-1x-heightxml:base	xml:space
cap-height
glyph-name
marker-end
marker-mid
stop-color
xlink:rolearabic-formhoriz-adv-xtext-anchorxlink:titlefont-stretchmarker-startstop-opacityunits-per-emaccent-heightunicode-rangexlink:actuatexlink:arcrolehoriz-origin-xcolor-renderingstroke-dasharrayoverline-positionstroke-dashoffsetstroke-miterlimitoverline-thicknessunderline-positionunderline-thicknessstrikethrough-positionstrikethrough-thicknessdkrxybycxcydxdyfxfyg1g2rq   rxrytou1u2x1x2y1y2durrs   rv   rw   bboxfillfromr   r   r  refXrefYr   beginr   r   slopestemhstemvr   rw  ascentr   offsetorientoriginpointsrotatestroker   valueswidthscontentdescentr  hangingopacityrestartunicodeversionviewBoxadditivecalcModekeyTimes	keyPoints	repeatDur	transform
accumulate
alphabetic
keySplines
pathLength
visibility
zoomAndPanbaseProfileideographicmarkerUnitsmarkerWidthrepeatCountmarkerHeightmathematicalattributeNameattributeTypegradientUnitssystemLanguagerequiredFeaturesrequiredExtensionspreserveAspectRatior   	fill-ruler  r  r  r  r  r  r  r  fill-opacityr  stroke-widthstroke-linecapstroke-opacitystroke-linejoinN>   r  r,  rQ  rR  rS  rT  rU  rV  application/xhtml+xmlc                     t          t          |                               ||           d| _        d| _        d| _        d S Nr   )superr   __init__unacceptablestackmathmlOKsvgOK)selfr  _type	__class__s      T/var/www/py-google-trends/myenv/lib/python3.11/site-packages/feedparser/sanitizer.pyr[  z_HTMLSanitizer.__init__  s=    nd##,,Xu===!"


    c                     t          t          |                                            d| _        d| _        d| _        d S rY  )rZ  r   resetr\  r]  r^  )r_  ra  s    rb  re  z_HTMLSanitizer.reset  s9    nd##))+++!"


rc  c                 ~  
 | j         }i }|| j        vs| j        r|| j        v r| xj        dz  c_        | j                            d          rXt          |                              d          s6|dk    r|	                    d           |dk    r|	                    d           |dk    rd|v r| xj
        dz  c_
        |dk    rd|v r| xj        dz  c_        | j
        r|| j        v r| j        }n| j        r|| j        v r| j        sxd | j        D             

fd	| j        D             }
| _        d
 |D             | _        d | j        D             

fd| j        D             }
| _        d |D             | _        | j        }| j                            ||          }| j        }n|| j        vrd S | j
        s| j        r2t#          d |D                       rd|vr|	                    d           g }|                     |          D ]\  }}|dk    r3d|v r/|                     |          }	|	r|	                    ||	f           >||v rB|                    ||          }|dk    rt)          |          }|	                    ||f           t+          t,          |                               ||           d S )Nr   htmlrw  r  )rw  zhttp://www.w3.org/2000/svgrI  )rw  z"http://www.w3.org/1998/Math/MathMLc                 6    g | ]}|                                 S  lower.0attrs     rb  
<listcomp>z3_HTMLSanitizer.unknown_starttag.<locals>.<listcomp>  s     JJJdTZZ\\JJJrc  c                     g | ]}|v|	S ri  ri  rm  r	   rk  s     rb  ro  z3_HTMLSanitizer.unknown_starttag.<locals>.<listcomp>  s    LLLQe^^1^^^rc  c                 8    i | ]}|                                 |S ri  rj  rm  r	   s     rb  
<dictcomp>z3_HTMLSanitizer.unknown_starttag.<locals>.<dictcomp>
  "    (C(C(C!A(C(C(Crc  c                 6    g | ]}|                                 S ri  rj  rl  s     rb  ro  z3_HTMLSanitizer.unknown_starttag.<locals>.<listcomp>  s     HHHdTZZ\\HHHrc  c                     g | ]}|v|	S ri  ri  rq  s     rb  ro  z3_HTMLSanitizer.unknown_starttag.<locals>.<listcomp>  s    JJJ1E>>1>>>rc  c                 8    i | ]}|                                 |S ri  rj  rs  s     rb  rt  z3_HTMLSanitizer.unknown_starttag.<locals>.<dictcomp>  ru  rc  c              3   P   K   | ]!}|d                               d          |V  "dS )r   zxlink:N)
startswithrs  s     rb  	<genexpr>z2_HTMLSanitizer.unknown_starttag.<locals>.<genexpr>  s7      BB!!(A(ABABBBBBBrc  )r  zhttp://www.w3.org/1999/xlinkr   r   )acceptable_attributesacceptable_elementsr^  "unacceptable_elements_with_end_tagr\  r`  endswithdictgetappendr]  mathml_elementsmathml_attributessvg_elementssvg_attr_mapsvg_attributessvg_elem_mapanynormalize_attrssanitize_styler   rZ  r   unknown_starttag)r_  tagattrsr|  keymapmixclean_attrskeyr   clean_valuerk  ra  s             @rb  r  z_HTMLSanitizer.unknown_starttag  sa    $ :d...$*.d===&&!+&& z""6** VE{{w// Ve||%LMMMf}}%TUUU f}}!PTY!Y!Y"e|| G5 P P

a

 } (<!<!<(,(>%% t'8 8 8 ( 	DJJd6IJJJELLLLd&9LLLC*/D'(C(Cs(C(C(CD%HHd6GHHHEJJJJd&7JJJC(-D%(C(Cs(C(C(CD%(,(;%'++C55*D444 = 	RDJ 	RBBuBBBCC RF%OOLL!PQQQ..u55 
	1 
	1JCg~~'-B"B"B"11%88 ;&&['9:::---jjc**&==2599E""C<000nd##44S+FFFFFrc  c                    || j         vr|| j        v r| xj        dz  c_        | j        r'|| j        v r|dk    r| j        r| xj        dz  c_        nK| j        rB|| j        v r9| j                            ||          }|dk    r| j        r| xj        dz  c_        nd S t          t          |                               |           d S )Nr   rI  r  )r}  r~  r\  r]  r  r^  r  r  r  rZ  r   unknown_endtag)r_  r  ra  s     rb  r  z_HTMLSanitizer.unknown_endtag*  s    d...d===&&!+&&} (<!<!<&==T]=MMQ&MM t'8 8 8'++C55%<<DJ<JJ!OJJnd##22377777rc  c                     d S Nri  r_  r  s     rb  	handle_piz_HTMLSanitizer.handle_pi9      rc  c                     d S r  ri  r  s     rb  handle_declz_HTMLSanitizer.handle_decl<  r  rc  c                 j    | j         s*t          t          |                               |           d S d S r  )r\  rZ  r   handle_data)r_  r  ra  s     rb  r  z_HTMLSanitizer.handle_data?  s<    % 	:.$''33D99999	: 	:rc  c                 J   t          j        d                              d|          }t          j        d|          sdS t          j        dd|                                          rdS g }t          j        d|          D ]\  }}|s	|                                | j        v r|                    |dz   |z   dz              C|	                    d	          d
                                         dv r[|	                                D ]'}|| j
        vr| j                            |          s n(|                    |dz   |z   dz              | j        r9|                                | j        v r|                    |dz   |z   dz              d                    |          S )Nzurl\s*\(\s*[^\s)]+?\s*\)\s* z@^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$ z\s*[-\w]+\s*:\s*[^:;]*;?z([-\w]+)\s*:\s*([^:;]*)z: ;-r   )r   r   marginpadding)recompiler0   matchstripfindallrk  acceptable_css_propertiesr  splitacceptable_css_keywordsvalid_css_valuesr^  acceptable_svg_propertiesjoin)r_  r   cleanpropr   keywords         rb  r  z_HTMLSanitizer.sanitize_styleC  s   
9::>>sEJJ x_afgg 	2 6-r599??AA 	2:&@%HH 	8 	8KD% zz||t===TD[50367777C#))++/\\\${{}} < <G#4+GGG$($9$?$?$H$H H LLu!4s!:;;; 8

0N N NTD[5036777xxrc  r   c                    t          t          |                               ||          }|dk    r|S t          j        d                              | j        |dz             }|r|                                S t          | j                  S )Nr   z--[^>]*>   )	rZ  r   parse_commentr  r  searchrawdatars   len)r_  r   reportretr  ra  s        rb  r  z_HTMLSanitizer.parse_commentc  s}    ND))776BB!88J 
;''..t|QqSAA 	99;;4<   rc  )NrW  )r   )__name__
__module____qualname__r}  r|  r~  r  r  r  r  r  r  r  r  r  r  r  r  r[  re  r  r  r  r  r  r  r  __classcell__)ra  s   @rb  r   r   "   s       e e eNN N N`* * *&/! /! /!d( ( (T "rz	 - - -O^d d dN$ $ $LNM M MN^ LL	! 	! 	!         =G =G =G =G =G~8 8 8 8 8    : : : : :  @
! 
! 
! 
! 
! 
! 
! 
! 
! 
!rc  r   c                     t          ||          }|                     dd          } |                    |            |                                }|                                                    dd          }|S )Nz	<![CDATA[z&lt;![CDATA[z

)r   r   feedrT   r  )html_sourcer  r`  r   r   s        rb  _sanitize_htmlr  p  sg    x''A%%k>BBKFF;88::D::<<--DKrc  s   ^\s*<!ENTITY([^>]*?)>s   ^\s*<!DOCTYPE([^>]*?)>s   \s+(\w+)\s+"(&#\w+;|[^&"]*)"c                    t          j        d|           }|r|                                pd}| d|dz            | |dz   d         } }t                              |          }t                              d|          }t                              |          }|r|d         pd}d|                                v rd}nd}d}t          |          dk    r+|r)d	 |D             }|rd
d	                    |          z   dz   }t                              ||          | z   } d t                              |          D             }|| |fS )zStrips and replaces the DOCTYPE, returns (rss_version, stripped_data)

    rss_version may be 'rss091n' or None
    stripped_data is the same XML document with a replaced DOCTYPE
    s   <\wNr   rc  r   s   netscaperss091nc                 F    g | ]}t                               |          |S ri  )RE_SAFE_ENTITY_PATTERNr  )rm  es     rb  ro  z#replace_doctype.<locals>.<listcomp>  s<     
 
 
%++A..

 
 
rc  s   <!DOCTYPE feed [
<!ENTITYs   >
<!ENTITY s   >
]>c                 f    i | ].\  }}|                     d           |                     d           /S )zutf-8)decode)rm  r  vs      rb  rt  z#replace_doctype.<locals>.<dictcomp>  sF       Aq 	
188G,,  rc  )r  r  r   RE_ENTITY_PATTERNr  r0   RE_DOCTYPE_PATTERNrk  r  r  r  )	r   r   headentity_resultsdoctype_resultsdoctyper5  replacementsafe_entitiess	            rb  replace_doctyper    s    Igt$$E#ekkmm)rEhuQwheAghh$D '..t44N  d++D )0066O4/!"4;Ggmmoo%% K
?q  ^ 
 
#
 
 

  	#7)..}==>"#K !!+t44t;D *22;??  M D-''rc  )r  rg  r   urlsr   r   r  r  	MULTILINEr  r  r  r  ri  rc  rb  <module>r     s   8 
			 $ $ $ $ $ $ ( ( ( ( ( (K! K! K! K! K!' K! K! K!\   BJ8",GG   RZ :BLII  $$DEE ,( ,( ,( ,( ,(rc  